├── .gitignore ├── assignment1 ├── cs231n │ ├── __init__.py │ ├── datasets │ │ ├── .gitignore │ │ └── get_datasets.sh │ ├── classifiers │ │ ├── __init__.py │ │ ├── softmax.py │ │ ├── linear_svm.py │ │ ├── linear_classifier.py │ │ └── k_nearest_neighbor.py │ ├── vis_utils.py │ ├── gradient_check.py │ ├── features.py │ └── data_utils.py ├── .gitignore ├── README.md ├── start_ipython_osx.sh ├── collectSubmission.sh ├── frameworkpython ├── requirements.txt ├── setup_googlecloud.sh └── softmax.md ├── assignment2 ├── cs231n │ ├── __init__.py │ ├── classifiers │ │ ├── __init__.py │ │ └── cnn.py │ ├── .gitignore │ ├── datasets │ │ ├── .gitignore │ │ └── get_datasets.sh │ ├── im2col_cython.cpython-35m-x86_64-linux-gnu.so │ ├── setup.py │ ├── im2col.py │ ├── vis_utils.py │ ├── layer_utils.py │ ├── gradient_check.py │ ├── im2col_cython.pyx │ ├── optim.py │ ├── data_utils.py │ └── fast_layers.py ├── .gitignore ├── puppy.jpg ├── kitten.jpg ├── start_ipython_osx.sh ├── collectSubmission.sh ├── frameworkpython ├── requirements.txt └── README.md ├── assignment3 ├── cs231n │ ├── __init__.py │ ├── classifiers │ │ ├── __init__.py │ │ └── squeezenet.py │ ├── datasets │ │ ├── .gitignore │ │ ├── get_imagenet_val.sh │ │ ├── get_assignment3_data.sh │ │ ├── get_squeezenet_tf.sh │ │ ├── get_coco_captioning.sh │ │ └── imagenet_val_25.npz │ ├── .gitignore │ ├── setup.py │ ├── im2col.py │ ├── image_utils.py │ ├── optim.py │ ├── coco_utils.py │ ├── gradient_check.py │ ├── layer_utils.py │ ├── im2col_cython.pyx │ ├── captioning_solver.py │ ├── data_utils.py │ ├── fast_layers.py │ └── layers.py ├── .gitignore ├── sky.jpg ├── kitten.jpg ├── gan-checks-tf.npz ├── styles │ ├── muse.jpg │ ├── tubingen.jpg │ ├── the_scream.jpg │ ├── starry_night.jpg │ └── composition_vii.jpg ├── gan_outputs_tf.png ├── gan_outputs_pytorch.png ├── example_styletransfer.png ├── style-transfer-checks.npz ├── style-transfer-checks-tf.npz ├── start_ipython_osx.sh ├── where_are_my_drivers.sh ├── frameworkpython ├── 
collectSubmission.sh └── requirements.txt ├── .dcgan_err.png ├── .style_err.png ├── .dcgan_right.png ├── .style_right.png └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.zip -------------------------------------------------------------------------------- /assignment1/cs231n/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assignment2/cs231n/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assignment3/cs231n/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assignment2/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assignment3/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assignment1/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | .env/* 4 | -------------------------------------------------------------------------------- /assignment2/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | .env/* 4 | -------------------------------------------------------------------------------- /assignment3/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | .env/* 4 | 
-------------------------------------------------------------------------------- /assignment3/cs231n/datasets/.gitignore: -------------------------------------------------------------------------------- 1 | coco_captioning/* 2 | MNIST_data/* 3 | -------------------------------------------------------------------------------- /assignment2/cs231n/.gitignore: -------------------------------------------------------------------------------- 1 | build/* 2 | im2col_cython.c 3 | im2col_cython.so 4 | -------------------------------------------------------------------------------- /assignment3/cs231n/.gitignore: -------------------------------------------------------------------------------- 1 | build/* 2 | im2col_cython.c 3 | im2col_cython.so 4 | -------------------------------------------------------------------------------- /.dcgan_err.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/.dcgan_err.png -------------------------------------------------------------------------------- /.style_err.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/.style_err.png -------------------------------------------------------------------------------- /assignment3/cs231n/datasets/get_imagenet_val.sh: -------------------------------------------------------------------------------- 1 | wget http://cs231n.stanford.edu/imagenet_val_25.npz 2 | -------------------------------------------------------------------------------- /.dcgan_right.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/.dcgan_right.png -------------------------------------------------------------------------------- /.style_right.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/.style_right.png -------------------------------------------------------------------------------- /assignment3/sky.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/sky.jpg -------------------------------------------------------------------------------- /assignment2/puppy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment2/puppy.jpg -------------------------------------------------------------------------------- /assignment2/kitten.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment2/kitten.jpg -------------------------------------------------------------------------------- /assignment3/kitten.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/kitten.jpg -------------------------------------------------------------------------------- /assignment3/gan-checks-tf.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/gan-checks-tf.npz -------------------------------------------------------------------------------- /assignment3/styles/muse.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/styles/muse.jpg 
-------------------------------------------------------------------------------- /assignment3/gan_outputs_tf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/gan_outputs_tf.png -------------------------------------------------------------------------------- /assignment3/styles/tubingen.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/styles/tubingen.jpg -------------------------------------------------------------------------------- /assignment1/cs231n/datasets/.gitignore: -------------------------------------------------------------------------------- 1 | cifar-10-batches-py/* 2 | tiny-imagenet-100-A* 3 | tiny-imagenet-100-B* 4 | tiny-100-A-pretrained/* 5 | -------------------------------------------------------------------------------- /assignment2/cs231n/datasets/.gitignore: -------------------------------------------------------------------------------- 1 | cifar-10-batches-py/* 2 | tiny-imagenet-100-A* 3 | tiny-imagenet-100-B* 4 | tiny-100-A-pretrained/* 5 | -------------------------------------------------------------------------------- /assignment3/styles/the_scream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/styles/the_scream.jpg -------------------------------------------------------------------------------- /assignment3/gan_outputs_pytorch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/gan_outputs_pytorch.png -------------------------------------------------------------------------------- /assignment3/styles/starry_night.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/styles/starry_night.jpg -------------------------------------------------------------------------------- /assignment1/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | from cs231n.classifiers.k_nearest_neighbor import * 2 | from cs231n.classifiers.linear_classifier import * 3 | -------------------------------------------------------------------------------- /assignment3/example_styletransfer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/example_styletransfer.png -------------------------------------------------------------------------------- /assignment3/style-transfer-checks.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/style-transfer-checks.npz -------------------------------------------------------------------------------- /assignment3/styles/composition_vii.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/styles/composition_vii.jpg -------------------------------------------------------------------------------- /assignment1/README.md: -------------------------------------------------------------------------------- 1 | Details about this assignment can be found [on the course webpage](http://cs231n.github.io/), under Assignment #1 of Spring 2017. 
2 | -------------------------------------------------------------------------------- /assignment3/cs231n/datasets/get_assignment3_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./get_coco_captioning.sh 3 | ./get_squeezenet_tf.sh 4 | ./get_imagenet_val.sh 5 | 6 | -------------------------------------------------------------------------------- /assignment3/cs231n/datasets/get_squeezenet_tf.sh: -------------------------------------------------------------------------------- 1 | wget "http://cs231n.stanford.edu/squeezenet_tf.zip" 2 | unzip squeezenet_tf.zip 3 | rm squeezenet_tf.zip 4 | -------------------------------------------------------------------------------- /assignment3/style-transfer-checks-tf.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/style-transfer-checks-tf.npz -------------------------------------------------------------------------------- /assignment1/start_ipython_osx.sh: -------------------------------------------------------------------------------- 1 | # Assume the virtualenv is called .env 2 | 3 | cp frameworkpython .env/bin 4 | .env/bin/frameworkpython -m IPython notebook 5 | -------------------------------------------------------------------------------- /assignment2/start_ipython_osx.sh: -------------------------------------------------------------------------------- 1 | # Assume the virtualenv is called .env 2 | 3 | cp frameworkpython .env/bin 4 | .env/bin/frameworkpython -m IPython notebook 5 | -------------------------------------------------------------------------------- /assignment3/start_ipython_osx.sh: -------------------------------------------------------------------------------- 1 | # Assume the virtualenv is called .env 2 | 3 | cp frameworkpython .env/bin 4 | .env/bin/frameworkpython -m IPython notebook 5 | 
-------------------------------------------------------------------------------- /assignment3/cs231n/datasets/get_coco_captioning.sh: -------------------------------------------------------------------------------- 1 | wget "http://cs231n.stanford.edu/coco_captioning.zip" 2 | unzip coco_captioning.zip 3 | rm coco_captioning.zip 4 | -------------------------------------------------------------------------------- /assignment3/cs231n/datasets/imagenet_val_25.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/cs231n/datasets/imagenet_val_25.npz -------------------------------------------------------------------------------- /assignment1/cs231n/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | # Get CIFAR10 2 | wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz 3 | tar -xzvf cifar-10-python.tar.gz 4 | rm cifar-10-python.tar.gz 5 | -------------------------------------------------------------------------------- /assignment2/cs231n/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | # Get CIFAR10 2 | wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz 3 | tar -xzvf cifar-10-python.tar.gz 4 | rm cifar-10-python.tar.gz 5 | -------------------------------------------------------------------------------- /assignment2/cs231n/im2col_cython.cpython-35m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment2/cs231n/im2col_cython.cpython-35m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /assignment1/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | rm -f assignment1.zip 2 | zip 
-r assignment1.zip . -x "*.git*" "*cs231n/datasets*" "*.ipynb_checkpoints*" "*README.md" "*collectSubmission.sh" "*requirements.txt" ".env/*" 3 | -------------------------------------------------------------------------------- /assignment2/cs231n/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | 6 | extensions = [ 7 | Extension('im2col_cython', ['im2col_cython.pyx'], 8 | include_dirs = [numpy.get_include()] 9 | ), 10 | ] 11 | 12 | setup( 13 | ext_modules = cythonize(extensions), 14 | ) 15 | -------------------------------------------------------------------------------- /assignment3/cs231n/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | 6 | extensions = [ 7 | Extension('im2col_cython', ['im2col_cython.pyx'], 8 | include_dirs = [numpy.get_include()] 9 | ), 10 | ] 11 | 12 | setup( 13 | ext_modules = cythonize(extensions), 14 | ) 15 | -------------------------------------------------------------------------------- /assignment3/where_are_my_drivers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Checking for CUDA and installing." 3 | # Check for CUDA and try to install. 4 | if ! dpkg-query -W cuda; then 5 | # The 16.04 installer works with 16.10. 
6 | curl -O http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-repo-ubuntu1604_8.0.61-1_amd64.deb 7 | dpkg -i ./cuda-repo-ubuntu1604_8.0.61-1_amd64.deb 8 | apt-get update 9 | apt-get install cuda -y 10 | fi 11 | -------------------------------------------------------------------------------- /assignment2/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | files="BatchNormalization.ipynb 2 | ConvolutionalNetworks.ipynb 3 | Dropout.ipynb 4 | FullyConnectedNets.ipynb 5 | PyTorch.ipynb 6 | TensorFlow.ipynb" 7 | 8 | for file in $files 9 | do 10 | if [ ! -f $file ]; then 11 | echo "Required notebook $file not found." 12 | exit 0 13 | fi 14 | done 15 | 16 | rm -f assignment2.zip 17 | zip -r assignment2.zip . -x "*.git*" "*cs231n/datasets*" "*.ipynb_checkpoints*" "*README.md" "*collectSubmission.sh" "*requirements.txt" ".env/*" "*.pyc" "*cs231n/build/*" 18 | -------------------------------------------------------------------------------- /assignment1/frameworkpython: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # what real Python executable to use 4 | #PYVER=2.7 5 | #PATHTOPYTHON=/usr/local/bin/ 6 | #PYTHON=${PATHTOPYTHON}python${PYVER} 7 | 8 | PYTHON=$(which $(readlink .env/bin/python)) # only works with python3 9 | 10 | # find the root of the virtualenv, it should be the parent of the dir this script is in 11 | ENV=`$PYTHON -c "import os; print(os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..')))"` 12 | 13 | # now run Python with the virtualenv set as Python's HOME 14 | export PYTHONHOME=$ENV 15 | exec $PYTHON "$@" 16 | -------------------------------------------------------------------------------- /assignment2/frameworkpython: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # what real Python executable to use 4 | #PYVER=2.7 5 | #PATHTOPYTHON=/usr/local/bin/ 6 | 
#PYTHON=${PATHTOPYTHON}python${PYVER} 7 | 8 | PYTHON=$(which $(readlink .env/bin/python)) # only works with python3 9 | 10 | # find the root of the virtualenv, it should be the parent of the dir this script is in 11 | ENV=`$PYTHON -c "import os; print(os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..')))"` 12 | 13 | # now run Python with the virtualenv set as Python's HOME 14 | export PYTHONHOME=$ENV 15 | exec $PYTHON "$@" 16 | -------------------------------------------------------------------------------- /assignment3/frameworkpython: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # what real Python executable to use 4 | #PYVER=2.7 5 | #PATHTOPYTHON=/usr/local/bin/ 6 | #PYTHON=${PATHTOPYTHON}python${PYVER} 7 | 8 | PYTHON=$(which $(readlink .env/bin/python)) # only works with python3 9 | 10 | # find the root of the virtualenv, it should be the parent of the dir this script is in 11 | ENV=`$PYTHON -c "import os; print(os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..')))"` 12 | 13 | # now run Python with the virtualenv set as Python's HOME 14 | export PYTHONHOME=$ENV 15 | exec $PYTHON "$@" 16 | -------------------------------------------------------------------------------- /assignment3/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | files="GANs-PyTorch.ipynb 2 | GANs-TensorFlow.ipynb 3 | LSTM_Captioning.ipynb 4 | NetworkVisualization-PyTorch.ipynb 5 | NetworkVisualization-TensorFlow.ipynb 6 | RNN_Captioning.ipynb 7 | StyleTransfer-PyTorch.ipynb 8 | StyleTransfer-TensorFlow.ipynb" 9 | 10 | for file in $files 11 | do 12 | if [ ! -f $file ]; then 13 | echo "Required notebook $file not found." 14 | exit 0 15 | fi 16 | done 17 | 18 | 19 | rm -f assignment3.zip 20 | zip -r assignment3.zip . 
-x "*.git" "*cs231n/datasets*" "*.ipynb_checkpoints*" "*README.md" "*collectSubmission.sh" "*requirements.txt" ".env/*" "*.pyc" "*cs231n/build/*" 21 | -------------------------------------------------------------------------------- /assignment1/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython==0.23.4 2 | Jinja2==2.8 3 | MarkupSafe==0.23 4 | Pillow==3.0.0 5 | Pygments==2.0.2 6 | appnope==0.1.0 7 | argparse==1.2.1 8 | backports-abc==0.4 9 | backports.ssl-match-hostname==3.5.0.1 10 | certifi==2015.11.20.1 11 | cycler==0.10.0 12 | decorator==4.0.6 13 | future==0.16.0 14 | gnureadline==6.3.3 15 | ipykernel==4.2.2 16 | ipython==4.0.1 17 | ipython-genutils==0.1.0 18 | ipywidgets==4.1.1 19 | jsonschema==2.5.1 20 | jupyter==1.0.0 21 | jupyter-client==4.1.1 22 | jupyter-console==4.0.3 23 | jupyter-core==4.0.6 24 | matplotlib==2.0.0 25 | mistune==0.7.1 26 | nbconvert==4.1.0 27 | nbformat==4.0.1 28 | notebook==5.4.1 29 | numpy==1.10.4 30 | path.py==8.1.2 31 | pexpect==4.0.1 32 | pickleshare==0.5 33 | ptyprocess==0.5 34 | pyparsing==2.0.7 35 | python-dateutil==2.4.2 36 | pytz==2015.7 37 | pyzmq==15.1.0 38 | qtconsole==4.1.1 39 | scipy==0.16.1 40 | simplegeneric==0.8.1 41 | singledispatch==3.4.0.3 42 | site==0.0.1 43 | six==1.10.0 44 | terminado==0.5 45 | tornado==4.3 46 | traitlets==4.0.0 47 | -------------------------------------------------------------------------------- /assignment2/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython>=0.25.2 2 | Jinja2>=2.8 3 | MarkupSafe>=0.23 4 | Pillow>=3.0.0 5 | Pygments>=2.0.2 6 | appnope>=0.1.0 7 | argparse>=1.2.1 8 | backports-abc>=0.4 9 | backports.ssl-match-hostname>=3.5.0.1 10 | certifi>=2015.11.20.1 11 | cycler>=0.10.0 12 | decorator>=4.0.6 13 | future>=0.16.0 14 | #gnureadline>=6.3.3 15 | h5py>=2.7.0 16 | ipykernel>=4.2.2 17 | ipython>=4.0.1 18 | ipython-genutils>=0.1.0 19 | ipywidgets>=4.1.1 20 | 
jsonschema>=2.5.1 21 | jupyter>=1.0.0 22 | jupyter-client>=4.1.1 23 | jupyter-console>=4.0.3 24 | jupyter-core>=4.0.6 25 | matplotlib>=2.0.0 26 | mistune>=0.7.1 27 | nbconvert>=4.1.0 28 | nbformat>=4.0.1 29 | nltk>=3.2.2 30 | notebook>=5.4.1 31 | numpy>=1.12.1 32 | path.py>=8.1.2 33 | pexpect>=4.0.1 34 | pickleshare>=0.5 35 | ptyprocess>=0.5 36 | pyparsing>=2.0.7 37 | python-dateutil>=2.4.2 38 | pytz>=2015.7 39 | pyzmq>=15.1.0 40 | qtconsole>=4.1.1 41 | scipy>=0.19.0 42 | simplegeneric>=0.8.1 43 | singledispatch>=3.4.0.3 44 | site>=0.0.1 45 | six>=1.10.0 46 | terminado>=0.5 47 | tornado>=4.3 48 | traitlets>=4.0.0 49 | -------------------------------------------------------------------------------- /assignment3/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython==0.25.2 2 | Jinja2==2.8 3 | MarkupSafe==0.23 4 | Pillow==3.0.0 5 | Pygments==2.0.2 6 | appnope==0.1.0 7 | argparse==1.2.1 8 | backports-abc==0.4 9 | backports.ssl-match-hostname==3.5.0.1 10 | certifi==2015.11.20.1 11 | cycler==0.10.0 12 | decorator==4.0.6 13 | future==0.16.0 14 | gnureadline==6.3.3 15 | h5py==2.7.0 16 | ipykernel==4.2.2 17 | ipython==4.0.1 18 | ipython-genutils==0.1.0 19 | ipywidgets==4.1.1 20 | jsonschema==2.5.1 21 | jupyter==1.0.0 22 | jupyter-client==4.1.1 23 | jupyter-console==4.0.3 24 | jupyter-core==4.0.6 25 | matplotlib==2.0.0 26 | mistune==0.7.1 27 | nbconvert==4.1.0 28 | nbformat==4.0.1 29 | nltk==3.2.2 30 | notebook==5.4.1 31 | numpy==1.12.1 32 | path.py==8.1.2 33 | pexpect==4.0.1 34 | pickleshare==0.5 35 | ptyprocess==0.5 36 | pyparsing==2.0.7 37 | python-dateutil==2.4.2 38 | pytz==2015.7 39 | pyzmq==15.1.0 40 | qtconsole==4.1.1 41 | scipy==0.19.0 42 | simplegeneric==0.8.1 43 | singledispatch==3.4.0.3 44 | site==0.0.1 45 | six==1.10.0 46 | terminado==0.5 47 | tornado==4.3 48 | traitlets==4.0.0 49 | h5py==2.7.0 50 | -------------------------------------------------------------------------------- 
/assignment1/setup_googlecloud.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This is the set-up script for Google Cloud. 4 | sudo apt-get update 5 | sudo apt-get install libncurses5-dev 6 | sudo apt-get install python-dev 7 | sudo apt-get install python-pip 8 | sudo apt-get install libjpeg8-dev 9 | sudo ln -s /usr/lib/x86_64-linux-gnu/libjpeg.so /usr/lib 10 | pip install pillow 11 | sudo apt-get build-dep python-imaging 12 | sudo apt-get install libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev 13 | sudo pip install virtualenv 14 | virtualenv .env # Create a virtual environment 15 | source .env/bin/activate # Activate the virtual environment 16 | pip install -r requirements.txt # Install dependencies 17 | deactivate 18 | echo "**************************************************" 19 | echo "***** End of Google Cloud Set-up Script ********" 20 | echo "**************************************************" 21 | echo "" 22 | echo "If you had no errors, You can proceed to work with your virtualenv as normal." 23 | echo "(run 'source .env/bin/activate' in your assignment directory to load the venv," 24 | echo " and run 'deactivate' to exit the venv. See assignment handout for details.)" 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CS231n Spring 2017 homework 2 | My implementation of [CS231n spring 2017 homework](http://cs231n.github.io/). 3 | I chose to use tensorflow to implement assignment3. Some weird things happened in DCGAN and styletransfer. 4 | 5 | 1. In questions about DCGAN, all the images in a single step are the same, although all the checks seem to be right. 2. In questions about styletransfer, my code can't synthesize images similar to results in the slides. It seems that my network doesn't learn to the right style.
7 | 8 | Hoping someone can pull me out of the mire. 9 | 10 | DCGAN error result 11 | 12 | ![DCGAN error result](https://raw.githubusercontent.com/Psunshine/CS231n-Spring-2017-Assignment/master/.dcgan_err.png) 13 | 14 | DCGAN right result 15 | 16 | ![DCGAN right result](https://raw.githubusercontent.com/Psunshine/CS231n-Spring-2017-Assignment/master/.dcgan_right.png) 17 | 18 | styletransfer error result 19 | 20 | ![style error result](https://raw.githubusercontent.com/Psunshine/CS231n-Spring-2017-Assignment/master/.style_err.png) 21 | 22 | styletransfer right result 23 | 24 | ![style right result](https://raw.githubusercontent.com/Psunshine/CS231n-Spring-2017-Assignment/master/.style_right.png) 25 | 26 | -------------- 27 | 自己完成的[CS231n spring 2017 homework](http://cs231n.github.io/)。 28 | assignment3使用tensorflow实现,但是有两处结果不太对: 29 | 30 | 1. DCGAN一问中每一步结果都是一样的。而奇怪的是前面的check都通过了。 31 | 2. styletransfer一问中得到的图像和课件上的不一样,应该是没有正确转换style。 32 | 33 | 34 | 35 | 试验了几天也没找到原因,希望有明白的大佬可以提点提点 36 | -------------------------------------------------------------------------------- /assignment1/cs231n/vis_utils.py: -------------------------------------------------------------------------------- 1 | #from past.builtins import xrange 2 | 3 | from math import sqrt, ceil 4 | import numpy as np 5 | 6 | def visualize_grid(Xs, ubound=255.0, padding=1): 7 | """ 8 | Reshape a 4D tensor of image data to a grid for easy visualization. 
9 | 10 | Inputs: 11 | - Xs: Data of shape (N, H, W, C) 12 | - ubound: Output grid will have values scaled to the range [0, ubound] 13 | - padding: The number of blank pixels between elements of the grid 14 | """ 15 | (N, H, W, C) = Xs.shape 16 | grid_size = int(ceil(sqrt(N))) 17 | grid_height = H * grid_size + padding * (grid_size - 1) 18 | grid_width = W * grid_size + padding * (grid_size - 1) 19 | grid = np.zeros((grid_height, grid_width, C)) 20 | next_idx = 0 21 | y0, y1 = 0, H 22 | for y in xrange(grid_size): 23 | x0, x1 = 0, W 24 | for x in xrange(grid_size): 25 | if next_idx < N: 26 | img = Xs[next_idx] 27 | low, high = np.min(img), np.max(img) 28 | grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low) 29 | # grid[y0:y1, x0:x1] = Xs[next_idx] 30 | next_idx += 1 31 | x0 += W + padding 32 | x1 += W + padding 33 | y0 += H + padding 34 | y1 += H + padding 35 | # grid_max = np.max(grid) 36 | # grid_min = np.min(grid) 37 | # grid = ubound * (grid - grid_min) / (grid_max - grid_min) 38 | return grid 39 | 40 | def vis_grid(Xs): 41 | """ visualize a grid of images """ 42 | (N, H, W, C) = Xs.shape 43 | A = int(ceil(sqrt(N))) 44 | G = np.ones((A*H+A, A*W+A, C), Xs.dtype) 45 | G *= np.min(Xs) 46 | n = 0 47 | for y in range(A): 48 | for x in range(A): 49 | if n < N: 50 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n,:,:,:] 51 | n += 1 52 | # normalize to [0,1] 53 | maxg = G.max() 54 | ming = G.min() 55 | G = (G - ming)/(maxg-ming) 56 | return G 57 | 58 | def vis_nn(rows): 59 | """ visualize array of arrays of images """ 60 | N = len(rows) 61 | D = len(rows[0]) 62 | H,W,C = rows[0][0].shape 63 | Xs = rows[0][0] 64 | G = np.ones((N*H+N, D*W+D, C), Xs.dtype) 65 | for y in range(N): 66 | for x in range(D): 67 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x] 68 | # normalize to [0,1] 69 | maxg = G.max() 70 | ming = G.min() 71 | G = (G - ming)/(maxg-ming) 72 | return G 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- 
/assignment2/cs231n/im2col.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | import numpy as np 3 | 4 | 5 | def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1): 6 | # First figure out what the size of the output should be 7 | N, C, H, W = x_shape 8 | assert (H + 2 * padding - field_height) % stride == 0 9 | assert (W + 2 * padding - field_height) % stride == 0 10 | out_height = (H + 2 * padding - field_height) / stride + 1 11 | out_width = (W + 2 * padding - field_width) / stride + 1 12 | 13 | i0 = np.repeat(np.arange(field_height), field_width) 14 | i0 = np.tile(i0, C) 15 | i1 = stride * np.repeat(np.arange(out_height), out_width) 16 | j0 = np.tile(np.arange(field_width), field_height * C) 17 | j1 = stride * np.tile(np.arange(out_width), out_height) 18 | i = i0.reshape(-1, 1) + i1.reshape(1, -1) 19 | j = j0.reshape(-1, 1) + j1.reshape(1, -1) 20 | 21 | k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1) 22 | 23 | return (k, i, j) 24 | 25 | 26 | def im2col_indices(x, field_height, field_width, padding=1, stride=1): 27 | """ An implementation of im2col based on some fancy indexing """ 28 | # Zero-pad the input 29 | p = padding 30 | x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant') 31 | 32 | k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding, 33 | stride) 34 | 35 | cols = x_padded[:, k, i, j] 36 | C = x.shape[1] 37 | cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1) 38 | return cols 39 | 40 | 41 | def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1, 42 | stride=1): 43 | """ An implementation of col2im based on fancy indexing and np.add.at """ 44 | N, C, H, W = x_shape 45 | H_padded, W_padded = H + 2 * padding, W + 2 * padding 46 | x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype) 47 | k, i, j = get_im2col_indices(x_shape, field_height, 
def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
    """Compute the (channel, row, col) gather indices used by im2col.

    Inputs:
    - x_shape: Input shape (N, C, H, W).
    - field_height, field_width: Receptive field size.
    - padding: Zero padding applied to H and W before indexing.
    - stride: Step between receptive fields.

    Returns a tuple (k, i, j) of integer index arrays such that
    x_padded[:, k, i, j] gathers every receptive field of every channel.
    """
    # First figure out what the size of the output should be
    N, C, H, W = x_shape
    assert (H + 2 * padding - field_height) % stride == 0
    # Bug fix: the width check previously re-tested field_height.
    assert (W + 2 * padding - field_width) % stride == 0
    # Bug fix: use floor division; true division yields floats on Python 3,
    # and float arrays are rejected as numpy fancy indices downstream.
    out_height = (H + 2 * padding - field_height) // stride + 1
    out_width = (W + 2 * padding - field_width) // stride + 1

    i0 = np.repeat(np.arange(field_height), field_width)
    i0 = np.tile(i0, C)
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    j0 = np.tile(np.arange(field_width), field_height * C)
    j1 = stride * np.tile(np.arange(out_width), out_height)
    # Broadcast intra-patch offsets against patch origins.
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)

    k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)

    return (k, i, j)
def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1,
                   stride=1):
    """ An implementation of col2im based on fancy indexing and np.add.at

    Scatters an im2col-style column matrix back into an image tensor of
    shape x_shape, summing the contributions of overlapping receptive
    fields (the adjoint of im2col_indices).

    Inputs:
    - cols: Column matrix of shape (C * field_height * field_width, -1),
      as produced by im2col_indices.
    - x_shape: Target shape (N, C, H, W).
    - field_height, field_width, padding, stride: Convolution parameters;
      must match the ones used for the forward im2col.

    Returns:
    - Array of shape x_shape with overlapping patch values accumulated.
    """
    N, C, H, W = x_shape
    H_padded, W_padded = H + 2 * padding, W + 2 * padding
    x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
    k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding,
                                 stride)
    # Undo im2col's (patch, position, batch) flattening, then move batch first.
    cols_reshaped = cols.reshape(C * field_height * field_width, -1, N)
    cols_reshaped = cols_reshaped.transpose(2, 0, 1)
    # np.add.at accumulates unbuffered, so pixels covered by several
    # receptive fields receive the sum of all their column entries.
    np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped)
    if padding == 0:
        # [p:-p] with p == 0 would be an empty slice, so special-case it.
        return x_padded
    return x_padded[:, :, padding:-padding, padding:-padding]
def vis_nn(rows):
    """Lay out a 2-D array of images on a grid, normalized to [0, 1].

    Inputs:
    - rows: Sequence of N sequences, each holding D images of identical
      shape (H, W, C).

    Returns:
    - G: Array of shape (N*H+N, D*W+D, C) with one pixel of padding between
      images, rescaled to the range [0, 1].
    """
    N = len(rows)
    D = len(rows[0])
    H, W, C = rows[0][0].shape
    Xs = rows[0][0]
    G = np.ones((N*H+N, D*W+D, C), Xs.dtype)
    for y in range(N):
        for x in range(D):
            G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x]
    # Normalize to [0, 1]. Bug fix: guard the constant-grid case, which
    # previously divided by zero and returned NaN/inf pixels.
    maxg = G.max()
    ming = G.min()
    if maxg > ming:
        G = (G - ming) / (maxg - ming)
    else:
        G = np.zeros_like(G, dtype=np.float64)
    return G
# Per-channel statistics used by SqueezeNet preprocessing (RGB order).
SQUEEZENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
SQUEEZENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)


def preprocess_image(img):
    """Preprocess an image for squeezenet.

    Scales pixel values to [0, 1], then subtracts the per-channel mean and
    divides by the per-channel standard deviation.
    """
    scaled = img.astype(np.float32) / 255.0
    return (scaled - SQUEEZENET_MEAN) / SQUEEZENET_STD
def load_image(filename, size=None):
    """Load an image from disk, optionally rescaling it.

    Inputs:
    - filename: path to file
    - size: size of shortest dimension after rescaling; if None, the image
      is returned at its original resolution.

    Returns:
    - img: the loaded (and possibly rescaled) pixel array.
    """
    img = imread(filename)
    if size is not None:
        orig_shape = np.array(img.shape[:2])
        # Scale so the shortest side becomes `size`; imresize accepts the
        # fractional zoom factor directly. (The previous version also
        # computed an unused `new_shape` array -- dead code, removed.)
        scale_factor = float(size) / orig_shape.min()
        img = imresize(img, scale_factor)
    return img
def adam(x, dx, config=None):
    """
    Uses the Adam update rule, which incorporates moving averages of both the
    gradient and its square and a bias correction term.

    config format:
    - learning_rate: Scalar learning rate.
    - beta1: Decay rate for moving average of first moment of gradient.
    - beta2: Decay rate for moving average of second moment of gradient.
    - epsilon: Small scalar used for smoothing to avoid dividing by zero.
    - m: Moving average of gradient.
    - v: Moving average of squared gradient.
    - t: Iteration number.
    """
    if config is None:
        config = {}
    for key, val in (('learning_rate', 1e-3), ('beta1', 0.9),
                     ('beta2', 0.999), ('epsilon', 1e-8), ('t', 0)):
        config.setdefault(key, val)
    config.setdefault('m', np.zeros_like(x))
    config.setdefault('v', np.zeros_like(x))

    lr = config['learning_rate']
    b1, b2, eps = config['beta1'], config['beta2'], config['epsilon']

    # Update biased first/second moment estimates and the step counter.
    first = b1 * config['m'] + (1 - b1) * dx
    second = b2 * config['v'] + (1 - b2) * (dx * dx)
    step = config['t'] + 1
    # Fold both bias corrections into a single effective step size.
    alpha = lr * np.sqrt(1 - b2 ** step) / (1 - b1 ** step)
    # In-place update so callers holding a reference to x see the new weights.
    x -= alpha * (first / (np.sqrt(second) + eps))

    config['t'] = step
    config['m'] = first
    config['v'] = second

    return x, config
def decode_captions(captions, idx_to_word):
    """Convert integer-encoded captions back to whitespace-joined strings.

    Inputs:
    - captions: Integer array of shape (N, T), or (T,) for a single caption.
    - idx_to_word: Mapping from integer index to vocabulary word.

    Returns:
    - A list of N decoded strings, or a single string for 1-D input.
    """
    singleton = False
    if captions.ndim == 1:
        singleton = True
        captions = captions[None]
    decoded = []
    N, T = captions.shape
    for i in range(N):
        words = []
        for t in range(T):
            word = idx_to_word[captions[i, t]]
            # Bug fix: restore the COCO sentinel tokens (the original '<NULL>'
            # / '<END>' literals were lost, leaving two degenerate comparisons
            # against ''): skip padding, stop after the end-of-caption marker.
            if word != '<NULL>':
                words.append(word)
            if word == '<END>':
                break
        decoded.append(' '.join(words))
    if singleton:
        decoded = decoded[0]
    return decoded
def affine_relu_forward(x, w, b):
    """Apply an affine transform followed by a ReLU nonlinearity.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Object to give to the backward pass
    """
    affine_out, affine_cache = affine_forward(x, w, b)
    out, relu_cache = relu_forward(affine_out)
    return out, (affine_cache, relu_cache)
55 | """ 56 | conv_cache, relu_cache = cache 57 | da = relu_backward(dout, relu_cache) 58 | dx, dw, db = conv_backward_fast(da, conv_cache) 59 | return dx, dw, db 60 | 61 | 62 | def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param): 63 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 64 | an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param) 65 | out, relu_cache = relu_forward(an) 66 | cache = (conv_cache, bn_cache, relu_cache) 67 | return out, cache 68 | 69 | 70 | def conv_bn_relu_backward(dout, cache): 71 | conv_cache, bn_cache, relu_cache = cache 72 | dan = relu_backward(dout, relu_cache) 73 | da, dgamma, dbeta = spatial_batchnorm_backward(dan, bn_cache) 74 | dx, dw, db = conv_backward_fast(da, conv_cache) 75 | return dx, dw, db, dgamma, dbeta 76 | 77 | 78 | def conv_relu_pool_forward(x, w, b, conv_param, pool_param): 79 | """ 80 | Convenience layer that performs a convolution, a ReLU, and a pool. 81 | 82 | Inputs: 83 | - x: Input to the convolutional layer 84 | - w, b, conv_param: Weights and parameters for the convolutional layer 85 | - pool_param: Parameters for the pooling layer 86 | 87 | Returns a tuple of: 88 | - out: Output from the pooling layer 89 | - cache: Object to give to the backward pass 90 | """ 91 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 92 | s, relu_cache = relu_forward(a) 93 | out, pool_cache = max_pool_forward_fast(s, pool_param) 94 | cache = (conv_cache, relu_cache, pool_cache) 95 | return out, cache 96 | 97 | 98 | def conv_relu_pool_backward(dout, cache): 99 | """ 100 | Backward pass for the conv-relu-pool convenience layer 101 | """ 102 | conv_cache, relu_cache, pool_cache = cache 103 | ds = max_pool_backward_fast(dout, pool_cache) 104 | da = relu_backward(ds, relu_cache) 105 | dx, dw, db = conv_backward_fast(da, conv_cache) 106 | return dx, dw, db 107 | -------------------------------------------------------------------------------- /assignment1/cs231n/gradient_check.py: 
def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
    """Numerically estimate the gradient of f at x with centered differences.

    - f should be a function that takes a single numpy-array argument
    - x is the point (numpy array) to evaluate the gradient at; entries are
      perturbed in place and restored before returning
    """
    fx = f(x)  # evaluate function value at the original point
    grad = np.zeros_like(x)
    # Walk every index of x, perturbing one coordinate at a time.
    walker = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not walker.finished:
        idx = walker.multi_index
        saved = x[idx]
        # Evaluate at x + h and x - h, restoring x afterwards.
        x[idx] = saved + h
        f_plus = f(x)
        x[idx] = saved - h
        f_minus = f(x)
        x[idx] = saved
        # Centered difference quotient for this coordinate.
        grad[idx] = (f_plus - f_minus) / (2 * h)
        if verbose:
            print(idx, grad[idx])
        walker.iternext()
    return grad
def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
    # Numeric gradients through a net object. The lambda ignores the blob
    # arguments supplied by eval_numerical_gradient_blobs and simply reruns
    # the net's full forward pass; the perturbed values are picked up because
    # the net presumably reads the same blob objects -- verify with callers.
    return eval_numerical_gradient_blobs(lambda *args: net.forward(),
                                         inputs, output, h=h)
111 | """ 112 | 113 | for i in xrange(num_checks): 114 | ix = tuple([randrange(m) for m in x.shape]) 115 | 116 | oldval = x[ix] 117 | x[ix] = oldval + h # increment by h 118 | fxph = f(x) # evaluate f(x + h) 119 | x[ix] = oldval - h # increment by h 120 | fxmh = f(x) # evaluate f(x - h) 121 | x[ix] = oldval # reset 122 | 123 | grad_numerical = (fxph - fxmh) / (2 * h) 124 | grad_analytic = analytic_grad[ix] 125 | rel_error = abs(grad_numerical - grad_analytic) / (abs(grad_numerical) + abs(grad_analytic)) 126 | print('numerical: %f analytic: %f, relative error: %e' % (grad_numerical, grad_analytic, rel_error)) 127 | 128 | -------------------------------------------------------------------------------- /assignment2/cs231n/gradient_check.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import range 3 | from past.builtins import xrange 4 | 5 | import numpy as np 6 | from random import randrange 7 | 8 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001): 9 | """ 10 | a naive implementation of numerical gradient of f at x 11 | - f should be a function that takes a single argument 12 | - x is the point (numpy array) to evaluate the gradient at 13 | """ 14 | 15 | fx = f(x) # evaluate function value at original point 16 | grad = np.zeros_like(x) 17 | # iterate over all indexes in x 18 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 19 | while not it.finished: 20 | 21 | # evaluate function at x+h 22 | ix = it.multi_index 23 | oldval = x[ix] 24 | x[ix] = oldval + h # increment by h 25 | fxph = f(x) # evalute f(x + h) 26 | x[ix] = oldval - h 27 | fxmh = f(x) # evaluate f(x - h) 28 | x[ix] = oldval # restore 29 | 30 | # compute the partial derivative with centered formula 31 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope 32 | if verbose: 33 | print(ix, grad[ix]) 34 | it.iternext() # step to next dimension 35 | 36 | return grad 37 | 38 | 39 | def 
def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
    """
    Compute numeric gradients for a function that operates on input
    and output blobs.

    We assume that f accepts several input blobs as arguments, followed by a
    blob where outputs will be written. For example, f might be called like:

    f(x, w, out)

    where x and w are input Blobs, and the result of f will be written to out.

    Inputs:
    - f: function
    - inputs: tuple of input blobs
    - output: output blob
    - h: step size

    Returns:
    - A list with one array per input blob, each holding the numeric
      gradient with respect to that blob's vals.
    """
    numeric_diffs = []
    for input_blob in inputs:
        diff = np.zeros_like(input_blob.diffs)
        it = np.nditer(input_blob.vals, flags=['multi_index'],
                       op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            orig = input_blob.vals[idx]

            # Evaluate the pipeline at vals[idx] + h and - h, restoring the
            # original entry afterwards.
            input_blob.vals[idx] = orig + h
            f(*(inputs + (output,)))
            pos = np.copy(output.vals)
            input_blob.vals[idx] = orig - h
            f(*(inputs + (output,)))
            neg = np.copy(output.vals)
            input_blob.vals[idx] = orig

            # Centered difference, weighted by output.diffs (presumably the
            # upstream gradient d(loss)/d(output), i.e. the chain rule).
            diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)

            it.iternext()
        numeric_diffs.append(diff)
    return numeric_diffs
112 | """ 113 | 114 | for i in range(num_checks): 115 | ix = tuple([randrange(m) for m in x.shape]) 116 | 117 | oldval = x[ix] 118 | x[ix] = oldval + h # increment by h 119 | fxph = f(x) # evaluate f(x + h) 120 | x[ix] = oldval - h # increment by h 121 | fxmh = f(x) # evaluate f(x - h) 122 | x[ix] = oldval # reset 123 | 124 | grad_numerical = (fxph - fxmh) / (2 * h) 125 | grad_analytic = analytic_grad[ix] 126 | rel_error = (abs(grad_numerical - grad_analytic) / 127 | (abs(grad_numerical) + abs(grad_analytic))) 128 | print('numerical: %f analytic: %f, relative error: %e' 129 | %(grad_numerical, grad_analytic, rel_error)) 130 | -------------------------------------------------------------------------------- /assignment3/cs231n/gradient_check.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import range 3 | from past.builtins import xrange 4 | 5 | import numpy as np 6 | from random import randrange 7 | 8 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001): 9 | """ 10 | a naive implementation of numerical gradient of f at x 11 | - f should be a function that takes a single argument 12 | - x is the point (numpy array) to evaluate the gradient at 13 | """ 14 | 15 | fx = f(x) # evaluate function value at original point 16 | grad = np.zeros_like(x) 17 | # iterate over all indexes in x 18 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 19 | while not it.finished: 20 | 21 | # evaluate function at x+h 22 | ix = it.multi_index 23 | oldval = x[ix] 24 | x[ix] = oldval + h # increment by h 25 | fxph = f(x) # evalute f(x + h) 26 | x[ix] = oldval - h 27 | fxmh = f(x) # evaluate f(x - h) 28 | x[ix] = oldval # restore 29 | 30 | # compute the partial derivative with centered formula 31 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope 32 | if verbose: 33 | print(ix, grad[ix]) 34 | it.iternext() # step to next dimension 35 | 36 | return grad 37 | 38 | 39 | def 
def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
    """
    Compute numeric gradients for a function that operates on input
    and output blobs.

    We assume that f accepts several input blobs as arguments, followed by a
    blob where outputs will be written. For example, f might be called like:

    f(x, w, out)

    where x and w are input Blobs, and the result of f will be written to out.

    Inputs:
    - f: function
    - inputs: tuple of input blobs
    - output: output blob
    - h: step size

    Returns:
    - A list with one array per input blob, each holding the numeric
      gradient with respect to that blob's vals.
    """
    numeric_diffs = []
    for input_blob in inputs:
        diff = np.zeros_like(input_blob.diffs)
        it = np.nditer(input_blob.vals, flags=['multi_index'],
                       op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            orig = input_blob.vals[idx]

            # Evaluate the pipeline at vals[idx] + h and - h, restoring the
            # original entry afterwards.
            input_blob.vals[idx] = orig + h
            f(*(inputs + (output,)))
            pos = np.copy(output.vals)
            input_blob.vals[idx] = orig - h
            f(*(inputs + (output,)))
            neg = np.copy(output.vals)
            input_blob.vals[idx] = orig

            # Centered difference, weighted by output.diffs (presumably the
            # upstream gradient d(loss)/d(output), i.e. the chain rule).
            diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)

            it.iternext()
        numeric_diffs.append(diff)
    return numeric_diffs
112 | """ 113 | 114 | for i in range(num_checks): 115 | ix = tuple([randrange(m) for m in x.shape]) 116 | 117 | oldval = x[ix] 118 | x[ix] = oldval + h # increment by h 119 | fxph = f(x) # evaluate f(x + h) 120 | x[ix] = oldval - h # increment by h 121 | fxmh = f(x) # evaluate f(x - h) 122 | x[ix] = oldval # reset 123 | 124 | grad_numerical = (fxph - fxmh) / (2 * h) 125 | grad_analytic = analytic_grad[ix] 126 | rel_error = (abs(grad_numerical - grad_analytic) / 127 | (abs(grad_numerical) + abs(grad_analytic))) 128 | print('numerical: %f analytic: %f, relative error: %e' 129 | %(grad_numerical, grad_analytic, rel_error)) 130 | -------------------------------------------------------------------------------- /assignment1/cs231n/classifiers/softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from random import shuffle 3 | #from past.builtins import xrange 4 | 5 | def softmax_loss_naive(W, X, y, reg): 6 | """ 7 | Softmax loss function, naive implementation (with loops) 8 | 9 | Inputs have dimension D, there are C classes, and we operate on minibatches 10 | of N examples. 11 | 12 | Inputs: 13 | - W: A numpy array of shape (D, C) containing weights. 14 | - X: A numpy array of shape (N, D) containing a minibatch of data. 15 | - y: A numpy array of shape (N,) containing training labels; y[i] = c means 16 | that X[i] has label c, where 0 <= c < C. 17 | - reg: (float) regularization strength 18 | 19 | Returns a tuple of: 20 | - loss as single float 21 | - gradient with respect to weights W; an array of same shape as W 22 | """ 23 | # Initialize the loss and gradient to zero. 24 | loss = 0.0 25 | dW = np.zeros_like(W) 26 | 27 | ############################################################################# 28 | # TODO: Compute the softmax loss and its gradient using explicit loops. # 29 | # Store the loss in loss and the gradient in dW. 
If you are not careful # 30 | # here, it is easy to run into numeric instability. Don't forget the # 31 | # regularization! # 32 | ############################################################################# 33 | train_num = X.shape[0] 34 | num_classes = np.max(y) + 1 35 | for i in xrange(train_num): 36 | y_pred = X[i,:].dot(W) 37 | y_pred = y_pred - np.max(y_pred) 38 | y_label = y[i] 39 | prob = np.exp(y_pred[y_label]) / np.sum(np.exp(y_pred)) 40 | loss += -1*np.log(prob) 41 | for j in xrange(num_classes): 42 | if j != y[i]: 43 | dW[:,j] += np.exp(y_pred[j])/np.sum(np.exp(y_pred)) * X[i] 44 | else: 45 | dW[:,y[i]] += (-1 + np.exp(y_pred[j])/np.sum(np.exp(y_pred))) * X[i] 46 | 47 | loss /= train_num 48 | dW /= train_num 49 | loss += 0.5*reg*np.sum(np.square(W)) 50 | dW += reg*W 51 | 52 | ############################################################################# 53 | # END OF YOUR CODE # 54 | ############################################################################# 55 | 56 | return loss, dW 57 | 58 | 59 | def softmax_loss_vectorized(W, X, y, reg): 60 | """ 61 | Softmax loss function, vectorized version. 62 | 63 | Inputs and outputs are the same as softmax_loss_naive. 64 | """ 65 | # Initialize the loss and gradient to zero. 66 | loss = 0.0 67 | dW = np.zeros_like(W) 68 | 69 | ############################################################################# 70 | # TODO: Compute the softmax loss and its gradient using no explicit loops. # 71 | # Store the loss in loss and the gradient in dW. If you are not careful # 72 | # here, it is easy to run into numeric instability. Don't forget the # 73 | # regularization! 
# 74 | ############################################################################# 75 | num_train , dims = X.shape 76 | 77 | scores = np.dot(X, W)# N by C 78 | scores -= np.max(scores,axis=1,keepdims=True) 79 | expscores = np.exp(scores) 80 | p = expscores / np.sum(expscores,axis=1,keepdims=True) 81 | y_trueClass = np.zeros_like(p) 82 | y_trueClass[range(num_train),y] = 1.0 83 | 84 | loss = -1 * np.sum(y_trueClass*np.log(p)) / num_train + 0.5*reg*np.sum(np.square(W)) 85 | 86 | 87 | dW = np.dot(X.transpose(), p - y_trueClass) 88 | dW /= num_train 89 | dW += reg * W 90 | # scores = X.dot(W) # N by C 91 | # num_train = X.shape[0] 92 | # num_classes = W.shape[1] 93 | # scores_correct = scores[np.arange(num_train), y] # 1 by N 94 | # scores_correct = np.reshape(scores_correct, (num_train, 1)) # N by 1 95 | # margins = scores - scores_correct + 1.0 # N by C 96 | # margins[np.arange(num_train), y] = 0.0 97 | # margins[margins <= 0] = 0.0 98 | # loss += np.sum(margins) / num_train 99 | # loss += 0.5 * reg * np.sum(W * W) 100 | # # compute the gradient 101 | # margins[margins > 0] = 1.0 102 | # row_sum = np.sum(margins, axis=1) # 1 by N 103 | # margins[np.arange(num_train), y] = -row_sum 104 | # dW += np.dot(X.T, margins)/num_train + reg * W # D by C 105 | ############################################################################# 106 | # END OF YOUR CODE # 107 | ############################################################################# 108 | 109 | return loss, dW 110 | 111 | -------------------------------------------------------------------------------- /assignment1/cs231n/classifiers/linear_svm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from random import shuffle 3 | #from past.builtins import xrange 4 | 5 | def svm_loss_naive(W, X, y, reg): 6 | """ 7 | Structured SVM loss function, naive implementation (with loops). 
def svm_loss_naive(W, X, y, reg):
    """
    Structured SVM loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    # BUG FIX: xrange does not exist in Python 3 (the past.builtins import
    # at the top of the file is commented out); use range.
    for i in range(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue  # the correct class contributes no margin
            margin = scores[j] - correct_class_score + 1  # note delta = 1
            if margin > 0:
                loss += margin
                # Each violating class pushes its column toward X[i] and the
                # correct class's column away from it.
                dW[:, y[i]] -= X[i]
                dW[:, j] += X[i]

    # Average over the minibatch instead of summing.
    loss /= num_train
    dW /= num_train

    # Add regularization.
    # NOTE(review): this naive version uses reg*||W||^2 (gradient 2*reg*W)
    # while svm_loss_vectorized below uses 0.5*reg*||W||^2 (gradient reg*W).
    # Behavior kept as-is; confirm which convention the grader expects
    # before unifying the two.
    loss += reg * np.sum(W * W)
    dW += 2 * reg * W

    return loss, dW


def svm_loss_vectorized(W, X, y, reg):
    """
    Structured SVM loss function, vectorized implementation.

    Inputs and outputs are the same as svm_loss_naive.
    """
    num_train = X.shape[0]
    rows = np.arange(num_train)

    scores = X.dot(W)                                        # (N, C)
    correct_class_score = scores[rows, y].reshape(num_train, 1)
    margins = np.maximum(0, scores - correct_class_score + 1)
    margins[rows, y] = 0  # correct classes contribute no loss
    loss = np.sum(margins) / num_train + 0.5 * reg * np.sum(W * W)

    # Gradient: each positive margin contributes +X[i] to its class column
    # and -X[i] to the correct class's column.
    coeff = (margins > 0).astype(np.float64)
    coeff[rows, y] = -np.sum(coeff, axis=1)
    dW = X.T.dot(coeff) / num_train + reg * W

    return loss, dW
def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
    """
    Forward pass for an affine -> batch normalization -> ReLU sandwich.

    Inputs:
    - x: Array of shape (N, D1); input to the affine layer
    - w, b: Weight of shape (D1, D2) and bias of shape (D2,) for the affine
      transform
    - gamma, beta: Scale and shift parameters for batch normalization, each
      of shape (D2,)
    - bn_param: Dictionary of parameters for batch normalization

    Returns a tuple of:
    - out: Output from the ReLU, of shape (N, D2)
    - cache: Values needed by affine_bn_relu_backward
    """
    fc_out, fc_cache = affine_forward(x, w, b)
    bn_out, bn_cache = batchnorm_forward(fc_out, gamma, beta, bn_param)
    out, relu_cache = relu_forward(bn_out)
    return out, (fc_cache, bn_cache, relu_cache)


def affine_bn_relu_backward(dout, cache):
    """
    Backward pass for the affine -> batchnorm -> ReLU sandwich.
    """
    fc_cache, bn_cache, relu_cache = cache
    dbn_out = relu_backward(dout, relu_cache)
    dfc_out, dgamma, dbeta = batchnorm_backward(dbn_out, bn_cache)
    dx, dw, db = affine_backward(dfc_out, fc_cache)
    return dx, dw, db, dgamma, dbeta


def conv_relu_forward(x, w, b, conv_param):
    """
    Convenience layer: convolution immediately followed by a ReLU.

    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Values needed by conv_relu_backward
    """
    conv_out, conv_cache = conv_forward_fast(x, w, b, conv_param)
    out, relu_cache = relu_forward(conv_out)
    return out, (conv_cache, relu_cache)


def conv_relu_backward(dout, cache):
    """
    Backward pass for the conv -> ReLU convenience layer.
    """
    conv_cache, relu_cache = cache
    dconv_out = relu_backward(dout, relu_cache)
    return conv_backward_fast(dconv_out, conv_cache)


def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param):
    """Forward pass for conv -> spatial batchnorm -> ReLU."""
    conv_out, conv_cache = conv_forward_fast(x, w, b, conv_param)
    bn_out, bn_cache = spatial_batchnorm_forward(conv_out, gamma, beta, bn_param)
    out, relu_cache = relu_forward(bn_out)
    return out, (conv_cache, bn_cache, relu_cache)


def conv_bn_relu_backward(dout, cache):
    """Backward pass for conv -> spatial batchnorm -> ReLU."""
    conv_cache, bn_cache, relu_cache = cache
    dbn_out = relu_backward(dout, relu_cache)
    dconv_out, dgamma, dbeta = spatial_batchnorm_backward(dbn_out, bn_cache)
    dx, dw, db = conv_backward_fast(dconv_out, conv_cache)
    return dx, dw, db, dgamma, dbeta


def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
    """
    Convenience layer: convolution, then ReLU, then max pooling.

    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer
    - pool_param: Parameters for the pooling layer

    Returns a tuple of:
    - out: Output from the pooling layer
    - cache: Values needed by conv_relu_pool_backward
    """
    conv_out, conv_cache = conv_forward_fast(x, w, b, conv_param)
    relu_out, relu_cache = relu_forward(conv_out)
    out, pool_cache = max_pool_forward_fast(relu_out, pool_param)
    return out, (conv_cache, relu_cache, pool_cache)


def conv_relu_pool_backward(dout, cache):
    """
    Backward pass for the conv -> ReLU -> pool convenience layer.
    """
    conv_cache, relu_cache, pool_cache = cache
    drelu_out = max_pool_backward_fast(dout, pool_cache)
    dconv_out = relu_backward(drelu_out, relu_cache)
    return conv_backward_fast(dconv_out, conv_cache)
import tensorflow as tf

# Number of ImageNet classes the final classifier predicts over.
NUM_CLASSES = 1000

def fire_module(x,inp,sp,e11p,e33p):
    """SqueezeNet 'fire' module: a 1x1 'squeeze' convolution followed by a
    pair of 'expand' convolutions (1x1 and 3x3) whose ReLU outputs are
    concatenated along the channel axis.

    Inputs:
    - x: input tensor in NHWC layout (channels last).
    - inp: number of input channels; must match x's last dimension.
    - sp: number of squeeze filters.
    - e11p: number of 1x1 expand filters.
    - e33p: number of 3x3 expand filters.

    Returns a tensor with e11p + e33p output channels.
    """
    with tf.variable_scope("fire"):
        with tf.variable_scope("squeeze"):
            W = tf.get_variable("weights",shape=[1,1,inp,sp])
            b = tf.get_variable("bias",shape=[sp])
            s = tf.nn.conv2d(x,W,[1,1,1,1],"VALID")+b
            s = tf.nn.relu(s)
        with tf.variable_scope("e11"):
            W = tf.get_variable("weights",shape=[1,1,sp,e11p])
            b = tf.get_variable("bias",shape=[e11p])
            e11 = tf.nn.conv2d(s,W,[1,1,1,1],"VALID")+b
            e11 = tf.nn.relu(e11)
        with tf.variable_scope("e33"):
            W = tf.get_variable("weights",shape=[3,3,sp,e33p])
            b = tf.get_variable("bias",shape=[e33p])
            # "SAME" padding keeps the 3x3 branch spatially aligned with
            # the 1x1 branch so the two can be concatenated.
            e33 = tf.nn.conv2d(s,W,[1,1,1,1],"SAME")+b
            e33 = tf.nn.relu(e33)
        # Axis 3 is the channel axis in NHWC layout.
        return tf.concat([e11,e33],3)


class SqueezeNet(object):
    # TF1-style SqueezeNet whose intermediate activations are exposed as a
    # list (used by the style-transfer / saliency notebooks).

    def extract_features(self, input=None, reuse=True):
        """Build (or reuse) the convolutional feature tower.

        Inputs:
        - input: optional input tensor; defaults to the model's image
          placeholder.
        - reuse: whether to reuse the 'features' variable scope. Must be
          False the first time the tower is built (see __init__).

        Returns a list with one activation tensor per layer, shallowest
        first; the last entry is the final feature map.
        """
        if input is None:
            input = self.image
        x = input
        layers = []
        with tf.variable_scope('features', reuse=reuse):
            with tf.variable_scope('layer0'):
                W = tf.get_variable("weights",shape=[3,3,3,64])
                b = tf.get_variable("bias",shape=[64])
                # Stride-2 stem convolution; halves spatial resolution.
                x = tf.nn.conv2d(x,W,[1,2,2,1],"VALID")
                x = tf.nn.bias_add(x,b)
                layers.append(x)
            with tf.variable_scope('layer1'):
                x = tf.nn.relu(x)
                layers.append(x)
            with tf.variable_scope('layer2'):
                x = tf.nn.max_pool(x,[1,3,3,1],strides=[1,2,2,1],padding='VALID')
                layers.append(x)
            # Fire modules: (input channels, squeeze, expand1x1, expand3x3).
            with tf.variable_scope('layer3'):
                x = fire_module(x,64,16,64,64)
                layers.append(x)
            with tf.variable_scope('layer4'):
                x = fire_module(x,128,16,64,64)
                layers.append(x)
            with tf.variable_scope('layer5'):
                x = tf.nn.max_pool(x,[1,3,3,1],strides=[1,2,2,1],padding='VALID')
                layers.append(x)
            with tf.variable_scope('layer6'):
                x = fire_module(x,128,32,128,128)
                layers.append(x)
            with tf.variable_scope('layer7'):
                x = fire_module(x,256,32,128,128)
                layers.append(x)
            with tf.variable_scope('layer8'):
                x = tf.nn.max_pool(x,[1,3,3,1],strides=[1,2,2,1],padding='VALID')
                layers.append(x)
            with tf.variable_scope('layer9'):
                x = fire_module(x,256,48,192,192)
                layers.append(x)
            with tf.variable_scope('layer10'):
                x = fire_module(x,384,48,192,192)
                layers.append(x)
            with tf.variable_scope('layer11'):
                x = fire_module(x,384,64,256,256)
                layers.append(x)
            with tf.variable_scope('layer12'):
                x = fire_module(x,512,64,256,256)
                layers.append(x)
        return layers

    def __init__(self, save_path=None, sess=None):
        """Create a SqueezeNet model.
        Inputs:
        - save_path: path to TensorFlow checkpoint
        - sess: TensorFlow session
        - input: optional input to the model. If None, will use placeholder for input.
        """
        # Placeholders: variable-size NHWC image batch and integer labels.
        self.image = tf.placeholder('float',shape=[None,None,None,3],name='input_image')
        self.labels = tf.placeholder('int32', shape=[None], name='labels')
        self.layers = []
        x = self.image
        # First construction of the tower, so reuse=False.
        self.layers = self.extract_features(x, reuse=False)
        self.features = self.layers[-1]
        with tf.variable_scope('classifier'):
            with tf.variable_scope('layer0'):
                x = self.features
                self.layers.append(x)
            with tf.variable_scope('layer1'):
                # 1x1 convolution acting as the final fully-connected layer.
                W = tf.get_variable("weights",shape=[1,1,512,1000])
                b = tf.get_variable("bias",shape=[1000])
                x = tf.nn.conv2d(x,W,[1,1,1,1],"VALID")
                x = tf.nn.bias_add(x,b)
                self.layers.append(x)
            with tf.variable_scope('layer2'):
                x = tf.nn.relu(x)
                self.layers.append(x)
            with tf.variable_scope('layer3'):
                # Global average pool over the final 13x13 feature map
                # (assumes a 224x224 input; other sizes change this — TODO confirm).
                x = tf.nn.avg_pool(x,[1,13,13,1],strides=[1,13,13,1],padding='VALID')
                self.layers.append(x)
        self.classifier = tf.reshape(x,[-1, NUM_CLASSES])

        with tf.name_scope('loss'):
            # NOTE(review): checkpoint restoration sits inside the 'loss'
            # name scope but must run after all variables above are created;
            # it only affects variable values, not the graph structure.
            if save_path is not None:
                saver = tf.train.Saver()
                saver.restore(sess, save_path)
            self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(self.labels, NUM_CLASSES), logits=self.classifier))
@cython.boundscheck(False)
cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
                             np.ndarray[DTYPE_t, ndim=4] x_padded,
                             int N, int C, int H, int W, int HH, int WW,
                             int field_height, int field_width, int padding, int stride) except? -1:
    # Scatter each receptive-field pixel of x_padded into its slot of the
    # 2-D cols matrix: row encodes (channel, ii, jj) within a field, col
    # encodes (yy, xx, image) over output positions and the batch.
    cdef int c, ii, jj, row, yy, xx, i, col

    for c in range(C):
        for yy in range(HH):
            for xx in range(WW):
                for ii in range(field_height):
                    for jj in range(field_width):
                        # NOTE(review): ii is multiplied by field_height, not
                        # field_width. col2im_cython_inner uses the same
                        # formula, so the pair is self-consistent, but for
                        # non-square fields this differs from the usual
                        # ii*field_width + jj layout — confirm intended.
                        row = c * field_width * field_height + ii * field_height + jj
                        for i in range(N):
                            col = yy * WW * N + xx * N + i
                            cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj]
    # NOTE(review): declared `except? -1` but has no explicit return;
    # presumably falls through returning 0 — verify Cython emits no warning.



def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W,
                  int field_height, int field_width, int padding, int stride):
    # Inverse of im2col: accumulate the columns of `cols` back into an
    # (N, C, H, W) image, summing overlapping receptive fields.
    cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
    # C integer division (operands are cdef int), matching im2col_cython.
    cdef int HH = (H + 2 * padding - field_height) / stride + 1
    cdef int WW = (W + 2 * padding - field_width) / stride + 1
    cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding),
                                                         dtype=cols.dtype)

    # Moving the inner loop to a C-function with no bounds checking improves
    # performance quite a bit for col2im.
    col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
                        field_height, field_width, padding, stride)
    if padding > 0:
        # Strip the zero padding before returning.
        return x_padded[:, :, padding:-padding, padding:-padding]
    return x_padded


@cython.boundscheck(False)
cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
                             np.ndarray[DTYPE_t, ndim=4] x_padded,
                             int N, int C, int H, int W, int HH, int WW,
                             int field_height, int field_width, int padding, int stride) except? -1:
    # Exact adjoint of im2col_cython_inner: uses `+=` because overlapping
    # receptive fields must accumulate, not overwrite.
    cdef int c, ii, jj, row, yy, xx, i, col

    for c in range(C):
        for ii in range(field_height):
            for jj in range(field_width):
                # Must match the row formula in im2col_cython_inner exactly.
                row = c * field_width * field_height + ii * field_height + jj
                for yy in range(HH):
                    for xx in range(WW):
                        for i in range(N):
                            col = yy * WW * N + xx * N + i
                            x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col]


@cython.boundscheck(False)
@cython.wraparound(False)
cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols,
                            np.ndarray[DTYPE_t, ndim=4] x_padded,
                            int N, int C, int H, int W, int HH, int WW,
                            int out_h, int out_w, int pad, int stride):
    # 6-D variant: cols is indexed (channel, hh, ww, image, out_h, out_w),
    # accumulated back into the padded image.
    cdef int c, hh, ww, n, h, w
    for n in range(N):
        for c in range(C):
            for hh in range(HH):
                for ww in range(WW):
                    for h in range(out_h):
                        for w in range(out_w):
                            x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w]


def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W,
                     int HH, int WW, int pad, int stride):
    # Public wrapper for the 6-D col2im used by the "strides" fast layers.
    cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
    # C integer division on cdef ints.
    cdef int out_h = (H + 2 * pad - HH) / stride + 1
    cdef int out_w = (W + 2 * pad - WW) / stride + 1
    cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad),
                                                         dtype=cols.dtype)

    col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride)

    if pad > 0:
        # Strip the zero padding before returning.
        return x_padded[:, :, pad:-pad, pad:-pad]
    return x_padded
def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height,
                  int field_width, int padding, int stride):
    # Rearrange image patches of x (N, C, H, W) into the columns of a 2-D
    # matrix so convolution can be computed as a single matrix multiply.
    cdef int N = x.shape[0]
    cdef int C = x.shape[1]
    cdef int H = x.shape[2]
    cdef int W = x.shape[3]

    # C integer division (operands are cdef int); assumes the geometry
    # divides evenly — TODO confirm callers guarantee this.
    cdef int HH = (H + 2 * padding - field_height) / stride + 1
    cdef int WW = (W + 2 * padding - field_width) / stride + 1

    cdef int p = padding
    cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x,
            ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')

    # One row per (channel, field position), one column per
    # (output position, image).
    cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros(
            (C * field_height * field_width, N * HH * WW),
            dtype=x.dtype)

    # Moving the inner loop to a C function with no bounds checking works, but does
    # not seem to help performance in any measurable way.

    im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
                        field_height, field_width, padding, stride)
    return cols


@cython.boundscheck(False)
cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
                             np.ndarray[DTYPE_t, ndim=4] x_padded,
                             int N, int C, int H, int W, int HH, int WW,
                             int field_height, int field_width, int padding, int stride) except? -1:
    # Scatter each receptive-field pixel into the cols matrix.
    cdef int c, ii, jj, row, yy, xx, i, col

    for c in range(C):
        for yy in range(HH):
            for xx in range(WW):
                for ii in range(field_height):
                    for jj in range(field_width):
                        # NOTE(review): ii is scaled by field_height, not
                        # field_width; self-consistent with
                        # col2im_cython_inner but unconventional for
                        # non-square fields — confirm intended.
                        row = c * field_width * field_height + ii * field_height + jj
                        for i in range(N):
                            col = yy * WW * N + xx * N + i
                            cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj]



def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W,
                  int field_height, int field_width, int padding, int stride):
    # Inverse of im2col_cython: accumulate columns back into an image,
    # summing overlapping receptive fields.
    cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
    cdef int HH = (H + 2 * padding - field_height) / stride + 1
    cdef int WW = (W + 2 * padding - field_width) / stride + 1
    cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding),
                                                         dtype=cols.dtype)

    # Moving the inner loop to a C-function with no bounds checking improves
    # performance quite a bit for col2im.
    col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
                        field_height, field_width, padding, stride)
    if padding > 0:
        # Strip the zero padding before returning.
        return x_padded[:, :, padding:-padding, padding:-padding]
    return x_padded


@cython.boundscheck(False)
cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
                             np.ndarray[DTYPE_t, ndim=4] x_padded,
                             int N, int C, int H, int W, int HH, int WW,
                             int field_height, int field_width, int padding, int stride) except? -1:
    # Adjoint of im2col_cython_inner; `+=` accumulates overlapping fields.
    cdef int c, ii, jj, row, yy, xx, i, col

    for c in range(C):
        for ii in range(field_height):
            for jj in range(field_width):
                # Must match the row formula in im2col_cython_inner exactly.
                row = c * field_width * field_height + ii * field_height + jj
                for yy in range(HH):
                    for xx in range(WW):
                        for i in range(N):
                            col = yy * WW * N + xx * N + i
                            x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col]


@cython.boundscheck(False)
@cython.wraparound(False)
cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols,
                            np.ndarray[DTYPE_t, ndim=4] x_padded,
                            int N, int C, int H, int W, int HH, int WW,
                            int out_h, int out_w, int pad, int stride):
    # 6-D variant used by the "strides" fast layers; cols is indexed
    # (channel, hh, ww, image, out_h, out_w).
    cdef int c, hh, ww, n, h, w
    for n in range(N):
        for c in range(C):
            for hh in range(HH):
                for ww in range(WW):
                    for h in range(out_h):
                        for w in range(out_w):
                            x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w]


def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W,
                     int HH, int WW, int pad, int stride):
    # Public wrapper around col2im_6d_cython_inner.
    cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
    # C integer division on cdef ints.
    cdef int out_h = (H + 2 * pad - HH) / stride + 1
    cdef int out_w = (W + 2 * pad - WW) / stride + 1
    cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad),
                                                         dtype=cols.dtype)

    col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride)

    if pad > 0:
        # Strip the zero padding before returning.
        return x_padded[:, :, pad:-pad, pad:-pad]
    return x_padded
def extract_features(imgs, feature_fns, verbose=False):
    """
    Given pixel data for images and several feature functions that can operate on
    single images, apply all feature functions to all images, concatenating the
    feature vectors for each image and storing the features for all images in
    a single matrix.

    Inputs:
    - imgs: N x H X W X C array of pixel data for N images.
    - feature_fns: List of k feature functions. The ith feature function should
      take as input an H x W x D array and return a (one-dimensional) array of
      length F_i.
    - verbose: Boolean; if true, print progress.

    Returns:
    An array of shape (N, F_1 + ... + F_k) where each column is the concatenation
    of all features for a single image.
    """
    num_images = imgs.shape[0]
    if num_images == 0:
        return np.array([])

    # Use the first image to determine feature dimensions.
    feature_dims = []
    first_image_features = []
    for feature_fn in feature_fns:
        feats = feature_fn(imgs[0].squeeze())
        assert len(feats.shape) == 1, 'Feature functions must be one-dimensional'
        feature_dims.append(feats.size)
        first_image_features.append(feats)

    # Now that we know the dimensions of the features, we can allocate a single
    # big array to store all features as columns.
    total_feature_dim = sum(feature_dims)
    imgs_features = np.zeros((num_images, total_feature_dim))
    imgs_features[0] = np.hstack(first_image_features)

    # Extract features for the rest of the images.
    # BUG FIX: xrange does not exist in Python 3 (the past.builtins import
    # at the top of the file is commented out); use range.
    for i in range(1, num_images):
        idx = 0
        for feature_fn, feature_dim in zip(feature_fns, feature_dims):
            next_idx = idx + feature_dim
            imgs_features[i, idx:next_idx] = feature_fn(imgs[i].squeeze())
            idx = next_idx
        if verbose and i % 1000 == 0:
            print('Done extracting features for %d / %d images' % (i, num_images))

    return imgs_features


def rgb2gray(rgb):
    """Convert RGB image to grayscale

    Parameters:
      rgb : RGB image

    Returns:
      gray : grayscale image

    """
    # BUG FIX: the blue coefficient was 0.144; the ITU-R BT.601 luma weights
    # are (0.299, 0.587, 0.114), which also sum to 1.0.
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])


def hog_feature(im):
    """Compute Histogram of Gradient (HOG) feature for an image

    Modified from skimage.feature.hog
    http://pydoc.net/Python/scikits-image/0.4.2/skimage.feature.hog

    Reference:
      Histograms of Oriented Gradients for Human Detection
      Navneet Dalal and Bill Triggs, CVPR 2005

    Parameters:
      im : an input grayscale or rgb image

    Returns:
      feat: Histogram of Gradient (HOG) feature

    """
    # convert rgb to grayscale if needed
    if im.ndim == 3:
        image = rgb2gray(im)
    else:
        # BUG FIX: np.at_least_2d does not exist (AttributeError); the
        # function is np.atleast_2d.
        image = np.atleast_2d(im)

    sx, sy = image.shape  # image size
    orientations = 9      # number of gradient bins
    cx, cy = (8, 8)       # pixels per cell

    gx = np.zeros(image.shape)
    gy = np.zeros(image.shape)
    gx[:, :-1] = np.diff(image, n=1, axis=1)  # compute gradient on x-direction
    gy[:-1, :] = np.diff(image, n=1, axis=0)  # compute gradient on y-direction
    grad_mag = np.sqrt(gx ** 2 + gy ** 2)  # gradient magnitude
    grad_ori = np.arctan2(gy, (gx + 1e-15)) * (180 / np.pi) + 90  # gradient orientation

    n_cellsx = int(np.floor(sx / cx))  # number of cells in x
    n_cellsy = int(np.floor(sy / cy))  # number of cells in y
    # compute orientations integral images
    orientation_histogram = np.zeros((n_cellsx, n_cellsy, orientations))
    for i in range(orientations):
        # isolate orientations in this range
        temp_ori = np.where(grad_ori < 180 / orientations * (i + 1),
                            grad_ori, 0)
        temp_ori = np.where(grad_ori >= 180 / orientations * i,
                            temp_ori, 0)
        # select magnitudes for those orientations
        cond2 = temp_ori > 0
        temp_mag = np.where(cond2, grad_mag, 0)
        # BUG FIX: cx/2 and cy/2 are floats under Python 3 and raise
        # TypeError when used as slice indices; use floor division.
        orientation_histogram[:, :, i] = uniform_filter(temp_mag, size=(cx, cy))[cx // 2::cx, cy // 2::cy].T

    return orientation_histogram.ravel()


def color_histogram_hsv(im, nbin=10, xmin=0, xmax=255, normalized=True):
    """
    Compute color histogram for an image using hue.

    Inputs:
    - im: H x W x C array of pixel data for an RGB image.
    - nbin: Number of histogram bins. (default: 10)
    - xmin: Minimum pixel value (default: 0)
    - xmax: Maximum pixel value (default: 255)
    - normalized: Whether to normalize the histogram (default: True)

    Returns:
      1D vector of length nbin giving the color histogram over the hue of the
      input image.
    """
    bins = np.linspace(xmin, xmax, nbin + 1)
    hsv = matplotlib.colors.rgb_to_hsv(im / xmax) * xmax
    imhist, bin_edges = np.histogram(hsv[:, :, 0], bins=bins, density=normalized)
    imhist = imhist * np.diff(bin_edges)

    return imhist
32 | """ 33 | num_train, dim = X.shape 34 | num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes 35 | if self.W is None: 36 | # lazily initialize W 37 | self.W = 0.001 * np.random.randn(dim, num_classes) 38 | 39 | # Run stochastic gradient descent to optimize W 40 | loss_history = [] 41 | for it in xrange(num_iters): 42 | X_batch = None 43 | y_batch = None 44 | 45 | ######################################################################### 46 | # TODO: # 47 | # Sample batch_size elements from the training data and their # 48 | # corresponding labels to use in this round of gradient descent. # 49 | # Store the data in X_batch and their corresponding labels in # 50 | # y_batch; after sampling X_batch should have shape (dim, batch_size) # 51 | # and y_batch should have shape (batch_size,) # 52 | # # 53 | # Hint: Use np.random.choice to generate indices. Sampling with # 54 | # replacement is faster than sampling without replacement. # 55 | ######################################################################### 56 | choice = np.random.choice(num_train,batch_size,replace=True) 57 | X_batch = X[choice] 58 | y_batch = y[choice] 59 | ######################################################################### 60 | # END OF YOUR CODE # 61 | ######################################################################### 62 | 63 | # evaluate loss and gradient 64 | loss, grad = self.loss(X_batch, y_batch, reg) 65 | loss_history.append(loss) 66 | 67 | # perform parameter update 68 | ######################################################################### 69 | # TODO: # 70 | # Update the weights using the gradient and the learning rate. 
# 71 | ######################################################################### 72 | self.W = self.W - learning_rate*grad 73 | ######################################################################### 74 | # END OF YOUR CODE # 75 | ######################################################################### 76 | 77 | if verbose and it % 100 == 0: 78 | print('iteration %d / %d: loss %f' % (it, num_iters, loss)) 79 | 80 | return loss_history 81 | 82 | def predict(self, X): 83 | """ 84 | Use the trained weights of this linear classifier to predict labels for 85 | data points. 86 | 87 | Inputs: 88 | - X: A numpy array of shape (N, D) containing training data; there are N 89 | training samples each of dimension D. 90 | 91 | Returns: 92 | - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional 93 | array of length N, and each element is an integer giving the predicted 94 | class. 95 | """ 96 | y_pred = np.zeros(X.shape[0]) 97 | ########################################################################### 98 | # TODO: # 99 | # Implement this method. Store the predicted labels in y_pred. # 100 | ########################################################################### 101 | scores = X.dot(self.W) 102 | y_pred = np.argmax(scores,axis=1) 103 | ########################################################################### 104 | # END OF YOUR CODE # 105 | ########################################################################### 106 | return y_pred 107 | 108 | def loss(self, X_batch, y_batch, reg): 109 | """ 110 | Compute the loss function and its derivative. 111 | Subclasses will override this. 112 | 113 | Inputs: 114 | - X_batch: A numpy array of shape (N, D) containing a minibatch of N 115 | data points; each point has dimension D. 116 | - y_batch: A numpy array of shape (N,) containing labels for the minibatch. 117 | - reg: (float) regularization strength. 
118 | 119 | Returns: A tuple containing: 120 | - loss as a single float 121 | - gradient with respect to self.W; an array of the same shape as W 122 | """ 123 | pass 124 | 125 | 126 | class LinearSVM(LinearClassifier): 127 | """ A subclass that uses the Multiclass SVM loss function """ 128 | 129 | def loss(self, X_batch, y_batch, reg): 130 | return svm_loss_vectorized(self.W, X_batch, y_batch, reg) 131 | 132 | 133 | class Softmax(LinearClassifier): 134 | """ A subclass that uses the Softmax + Cross-entropy loss function """ 135 | 136 | def loss(self, X_batch, y_batch, reg): 137 | return softmax_loss_vectorized(self.W, X_batch, y_batch, reg) 138 | 139 | -------------------------------------------------------------------------------- /assignment2/cs231n/optim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | This file implements various first-order update rules that are commonly used 5 | for training neural networks. Each update rule accepts current weights and the 6 | gradient of the loss with respect to those weights and produces the next set of 7 | weights. Each update rule has the same interface: 8 | 9 | def update(w, dw, config=None): 10 | 11 | Inputs: 12 | - w: A numpy array giving the current weights. 13 | - dw: A numpy array of the same shape as w giving the gradient of the 14 | loss with respect to w. 15 | - config: A dictionary containing hyperparameter values such as learning 16 | rate, momentum, etc. If the update rule requires caching values over many 17 | iterations, then config will also hold these cached values. 18 | 19 | Returns: 20 | - next_w: The next point after the update. 21 | - config: The config dictionary to be passed to the next iteration of the 22 | update rule. 
23 | 24 | NOTE: For most update rules, the default learning rate will probably not 25 | perform well; however the default values of the other hyperparameters should 26 | work well for a variety of different problems. 27 | 28 | For efficiency, update rules may perform in-place updates, mutating w and 29 | setting next_w equal to w. 30 | """ 31 | 32 | 33 | def sgd(w, dw, config=None): 34 | """ 35 | Performs vanilla stochastic gradient descent. 36 | 37 | config format: 38 | - learning_rate: Scalar learning rate. 39 | """ 40 | if config is None: config = {} 41 | config.setdefault('learning_rate', 1e-2) 42 | 43 | w -= config['learning_rate'] * dw 44 | return w, config 45 | 46 | 47 | def sgd_momentum(w, dw, config=None): 48 | """ 49 | Performs stochastic gradient descent with momentum. 50 | 51 | config format: 52 | - learning_rate: Scalar learning rate. 53 | - momentum: Scalar between 0 and 1 giving the momentum value. 54 | Setting momentum = 0 reduces to sgd. 55 | - velocity: A numpy array of the same shape as w and dw used to store a 56 | moving average of the gradients. 57 | """ 58 | if config is None: 59 | config = {} 60 | config.setdefault('learning_rate', 1e-2) 61 | config.setdefault('momentum', 0.9) 62 | v = config.get('velocity', np.zeros_like(w)) 63 | 64 | next_w = None 65 | ########################################################################### 66 | # TODO: Implement the momentum update formula. Store the updated value in # 67 | # the next_w variable. You should also use and update the velocity v. 
# 68 | ########################################################################### 69 | v = config['momentum'] * v - config['learning_rate'] * dw 70 | next_w = w + v 71 | ########################################################################### 72 | # END OF YOUR CODE # 73 | ########################################################################### 74 | config['velocity'] = v 75 | 76 | return next_w, config 77 | 78 | 79 | 80 | def rmsprop(x, dx, config=None): 81 | """ 82 | Uses the RMSProp update rule, which uses a moving average of squared 83 | gradient values to set adaptive per-parameter learning rates. 84 | 85 | config format: 86 | - learning_rate: Scalar learning rate. 87 | - decay_rate: Scalar between 0 and 1 giving the decay rate for the squared 88 | gradient cache. 89 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 90 | - cache: Moving average of second moments of gradients. 91 | """ 92 | if config is None: config = {} 93 | config.setdefault('learning_rate', 1e-2) 94 | config.setdefault('decay_rate', 0.99) 95 | config.setdefault('epsilon', 1e-8) 96 | config.setdefault('cache', np.zeros_like(x)) 97 | 98 | next_x = None 99 | ########################################################################### 100 | # TODO: Implement the RMSprop update formula, storing the next value of x # 101 | # in the next_x variable. Don't forget to update cache value stored in # 102 | # config['cache']. 
# 103 | ########################################################################### 104 | cache = config['cache'] 105 | cache = config['decay_rate'] * cache + (1 - config['decay_rate']) * dx**2 106 | next_x = x - config['learning_rate'] * dx / (np.sqrt(cache) + config['epsilon']) 107 | config['cache'] = cache 108 | ########################################################################### 109 | # END OF YOUR CODE # 110 | ########################################################################### 111 | 112 | return next_x, config 113 | 114 | 115 | def adam(x, dx, config=None): 116 | """ 117 | Uses the Adam update rule, which incorporates moving averages of both the 118 | gradient and its square and a bias correction term. 119 | 120 | config format: 121 | - learning_rate: Scalar learning rate. 122 | - beta1: Decay rate for moving average of first moment of gradient. 123 | - beta2: Decay rate for moving average of second moment of gradient. 124 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 125 | - m: Moving average of gradient. 126 | - v: Moving average of squared gradient. 127 | - t: Iteration number. 128 | """ 129 | if config is None: config = {} 130 | config.setdefault('learning_rate', 1e-3) 131 | config.setdefault('beta1', 0.9) 132 | config.setdefault('beta2', 0.999) 133 | config.setdefault('epsilon', 1e-8) 134 | config.setdefault('m', np.zeros_like(x)) 135 | config.setdefault('v', np.zeros_like(x)) 136 | config.setdefault('t', 1) 137 | 138 | next_x = None 139 | ########################################################################### 140 | # TODO: Implement the Adam update formula, storing the next value of x in # 141 | # the next_x variable. Don't forget to update the m, v, and t variables # 142 | # stored in config. 
# 143 | ########################################################################### 144 | config['m'] = config['beta1'] * config['m'] + (1 - config['beta1']) * dx 145 | config['v'] = config['beta2'] * config['v'] + (1 - config['beta2']) * dx**2 146 | next_x = x - config['learning_rate'] * config['m'] / \ 147 | (np.sqrt(config['v'])+config['epsilon']) 148 | ########################################################################### 149 | # END OF YOUR CODE # 150 | ########################################################################### 151 | 152 | return next_x, config 153 | -------------------------------------------------------------------------------- /assignment2/cs231n/classifiers/cnn.py: -------------------------------------------------------------------------------- 1 | from builtins import object 2 | import numpy as np 3 | 4 | from cs231n.layers import * 5 | from cs231n.fast_layers import * 6 | from cs231n.layer_utils import * 7 | 8 | 9 | class ThreeLayerConvNet(object): 10 | """ 11 | A three-layer convolutional network with the following architecture: 12 | 13 | conv - relu - 2x2 max pool - affine - relu - affine - softmax 14 | 15 | The network operates on minibatches of data that have shape (N, C, H, W) 16 | consisting of N images, each with height H and width W and with C input 17 | channels. 18 | """ 19 | 20 | def __init__(self, input_dim=(3, 32, 32), num_filters=32, filter_size=7, 21 | hidden_dim=100, num_classes=10, weight_scale=1e-3, reg=0.0, 22 | dtype=np.float32): 23 | """ 24 | Initialize a new network. 25 | 26 | Inputs: 27 | - input_dim: Tuple (C, H, W) giving size of input data 28 | - num_filters: Number of filters to use in the convolutional layer 29 | - filter_size: Size of filters to use in the convolutional layer 30 | - hidden_dim: Number of units to use in the fully-connected hidden layer 31 | - num_classes: Number of scores to produce from the final affine layer. 
32 | - weight_scale: Scalar giving standard deviation for random initialization 33 | of weights. 34 | - reg: Scalar giving L2 regularization strength 35 | - dtype: numpy datatype to use for computation. 36 | """ 37 | self.params = {} 38 | self.reg = reg 39 | self.dtype = dtype 40 | 41 | ############################################################################ 42 | # TODO: Initialize weights and biases for the three-layer convolutional # 43 | # network. Weights should be initialized from a Gaussian with standard # 44 | # deviation equal to weight_scale; biases should be initialized to zero. # 45 | # All weights and biases should be stored in the dictionary self.params. # 46 | # Store weights and biases for the convolutional layer using the keys 'W1' # 47 | # and 'b1'; use keys 'W2' and 'b2' for the weights and biases of the # 48 | # hidden affine layer, and keys 'W3' and 'b3' for the weights and biases # 49 | # of the output affine layer. # 50 | ############################################################################ 51 | C, H, W = input_dim 52 | self.params['W1'] = weight_scale * np.random.randn(num_filters, C, filter_size, filter_size) 53 | self.params['b1'] = np.zeros(num_filters) 54 | 55 | self.params['W2'] = weight_scale * np.random.randn(H*W*num_filters//4, hidden_dim) 56 | self.params['b2'] = np.zeros(hidden_dim) 57 | 58 | self.params['W3'] = weight_scale * np.random.randn(hidden_dim, num_classes) 59 | self.params['b3'] = np.zeros(num_classes) 60 | ############################################################################ 61 | # END OF YOUR CODE # 62 | ############################################################################ 63 | 64 | for k, v in self.params.items(): 65 | self.params[k] = v.astype(dtype) 66 | 67 | 68 | def loss(self, X, y=None): 69 | """ 70 | Evaluate loss and gradient for the three-layer convolutional network. 71 | 72 | Input / output: Same API as TwoLayerNet in fc_net.py. 
73 | """ 74 | W1, b1 = self.params['W1'], self.params['b1'] 75 | W2, b2 = self.params['W2'], self.params['b2'] 76 | W3, b3 = self.params['W3'], self.params['b3'] 77 | 78 | # pass conv_param to the forward pass for the convolutional layer 79 | filter_size = W1.shape[2] 80 | conv_param = {'stride': 1, 'pad': (filter_size - 1) // 2} 81 | 82 | # pass pool_param to the forward pass for the max-pooling layer 83 | pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2} 84 | 85 | scores = None 86 | ############################################################################ 87 | # TODO: Implement the forward pass for the three-layer convolutional net, # 88 | # computing the class scores for X and storing them in the scores # 89 | # variable. # 90 | ############################################################################ 91 | 92 | # conv 93 | conv_out, cache_conv = conv_forward_strides(X, W1, b1, conv_param) 94 | # | 95 | # V 96 | # relu 97 | relu1_out, cache_relu1 = relu_forward(conv_out) 98 | # | 99 | # V 100 | # pool 101 | pool_out, cache_pool = max_pool_forward_fast(relu1_out, pool_param) 102 | # | 103 | # V 104 | # affine1 105 | aff1_out, cache_aff1 = affine_forward(pool_out, W2, b2) 106 | # | 107 | # V 108 | # relu2 109 | relu2_out, cache_relu2 = relu_forward(aff1_out) 110 | # | 111 | # V 112 | # affine2 113 | scores, cache_aff2 = affine_forward(relu2_out, W3, b3) 114 | 115 | ############################################################################ 116 | # END OF YOUR CODE # 117 | ############################################################################ 118 | 119 | if y is None: 120 | return scores 121 | 122 | loss, grads = 0, {} 123 | ############################################################################ 124 | # TODO: Implement the backward pass for the three-layer convolutional net, # 125 | # storing the loss and gradients in the loss and grads variables. 
Compute # 126 | # data loss using softmax, and make sure that grads[k] holds the gradients # 127 | # for self.params[k]. Don't forget to add L2 regularization! # 128 | ############################################################################ 129 | 130 | # loss 131 | loss, dout = softmax_loss(scores, y) 132 | # | 133 | # V 134 | # affine2 135 | dx, dW3, db3 = affine_backward(dout,cache_aff2) 136 | # | 137 | # V 138 | # relu2 139 | dx = relu_backward(dx, cache_relu2) 140 | # | 141 | # V 142 | # affine1 143 | dx, dW2, db2 = affine_backward(dx, cache_aff1) 144 | # | 145 | # V 146 | # pool 147 | dx = max_pool_backward_fast(dx, cache_pool) 148 | # | 149 | # V 150 | # relu 151 | dx = relu_backward(dx, cache_relu1) 152 | # | 153 | # V 154 | # conv 155 | dx, dW1, db1 = conv_backward_strides(dx, cache_conv) 156 | 157 | grads['W1'], grads['b1'] = dW1 + self.reg*self.params['W1'], db1 158 | grads['W2'], grads['b2'] = dW2 + self.reg*self.params['W2'], db2 159 | grads['W3'], grads['b3'] = dW3 + self.reg*self.params['W3'], db3 160 | 161 | loss += 0.5 * self.reg*(np.sum(W1*W1)+np.sum(W2*W2)+np.sum(W3*W3)) 162 | ############################################################################ 163 | # END OF YOUR CODE # 164 | ############################################################################ 165 | 166 | return loss, grads 167 | -------------------------------------------------------------------------------- /assignment2/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | mathjax: true 4 | permalink: /assignments2017/assignment2/ 5 | --- 6 | 7 | In this assignment you will practice writing backpropagation code, and training 8 | Neural Networks and Convolutional Neural Networks. 
The goals of this assignment
9 | are as follows:
10 | 
11 | - understand **Neural Networks** and how they are arranged in layered
12 | architectures
13 | - understand and be able to implement (vectorized) **backpropagation**
14 | - implement various **update rules** used to optimize Neural Networks
15 | - implement **batch normalization** for training deep networks
16 | - implement **dropout** to regularize networks
17 | - effectively **cross-validate** and find the best hyperparameters for Neural
18 | Network architecture
19 | - understand the architecture of **Convolutional Neural Networks** and
20 | gain experience with training these models on data
21 | 
22 | ## Setup
23 | You can work on the assignment in one of two ways: locally on your own machine, or on a virtual machine on Google Cloud.
24 | 
25 | ### Working remotely on Google Cloud (Recommended)
26 | 
27 | **Note:** after following these instructions, make sure you go to **Working on the assignment** below (you can skip the **Working locally** section).
28 | 
29 | As part of this course, you can use Google Cloud for your assignments. We recommend this route for anyone who is having trouble with installation set-up, or if you would like to use better CPU/GPU resources than you may have locally.
30 | 
31 | Please see the Google Cloud GPU set-up tutorial [here](http://cs231n.github.io/gce-tutorial-gpus/) for instructions.
32 | 
33 | We strongly, strongly recommend using Google Cloud with GPU support for the last part of this assignment (the TensorFlow or PyTorch notebooks), since your training will go much, much faster. :)
34 | 
35 | ### Working locally
36 | Here's how you install the necessary dependencies:
37 | 
38 | **(OPTIONAL) Installing GPU drivers:**
39 | If you choose to work locally, you are at no disadvantage for the first 3 parts of the assignment. For the last part, which is in TensorFlow or PyTorch, however, having a GPU will be a significant advantage.
We recommend using a Google Cloud Instance with a GPU, at least for this part. If you have your own NVIDIA GPU, however, and wish to use that, that's fine -- you'll need to install the drivers for your GPU, install CUDA, install cuDNN, and then install either [TensorFlow](https://www.tensorflow.org/install/) or [PyTorch](http://pytorch.org/). You could theoretically do the entire assignment with no GPUs, though this will make training much slower in the last part. 40 | 41 | **Installing Python 3.5+:** 42 | To use python3, make sure to install version 3.5 or 3.6 on your local machine. If you are on Mac OS X, you can do this using [Homebrew](https://brew.sh) with `brew install python3`. You can find instructions for Ubuntu [here](https://www.digitalocean.com/community/tutorials/how-to-install-python-3-and-set-up-a-local-programming-environment-on-ubuntu-16-04). 43 | 44 | **Virtual environment:** 45 | If you decide to work locally, we recommend using [virtual environment](http://docs.python-guide.org/en/latest/dev/virtualenvs/) for the project. If you choose not to use a virtual environment, it is up to you to make sure that all dependencies for the code are installed globally on your machine. To set up a virtual environment, run the following: 46 | 47 | ```bash 48 | cd assignment2 49 | sudo pip install virtualenv # This may already be installed 50 | virtualenv -p python3 .env # Create a virtual environment (python3) 51 | source .env/bin/activate # Activate the virtual environment 52 | pip install -r requirements.txt # Install dependencies 53 | # Note that this does NOT install TensorFlow or PyTorch, 54 | # which you need to do yourself. 55 | 56 | # Work on the assignment for a while ... 57 | # ... 
and when you're done: 58 | deactivate # Exit the virtual environment 59 | ``` 60 | 61 | Note that every time you want to work on the assignment, you should run `source .env/bin/activate` (from within your `assignment2` folder) to re-activate the virtual environment, and `deactivate` again whenever you are done. 62 | 63 | ## Working on the assignment: 64 | Get the code as a zip file [here](http://cs231n.stanford.edu/assignments/2017/spring1617_assignment2.zip). 65 | 66 | ### Download data: 67 | Once you have the starter code (regardless of which method you choose above), you will need to download the CIFAR-10 dataset. 68 | Run the following from the `assignment2` directory: 69 | 70 | ```bash 71 | cd cs231n/datasets 72 | ./get_datasets.sh 73 | ``` 74 | 75 | ### Start IPython: 76 | After you have the CIFAR-10 data, you should start the IPython notebook server from the 77 | `assignment2` directory, with the `jupyter notebook` command. (See the [Google Cloud Tutorial](http://cs231n.github.io/gce-tutorial/) for any additional steps you may need to do for setting this up, if you are working remotely) 78 | 79 | If you are unfamiliar with IPython, you can also refer to our 80 | [IPython tutorial](/ipython-tutorial). 81 | 82 | ### Some Notes 83 | **NOTE 1:** This year, the `assignment2` code has been tested to be compatible with python versions `3.5` and `3.6` (it may work with other versions of `3.x`, but we won't be officially supporting them). For this assignment, we are NOT officially supporting python2. Use it at your own risk. You will need to make sure that during your `virtualenv` setup that the correct version of `python` is used. You can confirm your python version by (1) activating your virtualenv and (2) running `which python`. 84 | 85 | **NOTE 2:** If you are working in a virtual environment on OSX, you may *potentially* encounter 86 | errors with matplotlib due to the [issues described here](http://matplotlib.org/faq/virtualenv_faq.html). 
In our testing, it seems that this issue is no longer present with the most recent version of matplotlib, but if you do end up running into this issue you may have to use the `start_ipython_osx.sh` script from the `assignment1` directory (instead of `jupyter notebook` above) to launch your IPython notebook server. Note that you may have to modify some variables within the script to match your version of python/installation directory. The script assumes that your virtual environment is named `.env`. 87 | 88 | ### Submitting your work: 89 | Whether you work on the assignment locally or using Google Cloud, once you are done 90 | working run the `collectSubmission.sh` script; this will produce a file called 91 | `assignment2.zip`. Please submit this file on [Canvas](https://canvas.stanford.edu/courses/66461/). 92 | 93 | ### Q1: Fully-connected Neural Network (25 points) 94 | The IPython notebook `FullyConnectedNets.ipynb` will introduce you to our 95 | modular layer design, and then use those layers to implement fully-connected 96 | networks of arbitrary depth. To optimize these models you will implement several 97 | popular update rules. 98 | 99 | ### Q2: Batch Normalization (25 points) 100 | In the IPython notebook `BatchNormalization.ipynb` you will implement batch 101 | normalization, and use it to train deep fully-connected networks. 102 | 103 | ### Q3: Dropout (10 points) 104 | The IPython notebook `Dropout.ipynb` will help you implement Dropout and explore 105 | its effects on model generalization. 106 | 107 | ### Q4: Convolutional Networks (30 points) 108 | In the IPython Notebook ConvolutionalNetworks.ipynb you will implement several new layers that are commonly used in convolutional networks. 109 | 110 | ### Q5: PyTorch / Tensorflow on CIFAR-10 (10 points) 111 | For this last part, you will be working in either TensorFlow or PyTorch, two popular and powerful deep learning frameworks. 
**You only need to complete ONE of these two notebooks.** You do NOT need to do both, but a very small amount of extra credit will be awarded to those who do. 112 | 113 | Open up either `PyTorch.ipynb` or `TensorFlow.ipynb`. There, you will learn how the framework works, culminating in training a convolutional network of your own design on CIFAR-10 to get the best performance you can. 114 | 115 | ### Q5: Do something extra! (up to +10 points) 116 | In the process of training your network, you should feel free to implement 117 | anything that you want to get better performance. You can modify the solver, 118 | implement additional layers, use different types of regularization, use an 119 | ensemble of models, or anything else that comes to mind. If you implement these 120 | or other ideas not covered in the assignment then you will be awarded some bonus 121 | points. 122 | -------------------------------------------------------------------------------- /assignment1/cs231n/data_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from six.moves import cPickle as pickle 4 | import numpy as np 5 | import os 6 | from scipy.misc import imread 7 | import platform 8 | 9 | def load_pickle(f): 10 | version = platform.python_version_tuple() 11 | if version[0] == '2': 12 | return pickle.load(f) 13 | elif version[0] == '3': 14 | return pickle.load(f, encoding='latin1') 15 | raise ValueError("invalid python version: {}".format(version)) 16 | 17 | def load_CIFAR_batch(filename): 18 | """ load single batch of cifar """ 19 | with open(filename, 'rb') as f: 20 | datadict = load_pickle(f) 21 | X = datadict['data'] 22 | Y = datadict['labels'] 23 | X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float") 24 | Y = np.array(Y) 25 | return X, Y 26 | 27 | def load_CIFAR10(ROOT): 28 | """ load all of cifar """ 29 | xs = [] 30 | ys = [] 31 | for b in range(1,6): 32 | f = os.path.join(ROOT, 
'data_batch_%d' % (b, )) 33 | X, Y = load_CIFAR_batch(f) 34 | xs.append(X) 35 | ys.append(Y) 36 | Xtr = np.concatenate(xs) 37 | Ytr = np.concatenate(ys) 38 | del X, Y 39 | Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch')) 40 | return Xtr, Ytr, Xte, Yte 41 | 42 | 43 | def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, 44 | subtract_mean=True): 45 | """ 46 | Load the CIFAR-10 dataset from disk and perform preprocessing to prepare 47 | it for classifiers. These are the same steps as we used for the SVM, but 48 | condensed to a single function. 49 | """ 50 | # Load the raw CIFAR-10 data 51 | cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' 52 | X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) 53 | 54 | # Subsample the data 55 | mask = list(range(num_training, num_training + num_validation)) 56 | X_val = X_train[mask] 57 | y_val = y_train[mask] 58 | mask = list(range(num_training)) 59 | X_train = X_train[mask] 60 | y_train = y_train[mask] 61 | mask = list(range(num_test)) 62 | X_test = X_test[mask] 63 | y_test = y_test[mask] 64 | 65 | # Normalize the data: subtract the mean image 66 | if subtract_mean: 67 | mean_image = np.mean(X_train, axis=0) 68 | X_train -= mean_image 69 | X_val -= mean_image 70 | X_test -= mean_image 71 | 72 | # Transpose so that channels come first 73 | X_train = X_train.transpose(0, 3, 1, 2).copy() 74 | X_val = X_val.transpose(0, 3, 1, 2).copy() 75 | X_test = X_test.transpose(0, 3, 1, 2).copy() 76 | 77 | # Package data into a dictionary 78 | return { 79 | 'X_train': X_train, 'y_train': y_train, 80 | 'X_val': X_val, 'y_val': y_val, 81 | 'X_test': X_test, 'y_test': y_test, 82 | } 83 | 84 | 85 | def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True): 86 | """ 87 | Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and 88 | TinyImageNet-200 have the same directory structure, so this can be used 89 | to load any of them. 
90 | 91 | Inputs: 92 | - path: String giving path to the directory to load. 93 | - dtype: numpy datatype used to load the data. 94 | - subtract_mean: Whether to subtract the mean training image. 95 | 96 | Returns: A dictionary with the following entries: 97 | - class_names: A list where class_names[i] is a list of strings giving the 98 | WordNet names for class i in the loaded dataset. 99 | - X_train: (N_tr, 3, 64, 64) array of training images 100 | - y_train: (N_tr,) array of training labels 101 | - X_val: (N_val, 3, 64, 64) array of validation images 102 | - y_val: (N_val,) array of validation labels 103 | - X_test: (N_test, 3, 64, 64) array of testing images. 104 | - y_test: (N_test,) array of test labels; if test labels are not available 105 | (such as in student code) then y_test will be None. 106 | - mean_image: (3, 64, 64) array giving mean training image 107 | """ 108 | # First load wnids 109 | with open(os.path.join(path, 'wnids.txt'), 'r') as f: 110 | wnids = [x.strip() for x in f] 111 | 112 | # Map wnids to integer labels 113 | wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)} 114 | 115 | # Use words.txt to get names for each class 116 | with open(os.path.join(path, 'words.txt'), 'r') as f: 117 | wnid_to_words = dict(line.split('\t') for line in f) 118 | for wnid, words in wnid_to_words.iteritems(): 119 | wnid_to_words[wnid] = [w.strip() for w in words.split(',')] 120 | class_names = [wnid_to_words[wnid] for wnid in wnids] 121 | 122 | # Next load training data. 
123 | X_train = [] 124 | y_train = [] 125 | for i, wnid in enumerate(wnids): 126 | if (i + 1) % 20 == 0: 127 | print('loading training data for synset %d / %d' % (i + 1, len(wnids))) 128 | # To figure out the filenames we need to open the boxes file 129 | boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid) 130 | with open(boxes_file, 'r') as f: 131 | filenames = [x.split('\t')[0] for x in f] 132 | num_images = len(filenames) 133 | 134 | X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype) 135 | y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64) 136 | for j, img_file in enumerate(filenames): 137 | img_file = os.path.join(path, 'train', wnid, 'images', img_file) 138 | img = imread(img_file) 139 | if img.ndim == 2: 140 | ## grayscale file 141 | img.shape = (64, 64, 1) 142 | X_train_block[j] = img.transpose(2, 0, 1) 143 | X_train.append(X_train_block) 144 | y_train.append(y_train_block) 145 | 146 | # We need to concatenate all training data 147 | X_train = np.concatenate(X_train, axis=0) 148 | y_train = np.concatenate(y_train, axis=0) 149 | 150 | # Next load validation data 151 | with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f: 152 | img_files = [] 153 | val_wnids = [] 154 | for line in f: 155 | img_file, wnid = line.split('\t')[:2] 156 | img_files.append(img_file) 157 | val_wnids.append(wnid) 158 | num_val = len(img_files) 159 | y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids]) 160 | X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype) 161 | for i, img_file in enumerate(img_files): 162 | img_file = os.path.join(path, 'val', 'images', img_file) 163 | img = imread(img_file) 164 | if img.ndim == 2: 165 | img.shape = (64, 64, 1) 166 | X_val[i] = img.transpose(2, 0, 1) 167 | 168 | # Next load test images 169 | # Students won't have test labels, so we need to iterate over files in the 170 | # images directory. 
171 | img_files = os.listdir(os.path.join(path, 'test', 'images')) 172 | X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype) 173 | for i, img_file in enumerate(img_files): 174 | img_file = os.path.join(path, 'test', 'images', img_file) 175 | img = imread(img_file) 176 | if img.ndim == 2: 177 | img.shape = (64, 64, 1) 178 | X_test[i] = img.transpose(2, 0, 1) 179 | 180 | y_test = None 181 | y_test_file = os.path.join(path, 'test', 'test_annotations.txt') 182 | if os.path.isfile(y_test_file): 183 | with open(y_test_file, 'r') as f: 184 | img_file_to_wnid = {} 185 | for line in f: 186 | line = line.split('\t') 187 | img_file_to_wnid[line[0]] = line[1] 188 | y_test = [wnid_to_label[img_file_to_wnid[img_file]] for img_file in img_files] 189 | y_test = np.array(y_test) 190 | 191 | mean_image = X_train.mean(axis=0) 192 | if subtract_mean: 193 | X_train -= mean_image[None] 194 | X_val -= mean_image[None] 195 | X_test -= mean_image[None] 196 | 197 | return { 198 | 'class_names': class_names, 199 | 'X_train': X_train, 200 | 'y_train': y_train, 201 | 'X_val': X_val, 202 | 'y_val': y_val, 203 | 'X_test': X_test, 204 | 'y_test': y_test, 205 | 'class_names': class_names, 206 | 'mean_image': mean_image, 207 | } 208 | 209 | 210 | def load_models(models_dir): 211 | """ 212 | Load saved models from disk. This will attempt to unpickle all files in a 213 | directory; any files that give errors on unpickling (such as README.txt) will 214 | be skipped. 215 | 216 | Inputs: 217 | - models_dir: String giving the path to a directory containing model files. 218 | Each model file is a pickled dictionary with a 'model' field. 219 | 220 | Returns: 221 | A dictionary mapping model file names to models. 
222 | """ 223 | models = {} 224 | for model_file in os.listdir(models_dir): 225 | with open(os.path.join(models_dir, model_file), 'rb') as f: 226 | try: 227 | models[model_file] = load_pickle(f)['model'] 228 | except pickle.UnpicklingError: 229 | continue 230 | return models 231 | -------------------------------------------------------------------------------- /assignment3/cs231n/captioning_solver.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | from builtins import object 4 | import numpy as np 5 | 6 | from cs231n import optim 7 | from cs231n.coco_utils import sample_coco_minibatch 8 | 9 | 10 | class CaptioningSolver(object): 11 | """ 12 | A CaptioningSolver encapsulates all the logic necessary for training 13 | image captioning models. The CaptioningSolver performs stochastic gradient 14 | descent using different update rules defined in optim.py. 15 | 16 | The solver accepts both training and validataion data and labels so it can 17 | periodically check classification accuracy on both training and validation 18 | data to watch out for overfitting. 19 | 20 | To train a model, you will first construct a CaptioningSolver instance, 21 | passing the model, dataset, and various options (learning rate, batch size, 22 | etc) to the constructor. You will then call the train() method to run the 23 | optimization procedure and train the model. 24 | 25 | After the train() method returns, model.params will contain the parameters 26 | that performed best on the validation set over the course of training. 27 | In addition, the instance variable solver.loss_history will contain a list 28 | of all losses encountered during training and the instance variables 29 | solver.train_acc_history and solver.val_acc_history will be lists containing 30 | the accuracies of the model on the training and validation set at each epoch. 
31 | 32 | Example usage might look something like this: 33 | 34 | data = load_coco_data() 35 | model = MyAwesomeModel(hidden_dim=100) 36 | solver = CaptioningSolver(model, data, 37 | update_rule='sgd', 38 | optim_config={ 39 | 'learning_rate': 1e-3, 40 | }, 41 | lr_decay=0.95, 42 | num_epochs=10, batch_size=100, 43 | print_every=100) 44 | solver.train() 45 | 46 | 47 | A CaptioningSolver works on a model object that must conform to the following 48 | API: 49 | 50 | - model.params must be a dictionary mapping string parameter names to numpy 51 | arrays containing parameter values. 52 | 53 | - model.loss(features, captions) must be a function that computes 54 | training-time loss and gradients, with the following inputs and outputs: 55 | 56 | Inputs: 57 | - features: Array giving a minibatch of features for images, of shape (N, D 58 | - captions: Array of captions for those images, of shape (N, T) where 59 | each element is in the range (0, V]. 60 | 61 | Returns: 62 | - loss: Scalar giving the loss 63 | - grads: Dictionary with the same keys as self.params mapping parameter 64 | names to gradients of the loss with respect to those parameters. 65 | """ 66 | 67 | def __init__(self, model, data, **kwargs): 68 | """ 69 | Construct a new CaptioningSolver instance. 70 | 71 | Required arguments: 72 | - model: A model object conforming to the API described above 73 | - data: A dictionary of training and validation data from load_coco_data 74 | 75 | Optional arguments: 76 | - update_rule: A string giving the name of an update rule in optim.py. 77 | Default is 'sgd'. 78 | - optim_config: A dictionary containing hyperparameters that will be 79 | passed to the chosen update rule. Each update rule requires different 80 | hyperparameters (see optim.py) but all update rules require a 81 | 'learning_rate' parameter so that should always be present. 82 | - lr_decay: A scalar for learning rate decay; after each epoch the learning 83 | rate is multiplied by this value. 
84 | - batch_size: Size of minibatches used to compute loss and gradient during 85 | training. 86 | - num_epochs: The number of epochs to run for during training. 87 | - print_every: Integer; training losses will be printed every print_every 88 | iterations. 89 | - verbose: Boolean; if set to false then no output will be printed during 90 | training. 91 | """ 92 | self.model = model 93 | self.data = data 94 | 95 | # Unpack keyword arguments 96 | self.update_rule = kwargs.pop('update_rule', 'sgd') 97 | self.optim_config = kwargs.pop('optim_config', {}) 98 | self.lr_decay = kwargs.pop('lr_decay', 1.0) 99 | self.batch_size = kwargs.pop('batch_size', 100) 100 | self.num_epochs = kwargs.pop('num_epochs', 10) 101 | 102 | self.print_every = kwargs.pop('print_every', 10) 103 | self.verbose = kwargs.pop('verbose', True) 104 | 105 | # Throw an error if there are extra keyword arguments 106 | if len(kwargs) > 0: 107 | extra = ', '.join('"%s"' % k for k in list(kwargs.keys())) 108 | raise ValueError('Unrecognized arguments %s' % extra) 109 | 110 | # Make sure the update rule exists, then replace the string 111 | # name with the actual function 112 | if not hasattr(optim, self.update_rule): 113 | raise ValueError('Invalid update_rule "%s"' % self.update_rule) 114 | self.update_rule = getattr(optim, self.update_rule) 115 | 116 | self._reset() 117 | 118 | 119 | def _reset(self): 120 | """ 121 | Set up some book-keeping variables for optimization. Don't call this 122 | manually. 
123 | """ 124 | # Set up some variables for book-keeping 125 | self.epoch = 0 126 | self.best_val_acc = 0 127 | self.best_params = {} 128 | self.loss_history = [] 129 | self.train_acc_history = [] 130 | self.val_acc_history = [] 131 | 132 | # Make a deep copy of the optim_config for each parameter 133 | self.optim_configs = {} 134 | for p in self.model.params: 135 | d = {k: v for k, v in self.optim_config.items()} 136 | self.optim_configs[p] = d 137 | 138 | 139 | def _step(self): 140 | """ 141 | Make a single gradient update. This is called by train() and should not 142 | be called manually. 143 | """ 144 | # Make a minibatch of training data 145 | minibatch = sample_coco_minibatch(self.data, 146 | batch_size=self.batch_size, 147 | split='train') 148 | captions, features, urls = minibatch 149 | 150 | # Compute loss and gradient 151 | loss, grads = self.model.loss(features, captions) 152 | self.loss_history.append(loss) 153 | 154 | # Perform a parameter update 155 | for p, w in self.model.params.items(): 156 | dw = grads[p] 157 | config = self.optim_configs[p] 158 | next_w, next_config = self.update_rule(w, dw, config) 159 | self.model.params[p] = next_w 160 | self.optim_configs[p] = next_config 161 | 162 | 163 | # TODO: This does nothing right now; maybe implement BLEU? 164 | def check_accuracy(self, X, y, num_samples=None, batch_size=100): 165 | """ 166 | Check accuracy of the model on the provided data. 167 | 168 | Inputs: 169 | - X: Array of data, of shape (N, d_1, ..., d_k) 170 | - y: Array of labels, of shape (N,) 171 | - num_samples: If not None, subsample the data and only test the model 172 | on num_samples datapoints. 173 | - batch_size: Split X and y into batches of this size to avoid using too 174 | much memory. 175 | 176 | Returns: 177 | - acc: Scalar giving the fraction of instances that were correctly 178 | classified by the model. 
179 | """ 180 | return 0.0 181 | 182 | # Maybe subsample the data 183 | N = X.shape[0] 184 | if num_samples is not None and N > num_samples: 185 | mask = np.random.choice(N, num_samples) 186 | N = num_samples 187 | X = X[mask] 188 | y = y[mask] 189 | 190 | # Compute predictions in batches 191 | num_batches = N / batch_size 192 | if N % batch_size != 0: 193 | num_batches += 1 194 | y_pred = [] 195 | for i in range(num_batches): 196 | start = i * batch_size 197 | end = (i + 1) * batch_size 198 | scores = self.model.loss(X[start:end]) 199 | y_pred.append(np.argmax(scores, axis=1)) 200 | y_pred = np.hstack(y_pred) 201 | acc = np.mean(y_pred == y) 202 | 203 | return acc 204 | 205 | 206 | def train(self): 207 | """ 208 | Run optimization to train the model. 209 | """ 210 | num_train = self.data['train_captions'].shape[0] 211 | iterations_per_epoch = max(num_train // self.batch_size, 1) 212 | num_iterations = self.num_epochs * iterations_per_epoch 213 | 214 | for t in range(num_iterations): 215 | self._step() 216 | 217 | # Maybe print training loss 218 | if self.verbose and t % self.print_every == 0: 219 | print('(Iteration %d / %d) loss: %f' % ( 220 | t + 1, num_iterations, self.loss_history[-1])) 221 | 222 | # At the end of every epoch, increment the epoch counter and decay the 223 | # learning rate. 224 | epoch_end = (t + 1) % iterations_per_epoch == 0 225 | if epoch_end: 226 | self.epoch += 1 227 | for k in self.optim_configs: 228 | self.optim_configs[k]['learning_rate'] *= self.lr_decay 229 | 230 | # Check train and val accuracy on the first iteration, the last 231 | # iteration, and at the end of each epoch. 
def load_pickle(f):
    """Unpickle file object f, handling the Python 2 / Python 3 difference."""
    version = platform.python_version_tuple()
    if version[0] == '2':
        return pickle.load(f)
    elif version[0] == '3':
        # latin1 round-trips raw bytes, so data pickled under Python 2 loads.
        return pickle.load(f, encoding='latin1')
    raise ValueError("invalid python version: {}".format(version))


def load_CIFAR_batch(filename):
    """
    Load a single batch of CIFAR-10 from filename.

    Returns:
    - X: float array of images, shape (num_images, 32, 32, 3)
    - Y: int array of labels, shape (num_images,)
    """
    with open(filename, 'rb') as f:
        datadict = load_pickle(f)
        X = datadict['data']
        Y = datadict['labels']
        # Infer the number of images with -1 instead of hard-coding 10000,
        # so batches of any size load correctly as well.
        X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
        Y = np.array(Y)
        return X, Y


def load_CIFAR10(ROOT):
    """Load all of CIFAR-10 from directory ROOT; returns Xtr, Ytr, Xte, Yte."""
    xs = []
    ys = []
    for b in range(1, 6):
        f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    del X, Y
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte


def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     subtract_mean=True):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for classifiers. These are the same steps as we used for the SVM, but
    condensed to a single function.

    Inputs:
    - num_training / num_validation / num_test: sizes of the three splits
      (validation is carved out of the training set after num_training).
    - subtract_mean: whether to subtract the mean training image.

    Returns a dictionary with keys X_train, y_train, X_val, y_val, X_test,
    y_test; image arrays are channels-first (N, 3, 32, 32).
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data (fancy indexing so each split is a copy)
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    if subtract_mean:
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

    # Transpose so that channels come first
    X_train = X_train.transpose(0, 3, 1, 2).copy()
    X_val = X_val.transpose(0, 3, 1, 2).copy()
    X_test = X_test.transpose(0, 3, 1, 2).copy()

    # Package data into a dictionary
    return {
        'X_train': X_train, 'y_train': y_train,
        'X_val': X_val, 'y_val': y_val,
        'X_test': X_test, 'y_test': y_test,
    }
def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
    """
    Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
    TinyImageNet-200 have the same directory structure, so this can be used
    to load any of them.

    Inputs:
    - path: String giving path to the directory to load.
    - dtype: numpy datatype used to load the data.
    - subtract_mean: Whether to subtract the mean training image.

    Returns: A dictionary with the following entries:
    - class_names: A list where class_names[i] is a list of strings giving the
      WordNet names for class i in the loaded dataset.
    - X_train: (N_tr, 3, 64, 64) array of training images
    - y_train: (N_tr,) array of training labels
    - X_val: (N_val, 3, 64, 64) array of validation images
    - y_val: (N_val,) array of validation labels
    - X_test: (N_test, 3, 64, 64) array of testing images.
    - y_test: (N_test,) array of test labels; if test labels are not available
      (such as in student code) then y_test will be None.
    - mean_image: (3, 64, 64) array giving mean training image
    """
    # First load wnids (one WordNet id per line)
    with open(os.path.join(path, 'wnids.txt'), 'r') as f:
        wnids = [x.strip() for x in f]

    # Map wnids to integer labels
    wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}

    # Use words.txt to get names for each class
    with open(os.path.join(path, 'words.txt'), 'r') as f:
        wnid_to_words = dict(line.split('\t') for line in f)
        for wnid, words in wnid_to_words.items():
            wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
    class_names = [wnid_to_words[wnid] for wnid in wnids]

    # Next load training data.
    X_train = []
    y_train = []
    for i, wnid in enumerate(wnids):
        if (i + 1) % 20 == 0:
            print('loading training data for synset %d / %d'
                  % (i + 1, len(wnids)))
        # To figure out the filenames we need to open the boxes file
        boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
        with open(boxes_file, 'r') as f:
            filenames = [x.split('\t')[0] for x in f]
        num_images = len(filenames)

        X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
        y_train_block = wnid_to_label[wnid] * \
            np.ones(num_images, dtype=np.int64)
        for j, img_file in enumerate(filenames):
            img_file = os.path.join(path, 'train', wnid, 'images', img_file)
            img = imread(img_file)
            if img.ndim == 2:
                # grayscale file: add a trailing channel axis so the
                # transpose below works uniformly (assumes 64x64 images)
                img.shape = (64, 64, 1)
            X_train_block[j] = img.transpose(2, 0, 1)
        X_train.append(X_train_block)
        y_train.append(y_train_block)

    # We need to concatenate all training data
    X_train = np.concatenate(X_train, axis=0)
    y_train = np.concatenate(y_train, axis=0)

    # Next load validation data
    with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
        img_files = []
        val_wnids = []
        for line in f:
            img_file, wnid = line.split('\t')[:2]
            img_files.append(img_file)
            val_wnids.append(wnid)
        num_val = len(img_files)
        y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
        X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
        for i, img_file in enumerate(img_files):
            img_file = os.path.join(path, 'val', 'images', img_file)
            img = imread(img_file)
            if img.ndim == 2:
                img.shape = (64, 64, 1)
            X_val[i] = img.transpose(2, 0, 1)

    # Next load test images
    # Students won't have test labels, so we need to iterate over files in the
    # images directory.
    img_files = os.listdir(os.path.join(path, 'test', 'images'))
    X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
    for i, img_file in enumerate(img_files):
        img_file = os.path.join(path, 'test', 'images', img_file)
        img = imread(img_file)
        if img.ndim == 2:
            img.shape = (64, 64, 1)
        X_test[i] = img.transpose(2, 0, 1)

    # Load test labels if present (students usually won't have them)
    y_test = None
    y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
    if os.path.isfile(y_test_file):
        with open(y_test_file, 'r') as f:
            img_file_to_wnid = {}
            for line in f:
                line = line.split('\t')
                img_file_to_wnid[line[0]] = line[1]
        y_test = [wnid_to_label[img_file_to_wnid[img_file]]
                  for img_file in img_files]
        y_test = np.array(y_test)

    mean_image = X_train.mean(axis=0)
    if subtract_mean:
        X_train -= mean_image[None]
        X_val -= mean_image[None]
        X_test -= mean_image[None]

    # NOTE: the original dict literal listed 'class_names' twice; the
    # duplicate key has been removed (behavior is unchanged, since Python
    # keeps only the last occurrence anyway).
    return {
        'class_names': class_names,
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
        'mean_image': mean_image,
    }
226 | """ 227 | models = {} 228 | for model_file in os.listdir(models_dir): 229 | with open(os.path.join(models_dir, model_file), 'rb') as f: 230 | try: 231 | models[model_file] = load_pickle(f)['model'] 232 | except pickle.UnpicklingError: 233 | continue 234 | return models 235 | 236 | 237 | def load_imagenet_val(num=None): 238 | """Load a handful of validation images from ImageNet. 239 | 240 | Inputs: 241 | - num: Number of images to load (max of 25) 242 | 243 | Returns: 244 | - X: numpy array with shape [num, 224, 224, 3] 245 | - y: numpy array of integer image labels, shape [num] 246 | - class_names: dict mapping integer label to class name 247 | """ 248 | imagenet_fn = 'cs231n/datasets/imagenet_val_25.npz' 249 | if not os.path.isfile(imagenet_fn): 250 | print('file %s not found' % imagenet_fn) 251 | print('Run the following:') 252 | print('cd cs231n/datasets') 253 | print('bash get_imagenet_val.sh') 254 | assert False, 'Need to download imagenet_val_25.npz' 255 | f = np.load(imagenet_fn) 256 | X = f['X'] 257 | y = f['y'] 258 | class_names = f['label_map'].item() 259 | if num is not None: 260 | X = X[:num] 261 | y = y[:num] 262 | return X, y, class_names 263 | -------------------------------------------------------------------------------- /assignment3/cs231n/data_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from builtins import range 4 | from six.moves import cPickle as pickle 5 | import numpy as np 6 | import os 7 | from scipy.misc import imread 8 | import platform 9 | 10 | def load_pickle(f): 11 | version = platform.python_version_tuple() 12 | if version[0] == '2': 13 | return pickle.load(f) 14 | elif version[0] == '3': 15 | return pickle.load(f, encoding='latin1') 16 | raise ValueError("invalid python version: {}".format(version)) 17 | 18 | def load_CIFAR_batch(filename): 19 | """ load single batch of cifar """ 20 | with open(filename, 'rb') as f: 21 | datadict = 
def load_CIFAR10(ROOT):
    """ load all of cifar """
    train_images = []
    train_labels = []
    # CIFAR-10 ships its training set as five pickled batches.
    for batch_idx in range(1, 6):
        batch_path = os.path.join(ROOT, 'data_batch_%d' % (batch_idx, ))
        batch_X, batch_Y = load_CIFAR_batch(batch_path)
        train_images.append(batch_X)
        train_labels.append(batch_Y)
    Xtr = np.concatenate(train_images)
    Ytr = np.concatenate(train_labels)
    del batch_X, batch_Y
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte


def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     subtract_mean=True):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for classifiers. These are the same steps as we used for the SVM, but
    condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Carve out validation / training / test subsets by explicit index
    # lists (fancy indexing, so every split is an independent copy).
    val_idx = list(range(num_training, num_training + num_validation))
    X_val, y_val = X_train[val_idx], y_train[val_idx]
    train_idx = list(range(num_training))
    X_train, y_train = X_train[train_idx], y_train[train_idx]
    test_idx = list(range(num_test))
    X_test, y_test = X_test[test_idx], y_test[test_idx]

    # Normalize: subtract the mean training image from every split
    if subtract_mean:
        mean_image = np.mean(X_train, axis=0)
        for split in (X_train, X_val, X_test):
            split -= mean_image

    # Transpose to the channels-first layout expected downstream
    X_train = X_train.transpose(0, 3, 1, 2).copy()
    X_val = X_val.transpose(0, 3, 1, 2).copy()
    X_test = X_test.transpose(0, 3, 1, 2).copy()

    # Package data into a dictionary
    return {
        'X_train': X_train, 'y_train': y_train,
        'X_val': X_val, 'y_val': y_val,
        'X_test': X_test, 'y_test': y_test,
    }
def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
    """
    Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
    TinyImageNet-200 have the same directory structure, so this can be used
    to load any of them.

    Inputs:
    - path: String giving path to the directory to load.
    - dtype: numpy datatype used to load the data.
    - subtract_mean: Whether to subtract the mean training image.

    Returns: A dictionary with the following entries:
    - class_names: A list where class_names[i] is a list of strings giving the
      WordNet names for class i in the loaded dataset.
    - X_train: (N_tr, 3, 64, 64) array of training images
    - y_train: (N_tr,) array of training labels
    - X_val: (N_val, 3, 64, 64) array of validation images
    - y_val: (N_val,) array of validation labels
    - X_test: (N_test, 3, 64, 64) array of testing images.
    - y_test: (N_test,) array of test labels; if test labels are not available
      (such as in student code) then y_test will be None.
    - mean_image: (3, 64, 64) array giving mean training image
    """
    # First load wnids (one WordNet id per line)
    with open(os.path.join(path, 'wnids.txt'), 'r') as f:
        wnids = [x.strip() for x in f]

    # Map wnids to integer labels
    wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}

    # Use words.txt to get names for each class
    with open(os.path.join(path, 'words.txt'), 'r') as f:
        wnid_to_words = dict(line.split('\t') for line in f)
        for wnid, words in wnid_to_words.items():
            wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
    class_names = [wnid_to_words[wnid] for wnid in wnids]

    # Next load training data.
    X_train = []
    y_train = []
    for i, wnid in enumerate(wnids):
        if (i + 1) % 20 == 0:
            print('loading training data for synset %d / %d'
                  % (i + 1, len(wnids)))
        # To figure out the filenames we need to open the boxes file
        boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
        with open(boxes_file, 'r') as f:
            filenames = [x.split('\t')[0] for x in f]
        num_images = len(filenames)

        X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
        y_train_block = wnid_to_label[wnid] * \
            np.ones(num_images, dtype=np.int64)
        for j, img_file in enumerate(filenames):
            img_file = os.path.join(path, 'train', wnid, 'images', img_file)
            img = imread(img_file)
            if img.ndim == 2:
                # grayscale file: add a trailing channel axis so the
                # transpose below works uniformly (assumes 64x64 images)
                img.shape = (64, 64, 1)
            X_train_block[j] = img.transpose(2, 0, 1)
        X_train.append(X_train_block)
        y_train.append(y_train_block)

    # We need to concatenate all training data
    X_train = np.concatenate(X_train, axis=0)
    y_train = np.concatenate(y_train, axis=0)

    # Next load validation data
    with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
        img_files = []
        val_wnids = []
        for line in f:
            img_file, wnid = line.split('\t')[:2]
            img_files.append(img_file)
            val_wnids.append(wnid)
        num_val = len(img_files)
        y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
        X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
        for i, img_file in enumerate(img_files):
            img_file = os.path.join(path, 'val', 'images', img_file)
            img = imread(img_file)
            if img.ndim == 2:
                img.shape = (64, 64, 1)
            X_val[i] = img.transpose(2, 0, 1)

    # Next load test images
    # Students won't have test labels, so we need to iterate over files in the
    # images directory.
    img_files = os.listdir(os.path.join(path, 'test', 'images'))
    X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
    for i, img_file in enumerate(img_files):
        img_file = os.path.join(path, 'test', 'images', img_file)
        img = imread(img_file)
        if img.ndim == 2:
            img.shape = (64, 64, 1)
        X_test[i] = img.transpose(2, 0, 1)

    # Load test labels if present (students usually won't have them)
    y_test = None
    y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
    if os.path.isfile(y_test_file):
        with open(y_test_file, 'r') as f:
            img_file_to_wnid = {}
            for line in f:
                line = line.split('\t')
                img_file_to_wnid[line[0]] = line[1]
        y_test = [wnid_to_label[img_file_to_wnid[img_file]]
                  for img_file in img_files]
        y_test = np.array(y_test)

    mean_image = X_train.mean(axis=0)
    if subtract_mean:
        X_train -= mean_image[None]
        X_val -= mean_image[None]
        X_test -= mean_image[None]

    # NOTE: the original dict literal listed 'class_names' twice; the
    # duplicate key has been removed (behavior is unchanged, since Python
    # keeps only the last occurrence anyway).
    return {
        'class_names': class_names,
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
        'mean_image': mean_image,
    }
226 | """ 227 | models = {} 228 | for model_file in os.listdir(models_dir): 229 | with open(os.path.join(models_dir, model_file), 'rb') as f: 230 | try: 231 | models[model_file] = load_pickle(f)['model'] 232 | except pickle.UnpicklingError: 233 | continue 234 | return models 235 | 236 | 237 | def load_imagenet_val(num=None): 238 | """Load a handful of validation images from ImageNet. 239 | 240 | Inputs: 241 | - num: Number of images to load (max of 25) 242 | 243 | Returns: 244 | - X: numpy array with shape [num, 224, 224, 3] 245 | - y: numpy array of integer image labels, shape [num] 246 | - class_names: dict mapping integer label to class name 247 | """ 248 | imagenet_fn = 'cs231n/datasets/imagenet_val_25.npz' 249 | if not os.path.isfile(imagenet_fn): 250 | print('file %s not found' % imagenet_fn) 251 | print('Run the following:') 252 | print('cd cs231n/datasets') 253 | print('bash get_imagenet_val.sh') 254 | assert False, 'Need to download imagenet_val_25.npz' 255 | f = np.load(imagenet_fn) 256 | X = f['X'] 257 | y = f['y'] 258 | class_names = f['label_map'].item() 259 | if num is not None: 260 | X = X[:num] 261 | y = y[:num] 262 | return X, y, class_names 263 | -------------------------------------------------------------------------------- /assignment1/cs231n/classifiers/k_nearest_neighbor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | #from past.builtins import xrange 3 | 4 | 5 | class KNearestNeighbor(object): 6 | """ a kNN classifier with L2 distance """ 7 | 8 | def __init__(self): 9 | pass 10 | 11 | def train(self, X, y): 12 | """ 13 | Train the classifier. For k-nearest neighbors this is just 14 | memorizing the training data. 15 | 16 | Inputs: 17 | - X: A numpy array of shape (num_train, D) containing the training data 18 | consisting of num_train samples each of dimension D. 19 | - y: A numpy array of shape (N,) containing the training labels, where 20 | y[i] is the label for X[i]. 
def predict(self, X, k=1, num_loops=0):
    """Predict labels for test data using this k-NN classifier.

    Inputs:
    - X: A numpy array of shape (num_test, D) containing test data consisting
      of num_test samples each of dimension D.
    - k: The number of nearest neighbors that vote for the predicted labels.
    - num_loops: Which implementation to use to compute distances between
      training points and testing points (0, 1, or 2 explicit Python loops).

    Returns:
    - y: A numpy array of shape (num_test,) containing predicted labels for
      the test data, where y[i] is the predicted label for the test point X[i].

    Raises:
    - ValueError: if num_loops is not 0, 1 or 2.
    """
    if num_loops == 0:
        dists = self.compute_distances_no_loops(X)
    elif num_loops == 1:
        dists = self.compute_distances_one_loop(X)
    elif num_loops == 2:
        dists = self.compute_distances_two_loops(X)
    else:
        raise ValueError('Invalid value %d for num_loops' % num_loops)

    return self.predict_labels(dists, k=k)


def compute_distances_two_loops(self, X):
    """Compute L2 distances with a nested loop over test and training points.

    Inputs:
    - X: A numpy array of shape (num_test, D) containing test data.

    Returns:
    - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
      is the Euclidean distance between the ith test point and the jth
      training point.
    """
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train))
    # Fix: `xrange` is Python-2-only; `range` is the Python 3 equivalent.
    for i in range(num_test):
        for j in range(num_train):
            # ||a - b||_2 computed without a loop over the dimension.
            dists[i, j] = np.sqrt(np.sum(np.square(self.X_train[j, :] - X[i, :])))
    return dists


def compute_distances_one_loop(self, X):
    """Compute L2 distances using a single loop over the test data.

    Input / Output: Same as compute_distances_two_loops.
    """
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train))
    for i in range(num_test):
        # Broadcast X[i] against every training row, reduce over features.
        dists[i] = np.sqrt(np.sum(np.square(X[i, :] - self.X_train), axis=1))
    return dists


def compute_distances_no_loops(self, X):
    """Compute L2 distances with no explicit loops.

    Uses the expansion ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2 together with
    a matrix multiply and two broadcast sums.

    Input / Output: Same as compute_distances_two_loops.
    """
    sq_dists = -2.0 * X.dot(self.X_train.T)
    sq_dists += np.sum(np.square(X), axis=1, keepdims=True)
    sq_dists += np.sum(np.square(self.X_train), axis=1)
    # Fix: floating-point cancellation can leave tiny negative values here,
    # which would turn into NaN under sqrt; clamp at zero first.
    return np.sqrt(np.maximum(sq_dists, 0.0))


def predict_labels(self, dists, k=1):
    """Predict a label for each test point from a precomputed distance matrix.

    Inputs:
    - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
      gives the distance between the ith test point and the jth training point.
    - k: The number of nearest neighbors that vote for each prediction.

    Returns:
    - y: A numpy array of shape (num_test,) containing predicted labels for
      the test data, where y[i] is the predicted label for the test point X[i].
    """
    num_test = dists.shape[0]
    y_pred = np.zeros(num_test)
    for i in range(num_test):
        # Labels of the k nearest training points (assumes y_train holds
        # non-negative integer labels, as np.bincount requires).
        closest_y = self.y_train[np.argsort(dists[i, :])[:k]]
        # np.argmax returns the FIRST maximum of the counts, so ties are
        # broken in favor of the smaller label, as required.
        y_pred[i] = np.argmax(np.bincount(closest_y))
    return y_pred
def conv_forward_im2col(x, w, b, conv_param):
    """
    A fast implementation of the forward pass for a convolutional layer
    based on im2col and col2im.
    """
    N, C, H, W = x.shape
    num_filters, _, filter_height, filter_width = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']

    # Check dimensions
    assert (W + 2 * pad - filter_width) % stride == 0, 'width does not work'
    assert (H + 2 * pad - filter_height) % stride == 0, 'height does not work'

    # Create output
    out_height = (H + 2 * pad - filter_height) // stride + 1
    out_width = (W + 2 * pad - filter_width) // stride + 1
    out = np.zeros((N, num_filters, out_height, out_width), dtype=x.dtype)

    # im2col unrolls every receptive field into a column so the convolution
    # becomes a single matrix multiply; requires the compiled Cython module.
    # x_cols = im2col_indices(x, w.shape[2], w.shape[3], pad, stride)
    x_cols = im2col_cython(x, w.shape[2], w.shape[3], pad, stride)
    res = w.reshape((w.shape[0], -1)).dot(x_cols) + b.reshape(-1, 1)

    # res has columns ordered (out_h, out_w, N); reorder to (N, F, out_h, out_w).
    out = res.reshape(w.shape[0], out.shape[2], out.shape[3], x.shape[0])
    out = out.transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param, x_cols)
    return out, cache


def conv_forward_strides(x, w, b, conv_param):
    """
    Forward pass for a convolutional layer that builds the im2col matrix
    with numpy stride tricks instead of an explicit gather.
    """
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']

    # Check dimensions
    #assert (W + 2 * pad - WW) % stride == 0, 'width does not work'
    #assert (H + 2 * pad - HH) % stride == 0, 'height does not work'

    # Pad the input
    p = pad
    x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')

    # Figure out output dimensions
    H += 2 * pad
    W += 2 * pad
    out_h = (H - HH) // stride + 1
    out_w = (W - WW) // stride + 1

    # Perform an im2col operation by picking clever strides
    # Strides are expressed in element counts here and converted to byte
    # offsets below, which is what numpy expects.
    shape = (C, HH, WW, N, out_h, out_w)
    strides = (H * W, W, 1, C * H * W, stride * W, stride)
    strides = x.itemsize * np.array(strides)
    x_stride = np.lib.stride_tricks.as_strided(x_padded,
                  shape=shape, strides=strides)
    # as_strided yields overlapping views into x_padded; copy into a
    # contiguous buffer before flattening into the column matrix.
    x_cols = np.ascontiguousarray(x_stride)
    x_cols.shape = (C * HH * WW, N * out_h * out_w)

    # Now all our convolutions are a big matrix multiply
    res = w.reshape(F, -1).dot(x_cols) + b.reshape(-1, 1)

    # Reshape the output
    res.shape = (F, N, out_h, out_w)
    out = res.transpose(1, 0, 2, 3)

    # Be nice and return a contiguous array
    # The old version of conv_forward_fast doesn't do this, so for a fair
    # comparison we won't either
    out = np.ascontiguousarray(out)

    cache = (x, w, b, conv_param, x_cols)
    return out, cache


def conv_backward_strides(dout, cache):
    """Backward pass matching conv_forward_strides; returns (dx, dw, db)."""
    x, w, b, conv_param, x_cols = cache
    stride, pad = conv_param['stride'], conv_param['pad']

    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    _, _, out_h, out_w = dout.shape

    # Bias receives the gradient summed over every position it was added at.
    db = np.sum(dout, axis=(0, 2, 3))

    dout_reshaped = dout.transpose(1, 0, 2, 3).reshape(F, -1)
    dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)

    # Scatter the column gradients back into image layout (Cython col2im).
    dx_cols = w.reshape(F, -1).T.dot(dout_reshaped)
    dx_cols.shape = (C, HH, WW, N, out_h, out_w)
    dx = col2im_6d_cython(dx_cols, N, C, H, W, HH, WW, pad, stride)

    return dx, dw, db


def conv_backward_im2col(dout, cache):
    """
    A fast implementation of the backward pass for a convolutional layer
    based on im2col and col2im.
    """
    x, w, b, conv_param, x_cols = cache
    stride, pad = conv_param['stride'], conv_param['pad']

    db = np.sum(dout, axis=(0, 2, 3))

    num_filters, _, filter_height, filter_width = w.shape
    dout_reshaped = dout.transpose(1, 2, 3, 0).reshape(num_filters, -1)
    dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)

    dx_cols = w.reshape(num_filters, -1).T.dot(dout_reshaped)
    # dx = col2im_indices(dx_cols, x.shape, filter_height, filter_width, pad, stride)
    dx = col2im_cython(dx_cols, x.shape[0], x.shape[1], x.shape[2], x.shape[3],
                       filter_height, filter_width, pad, stride)

    return dx, dw, db


# Public aliases: the strided implementation is the default "fast" conv.
conv_forward_fast = conv_forward_strides
conv_backward_fast = conv_backward_strides


def max_pool_forward_fast(x, pool_param):
    """
    A fast implementation of the forward pass for a max pooling layer.

    This chooses between the reshape method and the im2col method. If the pooling
    regions are square and tile the input image, then we can use the reshape
    method which is very fast. Otherwise we fall back on the im2col method, which
    is not much faster than the naive method.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    same_size = pool_height == pool_width == stride
    tiles = H % pool_height == 0 and W % pool_width == 0
    if same_size and tiles:
        out, reshape_cache = max_pool_forward_reshape(x, pool_param)
        cache = ('reshape', reshape_cache)
    else:
        out, im2col_cache = max_pool_forward_im2col(x, pool_param)
        cache = ('im2col', im2col_cache)
    return out, cache


def max_pool_backward_fast(dout, cache):
    """
    A fast implementation of the backward pass for a max pooling layer.

    This switches between the reshape method and the im2col method depending on
    which method was used to generate the cache.
    """
    method, real_cache = cache
    if method == 'reshape':
        return max_pool_backward_reshape(dout, real_cache)
    elif method == 'im2col':
        return max_pool_backward_im2col(dout, real_cache)
    else:
        raise ValueError('Unrecognized method "%s"' % method)


def max_pool_forward_reshape(x, pool_param):
    """
    A fast implementation of the forward pass for the max pooling layer that uses
    some clever reshaping.

    This can only be used for square pooling regions that tile the input.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']
    assert pool_height == pool_width == stride, 'Invalid pool params'
    assert H % pool_height == 0
    assert W % pool_height == 0
    # Split each spatial axis into (blocks, within-block) and max over the
    # within-block axes.
    x_reshaped = x.reshape(N, C, H // pool_height, pool_height,
                           W // pool_width, pool_width)
    out = x_reshaped.max(axis=3).max(axis=4)

    cache = (x, x_reshaped, out)
    return out, cache


def max_pool_backward_reshape(dout, cache):
    """
    A fast implementation of the backward pass for the max pooling layer that
    uses some clever broadcasting and reshaping.

    This can only be used if the forward pass was computed using
    max_pool_forward_reshape.

    NOTE: If there are multiple argmaxes in a pooling region, this method
    splits the upstream gradient equally among all argmax elements of the
    input (the division by the mask count below), which yields a valid
    subgradient at a modest performance cost.
    """
    x, x_reshaped, out = cache

    dx_reshaped = np.zeros_like(x_reshaped)
    out_newaxis = out[:, :, :, np.newaxis, :, np.newaxis]
    # mask marks every element equal to its region's max.
    mask = (x_reshaped == out_newaxis)
    dout_newaxis = dout[:, :, :, np.newaxis, :, np.newaxis]
    dout_broadcast, _ = np.broadcast_arrays(dout_newaxis, dx_reshaped)
    dx_reshaped[mask] = dout_broadcast[mask]
    # Split gradient evenly among tied maxima so the total is preserved.
    dx_reshaped /= np.sum(mask, axis=(3, 5), keepdims=True)
    dx = dx_reshaped.reshape(x.shape)

    return dx


def max_pool_forward_im2col(x, pool_param):
    """
    An implementation of the forward pass for max pooling based on im2col.

    This isn't much faster than the naive version, so it should be avoided if
    possible.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    assert (H - pool_height) % stride == 0, 'Invalid height'
    assert (W - pool_width) % stride == 0, 'Invalid width'

    out_height = (H - pool_height) // stride + 1
    out_width = (W - pool_width) // stride + 1

    # Treat each channel as an independent single-channel image so im2col
    # produces one column per pooling window.
    x_split = x.reshape(N * C, 1, H, W)
    x_cols = im2col(x_split, pool_height, pool_width, padding=0, stride=stride)
    x_cols_argmax = np.argmax(x_cols, axis=0)
    x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])]
    out = x_cols_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)

    cache = (x, x_cols, x_cols_argmax, pool_param)
    return out, cache


def max_pool_backward_im2col(dout, cache):
    """
    An implementation of the backward pass for max pooling based on im2col.

    This isn't much faster than the naive version, so it should be avoided if
    possible.
    """
    x, x_cols, x_cols_argmax, pool_param = cache
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    # Route each upstream gradient to the argmax position recorded in the
    # forward pass; all other positions get zero gradient.
    dout_reshaped = dout.transpose(2, 3, 0, 1).flatten()
    dx_cols = np.zeros_like(x_cols)
    dx_cols[x_cols_argmax, np.arange(dx_cols.shape[1])] = dout_reshaped
    dx = col2im_indices(dx_cols, (N * C, 1, H, W), pool_height, pool_width,
                        padding=0, stride=stride)
    dx = dx.reshape(x.shape)

    return dx
def conv_forward_im2col(x, w, b, conv_param):
    """
    A fast implementation of the forward pass for a convolutional layer
    based on im2col and col2im.
    """
    N, C, H, W = x.shape
    num_filters, _, filter_height, filter_width = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']

    # Check dimensions
    assert (W + 2 * pad - filter_width) % stride == 0, 'width does not work'
    assert (H + 2 * pad - filter_height) % stride == 0, 'height does not work'

    # Create output
    out_height = (H + 2 * pad - filter_height) // stride + 1
    out_width = (W + 2 * pad - filter_width) // stride + 1
    out = np.zeros((N, num_filters, out_height, out_width), dtype=x.dtype)

    # im2col unrolls every receptive field into a column so the convolution
    # becomes a single matrix multiply; requires the compiled Cython module.
    # x_cols = im2col_indices(x, w.shape[2], w.shape[3], pad, stride)
    x_cols = im2col_cython(x, w.shape[2], w.shape[3], pad, stride)
    res = w.reshape((w.shape[0], -1)).dot(x_cols) + b.reshape(-1, 1)

    # res has columns ordered (out_h, out_w, N); reorder to (N, F, out_h, out_w).
    out = res.reshape(w.shape[0], out.shape[2], out.shape[3], x.shape[0])
    out = out.transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param, x_cols)
    return out, cache


def conv_forward_strides(x, w, b, conv_param):
    """
    Forward pass for a convolutional layer that builds the im2col matrix
    with numpy stride tricks instead of an explicit gather.
    """
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']

    # Check dimensions
    #assert (W + 2 * pad - WW) % stride == 0, 'width does not work'
    #assert (H + 2 * pad - HH) % stride == 0, 'height does not work'

    # Pad the input
    p = pad
    x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')

    # Figure out output dimensions
    H += 2 * pad
    W += 2 * pad
    out_h = (H - HH) // stride + 1
    out_w = (W - WW) // stride + 1

    # Perform an im2col operation by picking clever strides
    # Strides are expressed in element counts here and converted to byte
    # offsets below, which is what numpy expects.
    shape = (C, HH, WW, N, out_h, out_w)
    strides = (H * W, W, 1, C * H * W, stride * W, stride)
    strides = x.itemsize * np.array(strides)
    x_stride = np.lib.stride_tricks.as_strided(x_padded,
                  shape=shape, strides=strides)
    # as_strided yields overlapping views into x_padded; copy into a
    # contiguous buffer before flattening into the column matrix.
    x_cols = np.ascontiguousarray(x_stride)
    x_cols.shape = (C * HH * WW, N * out_h * out_w)

    # Now all our convolutions are a big matrix multiply
    res = w.reshape(F, -1).dot(x_cols) + b.reshape(-1, 1)

    # Reshape the output
    res.shape = (F, N, out_h, out_w)
    out = res.transpose(1, 0, 2, 3)

    # Be nice and return a contiguous array
    # The old version of conv_forward_fast doesn't do this, so for a fair
    # comparison we won't either
    out = np.ascontiguousarray(out)

    cache = (x, w, b, conv_param, x_cols)
    return out, cache


def conv_backward_strides(dout, cache):
    """Backward pass matching conv_forward_strides; returns (dx, dw, db)."""
    x, w, b, conv_param, x_cols = cache
    stride, pad = conv_param['stride'], conv_param['pad']

    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    _, _, out_h, out_w = dout.shape

    # Bias receives the gradient summed over every position it was added at.
    db = np.sum(dout, axis=(0, 2, 3))

    dout_reshaped = dout.transpose(1, 0, 2, 3).reshape(F, -1)
    dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)

    # Scatter the column gradients back into image layout (Cython col2im).
    dx_cols = w.reshape(F, -1).T.dot(dout_reshaped)
    dx_cols.shape = (C, HH, WW, N, out_h, out_w)
    dx = col2im_6d_cython(dx_cols, N, C, H, W, HH, WW, pad, stride)

    return dx, dw, db


def conv_backward_im2col(dout, cache):
    """
    A fast implementation of the backward pass for a convolutional layer
    based on im2col and col2im.
    """
    x, w, b, conv_param, x_cols = cache
    stride, pad = conv_param['stride'], conv_param['pad']

    db = np.sum(dout, axis=(0, 2, 3))

    num_filters, _, filter_height, filter_width = w.shape
    dout_reshaped = dout.transpose(1, 2, 3, 0).reshape(num_filters, -1)
    dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)

    dx_cols = w.reshape(num_filters, -1).T.dot(dout_reshaped)
    # dx = col2im_indices(dx_cols, x.shape, filter_height, filter_width, pad, stride)
    dx = col2im_cython(dx_cols, x.shape[0], x.shape[1], x.shape[2], x.shape[3],
                       filter_height, filter_width, pad, stride)

    return dx, dw, db


# Public aliases: the strided implementation is the default "fast" conv.
conv_forward_fast = conv_forward_strides
conv_backward_fast = conv_backward_strides


def max_pool_forward_fast(x, pool_param):
    """
    A fast implementation of the forward pass for a max pooling layer.

    This chooses between the reshape method and the im2col method. If the pooling
    regions are square and tile the input image, then we can use the reshape
    method which is very fast. Otherwise we fall back on the im2col method, which
    is not much faster than the naive method.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    same_size = pool_height == pool_width == stride
    tiles = H % pool_height == 0 and W % pool_width == 0
    if same_size and tiles:
        out, reshape_cache = max_pool_forward_reshape(x, pool_param)
        cache = ('reshape', reshape_cache)
    else:
        out, im2col_cache = max_pool_forward_im2col(x, pool_param)
        cache = ('im2col', im2col_cache)
    return out, cache


def max_pool_backward_fast(dout, cache):
    """
    A fast implementation of the backward pass for a max pooling layer.

    This switches between the reshape method and the im2col method depending on
    which method was used to generate the cache.
    """
    method, real_cache = cache
    if method == 'reshape':
        return max_pool_backward_reshape(dout, real_cache)
    elif method == 'im2col':
        return max_pool_backward_im2col(dout, real_cache)
    else:
        raise ValueError('Unrecognized method "%s"' % method)


def max_pool_forward_reshape(x, pool_param):
    """
    A fast implementation of the forward pass for the max pooling layer that uses
    some clever reshaping.

    This can only be used for square pooling regions that tile the input.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']
    assert pool_height == pool_width == stride, 'Invalid pool params'
    assert H % pool_height == 0
    assert W % pool_height == 0
    # Split each spatial axis into (blocks, within-block) and max over the
    # within-block axes.
    x_reshaped = x.reshape(N, C, H // pool_height, pool_height,
                           W // pool_width, pool_width)
    out = x_reshaped.max(axis=3).max(axis=4)

    cache = (x, x_reshaped, out)
    return out, cache


def max_pool_backward_reshape(dout, cache):
    """
    A fast implementation of the backward pass for the max pooling layer that
    uses some clever broadcasting and reshaping.

    This can only be used if the forward pass was computed using
    max_pool_forward_reshape.

    NOTE: If there are multiple argmaxes in a pooling region, this method
    splits the upstream gradient equally among all argmax elements of the
    input (the division by the mask count below), which yields a valid
    subgradient at a modest performance cost.
    """
    x, x_reshaped, out = cache

    dx_reshaped = np.zeros_like(x_reshaped)
    out_newaxis = out[:, :, :, np.newaxis, :, np.newaxis]
    # mask marks every element equal to its region's max.
    mask = (x_reshaped == out_newaxis)
    dout_newaxis = dout[:, :, :, np.newaxis, :, np.newaxis]
    dout_broadcast, _ = np.broadcast_arrays(dout_newaxis, dx_reshaped)
    dx_reshaped[mask] = dout_broadcast[mask]
    # Split gradient evenly among tied maxima so the total is preserved.
    dx_reshaped /= np.sum(mask, axis=(3, 5), keepdims=True)
    dx = dx_reshaped.reshape(x.shape)

    return dx


def max_pool_forward_im2col(x, pool_param):
    """
    An implementation of the forward pass for max pooling based on im2col.

    This isn't much faster than the naive version, so it should be avoided if
    possible.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    assert (H - pool_height) % stride == 0, 'Invalid height'
    assert (W - pool_width) % stride == 0, 'Invalid width'

    out_height = (H - pool_height) // stride + 1
    out_width = (W - pool_width) // stride + 1

    # Treat each channel as an independent single-channel image so im2col
    # produces one column per pooling window.
    x_split = x.reshape(N * C, 1, H, W)
    x_cols = im2col(x_split, pool_height, pool_width, padding=0, stride=stride)
    x_cols_argmax = np.argmax(x_cols, axis=0)
    x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])]
    out = x_cols_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)

    cache = (x, x_cols, x_cols_argmax, pool_param)
    return out, cache


def max_pool_backward_im2col(dout, cache):
    """
    An implementation of the backward pass for max pooling based on im2col.

    This isn't much faster than the naive version, so it should be avoided if
    possible.
    """
    x, x_cols, x_cols_argmax, pool_param = cache
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    # Route each upstream gradient to the argmax position recorded in the
    # forward pass; all other positions get zero gradient.
    dout_reshaped = dout.transpose(2, 3, 0, 1).flatten()
    dx_cols = np.zeros_like(x_cols)
    dx_cols[x_cols_argmax, np.arange(dx_cols.shape[1])] = dout_reshaped
    dx = col2im_indices(dx_cols, (N * C, 1, H, W), pool_height, pool_width,
                        padding=0, stride=stride)
    dx = dx.reshape(x.shape)

    return dx
def affine_forward(x, w, b):
    """
    Computes the forward pass for an affine (fully-connected) layer.

    The input x has shape (N, d_1, ..., d_k) where x[i] is the ith input.
    We multiply this against a weight matrix of shape (D, M) where
    D = \prod_i d_i

    Inputs:
    x - Input data, of shape (N, d_1, ..., d_k)
    w - Weights, of shape (D, M)
    b - Biases, of shape (M,)

    Returns a tuple of:
    - out: output, of shape (N, M)
    - cache: (x, w, b)
    """
    # Flatten each example to a row vector before the matrix multiply.
    out = x.reshape(x.shape[0], -1).dot(w) + b
    cache = (x, w, b)
    return out, cache


def affine_backward(dout, cache):
    """
    Computes the backward pass for an affine layer.

    Inputs:
    - dout: Upstream derivative, of shape (N, M)
    - cache: Tuple of:
      - x: Input data, of shape (N, d_1, ..., d_k)
      - w: Weights, of shape (D, M)
      - b: Biases, of shape (M,)

    Returns a tuple of:
    - dx: Gradient with respect to x, of shape (N, d1, ..., d_k)
    - dw: Gradient with respect to w, of shape (D, M)
    - db: Gradient with respect to b, of shape (M,)
    """
    x, w, b = cache
    # Undo the forward flatten so dx has the same shape as x.
    dx = dout.dot(w.T).reshape(x.shape)
    dw = x.reshape(x.shape[0], -1).T.dot(dout)
    # b was broadcast over the batch, so its gradient sums over it.
    db = np.sum(dout, axis=0)
    return dx, dw, db


def relu_forward(x):
    """
    Computes the forward pass for a layer of rectified linear units (ReLUs).

    Input:
    - x: Inputs, of any shape

    Returns a tuple of:
    - out: Output, of the same shape as x
    - cache: x
    """
    out = np.maximum(0, x)
    cache = x
    return out, cache


def relu_backward(dout, cache):
    """
    Computes the backward pass for a layer of rectified linear units (ReLUs).

    Input:
    - dout: Upstream derivatives, of any shape
    - cache: Input x, of same shape as dout

    Returns:
    - dx: Gradient with respect to x
    """
    x = cache
    # Gradient passes through only where the input was positive.
    dx = np.where(x > 0, dout, 0)
    return dx


def batchnorm_forward(x, gamma, beta, bn_param):
    """
    Forward pass for batch normalization.

    During training the sample mean and (uncorrected) sample variance are
    computed from minibatch statistics and used to normalize the incoming data.
    During training we also keep an exponentially decaying running mean of the mean
    and variance of each feature, and these averages are used to normalize data
    at test-time.

    At each timestep we update the running averages for mean and variance using
    an exponential decay based on the momentum parameter:

    running_mean = momentum * running_mean + (1 - momentum) * sample_mean
    running_var = momentum * running_var + (1 - momentum) * sample_var

    Note that the batch normalization paper suggests a different test-time
    behavior: they compute sample mean and variance for each feature using a
    large number of training images rather than using a running average. For
    this implementation we have chosen to use running averages instead since
    they do not require an additional estimation step; the torch7 implementation
    of batch normalization also uses running averages.

    Input:
    - x: Data of shape (N, D)
    - gamma: Scale parameter of shape (D,)
    - beta: Shift parameter of shape (D,)
    - bn_param: Dictionary with the following keys:
      - mode: 'train' or 'test'; required
      - eps: Constant for numeric stability
      - momentum: Constant for running mean / variance.
      - running_mean: Array of shape (D,) giving running mean of features
      - running_var Array of shape (D,) giving running variance of features

    Returns a tuple of:
    - out: of shape (N, D)
    - cache: A tuple of values needed in the backward pass
    """
    mode = bn_param['mode']
    eps = bn_param.get('eps', 1e-5)
    momentum = bn_param.get('momentum', 0.9)

    N, D = x.shape
    running_mean = bn_param.get('running_mean', np.zeros(D, dtype=x.dtype))
    running_var = bn_param.get('running_var', np.zeros(D, dtype=x.dtype))

    out, cache = None, None
    if mode == 'train':
        # Compute output
        mu = x.mean(axis=0)
        xc = x - mu
        var = np.mean(xc ** 2, axis=0)  # uncorrected (biased) variance
        std = np.sqrt(var + eps)
        xn = xc / std
        out = gamma * xn + beta

        cache = (mode, x, gamma, xc, std, xn, out)

        # Update running average of mean
        # (in-place ops mutate the array fetched from bn_param directly)
        running_mean *= momentum
        running_mean += (1 - momentum) * mu

        # Update running average of variance
        running_var *= momentum
        running_var += (1 - momentum) * var
    elif mode == 'test':
        # Using running mean and variance to normalize
        std = np.sqrt(running_var + eps)
        xn = (x - running_mean) / std
        out = gamma * xn + beta
        cache = (mode, x, xn, gamma, beta, std)
    else:
        raise ValueError('Invalid forward batchnorm mode "%s"' % mode)

    # Store the updated running means back into bn_param
    bn_param['running_mean'] = running_mean
    bn_param['running_var'] = running_var

    return out, cache


def batchnorm_backward(dout, cache):
    """
    Backward pass for batch normalization.

    For this implementation, you should write out a computation graph for
    batch normalization on paper and propagate gradients backward through
    intermediate nodes.

    Inputs:
    - dout: Upstream derivatives, of shape (N, D)
    - cache: Variable of intermediates from batchnorm_forward.

    Returns a tuple of:
    - dx: Gradient with respect to inputs x, of shape (N, D)
    - dgamma: Gradient with respect to scale parameter gamma, of shape (D,)
    - dbeta: Gradient with respect to shift parameter beta, of shape (D,)
    """
    mode = cache[0]
    if mode == 'train':
        mode, x, gamma, xc, std, xn, out = cache

        N = x.shape[0]
        dbeta = dout.sum(axis=0)
        dgamma = np.sum(xn * dout, axis=0)
        # Chain rule through xn = xc / std, var = mean(xc^2), mu = mean(x).
        dxn = gamma * dout
        dxc = dxn / std
        dstd = -np.sum((dxn * xc) / (std * std), axis=0)
        dvar = 0.5 * dstd / std
        dxc += (2.0 / N) * xc * dvar
        dmu = np.sum(dxc, axis=0)
        dx = dxc - dmu / N
    elif mode == 'test':
        # At test time mean/std are constants, so the backward pass is just
        # the affine part: dx = gamma * dout / std.
        mode, x, xn, gamma, beta, std = cache
        dbeta = dout.sum(axis=0)
        dgamma = np.sum(xn * dout, axis=0)
        dxn = gamma * dout
        dx = dxn / std
    else:
        raise ValueError(mode)

    return dx, dgamma, dbeta


def spatial_batchnorm_forward(x, gamma, beta, bn_param):
    """
    Computes the forward pass for spatial batch normalization.

    Inputs:
    - x: Input data of shape (N, C, H, W)
    - gamma: Scale parameter, of shape (C,)
    - beta: Shift parameter, of shape (C,)
    - bn_param: Dictionary with the following keys:
      - mode: 'train' or 'test'; required
      - eps: Constant for numeric stability
      - momentum: Constant for running mean / variance. momentum=0 means that
        old information is discarded completely at every time step, while
        momentum=1 means that new information is never incorporated. The
        default of momentum=0.9 should work well in most situations.
      - running_mean: Array of shape (C,) giving running mean of features
      - running_var Array of shape (C,) giving running variance of features

    Returns a tuple of:
    - out: Output data, of shape (N, C, H, W)
    - cache: Values needed for the backward pass
    """
    N, C, H, W = x.shape
    # Fold N, H, W into one batch axis so vanilla batchnorm normalizes
    # per-channel statistics over all spatial positions.
    x_flat = x.transpose(0, 2, 3, 1).reshape(-1, C)
    out_flat, cache = batchnorm_forward(x_flat, gamma, beta, bn_param)
    out = out_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)
    return out, cache


def spatial_batchnorm_backward(dout, cache):
    """
    Computes the backward pass for spatial batch normalization.

    Inputs:
    - dout: Upstream derivatives, of shape (N, C, H, W)
    - cache: Values from the forward pass

    Returns a tuple of:
    - dx: Gradient with respect to inputs, of shape (N, C, H, W)
    - dgamma: Gradient with respect to scale parameter, of shape (C,)
    - dbeta: Gradient with respect to shift parameter, of shape (C,)
    """
    N, C, H, W = dout.shape
    # Mirror the forward reshape, run vanilla batchnorm backward, reshape back.
    dout_flat = dout.transpose(0, 2, 3, 1).reshape(-1, C)
    dx_flat, dgamma, dbeta = batchnorm_backward(dout_flat, cache)
    dx = dx_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)
    return dx, dgamma, dbeta


def svm_loss(x, y):
    """
    Computes the loss and gradient using for multiclass SVM classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
      for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    N = x.shape[0]
    correct_class_scores = x[np.arange(N), y]
    # Hinge loss with margin 1; the correct class is zeroed out so it does
    # not contribute its own (always-1) margin.
    margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0)
    margins[np.arange(N), y] = 0
    loss = np.sum(margins) / N
    # Each violated margin contributes +1 to its class and -1 to the correct
    # class of the same example.
    num_pos = np.sum(margins > 0, axis=1)
    dx = np.zeros_like(x)
    dx[margins > 0] = 1
    dx[np.arange(N), y] -= num_pos
    dx /= N
    return loss, dx


def softmax_loss(x, y):
    """
    Computes the loss and gradient for softmax classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
      for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    # Shift scores by the row max for numerical stability before exponentiating.
    probs = np.exp(x - np.max(x, axis=1, keepdims=True))
    probs /= np.sum(probs, axis=1, keepdims=True)
    N = x.shape[0]
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N
    # d(loss)/d(score) = softmax probability minus 1 at the true class.
    dx = probs.copy()
    dx[np.arange(N), y] -= 1
    dx /= N
    return loss, dx
You will: 7 | 8 | - implement a fully-vectorized **loss function** for the Softmax classifier 9 | - implement the fully-vectorized expression for its **analytic gradient** 10 | - **check your implementation** with numerical gradient 11 | - use a validation set to **tune the learning rate and regularization** strength 12 | - **optimize** the loss function with **SGD** 13 | - **visualize** the final learned weights 14 | 15 | 16 | 17 | ```python 18 | import random 19 | import numpy as np 20 | from cs231n.data_utils import load_CIFAR10 21 | import matplotlib.pyplot as plt 22 | 23 | from __future__ import print_function 24 | 25 | %matplotlib inline 26 | plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots 27 | plt.rcParams['image.interpolation'] = 'nearest' 28 | plt.rcParams['image.cmap'] = 'gray' 29 | 30 | # for auto-reloading external modules 31 | # see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython 32 | %load_ext autoreload 33 | %autoreload 2 34 | ``` 35 | 36 | 37 | ```python 38 | def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500): 39 | """ 40 | Load the CIFAR-10 dataset from disk and perform preprocessing to prepare 41 | it for the linear classifier. These are the same steps as we used for the 42 | SVM, but condensed to a single function.
43 | """ 44 | # Load the raw CIFAR-10 data 45 | cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' 46 | X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) 47 | 48 | # subsample the data 49 | mask = list(range(num_training, num_training + num_validation)) 50 | X_val = X_train[mask] 51 | y_val = y_train[mask] 52 | mask = list(range(num_training)) 53 | X_train = X_train[mask] 54 | y_train = y_train[mask] 55 | mask = list(range(num_test)) 56 | X_test = X_test[mask] 57 | y_test = y_test[mask] 58 | mask = np.random.choice(num_training, num_dev, replace=False) 59 | X_dev = X_train[mask] 60 | y_dev = y_train[mask] 61 | 62 | # Preprocessing: reshape the image data into rows 63 | X_train = np.reshape(X_train, (X_train.shape[0], -1)) 64 | X_val = np.reshape(X_val, (X_val.shape[0], -1)) 65 | X_test = np.reshape(X_test, (X_test.shape[0], -1)) 66 | X_dev = np.reshape(X_dev, (X_dev.shape[0], -1)) 67 | 68 | # Normalize the data: subtract the mean image 69 | mean_image = np.mean(X_train, axis = 0) 70 | X_train -= mean_image 71 | X_val -= mean_image 72 | X_test -= mean_image 73 | X_dev -= mean_image 74 | 75 | # add bias dimension and transform into columns 76 | X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]) 77 | X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]) 78 | X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) 79 | X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))]) 80 | 81 | return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev 82 | 83 | 84 | # Invoke the above function to get our data. 
85 | X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data() 86 | print('Train data shape: ', X_train.shape) 87 | print('Train labels shape: ', y_train.shape) 88 | print('Validation data shape: ', X_val.shape) 89 | print('Validation labels shape: ', y_val.shape) 90 | print('Test data shape: ', X_test.shape) 91 | print('Test labels shape: ', y_test.shape) 92 | print('dev data shape: ', X_dev.shape) 93 | print('dev labels shape: ', y_dev.shape) 94 | ``` 95 | 96 | Train data shape: (49000, 3073) 97 | Train labels shape: (49000,) 98 | Validation data shape: (1000, 3073) 99 | Validation labels shape: (1000,) 100 | Test data shape: (1000, 3073) 101 | Test labels shape: (1000,) 102 | dev data shape: (500, 3073) 103 | dev labels shape: (500,) 104 | 105 | 106 | ## Softmax Classifier 107 | 108 | Your code for this section will all be written inside **cs231n/classifiers/softmax.py**. 109 | 110 | 111 | 112 | ```python 113 | # First implement the naive softmax loss function with nested loops. 114 | # Open the file cs231n/classifiers/softmax.py and implement the 115 | # softmax_loss_naive function. 116 | 117 | from cs231n.classifiers.softmax import softmax_loss_naive 118 | import time 119 | 120 | # Generate a random softmax weight matrix and use it to compute the loss. 121 | W = np.random.randn(3073, 10) * 0.0001 122 | loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0) 123 | 124 | # As a rough sanity check, our loss should be something close to -log(0.1). 125 | print('loss: %f' % loss) 126 | print('sanity check: %f' % (-np.log(0.1))) 127 | ``` 128 | 129 | loss: 2.299190 130 | sanity check: 2.302585 131 | 132 | 133 | ## Inline Question 1: 134 | **Why do we expect our loss to be close to -log(0.1)? Explain briefly.** 135 | 136 | **Your answer:** *Fill this in* 137 | 138 | 139 | 140 | ```python 141 | # Complete the implementation of softmax_loss_naive and implement a (naive) 142 | # version of the gradient that uses nested loops.
143 | loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0) 144 | 145 | # As we did for the SVM, use numeric gradient checking as a debugging tool. 146 | # The numeric gradient should be close to the analytic gradient. 147 | from cs231n.gradient_check import grad_check_sparse 148 | f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 0.0)[0] 149 | grad_numerical = grad_check_sparse(f, W, grad, 10) 150 | 151 | # similar to SVM case, do another gradient check with regularization 152 | loss, grad = softmax_loss_naive(W, X_dev, y_dev, 5e1) 153 | f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 5e1)[0] 154 | grad_numerical = grad_check_sparse(f, W, grad, 10) 155 | ``` 156 | 157 | numerical: -0.994952 analytic: -0.994952, relative error: 6.994422e-08 158 | numerical: 2.846696 analytic: 2.846696, relative error: 3.612826e-09 159 | numerical: -0.671531 analytic: -0.671531, relative error: 2.605211e-09 160 | numerical: 0.994550 analytic: 0.994550, relative error: 4.481386e-08 161 | numerical: 2.151402 analytic: 2.151402, relative error: 1.898146e-08 162 | numerical: 3.071263 analytic: 3.071263, relative error: 1.528335e-08 163 | numerical: -2.056691 analytic: -2.056691, relative error: 2.350468e-08 164 | numerical: -2.970053 analytic: -2.970053, relative error: 2.470392e-08 165 | numerical: -1.339934 analytic: -1.339934, relative error: 1.604170e-08 166 | numerical: 0.710373 analytic: 0.710373, relative error: 6.659996e-08 167 | numerical: 3.004861 analytic: 3.004861, relative error: 1.918921e-08 168 | numerical: -0.390148 analytic: -0.390148, relative error: 6.354846e-09 169 | numerical: 0.287195 analytic: 0.287195, relative error: 8.483866e-08 170 | numerical: -0.619845 analytic: -0.619845, relative error: 2.163794e-08 171 | numerical: 1.122758 analytic: 1.122758, relative error: 1.166628e-08 172 | numerical: 1.725309 analytic: 1.725309, relative error: 2.307015e-08 173 | numerical: 1.899298 analytic: 1.899298, relative error: 8.304561e-09 174 | numerical: 1.446963 
analytic: 1.446963, relative error: 3.126118e-08 175 | numerical: -2.717449 analytic: -2.717449, relative error: 2.436764e-08 176 | numerical: 0.010049 analytic: 0.010049, relative error: 6.364739e-07 177 | 178 | 179 | 180 | ```python 181 | # Now that we have a naive implementation of the softmax loss function and its gradient, 182 | # implement a vectorized version in softmax_loss_vectorized. 183 | # The two versions should compute the same results, but the vectorized version should be 184 | # much faster. 185 | tic = time.time() 186 | loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.000005) 187 | toc = time.time() 188 | print('naive loss: %e computed in %fs' % (loss_naive, toc - tic)) 189 | 190 | from cs231n.classifiers.softmax import softmax_loss_vectorized 191 | tic = time.time() 192 | loss_vectorized, grad_vectorized = softmax_loss_vectorized(W, X_dev, y_dev, 0.000005) 193 | toc = time.time() 194 | print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic)) 195 | 196 | # As we did for the SVM, we use the Frobenius norm to compare the two versions 197 | # of the gradient. 198 | grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro') 199 | print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized)) 200 | print('Gradient difference: %f' % grad_difference) 201 | ``` 202 | 203 | naive loss: 2.321617e+00 computed in 0.105338s 204 | vectorized loss: 2.321617e+00 computed in 0.004589s 205 | Loss difference: 0.000000 206 | Gradient difference: 0.000000 207 | 208 | 209 | 210 | ```python 211 | # Use the validation set to tune hyperparameters (regularization strength and 212 | # learning rate). You should experiment with different ranges for the learning 213 | # rates and regularization strengths; if you are careful you should be able to 214 | # get a classification accuracy of over 0.35 on the validation set. 
215 | from cs231n.classifiers import Softmax 216 | results = {} 217 | best_val = -1 218 | best_softmax = None 219 | learning_rates = [1e-7, 5e-7] 220 | regularization_strengths = [2.5e4, 5e4] 221 | 222 | ################################################################################ 223 | # TODO: # 224 | # Use the validation set to set the learning rate and regularization strength. # 225 | # This should be identical to the validation that you did for the SVM; save # 226 | # the best trained softmax classifier in best_softmax. # 227 | ################################################################################ 228 | for lr in learning_rates: 229 | for reg in regularization_strengths: 230 | 231 | ################################################################################ 232 | # END OF YOUR CODE # 233 | ################################################################################ 234 | 235 | # Print out results. 236 | for lr, reg in sorted(results): 237 | train_accuracy, val_accuracy = results[(lr, reg)] 238 | print('lr %e reg %e train accuracy: %f val accuracy: %f' % ( 239 | lr, reg, train_accuracy, val_accuracy)) 240 | 241 | print('best validation accuracy achieved during cross-validation: %f' % best_val) 242 | ``` 243 | 244 | best validation accuracy achieved during cross-validation: -1.000000 245 | 246 | 247 | 248 | ```python 249 | # evaluate on test set 250 | # Evaluate the best softmax on test set 251 | y_test_pred = best_softmax.predict(X_test) 252 | test_accuracy = np.mean(y_test == y_test_pred) 253 | print('softmax on raw pixels final test set accuracy: %f' % (test_accuracy, )) 254 | ``` 255 | 256 | 257 | ```python 258 | # Visualize the learned weights for each class 259 | w = best_softmax.W[:-1,:] # strip out the bias 260 | w = w.reshape(32, 32, 3, 10) 261 | 262 | w_min, w_max = np.min(w), np.max(w) 263 | 264 | classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] 265 | for i in range(10): 266 |
plt.subplot(2, 5, i + 1) 267 | 268 | # Rescale the weights to be between 0 and 255 269 | wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min) 270 | plt.imshow(wimg.astype('uint8')) 271 | plt.axis('off') 272 | plt.title(classes[i]) 273 | ``` 274 | --------------------------------------------------------------------------------