├── docker
│   ├── .env
│   ├── bin
│   │   ├── tensorboard
│   │   ├── nbdiff_checkpoint
│   │   ├── rm_empty_subdirs
│   │   └── nbclean_checkpoints
│   ├── bashrc.bash
│   ├── Makefile
│   ├── nbdime-2-toc.patch
│   ├── docker-compose.yml
│   ├── jupyter_notebook_config.py
│   ├── nbdime-1-details.patch
│   ├── README.md
│   └── Dockerfile
├── images
│   ├── ann
│   │   └── README
│   ├── cnn
│   │   ├── README
│   │   └── test_image.png
│   ├── deep
│   │   └── README
│   ├── rl
│   │   └── README
│   ├── rnn
│   │   └── README
│   ├── svm
│   │   └── README
│   ├── distributed
│   │   └── README
│   ├── ensembles
│   │   └── README
│   ├── tensorflow
│   │   └── README
│   ├── autoencoders
│   │   └── README
│   ├── classification
│   │   └── README
│   ├── decision_trees
│   │   └── README
│   ├── fundamentals
│   │   └── README
│   ├── end_to_end_project
│   │   ├── README
│   │   └── california.png
│   ├── unsupervised_learning
│   │   ├── README
│   │   └── ladybug.png
│   └── training_linear_models
│       └── README
├── datasets
│   ├── housing
│   │   ├── housing.tgz
│   │   └── README.md
│   ├── lifesat
│   │   ├── gdp_per_capita.csv
│   │   └── README.md
│   └── inception
│       └── imagenet_class_names.txt
├── .gitignore
├── requirements.txt
├── index.ipynb
├── README.md
├── INSTALL.md
├── LICENSE
├── work_in_progress
│   └── extra_autodiff.ipynb
└── book_equations.ipynb
/docker/.env:
--------------------------------------------------------------------------------
1 | COMPOSE_PROJECT_NAME=handson-ml
2 |
--------------------------------------------------------------------------------
/images/ann/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/cnn/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/deep/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/rl/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/rnn/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/svm/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/distributed/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/ensembles/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/tensorflow/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/autoencoders/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/classification/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/decision_trees/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/fundamentals/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/end_to_end_project/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/unsupervised_learning/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/docker/bin/tensorboard:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python -m tensorboard.main "$@"
3 |
--------------------------------------------------------------------------------
/images/training_linear_models/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 |
--------------------------------------------------------------------------------
/images/cnn/test_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/handson-ml2/master/images/cnn/test_image.png
--------------------------------------------------------------------------------
/datasets/housing/housing.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/handson-ml2/master/datasets/housing/housing.tgz
--------------------------------------------------------------------------------
/docker/bashrc.bash:
--------------------------------------------------------------------------------
1 | alias ll="ls -alF"
2 | alias nbd="nbdiff_checkpoint"
3 | alias tb="tensorboard --logdir=tf_logs"
4 |
--------------------------------------------------------------------------------
/datasets/lifesat/gdp_per_capita.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/handson-ml2/master/datasets/lifesat/gdp_per_capita.csv
--------------------------------------------------------------------------------
/images/end_to_end_project/california.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/handson-ml2/master/images/end_to_end_project/california.png
--------------------------------------------------------------------------------
/images/unsupervised_learning/ladybug.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/handson-ml2/master/images/unsupervised_learning/ladybug.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.bak
2 | *.ckpt
3 | *.old
4 | *.pyc
5 | .DS_Store
6 | .ipynb_checkpoints
7 | checkpoint
8 | logs/*
9 | tf_logs/*
10 | images/**/*.png
11 | images/**/*.dot
12 | my_*
13 | datasets/flowers
14 | datasets/lifesat/lifesat.csv
15 | datasets/spam
16 | datasets/words
17 |
18 |
--------------------------------------------------------------------------------
/docker/Makefile:
--------------------------------------------------------------------------------
1 |
2 | help:
3 | 	cat Makefile
4 | run:
5 | 	docker-compose up
6 | exec:
7 | 	docker-compose exec handson-ml bash
8 | build: stop .FORCE
9 | 	docker-compose build
10 | rebuild: stop .FORCE
11 | 	docker-compose build --force-rm
12 | stop:
13 | 	docker stop handson-ml || true; docker rm handson-ml || true;
14 | .FORCE:
15 |
--------------------------------------------------------------------------------
/docker/nbdime-2-toc.patch:
--------------------------------------------------------------------------------
1 | --- a/nbdime/diffing/notebooks.py
2 | +++ b/nbdime/diffing/notebooks.py
3 | @@ -553,7 +553,7 @@
4 |                      del notebook_differs[key]
5 |              else:
6 |                  notebook_differs[key] = diff_ignore_keys(
7 | -                    inner_differ=diff, ignore_keys=['collapsed', 'autoscroll', 'deletable', 'editable'])
8 | +                    inner_differ=diff, ignore_keys=['toc', 'collapsed', 'autoscroll', 'deletable', 'editable'])
9 |      else:
10 |          for key in metadata_keys:
11 |              notebook_differs[key] = diff_ignore
12 |
--------------------------------------------------------------------------------
/docker/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 | services:
3 |   handson-ml:
4 |     build:
5 |       context: ../
6 |       dockerfile: ./docker/Dockerfile
7 |       args:
8 |         - username=devel
9 |         - userid=1000
10 |     container_name: handson-ml
11 |     image: handson-ml
12 |     restart: unless-stopped
13 |     logging:
14 |       driver: json-file
15 |       options:
16 |         max-size: 50m
17 |     ports:
18 |       - "8888:8888"
19 |       - "6006:6006"
20 |     volumes:
21 |       - ../:/home/devel/handson-ml
22 |     command: /opt/conda/bin/jupyter notebook --ip='0.0.0.0' --port=8888 --no-browser
23 |
--------------------------------------------------------------------------------
/docker/bin/nbdiff_checkpoint:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | if [[ "$#" -lt 1 || "$1" =~ ^((-h)|(--help))$ ]] ; then
3 |     echo "usage: nbdiff_checkpoint NOTEBOOK.ipynb"
4 |     echo
5 |     echo "Show differences between given jupyter notebook and its checkpointed version (in .ipynb_checkpoints subdirectory)"
6 |     exit
7 | fi
8 |
9 | DIRNAME=$(dirname "$1")
10 | BASENAME=$(basename "$1" .ipynb)
11 | shift
12 |
13 | WORKING_COPY=$DIRNAME/$BASENAME.ipynb
14 | CHECKPOINT_COPY=$DIRNAME/.ipynb_checkpoints/$BASENAME-checkpoint.ipynb
15 |
16 | echo "----- Analysing how to change $CHECKPOINT_COPY into $WORKING_COPY -----"
17 | nbdiff "$CHECKPOINT_COPY" "$WORKING_COPY" --ignore-details "$@"
18 |
--------------------------------------------------------------------------------
/docker/jupyter_notebook_config.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 |
4 | def export_script_and_view(model, os_path, contents_manager):
5 |     if model["type"] != "notebook":
6 |         return
7 |     dir_name, file_name = os.path.split(os_path)
8 |     file_base, file_ext = os.path.splitext(file_name)
9 |     if file_base.startswith("Untitled"):
10 |         return
11 |     export_name = file_base if file_ext == ".ipynb" else file_name
12 |     subprocess.check_call(["jupyter", "nbconvert", "--to", "script", file_name, "--output", export_name + "_script"], cwd=dir_name)
13 |     subprocess.check_call(["jupyter", "nbconvert", "--to", "html", file_name, "--output", export_name + "_view"], cwd=dir_name)
14 | 
15 | c.FileContentsManager.post_save_hook = export_script_and_view
16 |
--------------------------------------------------------------------------------
/docker/nbdime-1-details.patch:
--------------------------------------------------------------------------------
1 | --- a/nbdime/diffing/notebooks.py
2 | +++ b/nbdime/diffing/notebooks.py
3 | @@ -548,8 +548,12 @@ def set_notebook_diff_targets(sources=True, outputs=True, attachments=True, meta
4 |      metadata_keys = ("/cells/*/metadata", "/metadata", "/cells/*/outputs/*/metadata")
5 |      if metadata:
6 |          for key in metadata_keys:
7 | -            if key in notebook_differs:
8 | -                del notebook_differs[key]
9 | +            if details:
10 | +                if key in notebook_differs:
11 | +                    del notebook_differs[key]
12 | +            else:
13 | +                notebook_differs[key] = diff_ignore_keys(
14 | +                    inner_differ=diff, ignore_keys=['collapsed', 'autoscroll', 'deletable', 'editable'])
15 |      else:
16 |          for key in metadata_keys:
17 |              notebook_differs[key] = diff_ignore
18 |
--------------------------------------------------------------------------------
/docker/bin/rm_empty_subdirs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import os
4 | 
5 | def remove_empty_directories(initial_dir,
6 |                              allow_initial_delete=False, ignore_nonexistant_initial=False,
7 |                              dry_run=False, quiet=False):
8 | 
9 |     FORBIDDEN_SUBDIRS = set([".git"])
10 | 
11 |     if not os.path.isdir(initial_dir) and not ignore_nonexistant_initial:
12 |         raise RuntimeError("Initial directory '{}' not found!".format(initial_dir))
13 | 
14 |     message = "removed"
15 |     if dry_run:
16 |         message = "to be " + message
17 | 
18 |     deleted = set()
19 | 
20 |     for (directory, subdirs, files) in os.walk(initial_dir, topdown=False):
21 |         forbidden = False
22 |         parent = directory
23 |         while parent:
24 |             parent, dirname = os.path.split(parent)
25 |             if dirname in FORBIDDEN_SUBDIRS:
26 |                 forbidden = True
27 |                 break
28 |         if forbidden:
29 |             continue
30 | 
31 |         is_empty = len(files) < 1 and len(set([os.path.join(directory, s) for s in subdirs]) - deleted) < 1
32 | 
33 |         if is_empty and (initial_dir != directory or allow_initial_delete):
34 |             if not quiet:
35 |                 print("{}: {}".format(message, directory))
36 |             deleted.add(directory)
37 |             if not dry_run:
38 |                 os.rmdir(directory)
39 | 
40 | def main():
41 |     import argparse
42 |     parser = argparse.ArgumentParser(description="Remove empty directories recursively in subtree.")
43 |     parser.add_argument("dir", metavar="DIR", type=str, nargs="+", help="directory to be searched")
44 |     parser.add_argument("-r", "--allow-dir-removal", action="store_true", help="allow deletion of DIR itself")
45 |     parser.add_argument("-i", "--ignore-nonexistent-dir", action="store_true", help="don't throw an error if DIR doesn't exist")
46 |     parser.add_argument("-d", "--dry-run", action="store_true", help="only print messages, don't perform any removals")
47 |     parser.add_argument("-q", "--quiet", action="store_true", help="don't print names of directories being removed")
48 |     args = parser.parse_args()
49 |     for directory in args.dir:
50 |         remove_empty_directories(directory, args.allow_dir_removal, args.ignore_nonexistent_dir,
51 |                                  args.dry_run, args.quiet)
52 | 
53 | if __name__ == "__main__":
54 |     main()
55 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # First make sure to update pip:
2 | # $ sudo pip install --upgrade pip
3 | #
4 | # Then you probably want to work in a virtualenv (optional):
5 | # $ sudo pip install --upgrade virtualenv
6 | # Or if you prefer you can install virtualenv using your favorite packaging
7 | # system. E.g., in Ubuntu:
8 | # $ sudo apt-get update && sudo apt-get install virtualenv
9 | # Then:
10 | # $ cd $my_work_dir
11 | # $ virtualenv my_env
12 | # $ . my_env/bin/activate
13 | #
14 | # Next, optionally uncomment the OpenAI gym lines (see below).
15 | # If you do, make sure to install the dependencies first.
16 | # If you are interested in xgboost for high performance Gradient Boosting, you
17 | # should uncomment the xgboost line (used in the ensemble learning notebook).
18 | #
19 | # Then install these requirements:
20 | # $ pip install --upgrade -r requirements.txt
21 | #
22 | # Finally, start jupyter:
23 | # $ jupyter notebook
24 | #
25 |
26 |
27 | ##### Core scientific packages
28 | jupyter==1.0.0
29 | matplotlib==3.0.3
30 | numpy==1.16.2
31 | pandas==0.24.1
32 | scipy==1.2.1
33 |
34 |
35 | ##### Machine Learning packages
36 | scikit-learn==0.20.3
37 |
38 | # Optional: the XGBoost library is only used in the ensemble learning chapter.
39 | xgboost==0.82
40 |
41 |
42 | ##### Deep Learning packages
43 |
44 | # Replace tensorflow with tensorflow-gpu if you want GPU support. If so,
45 | # you need a GPU card with CUDA Compute Capability 3.5 or higher support, and
46 | # you must install CUDA, cuDNN and more: see tensorflow.org for the detailed
47 | # installation instructions.
48 |
49 | #tensorflow
50 | tf-nightly-2.0-preview
51 | ##tensorflow-gpu
52 | #tf-nightly-gpu-2.0-preview
53 |
54 | #tensorboard
55 | tb-nightly
56 |
57 | #tensorflow-datasets
58 | tfds-nightly
59 |
60 | tensorflow-hub
61 |
62 | #tensorflow-probability
63 | tfp-nightly
64 |
65 | tensorflow-transform
66 |
67 |
68 | # Optional: OpenAI gym is only needed for the Reinforcement Learning chapter.
69 | # There are a few dependencies you need to install first, check out:
70 | # https://github.com/openai/gym#installing-everything
71 | #gym[all]==0.10.9
72 | # If you only want to install the Atari dependency, uncomment this line instead:
73 | #gym[atari]==0.10.9
74 |
75 |
76 | ##### Image manipulation
77 | imageio==2.5.0
78 | Pillow==5.4.1
79 | scikit-image==0.14.2
80 |
81 |
82 | ##### Extra packages (optional)
83 |
84 | # Nice utility to diff Jupyter Notebooks.
85 | #nbdime==1.0.5
86 |
87 | # May be useful with Pandas for complex "where" clauses (e.g., Pandas
88 | # tutorial).
89 | numexpr==2.6.9
90 |
91 | # Optional: these libraries can be useful in the classification chapter,
92 | # exercise 4.
93 | nltk==3.4
94 | urlextract==0.9
95 |
96 | # Optional: tqdm displays nice progress bars, ipywidgets for tqdm's notebook support
97 | tqdm==4.31.1
98 | ipywidgets==7.4.2
99 |
--------------------------------------------------------------------------------
/docker/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Hands-on Machine Learning in Docker
3 |
4 | This is the Docker configuration that allows you to run and tweak the book's notebooks without installing any dependencies on your machine!
5 | OK, none except `docker`. And `docker-compose`. Well, you may also want `make` (but it is only used as a thin layer to call a few simple `docker-compose` commands).
6 |
7 | ## Prerequisites
8 |
9 | As stated, the two things you need are `docker` and `docker-compose`.
10 |
11 | Follow the instructions on [Install Docker](https://docs.docker.com/engine/installation/) and [Install Docker Compose](https://docs.docker.com/compose/install/) for your environment if you haven't got `docker` already.
12 |
13 | Some general knowledge about `docker` infrastructure might be useful (that's an interesting topic on its own) but is not strictly *required* to just run the notebooks.
14 |
15 | ## Usage
16 |
17 | ### Prepare the image (once)
18 |
19 | Switch to the `docker` directory and run `make build` (or `docker-compose build`) to build your docker image. That may take some time, but it is only required once. Or perhaps a few times, after you tweak something in the `Dockerfile`.
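
For example, from the repository root (a sketch; the `make` targets are defined in `docker/Makefile`):

    $ cd docker
    $ make build        # roughly equivalent to: docker-compose build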
20 |
21 | After the process finishes, you will have a `handson-ml` image that will be the base for your experiments. You can confirm this by looking at the output of the `docker images` command.
22 |
23 | ### Run the notebooks
24 |
25 | Run `make run` (or just `docker-compose up`) to start the jupyter server inside the container (also named `handson-ml`, like the image). Just point your browser to the URL printed on the screen (or just to localhost:8888 if you enabled password authentication) and you're ready to play with the book's code!
26 |
27 | The server runs in the directory containing the notebooks, and the changes you make from the browser will be persisted there.
28 |
29 | You can close the server just by pressing `Ctrl-C` in the terminal window.
30 |
31 | ### Run additional commands in container
32 |
33 | Run `make exec` (or `docker-compose exec handson-ml bash`) while the server is running to start an additional `bash` shell inside the `handson-ml` container. Now you're inside the environment prepared within the image.
34 |
35 | One of the useful things that can be done there is starting TensorBoard (for example with the simple `tb` command, see the bashrc file).
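
For example (a sketch; the `tb` alias expands to `tensorboard --logdir=tf_logs`, as defined in `bashrc.bash`, and port 6006 is published by `docker-compose.yml`):

    $ make exec
    $ tb                # inside the container; then browse to localhost:6006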
36 |
37 | Another is comparing versions of the notebooks using the `nbdiff` command if you haven't got `nbdime` installed locally (it is **way** better than plain `diff` for notebooks). See [Tools for diffing and merging of Jupyter notebooks](https://github.com/jupyter/nbdime) for more details.
38 |
39 | You can see the changes you made relative to the version in git using `git diff`, which is integrated with `nbdiff`.
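
For example (a sketch; any notebook tracked by git works):

    $ git diff 04_training_linear_models.ipynb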
40 |
41 | You may also try the `nbd NOTEBOOK_NAME.ipynb` command (custom, see the bashrc file) to compare one of your notebooks with its *checkpointed* version.
42 | To be precise, the output will tell you *what modifications should be re-played on the **manually saved** version of the notebook (located in the `.ipynb_checkpoints` subdirectory) to update it to the **current**, i.e. **auto-saved**, version (given as the command's argument, located in the working directory)*.
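
For example (assuming you have been editing `03_classification.ipynb`):

    $ nbd 03_classification.ipynb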
43 |
--------------------------------------------------------------------------------
/datasets/housing/README.md:
--------------------------------------------------------------------------------
1 | # California Housing
2 |
3 | ## Source
4 | This dataset is a modified version of the California Housing dataset available from [Luís Torgo's page](http://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html) (University of Porto). Luís Torgo obtained it from the StatLib repository (which is closed now). The dataset may also be downloaded from StatLib mirrors.
5 |
6 | This dataset appeared in a 1997 paper titled *Sparse Spatial Autoregressions* by Pace, R. Kelley and Ronald Barry, published in the *Statistics and Probability Letters* journal. They built it using the 1990 California census data. It contains one row per census block group. A block group is the smallest geographical unit for which the U.S. Census Bureau publishes sample data (a block group typically has a population of 600 to 3,000 people).
7 |
8 | ## Tweaks
9 | The dataset in this directory is almost identical to the original, with two differences:
10 |
11 | * 207 values were randomly removed from the `total_bedrooms` column, so we can discuss what to do with missing data.
12 | * An additional categorical attribute called `ocean_proximity` was added, indicating (very roughly) whether each block group is near the ocean, near the Bay area, inland or on an island. This allows discussing what to do with categorical data.
13 |
14 | Note that the block groups are called "districts" in the Jupyter notebooks, simply because in some contexts the name "block group" was confusing.
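
For instance, a minimal sketch (assuming pandas is installed and that the archive contains `housing.csv`, which is what the notebooks expect) to load the data and verify the missing values:

    >>> import tarfile
    >>> import pandas as pd
    >>> with tarfile.open("housing.tgz") as tgz:
    ...     tgz.extractall()                       # extracts housing.csv
    >>> housing = pd.read_csv("housing.csv")
    >>> housing["total_bedrooms"].isnull().sum()   # the 207 removed values show up as NaN
    207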
15 |
16 | ## Data description
17 |
18 | >>> housing.info()
19 | 
20 | RangeIndex: 20640 entries, 0 to 20639
21 | Data columns (total 10 columns):
22 | longitude             20640 non-null float64
23 | latitude              20640 non-null float64
24 | housing_median_age    20640 non-null float64
25 | total_rooms           20640 non-null float64
26 | total_bedrooms        20433 non-null float64
27 | population            20640 non-null float64
28 | households            20640 non-null float64
29 | median_income         20640 non-null float64
30 | median_house_value    20640 non-null float64
31 | ocean_proximity       20640 non-null object
32 | dtypes: float64(9), object(1)
33 | memory usage: 1.6+ MB
34 | 
35 | >>> housing["ocean_proximity"].value_counts()
36 | <1H OCEAN     9136
37 | INLAND        6551
38 | NEAR OCEAN    2658
39 | NEAR BAY      2290
40 | ISLAND           5
41 | Name: ocean_proximity, dtype: int64
42 |
43 | >>> housing.describe()
44 |            longitude      latitude  housing_median_age   total_rooms  \
45 | count   16513.000000  16513.000000        16513.000000  16513.000000
46 | mean     -119.575972     35.639693           28.652335   2622.347605
47 | std         2.002048      2.138279           12.576306   2138.559393
48 | min      -124.350000     32.540000            1.000000      6.000000
49 | 25%      -121.800000     33.940000           18.000000   1442.000000
50 | 50%      -118.510000     34.260000           29.000000   2119.000000
51 | 75%      -118.010000     37.720000           37.000000   3141.000000
52 | max      -114.310000     41.950000           52.000000  39320.000000
53 | 
54 |         total_bedrooms    population    households  median_income
55 | count     16355.000000  16513.000000  16513.000000   16513.000000
56 | mean        534.885112   1419.525465    496.975050       3.875651
57 | std         412.716467   1115.715084    375.737945       1.905088
58 | min           2.000000      3.000000      2.000000       0.499900
59 | 25%         295.000000    784.000000    278.000000       2.566800
60 | 50%         433.000000   1164.000000    408.000000       3.541400
61 | 75%         644.000000   1718.000000    602.000000       4.745000
62 | max        6210.000000  35682.000000   5358.000000      15.000100
63 |
64 |
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM continuumio/anaconda3:5.2.0
2 |
3 | RUN apt-get update && apt-get upgrade -y \
4 | && apt-get install -y \
5 | libpq-dev \
6 | build-essential \
7 | git \
8 | sudo \
9 | cmake zlib1g-dev libjpeg-dev xvfb libav-tools xorg-dev libboost-all-dev libsdl2-dev swig \
10 | && rm -rf /var/lib/apt/lists/*
11 |
12 | RUN conda update -n base conda
13 | RUN conda install -y -c conda-forge \
14 | tensorflow \
15 | jupyter_contrib_nbextensions \
16 | pyopengl
17 | RUN pip install "gym[atari,box2d,classic_control]"
18 |
19 | ARG username
20 | ARG userid
21 |
22 | ARG home=/home/${username}
23 | ARG workdir=${home}/handson-ml
24 |
25 | RUN adduser ${username} --uid ${userid} --gecos '' --disabled-password \
26 | && echo "${username} ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/${username} \
27 | && chmod 0440 /etc/sudoers.d/${username}
28 |
29 | WORKDIR ${workdir}
30 | RUN chown ${username}:${username} ${workdir}
31 |
32 | USER ${username}
33 |
34 | RUN jupyter contrib nbextension install --user
35 | RUN jupyter nbextension enable toc2/main
36 |
37 |
38 | # INFO: Jupyter and nbdime extension are not totally integrated (anaconda image is py36,
39 | # nbdime checks for py35 at the moment), still the config below enables diffing
40 | # notebooks with nbdiff (and nbdiff support in git diff command) after connecting
41 | # to the container by "make exec" (or "docker-compose exec handson-ml bash")
42 | # You may also try running:
43 | # nbd NOTEBOOK_NAME.ipynb
44 | # to get nbdiff between checkpointed version and current version of the given notebook
45 | USER root
46 | WORKDIR /
47 | RUN conda install -y -c conda-forge nbdime
48 | USER ${username}
49 | WORKDIR ${workdir}
50 |
51 | RUN git-nbdiffdriver config --enable --global
52 |
53 | # INFO: Optionally uncomment any (one) of the following RUN commands below to ignore either
54 | # metadata or details in nbdiff within git diff
55 | #RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-metadata'
56 | RUN git config --global diff.jupyternotebook.command 'git-nbdiffdriver diff --ignore-details'
57 |
58 |
59 | # INFO: Dirty nbdime patching (ignored if not matching)
60 | COPY docker/nbdime-*.patch /tmp/
61 | USER root
62 | WORKDIR /
63 | RUN patch -d /opt/conda/lib/python3.6/site-packages -p1 --forward --reject-file=- < \
64 | /tmp/nbdime-1-details.patch || true \
65 | && patch -d /opt/conda/lib/python3.6/site-packages -p1 --forward --reject-file=- < \
66 | /tmp/nbdime-2-toc.patch || true
67 | RUN rm /tmp/nbdime-*.patch
68 | USER ${username}
69 | WORKDIR ${workdir}
70 |
71 |
72 | COPY docker/bashrc.bash /tmp/
73 | RUN cat /tmp/bashrc.bash >> ${home}/.bashrc
74 | RUN echo "export PATH=\"${workdir}/docker/bin:$PATH\"" >> ${home}/.bashrc
75 | RUN sudo rm /tmp/bashrc.bash
76 |
77 |
78 | # INFO: Uncomment lines below to enable automatic save of python-only and html-only
79 | # exports alongside the notebook
80 | #COPY docker/jupyter_notebook_config.py /tmp/
81 | #RUN cat /tmp/jupyter_notebook_config.py >> ${home}/.jupyter/jupyter_notebook_config.py
82 | #RUN sudo rm /tmp/jupyter_notebook_config.py
83 |
84 | # INFO: Uncomment the RUN command below to disable git diff paging
85 | #RUN git config --global core.pager ''
86 |
87 | # INFO: Uncomment the RUN command below for easy and constant notebook URL (just localhost:8888)
88 | # That will switch jupyter to using empty password instead of a token.
89 | # To avoid making a security hole you SHOULD in fact not only uncomment but
90 | # regenerate the hash for your own non-empty password and replace the hash below.
91 | # You can compute a password hash in any notebook, just run the code:
92 | # from notebook.auth import passwd
93 | # passwd()
94 | # and take the hash from the output
95 | #RUN mkdir -p ${home}/.jupyter && \
96 | # echo 'c.NotebookApp.password = u"sha1:c6bbcba2d04b:f969e403db876dcfbe26f47affe41909bd53392e"' \
97 | # >> ${home}/.jupyter/jupyter_notebook_config.py
98 |
--------------------------------------------------------------------------------
/index.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Machine Learning Notebooks\n",
8 | "\n",
9 | "*Welcome to the Machine Learning Notebooks!*\n",
10 | "\n",
11 | "[Prerequisites](#Prerequisites) (see below)\n",
12 | "\n",
13 | "## Notebooks\n",
14 | "1. [The Machine Learning landscape](01_the_machine_learning_landscape.ipynb)\n",
15 | "2. [End-to-end Machine Learning project](02_end_to_end_machine_learning_project.ipynb)\n",
16 | "3. [Classification](03_classification.ipynb)\n",
17 | "4. [Training Linear Models](04_training_linear_models.ipynb)\n",
18 | "5. [Support Vector Machines](05_support_vector_machines.ipynb)\n",
19 | "6. [Decision Trees](06_decision_trees.ipynb)\n",
20 | "7. [Ensemble Learning and Random Forests](07_ensemble_learning_and_random_forests.ipynb)\n",
21 | "8. [Dimensionality Reduction](08_dimensionality_reduction.ipynb)\n",
22 | "9. [Unsupervised Learning](09_unsupervised_learning.ipynb)\n",
23 | "10. [Neural Nets with Keras](10_neural_nets_with_keras.ipynb)\n",
24 | "11. [Deep Learning](11_deep_learning.ipynb)\n",
25 | "12. [Custom Models with TensorFlow 2](12_custom_models_with_tensorflow_2.ipynb)\n",
26 | "13. [Loading and Preprocessing Data](13_loading_and_preprocessing_data.ipynb)\n",
27 | "\n",
28 | "Chapters 14 to 18 are in progress.\n",
29 | "\n",
30 | "## Scientific Python tutorials\n",
31 | "* [NumPy](tools_numpy.ipynb)\n",
32 | "* [Matplotlib](tools_matplotlib.ipynb)\n",
33 | "* [Pandas](tools_pandas.ipynb)\n",
34 | "\n",
35 | "## Math Tutorials\n",
36 | "* [Linear Algebra](math_linear_algebra.ipynb)\n",
37 | "\n",
38 | "## Extra Material\n",
39 | "Work in progress\n",
40 | "\n",
41 | "## Misc.\n",
42 | "* [Equations](book_equations.ipynb) (list of equations in the book)\n"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {
48 | "collapsed": true
49 | },
50 | "source": [
51 | "## Prerequisites\n",
52 | "### To understand\n",
53 | "* **Python** – you don't need to be an expert python programmer, but you do need to know the basics. If you don't, the official [Python tutorial](https://docs.python.org/3/tutorial/) is a good place to start.\n",
54 | "* **Scientific Python** – We will be using a few popular python libraries, in particular NumPy, matplotlib and pandas. If you are not familiar with these libraries, you should probably start by going through the tutorials in the Tools section (especially NumPy).\n",
55 | "* **Math** – We will also use some notions of Linear Algebra, Calculus, Statistics and Probability theory. You should be able to follow along if you learned these in the past as it won't be very advanced, but if you don't know about these topics or you need a refresher then go through the appropriate introduction in the Math section.\n",
56 | "\n",
57 | "### To run the examples\n",
58 | "* **Jupyter** – These notebooks are based on Jupyter. You can run these notebooks in just one click using a hosted platform such as Binder, Deepnote or Colaboratory (no installation required), or you can just view them using Jupyter.org's viewer, or you can install everything on your machine, as you prefer. Check out the [home page](https://github.com/ageron/handson-ml2/) for more details."
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {
65 | "collapsed": true
66 | },
67 | "outputs": [],
68 | "source": []
69 | }
70 | ],
71 | "metadata": {
72 | "kernelspec": {
73 | "display_name": "Python 3",
74 | "language": "python",
75 | "name": "python3"
76 | },
77 | "language_info": {
78 | "codemirror_mode": {
79 | "name": "ipython",
80 | "version": 3
81 | },
82 | "file_extension": ".py",
83 | "mimetype": "text/x-python",
84 | "name": "python",
85 | "nbconvert_exporter": "python",
86 | "pygments_lexer": "ipython3",
87 | "version": "3.6.8"
88 | },
89 | "nav_menu": {},
90 | "toc": {
91 | "navigate_menu": true,
92 | "number_sections": true,
93 | "sideBar": true,
94 | "threshold": 6,
95 | "toc_cell": false,
96 | "toc_section_display": "block",
97 | "toc_window_display": false
98 | }
99 | },
100 | "nbformat": 4,
101 | "nbformat_minor": 1
102 | }
103 |
--------------------------------------------------------------------------------
/datasets/lifesat/README.md:
--------------------------------------------------------------------------------
1 | # Life satisfaction and GDP per capita
2 | ## Life satisfaction
3 | ### Source
4 | This dataset was obtained from the OECD's website at: http://stats.oecd.org/index.aspx?DataSetCode=BLI
5 |
6 | ### Data description
7 |
8 | Int64Index: 3292 entries, 0 to 3291
9 | Data columns (total 17 columns):
10 | "LOCATION"               3292 non-null object
11 | Country                  3292 non-null object
12 | INDICATOR                3292 non-null object
13 | Indicator                3292 non-null object
14 | MEASURE                  3292 non-null object
15 | Measure                  3292 non-null object
16 | INEQUALITY               3292 non-null object
17 | Inequality               3292 non-null object
18 | Unit Code                3292 non-null object
19 | Unit                     3292 non-null object
20 | PowerCode Code           3292 non-null int64
21 | PowerCode                3292 non-null object
22 | Reference Period Code    0 non-null float64
23 | Reference Period         0 non-null float64
24 | Value                    3292 non-null float64
25 | Flag Codes               1120 non-null object
26 | Flags                    1120 non-null object
27 | dtypes: float64(3), int64(1), object(13)
28 | memory usage: 462.9+ KB
29 |
30 | ### Example usage using python Pandas
31 |
32 | >>> life_sat = pd.read_csv("oecd_bli_2015.csv", thousands=',')
33 |
34 | >>> life_sat_total = life_sat[life_sat["INEQUALITY"]=="TOT"]
35 |
36 | >>> life_sat_total = life_sat_total.pivot(index="Country", columns="Indicator", values="Value")
37 |
38 | >>> life_sat_total.info()
39 |
40 | Index: 37 entries, Australia to United States
41 | Data columns (total 24 columns):
42 | Air pollution                                37 non-null float64
43 | Assault rate                                 37 non-null float64
44 | Consultation on rule-making                  37 non-null float64
45 | Dwellings without basic facilities           37 non-null float64
46 | Educational attainment                       37 non-null float64
47 | Employees working very long hours            37 non-null float64
48 | Employment rate                              37 non-null float64
49 | Homicide rate                                37 non-null float64
50 | Household net adjusted disposable income     37 non-null float64
51 | Household net financial wealth               37 non-null float64
52 | Housing expenditure                          37 non-null float64
53 | Job security                                 37 non-null float64
54 | Life expectancy                              37 non-null float64
55 | Life satisfaction                            37 non-null float64
56 | Long-term unemployment rate                  37 non-null float64
57 | Personal earnings                            37 non-null float64
58 | Quality of support network                   37 non-null float64
59 | Rooms per person                             37 non-null float64
60 | Self-reported health                         37 non-null float64
61 | Student skills                               37 non-null float64
62 | Time devoted to leisure and personal care    37 non-null float64
63 | Voter turnout                                37 non-null float64
64 | Water quality                                37 non-null float64
65 | Years in education                           37 non-null float64
66 | dtypes: float64(24)
67 | memory usage: 7.2+ KB
68 |
69 | ## GDP per capita
70 | ### Source
71 | Dataset obtained from the IMF's website at: http://goo.gl/j1MSKe
72 |
73 | ### Data description
74 |
75 | Int64Index: 190 entries, 0 to 189
76 | Data columns (total 7 columns):
77 | Country                          190 non-null object
78 | Subject Descriptor               189 non-null object
79 | Units                            189 non-null object
80 | Scale                            189 non-null object
81 | Country/Series-specific Notes    188 non-null object
82 | 2015                             187 non-null float64
83 | Estimates Start After            188 non-null float64
84 | dtypes: float64(2), object(5)
85 | memory usage: 11.9+ KB
86 |
87 | ### Example usage using python Pandas
88 |
89 | >>> gdp_per_capita = pd.read_csv(
90 | ... datapath+"gdp_per_capita.csv", thousands=',', delimiter='\t',
91 | ... encoding='latin1', na_values="n/a", index_col="Country")
92 | ...
93 | >>> gdp_per_capita.rename(columns={"2015": "GDP per capita"}, inplace=True)
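
The two tables can then be combined on the country index, for example (a sketch, assuming both frames were prepared as above):

    >>> full_stats = life_sat_total.join(gdp_per_capita[["GDP per capita"]])
    >>> full_stats[["Life satisfaction", "GDP per capita"]].head()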
94 |
95 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Machine Learning Notebooks
2 | ==========================
3 |
4 | This project aims at teaching you the fundamentals of Machine Learning in
5 | Python. It contains the example code and solutions to the exercises in the second edition of my O'Reilly book [Hands-on Machine Learning with Scikit-Learn, Keras and TensorFlow](https://www.oreilly.com/library/view/hands-on-machine-learning/9781492032632/):
6 |
7 |
8 |
9 | **Note**: If you are looking for the first edition notebooks, check out [ageron/handson-ml](https://github.com/ageron/handson-ml).
10 |
11 | ## Quick Start
12 |
13 | ### Want to play with these notebooks without having to install anything?
14 | Use any of the following services.
15 |
16 | **WARNING**: Please be aware that these services provide temporary environments: anything you do will be deleted after a while, so make sure you save anything you care about.
17 |
18 | * Open this repository in [Binder](https://mybinder.org/v2/gh/ageron/handson-ml2/master):
19 |
20 |
21 | * _Note_: Most of the time, Binder starts up quickly and works great, but when handson-ml2 is updated, Binder creates a new environment from scratch, and this can take quite some time.
22 |
23 | * Or open it in [Deepnote](https://beta.deepnote.org/launch?template=data-science&url=https%3A//github.com/ageron/handson-ml2/blob/master/index.ipynb):
24 |
25 |
26 | * _Note_: Deepnote environments start up quickly, but they do not contain the latest Scikit-Learn and TensorFlow libraries, so you will need to run `!python3 -m pip install -U -r requirements.txt` before you import any library (or you must restart the runtime).
27 |
28 | * Or open it in [Colaboratory](https://colab.research.google.com/github/ageron/handson-ml2/blob/master/):
29 |
30 |
31 | * _Note_: Colab environments only contain the notebooks you open, they do not clone the rest of the project, so you need to do it yourself by running `!git clone https://github.com/ageron/handson-ml2` and `!mv handson-ml2/* /content` to have access to other files in this project (such as datasets and images). Moreover, Colab does not come with the latest libraries, so you need to run `!python3 -m pip install -U -r requirements.txt` then restart the environment (but do not reset it!). If you open multiple notebooks from this project, you only need to do this once (as long as you do not reset the runtimes).
32 |
33 | ### Just want to quickly look at some notebooks, without executing any code?
34 |
35 | Browse this repository using [jupyter.org's notebook viewer](http://nbviewer.jupyter.org/github/ageron/handson-ml2/blob/master/index.ipynb):
36 |
37 |
38 | _Note_: [github.com's notebook viewer](https://github.com/ageron/handson-ml2/blob/master/index.ipynb) also works but it is slower and the math equations are not always displayed correctly.
39 |
40 | ### Want to install this project on your own machine?
41 |
42 | If you have a working Python 3.5+ environment and git is installed, then an easy way to install this project and its dependencies is using pip. Open a terminal and run the following commands (do not type the `$` signs, they just indicate that this is a terminal command):
43 |
44 | $ git clone https://github.com/ageron/handson-ml2.git
45 | $ cd handson-ml2
46 | $ python3 -m pip install --user --upgrade pip setuptools
47 | $ # Read `requirements.txt` if you want to use a GPU.
48 | $ python3 -m pip install --user --upgrade -r requirements.txt
49 | $ jupyter notebook
50 |
51 | If you need more detailed installation instructions, or you want to use Anaconda, read the [detailed installation instructions](INSTALL.md).
52 |
53 | ## Contributors
54 | I would like to thank everyone who contributed to this project, either by providing useful feedback, filing issues or submitting Pull Requests. Special thanks go to Haesun Park, who helped on some of the exercise solutions, and to Steven Bunkley and Ziembla, who created the `docker` directory.
55 |
--------------------------------------------------------------------------------
/docker/bin/nbclean_checkpoints:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import collections
4 | import glob
5 | import hashlib
6 | import os
7 | import subprocess
8 | 
9 | 
10 | class NotebookAnalyser:
11 | 
12 |     def __init__(self, dry_run=False, verbose=False, colorful=False):
13 |         self._dry_run = dry_run
14 |         self._verbose = verbose
15 |         self._colors = collections.defaultdict(lambda: "")
16 |         if colorful:
17 |             for color in [
18 |                 NotebookAnalyser.COLOR_WHITE,
19 |                 NotebookAnalyser.COLOR_RED,
20 |                 NotebookAnalyser.COLOR_GREEN,
21 |                 NotebookAnalyser.COLOR_YELLOW,
22 |             ]:
23 |                 self._colors[color] = "\033[{}m".format(color)
24 | 
25 |     NOTEBOOK_SUFFIX = ".ipynb"
26 |     CHECKPOINT_DIR = NOTEBOOK_SUFFIX + "_checkpoints"
27 |     CHECKPOINT_MASK = "*-checkpoint" + NOTEBOOK_SUFFIX
28 |     CHECKPOINT_MASK_LEN = len(CHECKPOINT_MASK) - 1
29 | 
30 |     @staticmethod
31 |     def get_hash(file_path):
32 |         with open(file_path, "rb") as input:
33 |             hash = hashlib.md5()
34 |             for chunk in iter(lambda: input.read(4096), b""):
35 |                 hash.update(chunk)
36 |         return hash.hexdigest()
37 | 
38 |     MESSAGE_ORPHANED = "missing "
39 |     MESSAGE_MODIFIED = "modified"
40 |     MESSAGE_DELETED = "DELETING"
41 | 
42 |     COLOR_WHITE = "0"
43 |     COLOR_RED = "31"
44 |     COLOR_GREEN = "32"
45 |     COLOR_YELLOW = "33"
46 | 
47 |     def log(self, message, file, color=COLOR_WHITE):
48 |         color_on = self._colors[color]
49 |         color_off = self._colors[NotebookAnalyser.COLOR_WHITE]
50 |         print("{}{}{}: {}".format(color_on, message, color_off, file))
51 | 
52 |     def clean_checkpoints(self, directory):
53 |         for checkpoint_path in sorted(glob.glob(os.path.join(directory, NotebookAnalyser.CHECKPOINT_MASK))):
54 | 
55 |             workfile_dir = os.path.dirname(os.path.dirname(checkpoint_path))
56 |             workfile_name = os.path.basename(checkpoint_path)[:-NotebookAnalyser.CHECKPOINT_MASK_LEN] + NotebookAnalyser.NOTEBOOK_SUFFIX
57 |             workfile_path = os.path.join(workfile_dir, workfile_name)
58 | 
59 |             status = ""
60 |             if not os.path.isfile(workfile_path):
61 |                 if self._verbose:
62 |                     self.log(NotebookAnalyser.MESSAGE_ORPHANED, workfile_path, NotebookAnalyser.COLOR_RED)
63 |             else:
64 |                 checkpoint_stat = os.stat(checkpoint_path)
65 |                 workfile_stat = os.stat(workfile_path)
66 | 
67 |                 modified = workfile_stat.st_size != checkpoint_stat.st_size
68 | 
69 |                 if not modified:
70 |                     checkpoint_hash = NotebookAnalyser.get_hash(checkpoint_path)
71 |                     workfile_hash = NotebookAnalyser.get_hash(workfile_path)
72 |                     modified = checkpoint_hash != workfile_hash
73 | 
74 |                 if modified:
75 |                     if self._verbose:
76 |                         self.log(NotebookAnalyser.MESSAGE_MODIFIED, workfile_path, NotebookAnalyser.COLOR_YELLOW)
77 |                 else:
78 |                     self.log(NotebookAnalyser.MESSAGE_DELETED, checkpoint_path, NotebookAnalyser.COLOR_GREEN)
79 |                     if not self._dry_run:
80 |                         os.remove(checkpoint_path)
81 | 
82 |         if not self._dry_run and not os.listdir(directory):
83 |             self.log(NotebookAnalyser.MESSAGE_DELETED, directory, NotebookAnalyser.COLOR_GREEN)
84 |             os.rmdir(directory)
85 | 
86 |     def clean_checkpoints_recursively(self, directory):
87 |         for (root, subdirs, files) in os.walk(directory):
88 |             subdirs.sort()  # INFO: traverse alphabetically
89 |             if NotebookAnalyser.CHECKPOINT_DIR in subdirs:
90 |                 subdirs.remove(NotebookAnalyser.CHECKPOINT_DIR)  # INFO: don't recurse there
91 |                 self.clean_checkpoints(os.path.join(root, NotebookAnalyser.CHECKPOINT_DIR))
92 | 
93 | 
94 | def main():
95 |     import argparse
96 |     parser = argparse.ArgumentParser(description="Remove checkpointed versions of those jupyter notebooks that are identical to their working copies.",
97 |                                      epilog="""Notebooks will be reported as either
98 | "DELETED" if the working copy and checkpointed version are identical
99 | (checkpoint will be deleted),
100 | "missing" if there is a checkpoint but no corresponding working file can be found
101 | or "modified" if notebook and the checkpoint are not byte-to-byte identical.
102 | If removal of checkpoints results in empty ".ipynb_checkpoints" directory
103 | that directory is also deleted.
104 | """) #, formatter_class=argparse.RawDescriptionHelpFormatter)
105 |     parser.add_argument("dirs", metavar="DIR", type=str, nargs="*", default=".", help="directories to search")
106 |     parser.add_argument("-d", "--dry-run", action="store_true", help="only print messages, don't perform any removals")
107 |     parser.add_argument("-v", "--verbose", action="store_true", help="verbose mode")
108 |     parser.add_argument("-c", "--color", action="store_true", help="colorful mode")
109 |     args = parser.parse_args()
110 | 
111 |     analyser = NotebookAnalyser(args.dry_run, args.verbose, args.color)
112 |     for directory in args.dirs:
113 |         analyser.clean_checkpoints_recursively(directory)
114 | 
115 | if __name__ == "__main__":
116 |     main()
117 |
--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
1 | # Installation
2 | To install this repository and run the Jupyter notebooks on your machine, you will first need git, which you probably have already. If not, you can download it from [git-scm.com](https://git-scm.com/).
3 |
4 | Next, clone this repository by opening a terminal and typing the following commands:
5 |
6 | $ cd $HOME # or any other development directory you prefer
7 | $ git clone https://github.com/ageron/handson-ml2.git
8 | $ cd handson-ml2
9 |
10 | If you do not want to install git, you can instead download [master.zip](https://github.com/ageron/handson-ml2/archive/master.zip), unzip it, rename the resulting directory to `handson-ml2` and move it to your development directory.
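
For example (a sketch; GitHub's archive unpacks into a directory suffixed with the branch name):

    $ curl -L -o master.zip https://github.com/ageron/handson-ml2/archive/master.zip
    $ unzip master.zip
    $ mv handson-ml2-master handson-ml2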
11 |
12 | If you want to go through chapter 16 on Reinforcement Learning, you will need to [install OpenAI gym](https://gym.openai.com/docs) and its dependencies for Atari simulations.
13 |
14 | If you have a TensorFlow-compatible GPU card (NVidia card with Compute Capability ≥ 3.5), and you want TensorFlow to use it, then you should follow TensorFlow's [GPU installation instructions](https://tensorflow.org/install/gpu) to install the driver and libraries such as CUDA and CuDNN. Note that the installation instructions are still for TF 1.12, not TF 2.0, so you need to install CUDA 10.0 (not 9.2) with the corresponding NVidia driver (see NVidia's website for details) and CuDNN SDK 7.4 (not 7.2). Also edit `requirements.txt` to replace `tf-nightly-2.0-preview` with `tf-nightly-gpu-2.0-preview`.
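
For example, that last edit is a one-liner (a sketch; works with both GNU and BSD `sed`, and keeps a `.bak` backup):

    $ sed -i.bak 's/^tf-nightly-2.0-preview/tf-nightly-gpu-2.0-preview/' requirements.txt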
15 |
16 | If you are familiar with Python and you know how to install Python libraries, go ahead and install the libraries listed in `requirements.txt` and jump to the [Starting Jupyter](#starting-jupyter) section. If you need detailed instructions, please read on.
17 |
18 | ## Python & Required Libraries
19 | Obviously, you need Python. Python 2 is already preinstalled on most systems nowadays, and sometimes even Python 3. You can check which version(s) you have by typing the following commands:
20 |
21 | $ python --version # for Python 2
22 | $ python3 --version # for Python 3
23 |
24 | Right now, only Python 3.6 is supported (TensorFlow support for Python 3.7 is [coming soon](https://github.com/tensorflow/tensorflow/issues/20517)). If you don't have Python 3, I strongly recommend installing it (Python 2.7 may work with minor adjustments, but it is deprecated, so Python 3 is preferable). To do so, you have several options: on Windows or MacOSX, you can just download it from [python.org](https://www.python.org/downloads/). On MacOSX, you can alternatively use [MacPorts](https://www.macports.org/) or [Homebrew](https://brew.sh/). If you are using Python 3.6 on MacOSX, you need to run the following command to install the `certifi` package of certificates, because Python 3.6 on MacOSX has no certificates to validate SSL connections (see this [StackOverflow question](https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error)):
25 |
26 | $ /Applications/Python\ 3.6/Install\ Certificates.command
27 |
28 | On Linux, unless you know what you are doing, you should use your system's packaging system. For example, on Debian or Ubuntu, type:
29 |
30 | $ sudo apt-get update
31 | $ sudo apt-get install python3
32 |
33 | Another option is to download and install [Anaconda](https://www.continuum.io/downloads). This is a package that includes both Python and many scientific libraries. You should prefer the Python 3 version.
34 |
35 | If you choose to use Anaconda, read the next section, or else jump to the [Using pip](#using-pip) section.
36 |
37 | ## Using Anaconda
38 |
39 | **Warning**: this section does not work yet, since TensorFlow 2.0 is not yet available in the Anaconda repositories.
40 |
41 | When using Anaconda, you can optionally create an isolated Python environment dedicated to this project. This is recommended as it makes it possible to have a different environment for each project (e.g. one for this project), with potentially different libraries and library versions:
42 |
43 | $ conda create -n mlbook python=3.6 anaconda
44 | $ conda activate mlbook
45 |
46 | This creates a fresh Python 3.6 environment called `mlbook` (you can change the name if you want to), and it activates it. This environment contains all the scientific libraries that come with Anaconda. This includes all the libraries we will need (NumPy, Matplotlib, Pandas, Jupyter and a few others), except for TensorFlow, so let's install it:
47 |
48 | $ conda install -n mlbook -c conda-forge tensorflow
49 |
50 | This installs the latest version of TensorFlow available for Anaconda (which is usually *not* the latest TensorFlow version) in the `mlbook` environment (fetching it from the `conda-forge` repository). If you chose not to create an `mlbook` environment, then just remove the `-n mlbook` option.
51 |
52 | Next, you can optionally install Jupyter extensions. These are useful to have nice tables of contents in the notebooks, but they are not required.
53 |
54 | $ conda install -n mlbook -c conda-forge jupyter_contrib_nbextensions
55 |
56 | You are all set! Next, jump to the [Starting Jupyter](#starting-jupyter) section.
57 |
58 | ## Using pip
59 |
60 | If you are not using Anaconda, you need to install several scientific Python libraries that are necessary for this project, in particular NumPy, Matplotlib, Pandas, Jupyter and TensorFlow (and a few others). For this, you can either use Python's integrated packaging system, pip, or you may prefer to use your system's own packaging system (if available, e.g. on Linux, or on MacOSX when using MacPorts or Homebrew). The advantage of using pip is that it is easy to create multiple isolated Python environments with different libraries and different library versions (e.g. one environment for each project). The advantage of using your system's packaging system is that there is less risk of having conflicts between your Python libraries and your system's other packages. Since I have many projects with different library requirements, I prefer to use pip with isolated environments. Moreover, the pip packages are usually the most recent ones available, while Anaconda and system packages often lag behind a bit.
61 |
62 | These are the commands you need to type in a terminal if you want to use pip to install the required libraries. Note: in all the following commands, if you chose to use Python 2 rather than Python 3, you must replace `pip3` with `pip`, and `python3` with `python`.
63 |
64 | First you need to make sure you have the latest version of pip installed:
65 |
66 | $ python3 -m pip install --user --upgrade pip setuptools
67 |
68 | The `--user` option will install the latest version of pip only for the current user. If you prefer to install it system wide (i.e. for all users), you must have administrator rights (e.g. use `sudo python3 -m pip` instead of `python3 -m pip` on Linux), and you should remove the `--user` option. The same is true of the command below that uses the `--user` option.
69 |
70 | Next, you can optionally create an isolated environment. This is recommended as it makes it possible to have a different environment for each project (e.g. one for this project), with potentially very different libraries, and different versions:
71 |
72 | $ python3 -m pip install --user --upgrade virtualenv
73 | $ virtualenv -p `which python3` env
74 |
75 | This creates a new directory called `env` in the current directory, containing an isolated Python environment based on Python 3. If you installed multiple versions of Python 3 on your system, you can replace `` `which python3` `` with the path to the Python executable you prefer to use.
76 |
77 | Now you must activate this environment. You will need to run this command every time you want to use this environment.
78 |
79 | $ source ./env/bin/activate
80 |
81 | On Windows, the command is slightly different:
82 |
83 | $ .\env\Scripts\activate
84 |
85 | Next, use pip to install the required python packages. If you are not using virtualenv, you should add the `--user` option (alternatively you could install the libraries system-wide, but this will probably require administrator rights, e.g. using `sudo pip3` instead of `pip3` on Linux).
86 |
87 | $ python3 -m pip install --upgrade -r requirements.txt
88 |
89 | Great! You're all set, you just need to start Jupyter now.
90 |
91 | ## Starting Jupyter
92 | Okay! You can now start Jupyter, simply type:
93 |
94 | $ jupyter notebook
95 |
96 | This should open up your browser, and you should see Jupyter's tree view, with the contents of the current directory. If your browser does not open automatically, visit [localhost:8888](http://localhost:8888/tree). Click on `index.ipynb` to get started!
97 |
98 | Congrats! You are ready to learn Machine Learning, hands on!
99 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 |
179 |
--------------------------------------------------------------------------------
/datasets/inception/imagenet_class_names.txt:
--------------------------------------------------------------------------------
1 | n01440764 tench, Tinca tinca
2 | n01443537 goldfish, Carassius auratus
3 | n01484850 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias
4 | n01491361 tiger shark, Galeocerdo cuvieri
5 | n01494475 hammerhead, hammerhead shark
6 | n01496331 electric ray, crampfish, numbfish, torpedo
7 | n01498041 stingray
8 | n01514668 cock
9 | n01514859 hen
10 | n01518878 ostrich, Struthio camelus
11 | n01530575 brambling, Fringilla montifringilla
12 | n01531178 goldfinch, Carduelis carduelis
13 | n01532829 house finch, linnet, Carpodacus mexicanus
14 | n01534433 junco, snowbird
15 | n01537544 indigo bunting, indigo finch, indigo bird, Passerina cyanea
16 | n01558993 robin, American robin, Turdus migratorius
17 | n01560419 bulbul
18 | n01580077 jay
19 | n01582220 magpie
20 | n01592084 chickadee
21 | n01601694 water ouzel, dipper
22 | n01608432 kite
23 | n01614925 bald eagle, American eagle, Haliaeetus leucocephalus
24 | n01616318 vulture
25 | n01622779 great grey owl, great gray owl, Strix nebulosa
26 | n01629819 European fire salamander, Salamandra salamandra
27 | n01630670 common newt, Triturus vulgaris
28 | n01631663 eft
29 | n01632458 spotted salamander, Ambystoma maculatum
30 | n01632777 axolotl, mud puppy, Ambystoma mexicanum
31 | n01641577 bullfrog, Rana catesbeiana
32 | n01644373 tree frog, tree-frog
33 | n01644900 tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui
34 | n01664065 loggerhead, loggerhead turtle, Caretta caretta
35 | n01665541 leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea
36 | n01667114 mud turtle
37 | n01667778 terrapin
38 | n01669191 box turtle, box tortoise
39 | n01675722 banded gecko
40 | n01677366 common iguana, iguana, Iguana iguana
41 | n01682714 American chameleon, anole, Anolis carolinensis
42 | n01685808 whiptail, whiptail lizard
43 | n01687978 agama
44 | n01688243 frilled lizard, Chlamydosaurus kingi
45 | n01689811 alligator lizard
46 | n01692333 Gila monster, Heloderma suspectum
47 | n01693334 green lizard, Lacerta viridis
48 | n01694178 African chameleon, Chamaeleo chamaeleon
49 | n01695060 Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis
50 | n01697457 African crocodile, Nile crocodile, Crocodylus niloticus
51 | n01698640 American alligator, Alligator mississipiensis
52 | n01704323 triceratops
53 | n01728572 thunder snake, worm snake, Carphophis amoenus
54 | n01728920 ringneck snake, ring-necked snake, ring snake
55 | n01729322 hognose snake, puff adder, sand viper
56 | n01729977 green snake, grass snake
57 | n01734418 king snake, kingsnake
58 | n01735189 garter snake, grass snake
59 | n01737021 water snake
60 | n01739381 vine snake
61 | n01740131 night snake, Hypsiglena torquata
62 | n01742172 boa constrictor, Constrictor constrictor
63 | n01744401 rock python, rock snake, Python sebae
64 | n01748264 Indian cobra, Naja naja
65 | n01749939 green mamba
66 | n01751748 sea snake
67 | n01753488 horned viper, cerastes, sand viper, horned asp, Cerastes cornutus
68 | n01755581 diamondback, diamondback rattlesnake, Crotalus adamanteus
69 | n01756291 sidewinder, horned rattlesnake, Crotalus cerastes
70 | n01768244 trilobite
71 | n01770081 harvestman, daddy longlegs, Phalangium opilio
72 | n01770393 scorpion
73 | n01773157 black and gold garden spider, Argiope aurantia
74 | n01773549 barn spider, Araneus cavaticus
75 | n01773797 garden spider, Aranea diademata
76 | n01774384 black widow, Latrodectus mactans
77 | n01774750 tarantula
78 | n01775062 wolf spider, hunting spider
79 | n01776313 tick
80 | n01784675 centipede
81 | n01795545 black grouse
82 | n01796340 ptarmigan
83 | n01797886 ruffed grouse, partridge, Bonasa umbellus
84 | n01798484 prairie chicken, prairie grouse, prairie fowl
85 | n01806143 peacock
86 | n01806567 quail
87 | n01807496 partridge
88 | n01817953 African grey, African gray, Psittacus erithacus
89 | n01818515 macaw
90 | n01819313 sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita
91 | n01820546 lorikeet
92 | n01824575 coucal
93 | n01828970 bee eater
94 | n01829413 hornbill
95 | n01833805 hummingbird
96 | n01843065 jacamar
97 | n01843383 toucan
98 | n01847000 drake
99 | n01855032 red-breasted merganser, Mergus serrator
100 | n01855672 goose
101 | n01860187 black swan, Cygnus atratus
102 | n01871265 tusker
103 | n01872401 echidna, spiny anteater, anteater
104 | n01873310 platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus
105 | n01877812 wallaby, brush kangaroo
106 | n01882714 koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus
107 | n01883070 wombat
108 | n01910747 jellyfish
109 | n01914609 sea anemone, anemone
110 | n01917289 brain coral
111 | n01924916 flatworm, platyhelminth
112 | n01930112 nematode, nematode worm, roundworm
113 | n01943899 conch
114 | n01944390 snail
115 | n01945685 slug
116 | n01950731 sea slug, nudibranch
117 | n01955084 chiton, coat-of-mail shell, sea cradle, polyplacophore
118 | n01968897 chambered nautilus, pearly nautilus, nautilus
119 | n01978287 Dungeness crab, Cancer magister
120 | n01978455 rock crab, Cancer irroratus
121 | n01980166 fiddler crab
122 | n01981276 king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica
123 | n01983481 American lobster, Northern lobster, Maine lobster, Homarus americanus
124 | n01984695 spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish
125 | n01985128 crayfish, crawfish, crawdad, crawdaddy
126 | n01986214 hermit crab
127 | n01990800 isopod
128 | n02002556 white stork, Ciconia ciconia
129 | n02002724 black stork, Ciconia nigra
130 | n02006656 spoonbill
131 | n02007558 flamingo
132 | n02009229 little blue heron, Egretta caerulea
133 | n02009912 American egret, great white heron, Egretta albus
134 | n02011460 bittern
135 | n02012849 crane
136 | n02013706 limpkin, Aramus pictus
137 | n02017213 European gallinule, Porphyrio porphyrio
138 | n02018207 American coot, marsh hen, mud hen, water hen, Fulica americana
139 | n02018795 bustard
140 | n02025239 ruddy turnstone, Arenaria interpres
141 | n02027492 red-backed sandpiper, dunlin, Erolia alpina
142 | n02028035 redshank, Tringa totanus
143 | n02033041 dowitcher
144 | n02037110 oystercatcher, oyster catcher
145 | n02051845 pelican
146 | n02056570 king penguin, Aptenodytes patagonica
147 | n02058221 albatross, mollymawk
148 | n02066245 grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus
149 | n02071294 killer whale, killer, orca, grampus, sea wolf, Orcinus orca
150 | n02074367 dugong, Dugong dugon
151 | n02077923 sea lion
152 | n02085620 Chihuahua
153 | n02085782 Japanese spaniel
154 | n02085936 Maltese dog, Maltese terrier, Maltese
155 | n02086079 Pekinese, Pekingese, Peke
156 | n02086240 Shih-Tzu
157 | n02086646 Blenheim spaniel
158 | n02086910 papillon
159 | n02087046 toy terrier
160 | n02087394 Rhodesian ridgeback
161 | n02088094 Afghan hound, Afghan
162 | n02088238 basset, basset hound
163 | n02088364 beagle
164 | n02088466 bloodhound, sleuthhound
165 | n02088632 bluetick
166 | n02089078 black-and-tan coonhound
167 | n02089867 Walker hound, Walker foxhound
168 | n02089973 English foxhound
169 | n02090379 redbone
170 | n02090622 borzoi, Russian wolfhound
171 | n02090721 Irish wolfhound
172 | n02091032 Italian greyhound
173 | n02091134 whippet
174 | n02091244 Ibizan hound, Ibizan Podenco
175 | n02091467 Norwegian elkhound, elkhound
176 | n02091635 otterhound, otter hound
177 | n02091831 Saluki, gazelle hound
178 | n02092002 Scottish deerhound, deerhound
179 | n02092339 Weimaraner
180 | n02093256 Staffordshire bullterrier, Staffordshire bull terrier
181 | n02093428 American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier
182 | n02093647 Bedlington terrier
183 | n02093754 Border terrier
184 | n02093859 Kerry blue terrier
185 | n02093991 Irish terrier
186 | n02094114 Norfolk terrier
187 | n02094258 Norwich terrier
188 | n02094433 Yorkshire terrier
189 | n02095314 wire-haired fox terrier
190 | n02095570 Lakeland terrier
191 | n02095889 Sealyham terrier, Sealyham
192 | n02096051 Airedale, Airedale terrier
193 | n02096177 cairn, cairn terrier
194 | n02096294 Australian terrier
195 | n02096437 Dandie Dinmont, Dandie Dinmont terrier
196 | n02096585 Boston bull, Boston terrier
197 | n02097047 miniature schnauzer
198 | n02097130 giant schnauzer
199 | n02097209 standard schnauzer
200 | n02097298 Scotch terrier, Scottish terrier, Scottie
201 | n02097474 Tibetan terrier, chrysanthemum dog
202 | n02097658 silky terrier, Sydney silky
203 | n02098105 soft-coated wheaten terrier
204 | n02098286 West Highland white terrier
205 | n02098413 Lhasa, Lhasa apso
206 | n02099267 flat-coated retriever
207 | n02099429 curly-coated retriever
208 | n02099601 golden retriever
209 | n02099712 Labrador retriever
210 | n02099849 Chesapeake Bay retriever
211 | n02100236 German short-haired pointer
212 | n02100583 vizsla, Hungarian pointer
213 | n02100735 English setter
214 | n02100877 Irish setter, red setter
215 | n02101006 Gordon setter
216 | n02101388 Brittany spaniel
217 | n02101556 clumber, clumber spaniel
218 | n02102040 English springer, English springer spaniel
219 | n02102177 Welsh springer spaniel
220 | n02102318 cocker spaniel, English cocker spaniel, cocker
221 | n02102480 Sussex spaniel
222 | n02102973 Irish water spaniel
223 | n02104029 kuvasz
224 | n02104365 schipperke
225 | n02105056 groenendael
226 | n02105162 malinois
227 | n02105251 briard
228 | n02105412 kelpie
229 | n02105505 komondor
230 | n02105641 Old English sheepdog, bobtail
231 | n02105855 Shetland sheepdog, Shetland sheep dog, Shetland
232 | n02106030 collie
233 | n02106166 Border collie
234 | n02106382 Bouvier des Flandres, Bouviers des Flandres
235 | n02106550 Rottweiler
236 | n02106662 German shepherd, German shepherd dog, German police dog, alsatian
237 | n02107142 Doberman, Doberman pinscher
238 | n02107312 miniature pinscher
239 | n02107574 Greater Swiss Mountain dog
240 | n02107683 Bernese mountain dog
241 | n02107908 Appenzeller
242 | n02108000 EntleBucher
243 | n02108089 boxer
244 | n02108422 bull mastiff
245 | n02108551 Tibetan mastiff
246 | n02108915 French bulldog
247 | n02109047 Great Dane
248 | n02109525 Saint Bernard, St Bernard
249 | n02109961 Eskimo dog, husky
250 | n02110063 malamute, malemute, Alaskan malamute
251 | n02110185 Siberian husky
252 | n02110341 dalmatian, coach dog, carriage dog
253 | n02110627 affenpinscher, monkey pinscher, monkey dog
254 | n02110806 basenji
255 | n02110958 pug, pug-dog
256 | n02111129 Leonberg
257 | n02111277 Newfoundland, Newfoundland dog
258 | n02111500 Great Pyrenees
259 | n02111889 Samoyed, Samoyede
260 | n02112018 Pomeranian
261 | n02112137 chow, chow chow
262 | n02112350 keeshond
263 | n02112706 Brabancon griffon
264 | n02113023 Pembroke, Pembroke Welsh corgi
265 | n02113186 Cardigan, Cardigan Welsh corgi
266 | n02113624 toy poodle
267 | n02113712 miniature poodle
268 | n02113799 standard poodle
269 | n02113978 Mexican hairless
270 | n02114367 timber wolf, grey wolf, gray wolf, Canis lupus
271 | n02114548 white wolf, Arctic wolf, Canis lupus tundrarum
272 | n02114712 red wolf, maned wolf, Canis rufus, Canis niger
273 | n02114855 coyote, prairie wolf, brush wolf, Canis latrans
274 | n02115641 dingo, warrigal, warragal, Canis dingo
275 | n02115913 dhole, Cuon alpinus
276 | n02116738 African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus
277 | n02117135 hyena, hyaena
278 | n02119022 red fox, Vulpes vulpes
279 | n02119789 kit fox, Vulpes macrotis
280 | n02120079 Arctic fox, white fox, Alopex lagopus
281 | n02120505 grey fox, gray fox, Urocyon cinereoargenteus
282 | n02123045 tabby, tabby cat
283 | n02123159 tiger cat
284 | n02123394 Persian cat
285 | n02123597 Siamese cat, Siamese
286 | n02124075 Egyptian cat
287 | n02125311 cougar, puma, catamount, mountain lion, painter, panther, Felis concolor
288 | n02127052 lynx, catamount
289 | n02128385 leopard, Panthera pardus
290 | n02128757 snow leopard, ounce, Panthera uncia
291 | n02128925 jaguar, panther, Panthera onca, Felis onca
292 | n02129165 lion, king of beasts, Panthera leo
293 | n02129604 tiger, Panthera tigris
294 | n02130308 cheetah, chetah, Acinonyx jubatus
295 | n02132136 brown bear, bruin, Ursus arctos
296 | n02133161 American black bear, black bear, Ursus americanus, Euarctos americanus
297 | n02134084 ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus
298 | n02134418 sloth bear, Melursus ursinus, Ursus ursinus
299 | n02137549 mongoose
300 | n02138441 meerkat, mierkat
301 | n02165105 tiger beetle
302 | n02165456 ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle
303 | n02167151 ground beetle, carabid beetle
304 | n02168699 long-horned beetle, longicorn, longicorn beetle
305 | n02169497 leaf beetle, chrysomelid
306 | n02172182 dung beetle
307 | n02174001 rhinoceros beetle
308 | n02177972 weevil
309 | n02190166 fly
310 | n02206856 bee
311 | n02219486 ant, emmet, pismire
312 | n02226429 grasshopper, hopper
313 | n02229544 cricket
314 | n02231487 walking stick, walkingstick, stick insect
315 | n02233338 cockroach, roach
316 | n02236044 mantis, mantid
317 | n02256656 cicada, cicala
318 | n02259212 leafhopper
319 | n02264363 lacewing, lacewing fly
320 | n02268443 dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk
321 | n02268853 damselfly
322 | n02276258 admiral
323 | n02277742 ringlet, ringlet butterfly
324 | n02279972 monarch, monarch butterfly, milkweed butterfly, Danaus plexippus
325 | n02280649 cabbage butterfly
326 | n02281406 sulphur butterfly, sulfur butterfly
327 | n02281787 lycaenid, lycaenid butterfly
328 | n02317335 starfish, sea star
329 | n02319095 sea urchin
330 | n02321529 sea cucumber, holothurian
331 | n02325366 wood rabbit, cottontail, cottontail rabbit
332 | n02326432 hare
333 | n02328150 Angora, Angora rabbit
334 | n02342885 hamster
335 | n02346627 porcupine, hedgehog
336 | n02356798 fox squirrel, eastern fox squirrel, Sciurus niger
337 | n02361337 marmot
338 | n02363005 beaver
339 | n02364673 guinea pig, Cavia cobaya
340 | n02389026 sorrel
341 | n02391049 zebra
342 | n02395406 hog, pig, grunter, squealer, Sus scrofa
343 | n02396427 wild boar, boar, Sus scrofa
344 | n02397096 warthog
345 | n02398521 hippopotamus, hippo, river horse, Hippopotamus amphibius
346 | n02403003 ox
347 | n02408429 water buffalo, water ox, Asiatic buffalo, Bubalus bubalis
348 | n02410509 bison
349 | n02412080 ram, tup
350 | n02415577 bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis
351 | n02417914 ibex, Capra ibex
352 | n02422106 hartebeest
353 | n02422699 impala, Aepyceros melampus
354 | n02423022 gazelle
355 | n02437312 Arabian camel, dromedary, Camelus dromedarius
356 | n02437616 llama
357 | n02441942 weasel
358 | n02442845 mink
359 | n02443114 polecat, fitch, foulmart, foumart, Mustela putorius
360 | n02443484 black-footed ferret, ferret, Mustela nigripes
361 | n02444819 otter
362 | n02445715 skunk, polecat, wood pussy
363 | n02447366 badger
364 | n02454379 armadillo
365 | n02457408 three-toed sloth, ai, Bradypus tridactylus
366 | n02480495 orangutan, orang, orangutang, Pongo pygmaeus
367 | n02480855 gorilla, Gorilla gorilla
368 | n02481823 chimpanzee, chimp, Pan troglodytes
369 | n02483362 gibbon, Hylobates lar
370 | n02483708 siamang, Hylobates syndactylus, Symphalangus syndactylus
371 | n02484975 guenon, guenon monkey
372 | n02486261 patas, hussar monkey, Erythrocebus patas
373 | n02486410 baboon
374 | n02487347 macaque
375 | n02488291 langur
376 | n02488702 colobus, colobus monkey
377 | n02489166 proboscis monkey, Nasalis larvatus
378 | n02490219 marmoset
379 | n02492035 capuchin, ringtail, Cebus capucinus
380 | n02492660 howler monkey, howler
381 | n02493509 titi, titi monkey
382 | n02493793 spider monkey, Ateles geoffroyi
383 | n02494079 squirrel monkey, Saimiri sciureus
384 | n02497673 Madagascar cat, ring-tailed lemur, Lemur catta
385 | n02500267 indri, indris, Indri indri, Indri brevicaudatus
386 | n02504013 Indian elephant, Elephas maximus
387 | n02504458 African elephant, Loxodonta africana
388 | n02509815 lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens
389 | n02510455 giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca
390 | n02514041 barracouta, snoek
391 | n02526121 eel
392 | n02536864 coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch
393 | n02606052 rock beauty, Holocanthus tricolor
394 | n02607072 anemone fish
395 | n02640242 sturgeon
396 | n02641379 gar, garfish, garpike, billfish, Lepisosteus osseus
397 | n02643566 lionfish
398 | n02655020 puffer, pufferfish, blowfish, globefish
399 | n02666196 abacus
400 | n02667093 abaya
401 | n02669723 academic gown, academic robe, judge's robe
402 | n02672831 accordion, piano accordion, squeeze box
403 | n02676566 acoustic guitar
404 | n02687172 aircraft carrier, carrier, flattop, attack aircraft carrier
405 | n02690373 airliner
406 | n02692877 airship, dirigible
407 | n02699494 altar
408 | n02701002 ambulance
409 | n02704792 amphibian, amphibious vehicle
410 | n02708093 analog clock
411 | n02727426 apiary, bee house
412 | n02730930 apron
413 | n02747177 ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin
414 | n02749479 assault rifle, assault gun
415 | n02769748 backpack, back pack, knapsack, packsack, rucksack, haversack
416 | n02776631 bakery, bakeshop, bakehouse
417 | n02777292 balance beam, beam
418 | n02782093 balloon
419 | n02783161 ballpoint, ballpoint pen, ballpen, Biro
420 | n02786058 Band Aid
421 | n02787622 banjo
422 | n02788148 bannister, banister, balustrade, balusters, handrail
423 | n02790996 barbell
424 | n02791124 barber chair
425 | n02791270 barbershop
426 | n02793495 barn
427 | n02794156 barometer
428 | n02795169 barrel, cask
429 | n02797295 barrow, garden cart, lawn cart, wheelbarrow
430 | n02799071 baseball
431 | n02802426 basketball
432 | n02804414 bassinet
433 | n02804610 bassoon
434 | n02807133 bathing cap, swimming cap
435 | n02808304 bath towel
436 | n02808440 bathtub, bathing tub, bath, tub
437 | n02814533 beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon
438 | n02814860 beacon, lighthouse, beacon light, pharos
439 | n02815834 beaker
440 | n02817516 bearskin, busby, shako
441 | n02823428 beer bottle
442 | n02823750 beer glass
443 | n02825657 bell cote, bell cot
444 | n02834397 bib
445 | n02835271 bicycle-built-for-two, tandem bicycle, tandem
446 | n02837789 bikini, two-piece
447 | n02840245 binder, ring-binder
448 | n02841315 binoculars, field glasses, opera glasses
449 | n02843684 birdhouse
450 | n02859443 boathouse
451 | n02860847 bobsled, bobsleigh, bob
452 | n02865351 bolo tie, bolo, bola tie, bola
453 | n02869837 bonnet, poke bonnet
454 | n02870880 bookcase
455 | n02871525 bookshop, bookstore, bookstall
456 | n02877765 bottlecap
457 | n02879718 bow
458 | n02883205 bow tie, bow-tie, bowtie
459 | n02892201 brass, memorial tablet, plaque
460 | n02892767 brassiere, bra, bandeau
461 | n02894605 breakwater, groin, groyne, mole, bulwark, seawall, jetty
462 | n02895154 breastplate, aegis, egis
463 | n02906734 broom
464 | n02909870 bucket, pail
465 | n02910353 buckle
466 | n02916936 bulletproof vest
467 | n02917067 bullet train, bullet
468 | n02927161 butcher shop, meat market
469 | n02930766 cab, hack, taxi, taxicab
470 | n02939185 caldron, cauldron
471 | n02948072 candle, taper, wax light
472 | n02950826 cannon
473 | n02951358 canoe
474 | n02951585 can opener, tin opener
475 | n02963159 cardigan
476 | n02965783 car mirror
477 | n02966193 carousel, carrousel, merry-go-round, roundabout, whirligig
478 | n02966687 carpenter's kit, tool kit
479 | n02971356 carton
480 | n02974003 car wheel
481 | n02977058 cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM
482 | n02978881 cassette
483 | n02979186 cassette player
484 | n02980441 castle
485 | n02981792 catamaran
486 | n02988304 CD player
487 | n02992211 cello, violoncello
488 | n02992529 cellular telephone, cellular phone, cellphone, cell, mobile phone
489 | n02999410 chain
490 | n03000134 chainlink fence
491 | n03000247 chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour
492 | n03000684 chain saw, chainsaw
493 | n03014705 chest
494 | n03016953 chiffonier, commode
495 | n03017168 chime, bell, gong
496 | n03018349 china cabinet, china closet
497 | n03026506 Christmas stocking
498 | n03028079 church, church building
499 | n03032252 cinema, movie theater, movie theatre, movie house, picture palace
500 | n03041632 cleaver, meat cleaver, chopper
501 | n03042490 cliff dwelling
502 | n03045698 cloak
503 | n03047690 clog, geta, patten, sabot
504 | n03062245 cocktail shaker
505 | n03063599 coffee mug
506 | n03063689 coffeepot
507 | n03065424 coil, spiral, volute, whorl, helix
508 | n03075370 combination lock
509 | n03085013 computer keyboard, keypad
510 | n03089624 confectionery, confectionary, candy store
511 | n03095699 container ship, containership, container vessel
512 | n03100240 convertible
513 | n03109150 corkscrew, bottle screw
514 | n03110669 cornet, horn, trumpet, trump
515 | n03124043 cowboy boot
516 | n03124170 cowboy hat, ten-gallon hat
517 | n03125729 cradle
518 | n03126707 crane
519 | n03127747 crash helmet
520 | n03127925 crate
521 | n03131574 crib, cot
522 | n03133878 Crock Pot
523 | n03134739 croquet ball
524 | n03141823 crutch
525 | n03146219 cuirass
526 | n03160309 dam, dike, dyke
527 | n03179701 desk
528 | n03180011 desktop computer
529 | n03187595 dial telephone, dial phone
530 | n03188531 diaper, nappy, napkin
531 | n03196217 digital clock
532 | n03197337 digital watch
533 | n03201208 dining table, board
534 | n03207743 dishrag, dishcloth
535 | n03207941 dishwasher, dish washer, dishwashing machine
536 | n03208938 disk brake, disc brake
537 | n03216828 dock, dockage, docking facility
538 | n03218198 dogsled, dog sled, dog sleigh
539 | n03220513 dome
540 | n03223299 doormat, welcome mat
541 | n03240683 drilling platform, offshore rig
542 | n03249569 drum, membranophone, tympan
543 | n03250847 drumstick
544 | n03255030 dumbbell
545 | n03259280 Dutch oven
546 | n03271574 electric fan, blower
547 | n03272010 electric guitar
548 | n03272562 electric locomotive
549 | n03290653 entertainment center
550 | n03291819 envelope
551 | n03297495 espresso maker
552 | n03314780 face powder
553 | n03325584 feather boa, boa
554 | n03337140 file, file cabinet, filing cabinet
555 | n03344393 fireboat
556 | n03345487 fire engine, fire truck
557 | n03347037 fire screen, fireguard
558 | n03355925 flagpole, flagstaff
559 | n03372029 flute, transverse flute
560 | n03376595 folding chair
561 | n03379051 football helmet
562 | n03384352 forklift
563 | n03388043 fountain
564 | n03388183 fountain pen
565 | n03388549 four-poster
566 | n03393912 freight car
567 | n03394916 French horn, horn
568 | n03400231 frying pan, frypan, skillet
569 | n03404251 fur coat
570 | n03417042 garbage truck, dustcart
571 | n03424325 gasmask, respirator, gas helmet
572 | n03425413 gas pump, gasoline pump, petrol pump, island dispenser
573 | n03443371 goblet
574 | n03444034 go-kart
575 | n03445777 golf ball
576 | n03445924 golfcart, golf cart
577 | n03447447 gondola
578 | n03447721 gong, tam-tam
579 | n03450230 gown
580 | n03452741 grand piano, grand
581 | n03457902 greenhouse, nursery, glasshouse
582 | n03459775 grille, radiator grille
583 | n03461385 grocery store, grocery, food market, market
584 | n03467068 guillotine
585 | n03476684 hair slide
586 | n03476991 hair spray
587 | n03478589 half track
588 | n03481172 hammer
589 | n03482405 hamper
590 | n03483316 hand blower, blow dryer, blow drier, hair dryer, hair drier
591 | n03485407 hand-held computer, hand-held microcomputer
592 | n03485794 handkerchief, hankie, hanky, hankey
593 | n03492542 hard disc, hard disk, fixed disk
594 | n03494278 harmonica, mouth organ, harp, mouth harp
595 | n03495258 harp
596 | n03496892 harvester, reaper
597 | n03498962 hatchet
598 | n03527444 holster
599 | n03529860 home theater, home theatre
600 | n03530642 honeycomb
601 | n03532672 hook, claw
602 | n03534580 hoopskirt, crinoline
603 | n03535780 horizontal bar, high bar
604 | n03538406 horse cart, horse-cart
605 | n03544143 hourglass
606 | n03584254 iPod
607 | n03584829 iron, smoothing iron
608 | n03590841 jack-o'-lantern
609 | n03594734 jean, blue jean, denim
610 | n03594945 jeep, landrover
611 | n03595614 jersey, T-shirt, tee shirt
612 | n03598930 jigsaw puzzle
613 | n03599486 jinrikisha, ricksha, rickshaw
614 | n03602883 joystick
615 | n03617480 kimono
616 | n03623198 knee pad
617 | n03627232 knot
618 | n03630383 lab coat, laboratory coat
619 | n03633091 ladle
620 | n03637318 lampshade, lamp shade
621 | n03642806 laptop, laptop computer
622 | n03649909 lawn mower, mower
623 | n03657121 lens cap, lens cover
624 | n03658185 letter opener, paper knife, paperknife
625 | n03661043 library
626 | n03662601 lifeboat
627 | n03666591 lighter, light, igniter, ignitor
628 | n03670208 limousine, limo
629 | n03673027 liner, ocean liner
630 | n03676483 lipstick, lip rouge
631 | n03680355 Loafer
632 | n03690938 lotion
633 | n03691459 loudspeaker, speaker, speaker unit, loudspeaker system, speaker system
634 | n03692522 loupe, jeweler's loupe
635 | n03697007 lumbermill, sawmill
636 | n03706229 magnetic compass
637 | n03709823 mailbag, postbag
638 | n03710193 mailbox, letter box
639 | n03710637 maillot
640 | n03710721 maillot, tank suit
641 | n03717622 manhole cover
642 | n03720891 maraca
643 | n03721384 marimba, xylophone
644 | n03724870 mask
645 | n03729826 matchstick
646 | n03733131 maypole
647 | n03733281 maze, labyrinth
648 | n03733805 measuring cup
649 | n03742115 medicine chest, medicine cabinet
650 | n03743016 megalith, megalithic structure
651 | n03759954 microphone, mike
652 | n03761084 microwave, microwave oven
653 | n03763968 military uniform
654 | n03764736 milk can
655 | n03769881 minibus
656 | n03770439 miniskirt, mini
657 | n03770679 minivan
658 | n03773504 missile
659 | n03775071 mitten
660 | n03775546 mixing bowl
661 | n03776460 mobile home, manufactured home
662 | n03777568 Model T
663 | n03777754 modem
664 | n03781244 monastery
665 | n03782006 monitor
666 | n03785016 moped
667 | n03786901 mortar
668 | n03787032 mortarboard
669 | n03788195 mosque
670 | n03788365 mosquito net
671 | n03791053 motor scooter, scooter
672 | n03792782 mountain bike, all-terrain bike, off-roader
673 | n03792972 mountain tent
674 | n03793489 mouse, computer mouse
675 | n03794056 mousetrap
676 | n03796401 moving van
677 | n03803284 muzzle
678 | n03804744 nail
679 | n03814639 neck brace
680 | n03814906 necklace
681 | n03825788 nipple
682 | n03832673 notebook, notebook computer
683 | n03837869 obelisk
684 | n03838899 oboe, hautboy, hautbois
685 | n03840681 ocarina, sweet potato
686 | n03841143 odometer, hodometer, mileometer, milometer
687 | n03843555 oil filter
688 | n03854065 organ, pipe organ
689 | n03857828 oscilloscope, scope, cathode-ray oscilloscope, CRO
690 | n03866082 overskirt
691 | n03868242 oxcart
692 | n03868863 oxygen mask
693 | n03871628 packet
694 | n03873416 paddle, boat paddle
695 | n03874293 paddlewheel, paddle wheel
696 | n03874599 padlock
697 | n03876231 paintbrush
698 | n03877472 pajama, pyjama, pj's, jammies
699 | n03877845 palace
700 | n03884397 panpipe, pandean pipe, syrinx
701 | n03887697 paper towel
702 | n03888257 parachute, chute
703 | n03888605 parallel bars, bars
704 | n03891251 park bench
705 | n03891332 parking meter
706 | n03895866 passenger car, coach, carriage
707 | n03899768 patio, terrace
708 | n03902125 pay-phone, pay-station
709 | n03903868 pedestal, plinth, footstall
710 | n03908618 pencil box, pencil case
711 | n03908714 pencil sharpener
712 | n03916031 perfume, essence
713 | n03920288 Petri dish
714 | n03924679 photocopier
715 | n03929660 pick, plectrum, plectron
716 | n03929855 pickelhaube
717 | n03930313 picket fence, paling
718 | n03930630 pickup, pickup truck
719 | n03933933 pier
720 | n03935335 piggy bank, penny bank
721 | n03937543 pill bottle
722 | n03938244 pillow
723 | n03942813 ping-pong ball
724 | n03944341 pinwheel
725 | n03947888 pirate, pirate ship
726 | n03950228 pitcher, ewer
727 | n03954731 plane, carpenter's plane, woodworking plane
728 | n03956157 planetarium
729 | n03958227 plastic bag
730 | n03961711 plate rack
731 | n03967562 plow, plough
732 | n03970156 plunger, plumber's helper
733 | n03976467 Polaroid camera, Polaroid Land camera
734 | n03976657 pole
735 | n03977966 police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria
736 | n03980874 poncho
737 | n03982430 pool table, billiard table, snooker table
738 | n03983396 pop bottle, soda bottle
739 | n03991062 pot, flowerpot
740 | n03992509 potter's wheel
741 | n03995372 power drill
742 | n03998194 prayer rug, prayer mat
743 | n04004767 printer
744 | n04005630 prison, prison house
745 | n04008634 projectile, missile
746 | n04009552 projector
747 | n04019541 puck, hockey puck
748 | n04023962 punching bag, punch bag, punching ball, punchball
749 | n04026417 purse
750 | n04033901 quill, quill pen
751 | n04033995 quilt, comforter, comfort, puff
752 | n04037443 racer, race car, racing car
753 | n04039381 racket, racquet
754 | n04040759 radiator
755 | n04041544 radio, wireless
756 | n04044716 radio telescope, radio reflector
757 | n04049303 rain barrel
758 | n04065272 recreational vehicle, RV, R.V.
759 | n04067472 reel
760 | n04069434 reflex camera
761 | n04070727 refrigerator, icebox
762 | n04074963 remote control, remote
763 | n04081281 restaurant, eating house, eating place, eatery
764 | n04086273 revolver, six-gun, six-shooter
765 | n04090263 rifle
766 | n04099969 rocking chair, rocker
767 | n04111531 rotisserie
768 | n04116512 rubber eraser, rubber, pencil eraser
769 | n04118538 rugby ball
770 | n04118776 rule, ruler
771 | n04120489 running shoe
772 | n04125021 safe
773 | n04127249 safety pin
774 | n04131690 saltshaker, salt shaker
775 | n04133789 sandal
776 | n04136333 sarong
777 | n04141076 sax, saxophone
778 | n04141327 scabbard
779 | n04141975 scale, weighing machine
780 | n04146614 school bus
781 | n04147183 schooner
782 | n04149813 scoreboard
783 | n04152593 screen, CRT screen
784 | n04153751 screw
785 | n04154565 screwdriver
786 | n04162706 seat belt, seatbelt
787 | n04179913 sewing machine
788 | n04192698 shield, buckler
789 | n04200800 shoe shop, shoe-shop, shoe store
790 | n04201297 shoji
791 | n04204238 shopping basket
792 | n04204347 shopping cart
793 | n04208210 shovel
794 | n04209133 shower cap
795 | n04209239 shower curtain
796 | n04228054 ski
797 | n04229816 ski mask
798 | n04235860 sleeping bag
799 | n04238763 slide rule, slipstick
800 | n04239074 sliding door
801 | n04243546 slot, one-armed bandit
802 | n04251144 snorkel
803 | n04252077 snowmobile
804 | n04252225 snowplow, snowplough
805 | n04254120 soap dispenser
806 | n04254680 soccer ball
807 | n04254777 sock
808 | n04258138 solar dish, solar collector, solar furnace
809 | n04259630 sombrero
810 | n04263257 soup bowl
811 | n04264628 space bar
812 | n04265275 space heater
813 | n04266014 space shuttle
814 | n04270147 spatula
815 | n04273569 speedboat
816 | n04275548 spider web, spider's web
817 | n04277352 spindle
818 | n04285008 sports car, sport car
819 | n04286575 spotlight, spot
820 | n04296562 stage
821 | n04310018 steam locomotive
822 | n04311004 steel arch bridge
823 | n04311174 steel drum
824 | n04317175 stethoscope
825 | n04325704 stole
826 | n04326547 stone wall
827 | n04328186 stopwatch, stop watch
828 | n04330267 stove
829 | n04332243 strainer
830 | n04335435 streetcar, tram, tramcar, trolley, trolley car
831 | n04336792 stretcher
832 | n04344873 studio couch, day bed
833 | n04346328 stupa, tope
834 | n04347754 submarine, pigboat, sub, U-boat
835 | n04350905 suit, suit of clothes
836 | n04355338 sundial
837 | n04355933 sunglass
838 | n04356056 sunglasses, dark glasses, shades
839 | n04357314 sunscreen, sunblock, sun blocker
840 | n04366367 suspension bridge
841 | n04367480 swab, swob, mop
842 | n04370456 sweatshirt
843 | n04371430 swimming trunks, bathing trunks
844 | n04371774 swing
845 | n04372370 switch, electric switch, electrical switch
846 | n04376876 syringe
847 | n04380533 table lamp
848 | n04389033 tank, army tank, armored combat vehicle, armoured combat vehicle
849 | n04392985 tape player
850 | n04398044 teapot
851 | n04399382 teddy, teddy bear
852 | n04404412 television, television system
853 | n04409515 tennis ball
854 | n04417672 thatch, thatched roof
855 | n04418357 theater curtain, theatre curtain
856 | n04423845 thimble
857 | n04428191 thresher, thrasher, threshing machine
858 | n04429376 throne
859 | n04435653 tile roof
860 | n04442312 toaster
861 | n04443257 tobacco shop, tobacconist shop, tobacconist
862 | n04447861 toilet seat
863 | n04456115 torch
864 | n04458633 totem pole
865 | n04461696 tow truck, tow car, wrecker
866 | n04462240 toyshop
867 | n04465501 tractor
868 | n04467665 trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi
869 | n04476259 tray
870 | n04479046 trench coat
871 | n04482393 tricycle, trike, velocipede
872 | n04483307 trimaran
873 | n04485082 tripod
874 | n04486054 triumphal arch
875 | n04487081 trolleybus, trolley coach, trackless trolley
876 | n04487394 trombone
877 | n04493381 tub, vat
878 | n04501370 turnstile
879 | n04505470 typewriter keyboard
880 | n04507155 umbrella
881 | n04509417 unicycle, monocycle
882 | n04515003 upright, upright piano
883 | n04517823 vacuum, vacuum cleaner
884 | n04522168 vase
885 | n04523525 vault
886 | n04525038 velvet
887 | n04525305 vending machine
888 | n04532106 vestment
889 | n04532670 viaduct
890 | n04536866 violin, fiddle
891 | n04540053 volleyball
892 | n04542943 waffle iron
893 | n04548280 wall clock
894 | n04548362 wallet, billfold, notecase, pocketbook
895 | n04550184 wardrobe, closet, press
896 | n04552348 warplane, military plane
897 | n04553703 washbasin, handbasin, washbowl, lavabo, wash-hand basin
898 | n04554684 washer, automatic washer, washing machine
899 | n04557648 water bottle
900 | n04560804 water jug
901 | n04562935 water tower
902 | n04579145 whiskey jug
903 | n04579432 whistle
904 | n04584207 wig
905 | n04589890 window screen
906 | n04590129 window shade
907 | n04591157 Windsor tie
908 | n04591713 wine bottle
909 | n04592741 wing
910 | n04596742 wok
911 | n04597913 wooden spoon
912 | n04599235 wool, woolen, woollen
913 | n04604644 worm fence, snake fence, snake-rail fence, Virginia fence
914 | n04606251 wreck
915 | n04612504 yawl
916 | n04613696 yurt
917 | n06359193 web site, website, internet site, site
918 | n06596364 comic book
919 | n06785654 crossword puzzle, crossword
920 | n06794110 street sign
921 | n06874185 traffic light, traffic signal, stoplight
922 | n07248320 book jacket, dust cover, dust jacket, dust wrapper
923 | n07565083 menu
924 | n07579787 plate
925 | n07583066 guacamole
926 | n07584110 consomme
927 | n07590611 hot pot, hotpot
928 | n07613480 trifle
929 | n07614500 ice cream, icecream
930 | n07615774 ice lolly, lolly, lollipop, popsicle
931 | n07684084 French loaf
932 | n07693725 bagel, beigel
933 | n07695742 pretzel
934 | n07697313 cheeseburger
935 | n07697537 hotdog, hot dog, red hot
936 | n07711569 mashed potato
937 | n07714571 head cabbage
938 | n07714990 broccoli
939 | n07715103 cauliflower
940 | n07716358 zucchini, courgette
941 | n07716906 spaghetti squash
942 | n07717410 acorn squash
943 | n07717556 butternut squash
944 | n07718472 cucumber, cuke
945 | n07718747 artichoke, globe artichoke
946 | n07720875 bell pepper
947 | n07730033 cardoon
948 | n07734744 mushroom
949 | n07742313 Granny Smith
950 | n07745940 strawberry
951 | n07747607 orange
952 | n07749582 lemon
953 | n07753113 fig
954 | n07753275 pineapple, ananas
955 | n07753592 banana
956 | n07754684 jackfruit, jak, jack
957 | n07760859 custard apple
958 | n07768694 pomegranate
959 | n07802026 hay
960 | n07831146 carbonara
961 | n07836838 chocolate sauce, chocolate syrup
962 | n07860988 dough
963 | n07871810 meat loaf, meatloaf
964 | n07873807 pizza, pizza pie
965 | n07875152 potpie
966 | n07880968 burrito
967 | n07892512 red wine
968 | n07920052 espresso
969 | n07930864 cup
970 | n07932039 eggnog
971 | n09193705 alp
972 | n09229709 bubble
973 | n09246464 cliff, drop, drop-off
974 | n09256479 coral reef
975 | n09288635 geyser
976 | n09332890 lakeside, lakeshore
977 | n09399592 promontory, headland, head, foreland
978 | n09421951 sandbar, sand bar
979 | n09428293 seashore, coast, seacoast, sea-coast
980 | n09468604 valley, vale
981 | n09472597 volcano
982 | n09835506 ballplayer, baseball player
983 | n10148035 groom, bridegroom
984 | n10565667 scuba diver
985 | n11879895 rapeseed
986 | n11939491 daisy
987 | n12057211 yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum
988 | n12144580 corn
989 | n12267677 acorn
990 | n12620546 hip, rose hip, rosehip
991 | n12768682 buckeye, horse chestnut, conker
992 | n12985857 coral fungus
993 | n12998815 agaric
994 | n13037406 gyromitra
995 | n13040303 stinkhorn, carrion fungus
996 | n13044778 earthstar
997 | n13052670 hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa
998 | n13054560 bolete
999 | n13133613 ear, spike, capitulum
1000 | n15075141 toilet tissue, toilet paper, bathroom tissue
--------------------------------------------------------------------------------
/work_in_progress/extra_autodiff.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "**Appendix D – Autodiff**"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 |     "_This notebook contains toy implementations of various autodiff techniques, to explain how they work._"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# Setup"
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 |     "First, let's make sure this notebook works well in both Python 2 and 3:"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 1,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "# To support both python 2 and python 3\n",
38 | "from __future__ import absolute_import, division, print_function, unicode_literals"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "# Introduction"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 |     "Suppose we want to compute the gradients of the function $f(x,y)=x^2y + y + 2$ with regard to the parameters $x$ and $y$:"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 2,
58 | "metadata": {},
59 | "outputs": [],
60 | "source": [
61 | "def f(x,y):\n",
62 | " return x*x*y + y + 2"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "One approach is to solve this analytically:\n",
70 | "\n",
71 | "$\\dfrac{\\partial f}{\\partial x} = 2xy$\n",
72 | "\n",
73 | "$\\dfrac{\\partial f}{\\partial y} = x^2 + 1$"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 3,
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "def df(x,y):\n",
83 | " return 2*x*y, x*x + 1"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {},
89 | "source": [
90 | "So for example $\\dfrac{\\partial f}{\\partial x}(3,4) = 24$ and $\\dfrac{\\partial f}{\\partial y}(3,4) = 10$."
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 4,
96 | "metadata": {},
97 | "outputs": [
98 | {
99 | "data": {
100 | "text/plain": [
101 | "(24, 10)"
102 | ]
103 | },
104 | "execution_count": 4,
105 | "metadata": {},
106 | "output_type": "execute_result"
107 | }
108 | ],
109 | "source": [
110 | "df(3, 4)"
111 | ]
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "metadata": {},
116 | "source": [
117 |     "Perfect! We can also find the equations for the second-order partial derivatives (which together form the Hessian matrix):\n",
118 | "\n",
119 | "$\\dfrac{\\partial^2 f}{\\partial x \\partial x} = \\dfrac{\\partial (2xy)}{\\partial x} = 2y$\n",
120 | "\n",
121 | "$\\dfrac{\\partial^2 f}{\\partial x \\partial y} = \\dfrac{\\partial (2xy)}{\\partial y} = 2x$\n",
122 | "\n",
123 | "$\\dfrac{\\partial^2 f}{\\partial y \\partial x} = \\dfrac{\\partial (x^2 + 1)}{\\partial x} = 2x$\n",
124 | "\n",
125 | "$\\dfrac{\\partial^2 f}{\\partial y \\partial y} = \\dfrac{\\partial (x^2 + 1)}{\\partial y} = 0$"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 |     "At $x=3$ and $y=4$, these second-order derivatives are respectively 8, 6, 6 and 0. Let's use the equations above to compute them:"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 5,
138 | "metadata": {},
139 | "outputs": [],
140 | "source": [
141 | "def d2f(x, y):\n",
142 | " return [2*y, 2*x], [2*x, 0]"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 6,
148 | "metadata": {},
149 | "outputs": [
150 | {
151 | "data": {
152 | "text/plain": [
153 | "([8, 6], [6, 0])"
154 | ]
155 | },
156 | "execution_count": 6,
157 | "metadata": {},
158 | "output_type": "execute_result"
159 | }
160 | ],
161 | "source": [
162 | "d2f(3, 4)"
163 | ]
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "metadata": {},
168 | "source": [
169 |     "Perfect, but this requires some mathematical work. It is not too hard in this case, but for a deep neural network, it is practically impossible to compute the derivatives this way. So let's look at various ways to automate this!"
170 | ]
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "metadata": {},
175 | "source": [
176 | "# Numeric differentiation"
177 | ]
178 | },
179 | {
180 | "cell_type": "markdown",
181 | "metadata": {},
182 | "source": [
183 |     "Here, we compute an approximation of the gradients using the equation: $\\dfrac{\\partial f}{\\partial x} = \\displaystyle{\\lim_{\\epsilon \\to 0}}\\dfrac{f(x+\\epsilon, y) - f(x, y)}{\\epsilon}$ (and there is a similar definition for $\\dfrac{\\partial f}{\\partial y}$)."
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": 7,
189 | "metadata": {},
190 | "outputs": [],
191 | "source": [
192 | "def gradients(func, vars_list, eps=0.0001):\n",
193 | " partial_derivatives = []\n",
194 | " base_func_eval = func(*vars_list)\n",
195 | " for idx in range(len(vars_list)):\n",
196 |     "        tweaked_vars = vars_list[:]  # copy the list, so we only tweak one variable\n",
197 | " tweaked_vars[idx] += eps\n",
198 | " tweaked_func_eval = func(*tweaked_vars)\n",
199 | " derivative = (tweaked_func_eval - base_func_eval) / eps\n",
200 | " partial_derivatives.append(derivative)\n",
201 | " return partial_derivatives"
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": 8,
207 | "metadata": {},
208 | "outputs": [],
209 | "source": [
210 | "def df(x, y):\n",
211 | " return gradients(f, [x, y])"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 9,
217 | "metadata": {},
218 | "outputs": [
219 | {
220 | "data": {
221 | "text/plain": [
222 | "[24.000400000048216, 10.000000000047748]"
223 | ]
224 | },
225 | "execution_count": 9,
226 | "metadata": {},
227 | "output_type": "execute_result"
228 | }
229 | ],
230 | "source": [
231 | "df(3, 4)"
232 | ]
233 | },
234 | {
235 | "cell_type": "markdown",
236 | "metadata": {},
237 | "source": [
238 | "It works well!"
239 | ]
240 | },
241 | {
242 | "cell_type": "markdown",
243 | "metadata": {},
244 | "source": [
245 |     "The good news is that it is pretty easy to compute the Hessians. First, let's create functions that compute the first-order partial derivatives (which together form the Jacobian):"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": 10,
251 | "metadata": {},
252 | "outputs": [
253 | {
254 | "data": {
255 | "text/plain": [
256 | "(24.000400000048216, 10.000000000047748)"
257 | ]
258 | },
259 | "execution_count": 10,
260 | "metadata": {},
261 | "output_type": "execute_result"
262 | }
263 | ],
264 | "source": [
265 | "def dfdx(x, y):\n",
266 | " return gradients(f, [x,y])[0]\n",
267 | "\n",
268 | "def dfdy(x, y):\n",
269 | " return gradients(f, [x,y])[1]\n",
270 | "\n",
271 | "dfdx(3., 4.), dfdy(3., 4.)"
272 | ]
273 | },
274 | {
275 | "cell_type": "markdown",
276 | "metadata": {},
277 | "source": [
278 | "Now we can simply apply the `gradients()` function to these functions:"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": 11,
284 | "metadata": {},
285 | "outputs": [],
286 | "source": [
287 | "def d2f(x, y):\n",
288 |     "    return [gradients(dfdx, [x, y]), gradients(dfdy, [x, y])]"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": 12,
294 | "metadata": {},
295 | "outputs": [
296 | {
297 | "data": {
298 | "text/plain": [
299 | "[[7.999999951380232, 6.000099261882497],\n",
300 | " [6.000099261882497, -1.4210854715202004e-06]]"
301 | ]
302 | },
303 | "execution_count": 12,
304 | "metadata": {},
305 | "output_type": "execute_result"
306 | }
307 | ],
308 | "source": [
309 | "d2f(3, 4)"
310 | ]
311 | },
312 | {
313 | "cell_type": "markdown",
314 | "metadata": {},
315 | "source": [
316 |     "So everything works well, but the result is only approximate, and computing the gradients of a function with regard to $n$ variables requires calling that function $n+1$ times (once for the base evaluation, plus once per tweaked variable). In deep neural nets, there are often thousands of parameters to tweak using gradient descent (which requires computing the gradients of the loss function with regard to each of these parameters), so this approach would be much too slow."
317 | ]
318 | },
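   |   {
   |    "cell_type": "markdown",
   |    "metadata": {},
   |    "source": [
   |     "As a side note, the approximation error can be reduced by using a *centered* difference, $\\dfrac{f(x+\\epsilon, y) - f(x-\\epsilon, y)}{2\\epsilon}$, at the cost of one extra function call per variable. Here is a minimal sketch (the `gradients_centered()` helper is just for illustration):"
   |    ]
   |   },
   |   {
   |    "cell_type": "code",
   |    "execution_count": null,
   |    "metadata": {},
   |    "outputs": [],
   |    "source": [
   |     "def gradients_centered(func, vars_list, eps=0.0001):\n",
   |     "    # Same idea as gradients(), but the error shrinks like eps**2 rather than eps\n",
   |     "    partial_derivatives = []\n",
   |     "    for idx in range(len(vars_list)):\n",
   |     "        vars_plus, vars_minus = vars_list[:], vars_list[:]\n",
   |     "        vars_plus[idx] += eps\n",
   |     "        vars_minus[idx] -= eps\n",
   |     "        derivative = (func(*vars_plus) - func(*vars_minus)) / (2 * eps)\n",
   |     "        partial_derivatives.append(derivative)\n",
   |     "    return partial_derivatives\n",
   |     "\n",
   |     "gradients_centered(f, [3., 4.])  # should be very close to [24.0, 10.0]"
   |    ]
   |   },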
319 | {
320 | "cell_type": "markdown",
321 | "metadata": {},
322 | "source": [
323 | "## Implementing a Toy Computation Graph"
324 | ]
325 | },
326 | {
327 | "cell_type": "markdown",
328 | "metadata": {},
329 | "source": [
330 | "Rather than this numerical approach, let's implement some symbolic autodiff techniques. For this, we will need to define classes to represent constants, variables and operations."
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 13,
336 | "metadata": {},
337 | "outputs": [],
338 | "source": [
339 | "class Const(object):\n",
340 | " def __init__(self, value):\n",
341 | " self.value = value\n",
342 | " def evaluate(self):\n",
343 | " return self.value\n",
344 | " def __str__(self):\n",
345 | " return str(self.value)\n",
346 | "\n",
347 | "class Var(object):\n",
348 | " def __init__(self, name, init_value=0):\n",
349 | " self.value = init_value\n",
350 | " self.name = name\n",
351 | " def evaluate(self):\n",
352 | " return self.value\n",
353 | " def __str__(self):\n",
354 | " return self.name\n",
355 | "\n",
356 | "class BinaryOperator(object):\n",
357 | " def __init__(self, a, b):\n",
358 | " self.a = a\n",
359 | " self.b = b\n",
360 | "\n",
361 | "class Add(BinaryOperator):\n",
362 | " def evaluate(self):\n",
363 | " return self.a.evaluate() + self.b.evaluate()\n",
364 | " def __str__(self):\n",
365 | " return \"{} + {}\".format(self.a, self.b)\n",
366 | "\n",
367 | "class Mul(BinaryOperator):\n",
368 | " def evaluate(self):\n",
369 | " return self.a.evaluate() * self.b.evaluate()\n",
370 | " def __str__(self):\n",
371 | " return \"({}) * ({})\".format(self.a, self.b)"
372 | ]
373 | },
374 | {
375 | "cell_type": "markdown",
376 | "metadata": {},
377 | "source": [
378 | "Good, now we can build a computation graph to represent the function $f$:"
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": 14,
384 | "metadata": {},
385 | "outputs": [],
386 | "source": [
387 | "x = Var(\"x\")\n",
388 | "y = Var(\"y\")\n",
389 | "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2"
390 | ]
391 | },
392 | {
393 | "cell_type": "markdown",
394 | "metadata": {},
395 | "source": [
396 | "And we can run this graph to compute $f$ at any point, for example $f(3, 4)$."
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": 15,
402 | "metadata": {},
403 | "outputs": [
404 | {
405 | "data": {
406 | "text/plain": [
407 | "42"
408 | ]
409 | },
410 | "execution_count": 15,
411 | "metadata": {},
412 | "output_type": "execute_result"
413 | }
414 | ],
415 | "source": [
416 | "x.value = 3\n",
417 | "y.value = 4\n",
418 | "f.evaluate()"
419 | ]
420 | },
421 | {
422 | "cell_type": "markdown",
423 | "metadata": {},
424 | "source": [
425 | "Perfect, it found the ultimate answer."
426 | ]
427 | },
428 | {
429 | "cell_type": "markdown",
430 | "metadata": {},
431 | "source": [
432 | "## Computing gradients"
433 | ]
434 | },
435 | {
436 | "cell_type": "markdown",
437 | "metadata": {},
438 | "source": [
439 | "The autodiff methods we will present below are all based on the *chain rule*."
440 | ]
441 | },
442 | {
443 | "cell_type": "markdown",
444 | "metadata": {},
445 | "source": [
446 | "Suppose we have two functions $u$ and $v$, and we apply them sequentially to some input $x$, and we get the result $z$. So we have $z = v(u(x))$, which we can rewrite as $z = v(s)$ and $s = u(x)$. Now we can apply the chain rule to get the partial derivative of the output $z$ with regards to the input $x$:\n",
447 | "\n",
448 | "$ \\dfrac{\\partial z}{\\partial x} = \\dfrac{\\partial s}{\\partial x} \\cdot \\dfrac{\\partial z}{\\partial s}$"
449 | ]
450 | },
451 | {
452 | "cell_type": "markdown",
453 | "metadata": {},
454 | "source": [
455 | "Now if $z$ is the output of a sequence of functions which have intermediate outputs $s_1, s_2, ..., s_n$, the chain rule still applies:\n",
456 | "\n",
457 | "$ \\dfrac{\\partial z}{\\partial x} = \\dfrac{\\partial s_1}{\\partial x} \\cdot \\dfrac{\\partial s_2}{\\partial s_1} \\cdot \\dfrac{\\partial s_3}{\\partial s_2} \\cdot \\dots \\cdot \\dfrac{\\partial s_{n-1}}{\\partial s_{n-2}} \\cdot \\dfrac{\\partial s_n}{\\partial s_{n-1}} \\cdot \\dfrac{\\partial z}{\\partial s_n}$"
458 | ]
459 | },
460 | {
461 | "cell_type": "markdown",
462 | "metadata": {},
463 | "source": [
464 | "In forward mode autodiff, the algorithm computes these terms \"forward\" (i.e., in the same order as the computations required to compute the output $z$), that is from left to right: first $\\dfrac{\\partial s_1}{\\partial x}$, then $\\dfrac{\\partial s_2}{\\partial s_1}$, and so on. In reverse mode autodiff, the algorithm computes these terms \"backwards\", from right to left: first $\\dfrac{\\partial z}{\\partial s_n}$, then $\\dfrac{\\partial s_n}{\\partial s_{n-1}}$, and so on.\n",
465 | "\n",
466 | "For example, suppose you want to compute the derivative of the function $z(x)=\\sin(x^2)$ at x=3, using forward mode autodiff. The algorithm would first compute the partial derivative $\\dfrac{\\partial s_1}{\\partial x}=\\dfrac{\\partial x^2}{\\partial x}=2x=6$. Next, it would compute $\\dfrac{\\partial z}{\\partial x}=\\dfrac{\\partial s_1}{\\partial x}\\cdot\\dfrac{\\partial z}{\\partial s_1}= 6 \\cdot \\dfrac{\\partial \\sin(s_1)}{\\partial s_1}=6 \\cdot \\cos(s_1)=6 \\cdot \\cos(3^2)\\approx-5.46$."
467 | ]
468 | },
469 | {
470 | "cell_type": "markdown",
471 | "metadata": {},
472 | "source": [
473 | "Let's verify this result using the `gradients()` function defined earlier:"
474 | ]
475 | },
476 | {
477 | "cell_type": "code",
478 | "execution_count": 16,
479 | "metadata": {},
480 | "outputs": [
481 | {
482 | "data": {
483 | "text/plain": [
484 | "[-5.46761419430053]"
485 | ]
486 | },
487 | "execution_count": 16,
488 | "metadata": {},
489 | "output_type": "execute_result"
490 | }
491 | ],
492 | "source": [
493 | "from math import sin\n",
494 | "\n",
495 | "def z(x):\n",
496 | " return sin(x**2)\n",
497 | "\n",
498 | "gradients(z, [3])"
499 | ]
500 | },
501 | {
502 | "cell_type": "markdown",
503 | "metadata": {},
504 | "source": [
505 | "Look good. Now let's do the same thing using reverse mode autodiff. This time the algorithm would start from the right hand side so it would compute $\\dfrac{\\partial z}{\\partial s_1} = \\dfrac{\\partial \\sin(s_1)}{\\partial s_1}=\\cos(s_1)=\\cos(3^2)\\approx -0.91$. Next it would compute $\\dfrac{\\partial z}{\\partial x}=\\dfrac{\\partial s_1}{\\partial x}\\cdot\\dfrac{\\partial z}{\\partial s_1} \\approx \\dfrac{\\partial s_1}{\\partial x} \\cdot -0.91 = \\dfrac{\\partial x^2}{\\partial x} \\cdot -0.91=2x \\cdot -0.91 = 6\\cdot-0.91=-5.46$."
506 | ]
507 | },
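508 | {
509 | "cell_type": "markdown",
510 | "metadata": {},
511 | "source": [
512 | "Here is a quick numeric check of these two steps (a minimal sketch, separate from our toy framework): we compute the right-most term first, then multiply by $\dfrac{\partial s_1}{\partial x}$, just as described above."
513 | ]
514 | },
515 | {
516 | "cell_type": "code",
517 | "execution_count": null,
518 | "metadata": {},
519 | "outputs": [],
520 | "source": [
521 | "from math import cos\n",
522 | "\n",
523 | "s1 = 3 ** 2        # forward pass: s1 = x² at x = 3\n",
524 | "dz_ds1 = cos(s1)   # right-most term first: dz/ds1 = cos(s1) ≈ -0.91\n",
525 | "ds1_dx = 2 * 3     # then ds1/dx = 2x = 6\n",
526 | "ds1_dx * dz_ds1    # chain rule: ≈ -5.46, matching the result above"
527 | ]
528 | },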
508 | {
509 | "cell_type": "markdown",
510 | "metadata": {},
511 | "source": [
512 | "Of course both approaches give the same result (except for rounding errors), and with a single input and output they involve the same number of computations. But when there are several inputs or outputs, they can have very different performance. Indeed, if there are many inputs, the right-most terms will be needed to compute the partial derivatives with regards to each input, so it is a good idea to compute these right-most terms first. That means using reverse-mode autodiff. This way, the right-most terms can be computed just once and used to compute all the partial derivatives. Conversely, if there are many outputs, forward-mode is generally preferable because the left-most terms can be computed just once to compute the partial derivatives of the different outputs. In Deep Learning, there are typically thousands of model parameters, meaning there are lots of inputs, but few outputs. In fact, there is generally just one output during training: the loss. This is why reverse mode autodiff is used in TensorFlow and all major Deep Learning libraries."
513 | ]
514 | },
515 | {
516 | "cell_type": "markdown",
517 | "metadata": {},
518 | "source": [
519 | "There's one additional complexity in reverse mode autodiff: the value of $s_i$ is generally required when computing $\\dfrac{\\partial s_{i+1}}{\\partial s_i}$, and computing $s_i$ requires first computing $s_{i-1}$, which requires computing $s_{i-2}$, and so on. So basically, a first pass forward through the network is required to compute $s_1$, $s_2$, $s_3$, $\\dots$, $s_{n-1}$ and $s_n$, and then the algorithm can compute the partial derivatives from right to left. Storing all the intermediate values $s_i$ in RAM is sometimes a problem, especially when handling images, and when using GPUs which often have limited RAM: to limit this problem, one can reduce the number of layers in the neural network, or configure TensorFlow to make it swap these values from GPU RAM to CPU RAM. Another approach is to only cache every other intermediate value, $s_1$, $s_3$, $s_5$, $\\dots$, $s_{n-4}$, $s_{n-2}$ and $s_n$. This means that when the algorithm computes the partial derivatives, if an intermediate value $s_i$ is missing, it will need to recompute it based on the previous intermediate value $s_{i-1}$. This trades off CPU for RAM (if you are interested, check out [this paper](https://pdfs.semanticscholar.org/f61e/9fd5a4878e1493f7a6b03774a61c17b7e9a4.pdf))."
520 | ]
521 | },
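522 | {
523 | "cell_type": "markdown",
524 | "metadata": {},
525 | "source": [
526 | "To make the \"cache every other value\" idea concrete, here is a minimal sketch (my own illustration, not how TensorFlow implements it) for a simple chain of functions: the forward pass stores only every other intermediate value, and the backward pass recomputes any missing $s_i$ from the previous cached value."
527 | ]
528 | },
529 | {
530 | "cell_type": "code",
531 | "execution_count": null,
532 | "metadata": {},
533 | "outputs": [],
534 | "source": [
535 | "from math import sin, cos\n",
536 | "\n",
537 | "# A chain z = f3(f2(f1(x))); each step is paired with its derivative.\n",
538 | "steps = [\n",
539 | "    (lambda s: s ** 2, lambda s: 2 * s),   # s1 = x²\n",
540 | "    (lambda s: s + 1,  lambda s: 1.0),     # s2 = s1 + 1\n",
541 | "    (lambda s: sin(s), lambda s: cos(s)),  # z  = sin(s2)\n",
542 | "]\n",
543 | "\n",
544 | "def grad_with_checkpoints(x):\n",
545 | "    cached = {0: x}  # forward pass: cache only every other value (plus the last)\n",
546 | "    s = x\n",
547 | "    for i, (f, _) in enumerate(steps, start=1):\n",
548 | "        s = f(s)\n",
549 | "        if i % 2 == 0 or i == len(steps):\n",
550 | "            cached[i] = s\n",
551 | "    grad = 1.0  # backward pass, recomputing missing values when needed\n",
552 | "    for i in range(len(steps), 0, -1):\n",
553 | "        s_prev = cached.get(i - 1)\n",
554 | "        if s_prev is None:  # s_{i-1} was not cached: recompute it from s_{i-2}\n",
555 | "            s_prev = steps[i - 2][0](cached[i - 2])\n",
556 | "        grad *= steps[i - 1][1](s_prev)\n",
557 | "    return grad\n",
558 | "\n",
559 | "grad_with_checkpoints(3.0)  # dz/dx = cos(x² + 1) * 1 * 2x = 6cos(10) ≈ -5.03"
560 | ]
561 | },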
522 | {
523 | "cell_type": "markdown",
524 | "metadata": {},
525 | "source": [
526 | "### Forward mode autodiff"
527 | ]
528 | },
529 | {
530 | "cell_type": "code",
531 | "execution_count": 17,
532 | "metadata": {},
533 | "outputs": [],
534 | "source": [
535 | "Const.gradient = lambda self, var: Const(0)\n",
536 | "Var.gradient = lambda self, var: Const(1) if self is var else Const(0)\n",
537 | "Add.gradient = lambda self, var: Add(self.a.gradient(var), self.b.gradient(var))\n",
538 | "Mul.gradient = lambda self, var: Add(Mul(self.a, self.b.gradient(var)), Mul(self.a.gradient(var), self.b))\n",
539 | "\n",
540 | "x = Var(name=\"x\", init_value=3.)\n",
541 | "y = Var(name=\"y\", init_value=4.)\n",
542 | "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n",
543 | "\n",
544 | "dfdx = f.gradient(x) # 2xy\n",
545 | "dfdy = f.gradient(y) # x² + 1"
546 | ]
547 | },
548 | {
549 | "cell_type": "code",
550 | "execution_count": 18,
551 | "metadata": {},
552 | "outputs": [
553 | {
554 | "data": {
555 | "text/plain": [
556 | "(24.0, 10.0)"
557 | ]
558 | },
559 | "execution_count": 18,
560 | "metadata": {},
561 | "output_type": "execute_result"
562 | }
563 | ],
564 | "source": [
565 | "dfdx.evaluate(), dfdy.evaluate()"
566 | ]
567 | },
568 | {
569 | "cell_type": "markdown",
570 | "metadata": {},
571 | "source": [
572 | "Since the output of the `gradient()` method is fully symbolic, we are not limited to the first order derivatives, we can also compute second order derivatives, and so on:"
573 | ]
574 | },
575 | {
576 | "cell_type": "code",
577 | "execution_count": 19,
578 | "metadata": {},
579 | "outputs": [],
580 | "source": [
581 | "d2fdxdx = dfdx.gradient(x) # 2y\n",
582 | "d2fdxdy = dfdx.gradient(y) # 2x\n",
583 | "d2fdydx = dfdy.gradient(x) # 2x\n",
584 | "d2fdydy = dfdy.gradient(y) # 0"
585 | ]
586 | },
587 | {
588 | "cell_type": "code",
589 | "execution_count": 20,
590 | "metadata": {},
591 | "outputs": [
592 | {
593 | "data": {
594 | "text/plain": [
595 | "[[8.0, 6.0], [6.0, 0.0]]"
596 | ]
597 | },
598 | "execution_count": 20,
599 | "metadata": {},
600 | "output_type": "execute_result"
601 | }
602 | ],
603 | "source": [
604 | "[[d2fdxdx.evaluate(), d2fdxdy.evaluate()],\n",
605 | " [d2fdydx.evaluate(), d2fdydy.evaluate()]]"
606 | ]
607 | },
608 | {
609 | "cell_type": "markdown",
610 | "metadata": {},
611 | "source": [
612 | "Note that the result is now exact, not an approximation (up to the limit of the machine's float precision, of course)."
613 | ]
614 | },
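615 | {
616 | "cell_type": "markdown",
617 | "metadata": {},
618 | "source": [
619 | "One caveat: the `gradient()` method performs no simplification, so the symbolic expressions it builds grow quickly as we differentiate repeatedly. Printing a derivative makes this visible:"
620 | ]
621 | },
622 | {
623 | "cell_type": "code",
624 | "execution_count": null,
625 | "metadata": {},
626 | "outputs": [],
627 | "source": [
628 | "print(dfdx)     # an unsimplified expression equivalent to 2xy\n",
629 | "print(d2fdxdx)  # even larger, though equivalent to 2y"
630 | ]
631 | },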
615 | {
616 | "cell_type": "markdown",
617 | "metadata": {},
618 | "source": [
619 | "### Forward mode autodiff using dual numbers"
620 | ]
621 | },
622 | {
623 | "cell_type": "markdown",
624 | "metadata": {},
625 | "source": [
626 | "A nice way to apply forward mode autodiff is to use [dual numbers](https://en.wikipedia.org/wiki/Dual_number). In short, a dual number $z$ has the form $z = a + b\\epsilon$, where $a$ and $b$ are real numbers, and $\\epsilon$ is an infinitesimal number, positive but smaller than all real numbers, and such that $\\epsilon^2=0$.\n",
627 | "It can be shown that $f(x + \\epsilon) = f(x) + \\dfrac{\\partial f}{\\partial x}\\epsilon$, so simply by computing $f(x + \\epsilon)$ we get both the value of $f(x)$ and the partial derivative of $f$ with regards to $x$. "
628 | ]
629 | },
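630 | {
631 | "cell_type": "markdown",
632 | "metadata": {},
633 | "source": [
634 | "For example, if $f(x) = x^2$, then $f(x + \epsilon) = (x + \epsilon)^2 = x^2 + 2x\epsilon + \epsilon^2 = x^2 + 2x\epsilon$: the coefficient of $\epsilon$ is indeed $\dfrac{\partial f}{\partial x} = 2x$."
635 | ]
636 | },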
630 | {
631 | "cell_type": "markdown",
632 | "metadata": {},
633 | "source": [
634 | "Dual numbers have their own arithmetic rules, which are generally quite natural. For example:\n",
635 | "\n",
636 | "**Addition**\n",
637 | "\n",
638 | "$(a_1 + b_1\\epsilon) + (a_2 + b_2\\epsilon) = (a_1 + a_2) + (b_1 + b_2)\\epsilon$\n",
639 | "\n",
640 | "**Subtraction**\n",
641 | "\n",
642 | "$(a_1 + b_1\\epsilon) - (a_2 + b_2\\epsilon) = (a_1 - a_2) + (b_1 - b_2)\\epsilon$\n",
643 | "\n",
644 | "**Multiplication**\n",
645 | "\n",
646 | "$(a_1 + b_1\\epsilon) \\times (a_2 + b_2\\epsilon) = (a_1 a_2) + (a_1 b_2 + a_2 b_1)\\epsilon + b_1 b_2\\epsilon^2 = (a_1 a_2) + (a_1b_2 + a_2b_1)\\epsilon$\n",
647 | "\n",
648 | "**Division**\n",
649 | "\n",
650 | "$\\dfrac{a_1 + b_1\\epsilon}{a_2 + b_2\\epsilon} = \\dfrac{a_1 + b_1\\epsilon}{a_2 + b_2\\epsilon} \\cdot \\dfrac{a_2 - b_2\\epsilon}{a_2 - b_2\\epsilon} = \\dfrac{a_1 a_2 + (b_1 a_2 - a_1 b_2)\\epsilon - b_1 b_2\\epsilon^2}{{a_2}^2 + (a_2 b_2 - a_2 b_2)\\epsilon - {b_2}^2\\epsilon} = \\dfrac{a_1}{a_2} + \\dfrac{a_1 b_2 - b_1 a_2}{{a_2}^2}\\epsilon$\n",
651 | "\n",
652 | "**Power**\n",
653 | "\n",
654 | "$(a + b\\epsilon)^n = a^n + (n a^{n-1}b)\\epsilon$\n",
655 | "\n",
656 | "etc."
657 | ]
658 | },
659 | {
660 | "cell_type": "markdown",
661 | "metadata": {},
662 | "source": [
663 | "Let's create a class to represent dual numbers, and implement a few operations (addition and multiplication). You can try adding some more if you want."
664 | ]
665 | },
666 | {
667 | "cell_type": "code",
668 | "execution_count": 21,
669 | "metadata": {},
670 | "outputs": [],
671 | "source": [
672 | "class DualNumber(object):\n",
673 | " def __init__(self, value=0.0, eps=0.0):\n",
674 | " self.value = value\n",
675 | " self.eps = eps\n",
676 | " def __add__(self, b):\n",
677 | " return DualNumber(self.value + self.to_dual(b).value,\n",
678 | " self.eps + self.to_dual(b).eps)\n",
679 | " def __radd__(self, a):\n",
680 | " return self.to_dual(a).__add__(self)\n",
681 | " def __mul__(self, b):\n",
682 | " return DualNumber(self.value * self.to_dual(b).value,\n",
683 | " self.eps * self.to_dual(b).value + self.value * self.to_dual(b).eps)\n",
684 | " def __rmul__(self, a):\n",
685 | " return self.to_dual(a).__mul__(self)\n",
686 | " def __str__(self):\n",
687 | " if self.eps:\n",
688 | " return \"{:.1f} + {:.1f}ε\".format(self.value, self.eps)\n",
689 | " else:\n",
690 | " return \"{:.1f}\".format(self.value)\n",
691 | " def __repr__(self):\n",
692 | " return str(self)\n",
693 | " @classmethod\n",
694 | " def to_dual(cls, n):\n",
695 | " if hasattr(n, \"value\"):\n",
696 | " return n\n",
697 | " else:\n",
698 | " return cls(n)"
699 | ]
700 | },
701 | {
702 | "cell_type": "markdown",
703 | "metadata": {},
704 | "source": [
705 | "$3 + (3 + 4 \\epsilon) = 6 + 4\\epsilon$"
706 | ]
707 | },
708 | {
709 | "cell_type": "code",
710 | "execution_count": 22,
711 | "metadata": {},
712 | "outputs": [
713 | {
714 | "data": {
715 | "text/plain": [
716 | "6.0 + 4.0ε"
717 | ]
718 | },
719 | "execution_count": 22,
720 | "metadata": {},
721 | "output_type": "execute_result"
722 | }
723 | ],
724 | "source": [
725 | "3 + DualNumber(3, 4)"
726 | ]
727 | },
728 | {
729 | "cell_type": "markdown",
730 | "metadata": {},
731 | "source": [
732 | "$(3 + 4ε)\\times(5 + 7ε)$ = $3 \\times 5 + 3 \\times 7ε + 4ε \\times 5 + 4ε \\times 7ε$ = $15 + 21ε + 20ε + 28ε^2$ = $15 + 41ε + 28 \\times 0$ = $15 + 41ε$"
733 | ]
734 | },
735 | {
736 | "cell_type": "code",
737 | "execution_count": 23,
738 | "metadata": {},
739 | "outputs": [
740 | {
741 | "data": {
742 | "text/plain": [
743 | "15.0 + 41.0ε"
744 | ]
745 | },
746 | "execution_count": 23,
747 | "metadata": {},
748 | "output_type": "execute_result"
749 | }
750 | ],
751 | "source": [
752 | "DualNumber(3, 4) * DualNumber(5, 7)"
753 | ]
754 | },
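755 | {
756 | "cell_type": "markdown",
757 | "metadata": {},
758 | "source": [
759 | "And here is one possible way to add subtraction and division, following the arithmetic rules listed above (a sketch; the helper names `dual_sub` and `dual_truediv` are just illustrative):"
760 | ]
761 | },
762 | {
763 | "cell_type": "code",
764 | "execution_count": null,
765 | "metadata": {},
766 | "outputs": [],
767 | "source": [
768 | "def dual_sub(self, b):\n",
769 | "    b = self.to_dual(b)\n",
770 | "    return DualNumber(self.value - b.value, self.eps - b.eps)\n",
771 | "\n",
772 | "def dual_truediv(self, b):\n",
773 | "    b = self.to_dual(b)\n",
774 | "    return DualNumber(self.value / b.value,\n",
775 | "                      (self.eps * b.value - self.value * b.eps) / b.value ** 2)\n",
776 | "\n",
777 | "DualNumber.__sub__ = dual_sub\n",
778 | "DualNumber.__truediv__ = dual_truediv\n",
779 | "\n",
780 | "DualNumber(3, 4) / DualNumber(5, 7)  # 3/5 + (4*5 - 3*7)/5² ε = 0.6 - 0.04ε"
781 | ]
782 | },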
755 | {
756 | "cell_type": "markdown",
757 | "metadata": {},
758 | "source": [
759 | "Now let's see if the dual numbers work with our toy computation framework:"
760 | ]
761 | },
762 | {
763 | "cell_type": "code",
764 | "execution_count": 24,
765 | "metadata": {},
766 | "outputs": [
767 | {
768 | "data": {
769 | "text/plain": [
770 | "42.0"
771 | ]
772 | },
773 | "execution_count": 24,
774 | "metadata": {},
775 | "output_type": "execute_result"
776 | }
777 | ],
778 | "source": [
779 | "x.value = DualNumber(3.0)\n",
780 | "y.value = DualNumber(4.0)\n",
781 | "\n",
782 | "f.evaluate()"
783 | ]
784 | },
785 | {
786 | "cell_type": "markdown",
787 | "metadata": {},
788 | "source": [
789 | "Yep, sure works. Now let's use this to compute the partial derivatives of $f$ with regards to $x$ and $y$ at x=3 and y=4:"
790 | ]
791 | },
792 | {
793 | "cell_type": "code",
794 | "execution_count": 25,
795 | "metadata": {},
796 | "outputs": [],
797 | "source": [
798 | "x.value = DualNumber(3.0, 1.0) # 3 + ε\n",
799 | "y.value = DualNumber(4.0) # 4\n",
800 | "\n",
801 | "dfdx = f.evaluate().eps\n",
802 | "\n",
803 | "x.value = DualNumber(3.0) # 3\n",
804 | "y.value = DualNumber(4.0, 1.0) # 4 + ε\n",
805 | "\n",
806 | "dfdy = f.evaluate().eps"
807 | ]
808 | },
809 | {
810 | "cell_type": "code",
811 | "execution_count": 26,
812 | "metadata": {},
813 | "outputs": [
814 | {
815 | "data": {
816 | "text/plain": [
817 | "24.0"
818 | ]
819 | },
820 | "execution_count": 26,
821 | "metadata": {},
822 | "output_type": "execute_result"
823 | }
824 | ],
825 | "source": [
826 | "dfdx"
827 | ]
828 | },
829 | {
830 | "cell_type": "code",
831 | "execution_count": 27,
832 | "metadata": {},
833 | "outputs": [
834 | {
835 | "data": {
836 | "text/plain": [
837 | "10.0"
838 | ]
839 | },
840 | "execution_count": 27,
841 | "metadata": {},
842 | "output_type": "execute_result"
843 | }
844 | ],
845 | "source": [
846 | "dfdy"
847 | ]
848 | },
849 | {
850 | "cell_type": "markdown",
851 | "metadata": {},
852 | "source": [
853 | "Great! However, in this implementation we are limited to first order derivatives.\n",
854 | "Now let's look at reverse mode."
855 | ]
856 | },
857 | {
858 | "cell_type": "markdown",
859 | "metadata": {},
860 | "source": [
861 | "### Reverse mode autodiff"
862 | ]
863 | },
864 | {
865 | "cell_type": "markdown",
866 | "metadata": {},
867 | "source": [
868 | "Let's rewrite our toy framework to add reverse mode autodiff:"
869 | ]
870 | },
871 | {
872 | "cell_type": "code",
873 | "execution_count": 28,
874 | "metadata": {},
875 | "outputs": [],
876 | "source": [
877 | "class Const(object):\n",
878 | " def __init__(self, value):\n",
879 | " self.value = value\n",
880 | " def evaluate(self):\n",
881 | " return self.value\n",
882 | " def backpropagate(self, gradient):\n",
883 | " pass\n",
884 | " def __str__(self):\n",
885 | " return str(self.value)\n",
886 | "\n",
887 | "class Var(object):\n",
888 | " def __init__(self, name, init_value=0):\n",
889 | " self.value = init_value\n",
890 | " self.name = name\n",
891 | " self.gradient = 0\n",
892 | " def evaluate(self):\n",
893 | " return self.value\n",
894 | " def backpropagate(self, gradient):\n",
895 | " self.gradient += gradient\n",
896 | " def __str__(self):\n",
897 | " return self.name\n",
898 | "\n",
899 | "class BinaryOperator(object):\n",
900 | " def __init__(self, a, b):\n",
901 | " self.a = a\n",
902 | " self.b = b\n",
903 | "\n",
904 | "class Add(BinaryOperator):\n",
905 | " def evaluate(self):\n",
906 | " self.value = self.a.evaluate() + self.b.evaluate()\n",
907 | " return self.value\n",
908 | " def backpropagate(self, gradient):\n",
909 | " self.a.backpropagate(gradient)\n",
910 | " self.b.backpropagate(gradient)\n",
911 | " def __str__(self):\n",
912 | " return \"{} + {}\".format(self.a, self.b)\n",
913 | "\n",
914 | "class Mul(BinaryOperator):\n",
915 | " def evaluate(self):\n",
916 | " self.value = self.a.evaluate() * self.b.evaluate()\n",
917 | " return self.value\n",
918 | " def backpropagate(self, gradient):\n",
919 | " self.a.backpropagate(gradient * self.b.value)\n",
920 | " self.b.backpropagate(gradient * self.a.value)\n",
921 | " def __str__(self):\n",
922 | " return \"({}) * ({})\".format(self.a, self.b)"
923 | ]
924 | },
925 | {
926 | "cell_type": "code",
927 | "execution_count": 29,
928 | "metadata": {},
929 | "outputs": [],
930 | "source": [
931 | "x = Var(\"x\", init_value=3)\n",
932 | "y = Var(\"y\", init_value=4)\n",
933 | "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n",
934 | "\n",
935 | "result = f.evaluate()\n",
936 | "f.backpropagate(1.0)"
937 | ]
938 | },
939 | {
940 | "cell_type": "code",
941 | "execution_count": 30,
942 | "metadata": {},
943 | "outputs": [
944 | {
945 | "name": "stdout",
946 | "output_type": "stream",
947 | "text": [
948 | "((x) * (x)) * (y) + y + 2\n"
949 | ]
950 | }
951 | ],
952 | "source": [
953 | "print(f)"
954 | ]
955 | },
956 | {
957 | "cell_type": "code",
958 | "execution_count": 31,
959 | "metadata": {},
960 | "outputs": [
961 | {
962 | "data": {
963 | "text/plain": [
964 | "42"
965 | ]
966 | },
967 | "execution_count": 31,
968 | "metadata": {},
969 | "output_type": "execute_result"
970 | }
971 | ],
972 | "source": [
973 | "result"
974 | ]
975 | },
976 | {
977 | "cell_type": "code",
978 | "execution_count": 32,
979 | "metadata": {},
980 | "outputs": [
981 | {
982 | "data": {
983 | "text/plain": [
984 | "24.0"
985 | ]
986 | },
987 | "execution_count": 32,
988 | "metadata": {},
989 | "output_type": "execute_result"
990 | }
991 | ],
992 | "source": [
993 | "x.gradient"
994 | ]
995 | },
996 | {
997 | "cell_type": "code",
998 | "execution_count": 33,
999 | "metadata": {},
1000 | "outputs": [
1001 | {
1002 | "data": {
1003 | "text/plain": [
1004 | "10.0"
1005 | ]
1006 | },
1007 | "execution_count": 33,
1008 | "metadata": {},
1009 | "output_type": "execute_result"
1010 | }
1011 | ],
1012 | "source": [
1013 | "y.gradient"
1014 | ]
1015 | },
1016 | {
1017 | "cell_type": "markdown",
1018 | "metadata": {},
1019 | "source": [
1020 | "Again, in this implementation the outputs are just numbers, not symbolic expressions, so we are limited to first order derivatives. However, we could have made the `backpropagate()` methods return symbolic expressions rather than values (e.g., return `Add(2,3)` rather than 5). This would make it possible to compute second order gradients (and beyond). This is what TensorFlow does, as do all the major libraries that implement autodiff."
1021 | ]
1022 | },
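1023 | {
1024 | "cell_type": "markdown",
1025 | "metadata": {},
1026 | "source": [
1027 | "To sketch what that could look like (one possible variant, reusing the classes above; the names `SymVar` and `SymMul` are just illustrative), we can make `backpropagate()` build gradient *graphs* instead of accumulating numbers. Since `Add.backpropagate()` simply forwards its gradient, only `Var` and `Mul` need symbolic versions:"
1028 | ]
1029 | },
1030 | {
1031 | "cell_type": "code",
1032 | "execution_count": null,
1033 | "metadata": {},
1034 | "outputs": [],
1035 | "source": [
1036 | "class SymVar(Var):\n",
1037 | "    def __init__(self, name, init_value=0):\n",
1038 | "        super().__init__(name, init_value)\n",
1039 | "        self.gradient = Const(0)  # the gradient is now a graph, not a number\n",
1040 | "    def backpropagate(self, gradient):\n",
1041 | "        self.gradient = Add(self.gradient, gradient)\n",
1042 | "\n",
1043 | "class SymMul(Mul):\n",
1044 | "    def backpropagate(self, gradient):\n",
1045 | "        self.a.backpropagate(Mul(gradient, self.b))  # nodes, not .value\n",
1046 | "        self.b.backpropagate(Mul(gradient, self.a))\n",
1047 | "\n",
1048 | "x = SymVar(\"x\", init_value=3)\n",
1049 | "y = SymVar(\"y\", init_value=4)\n",
1050 | "f = Add(SymMul(SymMul(x, x), y), Add(y, Const(2)))\n",
1051 | "\n",
1052 | "f.evaluate()\n",
1053 | "f.backpropagate(Const(1))\n",
1054 | "x.gradient.evaluate(), y.gradient.evaluate()  # (24, 10), both symbolic graphs"
1055 | ]
1056 | },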
1023 | {
1024 | "cell_type": "markdown",
1025 | "metadata": {},
1026 | "source": [
1027 | "### Reverse mode autodiff using TensorFlow"
1028 | ]
1029 | },
1030 | {
1031 | "cell_type": "code",
1032 | "execution_count": 34,
1033 | "metadata": {},
1034 | "outputs": [],
1035 | "source": [
1036 | "import tensorflow as tf"
1037 | ]
1038 | },
1039 | {
1040 | "cell_type": "code",
1041 | "execution_count": 35,
1042 | "metadata": {},
1043 | "outputs": [
1044 | {
1045 | "data": {
1046 | "text/plain": [
1047 | "(42.0, [24.0, 10.0])"
1048 | ]
1049 | },
1050 | "execution_count": 35,
1051 | "metadata": {},
1052 | "output_type": "execute_result"
1053 | }
1054 | ],
1055 | "source": [
1056 | "tf.reset_default_graph()\n",
1057 | "\n",
1058 | "x = tf.Variable(3., name=\"x\")\n",
1059 | "y = tf.Variable(4., name=\"y\")\n",
1060 | "f = x*x*y + y + 2\n",
1061 | "\n",
1062 | "jacobians = tf.gradients(f, [x, y])\n",
1063 | "\n",
1064 | "init = tf.global_variables_initializer()\n",
1065 | "\n",
1066 | "with tf.Session() as sess:\n",
1067 | " init.run()\n",
1068 | " f_val, jacobians_val = sess.run([f, jacobians])\n",
1069 | "\n",
1070 | "f_val, jacobians_val"
1071 | ]
1072 | },
1073 | {
1074 | "cell_type": "markdown",
1075 | "metadata": {},
1076 | "source": [
1077 | "Since everything is symbolic, we can compute second order derivatives, and beyond. However, when we compute the derivative of a tensor with regards to a variable that it does not depend on, instead of returning 0.0, the `gradients()` function returns None, which cannot be evaluated by `sess.run()`. So beware of `None` values. Here we just replace them with zero tensors."
1078 | ]
1079 | },
1080 | {
1081 | "cell_type": "code",
1082 | "execution_count": 36,
1083 | "metadata": {},
1084 | "outputs": [
1085 | {
1086 | "data": {
1087 | "text/plain": [
1088 | "([8.0, 6.0], [6.0, 0.0])"
1089 | ]
1090 | },
1091 | "execution_count": 36,
1092 | "metadata": {},
1093 | "output_type": "execute_result"
1094 | }
1095 | ],
1096 | "source": [
1097 | "hessians_x = tf.gradients(jacobians[0], [x, y])\n",
1098 | "hessians_y = tf.gradients(jacobians[1], [x, y])\n",
1099 | "\n",
1100 | "def replace_none_with_zero(tensors):\n",
1101 | " return [tensor if tensor is not None else tf.constant(0.)\n",
1102 | " for tensor in tensors]\n",
1103 | "\n",
1104 | "hessians_x = replace_none_with_zero(hessians_x)\n",
1105 | "hessians_y = replace_none_with_zero(hessians_y)\n",
1106 | "\n",
1107 | "init = tf.global_variables_initializer()\n",
1108 | "\n",
1109 | "with tf.Session() as sess:\n",
1110 | " init.run()\n",
1111 | " hessians_x_val, hessians_y_val = sess.run([hessians_x, hessians_y])\n",
1112 | "\n",
1113 | "hessians_x_val, hessians_y_val"
1114 | ]
1115 | },
1116 | {
1117 | "cell_type": "markdown",
1118 | "metadata": {},
1119 | "source": [
1120 | "And that's all folks! Hope you enjoyed this notebook."
1121 | ]
1122 | },
1123 | {
1124 | "cell_type": "code",
1125 | "execution_count": null,
1126 | "metadata": {},
1127 | "outputs": [],
1128 | "source": []
1129 | }
1130 | ],
1131 | "metadata": {
1132 | "kernelspec": {
1133 | "display_name": "Python 3",
1134 | "language": "python",
1135 | "name": "python3"
1136 | },
1137 | "language_info": {
1138 | "codemirror_mode": {
1139 | "name": "ipython",
1140 | "version": 3
1141 | },
1142 | "file_extension": ".py",
1143 | "mimetype": "text/x-python",
1144 | "name": "python",
1145 | "nbconvert_exporter": "python",
1146 | "pygments_lexer": "ipython3",
1147 | "version": "3.5.2"
1148 | },
1149 | "nav_menu": {
1150 | "height": "603px",
1151 | "width": "616px"
1152 | },
1153 | "toc": {
1154 | "navigate_menu": true,
1155 | "number_sections": true,
1156 | "sideBar": true,
1157 | "threshold": 6,
1158 | "toc_cell": false,
1159 | "toc_section_display": "block",
1160 | "toc_window_display": true
1161 | }
1162 | },
1163 | "nbformat": 4,
1164 | "nbformat_minor": 1
1165 | }
1166 |
--------------------------------------------------------------------------------
/book_equations.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "**Equations**\n",
8 | "\n",
9 | "*This notebook lists all the equations in the book. If you decide to print them on a T-Shirt, I definitely want a copy! ;-)*\n",
10 | "\n",
11 | "**Warning**: GitHub's notebook viewer does not render equations properly. You should either view this notebook within Jupyter itself or use [Jupyter's online viewer](http://nbviewer.jupyter.org/github/ageron/handson-ml/blob/master/book_equations.ipynb)."
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "# Chapter 1\n",
19 | "**Equation 1-1: A simple linear model**\n",
20 | "\n",
21 | "$\n",
22 | "\\text{life_satisfaction} = \\theta_0 + \\theta_1 \\times \\text{GDP_per_capita}\n",
23 | "$\n",
24 | "\n"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {},
30 | "source": [
31 | "# Chapter 2\n",
32 | "**Equation 2-1: Root Mean Square Error (RMSE)**\n",
33 | "\n",
34 | "$\n",
35 | "\\text{RMSE}(\\mathbf{X}, h) = \\sqrt{\\frac{1}{m}\\sum\\limits_{i=1}^{m}\\left(h(\\mathbf{x}^{(i)}) - y^{(i)}\\right)^2}\n",
36 | "$\n",
37 | "\n",
38 | "\n",
39 | "**Notations (page 38):**\n",
40 | "\n",
41 | "$\n",
42 | " \\mathbf{x}^{(1)} = \\begin{pmatrix}\n",
43 | " -118.29 \\\\\n",
44 | " 33.91 \\\\\n",
45 | " 1,416 \\\\\n",
46 | " 38,372\n",
47 | " \\end{pmatrix}\n",
48 | "$\n",
49 | "\n",
50 | "\n",
51 | "$\n",
52 | " y^{(1)}=156,400\n",
53 | "$\n",
54 | "\n",
55 | "\n",
56 | "$\n",
57 | " \\mathbf{X} = \\begin{pmatrix}\n",
58 | " (\\mathbf{x}^{(1)})^T \\\\\n",
59 | " (\\mathbf{x}^{(2)})^T\\\\\n",
60 | " \\vdots \\\\\n",
61 | " (\\mathbf{x}^{(1999)})^T \\\\\n",
62 | " (\\mathbf{x}^{(2000)})^T\n",
63 | " \\end{pmatrix} = \\begin{pmatrix}\n",
64 | " -118.29 & 33.91 & 1,416 & 38,372 \\\\\n",
65 | " \\vdots & \\vdots & \\vdots & \\vdots \\\\\n",
66 | " \\end{pmatrix}\n",
67 | "$\n",
68 | "\n",
69 | "\n",
70 | "**Equation 2-2: Mean Absolute Error**\n",
71 | "\n",
72 | "$\n",
73 | "\\text{MAE}(\\mathbf{X}, h) = \\frac{1}{m}\\sum\\limits_{i=1}^{m}\\left| h(\\mathbf{x}^{(i)}) - y^{(i)} \\right|\n",
74 | "$\n",
75 | "\n",
76 | "**$\\ell_k$ norms (page 39):**\n",
77 | "\n",
78 | "$ \\left\\| \\mathbf{v} \\right\\| _k = (\\left| v_0 \\right|^k + \\left| v_1 \\right|^k + \\dots + \\left| v_n \\right|^k)^{\\frac{1}{k}} $\n"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
85 | "# Chapter 3\n",
86 | "**Equation 3-1: Precision**\n",
87 | "\n",
88 | "$\n",
89 | "\\text{precision} = \\cfrac{TP}{TP + FP}\n",
90 | "$\n",
91 | "\n",
92 | "\n",
93 | "**Equation 3-2: Recall**\n",
94 | "\n",
95 | "$\n",
96 | "\\text{recall} = \\cfrac{TP}{TP + FN}\n",
97 | "$\n",
98 | "\n",
99 | "\n",
100 | "**Equation 3-3: $F_1$ score**\n",
101 | "\n",
102 | "$\n",
103 | "F_1 = \\cfrac{2}{\\cfrac{1}{\\text{precision}} + \\cfrac{1}{\\text{recall}}} = 2 \\times \\cfrac{\\text{precision}\\, \\times \\, \\text{recall}}{\\text{precision}\\, + \\, \\text{recall}} = \\cfrac{TP}{TP + \\cfrac{FN + FP}{2}}\n",
104 | "$\n",
105 | "\n"
106 | ]
107 | },
108 | {
109 | "cell_type": "markdown",
110 | "metadata": {},
111 | "source": [
112 | "# Chapter 4\n",
113 | "**Equation 4-1: Linear Regression model prediction**\n",
114 | "\n",
115 | "$\n",
116 | "\\hat{y} = \\theta_0 + \\theta_1 x_1 + \\theta_2 x_2 + \\dots + \\theta_n x_n\n",
117 | "$\n",
118 | "\n",
119 | "\n",
120 | "**Equation 4-2: Linear Regression model prediction (vectorized form)**\n",
121 | "\n",
122 | "$\n",
123 | "\\hat{y} = h_{\\boldsymbol{\\theta}}(\\mathbf{x}) = \\boldsymbol{\\theta} \\cdot \\mathbf{x}\n",
124 | "$\n",
125 | "\n",
126 | "\n",
127 | "**Equation 4-3: MSE cost function for a Linear Regression model**\n",
128 | "\n",
129 | "$\n",
130 | "\\text{MSE}(\\mathbf{X}, h_{\\boldsymbol{\\theta}}) = \\dfrac{1}{m} \\sum\\limits_{i=1}^{m}{(\\boldsymbol{\\theta}^T \\mathbf{x}^{(i)} - y^{(i)})^2}\n",
131 | "$\n",
132 | "\n",
133 | "\n",
134 | "**Equation 4-4: Normal Equation**\n",
135 | "\n",
136 | "$\n",
137 | "\\hat{\\boldsymbol{\\theta}} = (\\mathbf{X}^T \\mathbf{X})^{-1} \\mathbf{X}^T \\mathbf{y}\n",
138 | "$\n",
139 | "\n",
140 | "\n",
141 | "** Partial derivatives notation (page 114):**\n",
142 | "\n",
143 | "$\\frac{\\partial}{\\partial \\theta_j} \\text{MSE}(\\boldsymbol{\\theta})$\n",
144 | "\n",
145 | "\n",
146 | "**Equation 4-5: Partial derivatives of the cost function**\n",
147 | "\n",
148 | "$\n",
149 | "\\dfrac{\\partial}{\\partial \\theta_j} \\text{MSE}(\\boldsymbol{\\theta}) = \\dfrac{2}{m}\\sum\\limits_{i=1}^{m}(\\boldsymbol{\\theta}^T \\mathbf{x}^{(i)} - y^{(i)})\\, x_j^{(i)}\n",
150 | "$\n",
151 | "\n",
152 | "\n",
153 | "**Equation 4-6: Gradient vector of the cost function**\n",
154 | "\n",
155 | "$\n",
156 | "\\nabla_{\\boldsymbol{\\theta}}\\, \\text{MSE}(\\boldsymbol{\\theta}) =\n",
157 | "\\begin{pmatrix}\n",
158 | " \\frac{\\partial}{\\partial \\theta_0} \\text{MSE}(\\boldsymbol{\\theta}) \\\\\n",
159 | " \\frac{\\partial}{\\partial \\theta_1} \\text{MSE}(\\boldsymbol{\\theta}) \\\\\n",
160 | " \\vdots \\\\\n",
161 | " \\frac{\\partial}{\\partial \\theta_n} \\text{MSE}(\\boldsymbol{\\theta})\n",
162 | "\\end{pmatrix}\n",
163 | " = \\dfrac{2}{m} \\mathbf{X}^T (\\mathbf{X} \\boldsymbol{\\theta} - \\mathbf{y})\n",
164 | "$\n",
165 | "\n",
166 | "\n",
167 | "**Equation 4-7: Gradient Descent step**\n",
168 | "\n",
169 | "$\n",
170 | "\\boldsymbol{\\theta}^{(\\text{next step})} = \\boldsymbol{\\theta} - \\eta \\nabla_{\\boldsymbol{\\theta}}\\, \\text{MSE}(\\boldsymbol{\\theta})\n",
171 | "$\n",
172 | "\n",
173 | "\n",
174 | "$ O(\\frac{1}{\\text{iterations}}) $\n",
175 | "\n",
176 | "\n",
177 | "$ \\hat{y} = 0.56 x_1^2 + 0.93 x_1 + 1.78 $\n",
178 | "\n",
179 | "\n",
180 | "$ y = 0.5 x_1^2 + 1.0 x_1 + 2.0 + \\text{Gaussian noise} $\n",
181 | "\n",
182 | "\n",
183 | "$ \\dfrac{(n+d)!}{d!\\,n!} $\n",
184 | "\n",
185 | "\n",
186 | "$ \\alpha \\sum_{i=1}^{n}{{\\theta_i}^2}$\n",
187 | "\n",
188 | "\n",
189 | "**Equation 4-8: Ridge Regression cost function**\n",
190 | "\n",
191 | "$\n",
192 | "J(\\boldsymbol{\\theta}) = \\text{MSE}(\\boldsymbol{\\theta}) + \\alpha \\dfrac{1}{2}\\sum\\limits_{i=1}^{n}{\\theta_i}^2\n",
193 | "$\n",
194 | "\n",
195 | "\n",
196 | "**Equation 4-9: Ridge Regression closed-form solution**\n",
197 | "\n",
198 | "$\n",
199 | "\\hat{\\boldsymbol{\\theta}} = (\\mathbf{X}^T \\mathbf{X} + \\alpha \\mathbf{A})^{-1} \\mathbf{X}^T \\mathbf{y}\n",
200 | "$\n",
201 | "\n",
202 | "\n",
203 | "**Equation 4-10: Lasso Regression cost function**\n",
204 | "\n",
205 | "$\n",
206 | "J(\\boldsymbol{\\theta}) = \\text{MSE}(\\boldsymbol{\\theta}) + \\alpha \\sum\\limits_{i=1}^{n}\\left| \\theta_i \\right|\n",
207 | "$\n",
208 | "\n",
209 | "\n",
210 | "**Equation 4-11: Lasso Regression subgradient vector**\n",
211 | "\n",
212 | "$\n",
213 | "g(\\boldsymbol{\\theta}, J) = \\nabla_{\\boldsymbol{\\theta}}\\, \\text{MSE}(\\boldsymbol{\\theta}) + \\alpha\n",
214 | "\\begin{pmatrix}\n",
215 | " \\operatorname{sign}(\\theta_1) \\\\\n",
216 | " \\operatorname{sign}(\\theta_2) \\\\\n",
217 | " \\vdots \\\\\n",
218 | " \\operatorname{sign}(\\theta_n) \\\\\n",
219 | "\\end{pmatrix} \\quad \\text{where } \\operatorname{sign}(\\theta_i) =\n",
220 | "\\begin{cases}\n",
221 | "-1 & \\text{if } \\theta_i < 0 \\\\\n",
222 | "0 & \\text{if } \\theta_i = 0 \\\\\n",
223 | "+1 & \\text{if } \\theta_i > 0\n",
224 | "\\end{cases}\n",
225 | "$\n",
226 | "\n",
227 | "\n",
228 | "**Equation 4-12: Elastic Net cost function**\n",
229 | "\n",
230 | "$\n",
231 | "J(\\boldsymbol{\\theta}) = \\text{MSE}(\\boldsymbol{\\theta}) + r \\alpha \\sum\\limits_{i=1}^{n}\\left| \\theta_i \\right| + \\dfrac{1 - r}{2} \\alpha \\sum\\limits_{i=1}^{n}{{\\theta_i}^2}\n",
232 | "$\n",
233 | "\n",
234 | "\n",
235 | "**Equation 4-13: Logistic Regression model estimated probability (vectorized form)**\n",
236 | "\n",
237 | "$\n",
238 | "\\hat{p} = h_{\\boldsymbol{\\theta}}(\\mathbf{x}) = \\sigma(\\boldsymbol{\\theta}^T \\mathbf{x})\n",
239 | "$\n",
240 | "\n",
241 | "\n",
242 | "**Equation 4-14: Logistic function**\n",
243 | "\n",
244 | "$\n",
245 | "\\sigma(t) = \\dfrac{1}{1 + \\exp(-t)}\n",
246 | "$\n",
247 | "\n",
248 | "\n",
249 | "**Equation 4-15: Logistic Regression model prediction**\n",
250 | "\n",
251 | "$\n",
252 | "\\hat{y} =\n",
253 | "\\begin{cases}\n",
254 | " 0 & \\text{if } \\hat{p} < 0.5, \\\\\n",
255 | " 1 & \\text{if } \\hat{p} \\geq 0.5.\n",
256 | "\\end{cases}\n",
257 | "$\n",
258 | "\n",
259 | "\n",
260 | "**Equation 4-16: Cost function of a single training instance**\n",
261 | "\n",
262 | "$\n",
263 | "c(\\boldsymbol{\\theta}) =\n",
264 | "\\begin{cases}\n",
265 | " -\\log(\\hat{p}) & \\text{if } y = 1, \\\\\n",
266 | " -\\log(1 - \\hat{p}) & \\text{if } y = 0.\n",
267 | "\\end{cases}\n",
268 | "$\n",
269 | "\n",
270 | "\n",
271 | "**Equation 4-17: Logistic Regression cost function (log loss)**\n",
272 | "\n",
273 | "$\n",
274 | "J(\\boldsymbol{\\theta}) = -\\dfrac{1}{m} \\sum\\limits_{i=1}^{m}{\\left[ y^{(i)} log\\left(\\hat{p}^{(i)}\\right) + (1 - y^{(i)}) log\\left(1 - \\hat{p}^{(i)}\\right)\\right]}\n",
275 | "$\n",
276 | "\n",
277 | "\n",
278 | "**Equation 4-18: Logistic cost function partial derivatives**\n",
279 | "\n",
280 | "$\n",
281 | "\\dfrac{\\partial}{\\partial \\theta_j} \\text{J}(\\boldsymbol{\\theta}) = \\dfrac{1}{m}\\sum\\limits_{i=1}^{m}\\left(\\mathbf{\\sigma(\\boldsymbol{\\theta}}^T \\mathbf{x}^{(i)}) - y^{(i)}\\right)\\, x_j^{(i)}\n",
282 | "$\n",
283 | "\n",
284 | "\n",
285 | "**Equation 4-19: Softmax score for class k**\n",
286 | "\n",
287 | "$\n",
288 | "s_k(\\mathbf{x}) = ({\\boldsymbol{\\theta}^{(k)}})^T \\mathbf{x}\n",
289 | "$\n",
290 | "\n",
291 | "\n",
292 | "**Equation 4-20: Softmax function**\n",
293 | "\n",
294 | "$\n",
295 | "\\hat{p}_k = \\sigma\\left(\\mathbf{s}(\\mathbf{x})\\right)_k = \\dfrac{\\exp\\left(s_k(\\mathbf{x})\\right)}{\\sum\\limits_{j=1}^{K}{\\exp\\left(s_j(\\mathbf{x})\\right)}}\n",
296 | "$\n",
297 | "\n",
298 | "\n",
299 | "**Equation 4-21: Softmax Regression classifier prediction**\n",
300 | "\n",
301 | "$\n",
302 | "\\hat{y} = \\underset{k}{\\operatorname{argmax}} \\, \\sigma\\left(\\mathbf{s}(\\mathbf{x})\\right)_k = \\underset{k}{\\operatorname{argmax}} \\, s_k(\\mathbf{x}) = \\underset{k}{\\operatorname{argmax}} \\, \\left( ({\\boldsymbol{\\theta}^{(k)}})^T \\mathbf{x} \\right)\n",
303 | "$\n",
304 | "\n",
305 | "\n",
306 | "**Equation 4-22: Cross entropy cost function**\n",
307 | "\n",
308 | "$\n",
309 | "J(\\boldsymbol{\\Theta}) = - \\dfrac{1}{m}\\sum\\limits_{i=1}^{m}\\sum\\limits_{k=1}^{K}{y_k^{(i)}\\log\\left(\\hat{p}_k^{(i)}\\right)}\n",
310 | "$\n",
311 | "\n",
312 | "**Cross entropy between two discrete probability distributions $p$ and $q$ (page 141):**\n",
313 | "$ H(p, q) = -\\sum\\limits_{x}p(x) \\log q(x) $\n",
314 | "\n",
315 | "\n",
316 | "**Equation 4-23: Cross entropy gradient vector for class _k_**\n",
317 | "\n",
318 | "$\n",
319 | "\\nabla_{\\boldsymbol{\\theta}^{(k)}} \\, J(\\boldsymbol{\\Theta}) = \\dfrac{1}{m} \\sum\\limits_{i=1}^{m}{ \\left ( \\hat{p}^{(i)}_k - y_k^{(i)} \\right ) \\mathbf{x}^{(i)}}\n",
320 | "$\n"
321 | ]
322 | },
323 | {
324 | "cell_type": "markdown",
325 | "metadata": {},
326 | "source": [
327 | "# Chapter 5\n",
328 | "**Equation 5-1: Gaussian RBF**\n",
329 | "\n",
330 | "$\n",
331 | "{\\displaystyle \\phi_{\\gamma}(\\mathbf{x}, \\boldsymbol{\\ell})} = {\\displaystyle \\exp({\\displaystyle -\\gamma \\left\\| \\mathbf{x} - \\boldsymbol{\\ell} \\right\\|^2})}\n",
332 | "$\n",
333 | "\n",
334 | "\n",
335 | "**Equation 5-2: Linear SVM classifier prediction**\n",
336 | "\n",
337 | "$\n",
338 | "\\hat{y} = \\begin{cases}\n",
339 | " 0 & \\text{if } \\mathbf{w}^T \\mathbf{x} + b < 0, \\\\\n",
340 | " 1 & \\text{if } \\mathbf{w}^T \\mathbf{x} + b \\geq 0\n",
341 | "\\end{cases}\n",
342 | "$\n",
343 | "\n",
344 | "\n",
345 | "**Equation 5-3: Hard margin linear SVM classifier objective**\n",
346 | "\n",
347 | "$\n",
348 | "\\begin{split}\n",
349 | "&\\underset{\\mathbf{w}, b}{\\operatorname{minimize}}\\quad{\\frac{1}{2}\\mathbf{w}^T \\mathbf{w}} \\\\\n",
350 | "&\\text{subject to} \\quad t^{(i)}(\\mathbf{w}^T \\mathbf{x}^{(i)} + b) \\ge 1 \\quad \\text{for } i = 1, 2, \\dots, m\n",
351 | "\\end{split}\n",
352 | "$\n",
353 | "\n",
354 | "\n",
355 | "**Equation 5-4: Soft margin linear SVM classifier objective**\n",
356 | "\n",
357 | "$\n",
358 | "\\begin{split}\n",
359 | "&\\underset{\\mathbf{w}, b, \\mathbf{\\zeta}}{\\operatorname{minimize}}\\quad{\\dfrac{1}{2}\\mathbf{w}^T \\mathbf{w} + C \\sum\\limits_{i=1}^m{\\zeta^{(i)}}}\\\\\n",
360 | "&\\text{subject to} \\quad t^{(i)}(\\mathbf{w}^T \\mathbf{x}^{(i)} + b) \\ge 1 - \\zeta^{(i)} \\quad \\text{and} \\quad \\zeta^{(i)} \\ge 0 \\quad \\text{for } i = 1, 2, \\dots, m\n",
361 | "\\end{split}\n",
362 | "$\n",
363 | "\n",
364 | "\n",
365 | "**Equation 5-5: Quadratic Programming problem**\n",
366 | "\n",
367 | "$\n",
368 | "\\begin{split}\n",
369 | "\\underset{\\mathbf{p}}{\\text{Minimize}} \\quad & \\dfrac{1}{2} \\mathbf{p}^T \\mathbf{H} \\mathbf{p} \\quad + \\quad \\mathbf{f}^T \\mathbf{p} \\\\\n",
370 | "\\text{subject to} \\quad & \\mathbf{A} \\mathbf{p} \\le \\mathbf{b} \\\\\n",
371 | "\\text{where } &\n",
372 | "\\begin{cases}\n",
373 | " \\mathbf{p} & \\text{ is an }n_p\\text{-dimensional vector (} n_p = \\text{number of parameters),}\\\\\n",
374 | " \\mathbf{H} & \\text{ is an }n_p \\times n_p \\text{ matrix,}\\\\\n",
375 | " \\mathbf{f} & \\text{ is an }n_p\\text{-dimensional vector,}\\\\\n",
376 | " \\mathbf{A} & \\text{ is an } n_c \\times n_p \\text{ matrix (}n_c = \\text{number of constraints),}\\\\\n",
377 | " \\mathbf{b} & \\text{ is an }n_c\\text{-dimensional vector.}\n",
378 | "\\end{cases}\n",
379 | "\\end{split}\n",
380 | "$\n",
381 | "\n",
382 | "\n",
383 | "**Equation 5-6: Dual form of the linear SVM objective**\n",
384 | "\n",
385 | "$\n",
386 | "\\begin{split}\n",
387 | "\\underset{\\mathbf{\\alpha}}{\\operatorname{minimize}}\n",
388 | "\\dfrac{1}{2}\\sum\\limits_{i=1}^{m}{\n",
389 | " \\sum\\limits_{j=1}^{m}{\n",
390 | " \\alpha^{(i)} \\alpha^{(j)} t^{(i)} t^{(j)} {\\mathbf{x}^{(i)}}^T \\mathbf{x}^{(j)}\n",
391 | " }\n",
392 | "} \\quad - \\quad \\sum\\limits_{i=1}^{m}{\\alpha^{(i)}}\\\\\n",
393 | "\\text{subject to}\\quad \\alpha^{(i)} \\ge 0 \\quad \\text{for }i = 1, 2, \\dots, m\n",
394 | "\\end{split}\n",
395 | "$\n",
396 | "\n",
397 | "\n",
398 | "**Equation 5-7: From the dual solution to the primal solution**\n",
399 | "\n",
400 | "$\n",
401 | "\\begin{split}\n",
402 | "&\\hat{\\mathbf{w}} = \\sum_{i=1}^{m}{\\hat{\\alpha}}^{(i)}t^{(i)}\\mathbf{x}^{(i)}\\\\\n",
403 | "&\\hat{b} = \\dfrac{1}{n_s}\\sum\\limits_{\\scriptstyle i=1 \\atop {\\scriptstyle {\\hat{\\alpha}}^{(i)} > 0}}^{m}{\\left(t^{(i)} - ({\\hat{\\mathbf{w}}}^T \\mathbf{x}^{(i)})\\right)}\n",
404 | "\\end{split}\n",
405 | "$\n",
406 | "\n",
407 | "\n",
408 | "**Equation 5-8: Second-degree polynomial mapping**\n",
409 | "\n",
410 | "$\n",
411 | "\\phi\\left(\\mathbf{x}\\right) = \\phi\\left( \\begin{pmatrix}\n",
412 | " x_1 \\\\\n",
413 | " x_2\n",
414 | "\\end{pmatrix} \\right) = \\begin{pmatrix}\n",
415 | " {x_1}^2 \\\\\n",
416 | " \\sqrt{2} \\, x_1 x_2 \\\\\n",
417 | " {x_2}^2\n",
418 | "\\end{pmatrix}\n",
419 | "$\n",
420 | "\n",
421 | "\n",
422 | "**Equation 5-9: Kernel trick for a 2^nd^-degree polynomial mapping**\n",
423 | "\n",
424 | "$\n",
425 | "\\begin{split}\n",
426 | "\\phi(\\mathbf{a})^T \\phi(\\mathbf{b}) & \\quad = \\begin{pmatrix}\n",
427 | " {a_1}^2 \\\\\n",
428 | " \\sqrt{2} \\, a_1 a_2 \\\\\n",
429 | " {a_2}^2\n",
430 | " \\end{pmatrix}^T \\begin{pmatrix}\n",
431 | " {b_1}^2 \\\\\n",
432 | " \\sqrt{2} \\, b_1 b_2 \\\\\n",
433 | " {b_2}^2\n",
434 | "\\end{pmatrix} = {a_1}^2 {b_1}^2 + 2 a_1 b_1 a_2 b_2 + {a_2}^2 {b_2}^2 \\\\\n",
435 | " & \\quad = \\left( a_1 b_1 + a_2 b_2 \\right)^2 = \\left( \\begin{pmatrix}\n",
436 | " a_1 \\\\\n",
437 | " a_2\n",
438 | "\\end{pmatrix}^T \\begin{pmatrix}\n",
439 | " b_1 \\\\\n",
440 | " b_2\n",
441 | " \\end{pmatrix} \\right)^2 = (\\mathbf{a}^T \\mathbf{b})^2\n",
442 | "\\end{split}\n",
443 | "$\n",
444 | "\n",
445 | "**In the text about the kernel trick (page 162):**\n",
446 | "[...], then you can replace this dot product of transformed vectors simply by $ ({\\mathbf{x}^{(i)}}^T \\mathbf{x}^{(j)})^2 $\n",
447 | "\n",
448 | "\n",
449 | "**Equation 5-10: Common kernels**\n",
450 | "\n",
451 | "$\n",
452 | "\\begin{split}\n",
453 | "\\text{Linear:} & \\quad K(\\mathbf{a}, \\mathbf{b}) = \\mathbf{a}^T \\mathbf{b} \\\\\n",
454 | "\\text{Polynomial:} & \\quad K(\\mathbf{a}, \\mathbf{b}) = \\left(\\gamma \\mathbf{a}^T \\mathbf{b} + r \\right)^d \\\\\n",
455 | "\\text{Gaussian RBF:} & \\quad K(\\mathbf{a}, \\mathbf{b}) = \\exp({\\displaystyle -\\gamma \\left\\| \\mathbf{a} - \\mathbf{b} \\right\\|^2}) \\\\\n",
456 | "\\text{Sigmoid:} & \\quad K(\\mathbf{a}, \\mathbf{b}) = \\tanh\\left(\\gamma \\mathbf{a}^T \\mathbf{b} + r\\right)\n",
457 | "\\end{split}\n",
458 | "$\n",
459 | "\n",
460 | "**Equation 5-11: Making predictions with a kernelized SVM**\n",
461 | "\n",
462 | "$\n",
463 | "\\begin{split}\n",
464 | "h_{\\hat{\\mathbf{w}}, \\hat{b}}\\left(\\phi(\\mathbf{x}^{(n)})\\right) & = \\,\\hat{\\mathbf{w}}^T \\phi(\\mathbf{x}^{(n)}) + \\hat{b} = \\left(\\sum_{i=1}^{m}{\\hat{\\alpha}}^{(i)}t^{(i)}\\phi(\\mathbf{x}^{(i)})\\right)^T \\phi(\\mathbf{x}^{(n)}) + \\hat{b}\\\\\n",
465 | " & = \\, \\sum_{i=1}^{m}{\\hat{\\alpha}}^{(i)}t^{(i)}\\left(\\phi(\\mathbf{x}^{(i)})^T \\phi(\\mathbf{x}^{(n)})\\right) + \\hat{b}\\\\\n",
466 | " & = \\sum\\limits_{\\scriptstyle i=1 \\atop {\\scriptstyle {\\hat{\\alpha}}^{(i)} > 0}}^{m}{\\hat{\\alpha}}^{(i)}t^{(i)} K(\\mathbf{x}^{(i)}, \\mathbf{x}^{(n)}) + \\hat{b}\n",
467 | "\\end{split}\n",
468 | "$\n",
469 | "\n",
470 | "\n",
471 | "**Equation 5-12: Computing the bias term using the kernel trick**\n",
472 | "\n",
473 | "$\n",
474 | "\\begin{split}\n",
475 | "\\hat{b} & = \\dfrac{1}{n_s}\\sum\\limits_{\\scriptstyle i=1 \\atop {\\scriptstyle {\\hat{\\alpha}}^{(i)} > 0}}^{m}{\\left(t^{(i)} - {\\hat{\\mathbf{w}}}^T \\phi(\\mathbf{x}^{(i)})\\right)} = \\dfrac{1}{n_s}\\sum\\limits_{\\scriptstyle i=1 \\atop {\\scriptstyle {\\hat{\\alpha}}^{(i)} > 0}}^{m}{\\left(t^{(i)} - {\n",
476 | " \\left(\\sum_{j=1}^{m}{\\hat{\\alpha}}^{(j)}t^{(j)}\\phi(\\mathbf{x}^{(j)})\\right)\n",
477 | " }^T \\phi(\\mathbf{x}^{(i)})\\right)}\\\\\n",
478 | " & = \\dfrac{1}{n_s}\\sum\\limits_{\\scriptstyle i=1 \\atop {\\scriptstyle {\\hat{\\alpha}}^{(i)} > 0}}^{m}{\\left(t^{(i)} -\n",
479 | "\\sum\\limits_{\\scriptstyle j=1 \\atop {\\scriptstyle {\\hat{\\alpha}}^{(j)} > 0}}^{m}{\n",
480 | " {\\hat{\\alpha}}^{(j)} t^{(j)} K(\\mathbf{x}^{(i)},\\mathbf{x}^{(j)})\n",
481 | "}\n",
482 | "\\right)}\n",
483 | "\\end{split}\n",
484 | "$\n",
485 | "\n",
486 | "\n",
487 | "**Equation 5-13: Linear SVM classifier cost function**\n",
488 | "\n",
489 | "$\n",
490 | "J(\\mathbf{w}, b) = \\dfrac{1}{2} \\mathbf{w}^T \\mathbf{w} \\quad + \\quad C {\\displaystyle \\sum\\limits_{i=1}^{m}max\\left(0, t^{(i)} - (\\mathbf{w}^T \\mathbf{x}^{(i)} + b) \\right)}\n",
491 | "$\n",
492 | "\n",
493 | "\n"
494 | ]
495 | },
496 | {
497 | "cell_type": "markdown",
498 | "metadata": {},
499 | "source": [
500 | "# Chapter 6\n",
501 | "**Equation 6-1: Gini impurity**\n",
502 | "\n",
503 | "$\n",
504 | "G_i = 1 - \\sum\\limits_{k=1}^{n}{{p_{i,k}}^2}\n",
505 | "$\n",
506 | "\n",
507 | "\n",
508 | "**Equation 6-2: CART cost function for classification**\n",
509 | "\n",
510 | "$\n",
511 | "\\begin{split}\n",
512 | "&J(k, t_k) = \\dfrac{m_{\\text{left}}}{m}G_\\text{left} + \\dfrac{m_{\\text{right}}}{m}G_{\\text{right}}\\\\\n",
513 | "&\\text{where }\\begin{cases}\n",
514 | "G_\\text{left/right} \\text{ measures the impurity of the left/right subset,}\\\\\n",
515 | "m_\\text{left/right} \\text{ is the number of instances in the left/right subset.}\n",
516 | "\\end{cases}\n",
517 | "\\end{split}\n",
518 | "$\n",
519 | "\n",
520 | "**Entropy computation example (page 173):**\n",
521 | "\n",
522 | "$ -\\frac{49}{54}\\log_2(\\frac{49}{54}) - \\frac{5}{54}\\log_2(\\frac{5}{54}) $\n",
523 | "\n",
524 | "\n",
525 | "**Equation 6-3: Entropy**\n",
526 | "\n",
527 | "$\n",
528 | "H_i = -\\sum\\limits_{k=1 \\atop p_{i,k} \\ne 0}^{n}{{p_{i,k}}\\log_2(p_{i,k})}\n",
529 | "$\n",
530 | "\n",
531 | "\n",
532 | "**Equation 6-4: CART cost function for regression**\n",
533 | "\n",
534 | "$\n",
535 | "J(k, t_k) = \\dfrac{m_{\\text{left}}}{m}\\text{MSE}_\\text{left} + \\dfrac{m_{\\text{right}}}{m}\\text{MSE}_{\\text{right}} \\quad\n",
536 | "\\text{where }\n",
537 | "\\begin{cases}\n",
538 | "\\text{MSE}_{\\text{node}} = \\sum\\limits_{\\scriptstyle i \\in \\text{node}}(\\hat{y}_{\\text{node}} - y^{(i)})^2\\\\\n",
539 | "\\hat{y}_\\text{node} = \\dfrac{1}{m_{\\text{node}}}\\sum\\limits_{\\scriptstyle i \\in \\text{node}}y^{(i)}\n",
540 | "\\end{cases}\n",
541 | "$\n"
542 | ]
543 | },
544 | {
545 | "cell_type": "markdown",
546 | "metadata": {},
547 | "source": [
548 | "# Chapter 7\n",
549 | "\n",
550 | "**Equation 7-1: Weighted error rate of the $j^\\text{th}$ predictor**\n",
551 | "\n",
552 | "$\n",
553 | "r_j = \\dfrac{\\displaystyle \\sum\\limits_{\\textstyle {i=1 \\atop \\hat{y}_j^{(i)} \\ne y^{(i)}}}^{m}{w^{(i)}}}{\\displaystyle \\sum\\limits_{i=1}^{m}{w^{(i)}}} \\quad\n",
554 | "\\text{where }\\hat{y}_j^{(i)}\\text{ is the }j^{\\text{th}}\\text{ predictor's prediction for the }i^{\\text{th}}\\text{ instance.}\n",
555 | "$\n",
556 | "\n",
557 | "**Equation 7-2: Predictor weight**\n",
558 | "\n",
559 | "$\n",
560 | "\\begin{split}\n",
561 | "\\alpha_j = \\eta \\log{\\dfrac{1 - r_j}{r_j}}\n",
562 | "\\end{split}\n",
563 | "$\n",
564 | "\n",
565 | "\n",
566 | "**Equation 7-3: Weight update rule**\n",
567 | "\n",
568 | "$\n",
569 | "\\begin{split}\n",
570 | "& \\text{ for } i = 1, 2, \\dots, m \\\\\n",
571 | "& w^{(i)} \\leftarrow\n",
572 | "\\begin{cases}\n",
573 | "w^{(i)} & \\text{if }\\hat{y_j}^{(i)} = y^{(i)}\\\\\n",
574 | "w^{(i)} \\exp(\\alpha_j) & \\text{if }\\hat{y_j}^{(i)} \\ne y^{(i)}\n",
575 | "\\end{cases}\n",
576 | "\\end{split}\n",
577 | "$\n",
578 | "\n",
579 | "**In the text page 194:**\n",
580 | "\n",
581 | "Then all the instance weights are normalized (i.e., divided by $ \\sum_{i=1}^{m}{w^{(i)}} $).\n",
582 | "\n",
583 | "\n",
584 | "**Equation 7-4: AdaBoost predictions**\n",
585 | "\n",
586 | "$\n",
587 | "\\hat{y}(\\mathbf{x}) = \\underset{k}{\\operatorname{argmax}}{\\sum\\limits_{\\scriptstyle j=1 \\atop \\scriptstyle \\hat{y}_j(\\mathbf{x}) = k}^{N}{\\alpha_j}} \\quad \\text{where }N\\text{ is the number of predictors.}\n",
588 | "$\n",
589 | "\n",
590 | "\n"
591 | ]
592 | },
593 | {
594 | "cell_type": "markdown",
595 | "metadata": {},
596 | "source": [
597 | "# Chapter 8\n",
598 | "\n",
599 | "**Equation 8-1: Principal components matrix**\n",
600 | "\n",
601 | "$\n",
602 | "\\mathbf{V}^T =\n",
603 | "\\begin{pmatrix}\n",
604 | " \\mid & \\mid & & \\mid \\\\\n",
605 | " \\mathbf{c_1} & \\mathbf{c_2} & \\cdots & \\mathbf{c_n} \\\\\n",
606 | " \\mid & \\mid & & \\mid\n",
607 | "\\end{pmatrix}\n",
608 | "$\n",
609 | "\n",
610 | "\n",
611 | "**Equation 8-2: Projecting the training set down to _d_ dimensions**\n",
612 | "\n",
613 | "$\n",
614 | "\\mathbf{X}_{d\\text{-proj}} = \\mathbf{X} \\mathbf{W}_d\n",
615 | "$\n",
616 | "\n",
617 | "\n",
618 | "**Equation 8-3: PCA inverse transformation, back to the original number of dimensions**\n",
619 | "\n",
620 | "$\n",
621 | "\\mathbf{X}_{\\text{recovered}} = \\mathbf{X}_{d\\text{-proj}} {\\mathbf{W}_d}^T\n",
622 | "$\n",
623 | "\n",
624 | "\n",
625 | "$ \\sum_{j=1}^{m}{w_{i,j}\\mathbf{x}^{(j)}} $\n",
626 | "\n",
627 | "\n",
628 | "**Equation 8-4: LLE step 1: linearly modeling local relationships**\n",
629 | "\n",
630 | "$\n",
631 | "\\begin{split}\n",
632 | "& \\hat{\\mathbf{W}} = \\underset{\\mathbf{W}}{\\operatorname{argmin}}{\\displaystyle \\sum\\limits_{i=1}^{m}} \\left\\|\\mathbf{x}^{(i)} - \\sum\\limits_{j=1}^{m}{w_{i,j}}\\mathbf{x}^{(j)}\\right\\|^2\\\\\n",
633 | "& \\text{subject to }\n",
634 | "\\begin{cases}\n",
635 | " w_{i,j}=0 & \\text{if }\\mathbf{x}^{(j)} \\text{ is not one of the }k\\text{ c.n. of }\\mathbf{x}^{(i)}\\\\\n",
636 | " \\sum\\limits_{j=1}^{m}w_{i,j} = 1 & \\text{for }i=1, 2, \\dots, m\n",
637 | "\\end{cases}\n",
638 | "\\end{split}\n",
639 | "$\n",
640 | "\n",
641 | "**In the text page 223:**\n",
642 | "\n",
643 | "[...] then we want the squared distance between $\\mathbf{z}^{(i)}$ and $ \\sum_{j=1}^{m}{\\hat{w}_{i,j}\\mathbf{z}^{(j)}} $ to be as small as possible.\n",
644 | "\n",
645 | "\n",
646 | "**Equation 8-5: LLE step 2: reducing dimensionality while preserving relationships**\n",
647 | "\n",
648 | "$\n",
649 | "\\hat{\\mathbf{Z}} = \\underset{\\mathbf{Z}}{\\operatorname{argmin}}{\\displaystyle \\sum\\limits_{i=1}^{m}} \\left\\|\\mathbf{z}^{(i)} - \\sum\\limits_{j=1}^{m}{\\hat{w}_{i,j}}\\mathbf{z}^{(j)}\\right\\|^2\n",
650 | "$\n"
651 | ]
652 | },
653 | {
654 | "cell_type": "markdown",
655 | "metadata": {},
656 | "source": [
657 | "# Chapter 9\n",
658 | "\n",
659 | "**Equation 9-1: Rectified linear unit**\n",
660 | "\n",
661 | "$\n",
662 | "h_{\\mathbf{w}, b}(\\mathbf{X}) = \\max(\\mathbf{X} \\mathbf{w} + b, 0)\n",
663 | "$"
664 | ]
665 | },
666 | {
667 | "cell_type": "markdown",
668 | "metadata": {},
669 | "source": [
670 | "# Chapter 10\n",
671 | "\n",
672 | "**Equation 10-1: Common step functions used in Perceptrons**\n",
673 | "\n",
674 | "$\n",
675 | "\\begin{split}\n",
676 | "\\operatorname{heaviside}(z) =\n",
677 | "\\begin{cases}\n",
678 | "0 & \\text{if }z < 0\\\\\n",
679 | "1 & \\text{if }z \\ge 0\n",
680 | "\\end{cases} & \\quad\\quad\n",
681 | "\\operatorname{sgn}(z) =\n",
682 | "\\begin{cases}\n",
683 | "-1 & \\text{if }z < 0\\\\\n",
684 | "0 & \\text{if }z = 0\\\\\n",
685 | "+1 & \\text{if }z > 0\n",
686 | "\\end{cases}\n",
687 | "\\end{split}\n",
688 | "$\n",
689 | "\n",
690 | "\n",
691 | "**Equation 10-2: Perceptron learning rule (weight update)**\n",
692 | "\n",
693 | "$\n",
694 | "{w_{i,j}}^{(\\text{next step})} = w_{i,j} + \\eta (y_j - \\hat{y}_j) x_i\n",
695 | "$\n",
696 | "\n",
697 | "\n",
698 | "**In the text page 266:**\n",
699 | "\n",
700 | "It will be initialized randomly, using a truncated normal (Gaussian) distribution with a standard deviation of $ 2 / \\sqrt{\\text{n}_\\text{inputs}} $.\n"
701 | ]
702 | },
703 | {
704 | "cell_type": "markdown",
705 | "metadata": {},
706 | "source": [
707 | "# Chapter 11\n",
708 | "**Equation 11-1: Xavier initialization (when using the logistic activation function)**\n",
709 | "\n",
710 | "$\n",
711 | "\\begin{split}\n",
712 | "& \\text{Normal distribution with mean 0 and standard deviation }\n",
713 | "\\sigma = \\sqrt{\\dfrac{2}{n_\\text{inputs} + n_\\text{outputs}}}\\\\\n",
714 | "& \\text{Or a uniform distribution between -r and +r, with }\n",
715 | "r = \\sqrt{\\dfrac{6}{n_\\text{inputs} + n_\\text{outputs}}}\n",
716 | "\\end{split}\n",
717 | "$\n",
718 | "\n",
719 | "**In the text page 278:**\n",
720 | "\n",
721 | "When the number of input connections is roughly equal to the number of output\n",
722 | "connections, you get simpler equations (e.g., $ \\sigma = 1 / \\sqrt{n_\\text{inputs}} $ or $ r = \\sqrt{3} / \\sqrt{n_\\text{inputs}} $).\n",
723 | "\n",
724 | "**Table 11-1: Initialization parameters for each type of activation function**\n",
725 | "\n",
726 | "* Logistic uniform: $ r = \\sqrt{\\dfrac{6}{n_\\text{inputs} + n_\\text{outputs}}} $\n",
727 | "* Logistic normal: $ \\sigma = \\sqrt{\\dfrac{2}{n_\\text{inputs} + n_\\text{outputs}}} $\n",
728 | "* Hyperbolic tangent uniform: $ r = 4 \\sqrt{\\dfrac{6}{n_\\text{inputs} + n_\\text{outputs}}} $\n",
729 | "* Hyperbolic tangent normal: $ \\sigma = 4 \\sqrt{\\dfrac{2}{n_\\text{inputs} + n_\\text{outputs}}} $\n",
730 | "* ReLU (and its variants) uniform: $ r = \\sqrt{2} \\sqrt{\\dfrac{6}{n_\\text{inputs} + n_\\text{outputs}}} $\n",
731 | "* ReLU (and its variants) normal: $ \\sigma = \\sqrt{2} \\sqrt{\\dfrac{2}{n_\\text{inputs} + n_\\text{outputs}}} $\n",
732 | "\n",
733 | "**Equation 11-2: ELU activation function**\n",
734 | "\n",
735 | "$\n",
736 | "\\operatorname{ELU}_\\alpha(z) =\n",
737 | "\\begin{cases}\n",
738 | "\\alpha(\\exp(z) - 1) & \\text{if } z < 0\\\\\n",
739 | "z & if z \\ge 0\n",
740 | "\\end{cases}\n",
741 | "$\n",
742 | "\n",
743 | "\n",
744 | "**Equation 11-3: Batch Normalization algorithm**\n",
745 | "\n",
746 | "$\n",
747 | "\\begin{split}\n",
748 | "1.\\quad & \\mathbf{\\mu}_B = \\dfrac{1}{m_B}\\sum\\limits_{i=1}^{m_B}{\\mathbf{x}^{(i)}}\\\\\n",
749 | "2.\\quad & {\\mathbf{\\sigma}_B}^2 = \\dfrac{1}{m_B}\\sum\\limits_{i=1}^{m_B}{(\\mathbf{x}^{(i)} - \\mathbf{\\mu}_B)^2}\\\\\n",
750 | "3.\\quad & \\hat{\\mathbf{x}}^{(i)} = \\dfrac{\\mathbf{x}^{(i)} - \\mathbf{\\mu}_B}{\\sqrt{{\\mathbf{\\sigma}_B}^2 + \\epsilon}}\\\\\n",
751 | "4.\\quad & \\mathbf{z}^{(i)} = \\gamma \\hat{\\mathbf{x}}^{(i)} + \\beta\n",
752 | "\\end{split}\n",
753 | "$\n",
754 | "\n",
755 | "**In the text page 285:**\n",
756 | "\n",
757 | "[...] given a new value $v$, the running average $v$ is updated through the equation:\n",
758 | "\n",
759 | "$ \\hat{v} \\gets \\hat{v} \\times \\text{momentum} + v \\times (1 - \\text{momentum}) $\n",
760 | "\n",
761 | "**Equation 11-4: Momentum algorithm**\n",
762 | "\n",
763 | "1. $\\mathbf{m} \\gets \\beta \\mathbf{m} - \\eta \\nabla_\\boldsymbol{\\theta}J(\\boldsymbol{\\theta})$\n",
764 | "2. $\\boldsymbol{\\theta} \\gets \\boldsymbol{\\theta} + \\mathbf{m}$\n",
765 | "\n",
766 | "**In the text page 296:**\n",
767 | "\n",
768 | "You can easily verify that if the gradient remains constant, the terminal velocity (i.e., the maximum size of the weight updates) is equal to that gradient multiplied by the learning rate η multiplied by $ \\frac{1}{1 - \\beta} $.\n",
769 | "\n",
770 | "\n",
771 | "**Equation 11-5: Nesterov Accelerated Gradient algorithm**\n",
772 | "\n",
773 | "1. $\\mathbf{m} \\gets \\beta \\mathbf{m} - \\eta \\nabla_\\boldsymbol{\\theta}J(\\boldsymbol{\\theta} + \\beta \\mathbf{m})$\n",
774 | "2. $\\boldsymbol{\\theta} \\gets \\boldsymbol{\\theta} + \\mathbf{m}$\n",
775 | "\n",
776 | "**Equation 11-6: AdaGrad algorithm**\n",
777 | "\n",
778 | "1. $\\mathbf{s} \\gets \\mathbf{s} + \\nabla_\\boldsymbol{\\theta}J(\\boldsymbol{\\theta}) \\otimes \\nabla_\\boldsymbol{\\theta}J(\\boldsymbol{\\theta})$\n",
779 | "2. $\\boldsymbol{\\theta} \\gets \\boldsymbol{\\theta} - \\eta \\, \\nabla_\\boldsymbol{\\theta}J(\\boldsymbol{\\theta}) \\oslash {\\sqrt{\\mathbf{s} + \\epsilon}}$\n",
780 | "\n",
781 | "**In the text page 298-299:**\n",
782 | "\n",
783 | "This vectorized form is equivalent to computing $s_i \\gets s_i + \\left( \\dfrac{\\partial J(\\boldsymbol{\\theta})}{\\partial \\theta_i} \\right)^2$ for each element $s_i$ of the vector $\\mathbf{s}$.\n",
784 | "\n",
785 | "**In the text page 299:**\n",
786 | "\n",
787 | "This vectorized form is equivalent to computing $ \\theta_i \\gets \\theta_i - \\eta \\, \\dfrac{\\partial J(\\boldsymbol{\\theta})}{\\partial \\theta_i} \\dfrac{1}{\\sqrt{s_i + \\epsilon}} $ for all parameters $\\theta_i$ (simultaneously).\n",
788 | "\n",
789 | "\n",
790 | "**Equation 11-7: RMSProp algorithm**\n",
791 | "\n",
792 | "1. $\\mathbf{s} \\gets \\beta \\mathbf{s} + (1 - \\beta ) \\nabla_\\boldsymbol{\\theta}J(\\boldsymbol{\\theta}) \\otimes \\nabla_\\boldsymbol{\\theta}J(\\boldsymbol{\\theta})$\n",
793 | "2. $\\boldsymbol{\\theta} \\gets \\boldsymbol{\\theta} - \\eta \\, \\nabla_\\boldsymbol{\\theta}J(\\boldsymbol{\\theta}) \\oslash {\\sqrt{\\mathbf{s} + \\epsilon}}$\n",
794 | "\n",
795 | "\n",
796 | "**Equation 11-8: Adam algorithm**\n",
797 | "\n",
798 | "1. $\\mathbf{m} \\gets \\beta_1 \\mathbf{m} - (1 - \\beta_1) \\nabla_\\boldsymbol{\\theta}J(\\boldsymbol{\\theta})$\n",
799 | "2. $\\mathbf{s} \\gets \\beta_2 \\mathbf{s} + (1 - \\beta_2) \\nabla_\\boldsymbol{\\theta}J(\\boldsymbol{\\theta}) \\otimes \\nabla_\\boldsymbol{\\theta}J(\\boldsymbol{\\theta})$\n",
800 | "3. $\\hat{\\mathbf{m}} \\gets \\left(\\dfrac{\\mathbf{m}}{1 - {\\beta_1}^T}\\right)$\n",
801 | "4. $\\hat{\\mathbf{s}} \\gets \\left(\\dfrac{\\mathbf{s}}{1 - {\\beta_2}^T}\\right)$\n",
802 | "5. $\\boldsymbol{\\theta} \\gets \\boldsymbol{\\theta} + \\eta \\, \\hat{\\mathbf{m}} \\oslash {\\sqrt{\\hat{\\mathbf{s}} + \\epsilon}}$\n",
803 | "\n",
804 | "**In the text page 309:**\n",
805 | "\n",
806 | "We typically implement this constraint by computing $\\left\\| \\mathbf{w} \\right\\|_2$ after each training step\n",
807 | "and clipping $\\mathbf{w}$ if needed $ \\left( \\mathbf{w} \\gets \\mathbf{w} \\dfrac{r}{\\left\\| \\mathbf{w} \\right\\|_2} \\right) $.\n",
808 | "\n",
809 | "\n"
810 | ]
811 | },
812 | {
813 | "cell_type": "markdown",
814 | "metadata": {},
815 | "source": [
816 | "# Chapter 13\n",
817 | "\n",
818 | "**Equation 13-1: Computing the output of a neuron in a convolutional layer**\n",
819 | "\n",
820 | "$\n",
821 | "z_{i,j,k} = b_k + \\sum\\limits_{u = 0}^{f_h - 1} \\, \\, \\sum\\limits_{v = 0}^{f_w - 1} \\, \\, \\sum\\limits_{k' = 0}^{f_{n'} - 1} \\, \\, x_{i', j', k'} \\times w_{u, v, k', k}\n",
822 | "\\quad \\text{with }\n",
823 | "\\begin{cases}\n",
824 | "i' = i \\times s_h + u \\\\\n",
825 | "j' = j \\times s_w + v\n",
826 | "\\end{cases}\n",
827 | "$\n",
828 | "\n",
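829 | "To make the index arithmetic concrete, here is a direct (and deliberately slow) NumPy transcription of Equation 13-1 for a single output neuron; all shapes and values are assumptions:\n",
830 | "\n",
831 | "```python\n",
832 | "import numpy as np\n",
833 | "\n",
834 | "fh, fw, fnp, fn = 3, 3, 2, 4         # filter height/width, input/output feature maps\n",
835 | "sh, sw = 1, 1                        # vertical and horizontal strides\n",
836 | "x = np.random.rand(12, 12, fnp)      # previous layer's output (height, width, fn')\n",
837 | "w = np.random.rand(fh, fw, fnp, fn)  # connection weights\n",
838 | "b = np.zeros(fn)                     # bias terms\n",
839 | "\n",
840 | "i, j, k = 2, 3, 1                    # an arbitrary neuron in feature map k\n",
841 | "z = b[k]\n",
842 | "for u in range(fh):\n",
843 | "    for v in range(fw):\n",
844 | "        for kp in range(fnp):\n",
845 | "            z += x[i * sh + u, j * sw + v, kp] * w[u, v, kp, k]\n",
846 | "```\n",
847 | "\n",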
829 | "**Equation 13-2: Local response normalization**\n",
830 | "\n",
831 | "$\n",
832 | "b_i = a_i \\left(k + \\alpha \\sum\\limits_{j=j_\\text{low}}^{j_\\text{high}}{{a_j}^2} \\right)^{-\\beta} \\quad \\text{with }\n",
833 | "\\begin{cases}\n",
834 | " j_\\text{high} = \\min\\left(i + \\dfrac{r}{2}, f_n-1\\right) \\\\\n",
835 | " j_\\text{low} = \\max\\left(0, i - \\dfrac{r}{2}\\right)\n",
836 | "\\end{cases}\n",
837 | "$\n",
838 | "\n",
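839 | "A small NumPy sketch of Equation 13-2 for one spatial position, assuming an integer depth radius; the activations and hyperparameter values are made up:\n",
840 | "\n",
841 | "```python\n",
842 | "import numpy as np\n",
843 | "\n",
844 | "a = np.random.rand(10)               # activations across the f_n feature maps\n",
845 | "fn, k, alpha, beta, r = len(a), 1.0, 1e-4, 0.75, 2\n",
846 | "b = np.empty_like(a)\n",
847 | "for i in range(fn):\n",
848 | "    j_low = max(0, i - r // 2)\n",
849 | "    j_high = min(i + r // 2, fn - 1)\n",
850 | "    b[i] = a[i] * (k + alpha * np.sum(a[j_low:j_high + 1] ** 2)) ** (-beta)\n",
851 | "```\n",
852 | "\n",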
839 | "\n"
840 | ]
841 | },
842 | {
843 | "cell_type": "markdown",
844 | "metadata": {},
845 | "source": [
846 | "# Chapter 14\n",
847 | "\n",
848 | "**Equation 14-1: Output of a recurrent layer for a single instance**\n",
849 | "\n",
850 | "$\n",
851 | "\\mathbf{y}_{(t)} = \\phi\\left({\\mathbf{W}_x}^T{\\mathbf{x}_{(t)}} + {{\\mathbf{W}_y}^T\\mathbf{y}_{(t-1)}} + \\mathbf{b} \\right)\n",
852 | "$\n",
853 | "\n",
854 | "\n",
855 | "**Equation 14-2: Outputs of a layer of recurrent neurons for all instances in a mini-batch**\n",
856 | "\n",
857 | "$\n",
858 | "\\begin{split}\n",
859 | "\\mathbf{Y}_{(t)} & = \\phi\\left(\\mathbf{X}_{(t)} \\mathbf{W}_{x} + \\mathbf{Y}_{(t-1)} \\mathbf{W}_{y} + \\mathbf{b} \\right) \\\\\n",
860 | "& = \\phi\\left(\n",
861 | "\\left[\\mathbf{X}_{(t)} \\quad \\mathbf{Y}_{(t-1)} \\right]\n",
862 | " \\mathbf{W} + \\mathbf{b} \\right) \\text{ with } \\mathbf{W}=\n",
863 | "\\left[ \\begin{matrix}\n",
864 | " \\mathbf{W}_x\\\\\n",
865 | " \\mathbf{W}_y\n",
866 | "\\end{matrix} \\right]\n",
867 | "\\end{split}\n",
868 | "$\n",
869 | "\n",
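870 | "The two forms of Equation 14-2 are easy to check numerically with NumPy (the shapes and the choice of tanh for $\\phi$ are assumptions):\n",
871 | "\n",
872 | "```python\n",
873 | "import numpy as np\n",
874 | "\n",
875 | "m, n_inputs, n_neurons = 4, 3, 5\n",
876 | "X_t = np.random.rand(m, n_inputs)\n",
877 | "Y_prev = np.random.rand(m, n_neurons)\n",
878 | "Wx = np.random.rand(n_inputs, n_neurons)\n",
879 | "Wy = np.random.rand(n_neurons, n_neurons)\n",
880 | "b = np.zeros(n_neurons)\n",
881 | "\n",
882 | "Y_t = np.tanh(X_t @ Wx + Y_prev @ Wy + b)         # first form\n",
883 | "W = np.vstack([Wx, Wy])                           # stack Wx on top of Wy\n",
884 | "Y_t2 = np.tanh(np.hstack([X_t, Y_prev]) @ W + b)  # concatenated form\n",
885 | "assert np.allclose(Y_t, Y_t2)\n",
886 | "```\n",
887 | "\n",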
870 | "**In the text page 391:**\n",
871 | "\n",
872 | "Just like in regular backpropagation, there is a first forward pass through the unrolled network (represented by the dashed arrows); then the output sequence is evaluated using a cost function $ C(\\mathbf{Y}_{(t_\\text{min})}, \\mathbf{Y}_{(t_\\text{min}+1)}, \\dots, \\mathbf{Y}_{(t_\\text{max})}) $ (where $t_\\text{min}$ and $t_\\text{max}$ are the first and last output time steps, not counting the ignored outputs)[...]\n",
873 | "\n",
874 | "\n",
875 | "**Equation 14-3: LSTM computations**\n",
876 | "\n",
877 | "$\n",
878 | "\\begin{split}\n",
879 | "\\mathbf{i}_{(t)}&=\\sigma({\\mathbf{W}_{xi}}^T \\mathbf{x}_{(t)} + {\\mathbf{W}_{hi}}^T \\mathbf{h}_{(t-1)} + \\mathbf{b}_i)\\\\\n",
880 | "\\mathbf{f}_{(t)}&=\\sigma({\\mathbf{W}_{xf}}^T \\mathbf{x}_{(t)} + {\\mathbf{W}_{hf}}^T \\mathbf{h}_{(t-1)} + \\mathbf{b}_f)\\\\\n",
881 | "\\mathbf{o}_{(t)}&=\\sigma({\\mathbf{W}_{xo}}^T \\mathbf{x}_{(t)} + {\\mathbf{W}_{ho}}^T \\mathbf{h}_{(t-1)} + \\mathbf{b}_o)\\\\\n",
882 | "\\mathbf{g}_{(t)}&=\\operatorname{tanh}({\\mathbf{W}_{xg}}^T \\mathbf{x}_{(t)} + {\\mathbf{W}_{hg}}^T \\mathbf{h}_{(t-1)} + \\mathbf{b}_g)\\\\\n",
883 | "\\mathbf{c}_{(t)}&=\\mathbf{f}_{(t)} \\otimes \\mathbf{c}_{(t-1)} \\, + \\, \\mathbf{i}_{(t)} \\otimes \\mathbf{g}_{(t)}\\\\\n",
884 | "\\mathbf{y}_{(t)}&=\\mathbf{h}_{(t)} = \\mathbf{o}_{(t)} \\otimes \\operatorname{tanh}(\\mathbf{c}_{(t)})\n",
885 | "\\end{split}\n",
886 | "$\n",
887 | "\n",
888 | "\n",
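889 | "A step-by-step NumPy sketch of Equation 14-3 for a single instance; `x` and `h` are treated as row vectors, so `x @ Wxi` plays the role of ${\\mathbf{W}_{xi}}^T \\mathbf{x}_{(t)}$, and all shapes are assumptions:\n",
890 | "\n",
891 | "```python\n",
892 | "import numpy as np\n",
893 | "\n",
894 | "def sigmoid(z):\n",
895 | "    return 1 / (1 + np.exp(-z))\n",
896 | "\n",
897 | "n_inputs, n_units = 3, 4\n",
898 | "x = np.random.rand(n_inputs)\n",
899 | "h = np.zeros(n_units)                  # previous short-term state\n",
900 | "c = np.zeros(n_units)                  # previous long-term state\n",
901 | "Wxi, Wxf, Wxo, Wxg = (np.random.rand(n_inputs, n_units) for _ in range(4))\n",
902 | "Whi, Whf, Who, Whg = (np.random.rand(n_units, n_units) for _ in range(4))\n",
903 | "bi = bf = bo = bg = np.zeros(n_units)\n",
904 | "\n",
905 | "i = sigmoid(x @ Wxi + h @ Whi + bi)    # input gate\n",
906 | "f = sigmoid(x @ Wxf + h @ Whf + bf)    # forget gate\n",
907 | "o = sigmoid(x @ Wxo + h @ Who + bo)    # output gate\n",
908 | "g = np.tanh(x @ Wxg + h @ Whg + bg)    # main candidate layer\n",
909 | "c = f * c + i * g                      # new long-term state\n",
910 | "y = h = o * np.tanh(c)                 # output and new short-term state\n",
911 | "```\n",
912 | "\n",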
889 | "**Equation 14-4: GRU computations**\n",
890 | "\n",
891 | "$\n",
892 | "\\begin{split}\n",
893 | "\\mathbf{z}_{(t)}&=\\sigma({\\mathbf{W}_{xz}}^T \\mathbf{x}_{(t)} + {\\mathbf{W}_{hz}}^T \\mathbf{h}_{(t-1)}) \\\\\n",
894 | "\\mathbf{r}_{(t)}&=\\sigma({\\mathbf{W}_{xr}}^T \\mathbf{x}_{(t)} + {\\mathbf{W}_{hr}}^T \\mathbf{h}_{(t-1)}) \\\\\n",
895 | "\\mathbf{g}_{(t)}&=\\operatorname{tanh}\\left({\\mathbf{W}_{xg}}^T \\mathbf{x}_{(t)} + {\\mathbf{W}_{hg}}^T (\\mathbf{r}_{(t)} \\otimes \\mathbf{h}_{(t-1)})\\right) \\\\\n",
896 | "\\mathbf{h}_{(t)}&=(1-\\mathbf{z}_{(t)}) \\otimes \\mathbf{h}_{(t-1)} + \\mathbf{z}_{(t)} \\otimes \\mathbf{g}_{(t)}\n",
897 | "\\end{split}\n",
898 | "$\n",
899 | "\n",
900 | "\n"
901 | ]
902 | },
903 | {
904 | "cell_type": "markdown",
905 | "metadata": {},
906 | "source": [
907 | "# Chapter 15\n",
908 | "\n",
909 | "**Equation 15-1: Kullback–Leibler divergence**\n",
910 | "\n",
911 | "$\n",
912 | "D_{\\mathrm{KL}}(P\\|Q) = \\sum\\limits_{i} P(i) \\log \\dfrac{P(i)}{Q(i)}\n",
913 | "$\n",
914 | "\n",
915 | "\n",
916 | "**Equation: KL divergence between the target sparsity _p_ and the actual sparsity _q_**\n",
917 | "\n",
918 | "$\n",
919 | "D_{\\mathrm{KL}}(p\\|q) = p \\, \\log \\dfrac{p}{q} + (1-p) \\log \\dfrac{1-p}{1-q}\n",
920 | "$\n",
921 | "\n",
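922 | "Both formulas translate directly into NumPy; here is a sketch (the example values of $p$ and $q$ are made up):\n",
923 | "\n",
924 | "```python\n",
925 | "import numpy as np\n",
926 | "\n",
927 | "def kl_divergence(P, Q):\n",
928 | "    # Equation 15-1, for discrete distributions (assumes Q(i) > 0 wherever P(i) > 0)\n",
929 | "    P, Q = np.asarray(P, dtype=float), np.asarray(Q, dtype=float)\n",
930 | "    mask = P > 0\n",
931 | "    return np.sum(P[mask] * np.log(P[mask] / Q[mask]))\n",
932 | "\n",
933 | "def kl_sparsity(p, q):\n",
934 | "    # KL divergence between two Bernoulli distributions with means p and q\n",
935 | "    return p * np.log(p / q) + (1 - p) * np.log((1 - p) / (1 - q))\n",
936 | "\n",
937 | "kl_sparsity(0.1, 0.3)   # sparsity loss when the target is 10% but the actual is 30%\n",
938 | "```\n",
939 | "\n",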
922 | "**In the text page 433:**\n",
923 | "\n",
924 | "One common variant is to train the encoder to output $\\gamma = \\log\\left(\\sigma^2\\right)$ rather than $\\sigma$.\n",
925 | "Wherever we need $\\sigma$ we can just compute $ \\sigma = \\exp\\left(\\dfrac{\\gamma}{2}\\right) $.\n",
926 | "\n",
927 | "\n"
928 | ]
929 | },
930 | {
931 | "cell_type": "markdown",
932 | "metadata": {},
933 | "source": [
934 | "# Chapter 16\n",
935 | "\n",
936 | "**Equation 16-1: Bellman Optimality Equation**\n",
937 | "\n",
938 | "$\n",
939 | "V^*(s) = \\underset{a}{\\max}\\sum\\limits_{s'}{T(s, a, s') [R(s, a, s') + \\gamma . V^*(s')]} \\quad \\text{for all }s\n",
940 | "$\n",
941 | "\n",
942 | "**Equation 16-2: Value Iteration algorithm**\n",
943 | "\n",
944 | "$\n",
945 | " V_{k+1}(s) \\gets \\underset{a}{\\max}\\sum\\limits_{s'}{T(s, a, s') [R(s, a, s') + \\gamma . V_k(s')]} \\quad \\text{for all }s\n",
946 | "$\n",
947 | "\n",
948 | "\n",
949 | "**Equation 16-3: Q-Value Iteration algorithm**\n",
950 | "\n",
951 | "$\n",
952 | " Q_{k+1}(s, a) \\gets \\sum\\limits_{s'}{T(s, a, s') [R(s, a, s') + \\gamma . \\underset{a'}{\\max}\\,{Q_k(s',a')}]} \\quad \\text{for all } (s,a)\n",
953 | "$\n",
954 | "\n",
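955 | "Here is a compact NumPy sketch of Q-Value Iteration on a made-up MDP (the transition and reward tensors are random placeholders, not a real environment):\n",
956 | "\n",
957 | "```python\n",
958 | "import numpy as np\n",
959 | "\n",
960 | "n_states, n_actions = 3, 2\n",
961 | "T = np.full((n_states, n_actions, n_states), 1 / n_states)  # T(s, a, s')\n",
962 | "R = np.random.rand(n_states, n_actions, n_states)           # R(s, a, s')\n",
963 | "gamma = 0.95\n",
964 | "\n",
965 | "Q = np.zeros((n_states, n_actions))\n",
966 | "for _ in range(50):   # apply Equation 16-3 until (approximate) convergence\n",
967 | "    Q = np.einsum('sap,sap->sa', T, R + gamma * Q.max(axis=1))\n",
968 | "\n",
969 | "pi_star = Q.argmax(axis=1)   # optimal policy, as explained just below\n",
970 | "```\n",
971 | "\n",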
955 | "**In the text page 458:**\n",
956 | "\n",
957 | "Once you have the optimal Q-Values, defining the optimal policy, noted $\\pi^{*}(s)$, is trivial: when the agent is in state $s$, it should choose the action with the highest Q-Value for that state: $ \\pi^{*}(s) = \\underset{a}{\\operatorname{argmax}} \\, Q^*(s, a) $.\n",
958 | "\n",
959 | "\n",
960 | "**Equation 16-4: TD Learning algorithm**\n",
961 | "\n",
962 | "$\n",
963 | "V_{k+1}(s) \\gets (1-\\alpha)V_k(s) + \\alpha\\left(r + \\gamma . V_k(s')\\right)\n",
964 | "$\n",
965 | "\n",
966 | "\n",
967 | "**Equation 16-5: Q-Learning algorithm**\n",
968 | "\n",
969 | "$\n",
970 | "Q_{k+1}(s, a) \\gets (1-\\alpha)Q_k(s,a) + \\alpha\\left(r + \\gamma . \\underset{a'}{\\max} \\, Q_k(s', a')\\right)\n",
971 | "$\n",
972 | "\n",
973 | "\n",
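974 | "A single Q-Learning update is just a weighted blend of the old estimate and the newly observed target; a minimal sketch (the transition values are invented):\n",
975 | "\n",
976 | "```python\n",
977 | "import numpy as np\n",
978 | "\n",
979 | "Q = np.zeros((3, 2))        # 3 states, 2 actions\n",
980 | "alpha, gamma = 0.1, 0.95\n",
981 | "\n",
982 | "def q_learning_step(Q, s, a, r, s_next):\n",
983 | "    # Equation 16-5: blend the old estimate with the observed target\n",
984 | "    target = r + gamma * Q[s_next].max()\n",
985 | "    Q[s, a] = (1 - alpha) * Q[s, a] + alpha * target\n",
986 | "\n",
987 | "q_learning_step(Q, s=0, a=1, r=1.0, s_next=2)   # one observed transition\n",
988 | "```\n",
989 | "\n",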
974 | "**Equation 16-6: Q-Learning using an exploration function**\n",
975 | "\n",
976 | "$\n",
977 | "Q(s, a) \\gets (1-\\alpha)Q(s,a) + \\alpha\\left(r + \\gamma \\, \\underset{a'}{\\max}f(Q(s', a'), N(s', a'))\\right)\n",
978 | "$\n",
979 | "\n",
980 | "**Equation 16-7: Target Q-Value**\n",
981 | "\n",
982 | "$\n",
983 | "y(s,a)=r+\\gamma\\,\\max_{a'}\\,Q_\\boldsymbol\\theta(s',a')\n",
984 | "$"
985 | ]
986 | },
987 | {
988 | "cell_type": "markdown",
989 | "metadata": {},
990 | "source": [
991 | "# Appendix A\n",
992 | "\n",
993 | "Equations that appear in the text:\n",
994 | "\n",
995 | "$\n",
996 | "\\mathbf{H} =\n",
997 | "\\begin{pmatrix}\n",
998 | "\\mathbf{H'} & 0 & \\cdots\\\\\n",
999 | "0 & 0 & \\\\\n",
1000 | "\\vdots & & \\ddots\n",
1001 | "\\end{pmatrix}\n",
1002 | "$\n",
1003 | "\n",
1004 | "\n",
1005 | "$\n",
1006 | "\\mathbf{A} =\n",
1007 | "\\begin{pmatrix}\n",
1008 | "\\mathbf{A'} & \\mathbf{I}_m \\\\\n",
1009 | "\\mathbf{0} & -\\mathbf{I}_m\n",
1010 | "\\end{pmatrix}\n",
1011 | "$\n",
1012 | "\n",
1013 | "\n",
1014 | "$ 1 - \\frac{1}{5}^2 - \\frac{4}{5}^2 $\n",
1015 | "\n",
1016 | "\n",
1017 | "$ 1 - \\frac{1}{2}^2 - \\frac{1}{2}^2 $\n",
1018 | "\n",
1019 | "\n",
1020 | "$ \\frac{2}{5} \\times $\n",
1021 | "\n",
1022 | "\n",
1023 | "$ \\frac{3}{5} \\times 0 $"
1024 | ]
1025 | },
1026 | {
1027 | "cell_type": "markdown",
1028 | "metadata": {},
1029 | "source": [
1030 | "# Appendix C"
1031 | ]
1032 | },
1033 | {
1034 | "cell_type": "markdown",
1035 | "metadata": {},
1036 | "source": [
1037 | "Equations that appear in the text:\n",
1038 | "\n",
1039 | "$ (\\hat{x}, \\hat{y}) $\n",
1040 | "\n",
1041 | "\n",
1042 | "$ \\hat{\\alpha} $\n",
1043 | "\n",
1044 | "\n",
1045 | "$ (\\hat{x}, \\hat{y}, \\hat{\\alpha}) $\n",
1046 | "\n",
1047 | "\n",
1048 | "$\n",
1049 | "\\begin{cases}\n",
1050 | "\\frac{\\partial}{\\partial x}g(x, y, \\alpha) = 2x - 3\\alpha\\\\\n",
1051 | "\\frac{\\partial}{\\partial y}g(x, y, \\alpha) = 2 - 2\\alpha\\\\\n",
1052 | "\\frac{\\partial}{\\partial \\alpha}g(x, y, \\alpha) = -3x - 2y - 1\\\\\n",
1053 | "\\end{cases}\n",
1054 | "$\n",
1055 | "\n",
1056 | "\n",
1057 | "$ 2\\hat{x} - 3\\hat{\\alpha} = 2 - 2\\hat{\\alpha} = -3\\hat{x} - 2\\hat{y} - 1 = 0 $\n",
1058 | "\n",
1059 | "\n",
1060 | "$ \\hat{x} = \\frac{3}{2} $\n",
1061 | "\n",
1062 | "\n",
1063 | "$ \\hat{y} = -\\frac{11}{4} $\n",
1064 | "\n",
1065 | "\n",
1066 | "$ \\hat{\\alpha} = 1 $\n",
1067 | "\n",
1068 | "\n",
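1069 | "These stationary-point values are easy to verify by solving the system above; a quick SymPy sketch (using SymPy here is my own choice, not the book's):\n",
1070 | "\n",
1071 | "```python\n",
1072 | "from sympy import symbols, solve\n",
1073 | "\n",
1074 | "x, y, alpha = symbols('x y alpha')\n",
1075 | "# Set the three partial derivatives of g to zero and solve:\n",
1076 | "solve([2*x - 3*alpha, 2 - 2*alpha, -3*x - 2*y - 1], [x, y, alpha])\n",
1077 | "# -> {x: 3/2, y: -11/4, alpha: 1}\n",
1078 | "```\n",
1079 | "\n",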
1069 | "**Equation C-1: Generalized Lagrangian for the hard margin problem**\n",
1070 | "\n",
1071 | "$\n",
1072 | "\\begin{split}\n",
1073 | "\\mathcal{L}(\\mathbf{w}, b, \\mathbf{\\alpha}) = \\frac{1}{2}\\mathbf{w}^T \\mathbf{w} - \\sum\\limits_{i=1}^{m}{\\alpha^{(i)} \\left(t^{(i)}(\\mathbf{w}^T \\mathbf{x}^{(i)} + b) - 1\\right)} \\\\\n",
1074 | "\\text{with}\\quad \\alpha^{(i)} \\ge 0 \\quad \\text{for }i = 1, 2, \\dots, m\n",
1075 | "\\end{split}\n",
1076 | "$\n",
1077 | "\n",
1078 | "**More equations in the text:**\n",
1079 | "\n",
1080 | "$ (\\hat{\\mathbf{w}}, \\hat{b}, \\hat{\\mathbf{\\alpha}}) $\n",
1081 | "\n",
1082 | "\n",
1083 | "$ t^{(i)}(\\hat{\\mathbf{w}}^T \\mathbf{x}^{(i)} + \\hat{b}) \\ge 1 \\quad \\text{for } i = 1, 2, \\dots, m $\n",
1084 | "\n",
1085 | "\n",
1086 | "$ {\\hat{\\alpha}}^{(i)} \\ge 0 \\quad \\text{for } i = 1, 2, \\dots, m $\n",
1087 | "\n",
1088 | "\n",
1089 | "$ {\\hat{\\alpha}}^{(i)} = 0 $\n",
1090 | "\n",
1091 | "\n",
1092 | "$ t^{(i)}((\\hat{\\mathbf{w}})^T \\mathbf{x}^{(i)} + \\hat{b}) = 1 $\n",
1093 | "\n",
1094 | "\n",
1095 | "$ {\\hat{\\alpha}}^{(i)} = 0 $\n",
1096 | "\n",
1097 | "\n",
1098 | "**Equation C-2: Partial derivatives of the generalized Lagrangian**\n",
1099 | "\n",
1100 | "$\n",
1101 | "\\begin{split}\n",
1102 | "\\nabla_{\\mathbf{w}}\\mathcal{L}(\\mathbf{w}, b, \\mathbf{\\alpha}) = \\mathbf{w} - \\sum\\limits_{i=1}^{m}\\alpha^{(i)}t^{(i)}\\mathbf{x}^{(i)}\\\\\n",
1103 | "\\dfrac{\\partial}{\\partial b}\\mathcal{L}(\\mathbf{w}, b, \\mathbf{\\alpha}) = -\\sum\\limits_{i=1}^{m}\\alpha^{(i)}t^{(i)}\n",
1104 | "\\end{split}\n",
1105 | "$\n",
1106 | "\n",
1107 | "\n",
1108 | "**Equation C-3: Properties of the stationary points**\n",
1109 | "\n",
1110 | "$\n",
1111 | "\\begin{split}\n",
1112 | "\\hat{\\mathbf{w}} = \\sum_{i=1}^{m}{\\hat{\\alpha}}^{(i)}t^{(i)}\\mathbf{x}^{(i)}\\\\\n",
1113 | "\\sum_{i=1}^{m}{\\hat{\\alpha}}^{(i)}t^{(i)} = 0\n",
1114 | "\\end{split}\n",
1115 | "$\n",
1116 | "\n",
1117 | "\n",
1118 | "**Equation C-4: Dual form of the SVM problem**\n",
1119 | "\n",
1120 | "$\n",
1121 | "\\begin{split}\n",
1122 | "\\mathcal{L}(\\hat{\\mathbf{w}}, \\hat{b}, \\mathbf{\\alpha}) = \\dfrac{1}{2}\\sum\\limits_{i=1}^{m}{\n",
1123 | " \\sum\\limits_{j=1}^{m}{\n",
1124 | " \\alpha^{(i)} \\alpha^{(j)} t^{(i)} t^{(j)} {\\mathbf{x}^{(i)}}^T \\mathbf{x}^{(j)}\n",
1125 | " }\n",
1126 | "} \\quad - \\quad \\sum\\limits_{i=1}^{m}{\\alpha^{(i)}}\\\\\n",
1127 | "\\text{with}\\quad \\alpha^{(i)} \\ge 0 \\quad \\text{for }i = 1, 2, \\dots, m\n",
1128 | "\\end{split}\n",
1129 | "$\n",
1130 | "\n",
1131 | "**Some more equations in the text:**\n",
1132 | "\n",
1133 | "$ \\hat{\\mathbf{\\alpha}} $\n",
1134 | "\n",
1135 | "\n",
1136 | "$ {\\hat{\\alpha}}^{(i)} \\ge 0 $\n",
1137 | "\n",
1138 | "\n",
1139 | "$ \\hat{\\mathbf{\\alpha}} $\n",
1140 | "\n",
1141 | "\n",
1142 | "$ \\hat{\\mathbf{w}} $\n",
1143 | "\n",
1144 | "\n",
1145 | "$ \\hat{b} $\n",
1146 | "\n",
1147 | "\n",
1148 | "$ \\hat{b} = t^{(k)} - {\\hat{\\mathbf{w}}}^T \\mathbf{x}^{(k)} $\n",
1149 | "\n",
1150 | "\n",
1151 | "**Equation C-5: Bias term estimation using the dual form**\n",
1152 | "\n",
1153 | "$\n",
1154 | "\\hat{b} = \\dfrac{1}{n_s}\\sum\\limits_{\\scriptstyle i=1 \\atop {\\scriptstyle {\\hat{\\alpha}}^{(i)} > 0}}^{m}{\\left[t^{(i)} - {\\hat{\\mathbf{w}}}^T \\mathbf{x}^{(i)}\\right]}\n",
1155 | "$"
1156 | ]
1157 | },
1158 | {
1159 | "cell_type": "markdown",
1160 | "metadata": {},
1161 | "source": [
1162 | "# Appendix D"
1163 | ]
1164 | },
1165 | {
1166 | "cell_type": "markdown",
1167 | "metadata": {},
1168 | "source": [
1169 | "**Equation D-1: Partial derivatives of $f(x,y)$**\n",
1170 | "\n",
1171 | "$\n",
1172 | "\\begin{split}\n",
1173 | "\\dfrac{\\partial f}{\\partial x} & = \\dfrac{\\partial(x^2y)}{\\partial x} + \\dfrac{\\partial y}{\\partial x} + \\dfrac{\\partial 2}{\\partial x} = y \\dfrac{\\partial(x^2)}{\\partial x} + 0 + 0 = 2xy \\\\\n",
1174 | "\\dfrac{\\partial f}{\\partial y} & = \\dfrac{\\partial(x^2y)}{\\partial y} + \\dfrac{\\partial y}{\\partial y} + \\dfrac{\\partial 2}{\\partial y} = x^2 + 1 + 0 = x^2 + 1 \\\\\n",
1175 | "\\end{split}\n",
1176 | "$\n",
1177 | "\n",
1178 | "**In the text:**\n",
1179 | "\n",
1180 | "$ \\frac{\\partial g}{\\partial x} = 0 + (0 \\times x + y \\times 1) = y $\n",
1181 | "\n",
1182 | "\n",
1183 | "$ \\frac{\\partial x}{\\partial x} = 1 $\n",
1184 | "\n",
1185 | "\n",
1186 | "$ \\frac{\\partial y}{\\partial x} = 0 $\n",
1187 | "\n",
1188 | "\n",
1189 | "$ \\frac{\\partial (u \\times v)}{\\partial x} = \\frac{\\partial v}{\\partial x} \\times u + \\frac{\\partial u}{\\partial x} \\times u $\n",
1190 | "\n",
1191 | "\n",
1192 | "$ \\frac{\\partial g}{\\partial x} = 0 + (0 \\times x + y \\times 1) $\n",
1193 | "\n",
1194 | "\n",
1195 | "$ \\frac{\\partial g}{\\partial x} = y $\n",
1196 | "\n",
1197 | "\n",
1198 | "**Equation D-2: Derivative of a function _h_(_x_) at point _x_~0~**\n",
1199 | "\n",
1200 | "$\n",
1201 | "\\begin{split}\n",
1202 | "h'(x) & = \\underset{\\textstyle x \\to x_0}{\\lim}\\dfrac{h(x) - h(x_0)}{x - x_0}\\\\\n",
1203 | " & = \\underset{\\textstyle \\epsilon \\to 0}{\\lim}\\dfrac{h(x_0 + \\epsilon) - h(x_0)}{\\epsilon}\n",
1204 | "\\end{split}\n",
1205 | "$\n",
1206 | "\n",
1207 | "\n",
1208 | "**Equation D-3: A few operations with dual numbers**\n",
1209 | "\n",
1210 | "$\n",
1211 | "\\begin{split}\n",
1212 | "&\\lambda(a + b\\epsilon) = \\lambda a + \\lambda b \\epsilon\\\\\n",
1213 | "&(a + b\\epsilon) + (c + d\\epsilon) = (a + c) + (b + d)\\epsilon \\\\\n",
1214 | "&(a + b\\epsilon) \\times (c + d\\epsilon) = ac + (ad + bc)\\epsilon + (bd)\\epsilon^2 = ac + (ad + bc)\\epsilon\\\\\n",
1215 | "\\end{split}\n",
1216 | "$\n",
1217 | "\n",
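1218 | "These rules fit in a tiny Python class; applying it to $f(x, y) = x^2y + y + 2$ at $(3, 4)$ reproduces the partial derivatives quoted further down (the class is a sketch of forward-mode autodiff, not code from the book):\n",
1219 | "\n",
1220 | "```python\n",
1221 | "class Dual:\n",
1222 | "    def __init__(self, a, b=0.0):   # represents a + b*epsilon\n",
1223 | "        self.a, self.b = a, b\n",
1224 | "    def __add__(self, other):\n",
1225 | "        other = other if isinstance(other, Dual) else Dual(other)\n",
1226 | "        return Dual(self.a + other.a, self.b + other.b)\n",
1227 | "    __radd__ = __add__\n",
1228 | "    def __mul__(self, other):\n",
1229 | "        other = other if isinstance(other, Dual) else Dual(other)\n",
1230 | "        return Dual(self.a * other.a, self.a * other.b + self.b * other.a)\n",
1231 | "    __rmul__ = __mul__\n",
1232 | "\n",
1233 | "def f(x, y):\n",
1234 | "    return x * x * y + y + 2\n",
1235 | "\n",
1236 | "f(Dual(3, 1), Dual(4)).b   # df/dx(3, 4) = 24.0\n",
1237 | "f(Dual(3), Dual(4, 1)).b   # df/dy(3, 4) = 10.0\n",
1238 | "```\n",
1239 | "\n",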
1218 | "**In the text:**\n",
1219 | "\n",
1220 | "$ \\frac{\\partial f}{\\partial x}(3, 4) $\n",
1221 | "\n",
1222 | "\n",
1223 | "$ \\frac{\\partial f}{\\partial y}(3, 4) $\n",
1224 | "\n",
1225 | "\n",
1226 | "**Equation D-4: Chain rule**\n",
1227 | "\n",
1228 | "$\n",
1229 | "\\dfrac{\\partial f}{\\partial x} = \\dfrac{\\partial f}{\\partial n_i} \\times \\dfrac{\\partial n_i}{\\partial x}\n",
1230 | "$\n",
1231 | "\n",
1232 | "**In the text:**\n",
1233 | "\n",
1234 | "$ \\frac{\\partial f}{\\partial n_7} = 1 $\n",
1235 | "\n",
1236 | "\n",
1237 | "$ \\frac{\\partial f}{\\partial n_5} = \\frac{\\partial f}{\\partial n_7} \\times \\frac{\\partial n_7}{\\partial n_5} $\n",
1238 | "\n",
1239 | "\n",
1240 | "$ \\frac{\\partial f}{\\partial n_7} = 1 $\n",
1241 | "\n",
1242 | "\n",
1243 | "$ \\frac{\\partial n_7}{\\partial n_5} $\n",
1244 | "\n",
1245 | "\n",
1246 | "$ \\frac{\\partial n_7}{\\partial n_5} = 1 $\n",
1247 | "\n",
1248 | "\n",
1249 | "$ \\frac{\\partial f}{\\partial n_5} = 1 \\times 1 = 1 $\n",
1250 | "\n",
1251 | "\n",
1252 | "$ \\frac{\\partial f}{\\partial n_4} = \\frac{\\partial f}{\\partial n_5} \\times \\frac{\\partial n_5}{\\partial n_4} $\n",
1253 | "\n",
1254 | "\n",
1255 | "$ \\frac{\\partial n_5}{\\partial n_4} = n_2 $\n",
1256 | "\n",
1257 | "\n",
1258 | "$ \\frac{\\partial f}{\\partial n_4} = 1 \\times n_2 = 4 $\n",
1259 | "\n",
1260 | "\n",
1261 | "$ \\frac{\\partial f}{\\partial x} = 24 $\n",
1262 | "\n",
1263 | "\n",
1264 | "$ \\frac{\\partial f}{\\partial y} = 10 $"
1265 | ]
1266 | },
1267 | {
1268 | "cell_type": "markdown",
1269 | "metadata": {},
1270 | "source": [
1271 | "# Appendix E"
1272 | ]
1273 | },
1274 | {
1275 | "cell_type": "markdown",
1276 | "metadata": {},
1277 | "source": [
1278 | "**Equation E-1: Probability that the i^th^ neuron will output 1**\n",
1279 | "\n",
1280 | "$\n",
1281 | "p\\left(s_i^{(\\text{next step})} = 1\\right) \\, = \\, \\sigma\\left(\\frac{\\textstyle \\sum\\limits_{j = 1}^N{w_{i,j}s_j + b_i}}{\\textstyle T}\\right)\n",
1282 | "$\n",
1283 | "\n",
1284 | "**In the text:**\n",
1285 | "\n",
1286 | "$ \\dot{\\mathbf{x}} $\n",
1287 | "\n",
1288 | "\n",
1289 | "$ \\dot{\\mathbf{h}} $\n",
1290 | "\n",
1291 | "\n",
1292 | "**Equation E-2: Contrastive divergence weight update**\n",
1293 | "\n",
1294 | "$\n",
1295 | "w_{i,j}^{(\\text{next step})} = w_{i,j} + \\eta(\\mathbf{x}\\mathbf{h}^T - \\dot{\\mathbf{x}} \\dot {\\mathbf{h}}^T)\n",
1296 | "$"
1297 | ]
1298 | },
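1299 | {
1300 | "cell_type": "markdown",
1301 | "metadata": {},
1302 | "source": [
1303 | "A literal NumPy sketch of Equation E-2 (the shapes and random activations are assumptions; `x_dot` and `h_dot` stand for $\\dot{\\mathbf{x}}$ and $\\dot{\\mathbf{h}}$):\n",
1304 | "\n",
1305 | "```python\n",
1306 | "import numpy as np\n",
1307 | "\n",
1308 | "n_visible, n_hidden, eta = 6, 4, 0.1\n",
1309 | "W = np.zeros((n_visible, n_hidden))   # W[i, j] corresponds to w_{i,j}\n",
1310 | "x = np.random.rand(n_visible, 1)      # visible activations (column vector)\n",
1311 | "h = np.random.rand(n_hidden, 1)       # hidden activations for x\n",
1312 | "x_dot = np.random.rand(n_visible, 1)  # visible activations after the Gibbs step\n",
1313 | "h_dot = np.random.rand(n_hidden, 1)   # hidden activations for x_dot\n",
1314 | "\n",
1315 | "W = W + eta * (x @ h.T - x_dot @ h_dot.T)   # contrastive divergence update\n",
1316 | "```"
1317 | ]
1318 | },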
1299 | {
1300 | "cell_type": "markdown",
1301 | "metadata": {},
1302 | "source": [
1303 | "# Glossary\n",
1304 | "\n",
1305 | "In the text:\n",
1306 | "\n",
1307 | "$\\ell _1$\n",
1308 | "\n",
1309 | "\n",
1310 | "$\\ell _2$\n",
1311 | "\n",
1312 | "\n",
1313 | "$\\ell _k$\n",
1314 | "\n",
1315 | "\n",
1316 | "$ \\chi^2 $\n"
1317 | ]
1318 | },
1319 | {
1320 | "cell_type": "markdown",
1321 | "metadata": {},
1322 | "source": [
1323 | "Just in case your eyes hurt after all these equations, let's finish with the single most beautiful equation in the world. No, it's not $E = mc²$, it's obviously Euler's identity:"
1324 | ]
1325 | },
1326 | {
1327 | "cell_type": "markdown",
1328 | "metadata": {},
1329 | "source": [
1330 | "$e^{i\\pi}+1=0$"
1331 | ]
1332 | },
1333 | {
1334 | "cell_type": "code",
1335 | "execution_count": null,
1336 | "metadata": {
1337 | "collapsed": true
1338 | },
1339 | "outputs": [],
1340 | "source": []
1341 | }
1342 | ],
1343 | "metadata": {
1344 | "kernelspec": {
1345 | "display_name": "Python 3",
1346 | "language": "python",
1347 | "name": "python3"
1348 | },
1349 | "language_info": {
1350 | "codemirror_mode": {
1351 | "name": "ipython",
1352 | "version": 3
1353 | },
1354 | "file_extension": ".py",
1355 | "mimetype": "text/x-python",
1356 | "name": "python",
1357 | "nbconvert_exporter": "python",
1358 | "pygments_lexer": "ipython3",
1359 | "version": "3.6.5"
1360 | }
1361 | },
1362 | "nbformat": 4,
1363 | "nbformat_minor": 2
1364 | }
1365 |
--------------------------------------------------------------------------------