├── .gitignore
├── README.md
├── notebooks
├── Makefile
├── README.md
├── img
│ ├── .DS_Store
│ ├── 1d_conv.jpeg
│ ├── aae_dim_reduc_2.png
│ ├── ai_ml_dl.png
│ ├── alexnet.png
│ ├── algorithms1.png
│ ├── algorithms10.png
│ ├── algorithms11.png
│ ├── algorithms12.png
│ ├── algorithms13.png
│ ├── algorithms2.png
│ ├── algorithms3.png
│ ├── algorithms4.png
│ ├── algorithms5.png
│ ├── algorithms6.png
│ ├── algorithms7.png
│ ├── algorithms8.png
│ ├── algorithms9.png
│ ├── alphago.jpeg
│ ├── approximation.png
│ ├── breakthrough.jpg
│ ├── cartoonLVmodel.png
│ ├── celebA.png
│ ├── cnns
│ │ ├── .DS_Store
│ │ ├── 2dconv.gif
│ │ ├── alexnet.png
│ │ ├── cnn-filters-all.png
│ │ ├── cnn-filters1.png
│ │ ├── cnn-filters2.png
│ │ ├── cnn-filters4.png
│ │ ├── cnn.png
│ │ ├── convlayer.png
│ │ ├── flybrain.gif
│ │ ├── hierarchy.jpg
│ │ ├── hog.jpg
│ │ ├── hubel.ppm
│ │ ├── imagecaption.png
│ │ ├── imagenet.png
│ │ ├── layer1.png
│ │ ├── layer2.png
│ │ ├── layer3.png
│ │ ├── layer45.png
│ │ ├── lenet.png
│ │ ├── levels.png
│ │ ├── pathway.png
│ │ ├── pooling.jpg
│ │ ├── resnet-block-im.png
│ │ ├── resnet-block.png
│ │ ├── sift.jpg
│ │ ├── stylegan.png
│ │ └── unet.png
│ ├── coordinate_descent.png
│ ├── cornell_tech1.png
│ ├── cornell_tech2.svg
│ ├── cornell_tech3.png
│ ├── cornell_tech4.jpg
│ ├── cornell_tech5.png
│ ├── darts.png
│ ├── dcgan_feats.png
│ ├── decision_tree.png
│ ├── dl_data_curve.png
│ ├── dna_map.jpg
│ ├── functional_gradient.png
│ ├── functional_gradient.pptx
│ ├── google.png
│ ├── google_assistant.png
│ ├── image_captioning.jpeg
│ ├── kmeans_convergence.gif
│ ├── l1-vs-l2-annotated.png
│ ├── l1-vs-l2.png
│ ├── learning.png
│ ├── learning_curve1.png
│ ├── learning_curve2.png
│ ├── learning_curve3.png
│ ├── learning_curve4.png
│ ├── learning_curve5.png
│ ├── learning_curve6.png
│ ├── learning_curve7.png
│ ├── learning_curve8.png
│ ├── life3.0.jpg
│ ├── loss_curve1.png
│ ├── loss_curve2.png
│ ├── loss_curve3.png
│ ├── loss_curve4.png
│ ├── margin.png
│ ├── mogdata.png
│ ├── mogdata2.png
│ ├── mogdata2_v2.png
│ ├── mogdata_v2.png
│ ├── mogdensity.png
│ ├── mogdensity1d.png
│ ├── mogdensity1d_v2.png
│ ├── mogdensity_v2.png
│ ├── mogdensity_v3.png
│ ├── nns
│ │ ├── aneuron.jpeg
│ │ ├── backprop
│ │ │ ├── backprop-abstract-figure0.png
│ │ │ ├── backprop-abstract-figure1.png
│ │ │ ├── backprop-abstract-figure2.png
│ │ │ ├── backprop-abstract-figure3.png
│ │ │ ├── backprop-abstract-figure4.png
│ │ │ ├── backprop-abstract-figure5.png
│ │ │ ├── backprop-abstract-figure6.png
│ │ │ ├── backprop-twolayers-figure0.png
│ │ │ ├── backprop-twolayers-figure1.png
│ │ │ ├── backprop-twolayers-figure10.png
│ │ │ ├── backprop-twolayers-figure11.png
│ │ │ ├── backprop-twolayers-figure2.png
│ │ │ ├── backprop-twolayers-figure3.png
│ │ │ ├── backprop-twolayers-figure4.png
│ │ │ ├── backprop-twolayers-figure5.png
│ │ │ ├── backprop-twolayers-figure6.png
│ │ │ ├── backprop-twolayers-figure7.png
│ │ │ ├── backprop-twolayers-figure8.png
│ │ │ ├── backprop-twolayers-figure9.png
│ │ │ └── backprop-twolayers-legend.png
│ │ ├── bio-neuron.png
│ │ ├── bp1.png
│ │ ├── layers-1.png
│ │ ├── layers-2.png
│ │ ├── layers-3.png
│ │ ├── layers-4.png
│ │ ├── layers-L.png
│ │ ├── net-intro.png
│ │ └── nn-example.png
│ ├── obama.jpg
│ ├── obama2.jpg
│ ├── oldfMOG_v2.png
│ ├── oldfSingle_v2.png
│ ├── oldfaithful_v2.png
│ ├── pca_decomposition.png
│ ├── pca_example.png
│ ├── pca_example_plane.png
│ ├── pca_projection1.png
│ ├── pca_projection2.png
│ ├── pca_projection_data.png
│ ├── pca_reconstruction.png
│ ├── pca_two_views.png
│ ├── rl.png
│ ├── siri.png
│ ├── spam.png
│ ├── svm-margin-fig.key
│ ├── tesla.jpg
│ ├── tesla_data.png
│ ├── tesla_zoom.jpg
│ └── waymo.jpg
├── lecture1-introduction.ipynb
├── lecture10-svm-dual.ipynb
├── lecture11-kernels.ipynb
├── lecture12-decision-trees.ipynb
├── lecture12-neural-networks.ipynb
├── lecture13-boosting.ipynb
├── lecture14-neural-networks.ipynb
├── lecture15-deep-learning.ipynb
├── lecture16-unsupervised-learning.ipynb
├── lecture17-density-estimation.ipynb
├── lecture18-clustering.ipynb
├── lecture19-dimensionality-reduction.bak.ipynb
├── lecture19-dimensionality-reduction.ipynb
├── lecture2-supervised-learning.ipynb
├── lecture20-evaluation.ipynb
├── lecture21-model-iteration.ipynb
├── lecture22-evaluation-tools.ipynb
├── lecture23-overview.ipynb
├── lecture3-linear-regression.ipynb
├── lecture4-foundations-supervised-learning.ipynb
├── lecture5-maximum-likelihood.ipynb
├── lecture5b-probabilistic-perspectives.ipynb
├── lecture6-classification.ipynb
├── lecture7-generative-models.ipynb
├── lecture8-naive-bayes.ipynb
└── lecture9-support-vector-machines.ipynb
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # macOS Finder metadata; copies that are already tracked must also be
132 | # removed from the index with `git rm --cached`.
133 | .DS_Store
134 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Applied Machine Learning (Cornell CS5785)
2 |
3 | This repo contains executable course notes and slides for the Applied ML course at Cornell and Cornell Tech.
4 |
5 |
8 |
9 | These materials accompany a set of YouTube [lecture videos](https://www.youtube.com/watch?v=vcE9WGbi4QY&list=PL2UML_KCiC0UlY7iCQDSiGDMovaupqc83) from the Fall 2020 edition of the course.
10 |
11 | ## Contents
12 |
13 | This repo is organized as follows.
14 |
15 | ```
16 | .
17 | ├── README.md
18 | ├── notebooks # Notebooks and slides
19 | └── requirements.txt # Packages needed for your virtualenv
20 | ```
21 |
22 | ## Setup
23 |
24 | ### Requirements
25 |
26 | You should be able to run all the contents of this repo using the packages provided in `requirements.txt`.
27 |
28 | In a new `virtualenv`, run this:
29 | ```
30 | pip install -r requirements.txt
31 | ```
32 |
33 | ## Feedback
34 |
35 | Please send feedback to [Volodymyr Kuleshov](https://www.cs.cornell.edu/~kuleshov/)
36 |
--------------------------------------------------------------------------------
/notebooks/Makefile:
--------------------------------------------------------------------------------
1 | #LECTURES:=lecture1 lecture2 lecture3 lecture4 lecture5b lecture6
2 |
3 | # Converts every notebook in this folder into Reveal.js slides, HTML notes,
4 | # and PDFs with `jupyter nbconvert`.
5 | #
6 | # Targets:
7 | #   make notes    - build <notebook>.html for every notebook
8 | #   make slides   - build <notebook>.slides.html for every notebook
9 | #   make pdfs     - build <notebook>.pdf for every notebook
10 | #   make all      - build all of the above
11 | #   make clean    - delete generated *.html and *.pdf files
12 | #   make <file>   - build one output, e.g. lecture15-deep-learning.slides.html
13 |
14 | NOTEBOOKS:=$(wildcard *.ipynb)
15 |
16 | # Substitution references rewrite only the trailing ".ipynb" suffix, so a
17 | # notebook whose name contains "ipynb" elsewhere still maps to the right output.
18 | SLIDES:=$(NOTEBOOKS:.ipynb=.slides.html)
19 | HTML:=$(NOTEBOOKS:.ipynb=.html)
20 | PDFS:=$(NOTEBOOKS:.ipynb=.pdf)
21 |
22 | # Location of the Reveal.js assets handed to nbconvert. Served over https so
23 | # the slides also load when they are hosted on an https page.
24 | REVEAL_PREFIX:=https://cdnjs.cloudflare.com/ajax/libs/reveal.js/3.3.0
25 |
26 | # NOTE: this rule stays first so the default goal of a bare `make` is unchanged.
27 | $(SLIDES): %.slides.html: %.ipynb
28 | 	jupyter nbconvert $< --to slides --reveal-prefix "$(REVEAL_PREFIX)"
29 |
30 | $(HTML): %.html: %.ipynb
31 | 	jupyter nbconvert $< --to html_embed --reveal-prefix "$(REVEAL_PREFIX)"
32 |
33 | $(PDFS): %.pdf: %.ipynb
34 | 	jupyter nbconvert $< --to pdf --reveal-prefix "$(REVEAL_PREFIX)"
35 |
36 | # These names are commands, not files; declaring them phony keeps a stray
37 | # file called e.g. "clean" or "all" from silently disabling the target.
38 | .PHONY: notes slides pdfs clean all
39 |
40 | # Aggregate targets depend on their outputs directly instead of spawning a
41 | # nested `make`, so command-line flags such as -j and -n apply to the whole build.
42 | notes: $(HTML)
43 |
44 | slides: $(SLIDES)
45 |
46 | pdfs: $(PDFS)
47 |
48 | clean:
49 | 	rm -f *.html *.pdf
50 |
51 | all: notes slides pdfs
52 |
--------------------------------------------------------------------------------
/notebooks/README.md:
--------------------------------------------------------------------------------
1 | # Applied Machine Learning (Cornell CS5785): Notebooks and Slides
2 |
3 | This repo contains executable course notes and slides for the Applied ML course at Cornell and Cornell Tech.
4 |
5 | There are four types of files you can obtain from this folder. These contain all the materials shown in the video lectures.
6 | * Executable notebooks in `*.ipynb` format.
7 | * Slides used in the video lectures in HTML format and powered by Reveal.js `*.slides.html`
8 | * Portable course notes in `*.html` format; images are embedded in the notes.
9 | * Portable `*.pdf` files
10 |
11 | All four types of files have the same content; they're just in different formats.
12 |
13 | The Jupyter notebooks are fully interactive and you can use them to regenerate all the materials for the course.
14 |
15 | ## Setup
16 |
17 | In order to generate all the files, use the provided Makefile.
18 | ```
19 | make all
20 | ```
21 |
22 | If you want to generate each type of file separately, you can type
23 | ```
24 | make notes
25 | make slides
26 | make pdfs
27 | ```
28 |
29 | You can also generate specific files, e.g.:
30 | ```
31 | make lecture15-deep-learning.slides.html
32 | ```
33 |
34 | To reset the repo and remove the generated files, do this:
35 | ```
36 | make clean
37 | ```
38 |
39 | ### Requirements
40 |
41 | You should be able to run all the contents of this repo using the packages provided in `requirements.txt` at the root of the repo.
42 |
43 | In a new `virtualenv`, run this:
44 | ```
45 | pip install -r requirements.txt
46 | ```
47 |
48 | ## Issues
49 |
50 | Currently, there are issues in linking images to the contents:
51 | * The `*.slides.html` must be located in the same folder as the `img` folder in order to display images.
52 | * The `*.pdf` files do not contain images due to a limitation of `nbconvert`.
53 |
54 | However, the `*.html` notes are fully portable.
55 |
56 | ## Feedback
57 |
58 | Please send feedback to [Volodymyr Kuleshov](https://www.cs.cornell.edu/~kuleshov/)
59 |
--------------------------------------------------------------------------------
/notebooks/img/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/.DS_Store
--------------------------------------------------------------------------------
/notebooks/img/1d_conv.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/1d_conv.jpeg
--------------------------------------------------------------------------------
/notebooks/img/aae_dim_reduc_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/aae_dim_reduc_2.png
--------------------------------------------------------------------------------
/notebooks/img/ai_ml_dl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/ai_ml_dl.png
--------------------------------------------------------------------------------
/notebooks/img/alexnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/alexnet.png
--------------------------------------------------------------------------------
/notebooks/img/algorithms1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms1.png
--------------------------------------------------------------------------------
/notebooks/img/algorithms10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms10.png
--------------------------------------------------------------------------------
/notebooks/img/algorithms11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms11.png
--------------------------------------------------------------------------------
/notebooks/img/algorithms12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms12.png
--------------------------------------------------------------------------------
/notebooks/img/algorithms13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms13.png
--------------------------------------------------------------------------------
/notebooks/img/algorithms2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms2.png
--------------------------------------------------------------------------------
/notebooks/img/algorithms3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms3.png
--------------------------------------------------------------------------------
/notebooks/img/algorithms4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms4.png
--------------------------------------------------------------------------------
/notebooks/img/algorithms5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms5.png
--------------------------------------------------------------------------------
/notebooks/img/algorithms6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms6.png
--------------------------------------------------------------------------------
/notebooks/img/algorithms7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms7.png
--------------------------------------------------------------------------------
/notebooks/img/algorithms8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms8.png
--------------------------------------------------------------------------------
/notebooks/img/algorithms9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms9.png
--------------------------------------------------------------------------------
/notebooks/img/alphago.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/alphago.jpeg
--------------------------------------------------------------------------------
/notebooks/img/approximation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/approximation.png
--------------------------------------------------------------------------------
/notebooks/img/breakthrough.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/breakthrough.jpg
--------------------------------------------------------------------------------
/notebooks/img/cartoonLVmodel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cartoonLVmodel.png
--------------------------------------------------------------------------------
/notebooks/img/celebA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/celebA.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/.DS_Store
--------------------------------------------------------------------------------
/notebooks/img/cnns/2dconv.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/2dconv.gif
--------------------------------------------------------------------------------
/notebooks/img/cnns/alexnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/alexnet.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/cnn-filters-all.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/cnn-filters-all.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/cnn-filters1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/cnn-filters1.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/cnn-filters2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/cnn-filters2.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/cnn-filters4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/cnn-filters4.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/cnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/cnn.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/convlayer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/convlayer.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/flybrain.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/flybrain.gif
--------------------------------------------------------------------------------
/notebooks/img/cnns/hierarchy.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/hierarchy.jpg
--------------------------------------------------------------------------------
/notebooks/img/cnns/hog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/hog.jpg
--------------------------------------------------------------------------------
/notebooks/img/cnns/hubel.ppm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/hubel.ppm
--------------------------------------------------------------------------------
/notebooks/img/cnns/imagecaption.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/imagecaption.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/imagenet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/imagenet.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/layer1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/layer1.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/layer2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/layer2.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/layer3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/layer3.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/layer45.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/layer45.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/lenet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/lenet.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/levels.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/levels.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/pathway.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/pathway.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/pooling.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/pooling.jpg
--------------------------------------------------------------------------------
/notebooks/img/cnns/resnet-block-im.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/resnet-block-im.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/resnet-block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/resnet-block.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/sift.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/sift.jpg
--------------------------------------------------------------------------------
/notebooks/img/cnns/stylegan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/stylegan.png
--------------------------------------------------------------------------------
/notebooks/img/cnns/unet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/unet.png
--------------------------------------------------------------------------------
/notebooks/img/coordinate_descent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/coordinate_descent.png
--------------------------------------------------------------------------------
/notebooks/img/cornell_tech1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cornell_tech1.png
--------------------------------------------------------------------------------
/notebooks/img/cornell_tech2.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/notebooks/img/cornell_tech3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cornell_tech3.png
--------------------------------------------------------------------------------
/notebooks/img/cornell_tech4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cornell_tech4.jpg
--------------------------------------------------------------------------------
/notebooks/img/cornell_tech5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cornell_tech5.png
--------------------------------------------------------------------------------
/notebooks/img/darts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/darts.png
--------------------------------------------------------------------------------
/notebooks/img/dcgan_feats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/dcgan_feats.png
--------------------------------------------------------------------------------
/notebooks/img/decision_tree.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/decision_tree.png
--------------------------------------------------------------------------------
/notebooks/img/dl_data_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/dl_data_curve.png
--------------------------------------------------------------------------------
/notebooks/img/dna_map.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/dna_map.jpg
--------------------------------------------------------------------------------
/notebooks/img/functional_gradient.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/functional_gradient.png
--------------------------------------------------------------------------------
/notebooks/img/functional_gradient.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/functional_gradient.pptx
--------------------------------------------------------------------------------
/notebooks/img/google.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/google.png
--------------------------------------------------------------------------------
/notebooks/img/google_assistant.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/google_assistant.png
--------------------------------------------------------------------------------
/notebooks/img/image_captioning.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/image_captioning.jpeg
--------------------------------------------------------------------------------
/notebooks/img/kmeans_convergence.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/kmeans_convergence.gif
--------------------------------------------------------------------------------
/notebooks/img/l1-vs-l2-annotated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/l1-vs-l2-annotated.png
--------------------------------------------------------------------------------
/notebooks/img/l1-vs-l2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/l1-vs-l2.png
--------------------------------------------------------------------------------
/notebooks/img/learning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning.png
--------------------------------------------------------------------------------
/notebooks/img/learning_curve1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve1.png
--------------------------------------------------------------------------------
/notebooks/img/learning_curve2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve2.png
--------------------------------------------------------------------------------
/notebooks/img/learning_curve3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve3.png
--------------------------------------------------------------------------------
/notebooks/img/learning_curve4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve4.png
--------------------------------------------------------------------------------
/notebooks/img/learning_curve5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve5.png
--------------------------------------------------------------------------------
/notebooks/img/learning_curve6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve6.png
--------------------------------------------------------------------------------
/notebooks/img/learning_curve7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve7.png
--------------------------------------------------------------------------------
/notebooks/img/learning_curve8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve8.png
--------------------------------------------------------------------------------
/notebooks/img/life3.0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/life3.0.jpg
--------------------------------------------------------------------------------
/notebooks/img/loss_curve1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/loss_curve1.png
--------------------------------------------------------------------------------
/notebooks/img/loss_curve2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/loss_curve2.png
--------------------------------------------------------------------------------
/notebooks/img/loss_curve3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/loss_curve3.png
--------------------------------------------------------------------------------
/notebooks/img/loss_curve4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/loss_curve4.png
--------------------------------------------------------------------------------
/notebooks/img/margin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/margin.png
--------------------------------------------------------------------------------
/notebooks/img/mogdata.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdata.png
--------------------------------------------------------------------------------
/notebooks/img/mogdata2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdata2.png
--------------------------------------------------------------------------------
/notebooks/img/mogdata2_v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdata2_v2.png
--------------------------------------------------------------------------------
/notebooks/img/mogdata_v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdata_v2.png
--------------------------------------------------------------------------------
/notebooks/img/mogdensity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdensity.png
--------------------------------------------------------------------------------
/notebooks/img/mogdensity1d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdensity1d.png
--------------------------------------------------------------------------------
/notebooks/img/mogdensity1d_v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdensity1d_v2.png
--------------------------------------------------------------------------------
/notebooks/img/mogdensity_v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdensity_v2.png
--------------------------------------------------------------------------------
/notebooks/img/mogdensity_v3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdensity_v3.png
--------------------------------------------------------------------------------
/notebooks/img/nns/aneuron.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/aneuron.jpeg
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-abstract-figure0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-abstract-figure0.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-abstract-figure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-abstract-figure1.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-abstract-figure2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-abstract-figure2.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-abstract-figure3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-abstract-figure3.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-abstract-figure4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-abstract-figure4.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-abstract-figure5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-abstract-figure5.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-abstract-figure6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-abstract-figure6.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-twolayers-figure0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure0.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-twolayers-figure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure1.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-twolayers-figure10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure10.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-twolayers-figure11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure11.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-twolayers-figure2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure2.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-twolayers-figure3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure3.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-twolayers-figure4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure4.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-twolayers-figure5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure5.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-twolayers-figure6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure6.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-twolayers-figure7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure7.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-twolayers-figure8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure8.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-twolayers-figure9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure9.png
--------------------------------------------------------------------------------
/notebooks/img/nns/backprop/backprop-twolayers-legend.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-legend.png
--------------------------------------------------------------------------------
/notebooks/img/nns/bio-neuron.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/bio-neuron.png
--------------------------------------------------------------------------------
/notebooks/img/nns/bp1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/bp1.png
--------------------------------------------------------------------------------
/notebooks/img/nns/layers-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/layers-1.png
--------------------------------------------------------------------------------
/notebooks/img/nns/layers-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/layers-2.png
--------------------------------------------------------------------------------
/notebooks/img/nns/layers-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/layers-3.png
--------------------------------------------------------------------------------
/notebooks/img/nns/layers-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/layers-4.png
--------------------------------------------------------------------------------
/notebooks/img/nns/layers-L.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/layers-L.png
--------------------------------------------------------------------------------
/notebooks/img/nns/net-intro.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/net-intro.png
--------------------------------------------------------------------------------
/notebooks/img/nns/nn-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/nn-example.png
--------------------------------------------------------------------------------
/notebooks/img/obama.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/obama.jpg
--------------------------------------------------------------------------------
/notebooks/img/obama2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/obama2.jpg
--------------------------------------------------------------------------------
/notebooks/img/oldfMOG_v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/oldfMOG_v2.png
--------------------------------------------------------------------------------
/notebooks/img/oldfSingle_v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/oldfSingle_v2.png
--------------------------------------------------------------------------------
/notebooks/img/oldfaithful_v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/oldfaithful_v2.png
--------------------------------------------------------------------------------
/notebooks/img/pca_decomposition.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_decomposition.png
--------------------------------------------------------------------------------
/notebooks/img/pca_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_example.png
--------------------------------------------------------------------------------
/notebooks/img/pca_example_plane.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_example_plane.png
--------------------------------------------------------------------------------
/notebooks/img/pca_projection1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_projection1.png
--------------------------------------------------------------------------------
/notebooks/img/pca_projection2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_projection2.png
--------------------------------------------------------------------------------
/notebooks/img/pca_projection_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_projection_data.png
--------------------------------------------------------------------------------
/notebooks/img/pca_reconstruction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_reconstruction.png
--------------------------------------------------------------------------------
/notebooks/img/pca_two_views.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_two_views.png
--------------------------------------------------------------------------------
/notebooks/img/rl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/rl.png
--------------------------------------------------------------------------------
/notebooks/img/siri.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/siri.png
--------------------------------------------------------------------------------
/notebooks/img/spam.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/spam.png
--------------------------------------------------------------------------------
/notebooks/img/svm-margin-fig.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/svm-margin-fig.key
--------------------------------------------------------------------------------
/notebooks/img/tesla.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/tesla.jpg
--------------------------------------------------------------------------------
/notebooks/img/tesla_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/tesla_data.png
--------------------------------------------------------------------------------
/notebooks/img/tesla_zoom.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/tesla_zoom.jpg
--------------------------------------------------------------------------------
/notebooks/img/waymo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/waymo.jpg
--------------------------------------------------------------------------------
/notebooks/lecture12-neural-networks.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "i_f5u2x9nn6I",
8 | "slideshow": {
9 | "slide_type": "slide"
10 | }
11 | },
12 | "source": [
13 | "
\n",
14 | "\n",
15 | "# Lecture 13: Neural Networks\n",
16 | "\n",
17 | "### Applied Machine Learning\n",
18 | "\n",
19 | "__Volodymyr Kuleshov, Jin Sun__
Cornell Tech"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {
25 | "slideshow": {
26 | "slide_type": "slide"
27 | }
28 | },
29 | "source": [
30 | "# Part 1: An Artificial Neuron\n",
31 | "\n",
32 | "In this lecture, we will learn about a new class of machine learning algorithms inspired by the brain.\n",
33 | "\n",
34 | "We will start by defining a few building blocks for these algorithms, and draw connections to neuroscience."
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "# Review: Components of A Supervised Machine Learning Problem\n",
42 | "\n",
43 | "At a high level, a supervised machine learning problem has the following structure:\n",
44 | "\n",
45 | "$$ \\underbrace{\\text{Training Dataset}}_\\text{Attributes + Features} + \\underbrace{\\text{Learning Algorithm}}_\\text{Model Class + Objective + Optimizer } \\to \\text{Predictive Model} $$\n",
46 | "\n",
47 | "Where does the dataset come from?"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {
53 | "slideshow": {
54 | "slide_type": "slide"
55 | }
56 | },
57 | "source": [
58 | "# Review: Binary Classification\n",
59 | "\n",
60 | "In supervised learning, we fit a model of the form\n",
61 | "$$ f : \\mathcal{X} \\to \\mathcal{Y} $$\n",
62 | "that maps inputs $x \\in \\mathcal{X}$ to targets $y \\in \\mathcal{Y}$."
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {
68 | "slideshow": {
69 | "slide_type": "fragment"
70 | }
71 | },
72 | "source": [
73 | "In classification, the space of targets $\\mathcal{Y}$ is *discrete*. Classification is binary if $\\mathcal{Y} = \\{0,1\\}$."
74 | ]
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "metadata": {
79 | "slideshow": {
80 | "slide_type": "fragment"
81 | }
82 | },
83 | "source": [
84 | "Each value of $y$ is a *class*, and we are interested in finding a hyperplane that separates the different classes."
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {
90 | "slideshow": {
91 | "slide_type": "slide"
92 | }
93 | },
94 | "source": [
95 | "# Review: Logistic Regression\n",
96 | "\n",
97 | "Logistic regression fits a model of the form\n",
98 | "$$ f(x) = \\sigma(\\theta^\\top x) = \\frac{1}{1 + \\exp(-\\theta^\\top x)}, $$\n",
99 | "where\n",
100 | "$$ \\sigma(z) = \\frac{1}{1 + \\exp(-z)} $$\n",
101 | "is known as the *sigmoid* or *logistic* function."
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {
107 | "slideshow": {
108 | "slide_type": "subslide"
109 | }
110 | },
111 | "source": [
112 | "Here is what the logistic function looks like."
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": 1,
118 | "metadata": {
119 | "slideshow": {
120 | "slide_type": "-"
121 | }
122 | },
123 | "outputs": [
124 | {
125 | "data": {
126 | "text/plain": [
127 | "[]"
128 | ]
129 | },
130 | "execution_count": 1,
131 | "metadata": {},
132 | "output_type": "execute_result"
133 | },
134 | {
135 | "data": {
136 | "image/png": "\n",
137 | "text/plain": [
138 | ""
139 | ]
140 | },
141 | "metadata": {
142 | "needs_background": "light"
143 | },
144 | "output_type": "display_data"
145 | }
146 | ],
147 | "source": [
148 | "import numpy as np\n",
149 | "from matplotlib import pyplot as plt\n",
150 | "\n",
151 | "z = np.linspace(-5, 5)\n",
152 | "sigma = 1/(1+np.exp(-z))\n",
153 | "\n",
154 | "plt.plot(z, sigma)"
155 | ]
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "metadata": {},
160 | "source": [
161 | "# A Biological Neuron\n",
162 | "\n",
163 | "In order to define an artificial neuron, let's first look at a biological one.\n",
164 | "\n",
165 | "TODO: PUT NEURON IMAGE FROM HERE: https://cs231n.github.io/neural-networks-1/\n",
166 | "\n",
167 | "* Each neuron receives input signals from its dendrites\n",
168 | "* It produces output signals along its axon, which connects to the dendrites of other neurons."
169 | ]
170 | },
171 | {
172 | "cell_type": "markdown",
173 | "metadata": {},
174 | "source": [
175 | "# An Artificial Neuron: Example\n",
176 | "\n",
177 | "We can imitate this machinery using an idealized artificial neuron.\n",
178 | "* The neuron receives signals $x_j$ at dendrites, which are modulated multiplicatively: $w_j \\cdot x_j$.\n",
179 | "* The body of the neuron sums the modulated inputs: $\\sum_{j=1}^d w_j \\cdot x_j$.\n",
180 | "* These go into the activation function that produces an output.\n",
181 | "\n",
182 | "TODO: PUT ARTIFICIAL NEURON IMAGE FROM HERE: https://cs231n.github.io/neural-networks-1/"
183 | ]
184 | },
185 | {
186 | "cell_type": "markdown",
187 | "metadata": {},
188 | "source": [
189 | "# An Artificial Neuron: Notation\n",
190 | "\n",
191 | "More formally, we say that a neuron is a model $f : \\mathbb{R}^d \\to [0,1]$, with the following components:\n",
192 | "* Inputs $x_1,x_2,...,x_d$, denoted by a vector $x$.\n",
193 | "* Weight vector $w \\in \\mathbb{R}^d$ that modulates input $x$ as $w^\\top x$.\n",
194 | "* An activation function $\\sigma: \\mathbb{R} \\to \\mathbb{R}$ that computes the output $\\sigma(w^\\top x)$ of the neuron based on the sum of modulated features $w^\\top x$."
195 | ]
196 | },
197 | {
198 | "cell_type": "markdown",
199 | "metadata": {},
200 | "source": [
201 | "# Logistic Regression as an Artificial Neuron\n",
202 | "\n",
203 | "Logistic regression is a model of the form\n",
204 | "$$ f(x) = \\sigma(\\theta^\\top x) = \\frac{1}{1 + \\exp(-\\theta^\\top x)}, $$\n",
205 | "that can be interpreted as a neuron that uses the *sigmoid* as the activation function."
206 | ]
207 | },
208 | {
209 | "cell_type": "markdown",
210 | "metadata": {},
211 | "source": [
212 | "# Perceptron\n",
213 | "\n",
214 | "Another model of a neuron."
215 | ]
216 | },
217 | {
218 | "cell_type": "markdown",
219 | "metadata": {},
220 | "source": [
221 | "# Example\n",
222 | "\n",
223 | "Need to implement a small example. Can probably copy-paste implementation of LR from the LR slides."
224 | ]
225 | },
226 | {
227 | "cell_type": "markdown",
228 | "metadata": {},
229 | "source": [
230 | "# Activation Functions\n",
231 | "\n",
232 | "Let's list a few."
233 | ]
234 | },
235 | {
236 | "cell_type": "markdown",
237 | "metadata": {},
238 | "source": [
239 | "
\n",
240 | "# Part 2: Artificial Neural Networks\n",
241 | "\n",
242 | "Let's now see how we can connect neurons into networks that form complex models that further mimic the brain."
243 | ]
244 | },
245 | {
246 | "cell_type": "markdown",
247 | "metadata": {},
248 | "source": [
249 | "# Review: Artificial Neuron\n",
250 | "\n",
251 | "We say that a neuron is a model $f : \\mathbb{R}^d \\to [0,1]$, with the following components:\n",
252 | "* Inputs $x_1,x_2,...,x_d$, denoted by a vector $x$.\n",
253 | "* Weight vector $w \\in \\mathbb{R}^d$ that modulates input $x$ as $w^\\top x$.\n",
254 | "* An activation function $\\sigma: \\mathbb{R} \\to \\mathbb{R}$ that computes the output $\\sigma(w^\\top x)$ of the neuron based on the sum of modulated features $w^\\top x$."
255 | ]
256 | },
257 | {
258 | "cell_type": "markdown",
259 | "metadata": {},
260 | "source": [
261 | "# Review: Logistic Regression as Neuron\n",
262 | "\n",
263 | "Logistic regression is a model of the form\n",
264 | "$$ f(x) = \\sigma(\\theta^\\top x) = \\frac{1}{1 + \\exp(-\\theta^\\top x)}, $$\n",
265 | "that can be interpreted as a neuron that uses the *sigmoid* as the activation function."
266 | ]
267 | },
268 | {
269 | "cell_type": "markdown",
270 | "metadata": {},
271 | "source": [
272 | "# Neural Networks: Intuition\n",
273 | "\n",
274 | "A neural network is a directed graph in which a node is a neuron that takes as input the outputs of the neurons that are connected to it.\n",
275 | "\n",
276 | "TODO: Add an image here. Maybe layer image from here: https://cs231n.github.io/neural-networks-1/ (It probably needs some annotations)\n",
277 | "\n",
278 | "Networks are typically organized in layers."
279 | ]
280 | },
281 | {
282 | "cell_type": "markdown",
283 | "metadata": {
284 | "slideshow": {
285 | "slide_type": "slide"
286 | }
287 | },
288 | "source": [
289 | "# Neural Networks: Layers\n",
290 | "\n",
291 | "A neural network layer is a model $f : \\mathbb{R}^d \\to \\mathbb{R}^p$ that applies $p$ neurons in parallel to an input $x$.\n",
292 | "$$ f(x) = \\begin{bmatrix}\n",
293 | "\\sigma(w_1^\\top x) \\\\\n",
294 | "\\sigma(w_2^\\top x) \\\\\n",
295 | "\\vdots \\\\\n",
296 | "\\sigma(w_p^\\top x)\n",
297 | "\\end{bmatrix}.\n",
298 | "$$\n",
299 | "where each $w_k$ is the vector of weights for the $k$-th neuron. We refer to $p$ as the *size* of the layer."
300 | ]
301 | },
302 | {
303 | "cell_type": "markdown",
304 | "metadata": {
305 | "slideshow": {
306 | "slide_type": "subslide"
307 | }
308 | },
309 | "source": [
310 | "By combining the $w_k$ into one matrix $W$, we can write in a more succinct vectorized form:\n",
311 | "$$f(x) = \\sigma(W\\cdot x) = \\begin{bmatrix}\n",
312 | "\\sigma(w_1^\\top x) \\\\\n",
313 | "\\sigma(w_2^\\top x) \\\\\n",
314 | "\\vdots \\\\\n",
315 | "\\sigma(w_p^\\top x)\n",
316 | "\\end{bmatrix},\n",
317 | "$$\n",
318 | "where $\\sigma(W\\cdot x)_k = \\sigma(w_k^\\top x)$ and $W_{kj} = (w_k)_j$. "
319 | ]
320 | },
321 | {
322 | "cell_type": "markdown",
323 | "metadata": {},
324 | "source": [
325 | "# Neural Networks: Notation\n",
326 | "\n",
327 | "A neural network is a model $f : \\mathbb{R}^d \\to \\mathbb{R}$ that consists of a composition of $L$ neural network layers:\n",
328 | "$$ f(x) = f_L \\circ f_{L-1} \\circ \\ldots \\circ f_1 (x). $$\n",
329 | "The final layer $f_L$ has size one (assuming the neural net has one output); intermediary layers $f_l$ can have any number of neurons.\n",
330 | "\n",
331 | "The notation $f \\circ g(x)$ denotes the composition $f(g(x))$ of functions"
332 | ]
333 | },
334 | {
335 | "cell_type": "markdown",
336 | "metadata": {},
337 | "source": [
338 | "# Example of a Neural Network\n",
339 | "\n",
340 | "* Let's implement a small neural net in the same way that we implemented logistic regression\n",
341 | "* Then we just run it"
342 | ]
343 | },
344 | {
345 | "cell_type": "markdown",
346 | "metadata": {},
347 | "source": [
348 | "# Types of Neural Network Layers\n",
349 | "\n",
350 | "There are many types of neural network layers that can exist. Here are a few:\n",
351 | "* Output layer: normally has one neuron and a special activation function that depends on the problem\n",
352 | "* Input layer: normally, this is just the input vector $x$.\n",
353 | "* Hidden layer: Any layer between input and output.\n",
354 | "* Dense layer: A layer in which every input is connected to every neuron.\n",
355 | "* Convolutional layer: A layer in which the operation $w^\\top x$ implements a mathematical [convolution](https://en.wikipedia.org/wiki/Convolution).\n",
356 | "* Anything else?"
357 | ]
358 | },
359 | {
360 | "cell_type": "markdown",
361 | "metadata": {},
362 | "source": [
363 | "# Neuroscience Angle\n",
364 | "\n",
365 | "Anything we should say here?"
366 | ]
367 | },
368 | {
369 | "cell_type": "markdown",
370 | "metadata": {},
371 | "source": [
372 | "
\n",
373 | "# Part 3: Backpropagation\n",
374 | "\n",
375 | "We have defined what an artificial neural network is.\n",
376 | "\n",
377 | "Let's now see how we can train it."
378 | ]
379 | },
380 | {
381 | "cell_type": "markdown",
382 | "metadata": {},
383 | "source": [
384 | "# Review: Neural Network Layers\n",
385 | "\n",
386 | "A neural network layer is a model $f : \\mathbb{R}^d \\to \\mathbb{R}^p$ that applies $p$ neurons in parallel to an input $x$.\n",
387 | "$$f(x) = \\sigma(W\\cdot x) = \\begin{bmatrix}\n",
388 | "\\sigma(w_1^\\top x) \\\\\n",
389 | "\\sigma(w_2^\\top x) \\\\\n",
390 | "\\vdots \\\\\n",
391 | "\\sigma(w_p^\\top x)\n",
392 | "\\end{bmatrix},\n",
393 | "$$\n",
394 | "where each $w_k$ is the vector of weights for the $k$-th neuron and $W_{kj} = (w_k)_j$. We refer to $p$ as the *size* of the layer."
395 | ]
396 | },
397 | {
398 | "cell_type": "markdown",
399 | "metadata": {},
400 | "source": [
401 | "# Review: Neural Networks\n",
402 | "\n",
403 | "A neural network is a model $f : \\mathbb{R}^d \\to \\mathbb{R}$ that consists of a composition of $L$ neural network layers:\n",
404 | "$$ f(x) = f_L \\circ f_{L-1} \\circ \\ldots \\circ f_1 (x). $$\n",
405 | "The final layer $f_L$ has size one (assuming the neural net has one output); intermediary layers $f_l$ can have any number of neurons.\n",
406 | "\n",
407 | "The notation $f \\circ g(x)$ denotes the composition $f(g(x))$ of functions\n",
408 | "\n",
409 | "TODO: Add some kind of image from the previous part of the lecture"
410 | ]
411 | },
412 | {
413 | "cell_type": "markdown",
414 | "metadata": {},
415 | "source": [
416 | "# Review: The Gradient\n",
417 | "\n",
418 | "The gradient $\\nabla_\\theta f$ further extends the derivative to multivariate functions $f : \\mathbb{R}^d \\to \\mathbb{R}$, and is defined at a point $\\theta_0$ as\n",
419 | "\n",
420 | "$$ \\nabla_\\theta f (\\theta_0) = \\begin{bmatrix}\n",
421 | "\\frac{\\partial f(\\theta_0)}{\\partial \\theta_1} \\\\\n",
422 | "\\frac{\\partial f(\\theta_0)}{\\partial \\theta_2} \\\\\n",
423 | "\\vdots \\\\\n",
424 | "\\frac{\\partial f(\\theta_0)}{\\partial \\theta_d}\n",
425 | "\\end{bmatrix}.$$\n",
426 | "\n",
427 | "In other words, the $j$-th entry of the vector $\\nabla_\\theta f (\\theta_0)$ is the partial derivative $\\frac{\\partial f(\\theta_0)}{\\partial \\theta_j}$ of $f$ with respect to the $j$-th component of $\\theta$."
428 | ]
429 | },
430 | {
431 | "cell_type": "markdown",
432 | "metadata": {},
433 | "source": [
434 | "# Review: Gradient Descent\n",
435 | "\n",
436 | "If we want to optimize an objective $J(\\theta)$, we start with an initial guess $\\theta_0$ for the parameters and repeat the following update until the function is no longer decreasing:\n",
437 | "$$ \\theta_i := \\theta_{i-1} - \\alpha \\cdot \\nabla_\\theta J(\\theta_{i-1}). $$\n",
438 | "\n",
439 | "As code, this method may look as follows:\n",
440 | "```python\n",
441 | "theta, theta_prev = random_initialization()\n",
442 | "while abs(J(theta) - J(theta_prev)) > conv_threshold:\n",
443 | " theta_prev = theta\n",
444 | " theta = theta_prev - step_size * gradient(theta_prev)\n",
445 | "```"
446 | ]
447 | },
448 | {
449 | "cell_type": "markdown",
450 | "metadata": {},
451 | "source": [
452 | "# Backpropagation\n",
453 | "\n",
454 | "How do we apply gradient descent to a neural network?\n",
455 | "\n",
456 | "Explain backpropagation"
457 | ]
458 | },
459 | {
460 | "cell_type": "markdown",
461 | "metadata": {},
462 | "source": [
463 | "# Review: Chain Rule of Calculus\n",
464 | "\n",
465 | "Probably will need to review this at some point."
466 | ]
467 | },
468 | {
469 | "cell_type": "markdown",
470 | "metadata": {},
471 | "source": [
472 | "# Example\n",
473 | "\n",
474 | "Let's implement backprop with the simple NN model we had earlier."
475 | ]
476 | },
477 | {
478 | "cell_type": "markdown",
479 | "metadata": {},
480 | "source": [
481 | "
\n",
482 | "# Part 4: Stochastic Gradient Descent\n",
483 | "\n",
484 | "In practice, neural networks are often trained on very large datasets.\n",
485 | "\n",
486 | "This requires a modification to the gradient descent algorithm that we have seen earlier."
487 | ]
488 | },
489 | {
490 | "cell_type": "markdown",
491 | "metadata": {},
492 | "source": [
493 | "Volodymyr will create this section"
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "execution_count": null,
499 | "metadata": {},
500 | "outputs": [],
501 | "source": []
502 | }
503 | ],
504 | "metadata": {
505 | "accelerator": "GPU",
506 | "celltoolbar": "Slideshow",
507 | "colab": {
508 | "collapsed_sections": [],
509 | "name": "neural-ode.ipynb",
510 | "provenance": []
511 | },
512 | "kernelspec": {
513 | "display_name": "aml",
514 | "language": "python",
515 | "name": "aml"
516 | },
517 | "language_info": {
518 | "codemirror_mode": {
519 | "name": "ipython",
520 | "version": 3
521 | },
522 | "file_extension": ".py",
523 | "mimetype": "text/x-python",
524 | "name": "python",
525 | "nbconvert_exporter": "python",
526 | "pygments_lexer": "ipython3",
527 | "version": "3.6.7"
528 | },
529 | "rise": {
530 | "controlsTutorial": false,
531 | "height": 900,
532 | "help": false,
533 | "margin": 0,
534 | "maxScale": 2,
535 | "minScale": 0.2,
536 | "progress": true,
537 | "scroll": true,
538 | "theme": "simple",
539 | "width": 1200
540 | }
541 | },
542 | "nbformat": 4,
543 | "nbformat_minor": 1
544 | }
545 |
--------------------------------------------------------------------------------
/notebooks/lecture23-overview.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "i_f5u2x9nn6I",
8 | "slideshow": {
9 | "slide_type": "slide"
10 | }
11 | },
12 | "source": [
13 | "
\n",
14 | "\n",
15 | "# Lecture 23: Course Overview\n",
16 | "\n",
17 | "### Applied Machine Learning\n",
18 | "\n",
19 | "__Volodymyr Kuleshov__
Cornell Tech"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {
25 | "slideshow": {
26 | "slide_type": "slide"
27 | }
28 | },
29 | "source": [
30 | "# Congratulations on Finishing Applied Machine Learning!\n",
31 | "\n",
32 | "You have made it! This is our last machine learning lecture, in which we will do an overview of the different algorithms seen in the course."
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {
38 | "slideshow": {
39 | "slide_type": "slide"
40 | }
41 | },
42 | "source": [
43 | "# A Map of Applied Machine Learning\n",
44 | "\n",
45 | "We will go through the following map of algorithms from the course.\n",
46 | "
"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {
52 | "slideshow": {
53 | "slide_type": "slide"
54 | }
55 | },
56 | "source": [
57 | "# Supervised Machine Learning\n",
58 | "\n",
59 | "At a high level, a supervised machine learning problem has the following structure:\n",
60 | "\n",
61 | "$$ \\underbrace{\\text{Dataset}}_\\text{Features, Attributes} + \\underbrace{\\text{Learning Algorithm}}_\\text{Model Class + Objective + Optimizer} \\to \\text{Predictive Model} $$\n",
62 | "\n",
63 | "The predictive model is chosen to model the relationship between inputs and targets. For instance, it can predict future targets."
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {
69 | "slideshow": {
70 | "slide_type": "slide"
71 | }
72 | },
73 | "source": [
74 | "# Linear Regression\n",
75 | "\n",
76 | "In linear regression, we fit a model\n",
77 | "$$ f_\\theta(x) := \\theta^\\top \\phi(x) $$\n",
78 | "that is linear in $\\theta$. \n",
79 | "\n",
80 | "The features $\\phi(x) : \\mathbb{R} \\to \\mathbb{R}^p$ may be non-linear in $x$ (e.g., polynomial features), allowing us to fit complex functions."
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {
86 | "slideshow": {
87 | "slide_type": "subslide"
88 | }
89 | },
90 | "source": [
91 | "
"
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "metadata": {
97 | "slideshow": {
98 | "slide_type": "slide"
99 | }
100 | },
101 | "source": [
102 | "# Overfitting\n",
103 | "\n",
104 | "Overfitting is one of the most common failure modes of machine learning.\n",
105 | "* A very expressive model (a high degree polynomial) fits the training dataset perfectly.\n",
106 | "* The model also makes wildly incorrect predictions outside this dataset, and doesn't generalize."
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {
112 | "slideshow": {
113 | "slide_type": "slide"
114 | }
115 | },
116 | "source": [
117 | "# Regularization\n",
118 | "\n",
119 | "The idea of regularization is to penalize complex models that may overfit the data."
120 | ]
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {
125 | "slideshow": {
126 | "slide_type": "fragment"
127 | }
128 | },
129 | "source": [
130 | "Regularized least squares optimizes the following objective (__Ridge__).\n",
131 | "$$ J(\\theta) = \\frac{1}{2n} \\sum_{i=1}^n \\left( y^{(i)} - \\theta^\\top \\phi(x^{(i)}) \\right)^2 + \\frac{\\lambda}{2} \\cdot ||\\theta||_2^2. $$\n",
132 | "If we use the L1 norm, we have the __LASSO__."
133 | ]
134 | },
135 | {
136 | "cell_type": "markdown",
137 | "metadata": {
138 | "slideshow": {
139 | "slide_type": "subslide"
140 | }
141 | },
142 | "source": [
143 | "
"
144 | ]
145 | },
146 | {
147 | "cell_type": "markdown",
148 | "metadata": {
149 | "slideshow": {
150 | "slide_type": "slide"
151 | }
152 | },
153 | "source": [
154 | "# Regression vs. Classification\n",
155 | "\n",
156 | "Consider a training dataset $\\mathcal{D} = \\{(x^{(1)}, y^{(1)}), (x^{(2)}, y^{(2)}), \\ldots, (x^{(n)}, y^{(n)})\\}$.\n",
157 | "\n",
158 | "We distinguish between two types of supervised learning problems depending on the targets $y^{(i)}$. \n",
159 | "\n",
160 | "1. __Regression__: The target variable $y \\in \\mathcal{Y}$ is continuous: $\\mathcal{Y} \\subseteq \\mathbb{R}$.\n",
161 | "2. __Classification__: The target variable $y$ is discrete and takes on one of $K$ possible values: $\\mathcal{Y} = \\{y_1, y_2, \\ldots y_K\\}$. Each discrete value corresponds to a *class* that we want to predict."
162 | ]
163 | },
164 | {
165 | "cell_type": "markdown",
166 | "metadata": {
167 | "slideshow": {
168 | "slide_type": "subslide"
169 | }
170 | },
171 | "source": [
172 | "
"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {
178 | "slideshow": {
179 | "slide_type": "slide"
180 | }
181 | },
182 | "source": [
183 | "# Parametric vs. Non-Parametric Models\n",
184 | "\n",
185 | "Nearest neighbors is an example of a *non-parametric* model.\n",
186 | "* A parametric model $f_\\theta(x) : \\mathcal{X} \\times \\Theta \\to \\mathcal{Y}$ is defined by a finite set of parameters $\\theta \\in \\Theta$ whose dimensionality is constant with respect to the dataset"
187 | ]
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "metadata": {
192 | "slideshow": {
193 | "slide_type": "fragment"
194 | }
195 | },
196 | "source": [
197 | "* In a non-parametric model, the function $f$ uses the entire training dataset to make predictions, and the complexity of the model increases with dataset size."
198 | ]
199 | },
200 | {
201 | "cell_type": "markdown",
202 | "metadata": {
203 | "slideshow": {
204 | "slide_type": "subslide"
205 | }
206 | },
207 | "source": [
208 | "* Non-parametric models have the advantage of not losing any information at training time. \n",
209 | "* However, they are also computationally less tractable and may easily overfit the training set."
210 | ]
211 | },
212 | {
213 | "cell_type": "markdown",
214 | "metadata": {
215 | "slideshow": {
216 | "slide_type": "slide"
217 | }
218 | },
219 | "source": [
220 | "# Probabilistic vs. Non-Probabilistic Models\n",
221 | "\n",
222 | "A probabilistic model is a probability distribution\n",
223 | "$$P(x,y) : \\mathcal{X} \\times \\mathcal{Y} \\to [0,1].$$\n",
224 | "This model can approximate the data distribution $P_\\text{data}(x,y)$.\n",
225 | "\n",
226 | "If we know $P(x,y)$, we can use the conditional $P(y|x)$ for prediction."
227 | ]
228 | },
229 | {
230 | "cell_type": "markdown",
231 | "metadata": {
232 | "slideshow": {
233 | "slide_type": "slide"
234 | }
235 | },
236 | "source": [
237 | "# Maximum Likelihood Learning\n",
238 | "\n",
239 | "Maximum likelihood is an objective that can be used to fit any probabilistic model:\n",
240 | "$$ \\theta_\\text{MLE} = \\arg\\max_\\theta \\mathbb{E}_{x, y \\sim \\mathbb{P}_\\text{data}} \\log P(x, y; \\theta). $$\n",
241 | "It minimizes the KL divergence between the model and data distributions:\n",
242 | "$$\\theta_\\text{MLE} = \\arg\\min_\\theta \\text{KL}(P_\\text{data} \\mid\\mid P_\\theta).$$"
243 | ]
244 | },
245 | {
246 | "cell_type": "markdown",
247 | "metadata": {
248 | "slideshow": {
249 | "slide_type": "slide"
250 | }
251 | },
252 | "source": [
253 | "# Discriminative vs. Generative Models\n",
254 | "\n",
255 | "There are two types of probabilistic models: *generative* and *discriminative*.\n",
256 | "\\begin{align*}\n",
257 | "\\underbrace{P_\\theta(x,y) : \\mathcal{X} \\times \\mathcal{Y} \\to [0,1]}_\\text{generative model} & \\;\\; & \\underbrace{P_\\theta(y|x) : \\mathcal{X} \\times \\mathcal{Y} \\to [0,1]}_\\text{discriminative model}\n",
258 | "\\end{align*}\n",
259 | "\n",
260 | "We can obtain predictions from generative models via $\\arg\\max_y P_\\theta(x,y)$."
261 | ]
262 | },
263 | {
264 | "cell_type": "markdown",
265 | "metadata": {
266 | "slideshow": {
267 | "slide_type": "subslide"
268 | }
269 | },
270 | "source": [
271 | "
"
272 | ]
273 | },
274 | {
275 | "cell_type": "markdown",
276 | "metadata": {
277 | "slideshow": {
278 | "slide_type": "slide"
279 | }
280 | },
281 | "source": [
282 | "# The Max-Margin Principle\n",
283 | "\n",
284 | "Intuitively, we want to select linear decision boundaries with high *margin*. \n",
285 | "\n",
286 | "This means that we are as confident as possible for every point and we are as far as possible from the decision boundary."
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "execution_count": 2,
292 | "metadata": {
293 | "slideshow": {
294 | "slide_type": "subslide"
295 | }
296 | },
297 | "outputs": [
298 | {
299 | "data": {
300 | "text/html": [
301 | "\n",
302 | "\n",
315 | "
\n",
316 | " \n",
317 | " \n",
318 | " | \n",
319 | " sepal length (cm) | \n",
320 | " sepal width (cm) | \n",
321 | " petal length (cm) | \n",
322 | " petal width (cm) | \n",
323 | " target | \n",
324 | "
\n",
325 | " \n",
326 | " \n",
327 | " \n",
328 | " 0 | \n",
329 | " 5.1 | \n",
330 | " 3.5 | \n",
331 | " 1.4 | \n",
332 | " 0.2 | \n",
333 | " -1 | \n",
334 | "
\n",
335 | " \n",
336 | " 4 | \n",
337 | " 5.0 | \n",
338 | " 3.6 | \n",
339 | " 1.4 | \n",
340 | " 0.2 | \n",
341 | " -1 | \n",
342 | "
\n",
343 | " \n",
344 | " 8 | \n",
345 | " 4.4 | \n",
346 | " 2.9 | \n",
347 | " 1.4 | \n",
348 | " 0.2 | \n",
349 | " -1 | \n",
350 | "
\n",
351 | " \n",
352 | " 12 | \n",
353 | " 4.8 | \n",
354 | " 3.0 | \n",
355 | " 1.4 | \n",
356 | " 0.1 | \n",
357 | " -1 | \n",
358 | "
\n",
359 | " \n",
360 | " 16 | \n",
361 | " 5.4 | \n",
362 | " 3.9 | \n",
363 | " 1.3 | \n",
364 | " 0.4 | \n",
365 | " -1 | \n",
366 | "
\n",
367 | " \n",
368 | "
\n",
369 | "
"
370 | ],
371 | "text/plain": [
372 | " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n",
373 | "0 5.1 3.5 1.4 0.2 \n",
374 | "4 5.0 3.6 1.4 0.2 \n",
375 | "8 4.4 2.9 1.4 0.2 \n",
376 | "12 4.8 3.0 1.4 0.1 \n",
377 | "16 5.4 3.9 1.3 0.4 \n",
378 | "\n",
379 | " target \n",
380 | "0 -1 \n",
381 | "4 -1 \n",
382 | "8 -1 \n",
383 | "12 -1 \n",
384 | "16 -1 "
385 | ]
386 | },
387 | "execution_count": 2,
388 | "metadata": {},
389 | "output_type": "execute_result"
390 | }
391 | ],
392 | "source": [
393 | "import numpy as np\n",
394 | "import pandas as pd\n",
395 | "from sklearn import datasets\n",
396 | "\n",
397 | "# Load the Iris dataset\n",
398 | "iris = datasets.load_iris(as_frame=True)\n",
399 | "iris_X, iris_y = iris.data, iris.target\n",
400 | "\n",
401 | "# subsample to a third of the data points\n",
402 | "iris_X = iris_X.loc[::4]\n",
403 | "iris_y = iris_y.loc[::4]\n",
404 | "\n",
405 | "# create a binary classification dataset with labels +/- 1\n",
406 | "iris_y2 = iris_y.copy()\n",
407 | "iris_y2[iris_y2==2] = 1\n",
408 | "iris_y2[iris_y2==0] = -1\n",
409 | "\n",
410 | "# print part of the dataset\n",
411 | "pd.concat([iris_X, iris_y2], axis=1).head()"
412 | ]
413 | },
414 | {
415 | "cell_type": "code",
416 | "execution_count": 3,
417 | "metadata": {
418 | "slideshow": {
419 | "slide_type": "subslide"
420 | }
421 | },
422 | "outputs": [
423 | {
424 | "data": {
425 | "text/plain": [
426 | ""
427 | ]
428 | },
429 | "execution_count": 3,
430 | "metadata": {},
431 | "output_type": "execute_result"
432 | },
433 | {
434 | "data": {
435 | "image/png": "\n",
436 | "text/plain": [
437 | ""
438 | ]
439 | },
440 | "metadata": {
441 | "needs_background": "light"
442 | },
443 | "output_type": "display_data"
444 | }
445 | ],
446 | "source": [
447 | "# https://scikit-learn.org/stable/auto_examples/neighbors/plot_classification.html\n",
448 | "%matplotlib inline\n",
449 | "import matplotlib.pyplot as plt\n",
450 | "plt.rcParams['figure.figsize'] = [12, 4]\n",
451 | "import warnings\n",
452 | "warnings.filterwarnings(\"ignore\")\n",
453 | "\n",
454 | "# create 2d version of dataset and subsample it\n",
455 | "X = iris_X.to_numpy()[:,:2]\n",
456 | "x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\n",
457 | "y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\n",
458 | "xx, yy = np.meshgrid(np.arange(x_min, x_max, .02), np.arange(y_min, y_max, .02))\n",
459 | "\n",
460 | "# Plot also the training points\n",
461 | "p1 = plt.scatter(X[:, 0], X[:, 1], c=iris_y2, s=60, cmap=plt.cm.Paired)\n",
462 | "plt.xlabel('Petal Length')\n",
463 | "plt.ylabel('Petal Width')\n",
464 | "plt.legend(handles=p1.legend_elements()[0], labels=['Setosa', 'Not Setosa'], loc='lower right')"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 4,
470 | "metadata": {
471 | "slideshow": {
472 | "slide_type": "subslide"
473 | }
474 | },
475 | "outputs": [
476 | {
477 | "data": {
478 | "image/png": "\n",
479 | "text/plain": [
480 | ""
481 | ]
482 | },
483 | "metadata": {
484 | "needs_background": "light"
485 | },
486 | "output_type": "display_data"
487 | }
488 | ],
489 | "source": [
490 | "from sklearn.linear_model import Perceptron, RidgeClassifier\n",
491 | "from sklearn.svm import SVC\n",
492 | "models = [SVC(kernel='linear', C=10000), Perceptron(), RidgeClassifier()]\n",
493 | "\n",
494 | "def fit_and_create_boundary(model):\n",
495 | " model.fit(X, iris_y2)\n",
496 | " Z = model.predict(np.c_[xx.ravel(), yy.ravel()])\n",
497 | " Z = Z.reshape(xx.shape)\n",
498 | " return Z\n",
499 | "\n",
500 | "plt.figure(figsize=(12,3))\n",
501 | "for i, model in enumerate(models):\n",
502 | " plt.subplot('13%d' % (i+1))\n",
503 | " Z = fit_and_create_boundary(model)\n",
504 | " plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired) \n",
505 | "\n",
506 | " # Plot also the training points\n",
507 | " plt.scatter(X[:, 0], X[:, 1], c=iris_y2, edgecolors='k', cmap=plt.cm.Paired)\n",
508 | " if i == 0:\n",
509 | " plt.title('Good Margin')\n",
510 | " else:\n",
511 | " plt.title('Bad Margin')\n",
512 | " plt.xlabel('Sepal length')\n",
513 | " plt.ylabel('Sepal width')\n",
514 | "\n",
515 | "plt.show()"
516 | ]
517 | },
518 | {
519 | "cell_type": "markdown",
520 | "metadata": {
521 | "slideshow": {
522 | "slide_type": "subslide"
523 | }
524 | },
525 | "source": [
526 | "
"
527 | ]
528 | },
529 | {
530 | "cell_type": "markdown",
531 | "metadata": {
532 | "slideshow": {
533 | "slide_type": "slide"
534 | }
535 | },
536 | "source": [
537 | "# The Kernel Trick\n",
538 | "\n",
539 | "Many algorithms in machine learning only involve dot products $\\phi(x)^\\top \\phi(z)$ but not the features $\\phi$ themselves."
540 | ]
541 | },
542 | {
543 | "cell_type": "markdown",
544 | "metadata": {
545 | "slideshow": {
546 | "slide_type": "fragment"
547 | }
548 | },
549 | "source": [
550 | "We can often compute $\\phi(x)^\\top \\phi(z)$ very efficiently for complex $\\phi$ using a kernel function $K(x,z) = \\phi(x)^\\top \\phi(z)$. This is the __kernel trick__."
551 | ]
552 | },
553 | {
554 | "cell_type": "markdown",
555 | "metadata": {
556 | "slideshow": {
557 | "slide_type": "subslide"
558 | }
559 | },
560 | "source": [
561 | "
"
562 | ]
563 | },
564 | {
565 | "cell_type": "markdown",
566 | "metadata": {
567 | "slideshow": {
568 | "slide_type": "slide"
569 | }
570 | },
571 | "source": [
572 | "# Tree-Based Models\n",
573 | "\n",
574 | "Decision trees output a target based on a tree of human-interpretable decision rules.\n",
575 | "* __Random forests__ combine large trees using *bagging* to reduce overfitting.\n",
576 | "* __Boosted trees__ combine small trees to reduce underfitting."
577 | ]
578 | },
579 | {
580 | "cell_type": "markdown",
581 | "metadata": {
582 | "slideshow": {
583 | "slide_type": "subslide"
584 | }
585 | },
586 | "source": [
587 | "
"
588 | ]
589 | },
590 | {
591 | "cell_type": "markdown",
592 | "metadata": {
593 | "slideshow": {
594 | "slide_type": "slide"
595 | }
596 | },
597 | "source": [
598 | "# Neural Networks\n",
599 | "\n",
600 | "Neural network models are inspired by the brain.\n",
601 | "* A Perceptron is an artificial model of a neuron.\n",
602 | "* MLPs stack multiple layers of artificial neurons.\n",
603 | "* ConvNets tie the weights of neighboring neurons into receptive fields that implement the convolution operation."
604 | ]
605 | },
606 | {
607 | "cell_type": "markdown",
608 | "metadata": {
609 | "slideshow": {
610 | "slide_type": "subslide"
611 | }
612 | },
613 | "source": [
614 | "
"
615 | ]
616 | },
617 | {
618 | "cell_type": "markdown",
619 | "metadata": {
620 | "slideshow": {
621 | "slide_type": "slide"
622 | }
623 | },
624 | "source": [
625 | "# Unsupervised Learning\n",
626 | "\n",
627 | "We have a dataset *without* labels. Our goal is to learn something interesting about the structure of the data:\n",
628 | "* __Clusters__ hidden in the dataset.\n",
629 | "* A __low-dimensional representation__ of the data.\n",
630 | "* Recover the __probability density__ that generated the data."
631 | ]
632 | },
633 | {
634 | "cell_type": "markdown",
635 | "metadata": {
636 | "slideshow": {
637 | "slide_type": "subslide"
638 | }
639 | },
640 | "source": [
641 | "
"
642 | ]
643 | },
644 | {
645 | "cell_type": "markdown",
646 | "metadata": {
647 | "slideshow": {
648 | "slide_type": "subslide"
649 | }
650 | },
651 | "source": [
652 | "
"
653 | ]
654 | },
655 | {
656 | "cell_type": "markdown",
657 | "metadata": {
658 | "slideshow": {
659 | "slide_type": "subslide"
660 | }
661 | },
662 | "source": [
663 | "
"
664 | ]
665 | },
666 | {
667 | "cell_type": "markdown",
668 | "metadata": {
669 | "slideshow": {
670 | "slide_type": "slide"
671 | }
672 | },
673 | "source": [
674 | "# How To Decide Which Algorithm to Use\n",
675 | "\n",
676 | "One factor is how much data you have. In the __small data__ (<10,000) regime, consider:\n",
677 | "* Linear models with hand-crafted features (LASSO, LR, NB, SVMs)\n",
678 | "* Kernel methods often work best (e.g., SVM + RBF kernel)\n",
679 | "* Non-parametric methods (kernels, nearest neighbors) are also powerful"
680 | ]
681 | },
682 | {
683 | "cell_type": "markdown",
684 | "metadata": {
685 | "slideshow": {
686 | "slide_type": "subslide"
687 | }
688 | },
689 | "source": [
690 | "In the __big data__ regime,\n",
691 | "* If using \"high-level\" features, gradient boosted trees are state-of-the-art\n",
692 | "* When using \"low-level\" representations (images, sound signals), neural networks work best\n",
693 | "* Linear models with good features are also good and reliable"
694 | ]
695 | },
696 | {
697 | "cell_type": "markdown",
698 | "metadata": {
699 | "slideshow": {
700 | "slide_type": "subslide"
701 | }
702 | },
703 | "source": [
704 | "Some additional advice:\n",
705 | "* If interpretability matters, use decision trees or LASSO.\n",
706 | "* When uncertainty estimates are important, use probabilistic methods.\n",
707 | "* If you know the data generating process, use generative models."
708 | ]
709 | },
710 | {
711 | "cell_type": "markdown",
712 | "metadata": {
713 | "slideshow": {
714 | "slide_type": "slide"
715 | }
716 | },
717 | "source": [
718 | "# What's Next? Ideas for Courses\n",
719 | "\n",
720 | "Consider the following courses to keep learning about ML:\n",
721 | "* Graduate courses in the Spring semester at Cornell (generative models, NLP, etc.)\n",
722 | "* Masters courses: Deep Learning Clinic, ML Engineering, Data Science, etc.\n",
723 | "* Online courses, e.g. Full Stack Deep Learning"
724 | ]
725 | },
726 | {
727 | "cell_type": "markdown",
728 | "metadata": {
729 | "slideshow": {
730 | "slide_type": "slide"
731 | }
732 | },
733 | "source": [
734 | "# What's Next? Ideas for Research\n",
735 | "\n",
736 | "In order to get involved in research, I recommend:\n",
737 | "* Contacting research groups at Cornell for openings\n",
738 | "* Watching online ML tutorials, e.g. NeurIPS\n",
739 | "* Reading and implementing ML papers on your own"
740 | ]
741 | },
742 | {
743 | "cell_type": "markdown",
744 | "metadata": {
745 | "slideshow": {
746 | "slide_type": "slide"
747 | }
748 | },
749 | "source": [
750 | "# What's Next? Ideas for Industry Projects\n",
751 | "\n",
752 | "Finally, a few ideas for how to get more practice applying ML in the real world:\n",
753 | "* Participate in Kaggle competitions and review solutions\n",
754 | "* Build an open-source project that you like and host it on GitHub"
755 | ]
756 | },
757 | {
758 | "cell_type": "markdown",
759 | "metadata": {
760 | "slideshow": {
761 | "slide_type": "slide"
762 | }
763 | },
764 | "source": [
765 | "# Thank You For Taking Applied Machine Learning 2020!\n",
766 | "\n",
767 | "
"
768 | ]
769 | }
770 | ],
771 | "metadata": {
772 | "accelerator": "GPU",
773 | "celltoolbar": "Slideshow",
774 | "colab": {
775 | "collapsed_sections": [],
776 | "name": "neural-ode.ipynb",
777 | "provenance": []
778 | },
779 | "kernelspec": {
780 | "display_name": "aml",
781 | "language": "python",
782 | "name": "aml"
783 | },
784 | "language_info": {
785 | "codemirror_mode": {
786 | "name": "ipython",
787 | "version": 3
788 | },
789 | "file_extension": ".py",
790 | "mimetype": "text/x-python",
791 | "name": "python",
792 | "nbconvert_exporter": "python",
793 | "pygments_lexer": "ipython3",
794 | "version": "3.6.7"
795 | },
796 | "rise": {
797 | "controlsTutorial": false,
798 | "height": 900,
799 | "help": false,
800 | "margin": 0,
801 | "maxScale": 2,
802 | "minScale": 0.2,
803 | "progress": true,
804 | "scroll": true,
805 | "theme": "simple",
806 | "width": 1200
807 | }
808 | },
809 | "nbformat": 4,
810 | "nbformat_minor": 1
811 | }
812 |
--------------------------------------------------------------------------------
/notebooks/lecture5b-probabilistic-perspectives.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "i_f5u2x9nn6I",
8 | "slideshow": {
9 | "slide_type": "slide"
10 | }
11 | },
12 | "source": [
13 | "
\n",
14 | "\n",
15 | "# Lecture 5b: Probabilistic Perspectives on ML Algorithms\n",
16 | "\n",
17 | "### Applied Machine Learning\n",
18 | "\n",
19 | "__Volodymyr Kuleshov__<br>Cornell Tech"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {
25 | "slideshow": {
26 | "slide_type": "slide"
27 | }
28 | },
29 | "source": [
30 | "# Part 1: Probabilistic Linear Regression\n",
31 | "\n",
32 | "Previously, we derived *maximum likelihood learning* as a general way of fitting machine learning models.\n",
33 | "\n",
34 | "We will now see how the algorithms we've seen so far are special cases of this principle."
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {
40 | "slideshow": {
41 | "slide_type": "slide"
42 | }
43 | },
44 | "source": [
45 | "# Review: Probabilistic Models\n",
46 | "\n",
47 | "A probabilistic model is a probability distribution\n",
48 | "$$P(x,y) : \\mathcal{X} \\times \\mathcal{Y} \\to [0,1].$$\n",
49 | "This model can approximate the data distribution $P_\\text{data}(x,y)$."
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {
55 | "slideshow": {
56 | "slide_type": "fragment"
57 | }
58 | },
59 | "source": [
60 | "If we know $P(x,y)$, we can use the conditional $P(y|x)$ for prediction."
61 | ]
62 | },
63 | {
64 | "cell_type": "markdown",
65 | "metadata": {
66 | "slideshow": {
67 | "slide_type": "fragment"
68 | }
69 | },
70 | "source": [
71 | "Probabilistic models may also have *parameters* $\\theta \\in \\Theta$, which we denote as\n",
72 | "$$P_\\theta(x,y) : \\mathcal{X} \\times \\mathcal{Y} \\to [0,1].$$"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {
78 | "slideshow": {
79 | "slide_type": "slide"
80 | }
81 | },
82 | "source": [
83 | "# Review: Conditional Maximum Likelihood\n",
84 | "\n",
85 | "A general approach of optimizing conditional models of the form $P_\\theta(y|x)$ is by minimizing expected KL divergence with respect to the data distribution:\n",
86 | "$$ \\min_\\theta \\mathbb{E}_{x \\sim \\mathbb{P}_\\text{data}} \\left[ D(P_\\text{data}(y|x) \\mid\\mid P_\\theta(y|x)) \\right]. $$"
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {
92 | "slideshow": {
93 | "slide_type": "fragment"
94 | }
95 | },
96 | "source": [
97 | "With a bit of math, we can show that the maximum likelihood objective becomes\n",
98 | "$$ \\max_\\theta \\mathbb{E}_{x, y \\sim \\mathbb{P}_\\text{data}} \\log P_\\theta(y|x). $$\n",
99 | "This is the principle of *conditional maximum likelihood*."
100 | ]
101 | },
102 | {
103 | "cell_type": "markdown",
104 | "metadata": {
105 | "slideshow": {
106 | "slide_type": "slide"
107 | }
108 | },
109 | "source": [
110 | "# Review: Least Squares\n",
111 | "\n",
112 | "Recall that the linear regression algorithm fits a linear model of the form\n",
113 | "$$ f(x) = \\sum_{j=0}^d \\theta_j \\cdot x_j = \\theta^\\top x. $$"
114 | ]
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {
119 | "slideshow": {
120 | "slide_type": "fragment"
121 | }
122 | },
123 | "source": [
124 | "It minimizes the mean squared error (MSE)\n",
125 | "$$J(\\theta)= \\frac{1}{2n} \\sum_{i=1}^n(y^{(i)}-\\theta^\\top x^{(i)})^2$$\n",
126 | "on a dataset $\\{(x^{(1)}, y^{(1)}), (x^{(2)}, y^{(2)}), \\ldots, (x^{(n)}, y^{(n)})\\}$."
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {
132 | "slideshow": {
133 | "slide_type": "subslide"
134 | }
135 | },
136 | "source": [
137 | "Is there a specific reason for us to be optimizing the mean squared error to fit our linear model?\n",
138 | "\n",
139 | "The answer to this can be found by looking at the algorithm from a probabilistic perspective."
140 | ]
141 | },
142 | {
143 | "cell_type": "markdown",
144 | "metadata": {
145 | "slideshow": {
146 | "slide_type": "slide"
147 | }
148 | },
149 | "source": [
150 | "# Probabilistic Least Squares\n",
151 | "\n",
152 | "Let's derive a probabilistic algorithm by defining a class of probabilistic models and use maximum likelihood as our objective."
153 | ]
154 | },
155 | {
156 | "cell_type": "markdown",
157 | "metadata": {
158 | "slideshow": {
159 | "slide_type": "fragment"
160 | }
161 | },
162 | "source": [
163 | "1. Let's choose our model family $\\mathcal{M}$ to be the set of Gaussian distributions of the form\n",
164 | "$$ p(y | x; \\theta) = \\frac{1}{\\sqrt{2\\pi}\\sigma} \\exp\\left( -\\frac{(y - \\theta^\\top x)^2}{2 \\sigma^2} \\right).$$\n",
165 | "Each model $\\mathcal{N}(y; \\mu(x), \\sigma)$ is a Gaussian with a standard deviation $\\sigma$ of one and a mean of $\\mu(x) = \\theta^\\top x$ that is parametrized by the parameters $\\theta$."
166 | ]
167 | },
168 | {
169 | "cell_type": "markdown",
170 | "metadata": {
171 | "slideshow": {
172 | "slide_type": "subslide"
173 | }
174 | },
175 | "source": [
176 | "2. We optimize the model using maximum likelihood. The log-likelihood function at a point $(x,y)$ equals\n",
177 | "\\begin{align*}\n",
178 | "\\log L(\\theta) = \\log p(y | x; \\theta) & = \\log \\frac{1}{\\sqrt{2\\pi}\\sigma} \\exp\\left( -\\frac{(y - \\theta^\\top x)^2}{2 \\sigma^2} \\right) \\\\\n",
179 | "& = -\\frac{(y - \\theta^\\top x)^2}{2 \\sigma^2} + \\text{const.}\n",
180 | "\\end{align*}"
181 | ]
182 | },
183 | {
184 | "cell_type": "markdown",
185 | "metadata": {
186 | "slideshow": {
187 | "slide_type": "fragment"
188 | }
189 | },
190 | "source": [
191 | "Note how this is a mean squared error (MSE) objective! \n",
192 | "\n",
193 | "Thus, minimizing MSE is equivalent to maximizing the log-likelihood of a Normal distribution $\\mathcal{N}(y; \\mu(x), \\sigma)$."
194 | ]
195 | },
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {
199 | "slideshow": {
200 | "slide_type": "slide"
201 | }
202 | },
203 | "source": [
204 | "# Algorithm: Gaussian Ordinary Least Squares\n",
205 | "\n",
206 | "* __Type__: Supervised learning (regression)\n",
207 | "* __Model family__: Linear models\n",
208 | "* __Objective function__: Mean squared error\n",
209 | "* __Optimizer__: Normal equations\n",
210 | "* __Probabilistic interpretation__: Conditional Gaussian fit using max-likelihood."
211 | ]
212 | },
213 | {
214 | "cell_type": "markdown",
215 | "metadata": {
216 | "slideshow": {
217 | "slide_type": "slide"
218 | }
219 | },
220 | "source": [
221 | "# Extensions of Gaussian Least Squares\n",
222 | "\n",
223 | "This is an example of how we can interpret a machine learning algorithm in a probabilistic framework.\n",
224 | "\n",
225 | "We will see many algorithms that have these kinds of interpretations. Here are some simple extensions."
226 | ]
227 | },
228 | {
229 | "cell_type": "markdown",
230 | "metadata": {
231 | "slideshow": {
232 | "slide_type": "subslide"
233 | }
234 | },
235 | "source": [
236 | "We can use a Gaussian model and also parametrize the standard deviation. \n",
237 | "* This is called heteroscedastic regression, and allows us to obtain confidence intervals for our predictions."
238 | ]
239 | },
240 | {
241 | "cell_type": "markdown",
242 | "metadata": {
243 | "slideshow": {
244 | "slide_type": "subslide"
245 | }
246 | },
247 | "source": [
248 | "We can also parametrize other distributions, not just the Gaussian. \n",
249 | "* Exponential or Gamma distributions for continuous variables \n",
250 | "* Bernoulli distribution for discrete variables\n",
251 | "\n",
252 | "This yields many new machine learning algorithms."
253 | ]
254 | },
255 | {
256 | "cell_type": "markdown",
257 | "metadata": {
258 | "slideshow": {
259 | "slide_type": "slide"
260 | }
261 | },
262 | "source": [
263 | "
\n",
264 | "# Part 2: Bayesian Algorithms\n",
265 | "\n",
266 | "We can also use what we learned about Bayesian ML to interpret several algorithms that we've seen as special cases of the Bayesian framework."
267 | ]
268 | },
269 | {
270 | "cell_type": "markdown",
271 | "metadata": {
272 | "slideshow": {
273 | "slide_type": "slide"
274 | }
275 | },
276 | "source": [
277 | "# Review: The Bayesian Approach\n",
278 | "\n",
279 | "In Bayesian statistics, $\\theta$ is a *random* variable whose value happens to be unknown."
280 | ]
281 | },
282 | {
283 | "cell_type": "markdown",
284 | "metadata": {
285 | "slideshow": {
286 | "slide_type": "subslide"
287 | }
288 | },
289 | "source": [
290 | "We formulate two models:\n",
291 | "* A *likelihood* model $P(x, y | \\theta)$ that defines the probability of $x,y$ for any fixed value of $\\theta$.\n",
292 | "* A *prior* $P(\\theta)$ that specifies our existing belief about the distribution of the random variable $\\theta$."
293 | ]
294 | },
295 | {
296 | "cell_type": "markdown",
297 | "metadata": {
298 | "slideshow": {
299 | "slide_type": "fragment"
300 | }
301 | },
302 | "source": [
303 | "Together, these two models define the *joint* distribution\n",
304 | "$$ P(x, y, \\theta) = P(x, y \\mid \\theta) P(\\theta) $$\n",
305 | "in which both the $x, y$ and the parameters $\\theta$ are random variables."
306 | ]
307 | },
308 | {
309 | "cell_type": "markdown",
310 | "metadata": {
311 | "slideshow": {
312 | "slide_type": "slide"
313 | }
314 | },
315 | "source": [
316 | "# Review: A Posteriori Learning\n",
317 | "\n",
318 | "Recall that in maximum a posteriori (MAP) learning, we optimize the following objective.\n",
319 | "\\begin{align*}\n",
320 | "\\theta_\\text{MAP} = \\arg\\max_\\theta \\left( \\log \\prod_{i=1}^n P(x^{(i)}, y^{(i)} \\mid \\theta) + \\log P(\\theta) \\right),\n",
321 | "\\end{align*}\n",
322 | "\n",
323 | "Note that we used the same formula as we used for maximum likelihood, except that we have added the prior term $\\log P(\\theta)$."
324 | ]
325 | },
326 | {
327 | "cell_type": "markdown",
328 | "metadata": {
329 | "slideshow": {
330 | "slide_type": "slide"
331 | }
332 | },
333 | "source": [
334 | "# Review: Ridge Regression\n",
335 | "\n",
336 | "Recall that the ridge regression algorithm fits a linear model \n",
337 | "$$ f(x) = \\sum_{j=0}^d \\theta_j \\cdot x_j = \\theta^\\top x. $$"
338 | ]
339 | },
340 | {
341 | "cell_type": "markdown",
342 | "metadata": {
343 | "slideshow": {
344 | "slide_type": "subslide"
345 | }
346 | },
347 | "source": [
348 | "We minimize the L2-regularized mean squared error (MSE)\n",
349 | "$$J(\\theta)= \\frac{1}{2n} \\sum_{i=1}^n(y^{(i)}-\\theta^\\top x^{(i)})^2 + \\frac{\\lambda}{2}\\sum_{j=1}^d \\theta_j^2$$\n",
350 | "on a dataset $\\{(x^{(1)}, y^{(1)}), (x^{(2)}, y^{(2)}), \\ldots, (x^{(n)}, y^{(n)})\\}$.\n",
351 | "The term $\\frac{1}{2}\\sum_{j=1}^d \\theta_j^2 = \\frac{1}{2}||\\theta||_2^2$ is called the regularizer."
352 | ]
353 | },
354 | {
355 | "cell_type": "markdown",
356 | "metadata": {
357 | "slideshow": {
358 | "slide_type": "slide"
359 | }
360 | },
361 | "source": [
362 | "# Probabilistic Ridge Regression\n",
363 | "\n",
364 | "We can interpret ridge regression as maximum a posteriori (MAP) estimation as follows."
365 | ]
366 | },
367 | {
368 | "cell_type": "markdown",
369 | "metadata": {
370 | "slideshow": {
371 | "slide_type": "slide"
372 | }
373 | },
374 | "source": [
375 | "1. First, we select our model family $\\mathcal{M}$ to be the set of Gaussian distributions of the form (let's assume $x \\in \\mathbb{R}$ for simplicity).\n",
376 | "$$ p(y | x; \\theta) = \\frac{1}{\\sqrt{2\\pi}\\sigma} \\exp\\left( -\\frac{(y - \\theta^\\top x)^2}{2 \\sigma^2} \\right).$$"
377 | ]
378 | },
379 | {
380 | "cell_type": "markdown",
381 | "metadata": {
382 | "slideshow": {
383 | "slide_type": "fragment"
384 | }
385 | },
386 | "source": [
387 | "2. We assume a Gaussian prior with mean zero and variance $\\tau^2$ on the parameters $\\theta$:\n",
388 | "$$ p(\\theta) = \\prod_{j=1}^d \\frac{1}{\\sqrt{2\\pi}\\tau} \\exp\\left( -\\frac{\\theta_j^2}{2\\tau^2} \\right).$$"
389 | ]
390 | },
391 | {
392 | "cell_type": "markdown",
393 | "metadata": {
394 | "slideshow": {
395 | "slide_type": "subslide"
396 | }
397 | },
398 | "source": [
399 | "3. We optimize the model using the MAP approach. The objective at a point $(x,y)$ equals\n",
400 | "\\begin{align*}\n",
401 | "\\log L(\\theta) & = \\log p(y | x; \\theta) + \\log p(\\theta) \n",
402 | "\\\\ & = \\log \\frac{1}{\\sqrt{2\\pi}\\sigma} \\exp\\left( -\\frac{(y - \\theta^\\top x)^2}{2 \\sigma^2} \\right) \\\\\n",
403 | "& \\;\\;\\;\\;\\;\\;\\;\\;\\;\\;\\;\\;\\;\\;\\;\\;\\; + \\log \\prod_{j=1}^d \\frac{1}{\\sqrt{2\\pi}\\tau} \\exp\\left( -\\frac{\\theta_j^2}{2\\tau^2} \\right) \\\\\n",
404 | "& = -\\frac{(y - \\theta^\\top x)^2}{2 \\sigma^2} - \\frac{1}{2\\tau^2}\\sum_{j=1}^d \\theta_j^2 + \\text{const.}\n",
405 | "\\end{align*}"
406 | ]
407 | },
408 | {
409 | "cell_type": "markdown",
410 | "metadata": {
411 | "slideshow": {
412 | "slide_type": "fragment"
413 | }
414 | },
415 | "source": [
416 | "Thus, we see that ridge regression actually amounts to performing MAP estimation with a Gaussian prior. The strength of the regularizer $\\lambda$ equals $1/\\tau^2$."
417 | ]
418 | },
419 | {
420 | "cell_type": "markdown",
421 | "metadata": {
422 | "slideshow": {
423 | "slide_type": "slide"
424 | }
425 | },
426 | "source": [
427 | "# Algorithm: Probabilistic Ridge Least Squares\n",
428 | "\n",
429 | "* __Type__: Supervised learning (regression)\n",
430 | "* __Model family__: Linear models\n",
431 | "* __Objective function__: L2-regularized mean squared error\n",
432 | "* __Optimizer__: Normal equations\n",
433 | "* __Probabilistic interpretation__: Conditional Gaussian likelihood and Gaussian prior fit using MAP."
434 | ]
435 | },
436 | {
437 | "cell_type": "markdown",
438 | "metadata": {
439 | "slideshow": {
440 | "slide_type": "slide"
441 | }
442 | },
443 | "source": [
444 | "# Bayesian View on ML Algorithms\n",
445 | "\n",
446 | "Very often, we can interpret classical ML algorithms as applications of the probabilistic or Bayesian approaches (although we can derive them in other ways as well!)"
447 | ]
448 | },
449 | {
450 | "cell_type": "markdown",
451 | "metadata": {
452 | "slideshow": {
453 | "slide_type": "fragment"
454 | }
455 | },
456 | "source": [
457 | "* Regularization can often be seen as applying a prior on the weights."
458 | ]
459 | },
460 | {
461 | "cell_type": "markdown",
462 | "metadata": {
463 | "slideshow": {
464 | "slide_type": "fragment"
465 | }
466 | },
467 | "source": [
468 | "* L1 regularization can be seen as applying a *Laplace* prior."
469 | ]
470 | },
471 | {
472 | "cell_type": "markdown",
473 | "metadata": {
474 | "slideshow": {
475 | "slide_type": "fragment"
476 | }
477 | },
478 | "source": [
479 | "* Many other algorithms will have similar interpretations."
480 | ]
481 | },
482 | {
483 | "cell_type": "markdown",
484 | "metadata": {
485 | "slideshow": {
486 | "slide_type": "skip"
487 | }
488 | },
489 | "source": [
490 | "
\n",
491 | "# Part 3: Bayesian Ridge Regression\n",
492 | "\n",
493 | "Let's now look at an example of a fully Bayesian machine learning algorithm.\n",
494 | "\n",
495 | "This section is still under construction and not part of the main lecture."
496 | ]
497 | },
498 | {
499 | "cell_type": "markdown",
500 | "metadata": {
501 | "slideshow": {
502 | "slide_type": "skip"
503 | }
504 | },
505 | "source": [
506 | "# Review: The Bayesian Approach\n",
507 | "\n",
508 | "In Bayesian statistics, $\\theta$ is a *random* variable whose value happens to be unknown."
509 | ]
510 | },
511 | {
512 | "cell_type": "markdown",
513 | "metadata": {
514 | "slideshow": {
515 | "slide_type": "skip"
516 | }
517 | },
518 | "source": [
519 | "We formulate two models:\n",
520 | "* A *likelihood* model $P(x, y | \\theta)$ that defines the probability of $x,y$ for any fixed value of $\\theta$.\n",
521 | "* A *prior* $P(\\theta)$ that specifies our existing belief about the distribution of the random variable $\\theta$."
522 | ]
523 | },
524 | {
525 | "cell_type": "markdown",
526 | "metadata": {
527 | "slideshow": {
528 | "slide_type": "skip"
529 | }
530 | },
531 | "source": [
532 | "Together, these two models define the *joint* distribution\n",
533 | "$$ P(x, y, \\theta) = P(x, y \\mid \\theta) P(\\theta) $$\n",
534 | "in which both the $x, y$ and the parameters $\\theta$ are random variables."
535 | ]
536 | },
537 | {
538 | "cell_type": "markdown",
539 | "metadata": {
540 | "slideshow": {
541 | "slide_type": "skip"
542 | }
543 | },
544 | "source": [
545 | "# Review: Ridge Regression\n",
546 | "\n",
547 | "Recall that the ridge regression algorithm fits a linear model \n",
548 | "$$ f(x) = \\sum_{j=0}^d \\theta_j \\cdot x_j = \\theta^\\top x. $$"
549 | ]
550 | },
551 | {
552 | "cell_type": "markdown",
553 | "metadata": {
554 | "slideshow": {
555 | "slide_type": "skip"
556 | }
557 | },
558 | "source": [
559 | "We minimize the L2-regularized mean squared error (MSE)\n",
560 | "$$J(\\theta)= \\frac{1}{2n} \\sum_{i=1}^n(y^{(i)}-\\theta^\\top x^{(i)})^2 + \\frac{1}{2}\\sum_{j=1}^d \\theta_j^2$$\n",
561 | "on a dataset $\\{(x^{(1)}, y^{(1)}), (x^{(2)}, y^{(2)}), \\ldots, (x^{(n)}, y^{(n)})\\}$.\n",
562 | "The term $\\frac{1}{2}\\sum_{j=1}^d \\theta_j^2 = \\frac{1}{2}||\\theta||_2^2$ is called the regularizer."
563 | ]
564 | },
565 | {
566 | "cell_type": "markdown",
567 | "metadata": {
568 | "slideshow": {
569 | "slide_type": "skip"
570 | }
571 | },
572 | "source": [
573 | "# Probabilistic Ridge Regression\n",
574 | "\n",
575 | "We can interpret ridge regression as maximum a posteriori (MAP) estimation as follows."
576 | ]
577 | },
578 | {
579 | "cell_type": "markdown",
580 | "metadata": {
581 | "slideshow": {
582 | "slide_type": "skip"
583 | }
584 | },
585 | "source": [
586 | "# Bayesian Predictions\n",
587 | "\n",
588 | "Suppose we now want to predict the value of $y$ from $x$. Unlike in the frequentist setting, we no longer have a single estimate $\\theta$ of the model params, but instead we have a distribution.\n",
589 | "\n",
590 | "The Bayesian approach to predicting $y$ given an input $x$ and a training dataset $\\mathcal{D}$ consists of averaging the predictions of all the possible models\n",
591 | "$$ P(y | x, \\mathcal{D}) = \\int_\\theta P(y \\mid x, \\theta) P(\\theta \\mid \\mathcal{D}) d\\theta. $$\n",
592 | "This is called the *posterior predictive* distribution. Note how each $P(y \\mid x, \\theta)$ is weighted by the probability of $\\theta$ given $\\mathcal{D}$."
593 | ]
594 | }
595 | ],
596 | "metadata": {
597 | "accelerator": "GPU",
598 | "celltoolbar": "Slideshow",
599 | "colab": {
600 | "collapsed_sections": [],
601 | "name": "neural-ode.ipynb",
602 | "provenance": []
603 | },
604 | "kernelspec": {
605 | "display_name": "aml",
606 | "language": "python",
607 | "name": "aml"
608 | },
609 | "language_info": {
610 | "codemirror_mode": {
611 | "name": "ipython",
612 | "version": 3
613 | },
614 | "file_extension": ".py",
615 | "mimetype": "text/x-python",
616 | "name": "python",
617 | "nbconvert_exporter": "python",
618 | "pygments_lexer": "ipython3",
619 | "version": "3.6.7"
620 | },
621 | "rise": {
622 | "controlsTutorial": false,
623 | "height": 900,
624 | "help": false,
625 | "margin": 0,
626 | "maxScale": 2,
627 | "minScale": 0.2,
628 | "progress": true,
629 | "scroll": true,
630 | "theme": "simple",
631 | "width": 1200
632 | }
633 | },
634 | "nbformat": 4,
635 | "nbformat_minor": 1
636 | }
637 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | appnope==0.1.0
2 | astroid==2.4.1
3 | attrs==19.3.0
4 | backcall==0.2.0
5 | bleach==3.1.5
6 | cycler==0.10.0
7 | decorator==4.4.2
8 | defusedxml==0.6.0
9 | entrypoints==0.3
10 | importlib-metadata==1.7.0
11 | ipykernel==5.3.4
12 | ipython==7.16.1
13 | ipython-genutils==0.2.0
14 | isort==4.3.21
15 | jedi==0.17.2
16 | Jinja2==2.11.2
17 | jsonschema==3.2.0
18 | jupyter-client==6.1.6
19 | jupyter-contrib-core==0.3.3
20 | jupyter-contrib-nbextensions==0.5.1
21 | jupyter-core==4.6.3
22 | jupyter-highlight-selected-word==0.2.0
23 | jupyter-latex-envs==1.4.6
24 | jupyter-nbextensions-configurator==0.4.1
25 | kiwisolver==1.2.0
26 | latexcodec==2.0.1
27 | lazy-object-proxy==1.4.3
28 | lxml==4.6.3
29 | MarkupSafe==1.1.1
30 | matplotlib==3.2.1
31 | mccabe==0.6.1
32 | mistune==0.8.4
33 | nbconvert==5.6.1
34 | nbformat==5.0.7
35 | notebook==6.0.3
36 | numpy==1.18.1
37 | packaging==20.4
38 | pandocfilters==1.4.2
39 | parso==0.7.1
40 | pexpect==4.8.0
41 | pickleshare==0.7.5
42 | Pillow==7.0.0
43 | prometheus-client==0.8.0
44 | prompt-toolkit==3.0.5
45 | ptyprocess==0.6.0
46 | pybtex==0.23.0
47 | Pygments==2.6.1
48 | pylint==2.5.2
49 | pyparsing==2.4.7
50 | pyrsistent==0.16.0
51 | python-dateutil==2.8.1
52 | PyYAML==5.3
53 | pyzmq==19.0.1
54 | rise==5.6.1
55 | scipy==1.4.1
56 | Send2Trash==1.5.0
57 | six==1.14.0
58 | terminado==0.8.3
59 | testpath==0.4.4
60 | toml==0.10.1
61 | torch==1.4.0
62 | torchdiffeq==0.0.1
63 | torchvision==0.5.0
64 | tornado==6.1.0
65 | tqdm==4.42.1
66 | traitlets==4.3.3
67 | typed-ast==1.4.1
68 | virtualenv-clone==0.5.4
69 | wcwidth==0.2.5
70 | webencodings==0.5.1
71 | wrapt==1.12.1
72 | zipp==3.1.0
73 |
--------------------------------------------------------------------------------