├── .gitignore ├── README.md ├── notebooks ├── Makefile ├── README.md ├── img │ ├── .DS_Store │ ├── 1d_conv.jpeg │ ├── aae_dim_reduc_2.png │ ├── ai_ml_dl.png │ ├── alexnet.png │ ├── algorithms1.png │ ├── algorithms10.png │ ├── algorithms11.png │ ├── algorithms12.png │ ├── algorithms13.png │ ├── algorithms2.png │ ├── algorithms3.png │ ├── algorithms4.png │ ├── algorithms5.png │ ├── algorithms6.png │ ├── algorithms7.png │ ├── algorithms8.png │ ├── algorithms9.png │ ├── alphago.jpeg │ ├── approximation.png │ ├── breakthrough.jpg │ ├── cartoonLVmodel.png │ ├── celebA.png │ ├── cnns │ │ ├── .DS_Store │ │ ├── 2dconv.gif │ │ ├── alexnet.png │ │ ├── cnn-filters-all.png │ │ ├── cnn-filters1.png │ │ ├── cnn-filters2.png │ │ ├── cnn-filters4.png │ │ ├── cnn.png │ │ ├── convlayer.png │ │ ├── flybrain.gif │ │ ├── hierarchy.jpg │ │ ├── hog.jpg │ │ ├── hubel.ppm │ │ ├── imagecaption.png │ │ ├── imagenet.png │ │ ├── layer1.png │ │ ├── layer2.png │ │ ├── layer3.png │ │ ├── layer45.png │ │ ├── lenet.png │ │ ├── levels.png │ │ ├── pathway.png │ │ ├── pooling.jpg │ │ ├── resnet-block-im.png │ │ ├── resnet-block.png │ │ ├── sift.jpg │ │ ├── stylegan.png │ │ └── unet.png │ ├── coordinate_descent.png │ ├── cornell_tech1.png │ ├── cornell_tech2.svg │ ├── cornell_tech3.png │ ├── cornell_tech4.jpg │ ├── cornell_tech5.png │ ├── darts.png │ ├── dcgan_feats.png │ ├── decision_tree.png │ ├── dl_data_curve.png │ ├── dna_map.jpg │ ├── functional_gradient.png │ ├── functional_gradient.pptx │ ├── google.png │ ├── google_assistant.png │ ├── image_captioning.jpeg │ ├── kmeans_convergence.gif │ ├── l1-vs-l2-annotated.png │ ├── l1-vs-l2.png │ ├── learning.png │ ├── learning_curve1.png │ ├── learning_curve2.png │ ├── learning_curve3.png │ ├── learning_curve4.png │ ├── learning_curve5.png │ ├── learning_curve6.png │ ├── learning_curve7.png │ ├── learning_curve8.png │ ├── life3.0.jpg │ ├── loss_curve1.png │ ├── loss_curve2.png │ ├── loss_curve3.png │ ├── loss_curve4.png │ ├── margin.png │ ├── mogdata.png 
│ ├── mogdata2.png │ ├── mogdata2_v2.png │ ├── mogdata_v2.png │ ├── mogdensity.png │ ├── mogdensity1d.png │ ├── mogdensity1d_v2.png │ ├── mogdensity_v2.png │ ├── mogdensity_v3.png │ ├── nns │ │ ├── aneuron.jpeg │ │ ├── backprop │ │ │ ├── backprop-abstract-figure0.png │ │ │ ├── backprop-abstract-figure1.png │ │ │ ├── backprop-abstract-figure2.png │ │ │ ├── backprop-abstract-figure3.png │ │ │ ├── backprop-abstract-figure4.png │ │ │ ├── backprop-abstract-figure5.png │ │ │ ├── backprop-abstract-figure6.png │ │ │ ├── backprop-twolayers-figure0.png │ │ │ ├── backprop-twolayers-figure1.png │ │ │ ├── backprop-twolayers-figure10.png │ │ │ ├── backprop-twolayers-figure11.png │ │ │ ├── backprop-twolayers-figure2.png │ │ │ ├── backprop-twolayers-figure3.png │ │ │ ├── backprop-twolayers-figure4.png │ │ │ ├── backprop-twolayers-figure5.png │ │ │ ├── backprop-twolayers-figure6.png │ │ │ ├── backprop-twolayers-figure7.png │ │ │ ├── backprop-twolayers-figure8.png │ │ │ ├── backprop-twolayers-figure9.png │ │ │ └── backprop-twolayers-legend.png │ │ ├── bio-neuron.png │ │ ├── bp1.png │ │ ├── layers-1.png │ │ ├── layers-2.png │ │ ├── layers-3.png │ │ ├── layers-4.png │ │ ├── layers-L.png │ │ ├── net-intro.png │ │ └── nn-example.png │ ├── obama.jpg │ ├── obama2.jpg │ ├── oldfMOG_v2.png │ ├── oldfSingle_v2.png │ ├── oldfaithful_v2.png │ ├── pca_decomposition.png │ ├── pca_example.png │ ├── pca_example_plane.png │ ├── pca_projection1.png │ ├── pca_projection2.png │ ├── pca_projection_data.png │ ├── pca_reconstruction.png │ ├── pca_two_views.png │ ├── rl.png │ ├── siri.png │ ├── spam.png │ ├── svm-margin-fig.key │ ├── tesla.jpg │ ├── tesla_data.png │ ├── tesla_zoom.jpg │ └── waymo.jpg ├── lecture1-introduction.ipynb ├── lecture10-svm-dual.ipynb ├── lecture11-kernels.ipynb ├── lecture12-decision-trees.ipynb ├── lecture12-neural-networks.ipynb ├── lecture13-boosting.ipynb ├── lecture14-neural-networks.ipynb ├── lecture15-deep-learning.ipynb ├── lecture16-unsupervised-learning.ipynb ├── 
lecture17-density-estimation.ipynb ├── lecture18-clustering.ipynb ├── lecture19-dimensionality-reduction.bak.ipynb ├── lecture19-dimensionality-reduction.ipynb ├── lecture2-supervised-learning.ipynb ├── lecture20-evaluation.ipynb ├── lecture21-model-iteration.ipynb ├── lecture22-evaluation-tools.ipynb ├── lecture23-overview.ipynb ├── lecture3-linear-regression.ipynb ├── lecture4-foundations-supervised-learning.ipynb ├── lecture5-maximum-likelihood.ipynb ├── lecture5b-probabilistic-perspectives.ipynb ├── lecture6-classification.ipynb ├── lecture7-generative-models.ipynb ├── lecture8-naive-bayes.ipynb └── lecture9-support-vector-machines.ipynb └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Applied Machine Learning (Cornell CS5785) 2 | 3 | This repo contains executable course notes and slides for the Applied ML course at Cornell and Cornell Tech. 4 | 5 | 8 | 9 | These materials accompany a set of YouTube [lecture videos](https://www.youtube.com/watch?v=vcE9WGbi4QY&list=PL2UML_KCiC0UlY7iCQDSiGDMovaupqc83) from the Fall 2020 edition of the course. 10 | 11 | ## Contents 12 | 13 | This repo is organized as follows. 14 | 15 | ``` 16 | . 17 | ├── README.md 18 | ├── notebooks # Notebooks and slides 19 | └── requirements.txt # Packages needed for your virtualenv 20 | ``` 21 | 22 | ## Setup 23 | 24 | ### Requirements 25 | 26 | You should be able to run all the contents of this repo using the packages provided in `requirements.txt`. 
27 | 28 | In a new `virtualenv`, run this: 29 | ``` 30 | pip install -r requirements.txt 31 | ``` 32 | 33 | ## Feedback 34 | 35 | Please send feedback to [Volodymyr Kuleshov](https://www.cs.cornell.edu/~kuleshov/) 36 | -------------------------------------------------------------------------------- /notebooks/Makefile: -------------------------------------------------------------------------------- 1 | # Converts each lecture notebook in this folder to Reveal.js slides, HTML notes, and PDF via jupyter nbconvert. 2 | #LECTURES:=lecture1 lecture2 lecture3 lecture4 lecture5b lecture6 3 | 4 | # Substitution references rewrite only the trailing .ipynb suffix, so "ipynb" elsewhere in a file name is left alone. 5 | NOTEBOOKS:=$(wildcard *.ipynb) 6 | SLIDES:=$(NOTEBOOKS:.ipynb=.slides.html) 7 | HTML:=$(NOTEBOOKS:.ipynb=.html) 8 | PDFS:=$(NOTEBOOKS:.ipynb=.pdf) 9 | 10 | # URL prefix for the Reveal.js assets; override with `make REVEAL_PREFIX=...`. 11 | REVEAL_PREFIX?=https://cdnjs.cloudflare.com/ajax/libs/reveal.js/3.3.0 12 | 13 | $(SLIDES): %.slides.html: %.ipynb 14 | jupyter nbconvert $< --to slides --reveal-prefix "$(REVEAL_PREFIX)" 15 | 16 | $(HTML): %.html: %.ipynb 17 | jupyter nbconvert $< --to html_embed --reveal-prefix "$(REVEAL_PREFIX)" 18 | 19 | $(PDFS): %.pdf: %.ipynb 20 | jupyter nbconvert $< --to pdf --reveal-prefix "$(REVEAL_PREFIX)" 21 | 22 | # These targets name actions, not files, so they must run even if a file with the same name exists. 23 | .PHONY: notes slides pdfs clean all 24 | 25 | notes: $(HTML) 26 | 27 | slides: $(SLIDES) 28 | 29 | pdfs: $(PDFS) 30 | 31 | # -f keeps clean from failing when nothing has been generated yet. 32 | clean: 33 | rm -f *.html *.pdf 34 | 35 | all: notes slides pdfs 36 | -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | # Applied Machine Learning (Cornell CS5785): Notebooks and Slides 2 | 3 | This repo contains executable course notes and slides for the Applied ML course at Cornell and Cornell Tech. 4 | 5 | There are four types of files you can obtain from this folder. These contain all the materials shown in the video lectures. 6 | * Executable notebooks in `*.ipynb` format. 
7 | * Slides used in the video lectures, in HTML format and powered by Reveal.js (`*.slides.html`). 8 | * Portable course notes in `*.html` format; images are embedded in the notes. 9 | * Portable `*.pdf` files. 10 | 11 | All four types of files have the same content; they're just in different formats. 12 | 13 | The Jupyter notebooks are fully interactive and you can use them to regenerate all the materials for the course. 14 | 15 | ## Setup 16 | 17 | In order to generate all the files, use the provided Makefile. 18 | ``` 19 | make all 20 | ``` 21 | 22 | If you want to generate each type of file separately, you can type: 23 | ``` 24 | make notes 25 | make slides 26 | make pdfs 27 | ``` 28 | 29 | You can also generate specific files, e.g.: 30 | ``` 31 | make lecture15-deep-learning.slides.html 32 | ``` 33 | 34 | To reset the repo and remove the generated files, do this: 35 | ``` 36 | make clean 37 | ``` 38 | 39 | ### Requirements 40 | 41 | You should be able to run all the contents of this repo using the packages provided in `requirements.txt` at the root of the repo. 42 | 43 | In a new `virtualenv`, run this: 44 | ``` 45 | pip install -r requirements.txt 46 | ``` 47 | 48 | ## Issues 49 | 50 | Currently, there are issues in linking images to the contents: 51 | * The `*.slides.html` files must be located in the same folder as the `img` folder in order to display images. 52 | * The `*.pdf` files do not contain images due to a limitation of `nbconvert`. 53 | 54 | However, the `*.html` notes are fully portable. 
55 | 56 | ## Feedback 57 | 58 | Please send feedback to [Volodymyr Kuleshov](https://www.cs.cornell.edu/~kuleshov/) 59 | -------------------------------------------------------------------------------- /notebooks/img/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/.DS_Store -------------------------------------------------------------------------------- /notebooks/img/1d_conv.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/1d_conv.jpeg -------------------------------------------------------------------------------- /notebooks/img/aae_dim_reduc_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/aae_dim_reduc_2.png -------------------------------------------------------------------------------- /notebooks/img/ai_ml_dl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/ai_ml_dl.png -------------------------------------------------------------------------------- /notebooks/img/alexnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/alexnet.png -------------------------------------------------------------------------------- /notebooks/img/algorithms1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms1.png -------------------------------------------------------------------------------- /notebooks/img/algorithms10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms10.png -------------------------------------------------------------------------------- /notebooks/img/algorithms11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms11.png -------------------------------------------------------------------------------- /notebooks/img/algorithms12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms12.png -------------------------------------------------------------------------------- /notebooks/img/algorithms13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms13.png -------------------------------------------------------------------------------- /notebooks/img/algorithms2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms2.png -------------------------------------------------------------------------------- /notebooks/img/algorithms3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms3.png -------------------------------------------------------------------------------- /notebooks/img/algorithms4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms4.png -------------------------------------------------------------------------------- /notebooks/img/algorithms5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms5.png -------------------------------------------------------------------------------- /notebooks/img/algorithms6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms6.png -------------------------------------------------------------------------------- /notebooks/img/algorithms7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms7.png -------------------------------------------------------------------------------- /notebooks/img/algorithms8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms8.png 
-------------------------------------------------------------------------------- /notebooks/img/algorithms9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/algorithms9.png -------------------------------------------------------------------------------- /notebooks/img/alphago.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/alphago.jpeg -------------------------------------------------------------------------------- /notebooks/img/approximation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/approximation.png -------------------------------------------------------------------------------- /notebooks/img/breakthrough.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/breakthrough.jpg -------------------------------------------------------------------------------- /notebooks/img/cartoonLVmodel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cartoonLVmodel.png -------------------------------------------------------------------------------- /notebooks/img/celebA.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/celebA.png -------------------------------------------------------------------------------- /notebooks/img/cnns/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/.DS_Store -------------------------------------------------------------------------------- /notebooks/img/cnns/2dconv.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/2dconv.gif -------------------------------------------------------------------------------- /notebooks/img/cnns/alexnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/alexnet.png -------------------------------------------------------------------------------- /notebooks/img/cnns/cnn-filters-all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/cnn-filters-all.png -------------------------------------------------------------------------------- /notebooks/img/cnns/cnn-filters1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/cnn-filters1.png -------------------------------------------------------------------------------- /notebooks/img/cnns/cnn-filters2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/cnn-filters2.png -------------------------------------------------------------------------------- /notebooks/img/cnns/cnn-filters4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/cnn-filters4.png -------------------------------------------------------------------------------- /notebooks/img/cnns/cnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/cnn.png -------------------------------------------------------------------------------- /notebooks/img/cnns/convlayer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/convlayer.png -------------------------------------------------------------------------------- /notebooks/img/cnns/flybrain.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/flybrain.gif -------------------------------------------------------------------------------- /notebooks/img/cnns/hierarchy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/hierarchy.jpg 
-------------------------------------------------------------------------------- /notebooks/img/cnns/hog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/hog.jpg -------------------------------------------------------------------------------- /notebooks/img/cnns/hubel.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/hubel.ppm -------------------------------------------------------------------------------- /notebooks/img/cnns/imagecaption.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/imagecaption.png -------------------------------------------------------------------------------- /notebooks/img/cnns/imagenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/imagenet.png -------------------------------------------------------------------------------- /notebooks/img/cnns/layer1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/layer1.png -------------------------------------------------------------------------------- /notebooks/img/cnns/layer2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/layer2.png -------------------------------------------------------------------------------- /notebooks/img/cnns/layer3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/layer3.png -------------------------------------------------------------------------------- /notebooks/img/cnns/layer45.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/layer45.png -------------------------------------------------------------------------------- /notebooks/img/cnns/lenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/lenet.png -------------------------------------------------------------------------------- /notebooks/img/cnns/levels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/levels.png -------------------------------------------------------------------------------- /notebooks/img/cnns/pathway.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/pathway.png -------------------------------------------------------------------------------- /notebooks/img/cnns/pooling.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/pooling.jpg -------------------------------------------------------------------------------- /notebooks/img/cnns/resnet-block-im.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/resnet-block-im.png -------------------------------------------------------------------------------- /notebooks/img/cnns/resnet-block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/resnet-block.png -------------------------------------------------------------------------------- /notebooks/img/cnns/sift.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/sift.jpg -------------------------------------------------------------------------------- /notebooks/img/cnns/stylegan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/stylegan.png -------------------------------------------------------------------------------- /notebooks/img/cnns/unet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cnns/unet.png 
-------------------------------------------------------------------------------- /notebooks/img/coordinate_descent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/coordinate_descent.png -------------------------------------------------------------------------------- /notebooks/img/cornell_tech1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cornell_tech1.png -------------------------------------------------------------------------------- /notebooks/img/cornell_tech2.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | red 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /notebooks/img/cornell_tech3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cornell_tech3.png -------------------------------------------------------------------------------- /notebooks/img/cornell_tech4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cornell_tech4.jpg -------------------------------------------------------------------------------- /notebooks/img/cornell_tech5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/cornell_tech5.png -------------------------------------------------------------------------------- /notebooks/img/darts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/darts.png -------------------------------------------------------------------------------- /notebooks/img/dcgan_feats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/dcgan_feats.png -------------------------------------------------------------------------------- /notebooks/img/decision_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/decision_tree.png -------------------------------------------------------------------------------- /notebooks/img/dl_data_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/dl_data_curve.png -------------------------------------------------------------------------------- /notebooks/img/dna_map.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/dna_map.jpg -------------------------------------------------------------------------------- /notebooks/img/functional_gradient.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/functional_gradient.png -------------------------------------------------------------------------------- /notebooks/img/functional_gradient.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/functional_gradient.pptx -------------------------------------------------------------------------------- /notebooks/img/google.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/google.png -------------------------------------------------------------------------------- /notebooks/img/google_assistant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/google_assistant.png -------------------------------------------------------------------------------- /notebooks/img/image_captioning.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/image_captioning.jpeg -------------------------------------------------------------------------------- /notebooks/img/kmeans_convergence.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/kmeans_convergence.gif 
-------------------------------------------------------------------------------- /notebooks/img/l1-vs-l2-annotated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/l1-vs-l2-annotated.png -------------------------------------------------------------------------------- /notebooks/img/l1-vs-l2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/l1-vs-l2.png -------------------------------------------------------------------------------- /notebooks/img/learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning.png -------------------------------------------------------------------------------- /notebooks/img/learning_curve1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve1.png -------------------------------------------------------------------------------- /notebooks/img/learning_curve2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve2.png -------------------------------------------------------------------------------- /notebooks/img/learning_curve3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve3.png -------------------------------------------------------------------------------- /notebooks/img/learning_curve4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve4.png -------------------------------------------------------------------------------- /notebooks/img/learning_curve5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve5.png -------------------------------------------------------------------------------- /notebooks/img/learning_curve6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve6.png -------------------------------------------------------------------------------- /notebooks/img/learning_curve7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve7.png -------------------------------------------------------------------------------- /notebooks/img/learning_curve8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/learning_curve8.png -------------------------------------------------------------------------------- /notebooks/img/life3.0.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/life3.0.jpg -------------------------------------------------------------------------------- /notebooks/img/loss_curve1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/loss_curve1.png -------------------------------------------------------------------------------- /notebooks/img/loss_curve2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/loss_curve2.png -------------------------------------------------------------------------------- /notebooks/img/loss_curve3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/loss_curve3.png -------------------------------------------------------------------------------- /notebooks/img/loss_curve4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/loss_curve4.png -------------------------------------------------------------------------------- /notebooks/img/margin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/margin.png -------------------------------------------------------------------------------- 
/notebooks/img/mogdata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdata.png -------------------------------------------------------------------------------- /notebooks/img/mogdata2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdata2.png -------------------------------------------------------------------------------- /notebooks/img/mogdata2_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdata2_v2.png -------------------------------------------------------------------------------- /notebooks/img/mogdata_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdata_v2.png -------------------------------------------------------------------------------- /notebooks/img/mogdensity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdensity.png -------------------------------------------------------------------------------- /notebooks/img/mogdensity1d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdensity1d.png 
-------------------------------------------------------------------------------- /notebooks/img/mogdensity1d_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdensity1d_v2.png -------------------------------------------------------------------------------- /notebooks/img/mogdensity_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdensity_v2.png -------------------------------------------------------------------------------- /notebooks/img/mogdensity_v3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/mogdensity_v3.png -------------------------------------------------------------------------------- /notebooks/img/nns/aneuron.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/aneuron.jpeg -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-abstract-figure0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-abstract-figure0.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-abstract-figure1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-abstract-figure1.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-abstract-figure2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-abstract-figure2.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-abstract-figure3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-abstract-figure3.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-abstract-figure4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-abstract-figure4.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-abstract-figure5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-abstract-figure5.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-abstract-figure6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-abstract-figure6.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-twolayers-figure0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure0.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-twolayers-figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure1.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-twolayers-figure10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure10.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-twolayers-figure11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure11.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-twolayers-figure2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure2.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-twolayers-figure3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure3.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-twolayers-figure4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure4.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-twolayers-figure5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure5.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-twolayers-figure6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure6.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-twolayers-figure7.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure7.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-twolayers-figure8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure8.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-twolayers-figure9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-figure9.png -------------------------------------------------------------------------------- /notebooks/img/nns/backprop/backprop-twolayers-legend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/backprop/backprop-twolayers-legend.png -------------------------------------------------------------------------------- /notebooks/img/nns/bio-neuron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/bio-neuron.png -------------------------------------------------------------------------------- /notebooks/img/nns/bp1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/bp1.png -------------------------------------------------------------------------------- /notebooks/img/nns/layers-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/layers-1.png -------------------------------------------------------------------------------- /notebooks/img/nns/layers-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/layers-2.png -------------------------------------------------------------------------------- /notebooks/img/nns/layers-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/layers-3.png -------------------------------------------------------------------------------- /notebooks/img/nns/layers-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/layers-4.png -------------------------------------------------------------------------------- /notebooks/img/nns/layers-L.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/layers-L.png -------------------------------------------------------------------------------- /notebooks/img/nns/net-intro.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/net-intro.png -------------------------------------------------------------------------------- /notebooks/img/nns/nn-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/nns/nn-example.png -------------------------------------------------------------------------------- /notebooks/img/obama.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/obama.jpg -------------------------------------------------------------------------------- /notebooks/img/obama2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/obama2.jpg -------------------------------------------------------------------------------- /notebooks/img/oldfMOG_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/oldfMOG_v2.png -------------------------------------------------------------------------------- /notebooks/img/oldfSingle_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/oldfSingle_v2.png -------------------------------------------------------------------------------- 
/notebooks/img/oldfaithful_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/oldfaithful_v2.png -------------------------------------------------------------------------------- /notebooks/img/pca_decomposition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_decomposition.png -------------------------------------------------------------------------------- /notebooks/img/pca_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_example.png -------------------------------------------------------------------------------- /notebooks/img/pca_example_plane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_example_plane.png -------------------------------------------------------------------------------- /notebooks/img/pca_projection1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_projection1.png -------------------------------------------------------------------------------- /notebooks/img/pca_projection2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_projection2.png 
-------------------------------------------------------------------------------- /notebooks/img/pca_projection_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_projection_data.png -------------------------------------------------------------------------------- /notebooks/img/pca_reconstruction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_reconstruction.png -------------------------------------------------------------------------------- /notebooks/img/pca_two_views.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/pca_two_views.png -------------------------------------------------------------------------------- /notebooks/img/rl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/rl.png -------------------------------------------------------------------------------- /notebooks/img/siri.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/siri.png -------------------------------------------------------------------------------- /notebooks/img/spam.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/spam.png -------------------------------------------------------------------------------- /notebooks/img/svm-margin-fig.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/svm-margin-fig.key -------------------------------------------------------------------------------- /notebooks/img/tesla.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/tesla.jpg -------------------------------------------------------------------------------- /notebooks/img/tesla_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/tesla_data.png -------------------------------------------------------------------------------- /notebooks/img/tesla_zoom.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/tesla_zoom.jpg -------------------------------------------------------------------------------- /notebooks/img/waymo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuleshov/cornell-cs5785-2020-applied-ml/30cd86f9de63ff1baf970dcdf631465a897d2087/notebooks/img/waymo.jpg -------------------------------------------------------------------------------- /notebooks/lecture12-neural-networks.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "i_f5u2x9nn6I", 8 | "slideshow": { 9 | "slide_type": "slide" 10 | } 11 | }, 12 | "source": [ 13 | "\n", 14 | "\n", 15 | "# Lecture 13: Neural Networks\n", 16 | "\n", 17 | "### Applied Machine Learning\n", 18 | "\n", 19 | "__Volodymyr Kuleshov, Jin Sun__
Cornell Tech" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": { 25 | "slideshow": { 26 | "slide_type": "slide" 27 | } 28 | }, 29 | "source": [ 30 | "# Part 1: An Artificial Neuron\n", 31 | "\n", 32 | "In this lecture, we will learn about a new class of machine learning algorithms inspired by the brain.\n", 33 | "\n", 34 | "We will start by defining a few building blocks for these algorithms, and draw connections to neuroscience." 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "# Review: Components of A Supervised Machine Learning Problem\n", 42 | "\n", 43 | "At a high level, a supervised machine learning problem has the following structure:\n", 44 | "\n", 45 | "$$ \\underbrace{\\text{Training Dataset}}_\\text{Attributes + Features} + \\underbrace{\\text{Learning Algorithm}}_\\text{Model Class + Objective + Optimizer } \\to \\text{Predictive Model} $$\n", 46 | "\n", 47 | "Where does the dataset come from?" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "slideshow": { 54 | "slide_type": "slide" 55 | } 56 | }, 57 | "source": [ 58 | "# Review: Binary Classification\n", 59 | "\n", 60 | "In supervised learning, we fit a model of the form\n", 61 | "$$ f : \\mathcal{X} \\to \\mathcal{Y} $$\n", 62 | "that maps inputs $x \\in \\mathcal{X}$ to targets $y \\in \\mathcal{Y}$." 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": { 68 | "slideshow": { 69 | "slide_type": "fragment" 70 | } 71 | }, 72 | "source": [ 73 | "In classification, the space of targets $\\mathcal{Y}$ is *discrete*. Classification is binary if $\\mathcal{Y} = \\{0,1\\}$." 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": { 79 | "slideshow": { 80 | "slide_type": "fragment" 81 | } 82 | }, 83 | "source": [ 84 | "Each value of $y$ is a *class* and we are interested in finding a hyperplane that separates the different classes." 
85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": { 90 | "slideshow": { 91 | "slide_type": "slide" 92 | } 93 | }, 94 | "source": [ 95 | "# Review: Logistic Regression\n", 96 | "\n", 97 | "Logistic regression fits a model of the form\n", 98 | "$$ f(x) = \\sigma(\\theta^\\top x) = \\frac{1}{1 + \\exp(-\\theta^\\top x)}, $$\n", 99 | "where\n", 100 | "$$ \\sigma(z) = \\frac{1}{1 + \\exp(-z)} $$\n", 101 | "is known as the *sigmoid* or *logistic* function." 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": { 107 | "slideshow": { 108 | "slide_type": "subslide" 109 | } 110 | }, 111 | "source": [ 112 | "Here is what the logistic function looks like." 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 1, 118 | "metadata": { 119 | "slideshow": { 120 | "slide_type": "-" 121 | } 122 | }, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "[]" 128 | ] 129 | }, 130 | "execution_count": 1, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | }, 134 | { 135 | "data": { 136 | "image/png": "\n", 137 | "text/plain": [ 138 | "
" 139 | ] 140 | }, 141 | "metadata": { 142 | "needs_background": "light" 143 | }, 144 | "output_type": "display_data" 145 | } 146 | ], 147 | "source": [ 148 | "import numpy as np\n", 149 | "from matplotlib import pyplot as plt\n", 150 | "\n", 151 | "z = np.linspace(-5, 5)\n", 152 | "sigma = 1/(1+np.exp(-z))\n", 153 | "\n", 154 | "plt.plot(z, sigma)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "# A Biological Neuron\n", 162 | "\n", 163 | "In order to define an artificial neuron, let's first look at a biological one.\n", 164 | "\n", 165 | "TODO: PUT NEURON IMAGE FROM HERE: https://cs231n.github.io/neural-networks-1/\n", 166 | "\n", 167 | "* Each neuron receives input signals from its dendrites.\n", 168 | "* It produces output signals along its axon, which connects to the dendrites of other neurons." 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "# An Artificial Neuron: Example\n", 176 | "\n", 177 | "We can imitate this machinery using an idealized artificial neuron.\n", 178 | "* The neuron receives signals $x_j$ at dendrites, which are modulated multiplicatively: $w_j \\cdot x_j$.\n", 179 | "* The body of the neuron sums the modulated inputs: $\\sum_{j=1}^d w_j \\cdot x_j$.\n", 180 | "* These go into the activation function that produces an output.\n", 181 | "\n", 182 | "TODO: PUT ARTIFICIAL NEURON IMAGE FROM HERE: https://cs231n.github.io/neural-networks-1/" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "# An Artificial Neuron: Notation\n", 190 | "\n", 191 | "More formally, we say that a neuron is a model $f : \\mathbb{R}^d \\to [0,1]$, with the following components:\n", 192 | "* Inputs $x_1,x_2,...,x_d$, denoted by a vector $x$.\n", 193 | "* Weight vector $w \\in \\mathbb{R}^d$ that modulates input $x$ as $w^\\top x$.\n", 194 | "* An activation function $\\sigma: \\mathbb{R} \\to \\mathbb{R}$ that
computes the output $\\sigma(w^\\top x)$ of the neuron based on the sum of modulated features $w^\\top x$." 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "# Logistic Regression as an Artificial Neuron\n", 202 | "\n", 203 | "Logistic regression is a model of the form\n", 204 | "$$ f(x) = \\sigma(\\theta^\\top x) = \\frac{1}{1 + \\exp(-\\theta^\\top x)}, $$\n", 205 | "that can be interpreted as a neuron that uses the *sigmoid* as the activation function." 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "# Perceptron\n", 213 | "\n", 214 | "Another model of a neuron." 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "# Example\n", 222 | "\n", 223 | "Need to implement a small example. Can probably copy-paste implementation of LR from the LR slides." 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "# Activation Functions\n", 231 | "\n", 232 | "Let's list a few." 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "\n", 240 | "# Part 2: Artificial Neural Networks\n", 241 | "\n", 242 | "Let's now see how we can connect neurons into networks that form complex models that further mimic the brain." 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "# Review: Artificial Neuron\n", 250 | "\n", 251 | "We say that a neuron is a model $f : \\mathbb{R}^d \\to [0,1]$, with the following components:\n", 252 | "* Inputs $x_1,x_2,...,x_d$, denoted by a vector $x$.\n", 253 | "* Weight vector $w \\in \\mathbb{R}^d$ that modulates input $x$ as $w^\\top x$.\n", 254 | "* An activation function $\\sigma: \\mathbb{R} \\to \\mathbb{R}$ that computes the output $\\sigma(w^\\top x)$ of the neuron based on the sum of modulated features $w^\\top x$." 
255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "# Review: Logistic Regression as Neuron\n", 262 | "\n", 263 | "Logistic regression is a model of the form\n", 264 | "$$ f(x) = \\sigma(\\theta^\\top x) = \\frac{1}{1 + \\exp(-\\theta^\\top x)}, $$\n", 265 | "that can be interpreted as a neuron that uses the *sigmoid* as the activation function." 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "# Neural Networks: Intuition\n", 273 | "\n", 274 | "A neural network is a directed graph in which a node is a neuron that takes as input the outputs of the neurons that are connected to it.\n", 275 | "\n", 276 | "TODO: Add an image here. Maybe layer image from here: https://cs231n.github.io/neural-networks-1/ (It probably needs some annotations)\n", 277 | "\n", 278 | "Networks are typically organized in layers." 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": { 284 | "slideshow": { 285 | "slide_type": "slide" 286 | } 287 | }, 288 | "source": [ 289 | "# Neural Networks: Layers\n", 290 | "\n", 291 | "A neural network layer is a model $f : \\mathbb{R}^d \\to \\mathbb{R}^p$ that applies $p$ neurons in parallel to an input $x$.\n", 292 | "$$ f(x) = \\begin{bmatrix}\n", 293 | "\\sigma(w_1^\\top x) \\\\\n", 294 | "\\sigma(w_2^\\top x) \\\\\n", 295 | "\\vdots \\\\\n", 296 | "\\sigma(w_p^\\top x)\n", 297 | "\\end{bmatrix},\n", 298 | "$$\n", 299 | "where each $w_k$ is the vector of weights for the $k$-th neuron. We refer to $p$ as the *size* of the layer." 
300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": { 305 | "slideshow": { 306 | "slide_type": "subslide" 307 | } 308 | }, 309 | "source": [ 310 | "By combining the $w_k$ into one matrix $W$, we can write in a more succinct vectorized form:\n", 311 | "$$f(x) = \\sigma(W\\cdot x) = \\begin{bmatrix}\n", 312 | "\\sigma(w_1^\\top x) \\\\\n", 313 | "\\sigma(w_2^\\top x) \\\\\n", 314 | "\\vdots \\\\\n", 315 | "\\sigma(w_p^\\top x)\n", 316 | "\\end{bmatrix},\n", 317 | "$$\n", 318 | "where $\\sigma(W\\cdot x)_k = \\sigma(w_k^\\top x)$ and $W_{kj} = (w_k)_j$. " 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "# Neural Networks: Notation\n", 326 | "\n", 327 | "A neural network is a model $f : \\mathbb{R}^d \\to \\mathbb{R}$ that consists of a composition of $L$ neural network layers:\n", 328 | "$$ f(x) = f_L \\circ f_{L-1} \\circ \\ldots \\circ f_1 (x). $$\n", 329 | "The final layer $f_L$ has size one (assuming the neural net has one output); intermediary layers $f_l$ can have any number of neurons.\n", 330 | "\n", 331 | "The notation $f \\circ g(x)$ denotes the composition $f(g(x))$ of functions." 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": [ 338 | "# Example of a Neural Network\n", 339 | "\n", 340 | "* Let's implement a small neural net in the same way that we implemented logistic regression\n", 341 | "* Then we just run it" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": {}, 347 | "source": [ 348 | "# Types of Neural Network Layers\n", 349 | "\n", 350 | "There are many types of neural network layers that can exist. 
Here are a few:\n", 351 | "* Output layer: normally has one neuron and a special activation function that depends on the problem\n", 352 | "* Input layer: normally, this is just the input vector $x$.\n", 353 | "* Hidden layer: Any layer between input and output.\n", 354 | "* Dense layer: A layer in which every input is connected to every neuron.\n", 355 | "* Convolutional layer: A layer in which the operation $w^\\top x$ implements a mathematical [convolution](https://en.wikipedia.org/wiki/Convolution).\n", 356 | "* Anything else?" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": [ 363 | "# Neuroscience Angle\n", 364 | "\n", 365 | "Anything we should say here?" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "\n", 373 | "# Part 3: Backpropagation\n", 374 | "\n", 375 | "We have defined what an artificial neural network is.\n", 376 | "\n", 377 | "Let's now see how we can train it." 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "# Review: Neural Network Layers\n", 385 | "\n", 386 | "A neural network layer is a model $f : \\mathbb{R}^d \\to \\mathbb{R}^p$ that applies $p$ neurons in parallel to an input $x$.\n", 387 | "$$f(x) = \\sigma(W\\cdot x) = \\begin{bmatrix}\n", 388 | "\\sigma(w_1^\\top x) \\\\\n", 389 | "\\sigma(w_2^\\top x) \\\\\n", 390 | "\\vdots \\\\\n", 391 | "\\sigma(w_p^\\top x)\n", 392 | "\\end{bmatrix},\n", 393 | "$$\n", 394 | "where each $w_k$ is the vector of weights for the $k$-th neuron and $W_{kj} = (w_k)_j$. We refer to $p$ as the *size* of the layer." 395 | ] 396 | }, 397 | { 398 | "cell_type": "markdown", 399 | "metadata": {}, 400 | "source": [ 401 | "# Review: Neural Networks\n", 402 | "\n", 403 | "A neural network is a model $f : \\mathbb{R}^d \\to \\mathbb{R}$ that consists of a composition of $L$ neural network layers:\n", 404 | "$$ f(x) = f_L \\circ f_{L-1} \\circ \\ldots \\circ f_1 (x). 
$$\n", 405 | "The final layer $f_L$ has size one (assuming the neural net has one ouput); intermediary layers $f_l$ can have any number of neurons.\n", 406 | "\n", 407 | "The notation $f \\circ g(x)$ denotes the composition $f(g(x))$ of functions\n", 408 | "\n", 409 | "TODO: Add some kind of image from the previous part of the lecture" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": {}, 415 | "source": [ 416 | "# Review: The Gradient\n", 417 | "\n", 418 | "The gradient $\\nabla_\\theta f$ further extends the derivative to multivariate functions $f : \\mathbb{R}^d \\to \\mathbb{R}$, and is defined at a point $\\theta_0$ as\n", 419 | "\n", 420 | "$$ \\nabla_\\theta f (\\theta_0) = \\begin{bmatrix}\n", 421 | "\\frac{\\partial f(\\theta_0)}{\\partial \\theta_1} \\\\\n", 422 | "\\frac{\\partial f(\\theta_0)}{\\partial \\theta_2} \\\\\n", 423 | "\\vdots \\\\\n", 424 | "\\frac{\\partial f(\\theta_0)}{\\partial \\theta_d}\n", 425 | "\\end{bmatrix}.$$\n", 426 | "\n", 427 | "In other words, the $j$-th entry of the vector $\\nabla_\\theta f (\\theta_0)$ is the partial derivative $\\frac{\\partial f(\\theta_0)}{\\partial \\theta_j}$ of $f$ with respect to the $j$-th component of $\\theta$." 428 | ] 429 | }, 430 | { 431 | "cell_type": "markdown", 432 | "metadata": {}, 433 | "source": [ 434 | "# Review: Gradient Descent\n", 435 | "\n", 436 | "If we want to optimize an objective $J(\\theta)$, we start with an initial guess $\\theta_0$ for the parameters and repeat the following update until the function is no longer decreasing:\n", 437 | "$$ \\theta_i := \\theta_{i-1} - \\alpha \\cdot \\nabla_\\theta J(\\theta_{i-1}). 
$$\n", 438 | "\n", 439 | "As code, this method may look as follows:\n", 440 | "```python\n", 441 | "theta, theta_prev = random_initialization()\n", 442 | "while abs(J(theta) - J(theta_prev)) > conv_threshold:\n", 443 | " theta_prev = theta\n", 444 | " theta = theta_prev - step_size * gradient(theta_prev)\n", 445 | "```" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "# Backpropagation\n", 453 | "\n", 454 | "How do we apply gradient descent to a neural network?\n", 455 | "\n", 456 | "Explain backpropgation" 457 | ] 458 | }, 459 | { 460 | "cell_type": "markdown", 461 | "metadata": {}, 462 | "source": [ 463 | "# Review: Chain Rule of Calculus\n", 464 | "\n", 465 | "Probably will need to review this at some point." 466 | ] 467 | }, 468 | { 469 | "cell_type": "markdown", 470 | "metadata": {}, 471 | "source": [ 472 | "# Example\n", 473 | "\n", 474 | "Let's implement backprop with the simple NN model we had earlier." 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "metadata": {}, 480 | "source": [ 481 | "\n", 482 | "# Part 4: Stochastic Gradient Descent\n", 483 | "\n", 484 | "In practice, neural networks are often trained on very large datasets.\n", 485 | "\n", 486 | "This requires a mosification to the gradient descent algorithm that we have seen earlier." 
487 | ] 488 | }, 489 | { 490 | "cell_type": "markdown", 491 | "metadata": {}, 492 | "source": [ 493 | "Volodymyr will create this section" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "metadata": {}, 500 | "outputs": [], 501 | "source": [] 502 | } 503 | ], 504 | "metadata": { 505 | "accelerator": "GPU", 506 | "celltoolbar": "Slideshow", 507 | "colab": { 508 | "collapsed_sections": [], 509 | "name": "neural-ode.ipynb", 510 | "provenance": [] 511 | }, 512 | "kernelspec": { 513 | "display_name": "aml", 514 | "language": "python", 515 | "name": "aml" 516 | }, 517 | "language_info": { 518 | "codemirror_mode": { 519 | "name": "ipython", 520 | "version": 3 521 | }, 522 | "file_extension": ".py", 523 | "mimetype": "text/x-python", 524 | "name": "python", 525 | "nbconvert_exporter": "python", 526 | "pygments_lexer": "ipython3", 527 | "version": "3.6.7" 528 | }, 529 | "rise": { 530 | "controlsTutorial": false, 531 | "height": 900, 532 | "help": false, 533 | "margin": 0, 534 | "maxScale": 2, 535 | "minScale": 0.2, 536 | "progress": true, 537 | "scroll": true, 538 | "theme": "simple", 539 | "width": 1200 540 | } 541 | }, 542 | "nbformat": 4, 543 | "nbformat_minor": 1 544 | } 545 | -------------------------------------------------------------------------------- /notebooks/lecture23-overview.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "i_f5u2x9nn6I", 8 | "slideshow": { 9 | "slide_type": "slide" 10 | } 11 | }, 12 | "source": [ 13 | "\n", 14 | "\n", 15 | "# Lecture 23: Course Overview\n", 16 | "\n", 17 | "### Applied Machine Learning\n", 18 | "\n", 19 | "__Volodymyr Kuleshov__
Cornell Tech" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": { 25 | "slideshow": { 26 | "slide_type": "slide" 27 | } 28 | }, 29 | "source": [ 30 | "# Congratulations on Finishing Applied Machine Learning!\n", 31 | "\n", 32 | "You have made it! This is our last machine learning lecture, in which we will do an overview of the different algorithms seen in the course." 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": { 38 | "slideshow": { 39 | "slide_type": "slide" 40 | } 41 | }, 42 | "source": [ 43 | "# A Map of Applied Machine Learning\n", 44 | "\n", 45 | "We will go through the following map of algorithms from the course.\n", 46 | "
" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": { 52 | "slideshow": { 53 | "slide_type": "slide" 54 | } 55 | }, 56 | "source": [ 57 | "# Supervised Machine Learning\n", 58 | "\n", 59 | "At a high level, a supervised machine learning problem has the following structure:\n", 60 | "\n", 61 | "$$ \\underbrace{\\text{Dataset}}_\\text{Features, Attributes} + \\underbrace{\\text{Learning Algorithm}}_\\text{Model Class + Objective + Optimizer} \\to \\text{Predictive Model} $$\n", 62 | "\n", 63 | "The predictive model is chosen to model the relationship between inputs and targets. For instance, it can predict future targets." 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": { 69 | "slideshow": { 70 | "slide_type": "slide" 71 | } 72 | }, 73 | "source": [ 74 | "# Linear Regression\n", 75 | "\n", 76 | "In linear regression, we fit a model\n", 77 | "$$ f_\\theta(x) := \\theta^\\top \\phi(x) $$\n", 78 | "that is linear in $\\theta$. \n", 79 | "\n", 80 | "The features $\\phi(x) : \\mathbb{R} \\to \\mathbb{R}^p$ are non-linear may non-linear in $x$ (e.g., polynomial features), allowing us to fit complex functions." 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": { 86 | "slideshow": { 87 | "slide_type": "subslide" 88 | } 89 | }, 90 | "source": [ 91 | "
" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": { 97 | "slideshow": { 98 | "slide_type": "slide" 99 | } 100 | }, 101 | "source": [ 102 | "# Overfitting\n", 103 | "\n", 104 | "Overfitting is one of the most common failure modes of machine learning.\n", 105 | "* A very expressive model (a high degree polynomial) fits the training dataset perfectly.\n", 106 | "* The model also makes wildly incorrect prediction outside this dataset, and doesn't generalize." 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": { 112 | "slideshow": { 113 | "slide_type": "slide" 114 | } 115 | }, 116 | "source": [ 117 | "# Regularization\n", 118 | "\n", 119 | "The idea of regularization is to penalize complex models that may overfit the data." 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": { 125 | "slideshow": { 126 | "slide_type": "fragment" 127 | } 128 | }, 129 | "source": [ 130 | "Regularized least squares optimizes the following objective (__Ridge__).\n", 131 | "$$ J(\\theta) = \\frac{1}{2n} \\sum_{i=1}^n \\left( y^{(i)} - \\theta^\\top \\phi(x^{(i)}) \\right)^2 + \\frac{\\lambda}{2} \\cdot ||\\theta||_2^2. $$\n", 132 | "If we use the L1 norm, we have the __LASSO__." 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": { 138 | "slideshow": { 139 | "slide_type": "subslide" 140 | } 141 | }, 142 | "source": [ 143 | "
" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": { 149 | "slideshow": { 150 | "slide_type": "slide" 151 | } 152 | }, 153 | "source": [ 154 | "# Regression vs. Classification\n", 155 | "\n", 156 | "Consider a training dataset $\\mathcal{D} = \\{(x^{(1)}, y^{(1)}), (x^{(2)}, y^{(2)}), \\ldots, (x^{(n)}, y^{(n)})\\}$.\n", 157 | "\n", 158 | "We distinguish between two types of supervised learning problems depnding on the targets $y^{(i)}$. \n", 159 | "\n", 160 | "1. __Regression__: The target variable $y \\in \\mathcal{Y}$ is continuous: $\\mathcal{Y} \\subseteq \\mathbb{R}$.\n", 161 | "2. __Classification__: The target variable $y$ is discrete and takes on one of $K$ possible values: $\\mathcal{Y} = \\{y_1, y_2, \\ldots y_K\\}$. Each discrete value corresponds to a *class* that we want to predict." 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": { 167 | "slideshow": { 168 | "slide_type": "subslide" 169 | } 170 | }, 171 | "source": [ 172 | "
" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": { 178 | "slideshow": { 179 | "slide_type": "slide" 180 | } 181 | }, 182 | "source": [ 183 | "# Parametric vs. Non-Parametric Models\n", 184 | "\n", 185 | "Nearest neighbors is an example of a *non-parametric* model.\n", 186 | "* A parametric model $f_\\theta(x) : \\mathcal{X} \\times \\Theta \\to \\mathcal{Y}$ is defined by a finite set of parameters $\\theta \\in \\Theta$ whose dimensionality is constant with respect to the dataset" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": { 192 | "slideshow": { 193 | "slide_type": "fragment" 194 | } 195 | }, 196 | "source": [ 197 | "* In a non-parametric model, the function $f$ uses the entire training dataset to make predictions, and the complexity of the model increases with dataset size." 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": { 203 | "slideshow": { 204 | "slide_type": "subslide" 205 | } 206 | }, 207 | "source": [ 208 | "* Non-parametric models have the advantage of not loosing any information at training time. \n", 209 | "* However, they are also computationally less tractable and may easily overfit the training set." 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": { 215 | "slideshow": { 216 | "slide_type": "slide" 217 | } 218 | }, 219 | "source": [ 220 | "# Probabilistic vs. Non-Probabilistic Models\n", 221 | "\n", 222 | "A probabilistic model is a probability distribution\n", 223 | "$$P(x,y) : \\mathcal{X} \\times \\mathcal{Y} \\to [0,1].$$\n", 224 | "This model can approximate the data distribution $P_\\text{data}(x,y)$.\n", 225 | "\n", 226 | "If we know $P(x,y)$, we can use the conditional $P(y|x)$ for prediction." 
227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": { 232 | "slideshow": { 233 | "slide_type": "slide" 234 | } 235 | }, 236 | "source": [ 237 | "# Maximum Likelihood Learning\n", 238 | "\n", 239 | "Maximum likelihood is an objective that can be used to fit any probabilistic model:\n", 240 | "$$ \\theta_\\text{MLE} = \\arg\\max_\\theta \\mathbb{E}_{x, y \\sim P_\\text{data}} \\log P(x, y; \\theta). $$\n", 241 | "It minimizes the KL divergence between the model and data distributions:\n", 242 | "$$\\theta_\\text{MLE} = \\arg\\min_\\theta \\text{KL}(P_\\text{data} \\mid\\mid P_\\theta).$$" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": { 248 | "slideshow": { 249 | "slide_type": "slide" 250 | } 251 | }, 252 | "source": [ 253 | "# Discriminative vs. Generative Models\n", 254 | "\n", 255 | "There are two types of probabilistic models: *generative* and *discriminative*.\n", 256 | "\\begin{align*}\n", 257 | "\\underbrace{P_\\theta(x,y) : \\mathcal{X} \\times \\mathcal{Y} \\to [0,1]}_\\text{generative model} & \\;\\; & \\underbrace{P_\\theta(y|x) : \\mathcal{X} \\times \\mathcal{Y} \\to [0,1]}_\\text{discriminative model}\n", 258 | "\\end{align*}\n", 259 | "\n", 260 | "We can obtain predictions from generative models via $\\arg\\max_y P_\\theta(x,y)$." 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": { 266 | "slideshow": { 267 | "slide_type": "subslide" 268 | } 269 | }, 270 | "source": [ 271 | "
" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": { 277 | "slideshow": { 278 | "slide_type": "slide" 279 | } 280 | }, 281 | "source": [ 282 | "# The Max-Margin Principle\n", 283 | "\n", 284 | "Intuitively, we want to select linear decision boundaries with high *margin*. \n", 285 | "\n", 286 | "This means that we are as confident as possible for every point and we are as far as possible from the decision boundary." 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 2, 292 | "metadata": { 293 | "slideshow": { 294 | "slide_type": "subslide" 295 | } 296 | }, 297 | "outputs": [ 298 | { 299 | "data": { 300 | "text/html": [ 301 | "
\n", 302 | "\n", 315 | "\n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
05.13.51.40.2-1
45.03.61.40.2-1
84.42.91.40.2-1
124.83.01.40.1-1
165.43.91.30.4-1
\n", 369 | "
" 370 | ], 371 | "text/plain": [ 372 | " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n", 373 | "0 5.1 3.5 1.4 0.2 \n", 374 | "4 5.0 3.6 1.4 0.2 \n", 375 | "8 4.4 2.9 1.4 0.2 \n", 376 | "12 4.8 3.0 1.4 0.1 \n", 377 | "16 5.4 3.9 1.3 0.4 \n", 378 | "\n", 379 | " target \n", 380 | "0 -1 \n", 381 | "4 -1 \n", 382 | "8 -1 \n", 383 | "12 -1 \n", 384 | "16 -1 " 385 | ] 386 | }, 387 | "execution_count": 2, 388 | "metadata": {}, 389 | "output_type": "execute_result" 390 | } 391 | ], 392 | "source": [ 393 | "import numpy as np\n", 394 | "import pandas as pd\n", 395 | "from sklearn import datasets\n", 396 | "\n", 397 | "# Load the Iris dataset\n", 398 | "iris = datasets.load_iris(as_frame=True)\n", 399 | "iris_X, iris_y = iris.data, iris.target\n", 400 | "\n", 401 | "# subsample to a third of the data points\n", 402 | "iris_X = iris_X.loc[::4]\n", 403 | "iris_y = iris_y.loc[::4]\n", 404 | "\n", 405 | "# create a binary classification dataset with labels +/- 1\n", 406 | "iris_y2 = iris_y.copy()\n", 407 | "iris_y2[iris_y2==2] = 1\n", 408 | "iris_y2[iris_y2==0] = -1\n", 409 | "\n", 410 | "# print part of the dataset\n", 411 | "pd.concat([iris_X, iris_y2], axis=1).head()" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": 3, 417 | "metadata": { 418 | "slideshow": { 419 | "slide_type": "subslide" 420 | } 421 | }, 422 | "outputs": [ 423 | { 424 | "data": { 425 | "text/plain": [ 426 | "" 427 | ] 428 | }, 429 | "execution_count": 3, 430 | "metadata": {}, 431 | "output_type": "execute_result" 432 | }, 433 | { 434 | "data": { 435 | "image/png": "\n", 436 | "text/plain": [ 437 | "
" 438 | ] 439 | }, 440 | "metadata": { 441 | "needs_background": "light" 442 | }, 443 | "output_type": "display_data" 444 | } 445 | ], 446 | "source": [ 447 | "# https://scikit-learn.org/stable/auto_examples/neighbors/plot_classification.html\n", 448 | "%matplotlib inline\n", 449 | "import matplotlib.pyplot as plt\n", 450 | "plt.rcParams['figure.figsize'] = [12, 4]\n", 451 | "import warnings\n", 452 | "warnings.filterwarnings(\"ignore\")\n", 453 | "\n", 454 | "# create 2d version of dataset and subsample it\n", 455 | "X = iris_X.to_numpy()[:,:2]\n", 456 | "x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\n", 457 | "y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\n", 458 | "xx, yy = np.meshgrid(np.arange(x_min, x_max, .02), np.arange(y_min, y_max, .02))\n", 459 | "\n", 460 | "# Plot also the training points\n", 461 | "p1 = plt.scatter(X[:, 0], X[:, 1], c=iris_y2, s=60, cmap=plt.cm.Paired)\n", 462 | "plt.xlabel('Petal Length')\n", 463 | "plt.ylabel('Petal Width')\n", 464 | "plt.legend(handles=p1.legend_elements()[0], labels=['Setosa', 'Not Setosa'], loc='lower right')" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 4, 470 | "metadata": { 471 | "slideshow": { 472 | "slide_type": "subslide" 473 | } 474 | }, 475 | "outputs": [ 476 | { 477 | "data": { 478 | "image/png": "\n", 479 | "text/plain": [ 480 | "
" 481 | ] 482 | }, 483 | "metadata": { 484 | "needs_background": "light" 485 | }, 486 | "output_type": "display_data" 487 | } 488 | ], 489 | "source": [ 490 | "from sklearn.linear_model import Perceptron, RidgeClassifier\n", 491 | "from sklearn.svm import SVC\n", 492 | "models = [SVC(kernel='linear', C=10000), Perceptron(), RidgeClassifier()]\n", 493 | "\n", 494 | "def fit_and_create_boundary(model):\n", 495 | " model.fit(X, iris_y2)\n", 496 | " Z = model.predict(np.c_[xx.ravel(), yy.ravel()])\n", 497 | " Z = Z.reshape(xx.shape)\n", 498 | " return Z\n", 499 | "\n", 500 | "plt.figure(figsize=(12,3))\n", 501 | "for i, model in enumerate(models):\n", 502 | " plt.subplot('13%d' % (i+1))\n", 503 | " Z = fit_and_create_boundary(model)\n", 504 | " plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired) \n", 505 | "\n", 506 | " # Plot also the training points\n", 507 | " plt.scatter(X[:, 0], X[:, 1], c=iris_y2, edgecolors='k', cmap=plt.cm.Paired)\n", 508 | " if i == 0:\n", 509 | " plt.title('Good Margin')\n", 510 | " else:\n", 511 | " plt.title('Bad Margin')\n", 512 | " plt.xlabel('Sepal length')\n", 513 | " plt.ylabel('Sepal width')\n", 514 | "\n", 515 | "plt.show()" 516 | ] 517 | }, 518 | { 519 | "cell_type": "markdown", 520 | "metadata": { 521 | "slideshow": { 522 | "slide_type": "subslide" 523 | } 524 | }, 525 | "source": [ 526 | "
" 527 | ] 528 | }, 529 | { 530 | "cell_type": "markdown", 531 | "metadata": { 532 | "slideshow": { 533 | "slide_type": "slide" 534 | } 535 | }, 536 | "source": [ 537 | "# The Kernel Trick\n", 538 | "\n", 539 | "Many algorithms in machine learning only involve dot products $\\phi(x)^\\top \\phi(z)$ but not the features $\\phi$ themselves." 540 | ] 541 | }, 542 | { 543 | "cell_type": "markdown", 544 | "metadata": { 545 | "slideshow": { 546 | "slide_type": "fragment" 547 | } 548 | }, 549 | "source": [ 550 | "We can often compute $\\phi(x)^\\top \\phi(z)$ very efficiently for complex $\\phi$ using a kernel function $K(x,z) = \\phi(x)^\\top \\phi(z)$. This is the __kernel trick__." 551 | ] 552 | }, 553 | { 554 | "cell_type": "markdown", 555 | "metadata": { 556 | "slideshow": { 557 | "slide_type": "subslide" 558 | } 559 | }, 560 | "source": [ 561 | "
" 562 | ] 563 | }, 564 | { 565 | "cell_type": "markdown", 566 | "metadata": { 567 | "slideshow": { 568 | "slide_type": "slide" 569 | } 570 | }, 571 | "source": [ 572 | "# Tree-Based Models\n", 573 | "\n", 574 | "Decision trees output target based on a tree of human-interpretable decision rules.\n", 575 | "* __Random forests__ combine large trees using *bagging* to reduce overfitting.\n", 576 | "* __Boosted trees__ combine small trees to reduce underfitting." 577 | ] 578 | }, 579 | { 580 | "cell_type": "markdown", 581 | "metadata": { 582 | "slideshow": { 583 | "slide_type": "subslide" 584 | } 585 | }, 586 | "source": [ 587 | "
" 588 | ] 589 | }, 590 | { 591 | "cell_type": "markdown", 592 | "metadata": { 593 | "slideshow": { 594 | "slide_type": "slide" 595 | } 596 | }, 597 | "source": [ 598 | "# Neural Networks\n", 599 | "\n", 600 | "Neural network models are inspired by the brain.\n", 601 | "* A Perceptron is an artificial model of a neuron.\n", 602 | "* MLP stack multiple layers of artifical neurons.\n", 603 | "* ConvNets tie the weights of neighboring neurons into receptive fields that implement the convolution operation." 604 | ] 605 | }, 606 | { 607 | "cell_type": "markdown", 608 | "metadata": { 609 | "slideshow": { 610 | "slide_type": "subslide" 611 | } 612 | }, 613 | "source": [ 614 | "
" 615 | ] 616 | }, 617 | { 618 | "cell_type": "markdown", 619 | "metadata": { 620 | "slideshow": { 621 | "slide_type": "slide" 622 | } 623 | }, 624 | "source": [ 625 | "# Unsupervised Learning\n", 626 | "\n", 627 | "We have a dataset *without* labels. Our goal is to learn something interesting about the structure of the data:\n", 628 | "* __Clusters__ hidden in the dataset.\n", 629 | "* A __low-dimensional representation__ of the data.\n", 630 | "* Recover the __probability density__ that generated the data." 631 | ] 632 | }, 633 | { 634 | "cell_type": "markdown", 635 | "metadata": { 636 | "slideshow": { 637 | "slide_type": "subslide" 638 | } 639 | }, 640 | "source": [ 641 | "
" 642 | ] 643 | }, 644 | { 645 | "cell_type": "markdown", 646 | "metadata": { 647 | "slideshow": { 648 | "slide_type": "subslide" 649 | } 650 | }, 651 | "source": [ 652 | "
" 653 | ] 654 | }, 655 | { 656 | "cell_type": "markdown", 657 | "metadata": { 658 | "slideshow": { 659 | "slide_type": "subslide" 660 | } 661 | }, 662 | "source": [ 663 | "
" 664 | ] 665 | }, 666 | { 667 | "cell_type": "markdown", 668 | "metadata": { 669 | "slideshow": { 670 | "slide_type": "slide" 671 | } 672 | }, 673 | "source": [ 674 | "# How To Decide Which Algorithm to Use\n", 675 | "\n", 676 | "One factor is how much data you have. In the __small data__ (<10,000) regime, consider:\n", 677 | "* Linear models with hand-crafted features (LASSO, LR, NB, SVMs)\n", 678 | "* Kernel methods often work best (e.g., SVM + RBF kernel)\n", 679 | "* Non-parametric methods (kernels, nearest neighbors) are also powerful" 680 | ] 681 | }, 682 | { 683 | "cell_type": "markdown", 684 | "metadata": { 685 | "slideshow": { 686 | "slide_type": "subslide" 687 | } 688 | }, 689 | "source": [ 690 | "In the __big data__ regime,\n", 691 | "* If using \"high-level\" features, gradient boosted trees are state-of-the-art\n", 692 | "* When using \"low-level\" representations (images, sound signals), neural networks work best\n", 693 | "* Linear models with good features are also good and reliable" 694 | ] 695 | }, 696 | { 697 | "cell_type": "markdown", 698 | "metadata": { 699 | "slideshow": { 700 | "slide_type": "subslide" 701 | } 702 | }, 703 | "source": [ 704 | "Some additional advice:\n", 705 | "* If interpretability matters, use decision trees or LASSO.\n", 706 | "* When uncertainty estimates are important use probabilistic methods.\n", 707 | "* If you know the data generating process, use generative models." 708 | ] 709 | }, 710 | { 711 | "cell_type": "markdown", 712 | "metadata": { 713 | "slideshow": { 714 | "slide_type": "slide" 715 | } 716 | }, 717 | "source": [ 718 | "# What's Next? Ideas for Courses\n", 719 | "\n", 720 | "Consider the following courses to keep learning about ML:\n", 721 | "* Graduate courses in the Spring semester at Cornell (generative models, NLP, etc.)\n", 722 | "* Masters courses: Deep Learning Clinic, ML Engineering, Data Science, etc.\n", 723 | "* Online courses, e.g. 
Full Stack Deep Learning" 724 | ] 725 | }, 726 | { 727 | "cell_type": "markdown", 728 | "metadata": { 729 | "slideshow": { 730 | "slide_type": "slide" 731 | } 732 | }, 733 | "source": [ 734 | "# What's Next? Ideas for Research\n", 735 | "\n", 736 | "In order to get involved in research, I recommend:\n", 737 | "* Contacting research groups at Cornell for openings\n", 738 | "* Watching online ML tutorials, e.g. NeurIPS\n", 739 | "* Reading and implementing ML papers on your own" 740 | ] 741 | }, 742 | { 743 | "cell_type": "markdown", 744 | "metadata": { 745 | "slideshow": { 746 | "slide_type": "slide" 747 | } 748 | }, 749 | "source": [ 750 | "# What's Next? Ideas for Industry Projects\n", 751 | "\n", 752 | "Finally, a few ideas for how to get more practice applying ML in the real world:\n", 753 | "* Participate in Kaggle competitions and review solutions\n", 754 | "* Build an open-source project that you like and host it on Github" 755 | ] 756 | }, 757 | { 758 | "cell_type": "markdown", 759 | "metadata": { 760 | "slideshow": { 761 | "slide_type": "slide" 762 | } 763 | }, 764 | "source": [ 765 | "# Thank You For Taking Applied Machine Learning 2020!\n", 766 | "\n", 767 | "" 768 | ] 769 | } 770 | ], 771 | "metadata": { 772 | "accelerator": "GPU", 773 | "celltoolbar": "Slideshow", 774 | "colab": { 775 | "collapsed_sections": [], 776 | "name": "neural-ode.ipynb", 777 | "provenance": [] 778 | }, 779 | "kernelspec": { 780 | "display_name": "aml", 781 | "language": "python", 782 | "name": "aml" 783 | }, 784 | "language_info": { 785 | "codemirror_mode": { 786 | "name": "ipython", 787 | "version": 3 788 | }, 789 | "file_extension": ".py", 790 | "mimetype": "text/x-python", 791 | "name": "python", 792 | "nbconvert_exporter": "python", 793 | "pygments_lexer": "ipython3", 794 | "version": "3.6.7" 795 | }, 796 | "rise": { 797 | "controlsTutorial": false, 798 | "height": 900, 799 | "help": false, 800 | "margin": 0, 801 | "maxScale": 2, 802 | "minScale": 0.2, 803 | "progress": 
true, 804 | "scroll": true, 805 | "theme": "simple", 806 | "width": 1200 807 | } 808 | }, 809 | "nbformat": 4, 810 | "nbformat_minor": 1 811 | } 812 | -------------------------------------------------------------------------------- /notebooks/lecture5b-probabilistic-perspectives.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "i_f5u2x9nn6I", 8 | "slideshow": { 9 | "slide_type": "slide" 10 | } 11 | }, 12 | "source": [ 13 | "\n", 14 | "\n", 15 | "# Lecture 5b: Probabilistic Perspectives on ML Algorithms\n", 16 | "\n", 17 | "### Applied Machine Learning\n", 18 | "\n", 19 | "__Volodymyr Kuleshov__
Cornell Tech" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": { 25 | "slideshow": { 26 | "slide_type": "slide" 27 | } 28 | }, 29 | "source": [ 30 | "# Part 1: Probabilistic Linear Regression\n", 31 | "\n", 32 | "Previously, we derived *maximum likelihood learning* as a general way of learning machine learning models.\n", 33 | "\n", 34 | "We will now see how the algorithms we've seen so far are special cases of this principle." 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": { 40 | "slideshow": { 41 | "slide_type": "slide" 42 | } 43 | }, 44 | "source": [ 45 | "# Review: Probabilistic Models\n", 46 | "\n", 47 | "A probabilistic model is a probability distribution\n", 48 | "$$P(x,y) : \\mathcal{X} \\times \\mathcal{Y} \\to [0,1].$$\n", 49 | "This model can approximate the data distribution $P_\\text{data}(x,y)$." 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": { 55 | "slideshow": { 56 | "slide_type": "fragment" 57 | } 58 | }, 59 | "source": [ 60 | "If we know $P(x,y)$, we can use the conditional $P(y|x)$ for prediction." 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": { 66 | "slideshow": { 67 | "slide_type": "fragment" 68 | } 69 | }, 70 | "source": [ 71 | "Probabilistic models may also have *parameters* $\\theta \\in \\Theta$, which we denote as\n", 72 | "$$P_\\theta(x,y) : \\mathcal{X} \\times \\mathcal{Y} \\to [0,1].$$" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": { 78 | "slideshow": { 79 | "slide_type": "slide" 80 | } 81 | }, 82 | "source": [ 83 | "# Review: Conditional Maximum Likelihood\n", 84 | "\n", 85 | "A general approach to optimizing conditional models of the form $P_\\theta(y|x)$ is to minimize the expected KL divergence with respect to the data distribution:\n", 86 | "$$ \\min_\\theta \\mathbb{E}_{x \\sim \\mathbb{P}_\\text{data}} \\left[ D(P_\\text{data}(y|x) \\mid\\mid P_\\theta(y|x)) \\right].
$$" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": { 92 | "slideshow": { 93 | "slide_type": "fragment" 94 | } 95 | }, 96 | "source": [ 97 | "With a bit of math, we can show that the maximum likelihood objective becomes\n", 98 | "$$ \\max_\\theta \\mathbb{E}_{x, y \\sim \\mathbb{P}_\\text{data}} \\log P_\\theta(y|x). $$\n", 99 | "This is the principle of *conditional maximum likelihood*." 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": { 105 | "slideshow": { 106 | "slide_type": "slide" 107 | } 108 | }, 109 | "source": [ 110 | "# Review: Least Squares\n", 111 | "\n", 112 | "Recall that the linear regression algorithm fits a linear model of the form\n", 113 | "$$ f(x) = \\sum_{j=0}^d \\theta_j \\cdot x_j = \\theta^\\top x. $$" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": { 119 | "slideshow": { 120 | "slide_type": "fragment" 121 | } 122 | }, 123 | "source": [ 124 | "It minimizes the mean squared error (MSE)\n", 125 | "$$J(\\theta)= \\frac{1}{2n} \\sum_{i=1}^n(y^{(i)}-\\theta^\\top x^{(i)})^2$$\n", 126 | "on a dataset $\\{(x^{(1)}, y^{(1)}), (x^{(2)}, y^{(2)}), \\ldots, (x^{(n)}, y^{(n)})\\}$." 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": { 132 | "slideshow": { 133 | "slide_type": "subslide" 134 | } 135 | }, 136 | "source": [ 137 | "Is there a specific reason for us to be optimizing the mean squared error to fit our linear model?\n", 138 | "\n", 139 | "The answer to this can be found by looking at the algorithm from a probabilistic perspective." 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": { 145 | "slideshow": { 146 | "slide_type": "slide" 147 | } 148 | }, 149 | "source": [ 150 | "# Probabilistic Least Squares\n", 151 | "\n", 152 | "Let's derive a probabilistic algorithm by defining a class of probabilistic models and use maximum likelihood as our objective." 
153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": { 158 | "slideshow": { 159 | "slide_type": "fragment" 160 | } 161 | }, 162 | "source": [ 163 | "1. Let's choose our model family $\\mathcal{M}$ to be the set of Gaussian distributions of the form\n", 164 | "$$ p(y | x; \\theta) = \\frac{1}{\\sqrt{2\\pi}\\sigma} \\exp\\left( -\\frac{(y - \\theta^\\top x)^2}{2 \\sigma^2} \\right).$$\n", 165 | "Each model $\\mathcal{N}(y; \\mu(x), \\sigma)$ is a Gaussian with a standard deviation $\\sigma$ of one and a mean of $\\mu(x) = \\theta^\\top x$ that is parametrized by the parameters $\\theta$." 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": { 171 | "slideshow": { 172 | "slide_type": "subslide" 173 | } 174 | }, 175 | "source": [ 176 | "2. We optimize the model using maximum likelihood. The log-likelihood function at a point $(x,y)$ equals\n", 177 | "\\begin{align*}\n", 178 | "\\log L(\\theta) = \\log p(y | x; \\theta) & = \\log \\frac{1}{\\sqrt{2\\pi}\\sigma} \\exp\\left( -\\frac{(y - \\theta^\\top x)^2}{2 \\sigma^2} \\right) \\\\\n", 179 | "& = -\\frac{(y - \\theta^\\top x)^2}{2 \\sigma^2} + \\text{const.}\n", 180 | "\\end{align*}" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": { 186 | "slideshow": { 187 | "slide_type": "fragment" 188 | } 189 | }, 190 | "source": [ 191 | "Note how this is a mean squared error (MSE) objective! \n", 192 | "\n", 193 | "Thus, minimizing MSE is equivalent to maximizing the log-likelihood of a Normal distribution $\\mathcal{N}(y; \\mu(x), \\sigma)$." 
194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": { 199 | "slideshow": { 200 | "slide_type": "slide" 201 | } 202 | }, 203 | "source": [ 204 | "# Algorithm: Gaussian Ordinary Least Squares\n", 205 | "\n", 206 | "* __Type__: Supervised learning (regression)\n", 207 | "* __Model family__: Linear models\n", 208 | "* __Objective function__: Mean squared error\n", 209 | "* __Optimizer__: Normal equations\n", 210 | "* __Probabilistic interpretation__: Conditional Gaussian fit using max-likelihood." 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": { 216 | "slideshow": { 217 | "slide_type": "slide" 218 | } 219 | }, 220 | "source": [ 221 | "# Extensions of Gaussian Least Squares\n", 222 | "\n", 223 | "This is an example of how we can interpret a machine learning algorithm in a probabilistic framework.\n", 224 | "\n", 225 | "We will see many algorithms that have these kinds of interpretations. Here are some simple extensions." 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": { 231 | "slideshow": { 232 | "slide_type": "subslide" 233 | } 234 | }, 235 | "source": [ 236 | "We can use a Gaussian model and also parametrize the standard deviation. \n", 237 | "* This is called heteroscedastic regression, and allows us to obtain confidence intervals for our predictions." 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": { 243 | "slideshow": { 244 | "slide_type": "subslide" 245 | } 246 | }, 247 | "source": [ 248 | "We can also parametrize other distributions, not just the Gaussian. \n", 249 | "* Exponential or Gamma distributions for continuous variables \n", 250 | "* Bernoulli distribution for discrete variables\n", 251 | "\n", 252 | "This yields many new machine learning algorithms."
253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": { 258 | "slideshow": { 259 | "slide_type": "slide" 260 | } 261 | }, 262 | "source": [ 263 | "\n", 264 | "# Part 2: Bayesian Algorithms\n", 265 | "\n", 266 | "We can also use what we learned about Bayesian ML to interpret several algorithms that we've seen as special cases of the Bayesian framework." 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": { 272 | "slideshow": { 273 | "slide_type": "slide" 274 | } 275 | }, 276 | "source": [ 277 | "# Review: The Bayesian Approach\n", 278 | "\n", 279 | "In Bayesian statistics, $\\theta$ is a *random* variable whose value happens to be unknown." 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": { 285 | "slideshow": { 286 | "slide_type": "subslide" 287 | } 288 | }, 289 | "source": [ 290 | "We formulate two models:\n", 291 | "* A *likelihood* model $P(x, y | \\theta)$ that defines the probability of $x,y$ for any fixed value of $\\theta$.\n", 292 | "* A *prior* $P(\\theta)$ that specifies our existing belief about the distribution of the random variable $\\theta$." 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": { 298 | "slideshow": { 299 | "slide_type": "fragment" 300 | } 301 | }, 302 | "source": [ 303 | "Together, these two models define the *joint* distribution\n", 304 | "$$ P(x, y, \\theta) = P(x, y \\mid \\theta) P(\\theta) $$\n", 305 | "in which both the $x, y$ and the parameters $\\theta$ are random variables."
306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "metadata": { 311 | "slideshow": { 312 | "slide_type": "slide" 313 | } 314 | }, 315 | "source": [ 316 | "# Review: A Posteriori Learning\n", 317 | "\n", 318 | "Recall that in maximum a posteriori (MAP) learning, we optimize the following objective.\n", 319 | "\\begin{align*}\n", 320 | "\\theta_\\text{MAP} = \\arg\\max_\\theta \\left( \\log \\prod_{i=1}^n P(x^{(i)}, y^{(i)} \\mid \\theta) + \\log P(\\theta) \\right),\n", 321 | "\\end{align*}\n", 322 | "\n", 323 | "Note that we used the same formula as we used for maximum likelihood, except that we have added the prior term $\\log P(\\theta)$." 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": { 329 | "slideshow": { 330 | "slide_type": "slide" 331 | } 332 | }, 333 | "source": [ 334 | "# Review: Ridge Regression\n", 335 | "\n", 336 | "Recall that the ridge regression algorithm fits a linear model \n", 337 | "$$ f(x) = \\sum_{j=0}^d \\theta_j \\cdot x_j = \\theta^\\top x. $$" 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": { 343 | "slideshow": { 344 | "slide_type": "subslide" 345 | } 346 | }, 347 | "source": [ 348 | "We minimize the L2-regularized mean squared error (MSE)\n", 349 | "$$J(\\theta)= \\frac{1}{2n} \\sum_{i=1}^n(y^{(i)}-\\theta^\\top x^{(i)})^2 + \\frac{\\lambda}{2}\\sum_{j=1}^d \\theta_j^2$$\n", 350 | "on a dataset $\\{(x^{(1)}, y^{(1)}), (x^{(2)}, y^{(2)}), \\ldots, (x^{(n)}, y^{(n)})\\}$.\n", 351 | "The term $\\frac{1}{2}\\sum_{j=1}^d \\theta_j^2 = \\frac{1}{2}||\\theta||_2^2$ is called the regularizer." 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": { 357 | "slideshow": { 358 | "slide_type": "slide" 359 | } 360 | }, 361 | "source": [ 362 | "# Probabilistic Ridge Regression\n", 363 | "\n", 364 | "We can interpret ridge regression as maximum a posteriori (MAP) estimation as follows."
365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": { 370 | "slideshow": { 371 | "slide_type": "slide" 372 | } 373 | }, 374 | "source": [ 375 | "1. First, we select our model family $\\mathcal{M}$ to be the set of Gaussian distributions of the form (let's assume $x \\in \\mathbb{R}$ for simplicity).\n", 376 | "$$ p(y | x; \\theta) = \\frac{1}{\\sqrt{2\\pi}\\sigma} \\exp\\left( -\\frac{(y - \\theta^\\top x)^2}{2 \\sigma^2} \\right).$$" 377 | ] 378 | }, 379 | { 380 | "cell_type": "markdown", 381 | "metadata": { 382 | "slideshow": { 383 | "slide_type": "fragment" 384 | } 385 | }, 386 | "source": [ 387 | "2. We assume a Gaussian prior with mean zero and variance $\\tau^2$ on the parameters $\\theta$:\n", 388 | "$$ p(\\theta) = \\prod_{j=1}^d \\frac{1}{\\sqrt{2\\pi}\\tau} \\exp\\left( -\\frac{\\theta_j^2}{2\\tau^2} \\right).$$" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": { 394 | "slideshow": { 395 | "slide_type": "subslide" 396 | } 397 | }, 398 | "source": [ 399 | "3. We optimize the model using the MAP approach. The objective at a point $(x,y)$ equals\n", 400 | "\\begin{align*}\n", 401 | "\\log L(\\theta) & = \\log p(y | x; \\theta) + \\log p(\\theta) \n", 402 | "\\\\ & = \\log \\frac{1}{\\sqrt{2\\pi}\\sigma} \\exp\\left( -\\frac{(y - \\theta^\\top x)^2}{2 \\sigma^2} \\right) \\\\\n", 403 | "& \\;\\;\\;\\;\\;\\;\\;\\;\\;\\;\\;\\;\\;\\;\\;\\;\\; + \\log \\prod_{j=1}^d \\frac{1}{\\sqrt{2\\pi}\\tau} \\exp\\left( -\\frac{\\theta_j^2}{2\\tau^2} \\right) \\\\\n", 404 | "& = -\\frac{(y - \\theta^\\top x)^2}{2 \\sigma^2} - \\frac{1}{2\\tau^2}\\sum_{j=1}^d \\theta_j^2 + \\text{const.}\n", 405 | "\\end{align*}" 406 | ] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": { 411 | "slideshow": { 412 | "slide_type": "fragment" 413 | } 414 | }, 415 | "source": [ 416 | "Thus, we see that ridge regression actually amounts to performing MAP estimation with a Gaussian prior.
The strength of the regularizer $\\lambda$ equals $1/\\tau^2$." 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": { 422 | "slideshow": { 423 | "slide_type": "slide" 424 | } 425 | }, 426 | "source": [ 427 | "# Algorithm: Probabilistic Ridge Least Squares\n", 428 | "\n", 429 | "* __Type__: Supervised learning (regression)\n", 430 | "* __Model family__: Linear models\n", 431 | "* __Objective function__: L2-regularized mean squared error\n", 432 | "* __Optimizer__: Normal equations\n", 433 | "* __Probabilistic interpretation__: Conditional Gaussian likelihood and Gaussian prior fit using MAP." 434 | ] 435 | }, 436 | { 437 | "cell_type": "markdown", 438 | "metadata": { 439 | "slideshow": { 440 | "slide_type": "slide" 441 | } 442 | }, 443 | "source": [ 444 | "# Bayesian View on ML Algorithms\n", 445 | "\n", 446 | "Very often, we can interpret classical ML algorithms as applications of the probabilistic or Bayesian approaches (although we can derive them in other ways as well!)" 447 | ] 448 | }, 449 | { 450 | "cell_type": "markdown", 451 | "metadata": { 452 | "slideshow": { 453 | "slide_type": "fragment" 454 | } 455 | }, 456 | "source": [ 457 | "* Regularization can often be seen as applying a prior on the weights." 458 | ] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": { 463 | "slideshow": { 464 | "slide_type": "fragment" 465 | } 466 | }, 467 | "source": [ 468 | "* L1 regularization can be seen as applying a *Laplace* prior." 469 | ] 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "metadata": { 474 | "slideshow": { 475 | "slide_type": "fragment" 476 | } 477 | }, 478 | "source": [ 479 | "* Many other algorithms will have similar interpretations." 
480 | ] 481 | }, 482 | { 483 | "cell_type": "markdown", 484 | "metadata": { 485 | "slideshow": { 486 | "slide_type": "skip" 487 | } 488 | }, 489 | "source": [ 490 | "\n", 491 | "# Part 3: Bayesian Ridge Regression\n", 492 | "\n", 493 | "Let's now look at an example of a fully Bayesian machine learning algorithm.\n", 494 | "\n", 495 | "This section is still under construction and not part of the main lecture." 496 | ] 497 | }, 498 | { 499 | "cell_type": "markdown", 500 | "metadata": { 501 | "slideshow": { 502 | "slide_type": "skip" 503 | } 504 | }, 505 | "source": [ 506 | "# Review: The Bayesian Approach\n", 507 | "\n", 508 | "In Bayesian statistics, $\\theta$ is a *random* variable whose value happens to be unknown." 509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": { 514 | "slideshow": { 515 | "slide_type": "skip" 516 | } 517 | }, 518 | "source": [ 519 | "We formulate two models:\n", 520 | "* A *likelihood* model $P(x, y | \\theta)$ that defines the probability of $x,y$ for any fixed value of $\\theta$.\n", 521 | "* A *prior* $P(\\theta)$ that specifies our existing belief about the distribution of the random variable $\\theta$." 522 | ] 523 | }, 524 | { 525 | "cell_type": "markdown", 526 | "metadata": { 527 | "slideshow": { 528 | "slide_type": "skip" 529 | } 530 | }, 531 | "source": [ 532 | "Together, these two models define the *joint* distribution\n", 533 | "$$ P(x, y, \\theta) = P(x, y \\mid \\theta) P(\\theta) $$\n", 534 | "in which both the $x, y$ and the parameters $\\theta$ are random variables." 535 | ] 536 | }, 537 | { 538 | "cell_type": "markdown", 539 | "metadata": { 540 | "slideshow": { 541 | "slide_type": "skip" 542 | } 543 | }, 544 | "source": [ 545 | "# Review: Ridge Regression\n", 546 | "\n", 547 | "Recall that the ridge regression algorithm fits a linear model \n", 548 | "$$ f(x) = \\sum_{j=0}^d \\theta_j \\cdot x_j = \\theta^\\top x.
$$" 549 | ] 550 | }, 551 | { 552 | "cell_type": "markdown", 553 | "metadata": { 554 | "slideshow": { 555 | "slide_type": "skip" 556 | } 557 | }, 558 | "source": [ 559 | "We minimize the L2-regularized mean squared error (MSE)\n", 560 | "$$J(\\theta)= \\frac{1}{2n} \\sum_{i=1}^n(y^{(i)}-\\theta^\\top x^{(i)})^2 + \\frac{\\lambda}{2}\\sum_{j=1}^d \\theta_j^2$$\n", 561 | "on a dataset $\\{(x^{(1)}, y^{(1)}), (x^{(2)}, y^{(2)}), \\ldots, (x^{(n)}, y^{(n)})\\}$.\n", 562 | "The term $\\frac{1}{2}\\sum_{j=1}^d \\theta_j^2 = \\frac{1}{2}||\\theta||_2^2$ is called the regularizer." 563 | ] 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "metadata": { 568 | "slideshow": { 569 | "slide_type": "skip" 570 | } 571 | }, 572 | "source": [ 573 | "# Probabilistic Ridge Regression\n", 574 | "\n", 575 | "We can interpret ridge regression as maximum a posteriori (MAP) estimation as follows." 576 | ] 577 | }, 578 | { 579 | "cell_type": "markdown", 580 | "metadata": { 581 | "slideshow": { 582 | "slide_type": "skip" 583 | } 584 | }, 585 | "source": [ 586 | "# Bayesian Predictions\n", 587 | "\n", 588 | "Suppose we now want to predict the value of $y$ from $x$. Unlike in the frequentist setting, we no longer have a single estimate $\\theta$ of the model params, but instead we have a distribution.\n", 589 | "\n", 590 | "The Bayesian approach to predicting $y$ given an input $x$ and a training dataset $\\mathcal{D}$ consists of averaging the predictions of all possible models\n", 591 | "$$ P(y | x, \\mathcal{D}) = \\int_\\theta P(y \\mid x, \\theta) P(\\theta \\mid \\mathcal{D}) d\\theta. $$\n", 592 | "This is called the *posterior predictive* distribution. Note how each $P(y \\mid x, \\theta)$ is weighted by the probability of $\\theta$ given $\\mathcal{D}$."
593 | ] 594 | } 595 | ], 596 | "metadata": { 597 | "accelerator": "GPU", 598 | "celltoolbar": "Slideshow", 599 | "colab": { 600 | "collapsed_sections": [], 601 | "name": "neural-ode.ipynb", 602 | "provenance": [] 603 | }, 604 | "kernelspec": { 605 | "display_name": "aml", 606 | "language": "python", 607 | "name": "aml" 608 | }, 609 | "language_info": { 610 | "codemirror_mode": { 611 | "name": "ipython", 612 | "version": 3 613 | }, 614 | "file_extension": ".py", 615 | "mimetype": "text/x-python", 616 | "name": "python", 617 | "nbconvert_exporter": "python", 618 | "pygments_lexer": "ipython3", 619 | "version": "3.6.7" 620 | }, 621 | "rise": { 622 | "controlsTutorial": false, 623 | "height": 900, 624 | "help": false, 625 | "margin": 0, 626 | "maxScale": 2, 627 | "minScale": 0.2, 628 | "progress": true, 629 | "scroll": true, 630 | "theme": "simple", 631 | "width": 1200 632 | } 633 | }, 634 | "nbformat": 4, 635 | "nbformat_minor": 1 636 | } 637 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appnope==0.1.0 2 | astroid==2.4.1 3 | attrs==19.3.0 4 | backcall==0.2.0 5 | bleach==3.1.5 6 | cycler==0.10.0 7 | decorator==4.4.2 8 | defusedxml==0.6.0 9 | entrypoints==0.3 10 | importlib-metadata==1.7.0 11 | ipykernel==5.3.4 12 | ipython==7.16.1 13 | ipython-genutils==0.2.0 14 | isort==4.3.21 15 | jedi==0.17.2 16 | Jinja2==2.11.2 17 | jsonschema==3.2.0 18 | jupyter-client==6.1.6 19 | jupyter-contrib-core==0.3.3 20 | jupyter-contrib-nbextensions==0.5.1 21 | jupyter-core==4.6.3 22 | jupyter-highlight-selected-word==0.2.0 23 | jupyter-latex-envs==1.4.6 24 | jupyter-nbextensions-configurator==0.4.1 25 | kiwisolver==1.2.0 26 | latexcodec==2.0.1 27 | lazy-object-proxy==1.4.3 28 | lxml==4.6.3 29 | MarkupSafe==1.1.1 30 | matplotlib==3.2.1 31 | mccabe==0.6.1 32 | mistune==0.8.4 33 | nbconvert==5.6.1 34 | nbformat==5.0.7 35 | notebook==6.0.3 36 | 
numpy==1.18.1 37 | packaging==20.4 38 | pandocfilters==1.4.2 39 | parso==0.7.1 40 | pexpect==4.8.0 41 | pickleshare==0.7.5 42 | Pillow==7.0.0 43 | prometheus-client==0.8.0 44 | prompt-toolkit==3.0.5 45 | ptyprocess==0.6.0 46 | pybtex==0.23.0 47 | Pygments==2.6.1 48 | pylint==2.5.2 49 | pyparsing==2.4.7 50 | pyrsistent==0.16.0 51 | python-dateutil==2.8.1 52 | PyYAML==5.3 53 | pyzmq==19.0.1 54 | rise==5.6.1 55 | scipy==1.4.1 56 | Send2Trash==1.5.0 57 | six==1.14.0 58 | terminado==0.8.3 59 | testpath==0.4.4 60 | toml==0.10.1 61 | torch==1.4.0 62 | torchdiffeq==0.0.1 63 | torchvision==0.5.0 64 | tornado==6.1.0 65 | tqdm==4.42.1 66 | traitlets==4.3.3 67 | typed-ast==1.4.1 68 | virtualenv-clone==0.5.4 69 | wcwidth==0.2.5 70 | webencodings==0.5.1 71 | wrapt==1.12.1 72 | zipp==3.1.0 73 | --------------------------------------------------------------------------------