├── content
├── chapters
│ ├── _index.md
│ ├── 22_online_learning
│ │ ├── _index.md
│ │ ├── 22-04-ftrl.md
│ │ ├── 22-05-ftl-oqo.md
│ │ ├── 22-02-simple.md
│ │ ├── 22-03-ftl.md
│ │ ├── 22-07-oco-2.md
│ │ ├── 22-06-oco-1.md
│ │ └── 22-01-intro.md
│ ├── 21_multitarget_learning
│ │ ├── _index.md
│ │ ├── 21-03-methods-1.md
│ │ ├── 21-04-methods-2.md
│ │ ├── 21-02-losses.md
│ │ └── 21-01-intro.md
│ ├── 20_imbalanced_learning
│ │ ├── _index.md
│ │ ├── 20-09-smpl-2.md
│ │ ├── 20-05-cs-3.md
│ │ ├── 20-06-cc-1.md
│ │ ├── 20-04-cs-2.md
│ │ ├── 20-07-cc-2.md
│ │ ├── 20-03-cs-1.md
│ │ ├── 20-08-smpl-1.md
│ │ ├── 20-01-intro.md
│ │ └── 20-02-perf-msr.md
│ ├── 13_information_theory
│ │ ├── _index.md
│ │ ├── 13-12-literature.md
│ │ ├── 13-10-sourcecoding2.md
│ │ ├── 13-11-mi-deepdive.md
│ │ ├── 13-09-sourcecoding.md
│ │ ├── 13-03-diffent.md
│ │ ├── 13-02-entropy2.md
│ │ ├── 13-01-entropy.md
│ │ ├── 13-05-cross-entropy-kld.md
│ │ ├── 13-04-kl.md
│ │ ├── 13-06-ml.md
│ │ ├── 13-07-mutual-info.md
│ │ └── 13-08-mutual-info2.md
│ ├── 18_boosting
│ │ ├── _index.md
│ │ ├── 18-13-literature.md
│ │ ├── 18-11-CWB-GLM.md
│ │ ├── 18-01-intro-adaboost.md
│ │ ├── 18-09-CWB-1.md
│ │ ├── 18-08-gradient-boosting-xgboost.md
│ │ ├── 18-10-CWB-2.md
│ │ ├── 18-03-gradient-boosting-illustration.md
│ │ ├── 18-12-adv-CWB.md
│ │ ├── 18-05-gradient-boosting-classification.md
│ │ ├── 18-06-gradient-boosting-trees-1.md
│ │ ├── 18-07-gradient-boosting-trees-2.md
│ │ ├── 18-04-gradient-boosting-regularization.md
│ │ └── 18-02-gradient-boosting-concept.md
│ ├── 06_cart
│ │ ├── 06-08-literature.md
│ │ ├── _index.md
│ │ ├── 06-00-nutshell-cart.md
│ │ ├── 06-02-treegrowing.md
│ │ ├── 06-03-splitcriteria-regression.md
│ │ ├── 06-04-splitcriteria-classification.md
│ │ ├── 06-06-stoppingpruning.md
│ │ ├── 06-01-predictions.md
│ │ ├── 06-05-computationalaspects.md
│ │ └── 06-07-discussion.md
│ ├── 30_feature_selection
│ │ ├── _index.md
│ │ ├── 30-06-literature.md
│ │ ├── 30-01-introduction.md
│ │ ├── 30-03-filters1.md
│ │ ├── 30-05-wrapper.md
│ │ ├── 30-04-filters2.md
│ │ └── 30-02-motivating-examples.md
│ ├── 07_forests
│ │ ├── 07-06-literature.md
│ │ ├── 07-02-basics.md
│ │ ├── _index.md
│ │ ├── 07-00-nutshell-random-forest.md
│ │ ├── 07-04-featureimportance.md
│ │ ├── 07-03-oob-error.md
│ │ ├── 07-01-bagging.md
│ │ └── 07-05-proximities.md
│ ├── 11_advriskmin
│ │ ├── 11-19-literature.md
│ │ ├── _index.md
│ │ ├── 11-07-classification-01.md
│ │ ├── 11-16-bias-variance-decomposition.md
│ │ ├── 11-03-pseudo-residuals.md
│ │ ├── 11-11-classification-brier-l1.md
│ │ ├── 11-17-bias-variance-decomposition2.md
│ │ ├── 11-02-loss-properties.md
│ │ ├── 11-01-risk-minimizer.md
│ │ ├── 11-06-regression-further-losses.md
│ │ ├── 11-05-l1-deep-dive.md
│ │ ├── 11-18-bias-variance-deep-dive.md
│ │ ├── 11-10-classification-logreg-deep-dive.md
│ │ ├── 11-12-classification-further-losses.md
│ │ ├── 11-13-proper-scoring-rules.md
│ │ ├── 11-09-bernoulli-deep-dive.md
│ │ ├── 11-08-classification-bernoulli.md
│ │ ├── 11-04-regression-l2-l1.md
│ │ ├── 11-14-tree-splitting-deep-dive.md
│ │ └── 11-15-max-likelihood.md
│ ├── 15_regularization
│ │ ├── 15-15-literature.md
│ │ ├── _index.md
│ │ ├── 15-02-l2.md
│ │ ├── 15-03-l1.md
│ │ ├── 15-14-lasso-deep.md
│ │ ├── 15-04-l1vsl12.md
│ │ ├── 15-01-regu-intro.md
│ │ ├── 15-12-early-stopping.md
│ │ ├── 15-13-ridge-deep.md
│ │ ├── 15-07-nonlin.md
│ │ ├── 15-06-other.md
│ │ ├── 15-08-bayes.md
│ │ ├── 15-11-geom-l1.md
│ │ ├── 15-09-wd.md
│ │ ├── 15-05-enetlogreg.md
│ │ └── 15-10-geom-l2.md
│ ├── 19_gaussian_processes
│ │ ├── 19-06-literature.md
│ │ ├── _index.md
│ │ ├── 19-03-covariance.md
│ │ ├── 19-04-prediction.md
│ │ ├── 19-05-training.md
│ │ ├── 19-01-bayes-lm.md
│ │ └── 19-02-basic.md
│ ├── 10_nested_resampling
│ │ ├── _index.md
│ │ ├── 10-02-trainvalidtest.md
│ │ ├── 10-03-nestedresampling.md
│ │ └── 10-01-nestedintro.md
│ ├── 17_nonlinear_svm
│ │ ├── _index.md
│ │ ├── 17-06-model-sel.md
│ │ ├── 17-05-kernel-rbf.md
│ │ ├── 17-02-kernel-trick.md
│ │ ├── 17-01-featuregen.md
│ │ ├── 17-03-kernel-poly.md
│ │ └── 17-04-rkhs-repr.md
│ ├── 12_multiclass
│ │ ├── _index.md
│ │ ├── 12-04-codebooks.md
│ │ ├── 12-02-softmax-regression.md
│ │ ├── 12-01-losses.md
│ │ └── 12-03-binary-reduction.md
│ ├── 16_linear_svm
│ │ ├── _index.md
│ │ ├── 16-04-erm.md
│ │ ├── 16-05-optimization.md
│ │ ├── 16-01-hard-margin.md
│ │ ├── 16-02-hard-margin-dual.md
│ │ └── 16-03-soft-margin.md
│ ├── 01_ml_basics
│ │ ├── _index.md
│ │ ├── 01-00-nutshell-basics.md
│ │ ├── 01-05-learner.md
│ │ ├── 01-03-tasks.md
│ │ ├── 01-02-data.md
│ │ ├── 01-06-riskminimization.md
│ │ ├── 01-07-optimization.md
│ │ ├── 01-04-models-parameters.md
│ │ ├── 01-01-what_is_ml.md
│ │ └── 01-08-learnercomponents-hro.md
│ ├── 05_knn
│ │ ├── _index.md
│ │ └── 05-01-knn.md
│ ├── 14_cod
│ │ ├── _index.md
│ │ ├── 14-01-cod.md
│ │ └── 14-02-cod-examples.md
│ ├── 08_neural_networks
│ │ ├── _index.md
│ │ ├── 08-06-history.md
│ │ ├── 08-00-nutshell-nn.md
│ │ ├── 08-07-backprob1.md
│ │ ├── 08-05-mulitlayerNN.md
│ │ ├── 08-01-intro.md
│ │ ├── 08-04-NN4multiclass.md
│ │ ├── 08-03-single-hidden-layer.md
│ │ └── 08-02-single-neuron.md
│ ├── 09_tuning
│ │ ├── _index.md
│ │ ├── 09-00-nutshell-tuning.md
│ │ ├── 09-01-intro.md
│ │ ├── 09-05-tuning-pipelines.md
│ │ ├── 09-04-tuning-advanced.md
│ │ ├── 09-02-tuning-tuningproblem.md
│ │ ├── 09-03-basicalgos.md
│ │ └── further-material.md
│ ├── 02_supervised_regression
│ │ ├── _index.md
│ │ ├── 02-02-ols.md
│ │ ├── 02-00-nutshell-regression.md
│ │ ├── 02-04-polynomials.md
│ │ ├── 02-01-l2-loss.md
│ │ ├── 02-03-l1-loss.md
│ │ └── Bananas.svg
│ ├── coding_ml_r
│ │ └── _index.md
│ ├── 04_evaluation
│ │ ├── _index.md
│ │ ├── 04-13-auc-mwu.md
│ │ ├── 04-03-train.md
│ │ ├── 04-00-nutshell-evaluation.md
│ │ ├── 04-12-prcurves.md
│ │ ├── 04-04-test.md
│ │ ├── 04-11-partialauc-mcauc.md
│ │ ├── 04-07-resampling-2.md
│ │ ├── 04-08-measures-classification.md
│ │ ├── 04-01-generalization-error.md
│ │ ├── 04-02-measures-regression.md
│ │ ├── 04-09-rocbasics.md
│ │ ├── 04-06-resampling-1.md
│ │ ├── 04-10-roccurves.md
│ │ └── 04-05-overfitting-underfitting.md
│ ├── 03_supervised_classification
│ │ ├── _index.md
│ │ ├── 03-01-tasks.md
│ │ ├── 03-00-nutshell-classification.md
│ │ ├── 03-02-classification-basicdefs.md
│ │ ├── 03-05-classification-discranalysis.md
│ │ ├── 03-03-classification-linear.md
│ │ ├── 03-04-classification-logistic.md
│ │ └── 03-06-classification-naivebayes.md
│ ├── 00_all
│ │ └── _index.md
│ └── coding_ml_python
│ │ └── _index.md
├── appendix
│ ├── _index.md
│ ├── cheatsheet_notation.pdf
│ ├── 00_learner_slides.md
│ ├── 03_related.md
│ ├── 01_cheat_sheets.md
│ ├── 04_data.md
│ └── 02_errata.md
├── contributing
│ └── _index.md
├── prerequisites
│ └── _index.md
├── _index.md
├── team
│ └── _index.md
├── literature
│ └── _index.md
└── exercises
│ └── _index.md
├── static
├── favicon.ico
├── favicon-16x16.png
├── favicon-32x32.png
├── mstile-70x70.png
├── apple-touch-icon.png
├── mstile-144x144.png
├── mstile-150x150.png
├── mstile-310x310.png
├── android-chrome-192x192.png
├── android-chrome-512x512.png
├── browserconfig.xml
├── site.webmanifest
├── i2ml_old.svg
└── safari-pinned-tab.svg
├── archetypes
└── default.md
├── .gitmodules
├── .gitignore
├── netlify.toml
├── Makefile
├── .github
└── workflows
│ └── gh-pages.yml
├── LICENSE
├── config.toml
└── README.md
/content/chapters/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Chapters
3 | ---
--------------------------------------------------------------------------------
/content/appendix/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Appendix
3 | show_in_index: content
4 | ---
--------------------------------------------------------------------------------
/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slds-lmu/i2ml/HEAD/static/favicon.ico
--------------------------------------------------------------------------------
/static/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slds-lmu/i2ml/HEAD/static/favicon-16x16.png
--------------------------------------------------------------------------------
/static/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slds-lmu/i2ml/HEAD/static/favicon-32x32.png
--------------------------------------------------------------------------------
/static/mstile-70x70.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slds-lmu/i2ml/HEAD/static/mstile-70x70.png
--------------------------------------------------------------------------------
/static/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slds-lmu/i2ml/HEAD/static/apple-touch-icon.png
--------------------------------------------------------------------------------
/static/mstile-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slds-lmu/i2ml/HEAD/static/mstile-144x144.png
--------------------------------------------------------------------------------
/static/mstile-150x150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slds-lmu/i2ml/HEAD/static/mstile-150x150.png
--------------------------------------------------------------------------------
/static/mstile-310x310.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slds-lmu/i2ml/HEAD/static/mstile-310x310.png
--------------------------------------------------------------------------------
/static/android-chrome-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slds-lmu/i2ml/HEAD/static/android-chrome-192x192.png
--------------------------------------------------------------------------------
/static/android-chrome-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slds-lmu/i2ml/HEAD/static/android-chrome-512x512.png
--------------------------------------------------------------------------------
/archetypes/default.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "{{ replace .Name "-" " " | title }}"
3 | date: {{ .Date }}
4 | draft: true
5 | ---
6 |
7 |
--------------------------------------------------------------------------------
/content/appendix/cheatsheet_notation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slds-lmu/i2ml/HEAD/content/appendix/cheatsheet_notation.pdf
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "themes/website_theme"]
2 | path = themes/website_theme
3 | url = git@github.com:slds-lmu/website_theme.git
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .hugo_build.lock
2 | .Rproj.user
3 | *.Rproj
4 | *.Rhistory
5 |
6 | .DS_Store
7 |
8 | # rendered content
9 | public
10 |
--------------------------------------------------------------------------------
/content/chapters/22_online_learning/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 22: Online Learning"
3 | ---
4 | This chapter introduces online learning.
--------------------------------------------------------------------------------
/content/chapters/21_multitarget_learning/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 21: Multitarget Learning"
3 | ---
4 | This chapter introduces multitarget learning techniques.
--------------------------------------------------------------------------------
/content/chapters/20_imbalanced_learning/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 20: Imbalanced Learning"
3 | ---
4 | This chapter introduces techniques for learning on imbalanced datasets.
5 |
--------------------------------------------------------------------------------
/content/chapters/13_information_theory/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 13: Information Theory"
3 | ---
4 | This chapter covers basic information-theoretic concepts and discusses their relation to machine learning.
--------------------------------------------------------------------------------
/content/chapters/18_boosting/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 18: Boosting"
3 | ---
4 | This chapter introduces boosting as a sequential ensemble method that creates powerful committees from different kinds of base learners.
--------------------------------------------------------------------------------
/content/chapters/06_cart/06-08-literature.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter Literature"
3 | weight: 6008
4 | ---
5 |
6 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/chapter-literature-cart.pdf" >}}
7 |
--------------------------------------------------------------------------------
/content/chapters/30_feature_selection/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Extra Chapter: Feature Selection"
3 | ---
4 | This chapter introduces feature selection, i.e., finding a well-performing, hopefully small set of
5 | features for a task.
--------------------------------------------------------------------------------
/content/chapters/07_forests/07-06-literature.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter Literature"
3 | weight: 7006
4 | ---
5 |
6 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/chapter-literature-forests.pdf" >}}
7 |
--------------------------------------------------------------------------------
/content/chapters/18_boosting/18-13-literature.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter Literature"
3 | weight: 180013
4 | ---
5 |
6 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/chapter-literature-boosting.pdf" >}}
7 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-19-literature.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter Literature"
3 | weight: 11019
4 | ---
5 |
6 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/chapter-literature-advriskmin.pdf" >}}
7 |
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-15-literature.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter Literature"
3 | weight: 15015
4 | ---
5 |
6 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/chapter-literature-regularization.pdf" >}}
7 |
--------------------------------------------------------------------------------
/content/chapters/30_feature_selection/30-06-literature.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter Literature"
3 | weight: 30006
4 | ---
5 |
6 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/chapter-literature-feature-selection.pdf" >}}
7 |
--------------------------------------------------------------------------------
/content/chapters/13_information_theory/13-12-literature.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter Literature"
3 | weight: 13012
4 | ---
5 |
6 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/chapter-literature-information-theory.pdf" >}}
7 |
--------------------------------------------------------------------------------
/content/chapters/19_gaussian_processes/19-06-literature.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter Literature"
3 | weight: 19006
4 | ---
5 |
6 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/chapter-literature-gaussian-processes.pdf" >}}
7 |
--------------------------------------------------------------------------------
/netlify.toml:
--------------------------------------------------------------------------------
1 | [build.environment]
2 | HUGO_VERSION = "0.145.0"
3 |
4 | [context.deploy-preview]
5 | command = "hugo --gc --minify --buildFuture -b $DEPLOY_PRIME_URL"
6 |
7 | [context.branch-deploy]
8 | command = "hugo --gc --minify -b $DEPLOY_PRIME_URL"
9 |
--------------------------------------------------------------------------------
/content/appendix/00_learner_slides.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Important Learners in ML
3 | ---
4 |
5 | ## Look-up slides for important ML learners
6 |
7 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/learners-overview/slides-learners.pdf" >}}
--------------------------------------------------------------------------------
/content/appendix/03_related.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Related Courses
3 | ---
4 |
5 | ## Other ML courses
6 |
7 | - [Deep Learning for NLP (DL4NLP)](https://slds-lmu.github.io/dl4nlp/)
8 | - [Introduction to Deep Learning (I2DL)](https://slds-lmu.github.io/i2dl/)
9 |
--------------------------------------------------------------------------------
/content/chapters/10_nested_resampling/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 10: Nested Resampling"
3 | ---
4 | This chapter first defines the untouched-test-set principle and proceeds to explain the concepts of train-validation-test split and nested resampling.
5 |
--------------------------------------------------------------------------------
/content/chapters/17_nonlinear_svm/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 17: Nonlinear Support Vector Machines"
3 | ---
4 | Many classification problems warrant nonlinear decision boundaries. This chapter introduces nonlinear support vector machines as a crucial extension to the linear variant.
--------------------------------------------------------------------------------
/content/chapters/19_gaussian_processes/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 19: Gaussian Processes"
3 | ---
4 | This chapter introduces Gaussian processes as a model class. Gaussian processes are non-parametric approaches with ubiquitous application that model entire distributions in function space.
--------------------------------------------------------------------------------
/static/browserconfig.xml:
--------------------------------------------------------------------------------
 1 | [XML markup stripped in extraction; only the msapplication tile color #da532c is recoverable]
--------------------------------------------------------------------------------
/content/chapters/12_multiclass/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 12: Multiclass Classification"
3 | ---
4 | This chapter treats the multiclass case of classification. Tasks with more than two classes preclude the application of some techniques studied in the binary scenario and require an adaptation of loss functions.
--------------------------------------------------------------------------------
/content/chapters/15_regularization/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15: Regularization"
3 | ---
4 | Regularization is a vital tool in machine learning to prevent overfitting and foster generalization ability. This chapter introduces the concept of regularization and discusses common regularization techniques in more depth.
--------------------------------------------------------------------------------
/content/chapters/16_linear_svm/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 16: Linear Support Vector Machines"
3 | ---
4 | This chapter introduces the linear support vector machine (SVM), a linear classifier that finds decision boundaries by maximizing margins to the closest data points, possibly allowing for violations to a certain extent.
--------------------------------------------------------------------------------
/content/chapters/01_ml_basics/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 1: ML Basics"
3 | ---
4 | This chapter introduces the basic concepts of Machine Learning. We focus on supervised learning, explain the difference between regression and classification, show how to evaluate and compare Machine Learning models and formalize the concept of learning.
--------------------------------------------------------------------------------
/content/chapters/05_knn/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 05: k-Nearest Neighbors (k-NN)"
3 | ---
4 | This chapter addresses \\(k\\)-nearest neighbors, a distance-based algorithm suited to both regression and classification. Predictions are made based upon neighboring observations, assuming feature similarity translates to target similarity.
--------------------------------------------------------------------------------
/content/chapters/14_cod/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 14: Curse of Dimensionality"
3 | ---
4 | Frequently, our intuition developed in low-dimensional spaces does not generalize to higher dimensions. This chapter introduces the phenomenon of the curse of dimensionality and discusses its effects on the behavior of machine learning models.
--------------------------------------------------------------------------------
/content/chapters/08_neural_networks/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 08: Neural Networks"
3 | ---
4 | This chapter introduces the basic concepts of neural networks. We integrated chapters from our [course on Deep Learning](https://slds-lmu.github.io/i2dl/) in order to be able to use (simple) neural networks for supervised ML on tabular data.
5 |
--------------------------------------------------------------------------------
/content/chapters/09_tuning/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 09: Tuning"
3 | ---
4 | This chapter introduces and formalizes the problem of hyperparameter tuning. We cover basic techniques such as grid search and random search as well as more advanced techniques like evolutionary algorithms, model-based optimization and multi-fidelity optimization.
5 |
--------------------------------------------------------------------------------
/content/chapters/08_neural_networks/08-06-history.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Extra: Brief History"
3 | weight: 8006
4 | ---
5 | We give a brief overview of the history of deep learning (DL).
6 |
7 |
8 |
9 |
10 | ### Lecture slides
11 |
12 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-nn-intro-brief-history.pdf" >}}
13 |
14 |
--------------------------------------------------------------------------------
/content/chapters/02_supervised_regression/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 2: Supervised Regression"
3 | ---
4 | This chapter treats the supervised regression task in more detail. We will see different loss functions for regression, how a linear regression model can be used from a Machine Learning perspective, and how to extend it with polynomials for greater flexibility.
5 |
--------------------------------------------------------------------------------
/content/appendix/01_cheat_sheets.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Cheat Sheets
3 | ---
4 |
5 | - I2ML :: BASICS
6 |
7 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/raw/master/cheatsheets/cheatsheet_notation.pdf" >}}
8 |
9 | - I2ML :: EVALUATION & TUNING
10 |
11 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/raw/master/cheatsheets/cheatsheet_eval_tuning.pdf" >}}
12 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11: Advanced Risk Minimization"
3 | ---
4 | This chapter revisits the theory of risk minimization, providing more in-depth analysis on established losses and the connection between empirical risk minimization and maximum likelihood estimation. We also introduce some more advanced loss functions for regression and classification.
--------------------------------------------------------------------------------
/content/chapters/06_cart/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 06: Classification and Regression Trees (CART)"
3 | ---
4 | This chapter introduces Classification and Regression Trees (CART), a well-established machine learning procedure. We explain the main idea and give details on splitting criteria, discuss computational aspects of growing a tree, and illustrate the idea of stopping criteria and pruning.
--------------------------------------------------------------------------------
/content/chapters/coding_ml_r/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Coding ML [R and mlr3]"
3 | ---
4 | For an introduction to the R package mlr3 we recommend walking through some chapters of the mlr3 book as summarized in this [document](https://docs.google.com/document/d/1ZYCwqzra6kKMWVyDaNsZlG9dAqk_i-GVxoXek5ND3IY/edit?usp=sharing). After some basic concepts, this focuses on resampling, tuning and pipelines.
5 |
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04: Performance Evaluation"
3 | ---
4 | This chapter treats the challenge of evaluating the performance of a model. We will introduce different performance measures for regression and classification tasks, explain the problem of overfitting as well as the difference between training and test error, and, lastly, present a variety of resampling techniques.
--------------------------------------------------------------------------------
/content/chapters/03_supervised_classification/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 03: Supervised Classification"
3 | ---
4 | This chapter treats the supervised classification task in more detail. We will see examples of binary and multiclass classification and the differences between discriminative and generative approaches. In particular, we will address logistic regression, discriminant analysis and naive Bayes classifiers.
--------------------------------------------------------------------------------
/content/chapters/18_boosting/18-11-CWB-GLM.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 18.11: CWB and GLMs"
3 | weight: 180011
4 | ---
5 | We explain the relationship between CWB and GLMs.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="6jKXf8FRY1E" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/blob/main/slides-pdf/slides-boosting-cwb-glm.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/13_information_theory/13-10-sourcecoding2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 13.10: Entropy and Optimal Code Length II"
3 | weight: 13010
4 | ---
5 | In this section, we continue our discussion on source coding and its relation to entropy.
6 |
7 |
8 |
9 | ### Lecture slides
10 |
11 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-info-sourcecoding2.pdf" >}}
12 |
--------------------------------------------------------------------------------
/content/chapters/13_information_theory/13-11-mi-deepdive.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 13.11: MI under Reparametrization: Deep Dive"
3 | weight: 13011
4 | ---
5 | In this deep dive, we discuss the invariance of MI under certain reparametrizations.
6 |
7 |
8 |
9 |
10 | ### Lecture slides
11 |
12 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-info-mi-deepdive.pdf" >}}
13 |
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-02-l2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15.02: Ridge Regression"
3 | weight: 15002
4 | ---
5 | We introduce Ridge regression as a key approach to regularizing linear models.
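
As a quick reference (generic notation; the slides fix the exact conventions): Ridge regression adds an \\(L2\\) penalty to the least-squares objective, \\(\min_{\theta} \sum_{i=1}^n (y^{(i)} - \theta^\top x^{(i)})^2 + \lambda \|\theta\|_2^2\\), with \\(\lambda \geq 0\\) controlling the amount of shrinkage.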
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="OzKjWchY-AU" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-regu-l2.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-03-l1.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15.03: Lasso Regression"
3 | weight: 15003
4 | ---
5 | We introduce Lasso regression as a key approach to regularizing linear models.
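
For orientation (generic notation): the Lasso replaces the Ridge penalty by an \\(L1\\) term, \\(\min_{\theta} \sum_{i=1}^n (y^{(i)} - \theta^\top x^{(i)})^2 + \lambda \|\theta\|_1\\), which induces sparse coefficient vectors.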
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="iqlQLg5u9Nc" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-regu-l1.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/13_information_theory/13-09-sourcecoding.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 13.09: Entropy and Optimal Code Length I"
3 | weight: 13009
4 | ---
5 | In this section, we introduce source coding and discuss how entropy can be understood as optimal code length.
6 |
7 |
8 |
9 | ### Lecture slides
10 |
11 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-info-sourcecoding.pdf" >}}
12 |
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-14-lasso-deep.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15.14: Soft-thresholding and L1 regularization: Deep Dive"
3 | weight: 15014
4 | ---
5 | In this section, we prove the previously stated proposition regarding soft-thresholding and L1 regularization.
6 |
7 |
8 |
9 | ### Lecture slides
10 |
11 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-regu-lasso-deepdive.pdf" >}}
12 |
--------------------------------------------------------------------------------
/content/chapters/30_feature_selection/30-01-introduction.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 30.01: Introduction"
3 | weight: 30001
4 | ---
5 | We motivate feature selection and discuss how it differs from feature extraction.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="xiVB1EmlU9A" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-fs-introduction.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/13_information_theory/13-03-diffent.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 13.03: Differential Entropy"
3 | weight: 13003
4 | ---
5 | In this section, we extend the definition of entropy to the continuous case.
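
In generic notation: for a continuous random variable \\(X\\) with density \\(f\\), the differential entropy is \\(h(X) = - \int f(x) \log f(x) \, dx\\).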
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="aeJzIzKNLWI" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-info-diffent.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-04-l1vsl12.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15.04: Lasso vs Ridge Regression"
3 | weight: 15004
4 | ---
5 | This section provides a detailed comparison between Lasso and Ridge regression.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="XHZT-ZtOVf0" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-regu-l1vsl2.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/30_feature_selection/30-03-filters1.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 30.03: Filter Methods I"
3 | weight: 30003
4 | ---
5 | We explain how filter methods work and how they can be used for feature selection.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="RcDyvExpCSg" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-fs-filters1.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/30_feature_selection/30-05-wrapper.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 30.05: Wrapper Methods"
3 | weight: 30005
4 | ---
5 | This section explains wrapper methods and how they can aid feature selection.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="XmvlHUCGNbc" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-fs-wrapper.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/07_forests/07-02-basics.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 07.02: Basics"
3 | weight: 7002
4 | quizdown: true
5 | ---
6 | In this section we investigate random forests, a modification of bagging for trees.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="NY3Tux1Zt4g" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-forests-basics.pdf" >}}
17 |
--------------------------------------------------------------------------------
/content/chapters/20_imbalanced_learning/20-09-smpl-2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 20.09: Sampling Methods 2"
3 | weight: 20009
4 | ---
5 | We introduce the state-of-the-art oversampling technique SMOTE.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="6T5_SJmuiR0" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-imbalanced-learning-sampling-methods-2.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-01-regu-intro.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15.01: Introduction to Regularization"
3 | weight: 15001
4 | ---
5 | In this section, we revisit overfitting and introduce regularization as a remedy.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="EjmPEAD9twg" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-regu-intro.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/18_boosting/18-01-intro-adaboost.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 18.01: Introduction to Boosting / AdaBoost"
3 | weight: 18001
4 | ---
5 | In this section, we introduce the pioneering AdaBoost algorithm.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="EKatfc6W1DU" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-boosting-intro-adaboost.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/01_ml_basics/01-00-nutshell-basics.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 01.00: ML Basics: In a Nutshell"
3 | weight: 1000
4 | ---
5 | In this nutshell chunk, we dive into the foundational principles of Machine Learning.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="1f0gilKVx2I" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-basics-nutshell.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/04-13-auc-mwu.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04.13: AUC & Mann-Whitney-U Test"
3 | weight: 4013
4 | ---
5 |
6 | We demonstrate that the AUC is equivalent to the normalized test statistic in
7 | the Mann-Whitney-U test, both of which are effectively rank-based metrics.
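
Concretely (generic notation): with \\(n_{+}\\) positive and \\(n_{-}\\) negative observations, \\(AUC = U / (n_{+} n_{-})\\), i.e., the Mann-Whitney U statistic (counting positive-negative score pairs ranked concordantly) normalized by the number of such pairs.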
8 |
9 |
10 |
11 | ### Lecture slides
12 |
13 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-auc-mwu.pdf" >}}
--------------------------------------------------------------------------------
/content/chapters/06_cart/06-00-nutshell-cart.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 06.00: CART: In a Nutshell"
3 | weight: 6000
4 | ---
5 | In this nutshell chunk, we unravel the workings of CARTs (Classification and Regression Trees).
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="t2PB42edS7Q" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-cart-nutshell.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-12-early-stopping.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15.12: Early Stopping"
3 | weight: 15012
4 | ---
5 | In this section, we introduce early stopping and show how it can act as a regularizer.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="5e2fgW4Ok3s" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-regu-early-stopping.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/17_nonlinear_svm/17-06-model-sel.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 17.06: SVM Model Selection"
3 | weight: 17006
4 | ---
5 | In this section, we discuss the importance of SVM hyperparameters for adequate solutions.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="drcLf-c9Tv8" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-nonlinsvm-modelsel.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/18_boosting/18-09-CWB-1.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 18.09: Component Wise Boosting Basics 1"
3 | weight: 18009
4 | ---
5 | We introduce the concept of CWB, common base learners and built-in feature selection.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="9l2mUSOkWRc" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/blob/main/slides-pdf/slides-boosting-cwb-basics.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/22_online_learning/22-04-ftrl.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 22.04: Follow the regularized Leader"
3 | weight: 22004
4 | ---
5 | In this chapter we introduce FTRL as a stable alternative to FTL.
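
In one common formulation (generic notation), FTRL chooses \\(\theta_{t+1} = \arg\min_{\theta} \sum_{s=1}^{t} \ell_s(\theta) + \psi(\theta)\\), i.e., FTL with an additional regularizer \\(\psi\\) that stabilizes the predictions.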
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="9qyHz1kXx3s" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-online-learning-simple-learners-ftrl.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/01_ml_basics/01-05-learner.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 01.05: Learner"
3 | weight: 1005
4 | ---
5 | Roughly speaking, learners (endowed with a specific hyperparameter configuration) take training data and return a model.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="we4Y8z9naZk">}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-basics-learner.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-07-classification-01.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.07: Classification and 0-1-Loss"
3 | weight: 11007
4 | ---
5 | In this section, we revisit the 0-1-loss and derive its risk minimizer.
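
For reference (generic notation): the 0-1 loss is \\(L(y, h(x)) = \mathbb{1}[y \neq h(x)]\\), and its risk minimizer is the Bayes-optimal classifier \\(h^{*}(x) = \arg\max_{k} P(y = k \mid x)\\).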
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="MKRn03fZS7A" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-advriskmin-classification-01.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/12_multiclass/12-04-codebooks.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 12.04: Designing Codebooks and ECOC"
3 | weight: 12004
4 | ---
5 | In this section, we introduce codebooks as a general concept for multiclass-to-binary reduction.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="sbT-1XUN3iM" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-mc-codebooks.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/17_nonlinear_svm/17-05-kernel-rbf.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 17.05: The Gaussian RBF Kernel"
3 | weight: 17005
4 | ---
5 | In this section, we introduce the popular Gaussian RBF kernel and discuss its properties.
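
In one common parametrization, the Gaussian RBF kernel is \\(k(x, x') = \exp(-\gamma \|x - x'\|^2)\\), often written with \\(\gamma = 1 / (2\sigma^2)\\).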
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="hGqWSP8PzHg" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-nonlinsvm-kernel-rbf.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/18_boosting/18-08-gradient-boosting-xgboost.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 18.08: XGBoost"
3 | weight: 18008
4 | ---
5 | We introduce XGBoost, a highly efficient, tree-based boosting system with additional regularizers.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="HnSPxlA7j6A" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-boosting-xgboost.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/12_multiclass/12-02-softmax-regression.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 12.02: Softmax Regression"
3 | weight: 12002
4 | ---
5 | In this section, we introduce softmax regression as a generalization of logistic regression.
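
In generic notation, softmax regression models class probabilities as \\(\pi_k(x) = \exp(\theta_k^\top x) / \sum_{j} \exp(\theta_j^\top x)\\), which reduces to logistic regression for two classes.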
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="cgQUG-ZWPL8" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-mc-softmax-regression.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/02_supervised_regression/02-02-ols.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 02.02: Proof OLS Regression: Deep Dive"
3 | weight: 2002
4 | ---
5 | In this section, we provide you with a proof for the ordinary least squares (OLS) method.
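
For reference (matrix notation, assuming \\(X^\top X\\) is invertible): the OLS estimator is \\(\hat{\theta} = (X^\top X)^{-1} X^\top y\\).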
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="JyIdkeBaqD8" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-regression-deepdive-ols.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-13-ridge-deep.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15.13: Details on Ridge Regression: Deep Dive"
3 | weight: 15013
4 | ---
5 | In this section, we consider Ridge regression as row-augmentation and as minimizing risk under feature noise. We also discuss the bias-variance tradeoff.
6 |
7 |
8 |
9 | ### Lecture slides
10 |
11 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-regu-ridge-deepdive.pdf" >}}
12 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-16-bias-variance-decomposition.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.16: Bias Variance Decomposition I"
3 | weight: 11016
4 | ---
5 | We discuss how to decompose the generalization error of a learner.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="AOHhV6pMxzk" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-advriskmin-bias-variance-decomposition-1.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/13_information_theory/13-02-entropy2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 13.02: Entropy II"
3 | weight: 13002
4 | ---
5 | We continue our discussion about entropy and introduce joint entropy, the uniqueness theorem and the maximum entropy principle.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="aFYF459PE-w" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-info-entropy2.pdf" >}}
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-07-nonlin.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15.07: Non-Linear Models and Structural Risk Minimization"
3 | weight: 15007
4 | ---
5 | In this section, we demonstrate regularization in non-linear models like neural networks.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="_jNeD2AUJqI" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-regu-nonlin.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/16_linear_svm/16-04-erm.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 16.04: SVMs and Empirical Risk Minimization"
3 | weight: 16004
4 | ---
5 | In this section, we show how the SVM problem can be understood as an instance of empirical risk minimization.
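
In one common formulation, the soft-margin SVM is equivalent to regularized empirical risk minimization with the hinge loss, \\(\min_{\theta} \tfrac{1}{2} \|\theta\|^2 + C \sum_{i=1}^n \max(0, 1 - y^{(i)} f(x^{(i)}))\\).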
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="bulWkppkZ4Y" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-linsvm-erm.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/18_boosting/18-10-CWB-2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 18.10: Component Wise Boosting Basics 2"
3 | weight: 180010
4 | ---
5 | We explain the handling of categorical features and of the intercept and introduce a practical example.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="ALA5F8n-8aU" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/blob/main/slides-pdf/slides-boosting-cwb-basics2.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/20_imbalanced_learning/20-05-cs-3.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 20.05: Cost-Sensitive Learning 3"
3 | weight: 20005
4 | ---
5 | We explain the concepts of instance-specific costs and cost-sensitive OVO.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="qRb_nlJA7v0" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-imbalanced-learning-cost-sensitive-learning-3.pdf" >}}
15 |
16 |
--------------------------------------------------------------------------------
/content/chapters/20_imbalanced_learning/20-06-cc-1.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 20.06: Cost Curves 1"
3 | weight: 20006
4 | ---
5 | We introduce cost curves for misclassification error and explain the duality between ROC points and cost lines.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="w2BFjL6LRwQ" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-imbalanced-learning-costcurves-1.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/07_forests/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 07: Random Forests"
3 | ---
4 | This chapter introduces bagging as a method to increase the performance of trees (or other base learners). A modification of bagging leads to random forests. We explain the main idea of random forests, benchmark their performance with the methods seen so far and show how to quantify the impact of a single feature on the performance of the random forest as well as how to compute proximities between observations.
--------------------------------------------------------------------------------
/content/chapters/16_linear_svm/16-05-optimization.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 16.05: SVM Training"
3 | weight: 16005
4 | ---
5 | The linear SVM problem is challenging due to its non-differentiability. In this section, we present methods for its optimization.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="14_0A1a9XtQ" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-linsvm-optimization.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/19_gaussian_processes/19-03-covariance.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 19.03: Covariance Functions for GPs"
3 | weight: 19003
4 | ---
5 | In this section, we discuss the role of covariance functions in GPs and introduce the most common choices.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="8fB3RwxNObw" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/blob/main/slides-pdf/slides-gp-covariance.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/22_online_learning/22-05-ftl-oqo.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 22.05: Follow the Leader on OQO problems"
3 | weight: 22005
4 | ---
5 | In this chapter we prove that FTL works for online quadratic optimization (OQO) problems.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="UK-pCAD9GRY" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-online-learning-simple-learners-ftl-oqo-deep-dive.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/14_cod/14-01-cod.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 14.01: Curse of Dimensionality"
3 | weight: 14001
4 | ---
5 | In this section, we discuss why our geometric intuition fails in high-dimensional spaces and introduce the phenomenon of the curse of dimensionality.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="MYzDeYbkqV4" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-cod.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/14_cod/14-02-cod-examples.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 14.02: Curse of Dimensionality - Examples"
3 | weight: 14002
4 | ---
5 | In this section, we show examples of how \\(k\\)-NN and the linear model suffer from the curse of dimensionality.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="6xKwcos63jo" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-cod-examples.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-06-other.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15.06: Other Types of Regularization"
3 | weight: 15006
4 | ---
5 | In this section, we introduce other regularization approaches besides the important special cases \\(L1\\) and \\(L2\\).
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="wrK1lvI8VDY" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-regu-others.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/20_imbalanced_learning/20-04-cs-2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 20.04: Cost-Sensitive Learning 2"
3 | weight: 20004
4 | ---
5 | In this section, we focus on empirical thresholding and the model-agnostic MetaCost approach.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="aPYUKBaSLOo" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-imbalanced-learning-cost-sensitive-learning-2.pdf" >}}
15 |
16 |
--------------------------------------------------------------------------------
/content/chapters/20_imbalanced_learning/20-07-cc-2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 20.07: Cost Curves 2"
3 | weight: 20007
4 | ---
5 | We explain cost curves based on cost matrices and use them to compare classifiers. In addition, we wrap up with a comparison to ROC curves.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="9qKCmVIqUbI" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-imbalanced-learning-costcurves-2.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/22_online_learning/22-02-simple.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 22.02: Simple Online Learning Algorithm"
3 | weight: 22002
4 | ---
5 | In this chapter we introduce the formalization of online learning algorithms and the FTL algorithm.
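As a rough sketch of the idea (generic notation, not necessarily the slides'): FTL plays the parameter that minimizes the cumulative loss observed so far, \\(\theta^{[t+1]} \in \arg\min_{\theta} \sum_{s=1}^{t} L^{[s]}(\theta)\\).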
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="bomA-elLg40" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-online-learning-simple-learners.pdf" >}}
15 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: all publish clean update_theme watch
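# Note: all targets below are phony (they do not produce files of the same name).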
2 |
3 | all: clean publish
4 |
5 | publish:
6 | git worktree add public gh-pages
7 | hugo
8 | cd public; git add --all; git commit -m "Publish via makefile"; git push origin gh-pages --force
9 | rm -rf public
10 | git worktree prune
11 |
12 | clean:
13 | rm -rf public
14 | git worktree prune
15 |
16 | update_theme:
17 | git submodule update --recursive --remote
18 | git add themes/courseTheme
19 | git commit -m "update theme"
20 |
21 | watch: clean; hugo server -D
--------------------------------------------------------------------------------
/content/chapters/13_information_theory/13-01-entropy.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 13.01: Entropy I"
3 | weight: 13001
4 | ---
5 | We introduce entropy, which expresses the expected information for discrete random variables, as a central concept in information theory.
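For orientation (conventions for the log base vary): for a discrete random variable \\(X\\) with pmf \\(p\\), entropy is \\(H(X) = -\sum_x p(x) \log_2 p(x)\\).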
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="UWv2ZPnifvw" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-info-entropy.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/17_nonlinear_svm/17-02-kernel-trick.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 17.02: The Kernel Trick"
3 | weight: 17002
4 | ---
5 | In this section, we show how nonlinear SVMs work their magic by introducing nonlinearity efficiently via the kernel trick.
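A small worked example (not from the slides): for \\(x, z \in \mathbb{R}^2\\) and the feature map \\(\phi(x) = (x_1^2, \sqrt{2} x_1 x_2, x_2^2)\\), one can check that \\(\langle \phi(x), \phi(z) \rangle = (x^\top z)^2\\), so the inner product in the transformed space is computed without ever constructing \\(\phi\\) explicitly.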
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="6_ykYgGENlA" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-nonlinsvm-kernel-trick.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/18_boosting/18-03-gradient-boosting-illustration.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 18.03: Boosting Illustration"
3 | weight: 18003
4 | ---
5 | We show several illustrative regression examples to visualize the boosting
6 | principle.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="WGx3Jz5v5UE" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-boosting-regression-illustrations.pdf" >}}
17 |
--------------------------------------------------------------------------------
/content/chapters/22_online_learning/22-03-ftl.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 22.03: Follow the Leader on OLO problems"
3 | weight: 22003
4 | ---
5 | In this chapter we introduce OLO problems and explain why FTL can fail on these problems.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="tvRxb8GHHM8" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-online-learning-simple-learners-ftl-olo.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/04-03-train.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04.03: Training Error"
3 | weight: 4003
4 | ---
5 | There are two types of errors: training errors and test errors. The focus of this section is on the training error and related difficulties.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="XskndmMybfM" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-train.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/18_boosting/18-12-adv-CWB.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 18.12: Advanced CWB"
3 | weight: 18012
4 | ---
5 | We explain the details of nonlinear BLs and splines, decomposition for splines, fair base learner selection and feature importance and PDPs.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="3RnpVW8uDzY" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/blob/main/slides-pdf/slides-boosting-cwb-advanced.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/19_gaussian_processes/19-04-prediction.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 19.04: Gaussian Process Prediction"
3 | weight: 19004
4 | ---
5 | In this section, we show how to derive the posterior process and discuss further properties of GPs as well as noisy GPs.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="qlfUlFaP94g" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/blob/main/slides-pdf/slides-gp-prediction.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/30_feature_selection/30-04-filters2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 30.04: Filter Methods II (Examples and Caveats)"
3 | weight: 30004
4 | ---
5 | In this section, we discuss how filter methods can be misleading and show how they work in practical applications.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="X3FpzGnGA7o" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-fs-filters2.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-08-bayes.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15.08: Bayesian Priors"
3 | weight: 15008
4 | ---
5 | In this section, we motivate regularization from a Bayesian perspective, showing how different penalty terms correspond to different Bayesian priors.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="d_FB9Vyp7oE" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-regu-bayes.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/16_linear_svm/16-01-hard-margin.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 16.01: Linear Hard Margin SVM"
3 | weight: 16001
4 | ---
5 | Hard margin SVMs seek perfect data separation. We introduce the linear hard margin SVM problem as a quadratic optimization program.
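In generic notation (details may differ slightly from the slides), the primal problem reads \\(\min_{\theta, \theta_0} \frac{1}{2} \lVert \theta \rVert^2\\) subject to \\(y^{(i)} (\theta^\top x^{(i)} + \theta_0) \ge 1\\) for all \\(i\\).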
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="hGxESuljZII" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-linsvm-hard-margin.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/16_linear_svm/16-02-hard-margin-dual.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 16.02: Hard Margin SVM Dual"
3 | weight: 16002
4 | ---
5 | In this section, we derive the dual variant of the linear hard-margin SVM problem, a computationally favorable formulation.
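For reference (generic notation), the dual is \\(\max_{\alpha \ge 0} \sum_i \alpha_i - \frac{1}{2} \sum_{i,j} \alpha_i \alpha_j y^{(i)} y^{(j)} \langle x^{(i)}, x^{(j)} \rangle\\) subject to \\(\sum_i \alpha_i y^{(i)} = 0\\); note that the data enter only through inner products.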
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="YHHX_fi7m8s" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-linsvm-hard-margin-dual.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/18_boosting/18-05-gradient-boosting-classification.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 18.05: Boosting for Classification"
3 | weight: 18005
4 | ---
5 | We introduce boosting algorithms for both binary and multiclass classification with several examples.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="1S5rRHp631s" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-boosting-gbm-classification.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-03-pseudo-residuals.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.03: Pseudo-Residuals"
3 | weight: 11003
4 | ---
5 | We introduce the concept of pseudo-residuals, i.e., loss residuals in function space, and discuss their relation to gradient descent.
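In short, the pseudo-residual is the negative derivative of the loss with respect to the model output, \\(\tilde{r} = -\frac{\partial L(y, f(x))}{\partial f(x)}\\); for the (half) squared loss this reduces to the ordinary residual \\(y - f(x)\\).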
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="kzJjTcioC1Q" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-advriskmin-pseudo-residuals.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-11-geom-l1.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15.11: Geometry of L1 Regularization"
3 | weight: 15011
4 | ---
5 | In this section, we provide a geometric understanding of \\(L1\\) regularization and show that it encourages sparsity in the parameter vector.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="txVJzfLEPmU" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-regu-geom-l1.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/17_nonlinear_svm/17-01-featuregen.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 17.01: Feature Generation for Nonlinear Separation"
3 | weight: 17001
4 | ---
5 | We show how nonlinear feature maps project the input data to transformed spaces, where they become linearly separable.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="RMEZXkDr7Ac" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-nonlinsvm-featuregen.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/08_neural_networks/08-00-nutshell-nn.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 08.00: Neural Networks: In a Nutshell"
3 | weight: 8000
4 | ---
5 | In this nutshell chunk, we learn about neural networks, the driving force behind many of today's cutting-edge machine learning applications.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="htwEzmtc2M0" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-nn-nutshell.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-11-classification-brier-l1.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.11: Brier Score - L2/L1 Loss on Probabilities"
3 | weight: 11011
4 | ---
5 | In this section, we introduce the Brier score and derive its risk minimizer and optimal constant model.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="gS7QP7PwiUw" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-advriskmin-classification-brier-l1.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-17-bias-variance-decomposition2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.17: Bias Variance Decomposition II"
3 | weight: 11017
4 | ---
5 | We discuss how to decompose the excess risk into the estimation, approximation and optimization error.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="RN7z3jL1lUs" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-advriskmin-bias-variance-decomposition-2.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-09-wd.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15.09: Weight decay and L2"
3 | weight: 15009
4 | ---
5 | In this section, we show that L2 regularization with gradient descent is equivalent to weight decay and see how weight decay changes the optimization trajectory.
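As a one-line reminder (generic notation): running gradient descent with step size \\(\alpha\\) on \\(\mathcal{R}_{\text{emp}}(\theta) + \frac{\lambda}{2} \lVert \theta \rVert_2^2\\) yields the update \\(\theta \leftarrow (1 - \alpha \lambda) \theta - \alpha \nabla \mathcal{R}_{\text{emp}}(\theta)\\), i.e., the parameters are multiplicatively decayed before the usual gradient step.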
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="xASHDEAWP0U" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/blob/main/slides-pdf/slides-regu-wd-vs-l2.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/17_nonlinear_svm/17-03-kernel-poly.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 17.03: The Polynomial Kernel"
3 | weight: 17003
4 | ---
5 | In this section, we introduce the polynomial kernel in the context of SVMs and demonstrate how different polynomial degrees affect decision boundaries.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="YsMdZ4tjuMM" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-nonlinsvm-kernel-poly.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/20_imbalanced_learning/20-03-cs-1.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 20.03: Cost-Sensitive Learning 1"
3 | weight: 20003
4 | ---
5 | We introduce the concept of a cost matrix, the minimum expected cost principle, and the optimal theoretical threshold.
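For orientation (the cost-matrix notation here is an assumption, with \\(c_{ij}\\) the cost of predicting class \\(i\\) when the true class is \\(j\\), and misclassification assumed costlier than correct prediction): predicting the positive class is optimal in expectation iff \\(P(y = 1 \mid x) \ge \frac{c_{10} - c_{00}}{c_{10} - c_{00} + c_{01} - c_{11}}\\).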
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="Yc1uWlRLdPU" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-imbalanced-learning-cost-sensitive-learning-1.pdf" >}}
15 |
16 |
--------------------------------------------------------------------------------
/content/chapters/20_imbalanced_learning/20-08-smpl-1.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 20.08: Sampling Methods 1"
3 | weight: 20008
4 | ---
5 | We introduce the idea of sampling methods for dealing with imbalanced data. In addition, we explain certain undersampling techniques.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="zCO6CPCvrAY" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-imbalanced-learning-sampling-methods-1.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/22_online_learning/22-07-oco-2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 22.07: Online Convex optimization 2"
3 | weight: 22007
4 | ---
5 | In this chapter we explain the connection between OGD and FTRL via linearization of convex functions and how this implies regret bounds for OGD.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="A8dbluBNw-8" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-online-learning-oco-2.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/30_feature_selection/30-02-motivating-examples.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 30.02: Motivating Examples"
3 | weight: 30002
4 | ---
5 | In this section, we explain the practical importance of feature selection and show that models with
6 | integrated selection do not always work.
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="1BwgTptjDs4" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-fs-motivating-examples.pdf" >}}
16 |
--------------------------------------------------------------------------------
/static/site.webmanifest:
--------------------------------------------------------------------------------
1 | {
2 | "name": "",
3 | "short_name": "",
4 | "icons": [
5 | {
6 | "src": "/android-chrome-192x192.png",
7 | "sizes": "192x192",
8 | "type": "image/png"
9 | },
10 | {
11 | "src": "/android-chrome-512x512.png",
12 | "sizes": "512x512",
13 | "type": "image/png"
14 | }
15 | ],
16 | "theme_color": "#ffffff",
17 | "background_color": "#ffffff",
18 | "display": "standalone"
19 | }
20 |
--------------------------------------------------------------------------------
/content/chapters/01_ml_basics/01-03-tasks.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 01.03: Tasks"
3 | weight: 1003
4 | ---
5 | The tasks of supervised learning can roughly be divided in two categories: regression (for continuous outcome) and classification (for categorical outcome). We will present some examples.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="RpgQX73RKUU">}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-basics-task.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/21_multitarget_learning/21-03-methods-1.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 21.03: Methods for Multi-target Prediction 1"
3 | weight: 21003
4 | ---
5 | In this chapter we introduce the concepts of independent models for targets, mean regularization, stacking and weight sharing in DL.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="pq_k1aFsYi8" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-multitarget-methods-1.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/12_multiclass/12-01-losses.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 12.01: Multiclass Classification and Losses"
3 | weight: 12001
4 | ---
5 | In this section, we introduce the basic concepts in multiclass (MC) classification and important MC losses: the MC 0-1 loss, the MC Brier score, and the MC logarithmic loss.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="sISj6dUCrro" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-mc-losses.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-05-enetlogreg.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15.05: Elastic Net and Regularization for GLMs"
3 | weight: 15005
4 | ---
5 | In this section, we introduce the elastic net as a combination of Ridge and Lasso regression and discuss regularization for logistic regression.
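In one common parameterization (which may differ from the slides'), the elastic net solves \\(\min_{\theta} \mathcal{R}_{\text{emp}}(\theta) + \lambda ( \alpha \lVert \theta \rVert_1 + (1 - \alpha) \lVert \theta \rVert_2^2 )\\), so \\(\alpha = 1\\) recovers the Lasso and \\(\alpha = 0\\) recovers Ridge regression.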
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="Bfy_Vgxwxro" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-regu-enetlogreg.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/18_boosting/18-06-gradient-boosting-trees-1.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 18.06: Gradient Boosting with Trees I"
3 | weight: 18006
4 | ---
5 | We discuss trees as the most popular base learners in gradient boosting, with special emphasis on model structure and interaction depth.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="iOe-kTA3kxE" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-boosting-gbm-with-trees-1.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/18_boosting/18-07-gradient-boosting-trees-2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 18.07: Gradient Boosting with Trees II"
3 | weight: 18007
4 | ---
5 | We explain how terminal coefficients are found in a risk-minimal manner and briefly discuss tree-based boosting for multiclass problems.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="X0AnhTuWFrM" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-boosting-gbm-with-trees-2.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/18_boosting/18-04-gradient-boosting-regularization.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 18.04: Boosting Regularization"
3 | weight: 18004
4 | ---
5 | Powerful boosting learners tend to overfit. We discuss the number of iterations, base learner complexity, and shrinkage as countermeasures.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="NGLosW3Z1IA" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-boosting-gbm-regularization.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/20_imbalanced_learning/20-01-intro.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 20.01: Introduction"
3 | weight: 20001
4 | ---
5 | We define the phenomenon of imbalanced data sets and explain its consequences for accuracy. Furthermore, we introduce some techniques for handling imbalanced data sets.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="fKRuCjbmA6I" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-imbalanced-learning-intro.pdf" >}}
15 |
16 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-02-loss-properties.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.02: Properties of Loss Functions"
3 | weight: 11002
4 | ---
5 | We introduce key properties of loss functions and explore how these influence model assumptions, sensitivity to outliers, and the tractability of training.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="gwvc2uw_GI4" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-advriskmin-losses-properties.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/22_online_learning/22-06-oco-1.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 22.06: Online Convex optimization 1"
3 | weight: 22006
4 | ---
5 | In this chapter we introduce the class of online convex optimization problems and derive the online gradient descent as a suitable learning algorithm for such cases.
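Below is a minimal, self-contained sketch of projected online gradient descent (illustrative only; the interface, step size, and feasible set are assumptions, not taken from the lecture):

```python
import numpy as np

def online_gradient_descent(grad_fns, dim, lr=0.1, radius=1.0):
    """Projected OGD over the Euclidean ball of the given radius.

    grad_fns: sequence of callables; grad_fns[t](w) returns a (sub)gradient
              of the convex loss revealed in round t (assumed interface).
    """
    w = np.zeros(dim)
    iterates = []
    for grad in grad_fns:
        iterates.append(w.copy())      # commit to w_t before the loss is revealed
        w = w - lr * grad(w)           # gradient step on the revealed loss
        norm = np.linalg.norm(w)
        if norm > radius:              # project back onto the feasible ball
            w *= radius / norm
    return iterates

# Toy usage: quadratic losses f_t(w) = ||w - z_t||^2 with random targets z_t.
rng = np.random.default_rng(0)
targets = [0.5 * rng.normal(size=2) for _ in range(20)]
grads = [lambda w, z=z: 2.0 * (w - z) for z in targets]
print(online_gradient_descent(grads, dim=2)[-1])
```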
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="f5i5clqlU-8" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-online-learning-oco-1.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/07_forests/07-00-nutshell-random-forest.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 07.00: Random Forests: In a Nutshell"
3 | weight: 7000
4 | ---
5 | In this nutshell chunk, we delve into Random Forests, an ensemble method that harnesses multiple decision trees for improved prediction accuracy and robustness.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="fD9WPzkD93c" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-forests-nutshell.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/09_tuning/09-00-nutshell-tuning.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 09.00: Tuning & Nested Resampling: In a Nutshell"
3 | weight: 9000
4 | ---
5 | In this nutshell chunk, we explore tuning and nested resampling, focusing on their roles in evaluating and optimizing the performance of machine learning models.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="ArJtoONcT1o" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-tuning-nutshell.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/13_information_theory/13-05-cross-entropy-kld.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 13.05: Cross-Entropy and KL"
3 | weight: 13005
4 | ---
5 | We introduce cross-entropy as a further information-theoretic concept and discuss the connection between entropy, cross-entropy, and Kullback-Leibler divergence.
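The key identity (for discrete distributions \\(p\\) and \\(q\\)) is \\(H(p, q) = H(p) + D_{KL}(p \Vert q)\\): cross-entropy equals the entropy of \\(p\\) plus the extra cost incurred when \\(q\\) is used in place of \\(p\\).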
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="vtS6h0UYs4E" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-info-cross-entropy-kld.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/21_multitarget_learning/21-04-methods-2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 21.04: Methods for Multi-target Prediction 2"
3 | weight: 21004
4 | ---
5 | In this chapter we introduce the Kronecker kernel ridge regression, graph relations in targets, probabilistic classifier chains and low-rank approximations.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="sGkjzOOchqg" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-multitarget-methods-2.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/22_online_learning/22-01-intro.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 22.01: Introduction"
3 | weight: 22001
4 | ---
5 | In this chapter we explain the differences between online and batch learning, the extended learning protocol in online learning and the strategies to measure performance in online learning.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="afNt5wuPmm4" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-online-learning-intro.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/04-00-nutshell-evaluation.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04.00: Evaluation: In a Nutshell"
3 | weight: 4000
4 | ---
5 | In this nutshell chunk, we delve into the critical aspects of evaluation, unraveling how we measure and ensure the effectiveness and accuracy of machine learning models.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="grZ8V0lo0LA" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-nutshell.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-01-risk-minimizer.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.01: Risk Minimization Basics"
3 | weight: 11001
4 | ---
5 | We introduce important concepts in theoretical risk minimization: risk minimizer, Bayes risk, Bayes regret, consistent learners and the optimal constant model.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="WPKzlanJhS0" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-advriskmin-risk-minimization-basics.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-06-regression-further-losses.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.06: Advanced Regression Losses"
3 | weight: 11006
4 | ---
5 | In this section, we introduce and discuss the following advanced regression losses: Huber, log-cosh, Cauchy, epsilon-insensitive, and quantile loss.
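As a quick reference (one common parameterization): the Huber loss is quadratic near zero and linear in the tails, \\(L(y, f) = \frac{1}{2}(y - f)^2\\) for \\(|y - f| \le \delta\\) and \\(L(y, f) = \delta |y - f| - \frac{1}{2}\delta^2\\) otherwise, which keeps it differentiable while reducing sensitivity to outliers.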
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="mLZQxHdqdOM" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-advriskmin-regression-further-losses.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/02_supervised_regression/02-00-nutshell-regression.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 02.00: Supervised Regression: In a Nutshell"
3 | weight: 2000
4 | ---
5 | In this nutshell chunk, we explore the fundamentals of supervised regression, where we teach machines to predict continuous outcomes based on input data.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="jGxycO-QkM4" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-regression-nutshell.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/03_supervised_classification/03-01-tasks.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 03.01: Classification Tasks"
3 | weight: 3001
4 | ---
5 | In classification, the task is to predict a categorical (binary or multiclass) label. In this section, we illustrate the concept of classification with some typical examples.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="KhKY8nlpMH0" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-classification-tasks.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/04-12-prcurves.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04.12: Precision-Recall Curves"
3 | weight: 4012
4 | ---
5 |
6 | Besides plotting TPR against FPR to obtain the ROC curve, it sometimes makes
7 | sense to instead consider precision (= PPV) vs recall (= TPR), especially when
8 | data are imbalanced.
9 |
10 |
11 |
12 | ### Lecture video
13 |
14 | {{< video id="6tJXX3vmYl0" >}}
15 |
16 | ### Lecture slides
17 |
18 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-prcurves.pdf" >}}
--------------------------------------------------------------------------------
/content/chapters/09_tuning/09-01-intro.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 09.01: Introduction"
3 | weight: 9001
4 | ---
5 | While model parameters are optimized during training, hyperparameters must be specified in advance. In this section, we motivate why it is crucial to find good values for these hyperparameters, i.e., to tune them.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="Lc0u2iiQn0E" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-tuning-intro.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-05-l1-deep-dive.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.05: L1 loss: Deep Dive"
3 | weight: 11005
4 | ---
5 | In this **deep dive**, we revisit \\(L1\\) loss and derive its risk minimizer -- the conditional median -- and optimal constant model -- the empirical median of observed target values. Please note that there are no videos accompanying this section.
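In short (generic notation): the optimal constant model solves \\(\arg\min_{c} \sum_{i=1}^{n} |y^{(i)} - c|\\), and any empirical median of the \\(y^{(i)}\\) attains this minimum.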
6 |
7 |
8 |
9 | ### Lecture slides
10 |
11 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-advriskmin-regression-l1-deepdive.pdf" >}}
12 |
--------------------------------------------------------------------------------
/content/chapters/01_ml_basics/01-02-data.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 01.02: Data"
3 | weight: 1002
4 | ---
5 | In this section we explain the basic structure of tabular data used in machine learning. We will differentiate targets from features, talk about labeled and unlabeled data and introduce the concept of the data generating process.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="j7-Ci5VWILA">}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-basics-data.pdf" >}}
--------------------------------------------------------------------------------
/content/chapters/08_neural_networks/08-07-backprob1.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Extra: Basic Backpropagation 1"
3 | weight: 8007
4 | ---
5 | This section introduces forward and backward passes, the chain rule, and the details of backpropagation in deep learning.
6 |
7 |
8 | ### Lecture video
9 |
10 | [video](https://drive.google.com/file/d/1rLKDhBrFE92x7ysoU0rlw_k8ojnKMMz6/view?usp=sharing)
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-nn-basic-backpropagation1.pdf" >}}
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-18-bias-variance-deep-dive.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.18: Bias Variance Decomposition: Deep Dive"
3 | weight: 11018
4 | ---
5 | In this segment, we discuss details of the decomposition of the generalization error of a learner. This section is presented as a **deep-dive**. Please note that there are no videos accompanying this section.
6 |
7 |
8 |
9 | ### Lecture slides
10 |
11 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/blob/main/slides-pdf/slides-advriskmin-bias-variance-decomposition-deepdive.pdf" >}}
12 |
--------------------------------------------------------------------------------
/content/chapters/17_nonlinear_svm/17-04-rkhs-repr.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 17.04: Reproducing Kernel Hilbert Space and Representer Theorem"
3 | weight: 17004
4 | ---
5 | In this section, we introduce important theoretical background on nonlinear SVMs that essentially allows us to express them as a weighted sum of basis functions.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="TPmDwKc5MOg" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-nonlinsvm-rkhs-repr.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/19_gaussian_processes/19-05-training.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 19.05: Gaussian Process Training"
3 | weight: 19005
4 | ---
5 | In this section, we show how Gaussian processes are actually trained using maximum likelihood estimation and exploiting the fact that we can learn covariance functions' hyperparameters on the fly.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="S0GqTy2gLf0" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/blob/main/slides-pdf/slides-gp-training.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/01_ml_basics/01-06-riskminimization.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 01.06: Losses and Risk Minimization"
3 | weight: 1006
4 | ---
5 | In order to find good solutions we need a concept to evaluate and compare models. To this end, the concepts of *loss function*, *risk* and *empirical risk minimization* are introduced.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="NMNfjOL40y0">}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-basics-riskminimization.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/03_supervised_classification/03-00-nutshell-classification.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 03.00: Supervised Classification: In a Nutshell"
3 | weight: 3000
4 | ---
5 | In this nutshell chunk, we delve into the basics of supervised classification, where we train machines to categorize input data into predefined labels.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="x8TG-Jrb_80" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-classification-nutshell.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/06_cart/06-02-treegrowing.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 06.02: Growing a Tree"
3 | weight: 6002
4 | ---
5 | In this section, we explain how to grow a tree starting with an empty tree, i.e., a root node containing all the data. It will be shown that trees are grown by recursively applying greedy optimization to each node.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="SWbrSLJv8wc" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-cart-treegrowing.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-10-classification-logreg-deep-dive.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.10: Logistic Regression: Deep Dive"
3 | weight: 11010
4 | ---
5 | In this segment, we derive the gradient and Hessian of logistic regression and show that logistic regression is a convex problem. This section is presented as a **deep-dive**. Please note that there are no videos accompanying this section.
6 |
7 |
8 |
9 | ### Lecture slides
10 |
11 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/blob/main/slides-pdf/slides-advriskmin-logreg-deepdive.pdf" >}}
12 |
--------------------------------------------------------------------------------
/content/chapters/19_gaussian_processes/19-01-bayes-lm.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 19.01: The Bayesian Linear Model"
3 | weight: 19001
4 | ---
5 | We begin by reviewing the Bayesian formulation of a linear model and show that instead of point estimates for parameters and predictions, we obtain an entire posterior and predictive distribution.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="H7Qy1X12Ypo" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/blob/main/slides-pdf/slides-gp-bayes-lm.pdf" >}}
16 |
17 |
--------------------------------------------------------------------------------
/content/chapters/08_neural_networks/08-05-mulitlayerNN.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 08.05: MLP: Multi-Layer Feedforward Neural Networks"
3 | weight: 8005
4 | ---
5 | Architectures of deep neural networks and deep neural networks as chained functions are the learning goal of this part.
6 |
7 |
8 | ### Lecture video
9 |
10 | [video](https://drive.google.com/file/d/1cK5Iu-NSUQkSUPq6x4wnbr7jwuQ32rbZ/view?usp=sharing)
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-nn-mlps-multilayer-FNNs.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/18_boosting/18-02-gradient-boosting-concept.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 18.02: Boosting Concept"
3 | weight: 18002
4 | ---
5 | In this section, we discuss the general boosting principle: performing gradient descent in function space by repeatedly fitting new base learner components to the current pseudo-residuals.
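The following minimal sketch illustrates this principle for the squared loss, where the pseudo-residuals are (up to a factor) just the ordinary residuals; it is illustrative only, and the step size, number of iterations, and use of sklearn stumps are assumptions, not the lecture's setup:

```python
import numpy as np
from sklearn.tree import DecisionTreeRegressor

rng = np.random.default_rng(1)
X = rng.uniform(0, 10, size=(200, 1))
y = np.sin(X[:, 0]) + rng.normal(scale=0.1, size=200)

f = np.full_like(y, y.mean())               # start from the optimal constant model
learning_rate, learners = 0.1, []
for _ in range(100):
    residuals = y - f                        # pseudo-residuals of the squared loss
    stump = DecisionTreeRegressor(max_depth=1).fit(X, residuals)
    f += learning_rate * stump.predict(X)    # "gradient step" in function space
    learners.append(stump)

print("train MSE:", np.mean((y - f) ** 2))
```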
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="cM4JeV7FLCA" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-boosting-gradient-boosting-concept.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/01_ml_basics/01-07-optimization.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 01.07: Optimization"
3 | weight: 1007
4 | ---
5 | In this section we study parameter optimization as computational solution to machine learning problems. We address pitfalls in non-convex optimization problems and introduce the fundamental concept of gradient descent.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="W25BTei5zQU">}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-basics-optimization.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/04-04-test.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04.04: Test Error"
3 | weight: 4004
4 | ---
5 | While we can infer some information about the learning process from training errors (e.g., the state of iterative optimization), we are truly interested in generalization ability, and thus in the test error on previously unseen data.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="ikz87m84um8" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-test.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/08_neural_networks/08-01-intro.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 08.01: Introduction"
3 | weight: 8001
4 | ---
5 | In this section, we introduce the relationship between DL and ML, give a basic introduction to feature learning, and discuss the use cases and data types for DL methods.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | [video](https://drive.google.com/file/d/1SiN3Zo12lUOrTY5ISBCDemNbS-vlwUa8/view?usp=sharing)
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-nn-intro-introduction.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-12-classification-further-losses.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.12: Advanced Classification Losses"
3 | weight: 11012
4 | ---
5 | In this section, we introduce and discuss the following advanced classification losses: (squared) hinge loss, \\(L2\\) loss on scores, exponential loss, and AUC loss.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="XNk2NpX5K3A" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-advriskmin-classification-furtherlosses.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/12_multiclass/12-03-binary-reduction.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 12.03: One-vs-One and One-vs-Rest"
3 | weight: 12003
4 | ---
5 | It is sometimes advisable to address a multiclass problem as a set of binary ones. We discuss two ways to reduce a multiclass problem to multiple binary classification problems: one-vs-one and one-vs-rest.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="Q24cKI_BS7Q" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-mc-binary-reduction.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-13-proper-scoring-rules.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.13: Proper Scoring Rules"
3 | weight: 11013
4 | ---
5 | We dive into how proper scoring rules, such as the log loss and the Brier score (unlike the L1 loss), yield probability-calibrated predictions by satisfying a first-order optimality condition.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="1nydD0QSnXk" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/blob/main/slides-pdf/slides-advriskmin-proper-scoring-rules.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/13_information_theory/13-04-kl.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 13.04: Kullback-Leibler Divergence"
3 | weight: 13004
4 | ---
5 | The Kullback-Leibler divergence (KL) is an important quantity for measuring the difference between two probability distributions. We discuss different intuitions for KL and relate it to risk minimization and likelihood ratios.
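For discrete distributions (generic notation), \\(D_{KL}(p \Vert q) = \sum_x p(x) \log \frac{p(x)}{q(x)}\\); it is nonnegative, zero iff \\(p = q\\), and not symmetric in its arguments.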
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="7ZaY4fvuFg0" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-info-kl.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/20_imbalanced_learning/20-02-perf-msr.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 20.02: Performance Measures"
3 | weight: 20002
4 | ---
5 | We introduce performance measures other than accuracy and explain their advantages over accuracy for imbalanced data. In addition, we introduce extensions of these measures for multiclass settings.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="50F3KlzyG6o" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-imbalanced-learning-performance-measures.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-09-bernoulli-deep-dive.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.09: Some details on classification losses: Deep Dive"
3 | weight: 11009
4 | ---
5 | In this section, we will discuss the equivalence of different classification losses and derive the risk minimizers in various settings. This section is presented as a **deep-dive**. Please note that there are no videos accompanying this section.
6 |
7 |
8 |
9 | ### Lecture slides
10 |
11 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/blob/main/slides-pdf/slides-advriskmin-some-details-deepdive.pdf" >}}
12 |
--------------------------------------------------------------------------------
/content/chapters/21_multitarget_learning/21-02-losses.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 21.02: Loss functions"
3 | weight: 21002
4 | ---
5 | In this chapter we introduce loss functions for multi-target prediction problems, explain the differences between instance-wise and decomposable losses, and introduce the risk minimizers of both the Hamming loss and the 0/1 subset loss.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="0HsXwhvb5Sc" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-multitarget-losses.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/09_tuning/09-05-tuning-pipelines.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 09.05: Pipelines and AutoML"
3 | weight: 9005
4 | ---
5 | Some aspects of the machine learning lifecycle can be automated via
6 | AutoML. In this section we look into pipelines as part of AutoML and how (HPO-) pipelines can be represented as directed acyclic graphs (DAGs).
7 |
8 |
9 |
10 |
11 | ### Lecture video
12 |
13 | {{< video id="BB_8zQVCN3Q" >}}
14 |
15 | ### Lecture slides
16 |
17 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-tuning-pipelines.pdf" >}}
18 |
--------------------------------------------------------------------------------
/content/chapters/15_regularization/15-10-geom-l2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 15.10: Geometry of L2 Regularization"
3 | weight: 15010
4 | ---
5 | In this section, we provide a geometric understanding of \\(L2\\) regularization, showing how parameters are shrunk according to the eigenvalues of the Hessian of empirical risk, and discuss its correspondence to weight decay.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="PhOL5HB8FIw" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-regu-geom-l2.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-08-classification-bernoulli.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.08: Bernoulli Loss"
3 | weight: 11008
4 | ---
5 | We study the Bernoulli loss and derive its risk minimizer and optimal constant model. We further discuss the connection between Bernoulli loss minimization and tree splitting according to the entropy criterion.
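For orientation, on the probability scale the loss reads \\(L(y, \pi) = -y \log(\pi) - (1 - y) \log(1 - \pi)\\) for labels \\(y \in \lbrace 0, 1 \rbrace\\), and its pointwise risk minimizer is the true conditional probability \\(\pi(x) = P(y = 1 \mid x)\\).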
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="F_o_QrW2P6M" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-advriskmin-classification-bernoulli.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/21_multitarget_learning/21-01-intro.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 21.01: Introduction"
3 | weight: 21001
4 | ---
5 | In this chapter we emphasize the practical relevance of multi-target prediction problems. In addition, we name some special cases of multi-target prediction and establish the differences between transductive and inductive learning problems.
6 |
7 |
8 | ### Lecture video
9 |
10 | {{< video id="Or2clVNB5Bo" >}}
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/blob/main/slides-pdf/slides-multitarget-intro.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/13_information_theory/13-06-ml.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 13.06: Information Theory for Machine Learning"
3 | weight: 13006
4 | ---
5 | In this section, we discuss how information-theoretic concepts are used in machine learning and demonstrate the equivalence of KL minimization and maximum likelihood maximization, as well as how (cross-)entropy can be used as a loss function.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="setfSvof9Io" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-info-ml.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/13_information_theory/13-07-mutual-info.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 13.07: Joint Entropy and Mutual Information I"
3 | weight: 13007
4 | ---
5 | Information theory also provides means of quantifying relations between two random variables that extend the concept of (linear) correlation. We discuss joint entropy, conditional entropy, and mutual information in this context.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="1iEYzS5NqL4" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-info-mutual-info.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/07_forests/07-04-featureimportance.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 07.04: Feature Importance"
3 | weight: 7004
4 | quizdown: true
5 | ---
6 |
7 | In a complex machine learning model, the contributions of the different features to the model performance are difficult to evaluate. The concept of feature importance allows to quantify these effects for random forests.
8 |
9 |
10 |
11 | ### Lecture video
12 |
13 | {{< video id="8h3H0j2f24I" >}}
14 |
15 | ### Lecture slides
16 |
17 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-forests-featureimportance.pdf" >}}
18 |
--------------------------------------------------------------------------------
/content/chapters/13_information_theory/13-08-mutual-info2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 13.08: Joint Entropy and Mutual Information II"
3 | weight: 13008
4 | ---
5 | Information theory also provides means of quantifying relations between two random variables that extend the concept of (linear) correlation. We discuss joint entropy, conditional entropy, and mutual information in this context.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="DQxMJENXIG8" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-info-mutual-info2.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/19_gaussian_processes/19-02-basic.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 19.02: Gaussian Processes"
3 | weight: 19002
4 | ---
5 | In this section, we introduce the basic idea behind Gaussian processes. We move from weight to function space and build some intuition on distributions over functions, discuss GPs' marginalization property, derive GP priors, and interpret GPs as indexed families.
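
To build some intuition for distributions over functions, the following NumPy sketch (illustrative only, with an arbitrarily chosen squared-exponential kernel) draws functions from a zero-mean GP prior:

```python
import numpy as np

def sq_exp_kernel(a, b, length_scale=1.0):
    """Squared-exponential covariance between two 1-D input grids."""
    return np.exp(-0.5 * (a[:, None] - b[None, :]) ** 2 / length_scale ** 2)

x = np.linspace(0, 5, 100)                       # finite set of input points
K = sq_exp_kernel(x, x) + 1e-8 * np.eye(len(x))  # jitter for numerical stability

# GP prior: the function values at any finite set of inputs are jointly
# Gaussian, here with zero mean and covariance given by the kernel.
rng = np.random.default_rng(0)
prior_draws = rng.multivariate_normal(np.zeros(len(x)), K, size=3)
print(prior_draws.shape)   # 3 sampled functions, evaluated on the grid
```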
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="Uv54SlxflhQ" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/blob/main/slides-pdf/slides-gp-basic.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/00_all/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "All Slides Chapters 1-10 and 11-19"
3 | ---
4 |
5 |
6 |
7 | Complete PDF of all lecture slides from chapters 1-10:
8 |
9 | [Download](https://drive.google.com/file/d/1_-OmWkw19x5dlfmSt0Gk5fLGicB3z4Jd)
10 |
11 | Complete PDF of all lecture slides from chapters 11-19:
12 |
13 | [Download](https://drive.google.com/file/d/1og1D6s2tsPta7EGYmDlrxOvcQ6f4F9ka)
14 |
15 |
19 |
--------------------------------------------------------------------------------
/content/chapters/08_neural_networks/08-04-NN4multiclass.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 08.04: Single Hidden Layer Networks for Multi-Class Classification"
3 | weight: 8004
4 | ---
5 | In this section, we discuss a neural network architecture for multi-class classification, the softmax activation function, as well as the softmax loss.
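
A minimal NumPy sketch of the softmax function (illustrative, not taken from the slides): it maps the raw class scores of the output layer to probabilities that sum to one.

```python
import numpy as np

def softmax(z):
    """Turn a vector of class scores into class probabilities."""
    z = z - np.max(z)      # shift scores for numerical stability
    e = np.exp(z)
    return e / e.sum()

scores = np.array([2.0, 1.0, 0.1])   # one score per class
print(softmax(scores))               # probabilities, summing to 1
```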
6 |
7 |
8 | ### Lecture video
9 |
10 | [video](https://drive.google.com/file/d/1UQzYXirQsYPfSPf-u8JY2S21lGnLhPxK/view?usp=sharing)
11 |
12 | ### Lecture slides
13 |
14 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-nn-mlps-multiclass-classification.pdf" >}}
15 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-04-regression-l2-l1.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.04: Regression Losses: L2 and L1 loss"
3 | weight: 11004
4 | ---
5 | In this section, we revisit L2 and L1 loss, highlighting that their risk minimizers are the conditional mean and median, respectively, and that their optimal constant models correspond to the empirical mean and median of observed targets.
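
In symbols, the two central results of this section read (a summary; the derivations are in the slides):

$$ \arg\min_c E[(y - c)^2 \mid x] = E[y \mid x], \qquad \arg\min_c E[\vert y - c \vert \mid x] = \text{med}(y \mid x), $$

with the empirical mean and the empirical median of the observed targets as the corresponding optimal constant models.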
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="4CqLlzUoVWU" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-advriskmin-regression-l2-l1.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/08_neural_networks/08-03-single-hidden-layer.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 08.03: Single Hidden Layer NN"
3 | weight: 8003
4 | ---
5 | We introduce the architecture of single hidden layer neural networks and discuss the advantage of hidden layers. Then, we explain the typical (non-linear) activation
6 | functions.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | [video](https://drive.google.com/file/d/1OL3RSWHPvwzt1dDJ6DlZ6Md6cVUX0MST/view?usp=sharing)
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-nn-mlps-single-hidden-layer-networks.pdf" >}}
17 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-14-tree-splitting-deep-dive.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.14: Loss functions and Tree Splitting: Deep Dive"
3 | weight: 11014
4 | ---
5 | Learn how minimizing Bernoulli (log) loss yields entropy‐based splits and minimizing the Brier score yields Gini‐based splits, unifying impurity and risk views for optimal tree splitting. This section is presented as a **deep-dive**. Please note that there are no videos accompanying this section.
6 |
7 |
8 |
9 | ### Lecture slides
10 |
11 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-advriskmin-tree-splitting-deepdive.pdf" >}}
12 |
--------------------------------------------------------------------------------
/content/chapters/09_tuning/09-04-tuning-advanced.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 09.04: Advanced Tuning Techniques"
3 | weight: 9004
4 | ---
5 | Besides grid search and random search there are several more advanced techniques for hyperparameter optimization. In this section we focus on model-based optimization methods such as Bayesian optimization. Furthermore, we look into multi-fidelity methods such as the hyperband algorithm.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="z84ArbJuJmQ" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-tuning-advanced.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/01_ml_basics/01-04-models-parameters.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 01.04: Models and Parameters"
3 | weight: 1004
4 | ---
5 | We introduce models as functional hypotheses about the mapping from feature to target space that allow us to make predictions by computing a function of the input data. Frequently in machine learning, models are understood to be parameterized curves, which is illustrated by several examples.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="ImjHkMc_esc">}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-basics-models-parameters.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/11_advriskmin/11-15-max-likelihood.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 11.15: Maximum Likelihood Estimation vs. Empirical Risk Minimization"
3 | weight: 11015
4 | ---
5 | We discuss the connection between maximum likelihood estimation and risk minimization, then demonstrate the correspondence between a Gaussian error distribution and \\(L2\\) loss and that alternative likelihoods give rise to the \\(L1\\) and Bernoulli loss.
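
The \\(L2\\) case can be summarized as follows (a sketch of the argument; see the slides for the full derivation): assuming i.i.d. Gaussian errors with fixed variance \\(\sigma^2\\), the negative log-likelihood is

$$ - \sum_{i=1}^n \log \frac{1}{\sqrt{2 \pi \sigma^2}} \exp\Big( - \frac{(y^{(i)} - f(x^{(i)}))^2}{2 \sigma^2} \Big) = \text{const} + \frac{1}{2 \sigma^2} \sum_{i=1}^n (y^{(i)} - f(x^{(i)}))^2, $$

so maximizing the likelihood is equivalent to minimizing the empirical risk under \\(L2\\) loss.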
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="zrLxo3Evx_A" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-advriskmin-max-likelihood.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/16_linear_svm/16-03-soft-margin.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 16.03: Soft Margin SVM"
3 | weight: 16003
4 | ---
5 | Hard margin SVMs are often not applicable to practical questions because they fail when the data are not linearly separable. Moreover, for the sake of generalization, we will often accept some violations to keep the margin large enough for robust class separation. Therefore, we introduce the soft margin linear SVM.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="4wLJVWSLowo" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_sl/raw/main/slides-pdf/slides-linsvm-soft-margin.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/08_neural_networks/08-02-single-neuron.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 08.02: Single Neuron"
3 | weight: 8002
4 | ---
5 | In this section we explain the graphical representation of a single neuron and describe affine transformations and non-linear activation functions. Moreover, we talk about the hypothesis spaces of a single neuron and name some typical loss functions.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | [video](https://drive.google.com/file/d/1EMIA3KPyyAWMfwyr3WUGao3JZDQceiPC/view?usp=sharing)
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-nn-mlps-single-neuron.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/10_nested_resampling/10-02-trainvalidtest.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 10.02: Training - Validation - Testing"
3 | weight: 10002
4 | quizdown: true
5 | ---
6 | The simplest method to achieve an untouched test set is a 3-way split: the models are first trained on the *training set* and then evaluated and compared on the *validation set*. After selecting the best model, its final performance is evaluated on the *test set*.
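
A minimal Python/scikit-learn sketch of such a 3-way split (proportions chosen arbitrarily for illustration):

```python
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, random_state=0)

# First split off an untouched test set (20%), then split the remaining
# data into training and validation sets (60% / 20% of the total).
X_rest, X_test, y_rest, y_test = train_test_split(X, y, test_size=0.2,
                                                  random_state=0)
X_train, X_valid, y_train, y_valid = train_test_split(X_rest, y_rest,
                                                      test_size=0.25,
                                                      random_state=0)
print(len(y_train), len(y_valid), len(y_test))   # 600 200 200
```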
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="8LdpxLyH34c" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-nested-trainvalidtest.pdf" >}}
17 |
--------------------------------------------------------------------------------
/content/chapters/01_ml_basics/01-01-what_is_ml.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 01.01: What is ML?"
3 | weight: 1001
4 | ---
5 | As a subtopic of artificial intelligence, machine learning is a mathematically well-defined discipline that usually constructs predictive or decision models from data instead of explicitly programming them. In this section, you will see some typical examples of where machine learning is applied, as well as the main directions of machine learning.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="AvpS4lyLB_c" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-basics-whatisml.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/04-11-partialauc-mcauc.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04.11: Partial AUC & Multi-Class AUC"
3 | weight: 4011
4 | ---
5 |
6 | We discuss both the partial AUC, which restricts the AUC to the relevant area
7 | for a specific application, and possible extensions of the AUC to multi-class
8 | classification.
9 |
10 |
11 |
12 | ### Lecture video
13 |
14 | {{< video id="JGag-IwJ93E" >}}
15 |
16 | ### Lecture slides
17 |
18 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-partialauc.pdf" >}}
19 |
20 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-mcauc.pdf" >}}
21 |
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/04-07-resampling-2.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04.07: Resampling 2"
3 | weight: 4007
4 | ---
5 |
6 | We provide a deep dive on resampling, showing its superiority to holdout
7 | splitting and analyzing the bias-variance decomposition of its MSE.
8 | We further point out the dependence between CV fold results and that
9 | hypothesis testing is therefore not applicable, and give some practical tips
10 | for choosing resampling strategies.
11 |
12 |
13 |
14 | ### Lecture video
15 |
16 | {{< video id="7O2N5idB8iw" >}}
17 |
18 | ### Lecture slides
19 |
20 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-resampling-2.pdf" >}}
21 |
--------------------------------------------------------------------------------
/content/chapters/06_cart/06-03-splitcriteria-regression.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 06.03: Splitting Criteria for Regression"
3 | weight: 6003
4 | ---
5 | CART algorithms require splitting criteria for trees, which are usually defined in terms of impurity reduction. In this section we formalize the idea of splitting criteria and explain the details of splitting. We start with regression and, in doing so, show how split criteria fit into our framework of empirical risk minimization.
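
The following NumPy sketch (an illustration with made-up data, not from the slides) shows the ERM view of splitting: every candidate split is scored by the sum of squared errors of the mean predictions in the two child nodes.

```python
import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
y = np.array([1.1, 0.9, 1.0, 3.9, 4.1, 4.0])

def sse(v):
    """Empirical L2 risk of the best constant prediction (the mean)."""
    return np.sum((v - v.mean()) ** 2)

# Score every split "x <= t" by the total SSE of the two child nodes
# and pick the split with the lowest risk.
candidates = (x[:-1] + x[1:]) / 2
risks = [sse(y[x <= t]) + sse(y[x > t]) for t in candidates]
print("best split at x <=", candidates[int(np.argmin(risks))])   # 3.5
```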
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="3Zzp64RgUoY" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-cart-splitcriteria-regression.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/03_supervised_classification/03-02-classification-basicdefs.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 03.02: Basic Definitions"
3 | weight: 3002
4 | ---
5 | Although we are primarily interested in actual class labels, classification models usually output scores or probabilities first. We will explain why, introduce the concepts of decision regions and decision boundaries, and distinguish between two fundamental approaches to constructing classifiers: the generative approach and the discriminant approach.
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="zIzIi8YI_xI" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-classification-basicdefs.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/06_cart/06-04-splitcriteria-classification.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 06.04: Splitting Criteria for Classification"
3 | weight: 6004
4 | ---
5 | We extend splitting criteria to classification tasks. Here, we see that there are analogies between ERM and impurity reduction. While these analogies are interesting, proving the equivalence of ERM and impurity reduction is beyond the scope of this section. The interested reader can refer to chapter 11 of this lecture, where we prove the equivalence.
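
As a small illustration of the impurity view (a NumPy sketch, not from the slides), both entropy and the Gini index measure how mixed the class labels in a node are:

```python
import numpy as np

def entropy(labels):
    """Entropy impurity of a node, computed from its class proportions."""
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()
    return -np.sum(p * np.log2(p))

def gini(labels):
    """Gini impurity of a node, computed from its class proportions."""
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()
    return 1 - np.sum(p ** 2)

node = np.array([0, 0, 0, 1, 1, 1, 1, 1])   # class labels in one node
print(round(entropy(node), 3), round(gini(node), 3))
```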
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="Q0DSNh6qEQw" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-cart-splitcriteria-classification.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/04-08-measures-classification.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04.08: Measures Classification"
3 | weight: 4008
4 | ---
5 | Analogous to regression, we consider essential performance measures for classification. As a classifier predicts either class labels or scores/probabilities, its performance can be evaluated based on these two notions. We show some performance measures for classification, including misclassification error rate (MCE), accuracy (ACC) and Brier score (BS). In addition, we will see confusion matrices and learn about costs.
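
A small NumPy sketch with made-up predictions (illustrative only), computing the three measures mentioned above:

```python
import numpy as np

y_true = np.array([1, 0, 1, 1, 0, 1])             # observed labels
prob = np.array([0.9, 0.2, 0.4, 0.8, 0.6, 0.7])   # predicted P(y = 1)
y_hat = (prob >= 0.5).astype(int)                  # labels via 0.5 threshold

acc = np.mean(y_hat == y_true)          # accuracy (ACC)
mce = np.mean(y_hat != y_true)          # misclassification error rate (MCE)
brier = np.mean((prob - y_true) ** 2)   # Brier score (BS)
print(acc, mce, round(brier, 3))
```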
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="K8rUkpq6IYg" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-measures-classification.pdf" >}}
16 |
--------------------------------------------------------------------------------
/content/contributing/_index.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | __If you love teaching ML and have free resources available, please consider joining the team and email us now! (bernd.bischl@stat.uni-muenchen.de or ludwig.bothmann@stat.uni-muenchen.de).__
4 |
5 | Our contributing guidelines may be found [here](https://github.com/slds-lmu/lecture_i2ml/blob/master/CONTRIBUTING.md).
6 |
7 |
8 | ## License
9 |
10 | [CC BY 4.0](http://creativecommons.org/licenses/by/4.0/)
11 | This work is licensed under a [Creative Commons Attribution 4.0 International License](http://creativecommons.org/licenses/by/4.0/).
12 |
13 | We are developing the course on [GitHub](https://github.com/slds-lmu/lecture_i2ml).
14 |
15 | We would appreciate it if you contacted us in case you are re-using our course.
16 | Knowing this helps us keep the project alive. Thank you!
17 |
--------------------------------------------------------------------------------
/content/chapters/07_forests/07-03-oob-error.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 07.03: Out-of-Bag Error Estimate"
3 | weight: 7003
4 | quizdown: true
5 | ---
6 |
7 | We introduce the concepts of in-bag and out-of-bag observations and explain how to compute the out-of-bag error estimate.
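
As an illustrative sketch in Python with scikit-learn, the out-of-bag estimate comes essentially for free when fitting a random forest:

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=1000, n_features=10, random_state=0)

# Each tree is fit on a bootstrap sample and leaves out roughly a third of
# the observations; these out-of-bag observations yield a built-in estimate.
rf = RandomForestClassifier(n_estimators=300, oob_score=True,
                            random_state=0).fit(X, y)
print("OOB accuracy:", rf.oob_score_)
print("OOB error   :", 1 - rf.oob_score_)
```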
8 |
9 |
10 |
11 | ### Lecture video
12 |
13 | {{< video id="gucPQxcqPcY" >}}
14 |
15 | ### Lecture slides
16 |
17 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-forests-oob.pdf" >}}
18 |
19 | ### Quiz
20 |
21 | {{< quizdown >}}
22 |
23 | ---
24 | shuffle_questions: false
25 | ---
26 |
27 | ## Which statements are true?
28 |
29 | - [x] The OOB error shares similarities with cross-validation estimation. It can also be used for a quicker model selection.
30 | - [x] In random forests for classification, a good rule of thumb is to use mtry = $\sqrt{p}$.
31 |
32 | {{< /quizdown >}}
--------------------------------------------------------------------------------
/content/chapters/07_forests/07-01-bagging.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 07.01: Bagging Ensembles"
3 | weight: 7001
4 | quizdown: true
5 | ---
6 | Bagging (bootstrap aggregation) is a method for combining many models into a meta-model which often works much better than its individual components. In this section, we present the basic idea of bagging and explain why and when bagging works.
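
A minimal Python/scikit-learn sketch of the idea (illustrative only): bagging fits the same unstable base learner on many bootstrap samples and aggregates their predictions.

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=600, n_features=15, random_state=2)

# A single deep (unstable) tree vs. an ensemble of 100 bagged trees.
tree = DecisionTreeClassifier(random_state=2)
bagged = BaggingClassifier(DecisionTreeClassifier(random_state=2),
                           n_estimators=100, random_state=2)

print("single tree CV accuracy :", cross_val_score(tree, X, y, cv=5).mean())
print("bagged trees CV accuracy:", cross_val_score(bagged, X, y, cv=5).mean())
```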
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="S4Sa6YEXq7g" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-forests-bagging.pdf" >}}
17 |
18 | ### Quiz
19 |
20 | {{< quizdown >}}
21 |
22 | ---
23 | shuffle_questions: false
24 | ---
25 |
26 | ## Which statements are true?
27 |
28 | - [x] Bagging works best for unstable learners.
29 | - [ ] For stable estimation methods, bagging always reduces performance.
30 |
31 | {{< /quizdown >}}
--------------------------------------------------------------------------------
/content/chapters/02_supervised_regression/02-04-polynomials.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 02.04: Polynomial Regression Models"
3 | weight: 2004
4 | quizdown: true
5 | ---
6 | This section introduces polynomials to obtain more flexible models for the regression task. We explain the connection to the basic linear model and discuss the problem of overfitting.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="dsO2sNiuxug" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-regression-polynomials.pdf" >}}
17 |
18 | ### Quiz
19 |
20 | {{< quizdown >}}
21 |
22 | ---
23 | shuffle_questions: false
24 | ---
25 |
26 | ## Which statements are true?
27 |
28 | - [ ] With linear regression it is only possible to model linear effects of a feature.
29 | - [x] Overfitting is a present danger in polynomial regression.
30 |
31 | {{< /quizdown >}}
32 |
33 |
34 |
--------------------------------------------------------------------------------
/content/chapters/coding_ml_python/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Coding ML [Python and sklearn]"
3 | ---
4 | This section introduces basic concepts and implementations using Python and, in particular, sklearn.
5 |
6 | 1. Installation: [Notebook](https://github.com/slds-lmu/lecture_i2ml/blob/master/python-intro/01-introduction-to-python-installation.ipynb)
7 |
8 | 2. Python basics: [Notebook](https://github.com/slds-lmu/lecture_i2ml/blob/master/python-intro/02-introduction-to-python-data_types.ipynb)
9 |
10 | 3. The `NumPy` package: [Notebook](https://github.com/slds-lmu/lecture_i2ml/blob/master/python-intro/03-introduction-to-python-packages_numpy.ipynb)
11 |
12 | 4. The `pandas` package: [Notebook](https://github.com/slds-lmu/lecture_i2ml/blob/master/python-intro/04-introduction-to-python-packages_pandas.ipynb)
13 |
14 | 5. The `scikit-learn` package: [Notebook](https://github.com/slds-lmu/lecture_i2ml/blob/master/python-intro/05-introduction-to-python-packages_sklearn.ipynb)
15 |
--------------------------------------------------------------------------------
/.github/workflows/gh-pages.yml:
--------------------------------------------------------------------------------
1 | name: github pages
2 |
3 | on:
4 | push:
5 | branches:
6 | - main # Set a branch to deploy
7 | pull_request:
8 |
9 | jobs:
10 | deploy:
11 | runs-on: ubuntu-24.04
12 | concurrency:
13 | group: ${{ github.workflow }}-${{ github.ref }}
14 | steps:
15 | - uses: actions/checkout@v4
16 | with:
17 | submodules: true # Fetch Hugo themes (true OR recursive)
18 | fetch-depth: 0 # Fetch all history for .GitInfo and .Lastmod
19 |
20 | - name: Setup Hugo
21 | uses: peaceiris/actions-hugo@v2
22 | with:
23 | hugo-version: '0.145.0'
24 | # extended: true
25 |
26 | - name: Build
27 | run: hugo --minify
28 |
29 | - name: Deploy
30 | uses: peaceiris/actions-gh-pages@v3
31 | if: ${{ github.ref == 'refs/heads/main' }}
32 | with:
33 | github_token: ${{ secrets.GITHUB_TOKEN }}
34 | publish_dir: ./public
35 |
--------------------------------------------------------------------------------
/content/chapters/09_tuning/09-02-tuning-tuningproblem.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 09.02: Problem Definition"
3 | weight: 9002
4 | quizdown: true
5 | ---
6 | Hyperparameter tuning is the process of finding good model hyperparameters. In this section we formalize the problem of tuning and explain why tuning is computationally hard.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="3Su1jA6Ed1g" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-tuning-tuningproblem.pdf" >}}
17 |
18 | ### Quiz
19 |
20 | {{< quizdown >}}
21 |
22 | ---
23 | shuffle_questions: false
24 | ---
25 |
26 | ## Which statements are true?
27 |
28 | - [x] Tuning means optimizing hyperparameters.
29 | - [x] Doing tuning well is hard; nested resampling can help.
30 | - [x] Good tuning is crucial to achieving good performance for most ML algorithms.
31 | - [ ] Tuning optimizes the inner loss.
32 |
33 | {{< /quizdown >}}
34 |
--------------------------------------------------------------------------------
/content/chapters/10_nested_resampling/10-03-nestedresampling.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 10.03: Nested Resampling"
3 | weight: 10003
4 | ---
5 | In this section, we will explain why and how nested resampling is done.
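
A compact Python/scikit-learn sketch of the idea (illustrative only; the code demo below uses R): the tuning procedure itself is wrapped inside an outer resampling loop.

```python
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.svm import SVC

X, y = make_classification(n_samples=300, random_state=0)

# Inner loop: 3-fold CV selects the hyperparameter C of the SVM.
inner = GridSearchCV(SVC(), {"C": [0.1, 1, 10]}, cv=3)

# Outer loop: 5-fold CV evaluates the whole tuning procedure on data the
# inner loop never saw, giving an honest performance estimate.
outer_scores = cross_val_score(inner, X, y, cv=5)
print("nested CV accuracy:", outer_scores.mean())
```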
6 |
7 |
8 |
9 | ### Lecture video
10 |
11 | {{< video id="-d338rc076s" >}}
12 |
13 | ### Lecture slides
14 |
15 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-nested-nestedresampling.pdf" >}}
16 |
17 | ### Code demo
18 |
19 | **Kaggle Challenge**
20 |
21 | You can run the code snippets in the demos on your local machine. The corresponding Rmd version of this demo can be found [here](https://github.com/compstat-lmu/lecture_i2ml/blob/master/code-demos/code_demo_kaggle.Rmd). If you want to render the Rmd files to PDF, you need the accompanying [style files](https://github.com/compstat-lmu/lecture_i2ml/tree/master/style).
22 |
23 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/code-demos-pdf/code_demo_kaggle.pdf" >}}
24 |
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/04-01-generalization-error.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04.01: Generalization Error"
3 | weight: 4001
4 | quizdown: true
5 | ---
6 | It is a crucial part of machine learning to evaluate the performance of a learner. We will explain the concept of generalization error and the difference between inner and outer loss.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="gkQ6jXeIEzU" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-generr.pdf" >}}
17 |
18 | ### Quiz
19 |
20 | {{< quizdown >}}
21 |
22 | ---
23 | shuffle_questions: false
24 | ---
25 |
26 | ## Which statements are true?
27 |
28 | - [x] We want to know how accurately a fitted model will predict target variable values for new, previously unseen data.
29 | - [ ] Outer loss and inner loss are the same thing.
30 | - [x] The data-generating process is the true underlying phenomenon creating the data.
31 |
32 | {{< /quizdown >}}
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/04-02-measures-regression.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04.02: Measures Regression"
3 | weight: 4002
4 | quizdown: true
5 | ---
6 | In this section we familiarize ourselves with essential performance measures for regression. In particular, mean squared error (MSE), mean absolute error (MAE), and a straightforward generalization of $R^2$ are discussed.
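
A tiny NumPy sketch with made-up numbers (illustrative only), computing the three measures directly from their definitions:

```python
import numpy as np

y_true = np.array([3.1, 0.5, 2.0, 7.3, 4.4])   # observed targets
y_pred = np.array([2.8, 0.9, 2.5, 6.0, 4.6])   # model predictions

mse = np.mean((y_true - y_pred) ** 2)            # mean squared error
mae = np.mean(np.abs(y_true - y_pred))           # mean absolute error
r2 = 1 - np.sum((y_true - y_pred) ** 2) / np.sum((y_true - y_true.mean()) ** 2)
print(round(mse, 3), round(mae, 3), round(r2, 3))
```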
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="FUHs6hu88MI" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-measures-regression.pdf" >}}
17 |
18 | ### Quiz
19 |
20 | {{< quizdown >}}
21 |
22 | ---
23 | shuffle_questions: false
24 | ---
25 |
26 | ## Which statements are true?
27 |
28 | - [x] The MSE is used in the conventional linear model to find the best parameter estimates.
29 | - [x] On test data, $R^2$ can be smaller than 0 for linear models.
30 | - [x] The MAE is equivalent to $L1$ loss.
31 | - [ ] The MSE is equivalent to $L1$ loss.
32 |
33 | {{< /quizdown >}}
--------------------------------------------------------------------------------
/content/chapters/06_cart/06-06-stoppingpruning.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 06.06: Stopping Criteria & Pruning"
3 | weight: 6006
4 | quizdown: true
5 | ---
6 | The recursive partitioning procedure used to grow a CART usually leads to problems such as exponential growth of computations, overfitting, and the horizon effect. To deal with these problems, we can use stopping criteria and pruning. In this section, we explain the basis of these two solutions.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="8CSGRjM8E0g" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-cart-stoppingpruning.pdf" >}}
17 |
18 | ### Quiz
19 |
20 | {{< quizdown >}}
21 |
22 | ---
23 | shuffle_questions: false
24 | ---
25 |
26 | ## Which statements are true?
27 |
28 | - [x] Pruning means cutting back the tree.
29 | - [x] Pruning means that the tree becomes less complex.
30 | - [ ] A tree with one observation per leaf node should have good generalization performance.
31 |
32 | {{< /quizdown >}}
33 |
--------------------------------------------------------------------------------
/content/chapters/10_nested_resampling/10-01-nestedintro.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 10.01: Motivation"
3 | weight: 10001
4 | quizdown: true
5 | ---
6 | Selecting the best model from a set of candidates is an important part of most machine learning problems. By examining an instructive and problematic example, we introduce the untouched-test-set principle.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="_GVysctg5sY" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-nested-nestedintro.pdf" >}}
17 |
18 | ### Code demo
19 |
20 | **Nested Resampling**
21 |
22 | You can run the code snippets in the demos on your local machine. The corresponding Rmd version of this demo can be found [here](https://github.com/compstat-lmu/lecture_i2ml/blob/master/code-demos/code_demo_nested.Rmd). If you want to render the Rmd files to PDF, you need the accompanying [style files](https://github.com/compstat-lmu/lecture_i2ml/tree/master/style).
23 |
24 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/code-demos-pdf/code_demo_nested.pdf" >}}
25 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Jakob Richter
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/04-09-rocbasics.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04.09: ROC Basics"
3 | weight: 4009
4 | quizdown: true
5 | ---
6 | From the confusion matrix we can calculate a variety of ROC metrics. Among others, we will explain true positive rate, negative predictive value and the $F1$ measure.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="fzuk7glzpC0" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-rocbasics.pdf" >}}
17 |
18 | ### Quiz
19 |
20 | {{< quizdown >}}
21 |
22 | ---
23 | shuffle_questions: false
24 | ---
25 |
26 | ## Which statements are true?
27 |
28 | - [x] Logistic regression minimizes the binomial loss.
29 | - [x] The Brier score is like the MSE, just with probabilities.
30 | - [ ] The log-loss punishes being very wrong less than the Brier score.
31 | - [ ] Accuracy and mean classification error are calculated using the predicted probabilities.
32 | - [x] The confusion matrix tabulates the true against predicted classes.
33 | - [ ] A misclassification error rate of 0.1% is always great.
34 |
35 | {{< /quizdown >}}
--------------------------------------------------------------------------------
/content/chapters/03_supervised_classification/03-05-classification-discranalysis.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 03.05: Discriminant Analysis"
3 | weight: 3005
4 | quizdown: true
5 | ---
6 | Discriminant analysis is a generative approach toward constructing a classifier. We distinguish between linear (LDA) and quadratic (QDA) discriminant analysis, where the latter is a more flexible approach subsuming the former.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="WHQLS9PBLig" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-classification-discranalysis.pdf" >}}
17 |
18 | ### Quiz
19 |
20 | {{< quizdown >}}
21 |
22 | ---
23 | shuffle_questions: false
24 | ---
25 |
26 | ## Which statements are true?
27 |
28 | - [ ] In LDA, each class density is modeled as a multivariate Gaussian with unequal covariance.
29 | - [x] LDA is a linear classifier.
30 | - [x] LDA follows a generative approach.
31 | - [ ] In QDA, each class density is modeled as a multivariate Gaussian with equal covariance.
32 | - [x] QDA follows a generative approach.
33 | - [x] QDA requires estimation of more parameters than LDA.
34 |
35 | {{< /quizdown >}}
--------------------------------------------------------------------------------
/content/chapters/06_cart/06-01-predictions.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 06.01: Predictions with CART"
3 | weight: 6001
4 | quizdown: true
5 | ---
6 | Decision trees are an important type of machine learning model and come in two main types: classification and regression trees. In this section, we explain the general idea of CART and show how these trees recursively divide the input space into ever smaller rectangular partitions.
7 | For now, we thus think of CART only as a predictor.
8 |
9 |
10 |
11 | ### Lecture video
12 |
13 | {{< video id="mouBj4qiHRs" >}}
14 |
15 | ### Lecture slides
16 |
17 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-cart-predictions.pdf" >}}
18 |
19 | ### Quiz
20 |
21 | {{< quizdown >}}
22 |
23 | ---
24 | shuffle_questions: false
25 | ---
26 |
27 | ## Which statements are true?
28 |
29 | - [x] The prediction function defined by a CART divides the feature space into disjoint hyperrectangles.
30 | - [x] The root node contains all data.
31 | - [ ] The leaf node contains all data.
32 | - [ ] Observations can end up in several leaf nodes at the same time.
33 | - [x] The CART algorithm recursively splits nodes into child nodes.
34 |
35 | {{< /quizdown >}}
36 |
--------------------------------------------------------------------------------
/content/chapters/02_supervised_regression/02-01-l2-loss.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 02.01: Linear Models with L2 Loss"
3 | weight: 2001
4 | quizdown: true
5 | ---
6 | In this section, we focus on the general concept of linear regression and explain how the linear regression model can be used from a machine learning perspective to predict a continuous numerical target variable. Furthermore, we introduce the \\(L2\\) loss in the context of linear regression and explain how its use results in an SSE-minimal model.
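
As a small illustration in Python/NumPy (a sketch with simulated data), the SSE-minimal parameters of a linear model can be obtained via least squares:

```python
import numpy as np

# Simulate data from y = 2 + 3x plus noise.
rng = np.random.default_rng(0)
x = rng.uniform(0, 5, size=50)
y = 2 + 3 * x + rng.normal(0, 0.5, size=50)

# Minimizing the L2 loss (sum of squared errors) over a linear hypothesis
# space is an ordinary least-squares problem with a closed-form solution.
X = np.column_stack([np.ones_like(x), x])        # design matrix with intercept
theta, *_ = np.linalg.lstsq(X, y, rcond=None)
print("estimated intercept and slope:", theta)
```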
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="ajqQ3NUuNzE" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-regression-linearmodel-l2.pdf" >}}
17 |
18 | ### Quiz
19 |
20 | {{< quizdown >}}
21 |
22 | ---
23 | shuffle_questions: false
24 | ---
25 |
26 | ## Which statements are true?
27 |
28 | - [x] The target in linear regression has to be numeric.
29 | - [ ] The features in linear regression have to be numeric.
30 | - [x] The classical linear model from statistics with Gaussian errors is linear regression with $L2$ loss.
31 | - [x] The hypothesis space of linear regression consists of linear functions of the features.
32 |
33 | {{< /quizdown >}}
34 |
--------------------------------------------------------------------------------
/content/prerequisites/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Prerequisites
3 | ---
4 |
5 | ## Maths & Statistics
6 |
7 | The course is targeted at ML beginners with a basic, university-level education in maths, statistics, and programming. The first part aims at a practical and operational understanding of concepts. It requires a basic (!) understanding of:
8 | - Linear algebra: vectors, matrices
9 | - Multivariate calculus: derivatives, gradients, integrals
10 | - Probability theory: random variables, distributions, expectation and variance
11 | - Statistics: not a lot -- parameter estimation and linear modeling from a statistics perspective will help but is not required
12 |
13 | The second part focuses on theoretical foundations and more complex algorithms. The general prerequisites are not really that different, but we often go more in-depth into topics, and some parts also contain proofs.
14 |
15 | ## Programming Language
16 |
17 | All of the covered concepts and algorithms are presented independently of any programming language. But of course we also offer a large variety of applied exercises and notebooks. These are currently in R, and often use the [mlr3](https://cran.r-project.org/web/packages/mlr3/index.html) package. We are working on offering the exercises in Python as well.
18 |
--------------------------------------------------------------------------------
/content/chapters/09_tuning/09-03-basicalgos.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 09.03: Basic Techniques"
3 | weight: 9003
4 | quizdown: true
5 | ---
6 | In this section we familiarize ourselves with two simple but popular tuning strategies, namely grid search and random search, and discuss their advantages and disadvantages.
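
A short Python/scikit-learn sketch contrasting the two strategies (illustrative only; the parameter ranges are arbitrary):

```python
from scipy.stats import loguniform
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC

X, y = make_classification(n_samples=400, random_state=3)

# Grid search: evaluates every point of a fixed, discretized grid.
grid = GridSearchCV(SVC(), {"C": [0.1, 1, 10], "gamma": [0.01, 0.1, 1]}, cv=5)
grid.fit(X, y)

# Random search: draws a fixed budget of points from (possibly continuous)
# distributions and typically explores the space more evenly.
rand = RandomizedSearchCV(SVC(), {"C": loguniform(1e-2, 1e2),
                                  "gamma": loguniform(1e-3, 1e1)},
                          n_iter=9, cv=5, random_state=3)
rand.fit(X, y)

print("grid search  :", grid.best_params_, round(grid.best_score_, 3))
print("random search:", rand.best_params_, round(rand.best_score_, 3))
```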
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="LSt1KcsHAK0" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-tuning-basicalgos.pdf" >}}
17 |
18 | ### Quiz
19 |
20 | {{< quizdown >}}
21 |
22 | ---
23 | shuffle_questions: false
24 | ---
25 |
26 | ## Which statements are true?
27 |
28 | - [x] How well tuning works depends on the learner and the impact of the hyperparameters on that learner.
29 | - [ ] Grid search often works better than random search.
30 | - [x] Grid search scales exponentially with the dimension of the search space.
31 | - [x] Grid search evaluates many points from the search space that aren't of interest.
32 | - [x] Random search works often better due to its better exploration of the search space.
33 | - [ ] Random search scales very well with the dimension of the search space.
34 | - [ ] Random search as well as grid search has the problem of discretization.
35 |
36 | {{< /quizdown >}}
37 |
--------------------------------------------------------------------------------
/content/chapters/06_cart/06-05-computationalaspects.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 06.05: Computational Aspects of Finding Splits"
3 | weight: 6005
4 | quizdown: true
5 | ---
6 | In this section, we explain the computational aspects of the node-splitting procedure, especially for nominal features. In addition, we illustrate how to deal with missing values.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="6yAgoECJAP0" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-cart-computationalaspects.pdf" >}}
17 |
18 | ### Quiz
19 |
20 | {{< quizdown >}}
21 |
22 | ---
23 | shuffle_questions: false
24 | ---
25 |
26 | ## Which statements are true?
27 |
28 | - [x] To find optimal splits, one iterates over all features, and for each feature over all possible split points.
29 | - [ ] To find optimal splits, we use the one that splits the data approximately in half in each step.
30 | - [x] To find optimal splits, we evaluate the possible splits only on the data that ended up in the parent node we are trying to split.
31 | - [x] The optimal split results in the lowest sum of empirical risks in the child nodes.
32 | - [ ] Monotone transformations of several features will change the structure of the tree.
33 | - [x] The CART algorithm cannot go on training if every node contains exactly one observation.
34 |
35 | {{< /quizdown >}}
36 |
--------------------------------------------------------------------------------
/content/chapters/05_knn/05-01-knn.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 05.01: k-Nearest Neighbors (k-NN)"
3 | weight: 5001
4 | quizdown: true
5 | ---
6 | We demonstrate that distances in feature space are crucial in \\(k\\)-NN regression / classification and show how we can form predictions by averaging / majority vote. As such, \\(k\\)-NN is a very local model that works without distributional assumptions.
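
A bare-bones NumPy sketch of the classification case (illustrative only): compute distances, look at the \\(k\\) closest training points, and take a majority vote.

```python
import numpy as np

def knn_predict(X_train, y_train, x, k=3):
    """Classify x by majority vote among its k nearest training points."""
    dist = np.linalg.norm(X_train - x, axis=1)   # Euclidean distances to x
    nearest = y_train[np.argsort(dist)[:k]]      # labels of the k closest
    return np.bincount(nearest).argmax()         # majority vote

X_train = np.array([[1.0, 1.0], [1.2, 0.8], [4.0, 4.2], [3.8, 4.0]])
y_train = np.array([0, 0, 1, 1])
print(knn_predict(X_train, y_train, np.array([1.1, 1.0])))   # -> 0
print(knn_predict(X_train, y_train, np.array([4.1, 3.9])))   # -> 1
```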
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="BMCgd1et_2E">}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-knn.pdf" >}}
17 |
18 | ### Quiz
19 |
20 | {{< quizdown >}}
21 |
22 | ---
23 | shuffle_questions: false
24 | ---
25 |
26 | ## Which statements are true?
27 |
28 | - [x] Choosing the distance metric is a crucial design decision for $k$-NN.
29 | - [ ] $k$-NN can only be used for classification tasks.
30 | - [x] $N_k(x)$ contains the subset of the feature space $\mathcal{X}$ that is at least as close to $x$ as the $k$-th closest neighbor of $x$ in the training data set.
31 | - [x] 1-NN always 'predicts' perfectly on observations of the training data set (if there are no observations with equal feature but different target values).
32 | - [x] $k$-NN with $k = n$ always predicts the same target variable value for all possible inputs (if no weights are used).
33 | - [ ] $k$-NN for classification is a probabilistic classifier.
34 |
35 | {{< /quizdown >}}
--------------------------------------------------------------------------------
/content/chapters/02_supervised_regression/02-03-l1-loss.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 02.03: Linear Models with L1 Loss"
3 | weight: 2003
4 | quizdown: true
5 | ---
6 | In this section, we introduce \\(L1\\) loss and elaborate on its differences from \\(L2\\) loss. In addition, we explain how the choice of loss affects optimization and robustness.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="kAZclFFAkhA" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-regression-linearmodel-l1.pdf" >}}
17 |
18 | ### Code demo
19 |
20 | **Linear model & gradient descent**
21 |
22 | You can run the code snippets in the demos on your local machine. The corresponding Rmd version of this demo can be found [here](https://github.com/compstat-lmu/lecture_i2ml/blob/master/code-demos/code_demo_limo.Rmd). If you want to render the Rmd files to PDF, you need the accompanying [style files](https://github.com/compstat-lmu/lecture_i2ml/tree/master/style).
23 |
24 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/code-demos-pdf/code_demo_limo.pdf" >}}
25 |
26 | ### Quiz
27 |
28 | {{< quizdown >}}
29 |
30 | ---
31 | shuffle_questions: false
32 | ---
33 |
34 | ## Which statements are true?
35 |
36 | - [ ] The absolute loss function is more sensitive to outliers than the quadratic loss function.
37 | - [x] Optimization of $L2$ loss is easier than of $L1$ loss.
38 |
39 | {{< /quizdown >}}
40 |
41 |
42 |
--------------------------------------------------------------------------------
/content/chapters/06_cart/06-07-discussion.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 06.07: Discussion"
3 | weight: 6007
4 | quizdown: true
5 | ---
6 | In this section we discuss the advantages and disadvantages of CART and mention other tree methodologies.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="Jrr2BCnYtlI" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-cart-discussion.pdf" >}}
17 |
18 | ### Code demo
19 |
20 | **CART**
21 |
22 | You can run the code snippets in the demos on your local machine. The corresponding Rmd version of this demo can be found [here](https://github.com/compstat-lmu/lecture_i2ml/blob/master/code-demos/code_demo_cart.Rmd). If you want to render the Rmd files to PDF, you need the accompanying [style files](https://github.com/compstat-lmu/lecture_i2ml/tree/master/style).
23 |
24 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/code-demos-pdf/code_demo_cart.pdf" >}}
25 |
26 | ### Quiz
27 |
28 | {{< quizdown >}}
29 |
30 | ---
31 | shuffle_questions: false
32 | ---
33 |
34 | ## Which statements are true?
35 |
36 | - [ ] CART is a stable algorithm -- if the data changes slightly the tree structure remains quite similar.
37 | - [x] CART is robust to outliers.
38 | - [x] With trees it is easy to handle missing values.
39 | - [ ] A simple tree is everything you need to fit a linear function $y \approx a + b \cdot x$ with numeric target.
40 |
41 | {{< /quizdown >}}
42 |
--------------------------------------------------------------------------------
/content/chapters/07_forests/07-05-proximities.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 07.05: Proximities"
3 | weight: 7005
4 | quizdown: true
5 | ---
6 |
7 | The term *proximity* refers to the "closeness" between pairs of cases. Proximities are calculated for each pair of observations and can be derived directly from random forests.
8 |
9 |
10 |
11 | ### Lecture video
12 |
13 | {{< video id="TYqXaZ5HCtI" >}}
14 |
15 | ### Lecture slides
16 |
17 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/slides-pdf/slides-forests-proximities.pdf" >}}
18 |
19 | ### Code demo
20 |
21 | **Random Forests**
22 |
23 | You can run the code snippets in the demos on your local machine. The corresponding Rmd version of this demo can be found [here](https://github.com/compstat-lmu/lecture_i2ml/blob/master/code-demos/code_demo_randforests.Rmd). If you want to render the Rmd files to PDF, you need the accompanying [style files](https://github.com/compstat-lmu/lecture_i2ml/tree/master/style).
24 |
25 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/code-demos-pdf/code_demo_randforests.pdf" >}}
26 |
27 | ### Quiz
28 |
29 | {{< quizdown >}}
30 |
31 | ---
32 | shuffle_questions: false
33 | ---
34 |
35 | ## Which statements are true?
36 |
37 | - [x] To compute permutation variable importance for feature $j$, we permute the feature and see how the performance changes (in OOB observations).
38 | - [ ] The random forest is a bad out-of-the box model and requires tuning of hyperparameters.
39 | - [x] Random forests and trees can be used for high-dimensional data.
40 | - [ ] Proximities are used in replacing missing data, but not in locating outliers.
41 |
42 | {{< /quizdown >}}
--------------------------------------------------------------------------------
/content/chapters/09_tuning/further-material.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Tuning: Further Material"
3 | weight: 9006
4 | ---
5 |
6 |
7 |
12 | #### Further Material
13 |
14 | - Bischl, Bernd, et al. ["Hyperparameter optimization: Foundations, algorithms, best practices, and open challenges."](https://wires.onlinelibrary.wiley.com/doi/full/10.1002/widm.1484) Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery (2021): e1484.
15 | This paper goes beyond grid search and random search and reviews important automatic hyperparameter optimization (HPO) methods, provides practical recommendations for conducting HPO, and discusses HPO algorithms, performance evaluation, combination with machine learning pipelines, runtime improvements, and parallelization.
16 | - mlr3 Practical Tuning Series:
17 | These notebooks are a step-by-step hands-on tutorial on how to tune ML models with mlr3.
18 | - [Part I - Tune a Support Vector Machine](https://mlr-org.com/gallery/series/2021-03-09-practical-tuning-series-tune-a-support-vector-machine/)
19 | - [Part II - Tune a Preprocessing Pipeline](https://mlr-org.com/gallery/series/2021-03-10-practical-tuning-series-tune-a-preprocessing-pipeline/)
20 | - [Part III - Build an Automated Machine Learning System](https://mlr-org.com/gallery/series/2021-03-11-practical-tuning-series-build-an-automated-machine-learning-system/)
21 | - [Part IV - Tuning and Parallel Processing](https://mlr-org.com/gallery/series/2021-03-12-practical-tuning-series-tuning-and-parallel-processing/)
22 |
--------------------------------------------------------------------------------
/content/chapters/03_supervised_classification/03-03-classification-linear.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 03.03: Linear Classifiers"
3 | weight: 3003
4 | quizdown: true
5 | ---
6 | Linear classifiers are an essential subclass of classification models. This section provides the definition of a linear classifier and depicts differences between linear and non-linear decision boundaries.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="SkrQOtpD9d0" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-classification-linear.pdf" >}}
17 |
18 | ### Quiz
19 |
20 | {{< quizdown >}}
21 |
22 | ---
23 | shuffle_questions: false
24 | ---
25 |
26 | ## Which statements are true?
27 |
28 | - [x] Classification is part of supervised learning.
29 | - [ ] Scoring classifiers always output numbers between 0 and 1.
30 | - [x] Probabilistic classifiers always output numbers between 0 and 1.
31 | - [x] With scoring classifiers one can obtain class labels by using a threshold.
32 | - [ ] The decision boundary does not depend on the model used.
33 |
34 | ## Which statements are true?
35 |
36 | - [x] For the discriminant approach we must have a loss function for minimization.
37 | - [ ] The generative and discriminant approach are basically the same.
38 | - [x] The generative approach is a probabilistic approach.
39 | - [ ] Binary classification uses two discriminant functions.
40 | - [ ] Linear classifiers can just learn linear decision boundaries.
41 | - [x] Logistic regression is an example for the discriminant approach.
42 | - [x] Linear classifiers specify the discriminant function with linear functions.
43 |
44 | {{< /quizdown >}}
--------------------------------------------------------------------------------
/content/chapters/03_supervised_classification/03-04-classification-logistic.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 03.04: Logistic Regression"
3 | weight: 3004
4 | quizdown: true
5 | ---
6 | Logistic regression is a discriminant approach toward constructing a classifier. We will motivate logistic regression via the logistic function, define the log-loss for optimization and illustrate the approach in 1D and 2D.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="V24s9HkLyaw" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-classification-logistic.pdf" >}}
17 |
18 | ### Code demo
19 |
20 | **Logistic regression & Newton-Raphson**
21 |
22 | You can run the code snippets in the demos on your local machine. The corresponding Rmd version of this demo can be found [here](https://github.com/compstat-lmu/lecture_i2ml/blob/master/code-demos/code_demo_logreg.Rmd). If you want to render the Rmd files to PDF, you need the accompanying [style files](https://github.com/compstat-lmu/lecture_i2ml/tree/master/style).
23 |
24 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/code-demos-pdf/code_demo_logreg.pdf" >}}
25 |
26 | ### Quiz
27 |
28 | {{< quizdown >}}
29 |
30 | ---
31 | shuffle_questions: false
32 | ---
33 |
34 | ## Which statements are true?
35 |
36 | - [ ] Logistic regression can be fitted by maximum likelihood estimation with numerical optimization or solved analytically.
37 | - [ ] Logistic regression follows a generative approach.
38 | - [x] In logistic regression, the parameter vector $\mathbf{\theta}$ that maximizes the model's likelihood is identical to the one minimizing its empirical risk.
39 |
40 | {{< /quizdown >}}
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/04-06-resampling-1.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04.06: Resampling 1"
3 | weight: 4006
4 | quizdown: true
5 | ---
6 |
7 | Different resampling techniques help to assess the performance of a learner while avoiding potential quirks resulting from a single train-test split. We will introduce cross-validation (with and without stratification), bootstrap and subsampling.
8 |
9 |
10 |
11 | ### Lecture video
12 |
13 | {{< video id="YWmC0C_NBLk" >}}
14 |
15 | ### Lecture slides
16 |
17 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-resampling-1.pdf" >}}
18 |
19 | ### Code demo
20 |
21 | **Resampling**
22 |
23 | You can run the code snippets in the demos on your local machine. The corresponding Rmd version of this demo can be found [here](https://github.com/compstat-lmu/lecture_i2ml/blob/master/code-demos/code_demo_resampling.Rmd). If you want to render the Rmd files to PDF, you need the accompanying [style files](https://github.com/compstat-lmu/lecture_i2ml/tree/master/style).
24 |
25 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/code-demos-pdf/code_demo_resampling.pdf" >}}
26 |
27 | ### Quiz
28 |
29 | {{< quizdown >}}
30 |
31 | ---
32 | shuffle_questions: false
33 | ---
34 |
35 | ## Which statements are true?
36 |
37 | - [x] Cross-validation, bootstrap, and subsampling are resampling techniques.
38 | - [x] Estimating the expected generalization error is a goal of resampling.
39 | - [ ] In bootstrap, each observation serves as a test point exactly once.
40 | - [x] In resampling, the data set is split repeatedly into training and tests sets.
41 | - [ ] Resampling strategies are unbiased.
42 |
43 | {{< /quizdown >}}
44 |
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/04-10-roccurves.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04.10: ROC Curves"
3 | weight: 4010
4 | quizdown: true
5 | ---
6 | In this section, we explain the ROC curve and how to calculate it. In addition, we will present the AUC as a global performance measure that integrates over all possible thresholds.
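
A short Python/scikit-learn sketch with made-up scores (illustrative only): the ROC curve traces the (FPR, TPR) pairs over all thresholds, and the AUC condenses them into a single number.

```python
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve

y_true = np.array([0, 0, 1, 1, 0, 1, 1, 0])                     # observed labels
scores = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.9, 0.55, 0.6])   # model scores

fpr, tpr, thresholds = roc_curve(y_true, scores)
print(np.c_[thresholds, fpr, tpr])          # one (threshold, FPR, TPR) per row
print("AUC:", roc_auc_score(y_true, scores))
```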
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="91PXeV65x5Y" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-roccurves.pdf" >}}
17 |
18 | ### Code demo
19 |
20 | **ROC**
21 |
22 | You can run the code snippets in the demos on your local machine. The corresponding Rmd version of this demo can be found [here](https://github.com/compstat-lmu/lecture_i2ml/blob/master/code-demos/code_demo_roc.Rmd). If you want to render the Rmd files to PDF, you need the accompanying [style files](https://github.com/compstat-lmu/lecture_i2ml/tree/master/style).
23 |
24 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/code-demos-pdf/code_demo_roc.pdf" >}}
25 |
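To make the construction concrete, here is a minimal sketch (hypothetical scores, not the demo's exact code) that traces out ROC points over all thresholds and computes the AUC via its rank interpretation:

```r
# Sketch: ROC points and AUC from raw scores
set.seed(1)
y     <- rbinom(50, 1, 0.5)          # true labels (1 = positive)
score <- y + rnorm(50)               # higher score = more evidence for the positive class

thr <- sort(unique(score), decreasing = TRUE)
tpr <- sapply(thr, function(t) mean(score[y == 1] >= t))   # true positive rate per threshold
fpr <- sapply(thr, function(t) mean(score[y == 0] >= t))   # false positive rate per threshold
plot(c(0, fpr, 1), c(0, tpr, 1), type = "s", xlab = "FPR", ylab = "TPR")

# AUC = P(score of a random positive > score of a random negative); ties count 1/2
cmp <- outer(score[y == 1], score[y == 0], ">") + 0.5 * outer(score[y == 1], score[y == 0], "==")
mean(cmp)
```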
26 | ### Quiz
27 |
28 | {{< quizdown >}}
29 |
30 | ---
31 | shuffle_questions: false
32 | ---
33 |
34 | ## Which statements are true?
35 |
36 | - [ ] If the proportion of positive to negative instances in the training data changes, the ROC curve will not change.
37 | - [x] If the proportion of positive to negative instances in the test data changes, the ROC curve will not change.
38 | - [x] Several evaluation metrics can be derived from a confusion matrix.
39 | - [x] The area under the ROC curve is called AUC.
40 | - [ ] A model with AUC=0 is the worst case.
41 |
42 | ## Which statements are true?
43 |
44 | - [ ] Using the prediction on the train data is the ordinary and correct way of calculating the ROC.
45 | - [x] The calculation of the ROC should be done on a test set.
46 | - [x] The AUC is not affected by the threshold, as opposed to the MCE.
47 |
48 | {{< /quizdown >}}
49 |
--------------------------------------------------------------------------------
/config.toml:
--------------------------------------------------------------------------------
1 | baseURL = "https://slds-lmu.github.io/i2ml/"
2 | languageCode = "en-us"
3 | title = "Introduction to Machine Learning (I2ML)"
4 | theme = "website_theme"
5 | enableRobotsTXT = true
6 |
7 | [markup.goldmark.renderer]
8 | unsafe= true
9 |
10 | [Params]
11 | footer = "© 2022 Course Creator"
12 | logo = "i2ml.svg" # file in ./static
13 | video_base_url = "https://www.youtube-nocookie.com/embed/"
14 | mathjax = true # has to be enabled for each page or enable it here globally
15 | use_favicon = true # if you have put all files generated by https://realfavicongenerator.net/ in ./static
16 | [menu]
17 | [[menu.main]]
18 | name = "Home"
19 | pre = "home"
20 | url = "."
21 | weight = 1
22 | [[menu.main]]
23 | name = "Chapters"
24 | pre = "chapters"
25 | url = "chapters/"
26 | weight = 2
27 | [[menu.main]]
28 | name = "Exercises"
29 | pre = "exercises"
30 | url = "exercises/"
31 | weight = 3
32 | [[menu.main]]
33 | name = "Appendix"
34 | pre = "appendix"
35 | url = "appendix/"
36 | weight = 4
37 | [[menu.main]]
38 | name = "Prereqs"
39 | pre = "prerequisites"
40 | url = "prerequisites/"
41 | weight = 5
42 | [[menu.main]]
43 | name = "Literature"
44 | pre = "literature"
45 | url = "literature/"
46 | weight = 6
47 | [[menu.main]]
48 | name = "Team"
49 | pre = "team"
50 | url = "team/"
51 | weight = 7
52 | [[menu.main]]
53 | name = "Contributing"
54 | pre = "contributing"
55 | url = "contributing/"
56 | weight = 8
57 |
58 | # can contain as many items as you want
59 | [[menu.footer]] # name here does not matter
60 | name = "Main Course Website"
61 | url = "."
62 | weight = 1
63 | [[menu.footer]]
64 | name = "Material Source Code"
65 | url = "https://github.com/slds-lmu/lecture_i2ml"
66 | weight = 2
67 | [[menu.footer]]
68 | name = "Website Source Code"
69 | url = "https://github.com/slds-lmu/i2ml"
70 | weight = 3
71 | [minify] # needed for quizdown to work
72 | disableHTML = true
73 |
--------------------------------------------------------------------------------
/content/chapters/03_supervised_classification/03-06-classification-naivebayes.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 03.06: Naive Bayes"
3 | weight: 3006
4 | quizdown: true
5 | ---
6 | Naive Bayes is a generative approach based on an assumption of conditional independence across features; it is closely related to discriminant analysis.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="TJ0ZSAyIi_c" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-classification-naivebayes.pdf" >}}
17 |
18 | ### Code demo
19 |
20 | **Comparison of classification methods**
21 |
22 | You can run the code snippets in the demos on your local machine. The corresponding Rmd version of this demo can be found [here](https://github.com/compstat-lmu/lecture_i2ml/blob/master/code-demos/code_demo_cmpclass.Rmd). If you want to render the Rmd files to PDF, you need the accompanying [style files](https://github.com/compstat-lmu/lecture_i2ml/tree/master/style).
23 |
24 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/code-demos-pdf/code_demo_cmpclass.pdf" >}}
25 |
26 | ### Code demo
27 |
28 | **Deep-dive generative classification methods**
29 |
30 | You can run the code snippets in the demos on your local machine. The corresponding Rmd version of this demo can be found [here](https://github.com/compstat-lmu/lecture_i2ml/blob/master/code-demos/code_demo_genclass.Rmd). If you want to render the Rmd files to PDF, you need the accompanying [style files](https://github.com/compstat-lmu/lecture_i2ml/tree/master/style).
31 |
32 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/code-demos-pdf/code_demo_genclass.pdf" >}}
33 |
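The conditional independence assumption becomes very tangible when you write the classifier down yourself; below is a minimal sketch (hypothetical example, not the demos' exact code) of Gaussian naive Bayes on iris:

```r
# Sketch: Gaussian naive Bayes "by hand" -- one univariate Gaussian per feature and class
data(iris)
X <- iris[, 1:4]
y <- iris$Species

params <- lapply(levels(y), function(k) {
  Xk <- X[y == k, ]
  list(prior = mean(y == k), mu = colMeans(Xk), sd = apply(Xk, 2, sd))
})
names(params) <- levels(y)

predict_nb <- function(xnew) {
  scores <- sapply(params, function(p)
    log(p$prior) + sum(dnorm(as.numeric(xnew), p$mu, p$sd, log = TRUE)))
  names(which.max(scores))                 # class with the highest (unnormalized) log-posterior
}

pred <- apply(X, 1, predict_nb)
mean(pred == y)                            # training accuracy
```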
34 | ### Quiz
35 |
36 | {{< quizdown >}}
37 |
38 | ---
39 | shuffle_questions: false
40 | ---
41 |
42 | ## Which statements are true?
43 |
44 | - [x] Naive Bayes assumes that the features are independent within each outcome class $y$.
45 | - [x] Naive Bayes follows a generative approach.
46 | - [ ] Decision boundaries in Naive Bayes are linear.
47 |
48 | {{< /quizdown >}}
--------------------------------------------------------------------------------
/content/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Introduction to Machine Learning (I2ML)
3 | ---
4 |
5 | This website offers an open and free introductory course on (supervised) machine learning. The course is constructed to be as self-contained as possible and enables self-study through lecture videos, PDF slides, cheatsheets, quizzes, exercises (with solutions), and notebooks.
6 |
7 | The quite extensive material can roughly be divided into an introductory part at undergraduate level (chapters 1-10), a more advanced second part at MSc level (chapters 11-19), and a third part, also at MSc level (chapters 20-23). At the [LMU Munich](https://www.slds.stat.uni-muenchen.de/teaching/) we teach all parts in an inverted-classroom style (B.Sc. lecture "Introduction to ML" and M.Sc. lectures "Supervised Learning" and "Advanced Machine Learning"). While the first part aims at a practical and operational understanding of concepts, the second and third parts focus on theoretical foundations and more complex algorithms.
8 |
9 | __Remarks on Deep Dive sections__: Certain sections exclusively present mathematical proofs, acting as deep-dives into the respective topics. It's important to note that these deep-dive sections do not have accompanying videos.
10 |
11 | __Why another ML course:__ A key goal of the course is to teach the fundamental building blocks behind ML, instead of introducing “yet another algorithm with yet another name”. We discuss, compare, and contrast risk minimization, statistical parameter estimation, the Bayesian viewpoint, and information theory and demonstrate that all of these are equally valid entry points to ML. Developing the ability to take on and switch between these perspectives is a major goal of this course, and in our opinion not always ideally presented in other courses.
12 |
13 | We also want this course not only to be open, but [open source](https://proceedings.mlr.press/v207/bothmann23a.html).
14 |
15 | __What is not covered:__ (1) An in-depth coverage of deep learning; we offer this in our course [Introduction to Deep Learning](https://slds-lmu.github.io/i2dl/). (2) An in-depth coverage of optimization; we are working on a separate course for optimization.
16 |
--------------------------------------------------------------------------------
/content/chapters/04_evaluation/04-05-overfitting-underfitting.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 04.05: Overfitting & Underfitting"
3 | weight: 4005
4 | quizdown: true
5 | ---
6 | In machine learning, we are interested in a model that captures the true underlying function and still generalizes well to new data.
7 | When the model fails on the first task, we speak of underfitting, and both train and test error will be high.
8 | On the other hand, learning the training data very well at the expense of generalization ability is referred to as overfitting and usually occurs when there is not enough data to tell our hypotheses apart.
9 | We will show you examples of this behavior and how to diagnose overfitting.
10 |
11 |
12 |
13 | ### Lecture video
14 |
15 | {{< video id="eNMM1b98kow" >}}
16 |
17 | ### Lecture slides
18 |
19 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-evaluation-overfitting-underfitting.pdf" >}}
20 |
21 | ### Code demo
22 |
23 | **Overfitting**
24 |
25 | You can run the code snippets in the demos on your local machine. The corresponding Rmd version of this demo can be found [here](https://github.com/compstat-lmu/lecture_i2ml/blob/master/code-demos/code_demo_overfitting.Rmd). If you want to render the Rmd files to PDF, you need the accompanying [style files](https://github.com/compstat-lmu/lecture_i2ml/tree/master/style).
26 |
27 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/tree/master/code-demos-pdf/code_demo_overfitting.pdf" >}}
28 |
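A quick way to see this behavior is to increase model complexity on fixed data and track train vs. test error; the following sketch (hypothetical toy data, not the demo's exact code) does this with polynomial regression:

```r
# Sketch: train vs. test error over increasing polynomial degree
set.seed(1)
n <- 50
x <- runif(2 * n, -3, 3)
y <- sin(x) + rnorm(2 * n, sd = 0.4)
train <- data.frame(x = x[1:n],           y = y[1:n])
test  <- data.frame(x = x[(n + 1):(2*n)], y = y[(n + 1):(2*n)])

degrees <- 1:15
errs <- t(sapply(degrees, function(d) {
  mod <- lm(y ~ poly(x, d), data = train)
  c(train = mean((train$y - predict(mod, train))^2),    # training MSE
    test  = mean((test$y  - predict(mod, test))^2))     # test MSE
}))
matplot(degrees, errs, type = "l", lty = 1, xlab = "polynomial degree", ylab = "MSE")
legend("topright", legend = colnames(errs), col = 1:2, lty = 1)
```

The training error keeps decreasing with the degree, while the test error eventually rises again, which is exactly the overfitting pattern described above.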
29 | ### Quiz
30 |
31 | {{< quizdown >}}
32 |
33 | ---
34 | shuffle_questions: false
35 | ---
36 |
37 | ## Which statements are true?
38 |
39 | - [x] Overfitting means that the model performs much better on the training data than on the test data.
40 | - [ ] A good test performance is an indicator of overfitting.
41 | - [ ] The linear model is known to overfit very fast.
42 | - [x] Overfitting risk increases with model complexity.
43 | - [x] Constraining the hypothesis space helps the learner to find a good hypothesis.
44 | - [ ] Goodness-of-fit measures like $R^2$, likelihood, AIC, BIC and deviance are all based on the test error.
45 |
46 | {{< /quizdown >}}
47 |
--------------------------------------------------------------------------------
/content/appendix/04_data.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Data sets
3 | ---
4 |
5 | ## Data sets used in the lecture
6 |
7 | - Bikeshare
8 |
9 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/datasets-pdf/bikeshare.pdf" >}}
10 |
11 | - Boston Housing
12 |
13 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/datasets-pdf/boston.pdf" >}}
14 |
15 | We are aware of the ethical issues regarding this data set ([more information](https://medium.com/@docintangible/racist-data-destruction-113e3eff54a8)) and will replace it in upcoming revisions of the material.
16 |
17 | - Circle
18 |
19 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/datasets-pdf/circle.pdf" >}}
20 |
21 | - German Credit
22 |
23 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/datasets-pdf/credit.pdf" >}}
24 |
25 | - Glass
26 |
27 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/datasets-pdf/glass.pdf" >}}
28 |
29 | - Ionosphere
30 |
31 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/datasets-pdf/ionosphere.pdf" >}}
32 |
33 | - Iris
34 |
35 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/datasets-pdf/iris.pdf" >}}
36 |
37 | We are aware of the ethical issues regarding this data set ([more information](https://armchairecology.blog/iris-dataset/)) and will replace it in upcoming revisions of the material.
38 |
39 | - MNIST
40 |
41 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/datasets-pdf/mnist.pdf" >}}
42 |
43 | - Sonar
44 |
45 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/datasets-pdf/sonar.pdf" >}}
46 |
47 | - Spam Classification
48 |
49 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/datasets-pdf/spam.pdf" >}}
50 |
51 | - Spirals
52 |
53 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/datasets-pdf/spirals.pdf" >}}
54 |
55 | - Titanic Survival
56 |
57 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/datasets-pdf/titanic.pdf" >}}
58 |
59 | - Waveform
60 |
61 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/datasets-pdf/waveform.pdf" >}}
62 |
63 |
--------------------------------------------------------------------------------
/content/chapters/01_ml_basics/01-08-learnercomponents-hro.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 01.08: Components of a Learner"
3 | weight: 1008
4 | quizdown: true
5 | ---
6 | Nearly all supervised learning algorithms can be described in terms of three components: 1) hypothesis space, 2) risk, and 3) optimization. In this section, we explain how these components interact and why this is a very useful concept for many supervised learning approaches.
7 |
8 |
9 |
10 | ### Lecture video
11 |
12 | {{< video id="WzQVGfa-ikw" >}}
13 |
14 | ### Lecture slides
15 |
16 | {{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-basics-learnercomponents-hro.pdf" >}}
17 |
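To connect the three components, here is a minimal sketch (hypothetical example, not part of the official material) for linear regression: the hypothesis space are the linear functions theta[1] + theta[2] * x, the risk is the mean squared error, and the optimizer is plain gradient descent.

```r
# Sketch: hypothesis space (linear functions), risk (MSE), optimization (gradient descent)
set.seed(1)
x <- runif(100)
y <- 1 + 2 * x + rnorm(100, sd = 0.2)

risk <- function(theta) mean((y - (theta[1] + theta[2] * x))^2)   # empirical risk
grad <- function(theta) {                                         # gradient of the risk
  res <- y - (theta[1] + theta[2] * x)
  -2 * c(mean(res), mean(res * x))
}

theta <- c(0, 0)
alpha <- 0.1                                                      # learning rate
for (t in 1:2000) theta <- theta - alpha * grad(theta)            # step along the *negative* gradient
theta          # close to coef(lm(y ~ x))
risk(theta)
```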
18 | ### Quiz
19 |
20 | {{< quizdown >}}
21 |
22 | ---
23 | shuffle_questions: false
24 | ---
25 |
26 | ## Which statements are true?
27 |
28 | - [x] For a given hypothesis space, different optimization procedures can be used to find the best model within it.
29 | - [ ] Providing two different training data sets to a learner will result in the same optimal model.
30 | - [x] The parameterization of a model defines its hypothesis space.
31 | - [x] Supervised learning consists of three components: hypothesis space, risk, and optimization.
32 |
33 | ## Which statements are true?
34 |
35 | - [x] If a hypothesis space can be understood as a parameterized family of curves, finding the optimal model is equivalent to finding the optimal set of parameter values.
36 | - [x] Supervised ML requires having labeled data to train the model.
37 | - [ ] A learner is a function that maps feature vectors to predicted target values.
38 | - [ ] The risk function does not depend on the choice of the loss function.
39 |
40 | ## Which statements are true?
41 | - [ ] The idea of Gradient Descent (GD) is to iteratively go from the current candidate θ[t] in the direction of the positive gradient, with learning rate α to the next θ[t+1].
42 | - [x] Empirical risk minimization (ERM) leads to finding the model with the lowest average loss (in the absence of regularization).
43 | - [ ] A learner outputs the best parameters and hyperparameters.
44 | - [ ] Supervised ML is always about learning to predict, and never about learning to explain.
45 |
46 | ## Which statements are true?
47 | - [ ] In supervised ML, there are two tasks: Regression for categorical target variables, and classification for numerical ones.
48 | - [x] An algorithm that - given some hypothesis space H, training data D, and hyperparameter control settings λ - returns one element of the hypothesis space H, is called a learner.
49 | - [x] A hypothesis space H is a set that can have an infinite number of elements.
50 | - [ ] The empirical risk function allows us to associate a quality score with each of our models: the higher the empirical risk, the better a model fits our training data.
51 |
52 | {{< /quizdown >}}
53 |
--------------------------------------------------------------------------------
/content/team/_index.md:
--------------------------------------------------------------------------------
1 | # Team
2 |
3 | -  [Bernd Bischl](https://www.slds.statistik.uni-muenchen.de/people/bischl/)
4 | wrote the initial version of most of the course material and actively develops and extends the material. At the LMU, he often teaches the latter parts of the material, i.e., the "Supervised Learning" and "Advanced Machine Learning" lectures.
5 | -  [Ludwig Bothmann](https://www.slds.statistik.uni-muenchen.de/people/bothmann/)
6 | joined the team in summer 2020 and actively develops and extends the material. At the LMU, he often teaches "Introduction to Machine Learning" and "Supervised Learning".
7 | -  [Giuseppe Casalicchio](https://www.slds.statistik.uni-muenchen.de/people/casalicchio/)
8 | contributed to some chapters of the initial version (before 2020) and rejoined the team in 2024, where he actively develops and extends the material. At the LMU, he often teaches "Introduction to Machine Learning", "Advanced Machine Learning", and "Applied Machine Learning".
9 | -  [Yawei Li](https://www.slds.stat.uni-muenchen.de/people/li/)
10 | is a PhD student in Bernd's group and contributed to the slides and exercises of AdvML.
11 | -  [Chris Kolb](https://www.slds.stat.uni-muenchen.de/people/kolb/)
12 | is a PhD student in Bernd's group and contributed to the slides and exercises.
13 |
14 | ## Current student assistants
15 |
16 | - Ziyu Mu
17 | - Manuel Helmerichs
18 | - Aik Tarkhanyan
19 |
20 | ## Alumni
21 |
22 | -  [Fabian Scheipl](https://www.fda.statistik.uni-muenchen.de/people/head/scheipl/index.html)
23 | joined the team in fall 2018 and contributed to the slides, videos and code demos.
24 | -  [Tobias Pielok](https://www.slds.stat.uni-muenchen.de/people/pielok/)
25 | was a PhD student in Bernd’s group and contributed to the code demos and exercises; he also presented the code demos at the classes held in spring 2020.
26 | -  [Lisa Wimmer](https://www.slds.stat.uni-muenchen.de/people/wimmer/)
27 | was a PhD student in Bernd’s group and contributed to slides and exercises.
28 | -  [Daniel Schalk](https://www.slds.statistik.uni-muenchen.de/people/schalk)
29 | was a PhD student in Bernd's group and organized the second round of the inverted classroom in spring 2019; he also provided the digital platform for videos, quizzes and exercises.
30 | -  [Heidi Seibold](https://www.slds.statistik.uni-muenchen.de/people/seibold) joined the team in fall 2019 and has been in charge of the classes held in spring 2020 at LMU Munich and University of Bielefeld.
31 | -  [Christoph Molnar](https://www.slds.statistik.uni-muenchen.de/people/molnar)
32 | was a PhD student in Bernd's group and organized the first round of the inverted classroom in spring 2018, including the first round of videos.
33 | -  [Jakob Richter](https://jakob-r.de/)
34 | created this website using Hugo, some CSS and the [hugo-quiz plugin](https://github.com/bonartm/hugo-quiz).
35 |
--------------------------------------------------------------------------------
/content/appendix/02_errata.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Errata
3 | ---
4 |
5 | ## Errata in the slides shown in the videos
6 |
7 | - Chapter 1.4 (Models & Parameters): Slide 5/10: d-dimensional vector, not p-dimensional
8 | - Chapter 2.1 (Regression losses): Slide 1/5 sign in bullet point 4
9 | - Chapter 2.2 (Deep Dive OLS): Slide 2/2 last lines in left column
10 | - Chapter 3.4 (Logistic Regression) - Slide 4/10: The alpha of the red (scaled) logistic function in the second plot is 5, not -5
11 | - Chapter 3.6 (Naive Bayes): Slide 3/6: Missing exponents in figure
12 | - Chapter 4.3 (Simple Measures for Classification): Slide 6/9: Error in cost matrix
13 | - Chapter 4.4 (Test Error): Slide 8/13: The variance of MSE decreases with test set size, not the mean of MSE
14 | - Chapter 4.7 (Classification measures): Slide 6/9 cost computation
15 | - Chapter 6.2 (CART: Growing a Tree): Slide 5/8: Wrong plot is displayed in video (axis wrong, points missing)
16 | - Chapter 8.3 (Single Hidden Layer NN): Slides 9 & 11/14: missing minus sign in the sigmoid function
17 | - Chapter 11.6 (0-1 Loss): Slides 2/5 and 4/5 Errors in notation of conditional probability inside of expectation
18 | - Chapter 11.7 (Bernoulli Loss): Slides 9/10 and 10/10 Errors in Bernoulli Loss and Entropy Splitting Criterion
19 | - Chapter 11.7 (Classification and 0-1 Loss): Slides 2/6 Errors in law of total expectation, should also condition on x inside inner expectation
20 | - Chapter 11.16 (Bias-Variance-Decomposition 1): Revised simulation set-up and figures
21 | - Chapter 11.12 (MLE2): Slide 2/5 wrong negative sign in NLL equation
22 | - Chapter 12.2 (Softmax): Slide 2/9 [0,1]^g instead of R^g
23 | - Chapter 13.1 (Entropy I): Slide 4,6,8/10 changed entropy calculation from nats to bits
24 | - Chapter 13.2 (Entropy II): Slide 1/7 corrected plot for entropy of Bernoulli distribution
25 | - Chapter 13.5 (CE-KLD): Slide 6/7 typo in formula (1)
26 | - Chapter 13.6: Slide 2/7 typos in formula and bullet point 3
27 | - Chapter 13.7: Slide 4/14 switched x and y in the proposition regarding zero conditional entropy
28 | - Chapter 13.7: Slide 14/14 added missing 0.5 factor in the entropy of the multivariate Gaussian
29 | - Chapter 13.7: Slide 14/14 added parentheses to make log less ambiguous
30 | - Chapter 15.2 (Ridge Regression): Slide 4/10 clarified meaning of green dot in plot in comment
31 | - Chapter 15.8 (Bayesian Priors): Slide 5/5 renew plot and add comment on ybar
32 | - Chapter 16.1 (SVM linear hard-margin): Slide 11/11, fix ambiguity in defining SVs
33 | - Chapter 17.1: Slide 6/11 Typo in f
34 | - Chapter 17.3: Slide 2/6 Show how to get phi(x) explicitly, and the last component of phi(x) should be b instead of sqrt(b).
35 | - Chapter 17.3: Slide 2/7 Assume gamma = 1 to avoid conflict with geometric distances
36 | - Chapter 18.1: Slide 5/7: Typo in Monomials; it should be (d+p-1 over d) and ( (d+p) over d) -1
37 | - Chapter 19.2: Slide 1/15: R_emp formula contains x_{i}, not x.
38 | - Chapter 19.3: Slide 5/20 blue point moved from -2.5 to -2
39 | - Chapter 19.5: Slide 3/11: bullet point 1 added "if", bullet point 2 changed "decrease" to "increase"
40 | - Chapter 20.1: Slide 6/12: fix error in step 6 and clarify that b-hat is a hard label classifier
41 | - Chapter 20.1: Slides 7 and 8/12: improved plot and text
42 | - Chapter 20.2: Slide 14/15: error in step 1 of the algorithm
43 | - Chapter 20.4: Slide 20/28: typo in cov in second bullet point
44 | - Chapter 21.4: Slide 2/5: increased number of boosting iterations from 1 to 300 in plot
45 | - Chapter 21.4: Slide 5/5: add to for loops over i=1,..,n samples and initialize empty data set
46 | - Chapter 13.1: Slide 10/10: fixed incorrect derivative
47 | - Chapter 18.08: Slide 6/8: the slide set is correct, i.e., the given percentage is used to "dropout" / ignore the learners. (This is currently incorrectly stated in the video)
48 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # README
2 |
3 | ## How to create a new website repository?
4 |
5 | 1. Create a new repository with the name `website_xyz` and select `slds-lmu/website_template` as template repository
6 | 2. There should already be a `gh-pages` branch. If not, create it.
7 | 3. Go to `Settings -> Pages` and select the `gh-pages` branch as built branch.
8 | 4. Change all `website_template` entries in the `config.toml` file to your repository name `website_xyz` (this happens in two places: the `baseURL` on line 1 and the footer URL on line 43).
9 | 5. The build of the website should already be available at `http://slds-lmu.github.io/website_xyz/`
10 | 6. [Optional] Add your website link to the description of the repository to make it accessible more easily.
11 | 7. Edit the team page. Remove people not affiliated with the lecture and add relevant people.
12 | 8. Edit your files as described below.
13 |
14 | ## Edit the content of the website
15 |
16 | - Directories and filenames have to be lowercase
17 | - Chapters are ordered by filename
18 | - if no video or no PDF file is present: delete the corresponding YAML param completely
19 | - to include pdfs, use [pdfjs](https://github.com/anvithks/hugo-embed-pdf-shortcode):
20 | - For Files: `{{< pdfjs file="slides-regression-losses.pdf" >}}`. The pdf has to be in the same directory as the md file.
21 | - As URL: `{{< pdfjs file="https://github.com/slds-lmu/lecture_i2ml/blob/master/slides-pdf/slides-regression-losses.pdf" >}}`. If a URL is used, a download button is created instead of a preview of the slides.
22 | - if you add a markdown or html link to a file that is in the same directory
23 | - you should use the shortcode `{{< fileurl file="cheatsheet_notation.pdf" >}}`
24 | - if you link to `../file.pdf` it will break in the index pages
25 | - to include lecture video: `{{< video id = "Syrzezpj2FY" >}}`
26 | - the `video_id` is the last part of the youtube url, e.g. for `https://www.youtube.com/watch?v=BmSvhDCdJro` it is `BmSvhDCdJro`.
27 | - You can also embed videos from other platforms by changing `video_base_url` in `config.toml`
28 | - Or use a custom shortcode.
29 | - To directly change aspects in the video embedding you can also use the explicit HTML-iframe tag e.g. ``
30 | - To ensure that views are counted properly, explicitly set the `autoplay = "0"` HTML attribute, either directly in the HTML iframe tag or in the video.html in the layouts of the website theme
31 | - mathjax enabled if `mathjax: true` in yaml-frontmatter
32 | - if mathjax should be supported in index page, add it to respective `_index.md` or enable it in `config.toml`
33 | - make quizzes using https://github.com/bonartm/hugo-quiz
34 | - put `quizdown: true` in yaml-frontmatter
35 | - quizzes support mathjax without the `mathjax: true`
36 | - in `_index.md` you can specify `show_in_index: (content|summary|none)`
37 |
38 | ## Deploying the website
39 |
40 | Deployment is done automatically via GitHub Actions. Nothing needs to be done to publish a new version of the website. Each build is triggered when a commit is pushed to the `main` branch.
41 |
42 | ## Citation
43 |
44 | If you use our material, please consider citing us as follows:
45 |
46 | ```
47 | @misc{bischl22i2ml,
48 | author = {Bischl, Bernd and Bothmann, Ludwig and Scheipl, Fabian and Pielok, Tobias and Wimmer, Lisa and Li, Yawei and Kolb, Chris and Schalk, Daniel and Seibold, Heidi and Molnar, Christoph and Richter, Jakob},
49 | title = {{Introduction to Machine Learning (I2ML)}},
50 | howpublished = "\url{https://slds-lmu.github.io/i2ml/}",
51 | year = {2022},
52 | note = "[Online; accessed yyyy-mm-dd]"
53 | }
54 | ```
55 |
56 | This course is based on our concept of open-source educational resources (OSER) as described in our paper:
57 |
58 | ```
59 | @InProceedings{bothmann23oser,
60 | title = {Developing Open Source Educational Resources for Machine Learning and Data Science},
61 | author = {Bothmann, Ludwig and Strickroth, Sven and Casalicchio, Giuseppe and R\"ugamer, David and Lindauer, Marius and Scheipl, Fabian and Bischl, Bernd},
62 | booktitle = {Proceedings of the Third Teaching Machine Learning and Artificial Intelligence Workshop},
63 | pages = {1--6},
64 | year = {2023},
65 | editor = {Kinnaird, Katherine M. and Steinbach, Peter and Guhr, Oliver},
66 | volume = {207},
67 | series = {Proceedings of Machine Learning Research},
68 | month = {19--23 Sep},
69 | publisher = {PMLR},
70 | url = {https://proceedings.mlr.press/v207/bothmann23a.html},
71 | }
72 | ```
73 |
74 | ## License
75 |
76 | [MIT LICENSE](LICENSE)
77 |
--------------------------------------------------------------------------------
/content/literature/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Literature
3 | ---
4 |
5 | The course material covers all exam-relevant topics in a quite self-contained manner.
6 | For more in-depth study, we recommend the following literature. Note that some of the books are rather detailed and involved, and more geared towards a larger lecture at the Master's level.
7 |
8 | We **recommend buying and reading** at least one standard reference on ML: at the BSc level this might be James et al.; at the MSc level Hastie et al., Bishop, Murphy, or Alpaydin; and Shalev-Shwartz for a mathematical entry point.
9 |
10 | ## Helpful References for Prerequisites
11 |
12 | If you need to read up on some of the required topics (see [Prerequisites](../prerequisites)), this list might help. We tried to keep it as short as possible.
13 | - M. Deisenroth, A. Faisal, C. Ong. Mathematics for Machine Learning. [URL](https://mml-book.github.io/book/mml-book.pdf)
14 | - L. Wasserman. All of Statistics. [URL](http://egrcc.github.io/docs/math/all-of-statistics.pdf)
15 | - H. Wickham, G. Grolemund. R for Data Science. [URL](https://r4ds.had.co.nz/)
16 | - Introductory R course on datacamp.com [URL](https://learn.datacamp.com/courses/free-introduction-to-r)
17 |
18 |
19 | ## Machine Learning
20 |
21 | - K. Kersting, C. Lampert, C. Rothkopf. Wie Maschinen Lernen. Springer, 2019. [URL](https://link.springer.com/book/10.1007/978-3-658-26763-6) *German, informal, intuitive introduction to ML. Lower than BSc level, maybe more targeted at pupils or a non-academic audience. Read if you want a very light-weight introduction into the field, or buy as present for relatives and friends if they ask what you are doing.*
22 | - G. James, D. Witten, T. Hastie, R. Tibshirani. An Introduction to Statistical Learning. Springer, 2013. [URL](http://www-bcf.usc.edu/~gareth/ISL/) *Beginner-level introduction with applications in R. Very well suited for the BSc level.*
23 | - T. Hastie, R. Tibshirani, J. Friedman. The Elements of Statistical Learning. Springer, 2009. [URL](https://web.stanford.edu/~hastie/ElemStatLearn/) *Standard reference for statistics-flavored ML.*
24 | - C. M. Bishop. Pattern Recognition and Machine Learning. Springer, 2006. [URL](http://research.microsoft.com/en-us/um/people/cmbishop/prml/) *Standard reference for ML-flavored ML.*
25 | - S. Shalev-Shwartz, S. Ben-David. Understanding machine learning: From Theory to Algorithms. Cambridge University Press, 2014. [URL](https://www.cs.huji.ac.il/w~shais/UnderstandingMachineLearning/) *Great, thorough introduction to ML theory. Math-style book with definitions and proofs.*
26 | - E. Alpaydin. Introduction to Machine Learning. MIT Press, 2010. [URL](http://www.cmpe.boun.edu.tr/~ethem/i2ml2e/) *Standard reference with broad coverage; easy to read.*
27 | - K. Murphy. Machine Learning: a Probabilistic Perspective. MIT Press, 2012. [URL](https://probml.github.io/pml-book/book0.html) *Standard reference; quite extensive; statistical/probabilistic lens.*
28 | - F. Provost, T. Fawcett. Data Science for Business. O’Reilly, 2013. [URL](https://book.akij.net/eBooks/2018/May/5aef50939a868/Data_Science_for_Bus.pdf) *A very good, applied and easy-to-read book by 2 well-known ML scientists. Contains many practical aspects that are missing in other references. Probably a good idea to read this in any case.*
29 | - N. Japkowicz. Evaluating Learning Algorithms (A Classification Perspective). Cambridge University Press, 2011. *Nice reading on performance measures, resampling methods and (some) statistical tests for benchmarking in ML; only for classification.*
30 | - B. Bischl et al. Hyperparameter Optimization: Foundations, Algorithms, Best Practices and Open Challenges. arXiv preprint 2021. [URL](https://arxiv.org/pdf/2107.05847.pdf) *Our tutorial paper on HPO.*
31 | - I. Goodfellow, Y. Bengio, A. Courville. Deep Learning. MIT Press, 2016. [URL](https://www.deeplearningbook.org/) *Standard, modern reference for DL.*
32 | - J. Friedman, T. Hastie, R. Tibshirani. Additive logistic regression: a statistical view of boosting (With discussion and a rejoinder by the authors). Ann. Statist. 2000. [URL](
33 | https://projecteuclid.org/journals/annals-of-statistics/volume-28/issue-2/Additive-logistic-regression--a-statistical-view-of-boosting-With/10.1214/aos/1016218223.full)
34 |
35 | ## Mathematical & Statistical Theory
36 |
37 | - G. Strang. Linear Algebra and Learning from Data. Cambridge University Press, 2019. *Serious course on matrices and applied linear algebra.*
38 | - S. Axler. Linear Algebra Done Right. Springer, 2015. [URL](https://link.springer.com/content/pdf/10.1007%2F978-3-319-11080-6.pdf) *Linear Algebra from a more theoretical but still beginner-friendly perspective*
39 | - A. M. Mood, F. A. Graybill, D. C. Boes. Introduction to the Theory of Statistics, McGraw-Hill 1974. [URL](https://www.fulviofrisone.com/attachments/article/446/Introduction%20to%20the%20theory%20of%20statistics%20by%20MOOD.pdf) *Beginner-friendly intro to statistics; bit on the mathy side.*
40 | - J. Watt, R. Borhani, A. Katsaggelos. Machine Learning Refined. Cambridge University Press, 2020. [URL](https://github.com/jermwatt/machine_learning_refined) *Check chapters 2-4 plus Appendix for insightful explanations and visualizations of a variety of optimization concepts.*
41 | - T. M. Cover, J. A. Thomas. Elements of Information Theory. Wiley, 2006. [URL](http://staff.ustc.edu.cn/~cgong821/Wiley.Interscience.Elements.of.Information.Theory.Jul.2006.eBook-DDU.pdf) *Good intro to information theory in first hundred pages, though lacking cross-connections to ML / statistics.*
42 |
43 | ## R Programming
44 |
45 | - N. Matloff. The Art of R Programming. No Starch Press, 2011. [URL](https://diytranscriptomics.com/Reading/files/The%20Art%20of%20R%20Programming.pdf)
46 |
47 | We use the **mlr3** package for machine learning in R quite heavily.
48 | - Central project page and learning resources: https://mlr3.mlr-org.com/, in particular
49 | - the [book](https://mlr3book.mlr-org.com/),
50 | - the [gallery](https://mlr3gallery.mlr-org.com/), and
51 | - the [cheatsheets](https://cheatsheets.mlr-org.com/).
52 | - GitHub page: https://github.com/mlr-org/mlr3
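
As a quick taste of the mlr3 workflow (a minimal sketch; see the linked book and cheatsheets for the full API), the basic pattern combines a task, a learner, and a resampling strategy:

```r
library(mlr3)

task       <- tsk("iris")                     # built-in example task
learner    <- lrn("classif.rpart")            # decision tree learner shipped with mlr3
resampling <- rsmp("cv", folds = 5)           # 5-fold cross-validation

rr <- resample(task, learner, resampling)
rr$aggregate(msr("classif.ce"))               # cross-validated misclassification error
```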
53 |
--------------------------------------------------------------------------------
/content/exercises/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Exercises
3 | ---
4 |
5 | __Exercises for Chapters 1-10 (LMU Lecture I2ML):__
6 |
7 | | | HTML | Jupyter | PDF |
8 | | ------------| ---------------- | ------------------ | ------------------ |
9 | | Exercise 1 | [ML basics](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/ml-basics/ml-basics.html) | | [Exercise](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/ml-basics_ex.pdf), [Solution](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/ml-basics_all.pdf) |
10 | | Exercise 2 | [Regression](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/supervised-regression/regression.html) | [R solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/supervised-regression/sol_regression_R.ipynb), [Python solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/supervised-regression/sol_regression_py.ipynb) |[Exercise](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/regression_ex.pdf), [Solution](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/regression_all.pdf) |
11 | | Exercise 3 | [Classification I](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/supervised-classification/classification_1.html) | | [Exercise](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/classification_1_ex.pdf), [Solution](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/classification_1_all.pdf) |
12 | | Exercise 4 | [Classification II](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/supervised-classification/classification_2.html) | [R solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/supervised-classification/sol_classification_2_R.ipynb), [Python solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/supervised-classification/sol_classification_2_py.ipynb) | [Exercise](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/classification_2_ex.pdf), [Solution](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/classification_2_all.pdf) |
13 | | Exercise 5 | [Evaluation I](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/evaluation/evaluation_1.html) | [R solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/evaluation/sol_eval_1_R.ipynb), [Python solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/evaluation/sol_eval_1_py.ipynb) | [Exercise](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/evaluation_1_ex.pdf), [Solution](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/evaluation_1_all.pdf) |
14 | | Exercise 6 | [Evaluation II](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/evaluation/evaluation_2.html) | [R solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/evaluation/sol_eval_2_R.ipynb), [Python solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/evaluation/sol_eval_2_py.ipynb) | [Exercise](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/evaluation_2_ex.pdf), [Solution](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/evaluation_2_all.pdf) |
15 | | Exercise 7 | [Evaluation III](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/evaluation/evaluation_3.html) | | [Exercise](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/evaluation_3_ex.pdf), [Solution](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/evaluation_3_all.pdf) |
16 | | Exercise 8 | [CART](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/trees/trees.html) | [R solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/trees/sol_trees_R.ipynb), [Python solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/trees/sol_trees_py.ipynb) | [Exercise](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/trees_ex.pdf), [Solution](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/trees_all.pdf) |
17 | | Exercise 9 | [Random forests](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/forests/forests.html) | [R solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/forests/sol_forests_R.ipynb), [Python solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/forests/sol_forests_py.ipynb) | [Exercise](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/forests_ex.pdf), [Solution](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/forests_all.pdf) |
18 | | Exercise 10 | [Neural networks](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/neural-networks/nn.html) | | [Exercise](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/nn_ex.pdf), [Solution](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/nn_all.pdf) |
19 | | Exercise 11 | [Tuning](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/tuning/tuning.html) | [R solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/tuning/sol_tuning_R.ipynb), [Python solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/tuning/sol_tuning_py.ipynb)| [Exercise](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/tuning_ex.pdf), [Solution](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/tuning_all.pdf) |
20 | | Exercise 12 | [Nested resampling](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/nested-resampling/nested_resampling.html) | [R solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/nested-resampling/sol_nested_resampling_R.ipynb), [Python solution](https://github.com/slds-lmu/lecture_i2ml/blob/master/exercises/nested-resampling/sol_nested_resampling_py.ipynb) | [Exercise](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/nested_resampling_ex.pdf), [Solution](https://github.com/slds-lmu/lecture_i2ml/raw/master/exercises-pdf/nested_resampling_all.pdf) |
21 |
22 |
23 |
24 | __Exercises for Chapters 11-19 (LMU Lecture SL):__
25 |
26 | - The PDF files contain the full solutions, but whenever a coding exercise is present, it is **only in R** and the solution is almost always outdated.
27 | - The `Coding Exercise` column links to a single HTML file that contains solutions in both languages.
28 |
29 | | | Exercise Name | PDF | Coding Exercise | Google Colab |
30 | | ------------| ------------------ | ------------------ | ---------------- | ------------------ |
31 | | Exercise 1 | Advanced Risk Minimization 1 | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_advriskmin_1.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_advriskmin_1.pdf) | | |
32 | | Exercise 2 | Advanced Risk Minimization 2 | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_advriskmin_2.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_advriskmin_2.pdf) | | |
33 | | Exercise 3 | Advanced Risk Minimization 3 | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_advriskmin_3.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_advriskmin_3.pdf) | [Advanced Risk Minimization 3](https://github.com/slds-lmu/lecture_sl/blob/main/exercises/advriskmin-quarto/inserted/advriskmin_3_all.html) | [R solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/advriskmin-quarto/inserted/sol_advriskmin_3_R.ipynb), [Python solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/advriskmin-quarto/inserted/sol_advriskmin_3_py.ipynb) |
34 | | Exercise 4 | Multiclass Classification | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_multiclass_1.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_multiclass_1.pdf) | | |
35 | | Exercise 5 | Information Theory 1 | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_information_theory_1.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_information_theory_1.pdf) | [Information Theory 1](https://github.com/slds-lmu/lecture_sl/blob/main/exercises/information-theory-quarto/inserted/information_theory_1_all.html) | [R solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/information-theory-quarto/inserted/sol_information_theory_1_R.ipynb), [Python solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/information-theory-quarto/inserted/sol_information_theory_1_py.ipynb) |
36 | | Exercise 6 | Information Theory 2 | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_information_theory_2.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_information_theory_2.pdf) | [Information Theory 2](https://github.com/slds-lmu/lecture_sl/blob/main/exercises/information-theory-quarto/inserted/information_theory_2_all.html) | [R solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/information-theory-quarto/inserted/sol_information_theory_2_R.ipynb), [Python solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/information-theory-quarto/inserted/sol_information_theory_2_py.ipynb) |
37 | | Exercise 7 | Curse of Dimensionality | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_cod_1.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_cod_1.pdf) | [Curse of Dimensionality](https://github.com/slds-lmu/lecture_sl/blob/main/exercises/cod-quarto/inserted/cod_all.html) | [R solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/cod-quarto/inserted/sol_cod_R.ipynb), [Python solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/cod-quarto/inserted/sol_cod_py.ipynb) |
38 | | Exercise 8 | Regularization 1 | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_regularization_1.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_regularization_1.pdf) | [Regularization 1](https://github.com/slds-lmu/lecture_sl/blob/main/exercises/regularization-quarto/inserted/regularization_1_all.html) | [R solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/regularization-quarto/inserted/sol_regularization_1_R.ipynb), [Python solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/regularization-quarto/inserted/sol_regularization_1_py.ipynb) |
39 | | Exercise 9 | Regularization 2 | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_regularization_2.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_regularization_2.pdf) | [Regularization 2](https://github.com/slds-lmu/lecture_sl/blob/main/exercises/regularization-quarto/inserted/regularization_2_all.html) | [R solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/regularization-quarto/inserted/sol_regularization_2_R.ipynb), [Python solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/regularization-quarto/inserted/sol_regularization_2_py.ipynb) |
40 | | Exercise 10 | Support Vector Machines 1 | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_svm_1.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_svm_1.pdf) | [Support Vector Machines](https://github.com/slds-lmu/lecture_sl/blob/main/exercises/svm-quarto/inserted/svm_1_all.html) | [R solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/svm-quarto/inserted/sol_svm_1_R.ipynb), [Python solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/svm-quarto/inserted/sol_svm_1_py.ipynb) |
41 | | Exercise 11 | Support Vector Machines 2 | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_svm_2.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_svm_2.pdf) | | |
42 | | Exercise 12 | Boosting 1 | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_boosting_1.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_boosting_1.pdf) | | |
43 | | Exercise 13 | Boosting 2 | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_boosting_2.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_boosting_2.pdf) | | |
44 | | Exercise 14 | Feature Selection | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_feature_selection_1.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_feature_selection_1.pdf) | | |
45 | | Exercise 15 | Gaussian Processes 1 | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_gp_1.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_gp_1.pdf) | | |
46 | | Exercise 16 | Gaussian Processes 2 | [Exercise](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/ex_gp_2.pdf), [Solution](https://github.com/slds-lmu/lecture_sl/raw/main/exercises-pdf/sol_gp_2.pdf) | [Gaussian Processes 2](https://github.com/slds-lmu/lecture_sl/blob/main/exercises/gaussian-processes-quarto/inserted/gp_2_all.html) | [R solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/gaussian-processes-quarto/inserted/sol_gp_2_R.ipynb), [Python solution](https://colab.research.google.com/github/slds-lmu/lecture_sl/blob/main/exercises/gaussian-processes-quarto/inserted/sol_gp_2_py.ipynb) |
47 |
48 |
49 |
50 | __Exercises for Chapters 20-23 (LMU Lecture AdvML):__
51 |
52 | | | Exercise | Solution |
53 | | ---------| -------------- | -------------|
54 | | Exercise 1 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/raw/main/exercises-pdf/ex_imbal_1.pdf" >}} | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/raw/main/exercises-pdf/sol_imbal_1.pdf" >}} |
55 | | Exercise 2 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/raw/main/exercises-pdf/ex_imbal_2.pdf" >}} | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/raw/main/exercises-pdf/sol_imbal_2.pdf" >}} |
56 | | Exercise 3 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/raw/main/exercises-pdf/ex_multitarget_1.pdf" >}} | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/raw/main/exercises-pdf/sol_multitarget_1.pdf" >}} |
57 | | Exercise 4 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/raw/main/exercises-pdf/ex_multitarget_2.pdf" >}} | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/raw/main/exercises-pdf/sol_multitarget_2.pdf" >}} |
58 | | Exercise 5 | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/raw/main/exercises-pdf/ex_online_learning_1.pdf" >}} | {{< pdfjs file="https://github.com/slds-lmu/lecture_advml/raw/main/exercises-pdf/sol_online_learning_1.pdf" >}} |
59 |
--------------------------------------------------------------------------------