├── .gitignore ├── L01 ├── 01-ml-overview__notes.pdf └── 01-ml-overview__slides.pdf ├── L02 ├── 02-knn__notes.pdf ├── 02-knn__slides.pdf └── code │ ├── 02_knn_demo.ipynb │ └── iris.csv ├── L03 └── 03-python__notes.pdf ├── L04 ├── 04_scipython__code.ipynb └── images │ ├── numpy-intro │ ├── array_1.png │ ├── array_2.png │ ├── broadcasting-1.png │ ├── broadcasting-2.png │ ├── matmatmul.png │ ├── matmul.png │ ├── numpy-nature-1.png │ ├── random_1.png │ ├── random_2.png │ ├── transpose.png │ └── ufunc.png │ ├── output_171_0.png │ ├── output_173_0.png │ ├── output_174_0.png │ ├── output_176_0.png │ ├── output_178_0.png │ ├── output_180_0.png │ ├── output_181_0.png │ ├── output_183_0.png │ ├── output_185_0.png │ └── output_188_0.png ├── L05 └── code │ ├── 05-bonus-column-transformer.ipynb │ ├── 05-preprocessing-and-sklearn__notes.ipynb │ ├── 05-preprocessing-and-sklearn__slides.pdf │ ├── data │ ├── categoricaldata.csv │ ├── iris.csv │ ├── iris_mod.csv │ └── missingdata.csv │ └── images │ ├── decisionreg.pdf │ ├── eda.pdf │ ├── estimator-api.pdf │ ├── estimator-api.png │ ├── holdout-tuning.pdf │ ├── holdout-tuning.png │ ├── iris-subsampling.pdf │ ├── iris-subsampling.png │ ├── sklearn-pipeline.pdf │ ├── sklearn-pipeline.png │ ├── transformer-api.pdf │ └── transformer-api.png ├── L06 ├── 06-trees__notes.pdf ├── 06-trees__slides.pdf └── code │ ├── 06-trees_demo.ipynb │ └── 06-trees_demo_without_graphviz.ipynb ├── L07 ├── 07-ensembles__notes.pdf ├── 07-ensembles__slides.pdf └── code │ └── 07_code-from-slides.ipynb ├── L08 ├── 08-model-eval-1-intro__notes.pdf └── 08-model-eval-1-intro__slides.pdf ├── L09 ├── 09-eval2-ci__notes.pdf ├── 09-eval2-ci__slides.pdf └── code │ ├── 09-eval2-ci__1_distribution-and-subsampling.ipynb │ ├── 09-eval2-ci__2_holdout-and-repeated-sampling.ipynb │ ├── 09-eval2-ci__3_pessimistic-bias-in-holdout.ipynb │ ├── 09-eval2-ci__4-confidence-intervals_iris.ipynb │ ├── 09-eval2-ci__4-confidence-intervals_mnist.ipynb │ └── 09-eval2-ci__5.ipynb ├── L10 ├── 
10_eval3-cv__notes.pdf ├── 10_eval3-cv__slides.pdf └── code │ ├── 10_04_kfold-eval.ipynb │ ├── 10_06_kfold-sele.ipynb │ └── 10_08_1stderr.ipynb ├── L11 ├── 11-eval4-algo__notes.pdf ├── 11-eval4-algo__slides.pdf └── code │ ├── 11-eval4-algo__nested-cv_compact.ipynb │ ├── 11-eval4-algo__nested-cv_verbose1.ipynb │ ├── 11-eval4-algo__nested-cv_verbose2.ipynb │ └── nested-cv-image.png ├── L12 ├── 12_eval5-metrics__slides.pdf └── code │ ├── 12_1_confusion-matrix.ipynb │ ├── 12_2_pre-recall-f1.ipynb │ ├── 12_3_balanced-acc-Copy1.ipynb │ ├── 12_4_roc.ipynb │ ├── wdbc.data │ └── wdbc.names.txt ├── README.md └── report-template ├── examples ├── example-presentations.md ├── example-proposal.pdf └── example-report.pdf ├── project-presentation-assessment.md ├── project-proposal-assessment.md ├── project-report-assessment.md ├── proposal-latex ├── bibliography.bib ├── figures │ ├── google-scholar.pdf │ └── not-own-figure.pdf ├── ieee.bst ├── proposal.pdf ├── proposal.tex └── statcourse.sty └── report-latex ├── bibliography.bib ├── figures └── google-scholar.pdf ├── ieee.bst ├── report.pdf ├── report.tex └── statcourse.sty /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /L01/01-ml-overview__notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L01/01-ml-overview__notes.pdf -------------------------------------------------------------------------------- /L01/01-ml-overview__slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L01/01-ml-overview__slides.pdf -------------------------------------------------------------------------------- /L02/02-knn__notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L02/02-knn__notes.pdf -------------------------------------------------------------------------------- /L02/02-knn__slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L02/02-knn__slides.pdf -------------------------------------------------------------------------------- 
/L02/code/iris.csv: -------------------------------------------------------------------------------- 1 | Id,SepalLength[cm],SepalWidth[cm],PetalLength[cm],PetalWidth[cm],Species 2 | 1,5.1,3.5,1.4,0.2,Iris-setosa 3 | 2,4.9,3.0,1.4,0.2,Iris-setosa 4 | 3,4.7,3.2,1.3,0.2,Iris-setosa 5 | 4,4.6,3.1,1.5,0.2,Iris-setosa 6 | 5,5.0,3.6,1.4,0.2,Iris-setosa 7 | 6,5.4,3.9,1.7,0.4,Iris-setosa 8 | 7,4.6,3.4,1.4,0.3,Iris-setosa 9 | 8,5.0,3.4,1.5,0.2,Iris-setosa 10 | 9,4.4,2.9,1.4,0.2,Iris-setosa 11 | 10,4.9,3.1,1.5,0.1,Iris-setosa 12 | 11,5.4,3.7,1.5,0.2,Iris-setosa 13 | 12,4.8,3.4,1.6,0.2,Iris-setosa 14 | 13,4.8,3.0,1.4,0.1,Iris-setosa 15 | 14,4.3,3.0,1.1,0.1,Iris-setosa 16 | 15,5.8,4.0,1.2,0.2,Iris-setosa 17 | 16,5.7,4.4,1.5,0.4,Iris-setosa 18 | 17,5.4,3.9,1.3,0.4,Iris-setosa 19 | 18,5.1,3.5,1.4,0.3,Iris-setosa 20 | 19,5.7,3.8,1.7,0.3,Iris-setosa 21 | 20,5.1,3.8,1.5,0.3,Iris-setosa 22 | 21,5.4,3.4,1.7,0.2,Iris-setosa 23 | 22,5.1,3.7,1.5,0.4,Iris-setosa 24 | 23,4.6,3.6,1.0,0.2,Iris-setosa 25 | 24,5.1,3.3,1.7,0.5,Iris-setosa 26 | 25,4.8,3.4,1.9,0.2,Iris-setosa 27 | 26,5.0,3.0,1.6,0.2,Iris-setosa 28 | 27,5.0,3.4,1.6,0.4,Iris-setosa 29 | 28,5.2,3.5,1.5,0.2,Iris-setosa 30 | 29,5.2,3.4,1.4,0.2,Iris-setosa 31 | 30,4.7,3.2,1.6,0.2,Iris-setosa 32 | 31,4.8,3.1,1.6,0.2,Iris-setosa 33 | 32,5.4,3.4,1.5,0.4,Iris-setosa 34 | 33,5.2,4.1,1.5,0.1,Iris-setosa 35 | 34,5.5,4.2,1.4,0.2,Iris-setosa 36 | 35,4.9,3.1,1.5,0.1,Iris-setosa 37 | 36,5.0,3.2,1.2,0.2,Iris-setosa 38 | 37,5.5,3.5,1.3,0.2,Iris-setosa 39 | 38,4.9,3.1,1.5,0.1,Iris-setosa 40 | 39,4.4,3.0,1.3,0.2,Iris-setosa 41 | 40,5.1,3.4,1.5,0.2,Iris-setosa 42 | 41,5.0,3.5,1.3,0.3,Iris-setosa 43 | 42,4.5,2.3,1.3,0.3,Iris-setosa 44 | 43,4.4,3.2,1.3,0.2,Iris-setosa 45 | 44,5.0,3.5,1.6,0.6,Iris-setosa 46 | 45,5.1,3.8,1.9,0.4,Iris-setosa 47 | 46,4.8,3.0,1.4,0.3,Iris-setosa 48 | 47,5.1,3.8,1.6,0.2,Iris-setosa 49 | 48,4.6,3.2,1.4,0.2,Iris-setosa 50 | 49,5.3,3.7,1.5,0.2,Iris-setosa 51 | 50,5.0,3.3,1.4,0.2,Iris-setosa 52 | 
51,7.0,3.2,4.7,1.4,Iris-versicolor 53 | 52,6.4,3.2,4.5,1.5,Iris-versicolor 54 | 53,6.9,3.1,4.9,1.5,Iris-versicolor 55 | 54,5.5,2.3,4.0,1.3,Iris-versicolor 56 | 55,6.5,2.8,4.6,1.5,Iris-versicolor 57 | 56,5.7,2.8,4.5,1.3,Iris-versicolor 58 | 57,6.3,3.3,4.7,1.6,Iris-versicolor 59 | 58,4.9,2.4,3.3,1.0,Iris-versicolor 60 | 59,6.6,2.9,4.6,1.3,Iris-versicolor 61 | 60,5.2,2.7,3.9,1.4,Iris-versicolor 62 | 61,5.0,2.0,3.5,1.0,Iris-versicolor 63 | 62,5.9,3.0,4.2,1.5,Iris-versicolor 64 | 63,6.0,2.2,4.0,1.0,Iris-versicolor 65 | 64,6.1,2.9,4.7,1.4,Iris-versicolor 66 | 65,5.6,2.9,3.6,1.3,Iris-versicolor 67 | 66,6.7,3.1,4.4,1.4,Iris-versicolor 68 | 67,5.6,3.0,4.5,1.5,Iris-versicolor 69 | 68,5.8,2.7,4.1,1.0,Iris-versicolor 70 | 69,6.2,2.2,4.5,1.5,Iris-versicolor 71 | 70,5.6,2.5,3.9,1.1,Iris-versicolor 72 | 71,5.9,3.2,4.8,1.8,Iris-versicolor 73 | 72,6.1,2.8,4.0,1.3,Iris-versicolor 74 | 73,6.3,2.5,4.9,1.5,Iris-versicolor 75 | 74,6.1,2.8,4.7,1.2,Iris-versicolor 76 | 75,6.4,2.9,4.3,1.3,Iris-versicolor 77 | 76,6.6,3.0,4.4,1.4,Iris-versicolor 78 | 77,6.8,2.8,4.8,1.4,Iris-versicolor 79 | 78,6.7,3.0,5.0,1.7,Iris-versicolor 80 | 79,6.0,2.9,4.5,1.5,Iris-versicolor 81 | 80,5.7,2.6,3.5,1.0,Iris-versicolor 82 | 81,5.5,2.4,3.8,1.1,Iris-versicolor 83 | 82,5.5,2.4,3.7,1.0,Iris-versicolor 84 | 83,5.8,2.7,3.9,1.2,Iris-versicolor 85 | 84,6.0,2.7,5.1,1.6,Iris-versicolor 86 | 85,5.4,3.0,4.5,1.5,Iris-versicolor 87 | 86,6.0,3.4,4.5,1.6,Iris-versicolor 88 | 87,6.7,3.1,4.7,1.5,Iris-versicolor 89 | 88,6.3,2.3,4.4,1.3,Iris-versicolor 90 | 89,5.6,3.0,4.1,1.3,Iris-versicolor 91 | 90,5.5,2.5,4.0,1.3,Iris-versicolor 92 | 91,5.5,2.6,4.4,1.2,Iris-versicolor 93 | 92,6.1,3.0,4.6,1.4,Iris-versicolor 94 | 93,5.8,2.6,4.0,1.2,Iris-versicolor 95 | 94,5.0,2.3,3.3,1.0,Iris-versicolor 96 | 95,5.6,2.7,4.2,1.3,Iris-versicolor 97 | 96,5.7,3.0,4.2,1.2,Iris-versicolor 98 | 97,5.7,2.9,4.2,1.3,Iris-versicolor 99 | 98,6.2,2.9,4.3,1.3,Iris-versicolor 100 | 99,5.1,2.5,3.0,1.1,Iris-versicolor 101 | 100,5.7,2.8,4.1,1.3,Iris-versicolor 
102 | 101,6.3,3.3,6.0,2.5,Iris-virginica 103 | 102,5.8,2.7,5.1,1.9,Iris-virginica 104 | 103,7.1,3.0,5.9,2.1,Iris-virginica 105 | 104,6.3,2.9,5.6,1.8,Iris-virginica 106 | 105,6.5,3.0,5.8,2.2,Iris-virginica 107 | 106,7.6,3.0,6.6,2.1,Iris-virginica 108 | 107,4.9,2.5,4.5,1.7,Iris-virginica 109 | 108,7.3,2.9,6.3,1.8,Iris-virginica 110 | 109,6.7,2.5,5.8,1.8,Iris-virginica 111 | 110,7.2,3.6,6.1,2.5,Iris-virginica 112 | 111,6.5,3.2,5.1,2.0,Iris-virginica 113 | 112,6.4,2.7,5.3,1.9,Iris-virginica 114 | 113,6.8,3.0,5.5,2.1,Iris-virginica 115 | 114,5.7,2.5,5.0,2.0,Iris-virginica 116 | 115,5.8,2.8,5.1,2.4,Iris-virginica 117 | 116,6.4,3.2,5.3,2.3,Iris-virginica 118 | 117,6.5,3.0,5.5,1.8,Iris-virginica 119 | 118,7.7,3.8,6.7,2.2,Iris-virginica 120 | 119,7.7,2.6,6.9,2.3,Iris-virginica 121 | 120,6.0,2.2,5.0,1.5,Iris-virginica 122 | 121,6.9,3.2,5.7,2.3,Iris-virginica 123 | 122,5.6,2.8,4.9,2.0,Iris-virginica 124 | 123,7.7,2.8,6.7,2.0,Iris-virginica 125 | 124,6.3,2.7,4.9,1.8,Iris-virginica 126 | 125,6.7,3.3,5.7,2.1,Iris-virginica 127 | 126,7.2,3.2,6.0,1.8,Iris-virginica 128 | 127,6.2,2.8,4.8,1.8,Iris-virginica 129 | 128,6.1,3.0,4.9,1.8,Iris-virginica 130 | 129,6.4,2.8,5.6,2.1,Iris-virginica 131 | 130,7.2,3.0,5.8,1.6,Iris-virginica 132 | 131,7.4,2.8,6.1,1.9,Iris-virginica 133 | 132,7.9,3.8,6.4,2.0,Iris-virginica 134 | 133,6.4,2.8,5.6,2.2,Iris-virginica 135 | 134,6.3,2.8,5.1,1.5,Iris-virginica 136 | 135,6.1,2.6,5.6,1.4,Iris-virginica 137 | 136,7.7,3.0,6.1,2.3,Iris-virginica 138 | 137,6.3,3.4,5.6,2.4,Iris-virginica 139 | 138,6.4,3.1,5.5,1.8,Iris-virginica 140 | 139,6.0,3.0,4.8,1.8,Iris-virginica 141 | 140,6.9,3.1,5.4,2.1,Iris-virginica 142 | 141,6.7,3.1,5.6,2.4,Iris-virginica 143 | 142,6.9,3.1,5.1,2.3,Iris-virginica 144 | 143,5.8,2.7,5.1,1.9,Iris-virginica 145 | 144,6.8,3.2,5.9,2.3,Iris-virginica 146 | 145,6.7,3.3,5.7,2.5,Iris-virginica 147 | 146,6.7,3.0,5.2,2.3,Iris-virginica 148 | 147,6.3,2.5,5.0,1.9,Iris-virginica 149 | 148,6.5,3.0,5.2,2.0,Iris-virginica 150 | 
149,6.2,3.4,5.4,2.3,Iris-virginica 151 | 150,5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /L03/03-python__notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L03/03-python__notes.pdf -------------------------------------------------------------------------------- /L04/images/numpy-intro/array_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/numpy-intro/array_1.png -------------------------------------------------------------------------------- /L04/images/numpy-intro/array_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/numpy-intro/array_2.png -------------------------------------------------------------------------------- /L04/images/numpy-intro/broadcasting-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/numpy-intro/broadcasting-1.png -------------------------------------------------------------------------------- /L04/images/numpy-intro/broadcasting-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/numpy-intro/broadcasting-2.png -------------------------------------------------------------------------------- /L04/images/numpy-intro/matmatmul.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/numpy-intro/matmatmul.png -------------------------------------------------------------------------------- /L04/images/numpy-intro/matmul.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/numpy-intro/matmul.png -------------------------------------------------------------------------------- /L04/images/numpy-intro/numpy-nature-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/numpy-intro/numpy-nature-1.png -------------------------------------------------------------------------------- /L04/images/numpy-intro/random_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/numpy-intro/random_1.png -------------------------------------------------------------------------------- /L04/images/numpy-intro/random_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/numpy-intro/random_2.png -------------------------------------------------------------------------------- /L04/images/numpy-intro/transpose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/numpy-intro/transpose.png 
-------------------------------------------------------------------------------- /L04/images/numpy-intro/ufunc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/numpy-intro/ufunc.png -------------------------------------------------------------------------------- /L04/images/output_171_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/output_171_0.png -------------------------------------------------------------------------------- /L04/images/output_173_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/output_173_0.png -------------------------------------------------------------------------------- /L04/images/output_174_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/output_174_0.png -------------------------------------------------------------------------------- /L04/images/output_176_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/output_176_0.png -------------------------------------------------------------------------------- /L04/images/output_178_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/output_178_0.png 
-------------------------------------------------------------------------------- /L04/images/output_180_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/output_180_0.png -------------------------------------------------------------------------------- /L04/images/output_181_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/output_181_0.png -------------------------------------------------------------------------------- /L04/images/output_183_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/output_183_0.png -------------------------------------------------------------------------------- /L04/images/output_185_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/output_185_0.png -------------------------------------------------------------------------------- /L04/images/output_188_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L04/images/output_188_0.png -------------------------------------------------------------------------------- /L05/code/05-bonus-column-transformer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "STAT 451: Machine Learning (Fall 2020) 
\n", 8 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) \n", 9 | "\n", 10 | "Course website: http://pages.stat.wisc.edu/~sraschka/teaching/stat451-fs2020/" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# L05 - Bonus Notebook: Working with Heterogenous Datasets" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import pandas as pd\n", 27 | "from sklearn.preprocessing import StandardScaler\n", 28 | "from sklearn.preprocessing import OneHotEncoder\n", 29 | "from sklearn.neighbors import KNeighborsClassifier\n", 30 | "from sklearn.decomposition import PCA\n", 31 | "from sklearn.pipeline import Pipeline\n", 32 | "from sklearn.compose import ColumnTransformer\n", 33 | "from sklearn.model_selection import train_test_split" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "- Suppose you have a dataset that has both numerical and categorical features as follows: " 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/html": [ 51 | "
\n", 52 | "\n", 65 | "\n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | "
SepalLength[cm]SepalWidth[cm]PetalLength[cm]PetalWidth[cm]Color_IMadeThisUpSpecies
Id
15.13.51.40.2redIris-setosa
24.93.01.40.2redIris-setosa
34.73.21.30.2redIris-setosa
44.63.11.50.2redIris-setosa
55.03.61.40.2redIris-setosa
\n", 134 | "
" 135 | ], 136 | "text/plain": [ 137 | " SepalLength[cm] SepalWidth[cm] PetalLength[cm] PetalWidth[cm] \\\n", 138 | "Id \n", 139 | "1 5.1 3.5 1.4 0.2 \n", 140 | "2 4.9 3.0 1.4 0.2 \n", 141 | "3 4.7 3.2 1.3 0.2 \n", 142 | "4 4.6 3.1 1.5 0.2 \n", 143 | "5 5.0 3.6 1.4 0.2 \n", 144 | "\n", 145 | " Color_IMadeThisUp Species \n", 146 | "Id \n", 147 | "1 red Iris-setosa \n", 148 | "2 red Iris-setosa \n", 149 | "3 red Iris-setosa \n", 150 | "4 red Iris-setosa \n", 151 | "5 red Iris-setosa " 152 | ] 153 | }, 154 | "execution_count": 2, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "df = pd.read_csv('data/iris_mod.csv', index_col='Id')\n", 161 | "df.head()" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "- As usual, we first tranform the class labels into an integer format:" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 3, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "X = df.drop('Species', axis=1)\n", 178 | "y = df['Species']\n", 179 | "\n", 180 | "label_dict = {'Iris-setosa': 0,\n", 181 | " 'Iris-versicolor': 1,\n", 182 | " 'Iris-virginica': 2}\n", 183 | "\n", 184 | "y = y.map(label_dict)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "- Next, we are going to set up a `Pipeline` that performs certain preprocessing steps only on the numerical features:" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 4, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "numeric_features = ['SepalLength[cm]', 'SepalWidth[cm]', 'PetalLength[cm]', 'PetalWidth[cm]']\n", 201 | "\n", 202 | "numeric_transformer = Pipeline(steps=[\n", 203 | " ('scaler', StandardScaler()),\n", 204 | " ('feature_extraction', PCA(n_components=2))])" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "- 
Above, we weren't interested in performing these preprocessing steps on the categorical feature(s); instead, we apply **different** preprocessing steps to the categorical variable like so:" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 5, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "categorical_features = ['Color_IMadeThisUp']\n", 221 | "categorical_transformer = Pipeline(steps=[\n", 222 | " ('onehot', OneHotEncoder())])" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": {}, 228 | "source": [ 229 | "- Scikit-learn's `ColumnTransformer` now allows us to merge these 2 seperate preprocessing pipelines, which operate on different feature sets in our dataset:" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 6, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "preprocessor = ColumnTransformer(\n", 239 | " transformers=[\n", 240 | " ('num', numeric_transformer, numeric_features),\n", 241 | " ('cat', categorical_transformer, categorical_features)])" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "- As a result, we get a 5 dimensional feature array (design matrix) if we apply this preprocessor. What are these 5 columns?" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 7, 254 | "metadata": {}, 255 | "outputs": [ 256 | { 257 | "data": { 258 | "text/plain": [ 259 | "(150, 5)" 260 | ] 261 | }, 262 | "execution_count": 7, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | } 266 | ], 267 | "source": [ 268 | "temp = preprocessor.fit_transform(X)\n", 269 | "temp.shape" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 8, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "data": { 279 | "text/plain": [ 280 | "array([[-2.26454173, 0.5057039 , 0. , 1. , 0. ],\n", 281 | " [-2.0864255 , -0.65540473, 0. , 1. , 0. 
],\n", 282 | " [-2.36795045, -0.31847731, 0. , 1. , 0. ],\n", 283 | " [-2.30419716, -0.57536771, 0. , 1. , 0. ],\n", 284 | " [-2.38877749, 0.6747674 , 0. , 1. , 0. ]])" 285 | ] 286 | }, 287 | "execution_count": 8, 288 | "metadata": {}, 289 | "output_type": "execute_result" 290 | } 291 | ], 292 | "source": [ 293 | "temp[:5]" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "- The preprocessor can now also be conveniently be used in a Scikit-learn pipeline as shown below:" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 9, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "X_train, X_test, y_train, y_test = train_test_split(X, y, \n", 310 | " test_size=0.2,\n", 311 | " random_state=0)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 10, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "name": "stdout", 321 | "output_type": "stream", 322 | "text": [ 323 | "Test accuracy: 100.0%\n" 324 | ] 325 | } 326 | ], 327 | "source": [ 328 | "clf = Pipeline(steps=[('preprocessor', preprocessor),\n", 329 | " ('classifier', KNeighborsClassifier(p=3))])\n", 330 | "\n", 331 | "\n", 332 | "clf.fit(X_train, y_train)\n", 333 | "print(f'Test accuracy: {clf.score(X_test, y_test)*100}%')" 334 | ] 335 | } 336 | ], 337 | "metadata": { 338 | "kernelspec": { 339 | "display_name": "Python 3", 340 | "language": "python", 341 | "name": "python3" 342 | }, 343 | "language_info": { 344 | "codemirror_mode": { 345 | "name": "ipython", 346 | "version": 3 347 | }, 348 | "file_extension": ".py", 349 | "mimetype": "text/x-python", 350 | "name": "python", 351 | "nbconvert_exporter": "python", 352 | "pygments_lexer": "ipython3", 353 | "version": "3.8.3" 354 | } 355 | }, 356 | "nbformat": 4, 357 | "nbformat_minor": 4 358 | } 359 | -------------------------------------------------------------------------------- /L05/code/05-preprocessing-and-sklearn__slides.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L05/code/05-preprocessing-and-sklearn__slides.pdf -------------------------------------------------------------------------------- /L05/code/data/categoricaldata.csv: -------------------------------------------------------------------------------- 1 | color,size,price,classlabel 2 | green,M,10.1,class1 3 | red,L,13.5,class2 4 | blue,XXL,15.3,class1 -------------------------------------------------------------------------------- /L05/code/data/iris.csv: -------------------------------------------------------------------------------- 1 | Id,SepalLength[cm],SepalWidth[cm],PetalLength[cm],PetalWidth[cm],Species 2 | 1,5.1,3.5,1.4,0.2,Iris-setosa 3 | 2,4.9,3.0,1.4,0.2,Iris-setosa 4 | 3,4.7,3.2,1.3,0.2,Iris-setosa 5 | 4,4.6,3.1,1.5,0.2,Iris-setosa 6 | 5,5.0,3.6,1.4,0.2,Iris-setosa 7 | 6,5.4,3.9,1.7,0.4,Iris-setosa 8 | 7,4.6,3.4,1.4,0.3,Iris-setosa 9 | 8,5.0,3.4,1.5,0.2,Iris-setosa 10 | 9,4.4,2.9,1.4,0.2,Iris-setosa 11 | 10,4.9,3.1,1.5,0.1,Iris-setosa 12 | 11,5.4,3.7,1.5,0.2,Iris-setosa 13 | 12,4.8,3.4,1.6,0.2,Iris-setosa 14 | 13,4.8,3.0,1.4,0.1,Iris-setosa 15 | 14,4.3,3.0,1.1,0.1,Iris-setosa 16 | 15,5.8,4.0,1.2,0.2,Iris-setosa 17 | 16,5.7,4.4,1.5,0.4,Iris-setosa 18 | 17,5.4,3.9,1.3,0.4,Iris-setosa 19 | 18,5.1,3.5,1.4,0.3,Iris-setosa 20 | 19,5.7,3.8,1.7,0.3,Iris-setosa 21 | 20,5.1,3.8,1.5,0.3,Iris-setosa 22 | 21,5.4,3.4,1.7,0.2,Iris-setosa 23 | 22,5.1,3.7,1.5,0.4,Iris-setosa 24 | 23,4.6,3.6,1.0,0.2,Iris-setosa 25 | 24,5.1,3.3,1.7,0.5,Iris-setosa 26 | 25,4.8,3.4,1.9,0.2,Iris-setosa 27 | 26,5.0,3.0,1.6,0.2,Iris-setosa 28 | 27,5.0,3.4,1.6,0.4,Iris-setosa 29 | 28,5.2,3.5,1.5,0.2,Iris-setosa 30 | 29,5.2,3.4,1.4,0.2,Iris-setosa 31 | 30,4.7,3.2,1.6,0.2,Iris-setosa 32 | 31,4.8,3.1,1.6,0.2,Iris-setosa 33 | 32,5.4,3.4,1.5,0.4,Iris-setosa 34 | 33,5.2,4.1,1.5,0.1,Iris-setosa 35 | 
34,5.5,4.2,1.4,0.2,Iris-setosa 36 | 35,4.9,3.1,1.5,0.1,Iris-setosa 37 | 36,5.0,3.2,1.2,0.2,Iris-setosa 38 | 37,5.5,3.5,1.3,0.2,Iris-setosa 39 | 38,4.9,3.1,1.5,0.1,Iris-setosa 40 | 39,4.4,3.0,1.3,0.2,Iris-setosa 41 | 40,5.1,3.4,1.5,0.2,Iris-setosa 42 | 41,5.0,3.5,1.3,0.3,Iris-setosa 43 | 42,4.5,2.3,1.3,0.3,Iris-setosa 44 | 43,4.4,3.2,1.3,0.2,Iris-setosa 45 | 44,5.0,3.5,1.6,0.6,Iris-setosa 46 | 45,5.1,3.8,1.9,0.4,Iris-setosa 47 | 46,4.8,3.0,1.4,0.3,Iris-setosa 48 | 47,5.1,3.8,1.6,0.2,Iris-setosa 49 | 48,4.6,3.2,1.4,0.2,Iris-setosa 50 | 49,5.3,3.7,1.5,0.2,Iris-setosa 51 | 50,5.0,3.3,1.4,0.2,Iris-setosa 52 | 51,7.0,3.2,4.7,1.4,Iris-versicolor 53 | 52,6.4,3.2,4.5,1.5,Iris-versicolor 54 | 53,6.9,3.1,4.9,1.5,Iris-versicolor 55 | 54,5.5,2.3,4.0,1.3,Iris-versicolor 56 | 55,6.5,2.8,4.6,1.5,Iris-versicolor 57 | 56,5.7,2.8,4.5,1.3,Iris-versicolor 58 | 57,6.3,3.3,4.7,1.6,Iris-versicolor 59 | 58,4.9,2.4,3.3,1.0,Iris-versicolor 60 | 59,6.6,2.9,4.6,1.3,Iris-versicolor 61 | 60,5.2,2.7,3.9,1.4,Iris-versicolor 62 | 61,5.0,2.0,3.5,1.0,Iris-versicolor 63 | 62,5.9,3.0,4.2,1.5,Iris-versicolor 64 | 63,6.0,2.2,4.0,1.0,Iris-versicolor 65 | 64,6.1,2.9,4.7,1.4,Iris-versicolor 66 | 65,5.6,2.9,3.6,1.3,Iris-versicolor 67 | 66,6.7,3.1,4.4,1.4,Iris-versicolor 68 | 67,5.6,3.0,4.5,1.5,Iris-versicolor 69 | 68,5.8,2.7,4.1,1.0,Iris-versicolor 70 | 69,6.2,2.2,4.5,1.5,Iris-versicolor 71 | 70,5.6,2.5,3.9,1.1,Iris-versicolor 72 | 71,5.9,3.2,4.8,1.8,Iris-versicolor 73 | 72,6.1,2.8,4.0,1.3,Iris-versicolor 74 | 73,6.3,2.5,4.9,1.5,Iris-versicolor 75 | 74,6.1,2.8,4.7,1.2,Iris-versicolor 76 | 75,6.4,2.9,4.3,1.3,Iris-versicolor 77 | 76,6.6,3.0,4.4,1.4,Iris-versicolor 78 | 77,6.8,2.8,4.8,1.4,Iris-versicolor 79 | 78,6.7,3.0,5.0,1.7,Iris-versicolor 80 | 79,6.0,2.9,4.5,1.5,Iris-versicolor 81 | 80,5.7,2.6,3.5,1.0,Iris-versicolor 82 | 81,5.5,2.4,3.8,1.1,Iris-versicolor 83 | 82,5.5,2.4,3.7,1.0,Iris-versicolor 84 | 83,5.8,2.7,3.9,1.2,Iris-versicolor 85 | 84,6.0,2.7,5.1,1.6,Iris-versicolor 86 | 
85,5.4,3.0,4.5,1.5,Iris-versicolor 87 | 86,6.0,3.4,4.5,1.6,Iris-versicolor 88 | 87,6.7,3.1,4.7,1.5,Iris-versicolor 89 | 88,6.3,2.3,4.4,1.3,Iris-versicolor 90 | 89,5.6,3.0,4.1,1.3,Iris-versicolor 91 | 90,5.5,2.5,4.0,1.3,Iris-versicolor 92 | 91,5.5,2.6,4.4,1.2,Iris-versicolor 93 | 92,6.1,3.0,4.6,1.4,Iris-versicolor 94 | 93,5.8,2.6,4.0,1.2,Iris-versicolor 95 | 94,5.0,2.3,3.3,1.0,Iris-versicolor 96 | 95,5.6,2.7,4.2,1.3,Iris-versicolor 97 | 96,5.7,3.0,4.2,1.2,Iris-versicolor 98 | 97,5.7,2.9,4.2,1.3,Iris-versicolor 99 | 98,6.2,2.9,4.3,1.3,Iris-versicolor 100 | 99,5.1,2.5,3.0,1.1,Iris-versicolor 101 | 100,5.7,2.8,4.1,1.3,Iris-versicolor 102 | 101,6.3,3.3,6.0,2.5,Iris-virginica 103 | 102,5.8,2.7,5.1,1.9,Iris-virginica 104 | 103,7.1,3.0,5.9,2.1,Iris-virginica 105 | 104,6.3,2.9,5.6,1.8,Iris-virginica 106 | 105,6.5,3.0,5.8,2.2,Iris-virginica 107 | 106,7.6,3.0,6.6,2.1,Iris-virginica 108 | 107,4.9,2.5,4.5,1.7,Iris-virginica 109 | 108,7.3,2.9,6.3,1.8,Iris-virginica 110 | 109,6.7,2.5,5.8,1.8,Iris-virginica 111 | 110,7.2,3.6,6.1,2.5,Iris-virginica 112 | 111,6.5,3.2,5.1,2.0,Iris-virginica 113 | 112,6.4,2.7,5.3,1.9,Iris-virginica 114 | 113,6.8,3.0,5.5,2.1,Iris-virginica 115 | 114,5.7,2.5,5.0,2.0,Iris-virginica 116 | 115,5.8,2.8,5.1,2.4,Iris-virginica 117 | 116,6.4,3.2,5.3,2.3,Iris-virginica 118 | 117,6.5,3.0,5.5,1.8,Iris-virginica 119 | 118,7.7,3.8,6.7,2.2,Iris-virginica 120 | 119,7.7,2.6,6.9,2.3,Iris-virginica 121 | 120,6.0,2.2,5.0,1.5,Iris-virginica 122 | 121,6.9,3.2,5.7,2.3,Iris-virginica 123 | 122,5.6,2.8,4.9,2.0,Iris-virginica 124 | 123,7.7,2.8,6.7,2.0,Iris-virginica 125 | 124,6.3,2.7,4.9,1.8,Iris-virginica 126 | 125,6.7,3.3,5.7,2.1,Iris-virginica 127 | 126,7.2,3.2,6.0,1.8,Iris-virginica 128 | 127,6.2,2.8,4.8,1.8,Iris-virginica 129 | 128,6.1,3.0,4.9,1.8,Iris-virginica 130 | 129,6.4,2.8,5.6,2.1,Iris-virginica 131 | 130,7.2,3.0,5.8,1.6,Iris-virginica 132 | 131,7.4,2.8,6.1,1.9,Iris-virginica 133 | 132,7.9,3.8,6.4,2.0,Iris-virginica 134 | 133,6.4,2.8,5.6,2.2,Iris-virginica 135 | 
134,6.3,2.8,5.1,1.5,Iris-virginica 136 | 135,6.1,2.6,5.6,1.4,Iris-virginica 137 | 136,7.7,3.0,6.1,2.3,Iris-virginica 138 | 137,6.3,3.4,5.6,2.4,Iris-virginica 139 | 138,6.4,3.1,5.5,1.8,Iris-virginica 140 | 139,6.0,3.0,4.8,1.8,Iris-virginica 141 | 140,6.9,3.1,5.4,2.1,Iris-virginica 142 | 141,6.7,3.1,5.6,2.4,Iris-virginica 143 | 142,6.9,3.1,5.1,2.3,Iris-virginica 144 | 143,5.8,2.7,5.1,1.9,Iris-virginica 145 | 144,6.8,3.2,5.9,2.3,Iris-virginica 146 | 145,6.7,3.3,5.7,2.5,Iris-virginica 147 | 146,6.7,3.0,5.2,2.3,Iris-virginica 148 | 147,6.3,2.5,5.0,1.9,Iris-virginica 149 | 148,6.5,3.0,5.2,2.0,Iris-virginica 150 | 149,6.2,3.4,5.4,2.3,Iris-virginica 151 | 150,5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /L05/code/data/iris_mod.csv: -------------------------------------------------------------------------------- 1 | Id,SepalLength[cm],SepalWidth[cm],PetalLength[cm],PetalWidth[cm],Color_IMadeThisUp,Species 2 | 1,5.1,3.5,1.4,0.2,red,Iris-setosa 3 | 2,4.9,3,1.4,0.2,red,Iris-setosa 4 | 3,4.7,3.2,1.3,0.2,red,Iris-setosa 5 | 4,4.6,3.1,1.5,0.2,red,Iris-setosa 6 | 5,5,3.6,1.4,0.2,red,Iris-setosa 7 | 6,5.4,3.9,1.7,0.4,red,Iris-setosa 8 | 7,4.6,3.4,1.4,0.3,red,Iris-setosa 9 | 8,5,3.4,1.5,0.2,blue,Iris-setosa 10 | 9,4.4,2.9,1.4,0.2,red,Iris-setosa 11 | 10,4.9,3.1,1.5,0.1,red,Iris-setosa 12 | 11,5.4,3.7,1.5,0.2,blue,Iris-setosa 13 | 12,4.8,3.4,1.6,0.2,red,Iris-setosa 14 | 13,4.8,3,1.4,0.1,red,Iris-setosa 15 | 14,4.3,3,1.1,0.1,red,Iris-setosa 16 | 15,5.8,4,1.2,0.2,red,Iris-setosa 17 | 16,5.7,4.4,1.5,0.4,red,Iris-setosa 18 | 17,5.4,3.9,1.3,0.4,red,Iris-setosa 19 | 18,5.1,3.5,1.4,0.3,red,Iris-setosa 20 | 19,5.7,3.8,1.7,0.3,red,Iris-setosa 21 | 20,5.1,3.8,1.5,0.3,blue,Iris-setosa 22 | 21,5.4,3.4,1.7,0.2,red,Iris-setosa 23 | 22,5.1,3.7,1.5,0.4,red,Iris-setosa 24 | 23,4.6,3.6,1,0.2,red,Iris-setosa 25 | 24,5.1,3.3,1.7,0.5,blue,Iris-setosa 26 | 25,4.8,3.4,1.9,0.2,red,Iris-setosa 27 | 26,5,3,1.6,0.2,red,Iris-setosa 28 | 
27,5,3.4,1.6,0.4,red,Iris-setosa 29 | 28,5.2,3.5,1.5,0.2,red,Iris-setosa 30 | 29,5.2,3.4,1.4,0.2,red,Iris-setosa 31 | 30,4.7,3.2,1.6,0.2,violet,Iris-setosa 32 | 31,4.8,3.1,1.6,0.2,red,Iris-setosa 33 | 32,5.4,3.4,1.5,0.4,red,Iris-setosa 34 | 33,5.2,4.1,1.5,0.1,red,Iris-setosa 35 | 34,5.5,4.2,1.4,0.2,red,Iris-setosa 36 | 35,4.9,3.1,1.5,0.1,red,Iris-setosa 37 | 36,5,3.2,1.2,0.2,violet,Iris-setosa 38 | 37,5.5,3.5,1.3,0.2,red,Iris-setosa 39 | 38,4.9,3.1,1.5,0.1,red,Iris-setosa 40 | 39,4.4,3,1.3,0.2,red,Iris-setosa 41 | 40,5.1,3.4,1.5,0.2,red,Iris-setosa 42 | 41,5,3.5,1.3,0.3,red,Iris-setosa 43 | 42,4.5,2.3,1.3,0.3,red,Iris-setosa 44 | 43,4.4,3.2,1.3,0.2,red,Iris-setosa 45 | 44,5,3.5,1.6,0.6,red,Iris-setosa 46 | 45,5.1,3.8,1.9,0.4,red,Iris-setosa 47 | 46,4.8,3,1.4,0.3,red,Iris-setosa 48 | 47,5.1,3.8,1.6,0.2,red,Iris-setosa 49 | 48,4.6,3.2,1.4,0.2,red,Iris-setosa 50 | 49,5.3,3.7,1.5,0.2,red,Iris-setosa 51 | 50,5,3.3,1.4,0.2,red,Iris-setosa 52 | 51,7,3.2,4.7,1.4,blue,Iris-versicolor 53 | 52,6.4,3.2,4.5,1.5,blue,Iris-versicolor 54 | 53,6.9,3.1,4.9,1.5,blue,Iris-versicolor 55 | 54,5.5,2.3,4,1.3,blue,Iris-versicolor 56 | 55,6.5,2.8,4.6,1.5,blue,Iris-versicolor 57 | 56,5.7,2.8,4.5,1.3,blue,Iris-versicolor 58 | 57,6.3,3.3,4.7,1.6,blue,Iris-versicolor 59 | 58,4.9,2.4,3.3,1,blue,Iris-versicolor 60 | 59,6.6,2.9,4.6,1.3,blue,Iris-versicolor 61 | 60,5.2,2.7,3.9,1.4,blue,Iris-versicolor 62 | 61,5,2,3.5,1,blue,Iris-versicolor 63 | 62,5.9,3,4.2,1.5,blue,Iris-versicolor 64 | 63,6,2.2,4,1,blue,Iris-versicolor 65 | 64,6.1,2.9,4.7,1.4,blue,Iris-versicolor 66 | 65,5.6,2.9,3.6,1.3,blue,Iris-versicolor 67 | 66,6.7,3.1,4.4,1.4,red,Iris-versicolor 68 | 67,5.6,3,4.5,1.5,blue,Iris-versicolor 69 | 68,5.8,2.7,4.1,1,blue,Iris-versicolor 70 | 69,6.2,2.2,4.5,1.5,blue,Iris-versicolor 71 | 70,5.6,2.5,3.9,1.1,violet,Iris-versicolor 72 | 71,5.9,3.2,4.8,1.8,blue,Iris-versicolor 73 | 72,6.1,2.8,4,1.3,blue,Iris-versicolor 74 | 73,6.3,2.5,4.9,1.5,blue,Iris-versicolor 75 | 
74,6.1,2.8,4.7,1.2,blue,Iris-versicolor 76 | 75,6.4,2.9,4.3,1.3,blue,Iris-versicolor 77 | 76,6.6,3,4.4,1.4,blue,Iris-versicolor 78 | 77,6.8,2.8,4.8,1.4,blue,Iris-versicolor 79 | 78,6.7,3,5,1.7,blue,Iris-versicolor 80 | 79,6,2.9,4.5,1.5,blue,Iris-versicolor 81 | 80,5.7,2.6,3.5,1,violet,Iris-versicolor 82 | 81,5.5,2.4,3.8,1.1,blue,Iris-versicolor 83 | 82,5.5,2.4,3.7,1,red,Iris-versicolor 84 | 83,5.8,2.7,3.9,1.2,blue,Iris-versicolor 85 | 84,6,2.7,5.1,1.6,blue,Iris-versicolor 86 | 85,5.4,3,4.5,1.5,blue,Iris-versicolor 87 | 86,6,3.4,4.5,1.6,blue,Iris-versicolor 88 | 87,6.7,3.1,4.7,1.5,blue,Iris-versicolor 89 | 88,6.3,2.3,4.4,1.3,violet,Iris-versicolor 90 | 89,5.6,3,4.1,1.3,blue,Iris-versicolor 91 | 90,5.5,2.5,4,1.3,blue,Iris-versicolor 92 | 91,5.5,2.6,4.4,1.2,blue,Iris-versicolor 93 | 92,6.1,3,4.6,1.4,blue,Iris-versicolor 94 | 93,5.8,2.6,4,1.2,violet,Iris-versicolor 95 | 94,5,2.3,3.3,1,blue,Iris-versicolor 96 | 95,5.6,2.7,4.2,1.3,violet,Iris-versicolor 97 | 96,5.7,3,4.2,1.2,blue,Iris-versicolor 98 | 97,5.7,2.9,4.2,1.3,blue,Iris-versicolor 99 | 98,6.2,2.9,4.3,1.3,blue,Iris-versicolor 100 | 99,5.1,2.5,3,1.1,blue,Iris-versicolor 101 | 100,5.7,2.8,4.1,1.3,blue,Iris-versicolor 102 | 101,6.3,3.3,6,2.5,violet,Iris-virginica 103 | 102,5.8,2.7,5.1,1.9,violet,Iris-virginica 104 | 103,7.1,3,5.9,2.1,violet,Iris-virginica 105 | 104,6.3,2.9,5.6,1.8,violet,Iris-virginica 106 | 105,6.5,3,5.8,2.2,violet,Iris-virginica 107 | 106,7.6,3,6.6,2.1,violet,Iris-virginica 108 | 107,4.9,2.5,4.5,1.7,violet,Iris-virginica 109 | 108,7.3,2.9,6.3,1.8,violet,Iris-virginica 110 | 109,6.7,2.5,5.8,1.8,violet,Iris-virginica 111 | 110,7.2,3.6,6.1,2.5,violet,Iris-virginica 112 | 111,6.5,3.2,5.1,2,violet,Iris-virginica 113 | 112,6.4,2.7,5.3,1.9,violet,Iris-virginica 114 | 113,6.8,3,5.5,2.1,violet,Iris-virginica 115 | 114,5.7,2.5,5,2,violet,Iris-virginica 116 | 115,5.8,2.8,5.1,2.4,violet,Iris-virginica 117 | 116,6.4,3.2,5.3,2.3,violet,Iris-virginica 118 | 117,6.5,3,5.5,1.8,violet,Iris-virginica 119 | 
118,7.7,3.8,6.7,2.2,violet,Iris-virginica 120 | 119,7.7,2.6,6.9,2.3,violet,Iris-virginica 121 | 120,6,2.2,5,1.5,violet,Iris-virginica 122 | 121,6.9,3.2,5.7,2.3,blue,Iris-virginica 123 | 122,5.6,2.8,4.9,2,violet,Iris-virginica 124 | 123,7.7,2.8,6.7,2,violet,Iris-virginica 125 | 124,6.3,2.7,4.9,1.8,violet,Iris-virginica 126 | 125,6.7,3.3,5.7,2.1,blue,Iris-virginica 127 | 126,7.2,3.2,6,1.8,violet,Iris-virginica 128 | 127,6.2,2.8,4.8,1.8,violet,Iris-virginica 129 | 128,6.1,3,4.9,1.8,violet,Iris-virginica 130 | 129,6.4,2.8,5.6,2.1,blue,Iris-virginica 131 | 130,7.2,3,5.8,1.6,violet,Iris-virginica 132 | 131,7.4,2.8,6.1,1.9,violet,Iris-virginica 133 | 132,7.9,3.8,6.4,2,violet,Iris-virginica 134 | 133,6.4,2.8,5.6,2.2,violet,Iris-virginica 135 | 134,6.3,2.8,5.1,1.5,red,Iris-virginica 136 | 135,6.1,2.6,5.6,1.4,violet,Iris-virginica 137 | 136,7.7,3,6.1,2.3,violet,Iris-virginica 138 | 137,6.3,3.4,5.6,2.4,violet,Iris-virginica 139 | 138,6.4,3.1,5.5,1.8,violet,Iris-virginica 140 | 139,6,3,4.8,1.8,blue,Iris-virginica 141 | 140,6.9,3.1,5.4,2.1,violet,Iris-virginica 142 | 141,6.7,3.1,5.6,2.4,violet,Iris-virginica 143 | 142,6.9,3.1,5.1,2.3,violet,Iris-virginica 144 | 143,5.8,2.7,5.1,1.9,violet,Iris-virginica 145 | 144,6.8,3.2,5.9,2.3,violet,Iris-virginica 146 | 145,6.7,3.3,5.7,2.5,violet,Iris-virginica 147 | 146,6.7,3,5.2,2.3,violet,Iris-virginica 148 | 147,6.3,2.5,5,1.9,violet,Iris-virginica 149 | 148,6.5,3,5.2,2,blue,Iris-virginica 150 | 149,6.2,3.4,5.4,2.3,violet,Iris-virginica 151 | 150,5.9,3,5.1,1.8,red,Iris-virginica -------------------------------------------------------------------------------- /L05/code/data/missingdata.csv: -------------------------------------------------------------------------------- 1 | A,B,C,D 2 | 1.,2.,3.,4. 3 | 5.,6.,,8. 
4 | 10.,11.,12., 5 | -------------------------------------------------------------------------------- /L05/code/images/decisionreg.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L05/code/images/decisionreg.pdf -------------------------------------------------------------------------------- /L05/code/images/eda.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L05/code/images/eda.pdf -------------------------------------------------------------------------------- /L05/code/images/estimator-api.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L05/code/images/estimator-api.pdf -------------------------------------------------------------------------------- /L05/code/images/estimator-api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L05/code/images/estimator-api.png -------------------------------------------------------------------------------- /L05/code/images/holdout-tuning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L05/code/images/holdout-tuning.pdf -------------------------------------------------------------------------------- /L05/code/images/holdout-tuning.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L05/code/images/holdout-tuning.png -------------------------------------------------------------------------------- /L05/code/images/iris-subsampling.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L05/code/images/iris-subsampling.pdf -------------------------------------------------------------------------------- /L05/code/images/iris-subsampling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L05/code/images/iris-subsampling.png -------------------------------------------------------------------------------- /L05/code/images/sklearn-pipeline.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L05/code/images/sklearn-pipeline.pdf -------------------------------------------------------------------------------- /L05/code/images/sklearn-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L05/code/images/sklearn-pipeline.png -------------------------------------------------------------------------------- /L05/code/images/transformer-api.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L05/code/images/transformer-api.pdf -------------------------------------------------------------------------------- 
/L05/code/images/transformer-api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L05/code/images/transformer-api.png -------------------------------------------------------------------------------- /L06/06-trees__notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L06/06-trees__notes.pdf -------------------------------------------------------------------------------- /L06/06-trees__slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L06/06-trees__slides.pdf -------------------------------------------------------------------------------- /L06/code/06-trees_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# STAT451: Machine Learning -- L06: Decision Trees in Scikit-Learn Demo" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "STAT 451: Machine Learning (Fall 2020) \n", 15 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) \n", 16 | "\n", 17 | "Course website: http://stat.wisc.edu/~sraschka/teaching/stat451-fs2020/ " 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "Sebastian Raschka \n", 30 | "last updated: 2020-10-15 \n", 31 | "\n", 32 | "CPython 3.8.2\n", 33 | "IPython 7.18.1\n", 34 | "\n", 35 | "numpy 1.19.1\n", 36 | "scipy 1.5.0\n", 37 | "matplotlib 3.3.1\n", 38 | "sklearn 0.23.2\n" 39 | ] 40 | 
} 41 | ], 42 | "source": [ 43 | "%load_ext watermark\n", 44 | "%watermark -d -u -a 'Sebastian Raschka' -v -p numpy,scipy,matplotlib,sklearn" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 2, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "Class labels: [0 1 2]\n" 57 | ] 58 | } 59 | ], 60 | "source": [ 61 | "from sklearn import datasets\n", 62 | "import numpy as np\n", 63 | "\n", 64 | "\n", 65 | "iris = datasets.load_iris()\n", 66 | "X = iris.data[:, [2, 3]]\n", 67 | "y = iris.target\n", 68 | "\n", 69 | "print('Class labels:', np.unique(y))" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "from sklearn.model_selection import train_test_split\n", 79 | "\n", 80 | "\n", 81 | "X_train, X_test, y_train, y_test = train_test_split(\n", 82 | " X, y, test_size=0.3, random_state=1, stratify=y)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 4, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "name": "stdout", 92 | "output_type": "stream", 93 | "text": [ 94 | "Labels counts in y: [50 50 50]\n", 95 | "Labels counts in y_train: [35 35 35]\n", 96 | "Labels counts in y_test: [15 15 15]\n" 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "print('Labels counts in y:', np.bincount(y))\n", 102 | "print('Labels counts in y_train:', np.bincount(y_train))\n", 103 | "print('Labels counts in y_test:', np.bincount(y_test))" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 7, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAuUUlEQVR4nO3de3xU9bX38c/KBQKEmwJyVVAoWnwUFLEWq55avPIItR7Ea+3lUK22tbWPx9o+p7WnzzmeHuurVqxIvaJWREWkVEWOrRdoVZByEUTuGi4CieESIECS9fwxOyEJk2QnmZk9M/m+X695Zfae3/z2mtRmsfdes37m7oiIiKSbnKgDEBERiUcJSkRE0pISlIiIpCUlKBERSUtKUCIikpaUoEREJC1FlqDMrMDM3jOzpWa2wszuijPmPDPbZWZLgse/RRGriIikXl6Exz4AfNndy8wsH5hvZq+4+zv1xr3t7mMjiE9ERCIUWYLy2DeEy4LN/OChbw2LiAgQ7RkUZpYLvA8MBh5w93fjDDvLzJYCW4Afu/uKBuaaBEwCeOhXPzx90kSddIkk2//507OcPuaLUYchGW7isK9bvP2RJih3rwSGm1k34EUzO9ndP6g1ZDFwXHAZ8BJgFjCkgbmmAlMB+Nv9zra4eUxEEmjDB+uVoCRp0qKKz913Am8AF9Xbv9vdy4LnLwP5ZtYj5QGKiEjKRVnF1zM4c8LMOgBfAVbVG9PbzCx4PopYvCUpDlVERCIQ5SW+PsATwX2oHGCGu88xsxsB3H0KcAVwk5lVAPuBia726yIibUKUVXzLgBFx9k+p9XwyMDkRxztEPpsKPkd5TqdETJcUBVV76V++mnwORR2KiEjkIi2SSKVNBZ+jc8/jGNitE8FVw7Ti7pTs3MumHTCoXAUeIiJpUSSRCuU5nTg6TZMTgJlxdLdOaX2GJyKSSm0mQQFpm5yqpXt8IiKp1KYSlIiIZA4lqBR6dd5fGDpiNINP/QJ3/+b+qMMREUlrSlApUllZyc23/YRXZv6RlQvf4pnnX2Tlqo+iDktEJG21mSq+5hg1ZjzFpbuP2N+jexfemzerRXO+t+gfDD5+EMcPOg6AiV8bz0tz5vL5E4e2JlQRkaylBBVHceluht105NevVjx4S4vn3Lx1KwP69a3Z7t+vD+8uWtzi+UREsp0u8aVIvAYYqtoTEWmYElSK9O/bl6LNW2q2N23eSt/evSOMSEQkvSlBpcgZpw9nzbr1bNj4MQcPHmT6C7O47NILog5LRCRt6R5UiuTl5TH5nv/gwvFXUVlVyTevu4phJ50YdVgiImlLCSqOHt27xC2I6NG9S6vmveTCr3DJhV9p1RwiIm2FElQcLS0lFxGRxNE9KBERSUtKUCIikpaUoEREJC1FlqDMrMDM3jOzpWa2wszuijPGzOx3ZrbWzJaZ2WlRxCoiIqkXZZHEAeDL7l5mZvnAfDN7xd3fqTXmYmBI8DgTeDD4KSIiWS6yMyiPKQs284NH/X5A44Bpwdh3gG5m1ieVcSbSN2+6lV6DhnHyqHOjDkVEJO1Feg/KzHLNbAmwHZjn7u/WG9IPKKq1vSnYl5FuuOZKXn3xmajDEBHJCJEmKHevdPfhQH9glJmdXG9IvG6qR3ZdBcxskpktMrNFU19akJD4iotL+NqV11BS8llC5jvn7LM4qnu3hMwlIpLt0qKKz913Am8AF9V7aRMwoNZ2f2ALcbj7VHcf6e4jJ40bnZC4pj31FKUblvLEk08mZD4REQkvyiq+nmbWLXjeAfgKsKresNnA9UE13xeAXe6+NRXxFReXMOfF53lwwgDmvPh8ws6iREQknCjPoPoAfzWzZcBCYveg5pjZjWZ2YzDmZWA9sBb4A/DdVAU37amnGHsCDD2mA2NPQGdRIiIpFlmZubsvA0bE2T+l1nMHbk5lXHD47GnGxG4AXH9GNyZMf56vX3cdRx99VKrDERFpk9LiHlS6qT576lGYD8R+JuIs6qpv3Mh
Z54/lozXr6D90BI888cdEhCsikpXUzTyON958my1F5fxxWVGd/X1L3+ZHt/6gxfM+89iUpgeJiAigBBXX7BefjzoEEZE2T5f4REQkLSlBiYhIWlKCEhGRtKQEJSIiaUkJSkRE0pISVIoUbdrMP11yOSed/iWGnXEO9/3+D1GHJCKS1lRmniJ5eXn85j9+wWnDT2HPnjJO/9IFjPnyOXz+xKFRhyYikpZ0BtWAV+b9hQuvnMDnzjqLC6+cwCvz/tKq+fr0PobThp8CQOfOhZw0dAibt3yaiFBFRLKSzqDieGXeX7j9N7/g+K/24+xBp1KyYRe3/+YXAFw85sutnn/jx5/wj2UfcObI01o9l4hIttIZVBy/fXgKx3+1Hz0HdycnN4eeg7tz/Ff78duHW9+qqKxsL1+79tv89u5f0qVL5wREKyKSnZSg4tjwSRFHD+paZ9/Rg7qy4ZOiBt4RzqFDh/jatd/imgmXc/m4S1s1l4hItlOCimPQsQMo2bCrzr6SDbsYdOyABt7RNHfnWzf/kJOGDuFH37ux6TeIiLRxSlBx3PrtG1n/4mZ2rC2lqrKKHWtLWf/iZm79dssTy4K/v8eTzzzPX96cz/Avns/wL57Py3P/J4FRi4hkFxVJxFFdCPHbh6cw/5OlDDp2AL++7RetKpA4+4tn4ntUtSciEpYSVAMuHvPlhFTsiYhIy0SWoMxsADAN6A1UAVPd/b56Y84DXgI2BLtmuvsvUximiLRhS+Yv49UZr7FjSzE9+/bgc8OGsHrFmprtiyZcwPCzT4k6zKwV5RlUBXCbuy82s87A+2Y2z91X1hv3truPTcQB3R0zS8RUSeHuUYcgIoEl85fx7GMzGDi+DwMHnsS2lSXMfXoux53bjzP+5SR2bSzj2cdmAChJJUlkRRLuvtXdFwfP9wAfAv2SdbyCqr2U7NybtknA3SnZuZeCqr1RhyIiwKszXmPg+D50P6ELObk5WNcqBk3sQ/HKUnJyc+h+QhcGju/DqzNeizrUrJUW96DMbCAwAng3zstnmdlSYAvwY3df0cAck4BJAA/dfiWTxo2u83r/8tVs2gE7SjolMvSEKqjaS//y1VGHISLAji3FDBx4Us12xaFDdB1cyMbp22r2dR1YyOotrft+pDQs8gRlZoXAC8Ct7r673suLgePcvczMLgFmAUPizePuU4GpAPzt/iNOk/I5xKDyuLlNROQIPfv2YNfGMrqf0AWAvPx8dq0to2PPgpoxuzaW0bNvj6hCzHqRJigzyyeWnJ5295n1X6+dsNz9ZTP7vZn1cPfiVMYpIolVv/ggkcUGMx96iXmzXqd8bzkFnQoYM/58Lv/OuGbPc9GEC2L3mMbHzpR8Vw4bpm/luHP7UVVZxa6NZWyctZUrvzEhIXHLkaKs4jPgEeBDd7+3gTG9gW3u7mY2itg9s5IUhikiCVa/+CCRxQYzH3qJuX+ey6Dr+tB1cCG71pYxd/pcgGYnqepYXp3xGqu3FNGzbw8uvPRCVq9Yw8K7PqRn3x5c+Y0JKpBIoijPoEYD1wHLzWxJsO9O4FgAd58CXAHcZGYVwH5goqdrlYOIhFK7+ACI/Rwf29/aP/bzZr3OoOv60P3EWCPm7id2hokw78nXW3QWNfzsU5SAIhRZgnL3+UCjNd/uPhmYnJqIRCQV6hcfQOKKDcr3ltN1cGHduQcXUr5XhQyZSL34RCSlqosPaktUsUFBpwJ2ra0399oyCjoVNPAOSWdKUCKSUhdNuICNs7ZSum43VZVVlK7bzcZZW7lowgWtnnvM+PPZMH0rpav2UFXhlK7aw4bpWxkz/vwERC6pFnmZuYi0LfGKD6or4e7+/j2taitUfZ9p3pOvU763iIJOBVw4/sJQ95/CVBYms/qwpdIxpkRRghKRlKtffJDItkKXf2dcswsiwlQWJrP6sKXSMaZE0iU+EYlc1G2F6h8/3vHCjEm1dIwpkZSgRCRyO7YU03Xg4eq76rZC+3aU1+zrOrC
QHVuS8x39+sePd7wwY1ItHWNKJCUoEYlc/cq+VLcVClNZmMzqw5ZKx5gSSfegRCRyiWwrFLZooPa49u3bs2P6dj438Ti6DiyMe7z6Maai1VFTnyWKmFJJCUpEIpeotkJhiwbijfvw8Q0UPb+D1XuL4h6voerDZBUjhPksqY4p1ZSgRCQtJKKtUNg2SvHGnXTDIErn7edXT/wiqTGGFfazZHM7Jt2DEpGsEbZoIBOKCzIhxmRTghKRrBG2aCATigsyIcZk0yU+EckYYYoGnnzwKY4+rzPte+ZyYEclJW/sYfQ5o4/oUrFw1sK0Ky5obuFGtlOCEpGMELYAorK8ik//WsKBskO0L8ynco/z1mtvc9INg2ret3DWQs4YdQar561Jm+KClhRuZDslKBHJCGGKBl6d8Ron3TCoZgzA2gUb+fSvJUe8b/W8Ndzxux+n+mM0qKWFG9lM96BEJCO0tNtD+565HCg71Oj70oGKIo6kBCUiGaGl3R4O7KikfWF+o+9LByqKOFJkl/jMbAAwDegNVAFT3f2+emMMuA+4BNgH3ODui1Mdq4gkVkuWiAjTNSHemJI39pBzII/SdbsjKzYI83njxf7h4xsoLCzktivuyLqlNMJoMEGZ2eUh3l/u7i+38NgVwG3uvtjMOgPvm9k8d19Za8zFwJDgcSbwYPBTRDJUS5eICNM1Id6Y6266tsn3JVPYz1s/9vbt25NbkMOAK3rWJKxsWkojDHP3+C+YlQAvAdbI+89x9xMSEojZS8Bkd59Xa99DwBvu/kyw/RFwnrtvbXSyv90f/0OJSEJd8ehsrvjhtc16z93fv4fuYzrUKWQoXbeb0nn706poIVFa+nnb0u9p4rCvx80zjV3ie8Xdv9nYpGb2VKuiOjzPQGAE8G69l/oBRbW2NwX7jkhQZjYJmATw0O1XMmnc6ESEJiIJtmNLMQMHnlRnX9eBhazeUtTAOzJbSz9vW/s9xdNgkYS7N/nPojBjmmJmhcALwK3uvrv+y/EO20AsU919pLuPVHISSV9trRigpZ+3rf2e4mmyis/Mcs3sMjP7vpn9qPqRiIObWT6x5PS0u8+MM2QTMKDWdn9gSyKOLSLRuGjCBWyctZXSdbupqqyidN1uNs7aykUTLog6tKRo6edta7+neMJU8f0JKAeWE6u2S4igQu8R4EN3v7eBYbOBW8xsOrHiiF1N3n8SkbRTv4qtkxXyzj1LqayoIjcvh5GjT8+IG/8tqT5s6ZIY2b6URhhhElR/d0/Gb2Q0cB2w3MyWBPvuBI4FcPcpwMvESszXEisz/0YS4hCRJKpfxfbRnzZQtOBTBn+zH0ed1IVda8tYNn0pMx96icu/My7qcBvU0urD6tdbkliyeSmNMMIkqFfM7AJ3fy2RB3b3+TReIYjHSgxvTuRxRSS16rfw2bHyM46/ug/tu7cjJ8/ofmJnmAjznnw9rRNU2PWZJHHCdJJ4B3jRzPab2W4z22Nm9YsZRETiqt/Cp/yzg3Q5oSNVFYfvGHQdXEj53vIowgtNrYhSL0yC+g1wFtDR3bu4e2d379LUm0RE4MhqtIKj2rF73T5y8g7/+dm1toyCTgVRhBeaqupSL8wlvjXAB97QN3pFpM36cMl6brzoe+zfs58OnTtwwVe/wvHDBtUpJKi/9lLPzx/F+j9uZdCEPnTsUcCutWVsmL6VC8df2KIYZj70EvNmvU753nIKOhVw8ohh7Cnb06xChoZofaZoNdhJomaA2ePA8cArwIHq/Y1U3kVPnSREku7fH3+VXz33GifeeCyFx7Wn7OMDrH18M526d2LY9SfU+SN+xqgzWL1iTU3S6FzYmQ/+saImqYwZf36L7j/NfOgl5v55LoMm9qHr4EKKXtvO9r+XcuKVA+k/ok+dJNLcJFW7KKJ+b7z9e8vbZG+8ZGlJJ4lqG4JHu+AhIsLkV96m+8md6XxCB9rlgp3QgXZH53P0OZ1StvbSvFmvM+i6PrFCC6Bs3T6Ov7oPFFa
Sk5vTqkIGrc8UvSYTlLvflYpARCSz7Nl7gB7dCsg1yDHINajYW0nBMXX/HZvM9jzle8vpOrhWAUZxrABj39aaiz0tPr5aDUUvTCeJeWbWrdZ2dzObm9SoRCTtFXZsx4HPDlJd65CXA3kdc9i/7WCdccksJCjoFLuHVbPdIyjAyDl8xailx1dRRPTCVPH1dPed1RvuXgr0SlpEIpIRTu/Xl50ryti5Zi+VFc7ONXs5UHKQbXNL47bn2V26h/u+dy97dpY1PXlIY8afz4bpWyldtYeqCqfwhI6s/+NW2JXb6vZAajUUvTD3oCrN7Fh3/wTAzI6jgYatItJ25OdA7gFYOWUzlZVObq5RaPkc1aE7pfP2H9Ge55VH55CzZSPzZ77Bxd8cm5AYqgsr5j35OuV7iyjoVMCIESPYs2gPC2d/2Kr2QGo1FL0wVXwXAVOBN4Nd5wCT3D19L/Opik8kJcKuB7W7dA9Tv/sfPDC2EzfP2ct3HvwpnbsVNvk+aRsaquJr8hKfu78KnAY8C8wATk/r5CQiaWfBi29y2eAchhzTnssG5zB/5htRhyQZoMEEZWa9q5+7e7G7z3H3P7l7cbwxIiLx7C7dw4p5C7jq9NgZ01WnF7Ji3oKE3ouS7NTYGdTLId4fZoyIZIjinWV87Y4plOzam7A5q8+eji6M3fI+ujCPywbn8PrTcxNeNNEcS+Yv4+7v38NtV9zB3d+/hyXzl0UShzSssSKJU5toCmuAmsaKZJFpf/4bpZ8W8cScBfzomsRUq616bwXvfXqAZ5Zvq7P/0NJ3OSa/PKFFE2G1ZukMSZ0GE5S756YyEBGJVvHOMua8uZAHL+/BTXMW8vWxozm6a6dWz/vDKf96xL7qool7x/bi5jkLOPvy81JaNKGlMzJDmO9BiUgbMO3Pf2Ps4ByG9mrP2ME5PDFnQdKOFXXRhJbOyAxKUCJSc/Z0/WmxM6brT+vEnDcXJvReVLV0KJpQl4jMEGmCMrNHzWy7mX3QwOvnmdkuM1sSPP4t1TGKtAXVZ089gkKGHoV5STuLaqhoIpVnUeoSkRnCdJLAzHKBY2qPr+4s0UqPA5OBaY2MedvdU3sHVSTDFe8s4zt3P8XUn1wX6j7SG4tXs2X7AR5euJmi4j0M6NGZjgX59N22+ohiidpzN2TLhq3897f+g9sf+yl9jutdZ12l8j17ydlfyW/ml3Cooor8vBy65LWj744VKSuWUJeIzNBkgjKz7wE/B7YB1Ws0O9Dq/yXd/S0zG9jaeUSkruZW483+zS0AjLjulww5ai9e0J5FT8a/YFF77oY8d8/T9M3dx4xfP8W5V11Qp2Ju28oSVj69luPOHcDxF/SvWbPpn669uGUftoWGn32KElKaC3OJ7wfAUHcf5u7/K3ik8n/Vs8xsqZm9YmbDUnhckYxUuxqvOfeR3lm+gdKSEh4d14HSkhLeW7mxybkPHDh0xJgtG7ay7YOP+MP4QrZ98BGzp82pqZjLyc3BulYxaGIfileW1qzZNHB8H16d8VprP7pkmTAJqgjYlexAGrAYOM7dTwXuB2Y1NNDMJpnZIjNbNPWl5FUfiaS7llbj3fTrJ7nmlHyG987jmlPy+c5/Hnnlvf7cn3y06Ygxz93zNFefnMcpffK4+uQ8PllTVKdiruLQIboOLmTfjvKafaqgk3gaa3X0IzP7EbAeeMPMflK9L9ifdO6+293LgucvA/lmFrfMxt2nuvtIdx85adzoVIQnknZaWo1XffZ08xmxxQZvPqPdEWdR8ebesXFrneq76rOnG0cVAHDjqAJyKg5RsmZnzZi8/Hx2rS2jY8+Cmn2qoJN4GjuD6hw8PgHmEVvuvXpfSr5RZ2a9zcyC56OIxVuSimOLpIPmth5qaTVe9dlT386x7+f37ZzLNafk861fPV5z/Oq5uxbksHbTDrp1yGHoUVan+q767Kl359iflt6dc7i0fw4rnlhbUzHnu3LYMH0rPT7fPSkVdGphlD0a6yRxF4C
Z/bO7P1f7NTP750Qc3MyeAc4DepjZJmLFGPnB8acAVwA3mVkFsB+Y6E2tDyKSRZpb7FBdjffH5dvr7I9XjVdb0aelPLKtikcW172ndLCylJ7tDvHEnAU1c095Zxf795fTocN+PjtQxb73DlfffbxiPY8ePMij/6i7qq7ZwTprRF146YWsXrGGhXe1bs2m+tTCKLuEWQ9qsbuf1tS+tKL1oCQLFO8sY8Lt9/Hg2I7cNGcfz/33rQlpPdSa47t7nX2dRpzINXd8I2UxNeXu799D9zEdaloYAZSu203pvP3c8bsfRxiZNKah9aAaPIMys4uBS4B+Zva7Wi91ASoSG56I1Fe3IKE8oQ1cW3p8oO6+OEUSUdqxpZiBA0+qs6/rwEJWbymKKCJpjcbuQW0B3gfKg5/Vj9nAhckPTaTtSmXrobDHf+kv7/Hi6+82WiQRNbUwyi4NJih3X+rujwOD3f2JWo+Z7l6auhBFMl+iih0eeO6vR8zz0cfbGDT+TtYUHb7vVP948Y7f2L4HZ75xREHEuf0O0iO/vE5M9YskoqYWRtmlsUt8y4l1jCAopKsjxV/WFcloiSp2qKh6n6PyD9aZ544HnueovP3cfv9zvPjrm+MeL97xG9v3wqbt5FlVnYKI8oOHOFQJIx84HNPGzyrrFElETS2MsktjrY6q/4u7Ofj5ZPDzGmBf0iISyTItWWepuvVQ/XliBQpdauYp3lnG8lXrmDmhE5fPWMeaou1079yxzvHGfmn4Ecd39yb27WPKz27kxl/9gQfHHtNgkcYVj87mih9em9DfV2uphVH2aKzM/GMAMxvt7rW/+XqHmS0Afpns4ESyQaKKHeLN8/bSNbGuDb3zufrkPG6//zm+dOqQOuP+dfJzTRc7xNkX732pLNIQCdPqqJOZnV29YWZfBFJX6yqSwRJV7BBvnmfn/p3lq9ZxU9C14aZRBSxZuZaZ//NOzbhLhxawfNU6rh7eseZ9M19/l5f++l6jBRBXndqR5avWMfbEglbFLdIaYRLUt4AHzGyjmW0Efg98M6lRiWSJ1q6zVFO08EKsaAHga48VYWYc3F/GlcNy6VMY6/7QpzCXE482zuq1v2bcjKW7ufrkPDi0v+b4PfPLObfvwUYLIKxiP1efnMeclWVHxN3cgg+Rlmryi7o1A826BOOjahwbnr6oK2nistsms2X7kU1Q+/bqEfc+U333Pv0ac+a9yWeH2pGX43y2Zz/d8g6ys6IdO8vKaZ8LOTmH/51ZfqiKdrlGhw7t6ZZ3kKJdTrs8Izc3h17dOwOwuXgP+blQ0C4/KIAoqCmA6NcjNmZ76R4qK6uoIqdmX3Xc5532OebMe5OxY87lbwfK0+4elGSelnxR91p3f6p+Y9jqij53vzehEYpkoTBJqCF1iyv2MeVn/xIULQSdHZ74t7jFFmE6UBwe03ABREMxTbj9vpriik4jTmzx5xNpSmOX+Kr/a+3cwENEkqj+0hZ1ixYavkwYZrmNli7JEWa5DZFEaeyLug8FT//L3e+q/0hRfCJtUv2iiHjFDvGKFsIUZbS0cCPMchsiiRSmSOIDM1tgZneb2SVm1jXpUYm0cfWLK/78YdkRxQ7xznzCFGW0tHAj3vvSrZOEZJfGvqgLgLsPNrNjgS8R+/Lu781sp7sPT3ZwIm1J8c4yvnP3U0z9yXU1nSSmLfmUrZ/txXByzXl82Q56dT+8Em39ZTTCLLfR0iU54r0v3TpJSHYJs9xGf2LJ6VzgVOAzYL67/2fyw2shVfFJBqqu2Bs75tyaRBFvXzpJx04SknkaquILc4nvE+BW4BV3P8vdL03r5CSSgWpX7FXfD4q3T6QtCZOgRgDTgKvN7O9mNs3MvpXkuETalHhVdS2ttBPJFk0mKHdfCjwBPAb8hdilvv+b5LhE2ox41XHx2hHpLEramiYTlJktAv4OfBVYBZzj7gMTcXAze9TMtpvZBw28bmb2OzNba2bLzCx9l5mXNiFMm5946zM1Nld
1G6Pa1XHV7YjCVNqp9ZBkqyar+ICL3X1Hko7/ODCZ2CXEuMcGhgSPM4EHg58ikQizrlO89Zkam+uFom3k5Xid6rjNxeW8vxle3tB0pV1z15oSyRRhysyTlZxw97fMbGAjQ8YB0zxWaviOmXUzsz7uvjVZMYk0JMy6Th99vO2I9ZmGDOjVxFzhWw21JCaRTBWmSCJK/YCiWtubgn1HMLNJZrbIzBZNfUk3kyXxwhQt3PHA80esz9TSuRIVk0imSvcEFa82Pu53nNx9qruPdPeRk8aNjjdEpMXCtAeqPnuqvT7T8lXrjrgXlcw1olRIIdmkwQRlZpc39khRfJuAAbW2+wNbUnRskRph2gNVnz3VXp8p3llUa9eISvQ8IumqsXtQ/7uR1xyYmeBY4pkN3GJm04kVR+zS/SeJQpj2QP/4qIj3Dh7ikX/srDMmv11Rne2WthpqSUwimSz0goVJObjZM8B5QA9gG/BzIB/A3adYbPGpycBFwD7gG+6+qMmJ1epIJCXU6kgSodkLFtZmZpcCw4CC6n3u/svWBuXuVzXxugMN1+mKiEjWCvNF3SnAlcD3iBUt/DNwXJLjEhGRNi5MFd8X3f16oDRYqPAs6hYuiIiIJFyYBLU/+LnPzPoCh4BByQtJREQk3D2oOWbWDfhvYDGxCr6HkxmUiIhImAT1a3c/ALxgZnOIFUqUN/EeERGRVglzie/v1U/c/YC776q9T0REJBkaPIMys97E+t51MLMRHG471AXomILYWmzj1pKoQxBpE7ZvLubdue9EHYZksF79esW+xBRHY5f4LgRuINZe6N5a+3cDdyYotqT478X5UYcgkvXcoV/PU9kwf1vUoUgG63XmwAZfa7KThJl9zd1fSHBMSfWHt9ark4SISAY4sU9nvjSkZ9xOEmHuQS0ws0fM7BUAM/u8mX0roRGKiIjUEyZBPQbMBfoG26uBW5MVkIiICIRLUD3cfQZQBeDuFUBlUqMSEZE2L0yC2mtmRxMsFGhmXwB2JTUqERFp88J8UfdHxNZlOsHMFgA9gSuSGpWIiLR5TSYod19sZucCQ4l9F+ojdz+U9MhERKRNazJBmVkB8F3gbGKX+d42synurnZHIiKSNGEu8U0D9gD3B9tXAU8SWxdKREQkKcIkqKHufmqt7b+a2dJEHNzMLgLuA3KBh9397nqvnwe8BGwIds1MxEq+cth/3nIVZWV7jthfWNiZn0x+JoKIRERiwiSof5jZF9z9HQAzOxNY0NoDm1ku8AAwBtgELDSz2e6+st7Qt919bGuPJ/GVle3h+G/ff8T+9Q9/L4JoREQOC5OgzgSuN7NPgu1jgQ/NbDng7n5KC489Cljr7usBzGw6MA6on6BERKQNCpOgLkrSsfsBRbW2NxFLhvWdFVxS3AL82N1XxJvMzCYBkwCuve1XnHPZVQkOV0REUilMmfnHSTp2vOaA9Zu8LgaOc/cyM7sEmAUMiTeZu08FpoKaxYqIZIMwZ1DJsgkYUGu7P7GzpBruvrvW85fN7Pdm1sPdi1MUY0b7/mVnUlFVN1dXHTqI5eZyVK8+AJQWb2fZ5JvILejIsG//Ju48KqQQkShEmaAWAkPMbBCwGZgIXF17QLBo4jZ3dzMbRaw1k1YjDKmiyjnulml19hU99gOOvvgHHDv4JAA+LVpPZWUln07/WZ3CiMLCzjXPVUghIlGILEG5e4WZ3UKsU3ou8Ki7rzCzG4PXpxBrqXSTmVUA+4GJ3tQCVtIsvQccD8CBHr34f4/PiTgaEZHDojyDwt1fBl6ut29KreeTgcmpjktERKIXppu5iIhIyilBiYhIWor0Ep+0XP0KvarKCsCgqoKc/PbBvkrW3zsBy8uvGeeVFWx79mdUHBNbIHlXSTFVXoV5FT+94XDDjtoVeoWFneMWRNQupBARSTQlqAxVv0Jv36fraddrEFsf/wEDvvk7AA4dPMDmh/6FkT95ts571z/8vZqCiJ/eMLbJCj2VkotIFHSJT0R
E0pISlIiIpCUlKBERSUu6BxWxMG2E4rUsqjx4gE8evrnum3JyqdxTzMEDscWOtz/3c7yqksW//XadYRW7i7nxktNjGw47f/ttLK8dvcb+sGbMrpLD3aTU6khEoqAEFbEwbYTitSzacN/V9Ljs9sM7qioB2Pbsz9gy9V8A8Koqek24C8vNr/Pebc/cyYBbngLgUMkm3KvY9sydlMy5t2aMV1U0K0YRkURTgspYRv7RAzCLNYWvOnSAnPz2WE4u/W+OJbMtj96C5ebTrsexNe+qOnQAyzl8ZTfvqL6YxbZPueXBmv1KPiISNd2DEhGRtKQEJSIiaUmX+NLMiodvo7J8H4fKPqvp7FBVVcnmp39Cv2v+s1lzVe3fA1WVHCz+pM5+r6pKWLwiIsmiBBWx+m2Eyou303vir8jNza1ZCiN/7Yd8OuP/8vHk62vGeeUhNk2+7oj5vPIQRfdfGzyvoHjOPdRfvNgrKyiafG2wcfh9Da0HpVZHIhIFy8bllTJ5yffGWg81d72mRM4lIpIMJ/bpzJeG9LR4r+kelIiIpCUlKBERSUuRJigzu8jMPjKztWZ2R5zXzcx+F7y+zMxOiyJOERFJvciKJMwsF3gAGANsAhaa2Wx3X1lr2MXAkOBxJvBg8DNrJbIgQcUNIpLJoqziGwWsdff1AGY2HRgH1E5Q44BpHqvkeMfMuplZH3ffmvpwUyORve3UJ09EMlmUl/j6AUW1tjcF+5o7BgAzm2Rmi8xs0Vuz9YdZRCTTRXkGFa+ssH55eJgxsZ3uU4GpkNll5iIiEhPlGdQmYECt7f7AlhaMERGRLBRlgloIDDGzQWbWDpgIzK43ZjZwfVDN9wVgVzbffxIRkcMiu8Tn7hVmdgswF8gFHnX3FWZ2Y/D6FOBl4BJgLbAP+EZU8YqISGpF2ovP3V8mloRq75tS67kDN9d/n4iIZD91khARkbSkBCUiImlJCUpERNKSEpSIiKQlJSgREUlLSlAiIpKWlKBERCQtKUGJiEhaUoISEZG0pAQlIiJpSQlKRETSkhKUiIikJSUoERFJS0pQIiKSlpSgREQkLSlBiYhIWlKCEhGRtKQEJSIiaSmSJd/N7CjgWWAgsBGY4O6lccZtBPYAlUCFu49MXZQiIhKlqM6g7gBed/chwOvBdkP+yd2HKzmJiLQtUSWoccATwfMngPERxSEiImkqqgR1jLtvBQh+9mpgnAOvmdn7ZjapsQnNbJKZLTKzRW/NfibB4YqISKol7R6Umf0P0DvOSz9txjSj3X2LmfUC5pnZKnd/K95Ad58KTAX4w1vrvdkBi4hIWklagnL3rzT0mpltM7M+7r7VzPoA2xuYY0vwc7uZvQiMAuImKBERyS5RXeKbDXw9eP514KX6A8ysk5l1rn4OXAB8kLIIRUQkUlElqLuBMWa2BhgTbGNmfc3s5WDMMcB8M1sKvAf82d1fjSRaERFJuUi+B+XuJcD5cfZvAS4Jnq8HTk1xaCIikibUSUJERNKSEpSIiKQlJSgREUlLSlAiIpKWlKBERCQtKUGJiEhaUoISEZG0pAQlIiJpSQlKRETSkhKUiIikJSUoERFJS0pQIiKSlpSgREQkLSlBiYhIWopkuY1k69G5XdQhiIhICIXtG05D5u4pDCU1zGySu0+NOo7mUtyppbhTL1NjV9zRyNZLfJOiDqCFFHdqKe7Uy9TYFXcEsjVBiYhIhlOCEhGRtJStCSpTr7kq7tRS3KmXqbEr7ghkZZGEiIhkvmw9gxIRkQynBCUiImkpqxKUmV1kZh+Z2VozuyPqeMIys0fNbLuZfRB1LM1hZgPM7K9m9qGZrTCzH0QdUxhmVmBm75nZ0iDuu6KOqTnMLNfM/mFmc6KOJSwz22hmy81siZktijqesMysm5k9b2argv/Oz4o6pjDMbGjwu65+7DazW6OOq7my5h6UmeUCq4ExwCZgIXCVu6+MNLAQzOwcoAyY5u4nRx1PWGbWB+jj7ovNrDPwPjA+3X/
nZmZAJ3cvM7N8YD7wA3d/J+LQQjGzHwEjgS7uPjbqeMIws43ASHcvjjqW5jCzJ4C33f1hM2sHdHT3nRGH1SzB38bNwJnu/nHU8TRHNp1BjQLWuvt6dz8ITAfGRRxTKO7+FvBZ1HE0l7tvdffFwfM9wIdAv2ijaprHlAWb+cEjI/6lZmb9gUuBh6OOJduZWRfgHOARAHc/mGnJKXA+sC7TkhNkV4LqBxTV2t5EBvyxzBZmNhAYAbwbcSihBJfJlgDbgXnunhFxA78FbgeqIo6juRx4zczeN7NM6W5wPLADeCy4pPqwmXWKOqgWmAg8E3UQLZFNCcri7MuIfxVnOjMrBF4AbnX33VHHE4a7V7r7cKA/MMrM0v7SqpmNBba7+/tRx9ICo939NOBi4Obgsna6ywNOAx509xHAXiBj7m0DBJclLwOeizqWlsimBLUJGFBruz+wJaJY2ozgHs4LwNPuPjPqeJoruGTzBnBRtJGEMhq4LLifMx34spk9FW1I4bj7luDnduBFYpfk090mYFOts+vniSWsTHIxsNjdt0UdSEtkU4JaCAwxs0HBvxomArMjjimrBcUGjwAfuvu9UccTlpn1NLNuwfMOwFeAVZEGFYK7/8Td+7v7QGL/ff/F3a+NOKwmmVmnoIiG4BLZBUDaV6y6+6dAkZkNDXadD6R1AVAcV5Ghl/cgi9aDcvcKM7sFmAvkAo+6+4qIwwrFzJ4BzgN6mNkm4Ofu/ki0UYUyGrgOWB7czwG4091fji6kUPoATwTVTTnADHfPmJLtDHQM8GLs3zPkAX9091ejDSm07wFPB//oXQ98I+J4QjOzjsSqmr8TdSwtlTVl5iIikl2y6RKfiIhkESUoERFJS0pQIiKSlpSgREQkLSlBiYhIWlKCEmkmM7vBzPqGGPe4mV0Rdn8C4rqz1vOBYbrjB7FsMLMbE3D8K4OVBFSyLwmhBCXSfDcATSaoCNzZ9JC4/o+7T2ntwd39WeDbrZ1HpJoSlLRpwZnGKjN7wsyWBWv/dAxeO93M3gwanM41sz7Bmc9IYl/eXGJmHczs38xsoZl9YGZTgw4bYY9/xDGC/W+Y2X8F61atNrMvBfs7mtmMINZnzexdMxtpZncDHYKYng6mzzWzP1hszavXgq4ZTcVzjJm9aLG1spaa2Rdr/Y4eDj7j02b2FTNbYGZrzCwT2hZJBlKCEoGhwFR3PwXYDXw36DF4P3CFu58OPAr8P3d/HlgEXOPuw919PzDZ3c8I1vLqAIRao6mhY9Qakufuo4BbgZ8H+74LlAax/jtwOoC73wHsD2K6Jhg7BHjA3YcBO4GvhQjrd8Cb7n4qsb5z1d1YBgP3AacAJwJXA2cDP6blZ24ijcqaVkcirVDk7guC508B3wdeBU4G5gUnRLnA1gbe/09mdjvQETiK2B/1P4U47tAmjlHdfPd9YGDw/GxiiQJ3/8DMljUy/wZ3XxJnjsZ8Gbg+mL8S2GVm3YO5lgOY2QrgdXd3M1secl6RZlOCEjlyWRYntnzLCndvdIlvMysAfk9stdgiM/sFUBDyuE0d40Dws5LD/18Nffmw1vur52jyEl/IuapqbVehvyOSJLrEJwLHmll1kriK2BLwHwE9q/ebWb6ZDQvG7AE6B8+rk1FxsC5Wc6rzGjtGQ+YDE4Lxnwf+V63XDgWXDVvjdeCmYP5ci60qKxIJJSiR2FL1Xw8ulx1FbIG6g8SSzX+Z2VJgCfDFYPzjwJSgg/sB4A/AcmAWsWVfQmniGA35PbGktgz4V2AZsCt4bSqwrFaRREv8gNgly+XELgs2lTBFkkbdzKVNs9hS9XOCAoe0FywRku/u5WZ2ArEzns8Fya4l8z1O7PM/n6D4zgN+7O6hCkVEGqNrxyKZpSPw1+BSngE3tTQ5BXYB/25mPVr7XSgzu5JYtWEmLkkvaUhnUCIikpZ0D0pERNKSEpSIiKQlJSgREUlLSlAiIpKWlKBERCQt/X/RoeoPLSkUyAAAAABJRU5
ErkJggg==\n", 114 | "text/plain": [ 115 | "
" 116 | ] 117 | }, 118 | "metadata": { 119 | "needs_background": "light" 120 | }, 121 | "output_type": "display_data" 122 | } 123 | ], 124 | "source": [ 125 | "%matplotlib inline\n", 126 | "import matplotlib.pyplot as plt\n", 127 | "from sklearn.tree import DecisionTreeClassifier\n", 128 | "from mlxtend.plotting import plot_decision_regions\n", 129 | "\n", 130 | "\n", 131 | "tree = DecisionTreeClassifier(criterion='entropy', \n", 132 | " max_depth=2, \n", 133 | " random_state=1)\n", 134 | "tree.fit(X_train, y_train)\n", 135 | "\n", 136 | "\n", 137 | "plot_decision_regions(X_train, y_train, tree)\n", 138 | "\n", 139 | "plt.xlabel('petal length [cm]')\n", 140 | "plt.ylabel('petal width [cm]')\n", 141 | "plt.legend(loc='upper left')\n", 142 | "plt.tight_layout()\n", 143 | "plt.show()" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 8, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# you may need to run\n", 153 | "\n", 154 | "# conda install pydotplus\n", 155 | "# conda install graphviz\n", 156 | "\n", 157 | "# in your command line" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 10, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "data": { 167 | "text/plain": [ 168 | "True" 169 | ] 170 | }, 171 | "execution_count": 10, 172 | "metadata": {}, 173 | "output_type": "execute_result" 174 | } 175 | ], 176 | "source": [ 177 | "from pydotplus import graph_from_dot_data\n", 178 | "from sklearn.tree import export_graphviz\n", 179 | "\n", 180 | "\n", 181 | "dot_data = export_graphviz(tree,\n", 182 | " filled=True, \n", 183 | " rounded=True,\n", 184 | " class_names=['Setosa', \n", 185 | " 'Versicolor',\n", 186 | " 'Virginica'],\n", 187 | " feature_names=['petal length', \n", 188 | " 'petal width'],\n", 189 | " out_file=None) \n", 190 | "graph = graph_from_dot_data(dot_data) \n", 191 | "graph.write_png('tree.png')" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | 
"source": [ 198 | "from IPython.display import Image\n", 199 | "\n", 200 | "\n", 201 | "Image('tree.png')" 202 | ] 203 | } 204 | ], 205 | "metadata": { 206 | "kernelspec": { 207 | "display_name": "Python 3", 208 | "language": "python", 209 | "name": "python3" 210 | }, 211 | "language_info": { 212 | "codemirror_mode": { 213 | "name": "ipython", 214 | "version": 3 215 | }, 216 | "file_extension": ".py", 217 | "mimetype": "text/x-python", 218 | "name": "python", 219 | "nbconvert_exporter": "python", 220 | "pygments_lexer": "ipython3", 221 | "version": "3.8.2" 222 | } 223 | }, 224 | "nbformat": 4, 225 | "nbformat_minor": 4 226 | } 227 | -------------------------------------------------------------------------------- /L07/07-ensembles__notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L07/07-ensembles__notes.pdf -------------------------------------------------------------------------------- /L07/07-ensembles__slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L07/07-ensembles__slides.pdf -------------------------------------------------------------------------------- /L08/08-model-eval-1-intro__notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L08/08-model-eval-1-intro__notes.pdf -------------------------------------------------------------------------------- /L08/08-model-eval-1-intro__slides.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L08/08-model-eval-1-intro__slides.pdf -------------------------------------------------------------------------------- /L09/09-eval2-ci__notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L09/09-eval2-ci__notes.pdf -------------------------------------------------------------------------------- /L09/09-eval2-ci__slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L09/09-eval2-ci__slides.pdf -------------------------------------------------------------------------------- /L09/code/09-eval2-ci__5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "STAT 451: Machine Learning (Fall 2020) \n", 8 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) \n", 9 | "\n", 10 | "Course website: http://pages.stat.wisc.edu/~sraschka/teaching/stat451-fs2020/" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# L09: Model Evaluation 2 -- Confidence Intervals and Resampling" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "
\n", 25 | "
\n", 26 | "
" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# 5. Out-of-Bag Bootstrap" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "In this section, we are going to look at the OOB bootstrap method, which I recently implemented in mlxtend." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 1, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "[3 4 0 1 3] [2]\n", 53 | "[0 0 1 4 4] [2 3]\n", 54 | "[1 2 4 2 4] [0 3]\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "from mlxtend.evaluate import BootstrapOutOfBag\n", 60 | "import numpy as np\n", 61 | "\n", 62 | "\n", 63 | "\n", 64 | "\n", 65 | "oob = BootstrapOutOfBag(n_splits=3, random_seed=1)\n", 66 | "for train, test in oob.split(np.array([1, 2, 3, 4, 5])):\n", 67 | " print(train, test)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "The reason why I chose a object-oriented implementation is that we can plug it into scikit-learn's `cross_val_score` function, which is super convenient." 
75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 2, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "from mlxtend.data import iris_data\n", 84 | "from sklearn.tree import DecisionTreeClassifier\n", 85 | "from sklearn.model_selection import cross_val_score\n", 86 | "from sklearn.model_selection import train_test_split\n", 87 | "\n", 88 | "\n", 89 | "X, y = iris_data()\n", 90 | "\n", 91 | "X_train, X_test, y_train, y_test = train_test_split(\n", 92 | " X, y, test_size=0.4, random_state=123, stratify=y)\n", 93 | "\n", 94 | "\n", 95 | "model = DecisionTreeClassifier(random_state=123)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "Below, we are using the standard approach for `cross_val_score` first, which will perform 5-fold cross validation by setting `cv=5`. Note that \n", 103 | "\n", 104 | "- if the model is a scikit-learn classifier, stratified k-fold cross validation will be performed by default, and the reported evaluation metric is the prediction accuracy;\n", 105 | "- if the model is a scikit-learn regressor, standard k-fold cross validation will be performed by default, and the reported evaluation metric is the $R^2$ score on the test folds." 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 3, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "CV scores [0.94444444 1. 1. 
0.88888889 0.94444444]\n", 118 | "Mean CV score 0.9555555555555555\n", 119 | "CV score Std 0.04157397096415492\n" 120 | ] 121 | } 122 | ], 123 | "source": [ 124 | "cv_scores = cross_val_score(model, X_train, y_train, cv=5)\n", 125 | "print('CV scores', cv_scores)\n", 126 | "print('Mean CV score', np.mean(cv_scores))\n", 127 | "print('CV score Std', np.std(cv_scores))" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "Now, let's plug in our OOB object into the `cross_val_score` function:" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 4, 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "name": "stdout", 144 | "output_type": "stream", 145 | "text": [ 146 | "Bootstrap scores [0.93548387 0.96774194 0.96875 0.93023256 0.97058824]\n", 147 | "Mean Bootstrap score 0.9545593199770531\n", 148 | "Score Std 0.017819915677477555\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "# 5 splits\n", 154 | "\n", 155 | "bootstrap_scores = \\\n", 156 | " cross_val_score(model, X_train, y_train, \n", 157 | " cv=BootstrapOutOfBag(n_splits=5, random_seed=123))\n", 158 | "\n", 159 | "print('Bootstrap scores', bootstrap_scores)\n", 160 | "print('Mean Bootstrap score', np.mean(bootstrap_scores))\n", 161 | "print('Score Std', np.std(bootstrap_scores))" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 5, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | "Mean Bootstrap score 0.9483980861793887\n", 174 | "Score Std 0.039817322453014004\n" 175 | ] 176 | } 177 | ], 178 | "source": [ 179 | "bootstrap_scores = \\\n", 180 | " cross_val_score(model, X_train, y_train, \n", 181 | " cv=BootstrapOutOfBag(n_splits=200, random_seed=123))\n", 182 | "\n", 183 | "print('Mean Bootstrap score', np.mean(bootstrap_scores))\n", 184 | "print('Score Std', np.std(bootstrap_scores))" 185 | ] 186 | }, 187 | { 188 | "cell_type": 
"code", 189 | "execution_count": 6, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "name": "stdout", 194 | "output_type": "stream", 195 | "text": [ 196 | "95% Confidence interval: [83.33, 100.00]\n" 197 | ] 198 | } 199 | ], 200 | "source": [ 201 | "lower = np.percentile(bootstrap_scores, 2.5)\n", 202 | "upper = np.percentile(bootstrap_scores, 97.5)\n", 203 | "print('95%% Confidence interval: [%.2f, %.2f]' % (100*lower, 100*upper))" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 7, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "text/plain": [ 214 | "0.95" 215 | ] 216 | }, 217 | "execution_count": 7, 218 | "metadata": {}, 219 | "output_type": "execute_result" 220 | } 221 | ], 222 | "source": [ 223 | "model.fit(X_train, y_train)\n", 224 | "model.score(X_test, y_test)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "
\n", 232 | "
\n", 233 | "
" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "## MLxtend functional bootstrap API" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "### OOB Bootstrap" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "Below is a more convenient way to compute the OOB Boostrap. Note that it has a tendency to be over-pessimistic." 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 8, 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "name": "stdout", 264 | "output_type": "stream", 265 | "text": [ 266 | "Mean Bootstrap score 0.9483980861793887\n", 267 | "Score Std 0.039817322453014004\n" 268 | ] 269 | } 270 | ], 271 | "source": [ 272 | "from mlxtend.evaluate import bootstrap_point632_score\n", 273 | "\n", 274 | "bootstrap_scores = bootstrap_point632_score(model, \n", 275 | " X_train, y_train, \n", 276 | " n_splits=200, \n", 277 | " method='oob',\n", 278 | " random_seed=123)\n", 279 | "\n", 280 | "print('Mean Bootstrap score', np.mean(bootstrap_scores))\n", 281 | "print('Score Std', np.std(bootstrap_scores))" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 9, 287 | "metadata": {}, 288 | "outputs": [ 289 | { 290 | "name": "stdout", 291 | "output_type": "stream", 292 | "text": [ 293 | "95% Confidence interval: [83.33, 100.00]\n" 294 | ] 295 | } 296 | ], 297 | "source": [ 298 | "lower = np.percentile(bootstrap_scores, 2.5)\n", 299 | "upper = np.percentile(bootstrap_scores, 97.5)\n", 300 | "print('95%% Confidence interval: [%.2f, %.2f]' % (100*lower, 100*upper))" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "### .632 Bootstrap" 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "metadata": {}, 313 | "source": [ 314 | "The .632 Bootstrap is the default setting of `bootstrap_point632_score`; it tends to 
be overly optimistic." 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 10, 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "name": "stdout", 324 | "output_type": "stream", 325 | "text": [ 326 | "Mean Bootstrap score 0.9673875904653735\n", 327 | "Score Std 0.02516454779030485\n" 328 | ] 329 | } 330 | ], 331 | "source": [ 332 | "bootstrap_scores = bootstrap_point632_score(model, \n", 333 | " X_train, y_train, \n", 334 | " n_splits=200,\n", 335 | " random_seed=123)\n", 336 | "print('Mean Bootstrap score', np.mean(bootstrap_scores))\n", 337 | "print('Score Std', np.std(bootstrap_scores))" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 11, 343 | "metadata": {}, 344 | "outputs": [ 345 | { 346 | "name": "stdout", 347 | "output_type": "stream", 348 | "text": [ 349 | "95% Confidence interval: [89.47, 100.00]\n" 350 | ] 351 | } 352 | ], 353 | "source": [ 354 | "lower = np.percentile(bootstrap_scores, 2.5)\n", 355 | "upper = np.percentile(bootstrap_scores, 97.5)\n", 356 | "print('95%% Confidence interval: [%.2f, %.2f]' % (100*lower, 100*upper))" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": [ 363 | "### .632+ Bootstrap" 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": {}, 369 | "source": [ 370 | "The .632+ Boostrap method attempts to address the optimistic bias of the regular .632 Boostrap." 
371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": 12, 376 | "metadata": {}, 377 | "outputs": [ 378 | { 379 | "name": "stdout", 380 | "output_type": "stream", 381 | "text": [ 382 | "Mean Bootstrap score 0.9658029542600898\n", 383 | "Score Std 0.027801366648921747\n" 384 | ] 385 | } 386 | ], 387 | "source": [ 388 | "bootstrap_scores = bootstrap_point632_score(model, X_train, y_train, \n", 389 | " n_splits=200, \n", 390 | " method='.632+',\n", 391 | " random_seed=123)\n", 392 | "print('Mean Bootstrap score', np.mean(bootstrap_scores))\n", 393 | "print('Score Std', np.std(bootstrap_scores))" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 13, 399 | "metadata": {}, 400 | "outputs": [ 401 | { 402 | "name": "stdout", 403 | "output_type": "stream", 404 | "text": [ 405 | "95% Confidence interval: [88.40, 100.00]\n" 406 | ] 407 | } 408 | ], 409 | "source": [ 410 | "lower = np.percentile(bootstrap_scores, 2.5)\n", 411 | "upper = np.percentile(bootstrap_scores, 97.5)\n", 412 | "print('95%% Confidence interval: [%.2f, %.2f]' % (100*lower, 100*upper))" 413 | ] 414 | } 415 | ], 416 | "metadata": { 417 | "kernelspec": { 418 | "display_name": "Python 3", 419 | "language": "python", 420 | "name": "python3" 421 | }, 422 | "language_info": { 423 | "codemirror_mode": { 424 | "name": "ipython", 425 | "version": 3 426 | }, 427 | "file_extension": ".py", 428 | "mimetype": "text/x-python", 429 | "name": "python", 430 | "nbconvert_exporter": "python", 431 | "pygments_lexer": "ipython3", 432 | "version": "3.8.2" 433 | } 434 | }, 435 | "nbformat": 4, 436 | "nbformat_minor": 4 437 | } 438 | -------------------------------------------------------------------------------- /L10/10_eval3-cv__notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L10/10_eval3-cv__notes.pdf 
-------------------------------------------------------------------------------- /L10/10_eval3-cv__slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L10/10_eval3-cv__slides.pdf -------------------------------------------------------------------------------- /L10/code/10_04_kfold-eval.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "STAT 451: Machine Learning (Fall 2020) \n", 8 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) \n", 9 | "\n", 10 | "Course website: http://pages.stat.wisc.edu/~sraschka/teaching/stat451-fs2020/" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# L10: Model Evaluation 3 -- Cross-Validation and Model Selection" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "%load_ext watermark\n", 27 | "%watermark -a 'Sebastian Raschka' -u -d -v -p numpy,mlxtend,matplotlib,sklearn" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import numpy as np\n", 37 | "import matplotlib.pyplot as plt" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "

\n", 45 | "\n", 46 | "## K-fold Cross-Validation in Scikit-Learn" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "- Simple demonstration of using a cross-validation iterator in scikit-learn" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "from sklearn.model_selection import KFold\n", 63 | "\n", 64 | "\n", 65 | "rng = np.random.RandomState(123)\n", 66 | "\n", 67 | "y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])\n", 68 | "X = rng.random_sample((y.shape[0], 4))\n", 69 | "\n", 70 | "\n", 71 | "cv = KFold(n_splits=5)\n", 72 | "\n", 73 | "for k in cv.split(X, y):\n", 74 | " print(k)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "

\n", 82 | "\n", 83 | "- In practice, we are usually interested in shuffling the dataset, because if the data records are ordered by class label, this would result in cases where the classes are not well represented in the training and test folds" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "cv = KFold(n_splits=5, random_state=123, shuffle=True)\n", 93 | "\n", 94 | "for k in cv.split(X, y):\n", 95 | " print(k)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "

\n", 103 | "\n", 104 | "- Note that the `KFold` iterator only provides us with the array indices; in practice, we are actually interested in the array values (feature values and class labels)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "cv = KFold(n_splits=5, random_state=123, shuffle=True)\n", 114 | "\n", 115 | "for train_idx, valid_idx in cv.split(X, y):\n", 116 | " print('train labels with shuffling', y[train_idx])" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "

\n", 124 | "\n", 125 | "- As discussed in the lecture, it's important to stratify the splits (very crucial for small datasets!)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "from sklearn.model_selection import StratifiedKFold\n", 135 | "\n", 136 | "cv = StratifiedKFold(n_splits=5, random_state=123, shuffle=True)\n", 137 | "\n", 138 | "for train_idx, valid_idx in cv.split(X, y):\n", 139 | " print('train labels', y[train_idx])" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "

\n", 147 | "\n", 148 | "- After the illustrations of cross-validation above, the next cell demonstrates how we can actually use the iterators provided through scikit-learn to fit and evaluate a learning algorithm" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "from sklearn.tree import DecisionTreeClassifier\n", 158 | "from mlxtend.data import iris_data\n", 159 | "from sklearn.model_selection import train_test_split\n", 160 | "\n", 161 | "\n", 162 | "X, y = iris_data()\n", 163 | "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123, test_size=0.15, \n", 164 | " shuffle=True, stratify=y)\n", 165 | "\n", 166 | "\n", 167 | "\n", 168 | "cv = StratifiedKFold(n_splits=10, random_state=123, shuffle=True)\n", 169 | "\n", 170 | "kfold_acc = 0.\n", 171 | "for train_idx, valid_idx in cv.split(X_train, y_train):\n", 172 | " clf = DecisionTreeClassifier(random_state=123, max_depth=3).fit(X_train[train_idx], y_train[train_idx])\n", 173 | " y_pred = clf.predict(X_train[valid_idx])\n", 174 | " acc = np.mean(y_pred == y_train[valid_idx])*100\n", 175 | " kfold_acc += acc\n", 176 | "kfold_acc /= 10\n", 177 | " \n", 178 | "clf = DecisionTreeClassifier(random_state=123, max_depth=3).fit(X_train, y_train)\n", 179 | "y_pred = clf.predict(X_test)\n", 180 | "test_acc = np.mean(y_pred == y_test)*100\n", 181 | " \n", 182 | "print('Kfold Accuracy: %.2f%%' % kfold_acc)\n", 183 | "print('Test Accuracy: %.2f%%' % test_acc)\n", 184 | "\n" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "

\n", 192 | "\n", 193 | "- Usually, a more convenient way to use cross-validation through scikit-learn is to use the `cross_val_score` function (note that it performs stratifies splitting for classification by default)\n", 194 | "- (remember to ask students about whitespaces according to pep8)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "from sklearn.model_selection import cross_val_score\n", 204 | "\n", 205 | "\n", 206 | "cv_acc = cross_val_score(estimator=DecisionTreeClassifier(random_state=123, max_depth=3),\n", 207 | " X=X_train,\n", 208 | " y=y_train,\n", 209 | " cv=10,\n", 210 | " n_jobs=-1)\n", 211 | "\n", 212 | "print('Kfold Accuracy: %.2f%%' % (np.mean(cv_acc)*100))" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "

\n", 220 | "\n", 221 | "- `cross_val_score` has unfortunately no way to specify a random seed; this is not an issue in regular use cases, but it is not useful if you want to do \"repeated cross-validation\"\n", 222 | "- The next cell illustrates how we can provide our own cross-validation iterator for convenience (note that the results match or \"manual\" `StratifiedKFold` approach we performed earlier)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "from sklearn.model_selection import cross_val_score\n", 232 | "\n", 233 | "\n", 234 | "cv_acc = cross_val_score(estimator=DecisionTreeClassifier(random_state=123, max_depth=3),\n", 235 | " X=X_train,\n", 236 | " y=y_train,\n", 237 | " cv=StratifiedKFold(n_splits=10, random_state=123, shuffle=True),\n", 238 | " n_jobs=-1)\n", 239 | "\n", 240 | "print('Kfold Accuracy: %.2f%%' % (np.mean(cv_acc)*100))" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "

\n", 248 | "\n", 249 | "## Bootstrap" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "- Recall Bootstrapping from 2 lectures ago? Here I is an iterator I implemented analogous to `KFold`" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "from mlxtend.evaluate import BootstrapOutOfBag\n", 266 | "\n", 267 | "oob = BootstrapOutOfBag(n_splits=5, random_seed=99)\n", 268 | "for train, test in oob.split(np.array([1, 2, 3, 4, 5])):\n", 269 | " print(train, test)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "

\n", 277 | "\n", 278 | "- Analagous the `KFold` iterator, we can use it in the `cross_val_score` function for convenience" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "cv_acc = cross_val_score(estimator=DecisionTreeClassifier(random_state=99, max_depth=3),\n", 288 | " X=X_train,\n", 289 | " y=y_train,\n", 290 | " cv=BootstrapOutOfBag(n_splits=200, random_seed=99),\n", 291 | " n_jobs=-1)\n", 292 | "\n", 293 | "print('OOB Bootstrap Accuracy: %.2f%%' % (np.mean(cv_acc)*100))" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "

\n", 301 | "\n", 302 | "- Analagous to the `cross_val_score` method, you can use the `bootstrap_point632_score`, which implements the .632-Bootstrap method (which is less pesimistically biased than the out-of-bag bootstrap)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "from mlxtend.evaluate import bootstrap_point632_score\n", 312 | "\n", 313 | "\n", 314 | "cv_acc = bootstrap_point632_score(estimator=DecisionTreeClassifier(random_state=123, max_depth=3),\n", 315 | " X=X_train,\n", 316 | " y=y_train,\n", 317 | " random_seed=99)\n", 318 | "\n", 319 | "print('OOB Bootstrap Accuracy: %.2f%%' % (np.mean(cv_acc)*100))" 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "metadata": {}, 325 | "source": [ 326 | "- By default, `bootstrap_point632_score` uses the setting `method='.632'`\n", 327 | "- By setting `method='.632+'`, we can also perform the .632+ bootstrap, which corrects for optimism bias, which is shown below" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [ 336 | "cv_acc = bootstrap_point632_score(estimator=DecisionTreeClassifier(random_state=123, max_depth=3),\n", 337 | " X=X_train,\n", 338 | " y=y_train,\n", 339 | " method='.632+',\n", 340 | " n_splits=200,\n", 341 | " random_seed=99)\n", 342 | "\n", 343 | "print('OOB Bootstrap Accuracy: %.2f%%' % (np.mean(cv_acc)*100))" 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "- Finally, for your convenience, you can also set `method='oob'`, to run a regular Out-of-bag boostrap:" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [ 359 | "cv_acc = bootstrap_point632_score(estimator=DecisionTreeClassifier(random_state=123, max_depth=3),\n", 360 | " X=X_train,\n", 361 | 
" y=y_train,\n", 362 | " method='oob',\n", 363 | " n_splits=200,\n", 364 | " random_seed=99)\n", 365 | "\n", 366 | "print('OOB Bootstrap Accuracy: %.2f%%' % (np.mean(cv_acc)*100))" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [] 375 | } 376 | ], 377 | "metadata": { 378 | "kernelspec": { 379 | "display_name": "Python 3", 380 | "language": "python", 381 | "name": "python3" 382 | }, 383 | "language_info": { 384 | "codemirror_mode": { 385 | "name": "ipython", 386 | "version": 3 387 | }, 388 | "file_extension": ".py", 389 | "mimetype": "text/x-python", 390 | "name": "python", 391 | "nbconvert_exporter": "python", 392 | "pygments_lexer": "ipython3", 393 | "version": "3.8.2" 394 | } 395 | }, 396 | "nbformat": 4, 397 | "nbformat_minor": 4 398 | } 399 | -------------------------------------------------------------------------------- /L11/11-eval4-algo__notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L11/11-eval4-algo__notes.pdf -------------------------------------------------------------------------------- /L11/11-eval4-algo__slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L11/11-eval4-algo__slides.pdf -------------------------------------------------------------------------------- /L11/code/11-eval4-algo__nested-cv_compact.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "STAT 451: Machine Learning (Fall 2020) \n", 8 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) \n", 9 | "\n", 10 | "Course website: 
http://pages.stat.wisc.edu/~sraschka/teaching/stat451-fs2020/" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# L11: Model Evaluation 4 -- Algorithm Comparison (Nested Cross-Validation)" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "\n", 25 | "\n", 26 | "## -- Compact version" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "This notebook illustrates how to implement nested cross-validation in scikit-learn. This notebook is a more compact version of the other notebooks [./11-eval4-algo__nested-cv_verbose1.ipynb](./11-eval4-algo__nested-cv_verbose1.ipynb) and [./11-eval4-algo__nested-cv_verbose2.ipynb](./11-eval4-algo__nested-cv_verbose2.ipynb).\n", 34 | "\n", 35 | "Note that due to using `cross_val_score`, we cannot see the best settings for all the outer training folds here. \n", 36 | "\n", 37 | "" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 1, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "Sebastian Raschka 2020-11-24 \n", 50 | "\n", 51 | "CPython 3.8.2\n", 52 | "IPython 7.18.1\n", 53 | "\n", 54 | "sklearn 0.23.2\n", 55 | "mlxtend 0.18.0.dev0\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "%load_ext watermark\n", 61 | "%watermark -a 'Sebastian Raschka' -d -p sklearn,mlxtend -v" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 2, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "import numpy as np\n", 71 | "from sklearn.model_selection import GridSearchCV\n", 72 | "from sklearn.model_selection import train_test_split\n", 73 | "from sklearn.model_selection import StratifiedKFold\n", 74 | "from sklearn.model_selection import cross_val_score\n", 75 | "from sklearn.pipeline import Pipeline\n", 76 | "from sklearn.preprocessing import StandardScaler\n", 77 | "from sklearn.linear_model import 
LogisticRegression\n", 78 | "from sklearn.neighbors import KNeighborsClassifier\n", 79 | "from sklearn.tree import DecisionTreeClassifier\n", 80 | "from sklearn.ensemble import RandomForestClassifier\n", 81 | "from sklearn.svm import SVC\n", 82 | "from mlxtend.data import mnist_data\n", 83 | "from sklearn.metrics import accuracy_score\n", 84 | "\n", 85 | "# Loading and splitting the dataset\n", 86 | "# Note that this is a small (stratified) subset\n", 87 | "# of MNIST; it consists of 5000 samples only, that is,\n", 88 | "# 10% of the original MNIST dataset\n", 89 | "# http://yann.lecun.com/exdb/mnist/\n", 90 | "X, y = mnist_data()\n", 91 | "X = X.astype(np.float32)\n", 92 | "X_train, X_test, y_train, y_test = train_test_split(X, y,\n", 93 | " test_size=0.2,\n", 94 | " random_state=1,\n", 95 | " stratify=y)\n", 96 | "\n", 97 | "# Initializing Classifiers\n", 98 | "clf1 = LogisticRegression(multi_class='multinomial',\n", 99 | " solver='newton-cg',\n", 100 | " random_state=1)\n", 101 | "clf2 = KNeighborsClassifier(algorithm='ball_tree',\n", 102 | " leaf_size=50)\n", 103 | "clf3 = DecisionTreeClassifier(random_state=1)\n", 104 | "clf4 = SVC(random_state=1)\n", 105 | "clf5 = RandomForestClassifier(random_state=1)\n", 106 | "\n", 107 | "# Building the pipelines\n", 108 | "pipe1 = Pipeline([('std', StandardScaler()),\n", 109 | " ('clf1', clf1)])\n", 110 | "\n", 111 | "pipe2 = Pipeline([('std', StandardScaler()),\n", 112 | " ('clf2', clf2)])\n", 113 | "\n", 114 | "pipe4 = Pipeline([('std', StandardScaler()),\n", 115 | " ('clf4', clf4)])\n", 116 | "\n", 117 | "\n", 118 | "# Setting up the parameter grids\n", 119 | "param_grid1 = [{'clf1__penalty': ['l2'],\n", 120 | " 'clf1__C': np.power(10., np.arange(-4, 4))}]\n", 121 | "\n", 122 | "param_grid2 = [{'clf2__n_neighbors': list(range(1, 10)),\n", 123 | " 'clf2__p': [1, 2]}]\n", 124 | "\n", 125 | "param_grid3 = [{'max_depth': list(range(1, 10)) + [None],\n", 126 | " 'criterion': ['gini', 'entropy']}]\n", 127 | "\n", 128 | 
"param_grid4 = [{'clf4__kernel': ['rbf'],\n", 129 | " 'clf4__C': np.power(10., np.arange(-4, 4)),\n", 130 | " 'clf4__gamma': np.power(10., np.arange(-5, 0))},\n", 131 | " {'clf4__kernel': ['linear'],\n", 132 | " 'clf4__C': np.power(10., np.arange(-4, 4))}]\n", 133 | "\n", 134 | "param_grid5 = [{'n_estimators': [10, 100, 500, 1000, 10000]}]" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 3, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "# Setting up multiple GridSearchCV objects, 1 for each algorithm\n", 144 | "gridcvs = {}\n", 145 | "inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=1)\n", 146 | "\n", 147 | "for pgrid, est, name in zip((param_grid1, param_grid2,\n", 148 | " param_grid3, param_grid4, param_grid5),\n", 149 | " (pipe1, pipe2, clf3, pipe4, clf5),\n", 150 | " ('Softmax', 'KNN', 'DTree', 'SVM', 'RForest')):\n", 151 | " gcv = GridSearchCV(estimator=est,\n", 152 | " param_grid=pgrid,\n", 153 | " scoring='accuracy',\n", 154 | " n_jobs=-1,\n", 155 | " cv=inner_cv,\n", 156 | " verbose=0,\n", 157 | " refit=True)\n", 158 | " gridcvs[name] = gcv" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 4, 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "name": "stdout", 168 | "output_type": "stream", 169 | "text": [ 170 | "DTree | outer ACC 76.75% +/- 1.32\n", 171 | "KNN | outer ACC 91.10% +/- 0.58\n", 172 | "RForest | outer ACC 93.98% +/- 0.98\n", 173 | "SVM | outer ACC 91.80% +/- 1.00\n", 174 | "Softmax | outer ACC 89.97% +/- 0.57\n" 175 | ] 176 | } 177 | ], 178 | "source": [ 179 | "outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)\n", 180 | "\n", 181 | "\n", 182 | "for name, gs_est in sorted(gridcvs.items()):\n", 183 | " nested_score = cross_val_score(gs_est, \n", 184 | " X=X_train, \n", 185 | " y=y_train, \n", 186 | " cv=outer_cv,\n", 187 | " n_jobs=-1)\n", 188 | " print('%s | outer ACC %.2f%% +/- %.2f' % \n", 189 | " (name, nested_score.mean() * 
100, nested_score.std() * 100))" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "------" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "- Determine the best algorithm from the experiment above; e.g., we find that Random Forest is performing best\n", 204 | "- Now, select hyperparameters for the model based on regular k-fold on the whole training set" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 5, 210 | "metadata": {}, 211 | "outputs": [ 212 | { 213 | "name": "stdout", 214 | "output_type": "stream", 215 | "text": [ 216 | "Fitting 2 folds for each of 5 candidates, totalling 10 fits\n" 217 | ] 218 | }, 219 | { 220 | "name": "stderr", 221 | "output_type": "stream", 222 | "text": [ 223 | "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.\n", 224 | "[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  1.5min finished\n" 225 | ] 226 | }, 227 | { 228 | "data": { 229 | "text/plain": [ 230 | "GridSearchCV(cv=StratifiedKFold(n_splits=2, random_state=1, shuffle=True),\n", 231 | "             estimator=RandomForestClassifier(random_state=1), n_jobs=-1,\n", 232 | "             param_grid=[{'n_estimators': [10, 100, 500, 1000, 10000]}],\n", 233 | "             scoring='accuracy', verbose=1)" 234 | ] 235 | }, 236 | "execution_count": 5, 237 | "metadata": {}, 238 | "output_type": "execute_result" 239 | } 240 | ], 241 | "source": [ 242 | "gcv_model_select = GridSearchCV(estimator=clf5,\n", 243 | "                                param_grid=param_grid5,\n", 244 | "                                scoring='accuracy',\n", 245 | "                                n_jobs=-1,\n", 246 | "                                cv=inner_cv,\n", 247 | "                                verbose=1,\n", 248 | "                                refit=True)\n", 249 | "\n", 250 | "gcv_model_select.fit(X_train, y_train)" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 6, 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "name": "stdout", 260 | "output_type": "stream", 261 | "text": [ 262 | "Accuracy 93.30% (average over k-fold CV 
test folds)\n", 263 | "Best Parameters: {'n_estimators': 10000}\n", 264 | "Training Accuracy: 100.00%\n", 265 | "Test Accuracy: 94.00%\n" 266 | ] 267 | } 268 | ], 269 | "source": [ 270 | "best_model = gcv_model_select.best_estimator_\n", 271 | "\n", 272 | "\n", 273 | "## We can skip the next step because we set refit=True\n", 274 | "## so scikit-learn has already fit the model to the\n", 275 | "## whole training set\n", 276 | "\n", 277 | "# best_model.fit(X_train, y_train)\n", 278 | "\n", 279 | "\n", 280 | "train_acc = accuracy_score(y_true=y_train, y_pred=best_model.predict(X_train))\n", 281 | "test_acc = accuracy_score(y_true=y_test, y_pred=best_model.predict(X_test))\n", 282 | "\n", 283 | "print('Accuracy %.2f%% (average over k-fold CV test folds)' %\n", 284 | " (100 * gcv_model_select.best_score_))\n", 285 | "print('Best Parameters: %s' % gcv_model_select.best_params_)\n", 286 | "\n", 287 | "print('Training Accuracy: %.2f%%' % (100 * train_acc))\n", 288 | "print('Test Accuracy: %.2f%%' % (100 * test_acc))" 289 | ] 290 | } 291 | ], 292 | "metadata": { 293 | "anaconda-cloud": {}, 294 | "kernelspec": { 295 | "display_name": "Python 3", 296 | "language": "python", 297 | "name": "python3" 298 | }, 299 | "language_info": { 300 | "codemirror_mode": { 301 | "name": "ipython", 302 | "version": 3 303 | }, 304 | "file_extension": ".py", 305 | "mimetype": "text/x-python", 306 | "name": "python", 307 | "nbconvert_exporter": "python", 308 | "pygments_lexer": "ipython3", 309 | "version": "3.8.2" 310 | } 311 | }, 312 | "nbformat": 4, 313 | "nbformat_minor": 4 314 | } 315 | -------------------------------------------------------------------------------- /L11/code/11-eval4-algo__nested-cv_verbose1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "STAT 451: Machine Learning (Fall 2020) \n", 8 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) 
\n", 9 | "\n", 10 | "Course website: http://pages.stat.wisc.edu/~sraschka/teaching/stat451-fs2020/" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# L11: Model Evaluation 4 -- Algorithm Comparison (Nested Cross-Validation)\n", 18 | "\n", 19 | "\n", 20 | "## verbose version 1 (using `StratifiedKFold` directly)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "This notebook illustrates how to implement nested cross-validation in scikit-learn.\n", 28 | "\n", 29 | "\n" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 1, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "Sebastian Raschka 2020-11-24 \n", 42 | "\n", 43 | "CPython 3.8.2\n", 44 | "IPython 7.18.1\n", 45 | "\n", 46 | "sklearn 0.23.2\n", 47 | "mlxtend 0.18.0.dev0\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "%load_ext watermark\n", 53 | "%watermark -a 'Sebastian Raschka' -d -p sklearn,mlxtend -v" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "- Setting up classifiers (or pipelines) and the parameter grids for model tuning\n", 61 | "- Remember, the hyperparameter tuning takes place in the inner loop" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 2, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "import numpy as np\n", 71 | "from sklearn.model_selection import GridSearchCV\n", 72 | "from sklearn.model_selection import train_test_split\n", 73 | "from sklearn.model_selection import StratifiedKFold\n", 74 | "from sklearn.model_selection import cross_val_score\n", 75 | "from sklearn.pipeline import Pipeline\n", 76 | "from sklearn.preprocessing import StandardScaler\n", 77 | "from sklearn.linear_model import LogisticRegression\n", 78 | "from sklearn.neighbors import KNeighborsClassifier\n", 79 | "from sklearn.tree import 
DecisionTreeClassifier\n", 80 | "from sklearn.ensemble import RandomForestClassifier\n", 81 | "from sklearn.svm import SVC\n", 82 | "from mlxtend.data import mnist_data\n", 83 | "from sklearn.metrics import accuracy_score\n", 84 | "\n", 85 | "# Loading and splitting the dataset\n", 86 | "# Note that this is a small (stratified) subset\n", 87 | "# of MNIST; it consists of 5000 samples only, that is,\n", 88 | "# 10% of the original MNIST dataset\n", 89 | "# http://yann.lecun.com/exdb/mnist/\n", 90 | "X, y = mnist_data()\n", 91 | "X = X.astype(np.float32)\n", 92 | "X_train, X_test, y_train, y_test = train_test_split(X, y,\n", 93 | " test_size=0.2,\n", 94 | " random_state=1,\n", 95 | " stratify=y)\n", 96 | "\n", 97 | "# Initializing Classifiers\n", 98 | "clf1 = LogisticRegression(multi_class='multinomial',\n", 99 | " solver='newton-cg',\n", 100 | " random_state=1)\n", 101 | "clf2 = KNeighborsClassifier(algorithm='ball_tree',\n", 102 | " leaf_size=50)\n", 103 | "clf3 = DecisionTreeClassifier(random_state=1)\n", 104 | "clf4 = SVC(random_state=1)\n", 105 | "clf5 = RandomForestClassifier(random_state=1)\n", 106 | "\n", 107 | "# Building the pipelines\n", 108 | "pipe1 = Pipeline([('std', StandardScaler()),\n", 109 | " ('clf1', clf1)])\n", 110 | "\n", 111 | "pipe2 = Pipeline([('std', StandardScaler()),\n", 112 | " ('clf2', clf2)])\n", 113 | "\n", 114 | "pipe4 = Pipeline([('std', StandardScaler()),\n", 115 | " ('clf4', clf4)])\n", 116 | "\n", 117 | "\n", 118 | "# Setting up the parameter grids\n", 119 | "param_grid1 = [{'clf1__penalty': ['l2'],\n", 120 | " 'clf1__C': np.power(10., np.arange(-4, 4))}]\n", 121 | "\n", 122 | "param_grid2 = [{'clf2__n_neighbors': list(range(1, 10)),\n", 123 | " 'clf2__p': [1, 2]}]\n", 124 | "\n", 125 | "param_grid3 = [{'max_depth': list(range(1, 10)) + [None],\n", 126 | " 'criterion': ['gini', 'entropy']}]\n", 127 | "\n", 128 | "param_grid4 = [{'clf4__kernel': ['rbf'],\n", 129 | " 'clf4__C': np.power(10., np.arange(-4, 4)),\n", 130 | " 
'clf4__gamma': np.power(10., np.arange(-5, 0))},\n", 131 | " {'clf4__kernel': ['linear'],\n", 132 | " 'clf4__C': np.power(10., np.arange(-4, 4))}]\n", 133 | "\n", 134 | "param_grid5 = [{'n_estimators': [10, 100, 500, 1000, 10000]}]" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 3, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "# Setting up multiple GridSearchCV objects, 1 for each algorithm\n", 144 | "gridcvs = {}\n", 145 | "inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=1)\n", 146 | "\n", 147 | "for pgrid, est, name in zip((param_grid1, param_grid2, param_grid3, param_grid4, param_grid5),\n", 148 | " (pipe1, pipe2, clf3, pipe4, clf5),\n", 149 | " ('Softmax', 'KNN', 'DTree', 'SVM', 'RForest')):\n", 150 | " gcv = GridSearchCV(estimator=est,\n", 151 | " param_grid=pgrid,\n", 152 | " scoring='accuracy',\n", 153 | " n_jobs=-1,\n", 154 | " cv=inner_cv,\n", 155 | " verbose=0,\n", 156 | " refit=True)\n", 157 | " gridcvs[name] = gcv" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "- Next, we define the outer loop\n", 165 | "- The training folds from the outer loop will be used in the inner loop for model tuning\n", 166 | "- The inner loop selects the best hyperparameter setting\n", 167 | "- This best hyperparameter setting can be evaluated on both the avg. over the inner test folds and the 1 corresponding test fold of the outer loop" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 4, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "-------------------------------------------------- \n", 180 | "\n", 181 | "Algorithm: DTree\n", 182 | " Inner loop:\n", 183 | "\n", 184 | " Best ACC (avg. 
of inner test folds) 72.59%\n", 185 | " Best parameters: {'criterion': 'gini', 'max_depth': None}\n", 186 | " ACC (on outer test fold) 75.50%\n", 187 | "\n", 188 | " Best ACC (avg. of inner test folds) 74.03%\n", 189 | " Best parameters: {'criterion': 'entropy', 'max_depth': 7}\n", 190 | " ACC (on outer test fold) 78.25%\n", 191 | "\n", 192 | " Best ACC (avg. of inner test folds) 73.88%\n", 193 | " Best parameters: {'criterion': 'entropy', 'max_depth': 9}\n", 194 | " ACC (on outer test fold) 77.38%\n", 195 | "\n", 196 | " Best ACC (avg. of inner test folds) 73.38%\n", 197 | " Best parameters: {'criterion': 'entropy', 'max_depth': 8}\n", 198 | " ACC (on outer test fold) 74.88%\n", 199 | "\n", 200 | " Best ACC (avg. of inner test folds) 73.91%\n", 201 | " Best parameters: {'criterion': 'entropy', 'max_depth': 8}\n", 202 | " ACC (on outer test fold) 77.75%\n", 203 | "\n", 204 | " Outer Loop:\n", 205 | " ACC 76.75% +/- 1.32\n", 206 | "-------------------------------------------------- \n", 207 | "\n", 208 | "Algorithm: KNN\n", 209 | " Inner loop:\n", 210 | "\n", 211 | " Best ACC (avg. of inner test folds) 88.38%\n", 212 | " Best parameters: {'clf2__n_neighbors': 1, 'clf2__p': 1}\n", 213 | " ACC (on outer test fold) 91.62%\n", 214 | "\n", 215 | " Best ACC (avg. of inner test folds) 88.75%\n", 216 | " Best parameters: {'clf2__n_neighbors': 1, 'clf2__p': 1}\n", 217 | " ACC (on outer test fold) 91.88%\n", 218 | "\n", 219 | " Best ACC (avg. of inner test folds) 89.84%\n", 220 | " Best parameters: {'clf2__n_neighbors': 1, 'clf2__p': 1}\n", 221 | " ACC (on outer test fold) 90.88%\n", 222 | "\n", 223 | " Best ACC (avg. of inner test folds) 89.50%\n", 224 | " Best parameters: {'clf2__n_neighbors': 1, 'clf2__p': 1}\n", 225 | " ACC (on outer test fold) 90.88%\n", 226 | "\n", 227 | " Best ACC (avg. 
of inner test folds) 89.06%\n", 228 | " Best parameters: {'clf2__n_neighbors': 1, 'clf2__p': 1}\n", 229 | " ACC (on outer test fold) 90.25%\n", 230 | "\n", 231 | " Outer Loop:\n", 232 | " ACC 91.10% +/- 0.58\n", 233 | "-------------------------------------------------- \n", 234 | "\n", 235 | "Algorithm: RForest\n", 236 | " Inner loop:\n", 237 | "\n", 238 | " Best ACC (avg. of inner test folds) 92.59%\n", 239 | " Best parameters: {'n_estimators': 1000}\n", 240 | " ACC (on outer test fold) 95.00%\n", 241 | "\n", 242 | " Best ACC (avg. of inner test folds) 92.59%\n", 243 | " Best parameters: {'n_estimators': 10000}\n", 244 | " ACC (on outer test fold) 94.75%\n", 245 | "\n", 246 | " Best ACC (avg. of inner test folds) 92.94%\n", 247 | " Best parameters: {'n_estimators': 10000}\n", 248 | " ACC (on outer test fold) 94.50%\n", 249 | "\n", 250 | " Best ACC (avg. of inner test folds) 93.00%\n", 251 | " Best parameters: {'n_estimators': 10000}\n", 252 | " ACC (on outer test fold) 92.50%\n", 253 | "\n", 254 | " Best ACC (avg. of inner test folds) 92.75%\n", 255 | " Best parameters: {'n_estimators': 500}\n", 256 | " ACC (on outer test fold) 93.12%\n", 257 | "\n", 258 | " Outer Loop:\n", 259 | " ACC 93.98% +/- 0.98\n", 260 | "-------------------------------------------------- \n", 261 | "\n", 262 | "Algorithm: SVM\n", 263 | " Inner loop:\n", 264 | "\n", 265 | " Best ACC (avg. of inner test folds) 90.75%\n", 266 | " Best parameters: {'clf4__C': 10.0, 'clf4__gamma': 0.001, 'clf4__kernel': 'rbf'}\n", 267 | " ACC (on outer test fold) 92.12%\n", 268 | "\n", 269 | " Best ACC (avg. of inner test folds) 90.22%\n", 270 | " Best parameters: {'clf4__C': 0.01, 'clf4__kernel': 'linear'}\n", 271 | " ACC (on outer test fold) 92.88%\n", 272 | "\n", 273 | " Best ACC (avg. of inner test folds) 90.91%\n", 274 | " Best parameters: {'clf4__C': 0.01, 'clf4__kernel': 'linear'}\n", 275 | " ACC (on outer test fold) 90.50%\n", 276 | "\n", 277 | " Best ACC (avg. 
of inner test folds) 90.53%\n", 278 | " Best parameters: {'clf4__C': 10.0, 'clf4__gamma': 0.001, 'clf4__kernel': 'rbf'}\n", 279 | " ACC (on outer test fold) 92.75%\n", 280 | "\n", 281 | " Best ACC (avg. of inner test folds) 90.12%\n", 282 | " Best parameters: {'clf4__C': 0.001, 'clf4__kernel': 'linear'}\n", 283 | " ACC (on outer test fold) 90.75%\n", 284 | "\n", 285 | " Outer Loop:\n", 286 | " ACC 91.80% +/- 1.00\n", 287 | "-------------------------------------------------- \n", 288 | "\n", 289 | "Algorithm: Softmax\n", 290 | " Inner loop:\n" 291 | ] 292 | }, 293 | { 294 | "name": "stderr", 295 | "output_type": "stream", 296 | "text": [ 297 | "/Users/sebastian/miniconda3/lib/python3.8/site-packages/scipy/optimize/linesearch.py:327: LineSearchWarning: The line search algorithm did not converge\n", 298 | " warn('The line search algorithm did not converge', LineSearchWarning)\n", 299 | "/Users/sebastian/miniconda3/lib/python3.8/site-packages/sklearn/utils/optimize.py:204: UserWarning: Line Search failed\n", 300 | " warnings.warn('Line Search failed')\n" 301 | ] 302 | }, 303 | { 304 | "name": "stdout", 305 | "output_type": "stream", 306 | "text": [ 307 | "\n", 308 | " Best ACC (avg. of inner test folds) 88.91%\n", 309 | " Best parameters: {'clf1__C': 0.01, 'clf1__penalty': 'l2'}\n", 310 | " ACC (on outer test fold) 90.00%\n" 311 | ] 312 | }, 313 | { 314 | "name": "stderr", 315 | "output_type": "stream", 316 | "text": [ 317 | "/Users/sebastian/miniconda3/lib/python3.8/site-packages/scipy/optimize/linesearch.py:327: LineSearchWarning: The line search algorithm did not converge\n", 318 | " warn('The line search algorithm did not converge', LineSearchWarning)\n", 319 | "/Users/sebastian/miniconda3/lib/python3.8/site-packages/sklearn/utils/optimize.py:204: UserWarning: Line Search failed\n", 320 | " warnings.warn('Line Search failed')\n" 321 | ] 322 | }, 323 | { 324 | "name": "stdout", 325 | "output_type": "stream", 326 | "text": [ 327 | "\n", 328 | " Best ACC (avg. 
of inner test folds) 88.75%\n", 329 | " Best parameters: {'clf1__C': 0.01, 'clf1__penalty': 'l2'}\n", 330 | " ACC (on outer test fold) 91.00%\n" 331 | ] 332 | }, 333 | { 334 | "name": "stderr", 335 | "output_type": "stream", 336 | "text": [ 337 | "/Users/sebastian/miniconda3/lib/python3.8/site-packages/scipy/optimize/linesearch.py:327: LineSearchWarning: The line search algorithm did not converge\n", 338 | " warn('The line search algorithm did not converge', LineSearchWarning)\n", 339 | "/Users/sebastian/miniconda3/lib/python3.8/site-packages/sklearn/utils/optimize.py:204: UserWarning: Line Search failed\n", 340 | " warnings.warn('Line Search failed')\n" 341 | ] 342 | }, 343 | { 344 | "name": "stdout", 345 | "output_type": "stream", 346 | "text": [ 347 | "\n", 348 | " Best ACC (avg. of inner test folds) 89.31%\n", 349 | " Best parameters: {'clf1__C': 0.01, 'clf1__penalty': 'l2'}\n", 350 | " ACC (on outer test fold) 90.00%\n" 351 | ] 352 | }, 353 | { 354 | "name": "stderr", 355 | "output_type": "stream", 356 | "text": [ 357 | "/Users/sebastian/miniconda3/lib/python3.8/site-packages/scipy/optimize/linesearch.py:327: LineSearchWarning: The line search algorithm did not converge\n", 358 | " warn('The line search algorithm did not converge', LineSearchWarning)\n", 359 | "/Users/sebastian/miniconda3/lib/python3.8/site-packages/sklearn/utils/optimize.py:204: UserWarning: Line Search failed\n", 360 | " warnings.warn('Line Search failed')\n" 361 | ] 362 | }, 363 | { 364 | "name": "stdout", 365 | "output_type": "stream", 366 | "text": [ 367 | "\n", 368 | " Best ACC (avg. of inner test folds) 88.59%\n", 369 | " Best parameters: {'clf1__C': 0.1, 'clf1__penalty': 'l2'}\n", 370 | " ACC (on outer test fold) 89.38%\n", 371 | "\n", 372 | " Best ACC (avg. 
of inner test folds) 88.66%\n", 373 | " Best parameters: {'clf1__C': 0.01, 'clf1__penalty': 'l2'}\n", 374 | " ACC (on outer test fold) 89.50%\n", 375 | "\n", 376 | " Outer Loop:\n", 377 | " ACC 89.97% +/- 0.57\n" 378 | ] 379 | }, 380 | { 381 | "name": "stderr", 382 | "output_type": "stream", 383 | "text": [ 384 | "/Users/sebastian/miniconda3/lib/python3.8/site-packages/scipy/optimize/linesearch.py:327: LineSearchWarning: The line search algorithm did not converge\n", 385 | " warn('The line search algorithm did not converge', LineSearchWarning)\n", 386 | "/Users/sebastian/miniconda3/lib/python3.8/site-packages/sklearn/utils/optimize.py:204: UserWarning: Line Search failed\n", 387 | " warnings.warn('Line Search failed')\n" 388 | ] 389 | } 390 | ], 391 | "source": [ 392 | "for name, gs_est in sorted(gridcvs.items()):\n", 393 | "\n", 394 | " print(50 * '-', '\\n')\n", 395 | " print('Algorithm:', name)\n", 396 | " print(' Inner loop:')\n", 397 | " \n", 398 | " outer_scores = []\n", 399 | " outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)\n", 400 | " \n", 401 | " \n", 402 | " for train_idx, valid_idx in outer_cv.split(X_train, y_train):\n", 403 | " \n", 404 | " gridcvs[name].fit(X_train[train_idx], y_train[train_idx]) # run inner loop hyperparam tuning\n", 405 | " print('\\n Best ACC (avg. 
of inner test folds) %.2f%%' % (gridcvs[name].best_score_ * 100))\n", 406 | "        print('        Best parameters:', gridcvs[name].best_params_)\n", 407 | "        \n", 408 | "        # perf on test fold (valid_idx)\n", 409 | "        outer_scores.append(gridcvs[name].best_estimator_.score(X_train[valid_idx], y_train[valid_idx]))\n", 410 | "        print('        ACC (on outer test fold) %.2f%%' % (outer_scores[-1]*100))\n", 411 | "        \n", 412 | "    print('\\n    Outer Loop:')\n", 413 | "    print('        ACC %.2f%% +/- %.2f' % \n", 414 | "          (np.mean(outer_scores) * 100, np.std(outer_scores) * 100))" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "------" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "- Determine the best algorithm from the experiment above; e.g., we find that Random Forest is performing best\n", 429 | "- Now, select hyperparameters for the model based on regular k-fold on the whole training set" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 5, 435 | "metadata": {}, 436 | "outputs": [ 437 | { 438 | "name": "stderr", 439 | "output_type": "stream", 440 | "text": [ 441 | "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.\n" 442 | ] 443 | }, 444 | { 445 | "name": "stdout", 446 | "output_type": "stream", 447 | "text": [ 448 | "Fitting 2 folds for each of 5 candidates, totalling 10 fits\n" 449 | ] 450 | }, 451 | { 452 | "name": "stderr", 453 | "output_type": "stream", 454 | "text": [ 455 | "[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  1.3min finished\n" 456 | ] 457 | }, 458 | { 459 | "name": "stdout", 460 | "output_type": "stream", 461 | "text": [ 462 | "Best CV accuracy: 93.30%\n", 463 | "Best parameters: {'n_estimators': 10000}\n" 464 | ] 465 | } 466 | ], 467 | "source": [ 468 | "gcv_model_select = GridSearchCV(estimator=clf5,\n", 469 | "                                param_grid=param_grid5,\n", 470 | "                                scoring='accuracy',\n", 471 | "                                n_jobs=-1,\n", 472 | "                                
cv=inner_cv,\n", 473 | "                                verbose=1,\n", 474 | "                                refit=True)\n", 475 | "\n", 476 | "gcv_model_select.fit(X_train, y_train)\n", 477 | "print('Best CV accuracy: %.2f%%' % (gcv_model_select.best_score_*100))\n", 478 | "print('Best parameters:', gcv_model_select.best_params_)" 479 | ] 480 | }, 481 | { 482 | "cell_type": "markdown", 483 | "metadata": {}, 484 | "source": [ 485 | "- Using these settings, we can now train the best model on the whole training set" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": 6, 491 | "metadata": {}, 492 | "outputs": [ 493 | { 494 | "name": "stdout", 495 | "output_type": "stream", 496 | "text": [ 497 | "Training Accuracy: 100.00%\n", 498 | "Test Accuracy: 94.00%\n" 499 | ] 500 | } 501 | ], 502 | "source": [ 503 | "## We can skip the next step because we set refit=True\n", 504 | "## so scikit-learn has already fit the model to the\n", 505 | "## whole training set\n", 506 | "\n", 507 | "# gcv_model_select.fit(X_train, y_train)\n", 508 | "\n", 509 | "train_acc = accuracy_score(y_true=y_train, y_pred=gcv_model_select.predict(X_train))\n", 510 | "test_acc = accuracy_score(y_true=y_test, y_pred=gcv_model_select.predict(X_test))\n", 511 | "\n", 512 | "print('Training Accuracy: %.2f%%' % (100 * train_acc))\n", 513 | "print('Test Accuracy: %.2f%%' % (100 * test_acc))" 514 | ] 515 | }, 516 | { 517 | "cell_type": "markdown", 518 | "metadata": {}, 519 | "source": [ 520 | "For comparison, previously, we have seen that using this algorithm, the avg. 
outer fold accuracy was \n", 521 | "\n", 522 | " ACC 93.98% +/- 0.98" 523 | ] 524 | } 525 | ], 526 | "metadata": { 527 | "anaconda-cloud": {}, 528 | "kernelspec": { 529 | "display_name": "Python 3", 530 | "language": "python", 531 | "name": "python3" 532 | }, 533 | "language_info": { 534 | "codemirror_mode": { 535 | "name": "ipython", 536 | "version": 3 537 | }, 538 | "file_extension": ".py", 539 | "mimetype": "text/x-python", 540 | "name": "python", 541 | "nbconvert_exporter": "python", 542 | "pygments_lexer": "ipython3", 543 | "version": "3.8.2" 544 | } 545 | }, 546 | "nbformat": 4, 547 | "nbformat_minor": 4 548 | } 549 | -------------------------------------------------------------------------------- /L11/code/11-eval4-algo__nested-cv_verbose2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "STAT 451: Machine Learning (Fall 2020) \n", 8 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) \n", 9 | "\n", 10 | "Course website: http://pages.stat.wisc.edu/~sraschka/teaching/stat451-fs2020/" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# L11: Model Evaluation 4 -- Algorithm Comparison (Nested Cross-Validation)" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "\n", 25 | "\n", 26 | "## -- verbose version 2 (using `cross_validate`)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "This notebook illustrates how to implement nested cross-validation in scikit-learn. This notebook is a more compact version of the other notebook [./11-eval4-algo__nested-cv_verbose1.ipynb](./11-eval4-algo__nested-cv_verbose1.ipynb). 
Here, instead of using `StratifiedKFold` directly and iterate over the splits, we use the `cross_validate` function.\n", 34 | "\n", 35 | "" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 1, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "Sebastian Raschka 2020-11-24 \n", 48 | "\n", 49 | "CPython 3.8.2\n", 50 | "IPython 7.18.1\n", 51 | "\n", 52 | "sklearn 0.23.2\n", 53 | "mlxtend 0.18.0.dev0\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "%load_ext watermark\n", 59 | "%watermark -a 'Sebastian Raschka' -d -p sklearn,mlxtend -v" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 2, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "import numpy as np\n", 69 | "from sklearn.model_selection import GridSearchCV\n", 70 | "from sklearn.model_selection import train_test_split\n", 71 | "from sklearn.model_selection import StratifiedKFold\n", 72 | "from sklearn.model_selection import cross_validate\n", 73 | "from sklearn.pipeline import Pipeline\n", 74 | "from sklearn.preprocessing import StandardScaler\n", 75 | "from sklearn.linear_model import LogisticRegression\n", 76 | "from sklearn.neighbors import KNeighborsClassifier\n", 77 | "from sklearn.tree import DecisionTreeClassifier\n", 78 | "from sklearn.ensemble import RandomForestClassifier\n", 79 | "from sklearn.svm import SVC\n", 80 | "from mlxtend.data import mnist_data\n", 81 | "from sklearn.metrics import accuracy_score\n", 82 | "\n", 83 | "# Loading and splitting the dataset\n", 84 | "# Note that this is a small (stratified) subset\n", 85 | "# of MNIST; it consists of 5000 samples only, that is,\n", 86 | "# 10% of the original MNIST dataset\n", 87 | "# http://yann.lecun.com/exdb/mnist/\n", 88 | "X, y = mnist_data()\n", 89 | "X = X.astype(np.float32)\n", 90 | "X_train, X_test, y_train, y_test = train_test_split(X, y,\n", 91 | " test_size=0.2,\n", 92 | " random_state=1,\n", 93 | " 
stratify=y)\n", 94 | "\n", 95 | "# Initializing Classifiers\n", 96 | "clf1 = LogisticRegression(multi_class='multinomial',\n", 97 | " solver='newton-cg',\n", 98 | " random_state=1)\n", 99 | "clf2 = KNeighborsClassifier(algorithm='ball_tree',\n", 100 | " leaf_size=50)\n", 101 | "clf3 = DecisionTreeClassifier(random_state=1)\n", 102 | "clf4 = SVC(random_state=1)\n", 103 | "clf5 = RandomForestClassifier(random_state=1)\n", 104 | "\n", 105 | "# Building the pipelines\n", 106 | "pipe1 = Pipeline([('std', StandardScaler()),\n", 107 | " ('clf1', clf1)])\n", 108 | "\n", 109 | "pipe2 = Pipeline([('std', StandardScaler()),\n", 110 | " ('clf2', clf2)])\n", 111 | "\n", 112 | "pipe4 = Pipeline([('std', StandardScaler()),\n", 113 | " ('clf4', clf4)])\n", 114 | "\n", 115 | "\n", 116 | "# Setting up the parameter grids\n", 117 | "param_grid1 = [{'clf1__penalty': ['l2'],\n", 118 | " 'clf1__C': np.power(10., np.arange(-4, 4))}]\n", 119 | "\n", 120 | "param_grid2 = [{'clf2__n_neighbors': list(range(1, 10)),\n", 121 | " 'clf2__p': [1, 2]}]\n", 122 | "\n", 123 | "param_grid3 = [{'max_depth': list(range(1, 10)) + [None],\n", 124 | " 'criterion': ['gini', 'entropy']}]\n", 125 | "\n", 126 | "param_grid4 = [{'clf4__kernel': ['rbf'],\n", 127 | " 'clf4__C': np.power(10., np.arange(-4, 4)),\n", 128 | " 'clf4__gamma': np.power(10., np.arange(-5, 0))},\n", 129 | " {'clf4__kernel': ['linear'],\n", 130 | " 'clf4__C': np.power(10., np.arange(-4, 4))}]\n", 131 | "\n", 132 | "param_grid5 = [{'n_estimators': [10, 100, 500, 1000, 10000]}]" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 3, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "# Setting up multiple GridSearchCV objects, 1 for each algorithm\n", 142 | "gridcvs = {}\n", 143 | "inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=1)\n", 144 | "\n", 145 | "for pgrid, est, name in zip((param_grid1, param_grid2,\n", 146 | " param_grid3, param_grid4, param_grid5),\n", 147 | " (pipe1, 
pipe2, clf3, pipe4, clf5),\n", 148 | " ('Softmax', 'KNN', 'DTree', 'SVM', 'RForest')):\n", 149 | " gcv = GridSearchCV(estimator=est,\n", 150 | " param_grid=pgrid,\n", 151 | " scoring='accuracy',\n", 152 | " n_jobs=-1,\n", 153 | " cv=inner_cv,\n", 154 | " verbose=0,\n", 155 | " refit=True)\n", 156 | " gridcvs[name] = gcv" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 4, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "name": "stdout", 166 | "output_type": "stream", 167 | "text": [ 168 | "-------------------------------------------------- \n", 169 | "\n", 170 | "Algorithm: DTree\n", 171 | " Inner loop:\n", 172 | "\n", 173 | " Best ACC (avg. of inner test folds) 72.59%\n", 174 | " Best parameters: DecisionTreeClassifier(random_state=1)\n", 175 | " ACC (on outer test fold) 75.50%\n", 176 | "\n", 177 | " Best ACC (avg. of inner test folds) 74.03%\n", 178 | " Best parameters: DecisionTreeClassifier(criterion='entropy', max_depth=7, random_state=1)\n", 179 | " ACC (on outer test fold) 78.25%\n", 180 | "\n", 181 | " Best ACC (avg. of inner test folds) 73.88%\n", 182 | " Best parameters: DecisionTreeClassifier(criterion='entropy', max_depth=9, random_state=1)\n", 183 | " ACC (on outer test fold) 77.38%\n", 184 | "\n", 185 | " Best ACC (avg. of inner test folds) 73.38%\n", 186 | " Best parameters: DecisionTreeClassifier(criterion='entropy', max_depth=8, random_state=1)\n", 187 | " ACC (on outer test fold) 74.88%\n", 188 | "\n", 189 | " Best ACC (avg. of inner test folds) 73.91%\n", 190 | " Best parameters: DecisionTreeClassifier(criterion='entropy', max_depth=8, random_state=1)\n", 191 | " ACC (on outer test fold) 77.75%\n", 192 | "\n", 193 | "DTree | outer ACC 76.75% +/- 1.32\n", 194 | "-------------------------------------------------- \n", 195 | "\n", 196 | "Algorithm: KNN\n", 197 | " Inner loop:\n", 198 | "\n", 199 | " Best ACC (avg. 
of inner test folds) 88.38%\n", 200 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 201 | " ('clf2',\n", 202 | " KNeighborsClassifier(algorithm='ball_tree', leaf_size=50,\n", 203 | " n_neighbors=1, p=1))])\n", 204 | " ACC (on outer test fold) 91.62%\n", 205 | "\n", 206 | " Best ACC (avg. of inner test folds) 88.75%\n", 207 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 208 | " ('clf2',\n", 209 | " KNeighborsClassifier(algorithm='ball_tree', leaf_size=50,\n", 210 | " n_neighbors=1, p=1))])\n", 211 | " ACC (on outer test fold) 91.88%\n", 212 | "\n", 213 | " Best ACC (avg. of inner test folds) 89.84%\n", 214 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 215 | " ('clf2',\n", 216 | " KNeighborsClassifier(algorithm='ball_tree', leaf_size=50,\n", 217 | " n_neighbors=1, p=1))])\n", 218 | " ACC (on outer test fold) 90.88%\n", 219 | "\n", 220 | " Best ACC (avg. of inner test folds) 89.50%\n", 221 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 222 | " ('clf2',\n", 223 | " KNeighborsClassifier(algorithm='ball_tree', leaf_size=50,\n", 224 | " n_neighbors=1, p=1))])\n", 225 | " ACC (on outer test fold) 90.88%\n", 226 | "\n", 227 | " Best ACC (avg. of inner test folds) 89.06%\n", 228 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 229 | " ('clf2',\n", 230 | " KNeighborsClassifier(algorithm='ball_tree', leaf_size=50,\n", 231 | " n_neighbors=1, p=1))])\n", 232 | " ACC (on outer test fold) 90.25%\n", 233 | "\n", 234 | "KNN | outer ACC 91.10% +/- 0.58\n", 235 | "-------------------------------------------------- \n", 236 | "\n", 237 | "Algorithm: RForest\n", 238 | " Inner loop:\n", 239 | "\n", 240 | " Best ACC (avg. of inner test folds) 92.59%\n", 241 | " Best parameters: RandomForestClassifier(n_estimators=1000, random_state=1)\n", 242 | " ACC (on outer test fold) 95.00%\n", 243 | "\n", 244 | " Best ACC (avg. 
of inner test folds) 92.59%\n", 245 | " Best parameters: RandomForestClassifier(n_estimators=10000, random_state=1)\n", 246 | " ACC (on outer test fold) 94.75%\n", 247 | "\n", 248 | " Best ACC (avg. of inner test folds) 92.94%\n", 249 | " Best parameters: RandomForestClassifier(n_estimators=10000, random_state=1)\n", 250 | " ACC (on outer test fold) 94.50%\n", 251 | "\n", 252 | " Best ACC (avg. of inner test folds) 93.00%\n", 253 | " Best parameters: RandomForestClassifier(n_estimators=10000, random_state=1)\n", 254 | " ACC (on outer test fold) 92.50%\n", 255 | "\n", 256 | " Best ACC (avg. of inner test folds) 92.75%\n", 257 | " Best parameters: RandomForestClassifier(n_estimators=500, random_state=1)\n", 258 | " ACC (on outer test fold) 93.12%\n", 259 | "\n", 260 | "RForest | outer ACC 93.98% +/- 0.98\n", 261 | "-------------------------------------------------- \n", 262 | "\n", 263 | "Algorithm: SVM\n", 264 | " Inner loop:\n", 265 | "\n", 266 | " Best ACC (avg. of inner test folds) 90.75%\n", 267 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 268 | " ('clf4', SVC(C=10.0, gamma=0.001, random_state=1))])\n", 269 | " ACC (on outer test fold) 92.12%\n", 270 | "\n", 271 | " Best ACC (avg. of inner test folds) 90.22%\n", 272 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 273 | " ('clf4', SVC(C=0.01, kernel='linear', random_state=1))])\n", 274 | " ACC (on outer test fold) 92.88%\n", 275 | "\n", 276 | " Best ACC (avg. of inner test folds) 90.91%\n", 277 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 278 | " ('clf4', SVC(C=0.01, kernel='linear', random_state=1))])\n", 279 | " ACC (on outer test fold) 90.50%\n", 280 | "\n", 281 | " Best ACC (avg. of inner test folds) 90.53%\n", 282 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 283 | " ('clf4', SVC(C=10.0, gamma=0.001, random_state=1))])\n", 284 | " ACC (on outer test fold) 92.75%\n", 285 | "\n", 286 | " Best ACC (avg. 
of inner test folds) 90.12%\n", 287 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 288 | " ('clf4', SVC(C=0.001, kernel='linear', random_state=1))])\n", 289 | " ACC (on outer test fold) 90.75%\n", 290 | "\n", 291 | "SVM | outer ACC 91.80% +/- 1.00\n", 292 | "-------------------------------------------------- \n", 293 | "\n", 294 | "Algorithm: Softmax\n", 295 | " Inner loop:\n", 296 | "\n", 297 | " Best ACC (avg. of inner test folds) 88.91%\n", 298 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 299 | " ('clf1',\n", 300 | " LogisticRegression(C=0.01, multi_class='multinomial',\n", 301 | " random_state=1, solver='newton-cg'))])\n", 302 | " ACC (on outer test fold) 90.00%\n", 303 | "\n", 304 | " Best ACC (avg. of inner test folds) 88.75%\n", 305 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 306 | " ('clf1',\n", 307 | " LogisticRegression(C=0.01, multi_class='multinomial',\n", 308 | " random_state=1, solver='newton-cg'))])\n", 309 | " ACC (on outer test fold) 91.00%\n", 310 | "\n", 311 | " Best ACC (avg. of inner test folds) 89.31%\n", 312 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 313 | " ('clf1',\n", 314 | " LogisticRegression(C=0.01, multi_class='multinomial',\n", 315 | " random_state=1, solver='newton-cg'))])\n", 316 | " ACC (on outer test fold) 90.00%\n", 317 | "\n", 318 | " Best ACC (avg. of inner test folds) 88.59%\n", 319 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 320 | " ('clf1',\n", 321 | " LogisticRegression(C=0.1, multi_class='multinomial',\n", 322 | " random_state=1, solver='newton-cg'))])\n", 323 | " ACC (on outer test fold) 89.38%\n", 324 | "\n", 325 | " Best ACC (avg. 
of inner test folds) 88.66%\n", 326 | " Best parameters: Pipeline(steps=[('std', StandardScaler()),\n", 327 | " ('clf1',\n", 328 | " LogisticRegression(C=0.01, multi_class='multinomial',\n", 329 | " random_state=1, solver='newton-cg'))])\n", 330 | " ACC (on outer test fold) 89.50%\n", 331 | "\n", 332 | "Softmax | outer ACC 89.97% +/- 0.57\n" 333 | ] 334 | } 335 | ], 336 | "source": [ 337 | "outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)\n", 338 | "\n", 339 | "\n", 340 | "for name, gs_est in sorted(gridcvs.items()):\n", 341 | " scores_dict = cross_validate(gs_est, \n", 342 | " X=X_train, \n", 343 | " y=y_train, \n", 344 | " cv=outer_cv,\n", 345 | " return_estimator=True,\n", 346 | " n_jobs=-1)\n", 347 | "\n", 348 | " print(50 * '-', '\\n')\n", 349 | " print('Algorithm:', name)\n", 350 | " print(' Inner loop:')\n", 351 | " \n", 352 | " \n", 353 | " for i in range(scores_dict['test_score'].shape[0]):\n", 354 | "\n", 355 | " print('\\n Best ACC (avg. of inner test folds) %.2f%%' % (scores_dict['estimator'][i].best_score_ * 100))\n", 356 | " print(' Best parameters:', scores_dict['estimator'][i].best_estimator_)\n", 357 | " print(' ACC (on outer test fold) %.2f%%' % (scores_dict['test_score'][i]*100))\n", 358 | "\n", 359 | " print('\\n%s | outer ACC %.2f%% +/- %.2f' % \n", 360 | " (name, scores_dict['test_score'].mean() * 100, \n", 361 | " scores_dict['test_score'].std() * 100))" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "------" 369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": {}, 374 | "source": [ 375 | "- Determine the best algorithm from the experiment above; e.g., we find that Random Forest is performing best\n", 376 | "- Now, select a hyperparameters for the model based on regular k-fold on the whole training set" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 5, 382 | "metadata": {}, 383 | "outputs": [ 384 | { 385 | "name": 
"stdout", 386 | "output_type": "stream", 387 | "text": [ 388 | "Fitting 2 folds for each of 5 candidates, totalling 10 fits\n" 389 | ] 390 | }, 391 | { 392 | "name": "stderr", 393 | "output_type": "stream", 394 | "text": [ 395 | "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.\n", 396 | "[Parallel(n_jobs=-1)]: Done 10 out of 10 | elapsed: 1.4min finished\n" 397 | ] 398 | }, 399 | { 400 | "data": { 401 | "text/plain": [ 402 | "GridSearchCV(cv=StratifiedKFold(n_splits=2, random_state=1, shuffle=True),\n", 403 | " estimator=RandomForestClassifier(random_state=1), n_jobs=-1,\n", 404 | " param_grid=[{'n_estimators': [10, 100, 500, 1000, 10000]}],\n", 405 | " scoring='accuracy', verbose=1)" 406 | ] 407 | }, 408 | "execution_count": 5, 409 | "metadata": {}, 410 | "output_type": "execute_result" 411 | } 412 | ], 413 | "source": [ 414 | "gcv_model_select = GridSearchCV(estimator=clf5,\n", 415 | " param_grid=param_grid5,\n", 416 | " scoring='accuracy',\n", 417 | " n_jobs=-1,\n", 418 | " cv=inner_cv,\n", 419 | " verbose=1,\n", 420 | " refit=True)\n", 421 | "\n", 422 | "gcv_model_select.fit(X_train, y_train)" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": 6, 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "name": "stdout", 432 | "output_type": "stream", 433 | "text": [ 434 | "Accuracy 93.30% (average over k-fold CV test folds)\n", 435 | "Best Parameters: {'n_estimators': 10000}\n", 436 | "Training Accuracy: 100.00%\n", 437 | "Test Accuracy: 94.00%\n" 438 | ] 439 | } 440 | ], 441 | "source": [ 442 | "best_model = gcv_model_select.best_estimator_\n", 443 | "\n", 444 | "\n", 445 | "## We can skip the next step because we set refit=True\n", 446 | "## so scikit-learn has already fit the model to the\n", 447 | "## whole training set\n", 448 | "\n", 449 | "# best_model.fit(X_train, y_train)\n", 450 | "\n", 451 | "\n", 452 | "train_acc = accuracy_score(y_true=y_train, y_pred=best_model.predict(X_train))\n", 453 | 
"test_acc = accuracy_score(y_true=y_test, y_pred=best_model.predict(X_test))\n", 454 | "\n", 455 | "print('Accuracy %.2f%% (average over k-fold CV test folds)' %\n", 456 | " (100 * gcv_model_select.best_score_))\n", 457 | "print('Best Parameters: %s' % gcv_model_select.best_params_)\n", 458 | "\n", 459 | "print('Training Accuracy: %.2f%%' % (100 * train_acc))\n", 460 | "print('Test Accuracy: %.2f%%' % (100 * test_acc))" 461 | ] 462 | } 463 | ], 464 | "metadata": { 465 | "anaconda-cloud": {}, 466 | "kernelspec": { 467 | "display_name": "Python 3", 468 | "language": "python", 469 | "name": "python3" 470 | }, 471 | "language_info": { 472 | "codemirror_mode": { 473 | "name": "ipython", 474 | "version": 3 475 | }, 476 | "file_extension": ".py", 477 | "mimetype": "text/x-python", 478 | "name": "python", 479 | "nbconvert_exporter": "python", 480 | "pygments_lexer": "ipython3", 481 | "version": "3.8.2" 482 | } 483 | }, 484 | "nbformat": 4, 485 | "nbformat_minor": 4 486 | } 487 | -------------------------------------------------------------------------------- /L11/code/nested-cv-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L11/code/nested-cv-image.png -------------------------------------------------------------------------------- /L12/12_eval5-metrics__slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/L12/12_eval5-metrics__slides.pdf -------------------------------------------------------------------------------- /L12/code/12_2_pre-recall-f1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "STAT 451: Machine Learning (Fall 2020) \n", 
8 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) \n", 9 | "\n", 10 | "Course website: http://pages.stat.wisc.edu/~sraschka/teaching/stat451-fs2020/" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# 2) Precision, Recall, F1 Score" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Loading the Breast Cancer Wisconsin dataset" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "- In the Breast Cancer Wisconsin dataset, the firt column in this dataset stores the unique ID numbers of patients\n", 32 | "- The second column stores the corresponding cancer diagnoses (M = malignant, B = benign)\n", 33 | "- Columns 3-32 contain features that were extracted from digitized images of the nuclei of the cancer cells, which can be used to build a model to predict whether a tumor is benign or malignant.\n", 34 | "- The Breast Cancer Wisconsin dataset has been deposited in the UCI Machine Learning Repository, and more detailed information about this dataset can be found at https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+(Diagnostic)." 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/html": [ 45 | "
\n", 46 | "\n", 59 | "\n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | "
0123456789...22232425262728293031
0842302M17.9910.38122.801001.00.118400.277600.30010.14710...25.3817.33184.602019.00.16220.66560.71190.26540.46010.11890
1842517M20.5717.77132.901326.00.084740.078640.08690.07017...24.9923.41158.801956.00.12380.18660.24160.18600.27500.08902
284300903M19.6921.25130.001203.00.109600.159900.19740.12790...23.5725.53152.501709.00.14440.42450.45040.24300.36130.08758
384348301M11.4220.3877.58386.10.142500.283900.24140.10520...14.9126.5098.87567.70.20980.86630.68690.25750.66380.17300
484358402M20.2914.34135.101297.00.100300.132800.19800.10430...22.5416.67152.201575.00.13740.20500.40000.16250.23640.07678
\n", 209 | "

5 rows × 32 columns

\n", 210 | "
" 211 | ], 212 | "text/plain": [ 213 | " 0 1 2 3 4 5 6 7 8 \\\n", 214 | "0 842302 M 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 \n", 215 | "1 842517 M 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 \n", 216 | "2 84300903 M 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 \n", 217 | "3 84348301 M 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 \n", 218 | "4 84358402 M 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 \n", 219 | "\n", 220 | " 9 ... 22 23 24 25 26 27 28 29 \\\n", 221 | "0 0.14710 ... 25.38 17.33 184.60 2019.0 0.1622 0.6656 0.7119 0.2654 \n", 222 | "1 0.07017 ... 24.99 23.41 158.80 1956.0 0.1238 0.1866 0.2416 0.1860 \n", 223 | "2 0.12790 ... 23.57 25.53 152.50 1709.0 0.1444 0.4245 0.4504 0.2430 \n", 224 | "3 0.10520 ... 14.91 26.50 98.87 567.7 0.2098 0.8663 0.6869 0.2575 \n", 225 | "4 0.10430 ... 22.54 16.67 152.20 1575.0 0.1374 0.2050 0.4000 0.1625 \n", 226 | "\n", 227 | " 30 31 \n", 228 | "0 0.4601 0.11890 \n", 229 | "1 0.2750 0.08902 \n", 230 | "2 0.3613 0.08758 \n", 231 | "3 0.6638 0.17300 \n", 232 | "4 0.2364 0.07678 \n", 233 | "\n", 234 | "[5 rows x 32 columns]" 235 | ] 236 | }, 237 | "execution_count": 1, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "import pandas as pd\n", 244 | "\n", 245 | "df = pd.read_csv('https://archive.ics.uci.edu/ml/'\n", 246 | " 'machine-learning-databases'\n", 247 | " '/breast-cancer-wisconsin/wdbc.data', header=None)\n", 248 | "\n", 249 | "df.head()" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 2, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "data": { 259 | "text/plain": [ 260 | "(569, 32)" 261 | ] 262 | }, 263 | "execution_count": 2, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "df.shape" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "
" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "- First, we are converting the class labels from a string format into integers" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 3, 289 | "metadata": {}, 290 | "outputs": [ 291 | { 292 | "data": { 293 | "text/plain": [ 294 | "array(['B', 'M'], dtype=object)" 295 | ] 296 | }, 297 | "execution_count": 3, 298 | "metadata": {}, 299 | "output_type": "execute_result" 300 | } 301 | ], 302 | "source": [ 303 | "from sklearn.preprocessing import LabelEncoder\n", 304 | "\n", 305 | "X = df.loc[:, 2:].values\n", 306 | "y = df.loc[:, 1].values\n", 307 | "le = LabelEncoder()\n", 308 | "y = le.fit_transform(y)\n", 309 | "le.classes_" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "- Here, class \"M\" (malignant cancer) will be converted to class 1, and \"B\" will be converted into class 0 (the order the class labels are mapped depends on the alphabetical order of the string labels)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 4, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "data": { 326 | "text/plain": [ 327 | "array([1, 0])" 328 | ] 329 | }, 330 | "execution_count": 4, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | "source": [ 336 | "le.transform(['M', 'B'])" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "- Next, we split the data into 80% training data and 20% test data, using a stratified split" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 5, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "from sklearn.model_selection import train_test_split\n", 353 | "\n", 354 | "X_train, X_test, y_train, y_test = \\\n", 355 | " train_test_split(X, y, \n", 356 | " test_size=0.20,\n", 357 | " stratify=y,\n", 358 | " 
random_state=1)" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "## 2) Precision, Recall, F1 Score" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 6, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "name": "stdout", 375 | "output_type": "stream", 376 | "text": [ 377 | "[[71 1]\n", 378 | " [ 3 39]]\n" 379 | ] 380 | } 381 | ], 382 | "source": [ 383 | "from sklearn.preprocessing import StandardScaler\n", 384 | "from sklearn.neighbors import KNeighborsClassifier\n", 385 | "from sklearn.pipeline import make_pipeline\n", 386 | "from mlxtend.evaluate import confusion_matrix\n", 387 | "\n", 388 | "\n", 389 | "pipe_knn = make_pipeline(StandardScaler(),\n", 390 | " KNeighborsClassifier(n_neighbors=5))\n", 391 | "\n", 392 | "pipe_knn.fit(X_train, y_train)\n", 393 | "\n", 394 | "y_pred = pipe_knn.predict(X_test)\n", 395 | "\n", 396 | "confmat = confusion_matrix(y_test, y_pred)\n", 397 | "\n", 398 | "print(confmat)" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 7, 404 | "metadata": {}, 405 | "outputs": [ 406 | { 407 | "name": "stdout", 408 | "output_type": "stream", 409 | "text": [ 410 | "Accuracy: 0.965\n", 411 | "Precision: 0.975\n", 412 | "Recall: 0.929\n", 413 | "F1: 0.951\n", 414 | "MCC: 0.925\n" 415 | ] 416 | } 417 | ], 418 | "source": [ 419 | "from sklearn.metrics import accuracy_score, precision_score, \\\n", 420 | " recall_score, f1_score, matthews_corrcoef\n", 421 | "\n", 422 | "\n", 423 | "print('Accuracy: %.3f' % accuracy_score(y_true=y_test, y_pred=y_pred))\n", 424 | "print('Precision: %.3f' % precision_score(y_true=y_test, y_pred=y_pred))\n", 425 | "print('Recall: %.3f' % recall_score(y_true=y_test, y_pred=y_pred))\n", 426 | "print('F1: %.3f' % f1_score(y_true=y_test, y_pred=y_pred))\n", 427 | "print('MCC: %.3f' % matthews_corrcoef(y_true=y_test, y_pred=y_pred))" 428 | ] 429 | }, 430 | { 431 | "cell_type": "markdown", 432 | "metadata": {}, 433 
| "source": [ 434 | "## 3) Using those Metrics in GridSearch" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": 8, 440 | "metadata": {}, 441 | "outputs": [ 442 | { 443 | "name": "stdout", 444 | "output_type": "stream", 445 | "text": [ 446 | "0.9564099246736818\n", 447 | "{'kneighborsclassifier__n_neighbors': 5}\n" 448 | ] 449 | } 450 | ], 451 | "source": [ 452 | "from sklearn.model_selection import GridSearchCV\n", 453 | "\n", 454 | "\n", 455 | "param_range = [3, 5, 7, 9, 15, 21, 31]\n", 456 | "\n", 457 | "pipe_knn = make_pipeline(StandardScaler(),\n", 458 | " KNeighborsClassifier())\n", 459 | "\n", 460 | "param_grid = [{'kneighborsclassifier__n_neighbors': param_range}]\n", 461 | "\n", 462 | "\n", 463 | "gs = GridSearchCV(estimator=pipe_knn,\n", 464 | " param_grid=param_grid,\n", 465 | " scoring='f1',\n", 466 | " cv=10,\n", 467 | " n_jobs=-1)\n", 468 | "\n", 469 | "\n", 470 | "gs = gs.fit(X_train, y_train)\n", 471 | "print(gs.best_score_)\n", 472 | "print(gs.best_params_)" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": 9, 478 | "metadata": {}, 479 | "outputs": [ 480 | { 481 | "name": "stdout", 482 | "output_type": "stream", 483 | "text": [ 484 | "0.9597306397306398\n", 485 | "{'kneighborsclassifier__n_neighbors': 15}\n" 486 | ] 487 | } 488 | ], 489 | "source": [ 490 | "from sklearn.metrics import make_scorer\n", 491 | "from mlxtend.data import iris_data\n", 492 | "\n", 493 | "\n", 494 | "X_iris, y_iris = iris_data()\n", 495 | "\n", 496 | "\n", 497 | "# for multiclass:\n", 498 | "scorer = make_scorer(f1_score, average='macro')\n", 499 | "\n", 500 | "\n", 501 | "from sklearn.model_selection import GridSearchCV\n", 502 | "\n", 503 | "\n", 504 | "param_range = [3, 5, 7, 9, 15, 21, 31]\n", 505 | "\n", 506 | "pipe_knn = make_pipeline(StandardScaler(),\n", 507 | " KNeighborsClassifier())\n", 508 | "\n", 509 | "param_grid = [{'kneighborsclassifier__n_neighbors': param_range}]\n", 510 | "\n", 511 | "\n", 512 | "gs = 
GridSearchCV(estimator=pipe_knn,\n", 513 | " param_grid=param_grid,\n", 514 | " scoring=scorer,\n", 515 | " cv=10,\n", 516 | " n_jobs=-1)\n", 517 | "\n", 518 | "\n", 519 | "gs = gs.fit(X_iris, y_iris)\n", 520 | "print(gs.best_score_)\n", 521 | "print(gs.best_params_)" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": null, 527 | "metadata": {}, 528 | "outputs": [], 529 | "source": [] 530 | } 531 | ], 532 | "metadata": { 533 | "kernelspec": { 534 | "display_name": "Python 3", 535 | "language": "python", 536 | "name": "python3" 537 | }, 538 | "language_info": { 539 | "codemirror_mode": { 540 | "name": "ipython", 541 | "version": 3 542 | }, 543 | "file_extension": ".py", 544 | "mimetype": "text/x-python", 545 | "name": "python", 546 | "nbconvert_exporter": "python", 547 | "pygments_lexer": "ipython3", 548 | "version": "3.8.2" 549 | } 550 | }, 551 | "nbformat": 4, 552 | "nbformat_minor": 4 553 | } 554 | -------------------------------------------------------------------------------- /L12/code/12_3_balanced-acc-Copy1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "STAT 451: Machine Learning (Fall 2020) \n", 8 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) \n", 9 | "\n", 10 | "Course website: http://pages.stat.wisc.edu/~sraschka/teaching/stat451-fs2020/" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Balanced Accuracy" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 23, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "from mlxtend.evaluate import confusion_matrix\n", 27 | "from mlxtend.evaluate import accuracy_score\n", 28 | "import numpy as np" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 24, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "y_targ = np.array(3*[0] + 69*[1] 
+ 18*[2])\n", 38 | "y_pred = np.array(10*[0] + 50*[1] + 30*[2])" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 25, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "Standard accuracy: 78.89%\n", 51 | "Class 0 accuracy: 92.22%\n", 52 | "Class 1 accuracy: 78.89%\n", 53 | "Class 2 accuracy: 86.67%\n", 54 | "Average per-class accuracy: 85.93%\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "std_acc = accuracy_score(y_targ, y_pred)\n", 60 | "\n", 61 | "bin_acc0 = accuracy_score(y_targ, y_pred, method='binary', pos_label=0)\n", 62 | "bin_acc1 = accuracy_score(y_targ, y_pred, method='binary', pos_label=1)\n", 63 | "bin_acc2 = accuracy_score(y_targ, y_pred, method='binary', pos_label=2)\n", 64 | "\n", 65 | "avg_acc = accuracy_score(y_targ, y_pred, method='average')\n", 66 | "\n", 67 | "print(f'Standard accuracy: {std_acc*100:.2f}%')\n", 68 | "print(f'Class 0 accuracy: {bin_acc0*100:.2f}%')\n", 69 | "print(f'Class 1 accuracy: {bin_acc1*100:.2f}%')\n", 70 | "print(f'Class 2 accuracy: {bin_acc2*100:.2f}%')\n", 71 | "print(f'Average per-class accuracy: {avg_acc*100:.2f}%')" 72 | ] 73 | } 74 | ], 75 | "metadata": { 76 | "kernelspec": { 77 | "display_name": "Python 3", 78 | "language": "python", 79 | "name": "python3" 80 | }, 81 | "language_info": { 82 | "codemirror_mode": { 83 | "name": "ipython", 84 | "version": 3 85 | }, 86 | "file_extension": ".py", 87 | "mimetype": "text/x-python", 88 | "name": "python", 89 | "nbconvert_exporter": "python", 90 | "pygments_lexer": "ipython3", 91 | "version": "3.8.2" 92 | } 93 | }, 94 | "nbformat": 4, 95 | "nbformat_minor": 4 96 | } 97 | -------------------------------------------------------------------------------- /L12/code/wdbc.names.txt: -------------------------------------------------------------------------------- 1 | 1. Title: Wisconsin Diagnostic Breast Cancer (WDBC) 2 | 3 | 2. Source Information 4 | 5 | a) Creators: 6 | 7 | Dr. William H. 
Wolberg, General Surgery Dept., University of 8 | Wisconsin, Clinical Sciences Center, Madison, WI 53792 9 | wolberg@eagle.surgery.wisc.edu 10 | 11 | W. Nick Street, Computer Sciences Dept., University of 12 | Wisconsin, 1210 West Dayton St., Madison, WI 53706 13 | street@cs.wisc.edu 608-262-6619 14 | 15 | Olvi L. Mangasarian, Computer Sciences Dept., University of 16 | Wisconsin, 1210 West Dayton St., Madison, WI 53706 17 | olvi@cs.wisc.edu 18 | 19 | b) Donor: Nick Street 20 | 21 | c) Date: November 1995 22 | 23 | 3. Past Usage: 24 | 25 | first usage: 26 | 27 | W.N. Street, W.H. Wolberg and O.L. Mangasarian 28 | Nuclear feature extraction for breast tumor diagnosis. 29 | IS&T/SPIE 1993 International Symposium on Electronic Imaging: Science 30 | and Technology, volume 1905, pages 861-870, San Jose, CA, 1993. 31 | 32 | OR literature: 33 | 34 | O.L. Mangasarian, W.N. Street and W.H. Wolberg. 35 | Breast cancer diagnosis and prognosis via linear programming. 36 | Operations Research, 43(4), pages 570-577, July-August 1995. 37 | 38 | Medical literature: 39 | 40 | W.H. Wolberg, W.N. Street, and O.L. Mangasarian. 41 | Machine learning techniques to diagnose breast cancer from 42 | fine-needle aspirates. 43 | Cancer Letters 77 (1994) 163-171. 44 | 45 | W.H. Wolberg, W.N. Street, and O.L. Mangasarian. 46 | Image analysis and machine learning applied to breast cancer 47 | diagnosis and prognosis. 48 | Analytical and Quantitative Cytology and Histology, Vol. 17 49 | No. 2, pages 77-87, April 1995. 50 | 51 | W.H. Wolberg, W.N. Street, D.M. Heisey, and O.L. Mangasarian. 52 | Computerized breast cancer diagnosis and prognosis from fine 53 | needle aspirates. 54 | Archives of Surgery 1995;130:511-516. 55 | 56 | W.H. Wolberg, W.N. Street, D.M. Heisey, and O.L. Mangasarian. 57 | Computer-derived nuclear features distinguish malignant from 58 | benign breast cytology. 59 | Human Pathology, 26:792--796, 1995. 
60 | 61 | See also: 62 | http://www.cs.wisc.edu/~olvi/uwmp/mpml.html 63 | http://www.cs.wisc.edu/~olvi/uwmp/cancer.html 64 | 65 | Results: 66 | 67 | - predicting field 2, diagnosis: B = benign, M = malignant 68 | - sets are linearly separable using all 30 input features 69 | - best predictive accuracy obtained using one separating plane 70 | in the 3-D space of Worst Area, Worst Smoothness and 71 | Mean Texture. Estimated accuracy 97.5% using repeated 72 | 10-fold crossvalidations. Classifier has correctly 73 | diagnosed 176 consecutive new patients as of November 74 | 1995. 75 | 76 | 4. Relevant information 77 | 78 | Features are computed from a digitized image of a fine needle 79 | aspirate (FNA) of a breast mass. They describe 80 | characteristics of the cell nuclei present in the image. 81 | A few of the images can be found at 82 | http://www.cs.wisc.edu/~street/images/ 83 | 84 | Separating plane described above was obtained using 85 | Multisurface Method-Tree (MSM-T) [K. P. Bennett, "Decision Tree 86 | Construction Via Linear Programming." Proceedings of the 4th 87 | Midwest Artificial Intelligence and Cognitive Science Society, 88 | pp. 97-101, 1992], a classification method which uses linear 89 | programming to construct a decision tree. Relevant features 90 | were selected using an exhaustive search in the space of 1-4 91 | features and 1-3 separating planes. 92 | 93 | The actual linear program used to obtain the separating plane 94 | in the 3-dimensional space is that described in: 95 | [K. P. Bennett and O. L. Mangasarian: "Robust Linear 96 | Programming Discrimination of Two Linearly Inseparable Sets", 97 | Optimization Methods and Software 1, 1992, 23-34]. 98 | 99 | 100 | This database is also available through the UW CS ftp server: 101 | 102 | ftp ftp.cs.wisc.edu 103 | cd math-prog/cpo-dataset/machine-learn/WDBC/ 104 | 105 | 5. Number of instances: 569 106 | 107 | 6. Number of attributes: 32 (ID, diagnosis, 30 real-valued input features) 108 | 109 | 7. 
Attribute information 110 | 111 | 1) ID number 112 | 2) Diagnosis (M = malignant, B = benign) 113 | 3-32) 114 | 115 | Ten real-valued features are computed for each cell nucleus: 116 | 117 | a) radius (mean of distances from center to points on the perimeter) 118 | b) texture (standard deviation of gray-scale values) 119 | c) perimeter 120 | d) area 121 | e) smoothness (local variation in radius lengths) 122 | f) compactness (perimeter^2 / area - 1.0) 123 | g) concavity (severity of concave portions of the contour) 124 | h) concave points (number of concave portions of the contour) 125 | i) symmetry 126 | j) fractal dimension ("coastline approximation" - 1) 127 | 128 | Several of the papers listed above contain detailed descriptions of 129 | how these features are computed. 130 | 131 | The mean, standard error, and "worst" or largest (mean of the three 132 | largest values) of these features were computed for each image, 133 | resulting in 30 features. For instance, field 3 is Mean Radius, field 134 | 13 is Radius SE, field 23 is Worst Radius. 135 | 136 | All feature values are recoded with four significant digits. 137 | 138 | 8. Missing attribute values: none 139 | 140 | 9. 
Class distribution: 357 benign, 212 malignant -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/rasbt/stat451-machine-learning-fs20/master) 2 | 3 | # stat451-machine-learning-fs20 4 | 5 | STAT 451: Intro to Machine Learning @ UW-Madison (Fall 2020) 6 | 7 | - [Lecture 01](L01): Course overview, introduction to machine learning 8 | - [Lecture 02](L02): Nearest Neighbor Methods 9 | - [Lecture 03](L03): Python 10 | - [Lecture 04](L04): Scientific Computing in Python 11 | - [Lecture 05](L05): Scikit-learn 12 | - [Lecture 06](L06): Decision Trees 13 | - [Lecture 07](L07): Ensemble Methods 14 | - [Lecture 08](L08): Model Evaluation 1: Overfitting and Underfitting 15 | - [Lecture 09](L09): Model Evaluation 2: Resampling Methods and Confidence Intervals 16 | - [Lecture 10](L10): Model Evaluation 3: Cross-Validation and Model Selection 17 | - [Lecture 11](L11): Model Evaluation 4: Algorithm Comparison 18 | - [Lecture 12](L12): Model Evaluation 5: Performance Metrics -------------------------------------------------------------------------------- /report-template/examples/example-presentations.md: -------------------------------------------------------------------------------- 1 | See videos that students volunteered to share on YouTube: [https://www.youtube.com/watch?v=e_I0q3mmfw4] 2 | 3 | 4 | (PS: This is from the deep learning, not machine learning class, so the topics are different. However, the presentation style & expectation is the same.) 
-------------------------------------------------------------------------------- /report-template/examples/example-proposal.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/report-template/examples/example-proposal.pdf -------------------------------------------------------------------------------- /report-template/examples/example-report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/report-template/examples/example-report.pdf -------------------------------------------------------------------------------- /report-template/project-presentation-assessment.md: -------------------------------------------------------------------------------- 1 | # Project Presentation Assessment 2 | 3 | - 10 pts: Is there a motivation for the project given? 4 | - 40 pts: Is the project described well enough that a general audience, familiar with machine learning, can understand the project? 5 | - 20 pts: Figures are all legible and explained well 6 | - 20 pts: Are the results presented adequately discussed? 7 | - 10 pts: Did all team members contribute to the presentation? -------------------------------------------------------------------------------- /report-template/project-proposal-assessment.md: -------------------------------------------------------------------------------- 1 | # Project Proposal Assessment 2 | 3 | The proposal will be graded based on completeness of each of the 5 sections (Introduction, Motivation, Evaluation, Resources, and Contributions) and *not* be based on language, style, and how "exciting" or "interesting" the project is. For each section, you can receive a maximum of 10 points, totalling 50 pts for the proposal overall. 
4 | 5 | Also, it is important to make sure that you acknowledge previous work and use citations properly when referring to other people's work. Even minor forms of plagiarism (e.g., copying sentences from other texts) will result in a subtraction of at least 10 pts each per incidence. And university guidelines dictate that severe incidents need to be reported. If you are unsure about what constitutes plagiarism and how to avoid it, please see the helpful guides at https://conduct.students.wisc.edu/plagiarism/ 6 | 7 | -------------------------------------------------------------------------------- /report-template/project-report-assessment.md: -------------------------------------------------------------------------------- 1 | # Project Report Assessment 2 | 3 | 4 | ### Abstract: 15 pts 5 | 6 | - Is enough information provided to get a clear idea about the subject matter? 7 | - Is the abstract conveying the findings? 8 | - Are the main points of the report described succinctly? 9 | 10 | ### Introduction: 15 pts 11 | 12 | - Does the introduction cover the required background information to understand the work? 13 | - Is the introduction well organized: it starts out general and becomes more specific towards the end? 14 | - Is there a motivation explaining why this project is relevant, important, and/or interesting? 15 | 16 | ### Related Work: 15 pts 17 | 18 | - Is the similar and related work discussed adequately? 19 | - Are references cited properly (here, but also throughout the whole paper)? 20 | - Is the discussion or paragraph comparing this project with other people's work adequate? 21 | 22 | 23 | ### Proposed Method: 25 pts 24 | 25 | - Are there any missing descriptions of symbols used in mathematical notations (if applicable)? 26 | - Are the main algorithms described well enough so that they can be implemented by a knowledgeable reader?
27 | 28 | ### Experiments: 25 pts 29 | 30 | - Is the experimental setup and methodology described well enough so that it can be repeated? 31 | - If datasets are used, are they referenced appropriately? 32 | 33 | ### Results and Discussion: 30 pts 34 | 35 | - Are the results described clearly? 36 | - Is the data analyzed well, and are the results logical? 37 | - Are the figures clear and have no missing labels? 38 | - Do the figure captions have sufficient information to understand the figure? 39 | - Is each figure referenced in the text? 40 | - Is the discussion critical/honest, and are potential weaknesses/shortcomings discussed as well? 41 | 42 | ### Conclusions: 15 pts 43 | 44 | - Do the authors describe whether the initial motivation/task was accomplished or not based on the results? 45 | - Is it discussed adequately how the results relate to previous work? 46 | - If applicable, are potential future directions given? 47 | 48 | ### Contributions: 10 pts 49 | 50 | - Are all contributions listed clearly? 51 | - Did each member contribute approximately equally to the project? 52 | 53 | ### Length, Formatting, and Citations: 54 | 55 | - -25 pts if you submit the report in some arbitrary format and didn't use the report template. 56 | - -10 pts for each page that goes over the 8-page limit (references are not counted; so you may have 8 pages of text + an infinite number of reference pages). 57 | - -10 pts for each page below the 6-page minimum requirement (references are not counted; so you may have 6 pages of text + 1 to infinitely many reference pages). 58 | - -10 pts for each missing image reference -- this means, if you are using an image that was not made by yourself and you don't cite the source, 10 pts will be deducted for each missing reference. 59 | - -10 pts will be deducted where a sentence from a book or website is copied without citation.
For example, consider the following sentence from [https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm) 60 | 61 | - In pattern recognition, the k-nearest neighbors algorithm (k-NN) is a non-parametric method used for classification and regression. 62 | 63 | If you use it in your text in the following way 64 | 65 | > This section describes the machine learning methods used in this study. As a baseline model, the k-nearest neighbors algorithm was used. In pattern recognition, the k-nearest neighbors algorithm (k-NN) is a non-parametric method used for classification and regression. The distance metric ... 66 | 67 | I will deduct 10 pts because you didn't indicate that you obtained the sentence from Wikipedia. However, the following is ok: 68 | 69 | a) 70 | 71 | > This section describes the machine learning methods used in this study. As a baseline model, the k-nearest neighbors algorithm was used. "In pattern recognition, the k-nearest neighbors algorithm (k-NN) is a non-parametric method used for classification and regression."\footcite{\url{https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm}} The distance metric ... 72 | 73 | the following is also okay -- because you rewrote the sentence you don't need the quotation marks: 74 | 75 | b) 76 | 77 | > This section describes the machine learning methods used in this study. As a baseline model, the k-nearest neighbors algorithm was used. The k-nearest neighbors algorithm is a so-called \textit{lazy} machine learning algorithm and non-parametric method that can be used for classification and regression.\footcite{\url{https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm}} The distance metric ...
78 | 79 | -------------------------------------------------------------------------------- /report-template/proposal-latex/bibliography.bib: -------------------------------------------------------------------------------- 1 | @article{Raschka2020PythonTrends, 2 | title={Machine learning in python: Main developments and technology trends in data science, machine learning, and artificial intelligence}, 3 | author={Raschka, Sebastian and Patterson, Joshua and Nolet, Corey}, 4 | volume={11}, 5 | number={7}, 6 | pages={345}, 7 | year={2020}, 8 | journal={Information}, 9 | publisher={MDPI} 10 | } -------------------------------------------------------------------------------- /report-template/proposal-latex/figures/google-scholar.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/report-template/proposal-latex/figures/google-scholar.pdf -------------------------------------------------------------------------------- /report-template/proposal-latex/figures/not-own-figure.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/report-template/proposal-latex/figures/not-own-figure.pdf -------------------------------------------------------------------------------- /report-template/proposal-latex/proposal.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/report-template/proposal-latex/proposal.pdf -------------------------------------------------------------------------------- /report-template/proposal-latex/proposal.tex: -------------------------------------------------------------------------------- 1 | 
\documentclass[10pt,twocolumn,letterpaper]{article} 2 | 3 | \usepackage{statcourse} 4 | \usepackage{times} 5 | \usepackage{epsfig} 6 | \usepackage{graphicx} 7 | \usepackage{amsmath} 8 | \usepackage{amssymb} 9 | 10 | % Include other packages here, before hyperref. 11 | 12 | % If you comment hyperref and then uncomment it, you should delete 13 | % egpaper.aux before re-running latex. (Or just hit 'q' on the first latex 14 | % run, let it finish, and you should be clear). 15 | \usepackage[breaklinks=true,bookmarks=false]{hyperref} 16 | 17 | 18 | \statcoursefinalcopy 19 | 20 | 21 | \setcounter{page}{1} 22 | \begin{document} 23 | 24 | 25 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 26 | % DO NOT EDIT ANYTHING ABOVE THIS LINE 27 | % EXCEPT IF YOU LIKE TO USE ADDITIONAL PACKAGES 28 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 29 | 30 | 31 | 32 | %%%%%%%%% TITLE 33 | \title{\LaTeX\ Template for STAT451 Project Proposal (replace with your project title)} 34 | 35 | \author{First Author\\ 36 | {\tt\small firstauthor@wisc.edu} 37 | \and 38 | Second Author\\ 39 | {\tt\small secondauthor@wisc.edu} 40 | \and 41 | Third Author\\ 42 | {\tt\small thirdauthor@wisc.edu} 43 | } 44 | 45 | \maketitle 46 | %\thispagestyle{empty} 47 | 48 | 49 | 50 | % MAIN ARTICLE GOES BELOW 51 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 52 | 53 | 54 | 55 | %%%%%%%%% BODY TEXT 56 | 57 | 58 | 59 | \begin{itemize} 60 | 61 | 62 | \item The information in this template is very minimal, and this file should serve you as a framework for writing your proposal. You may prefer to use a more collaboration-friendly tool while drafting the report with your classmates before you prepare the final report for submission. Remember that you only need to turn in the PDF file on Canvas. Also, \textbf{only one member per team} needs to submit the project proposal. 
63 | 64 | \item The project proposal is a 2-4 page document excluding references\footnote{This means, references should of course be included but do not count towards the page limit}. 65 | 66 | \item You are encouraged (not required) to use 1-2 figures to illustrate technical concepts. 67 | 68 | \item The proposal must be formatted and submitted as a PDF document on Canvas (the submission deadline will be later announced on Canvas). 69 | 70 | \item Please 71 | check out the text in the sections below for further information. 72 | 73 | \end{itemize} 74 | 75 | 76 | 77 | 78 | \section{Introduction} 79 | 80 | 81 | In this section, describe what you are planning to do. Also, briefly describe related work. 82 | 83 | \subsection{Notes about Citations} 84 | 85 | When discussing related work, do not forget to include appropriate references. This is an example of a citation \cite{Raschka2020PythonTrends}. To format the citations properly, put the 86 | corresponding references into the ``bibliography.bib`` file. You can obtain 87 | BibTeX-formatted references for the "bib" file from Google Scholar 88 | (\url{https://scholar.google.com}), for example, by clicking on the 89 | double-quote character under a citation and then selecting \mbox{"BibTeX"} as 90 | shown in Figure \ref{fig:google-scholar-1col} and 91 | Figure \ref{fig:google-scholar-2col}. 92 | 93 | To avoid plagiarism, any sentence that is copied from other articles or sources (internet, papers, etc.) must be put in quotation marks. The next sentence provides an example that uses an existing sentence verbatim. 94 | 95 | According to \cite{Raschka2020PythonTrends}, "The development of machine learning algorithms that operate on a set of values (as opposed to a single value) at a time is also commonly known as vectorization." 96 | 97 | Direct quotes should be used sparingly, and it is usually better to rephrase sentences in your own words. The next sentence provides an example.
98 | 99 | Vectorization is a programming approach utilizing functions that operate on multiple values simultaneously to speed up computation \cite{Raschka2020PythonTrends}. 100 | 101 | \begin{figure}[t] 102 | \begin{center} 103 | \includegraphics[width=0.8\linewidth]{figures/google-scholar.pdf} 104 | \end{center} 105 | \caption{Example illustrating how to get BibTeX references from 106 | Google Scholar as a 1-column figure.} 107 | \label{fig:google-scholar-1col} 108 | \end{figure} 109 | 110 | \subsection{Notes about Figures} 111 | 112 | Figure~\ref{fig:google-scholar-1col} shows an example of a 1-column figure. 113 | 114 | You can create two-column figures, too, as shown in Figure \ref{fig:google-scholar-2col}. Please note that you can reuse figures from other papers or lecture material, but for every figure that is not your own, you have to include the "Source" as shown in Figure~\ref{fig:other-figure}. 115 | 116 | \begin{figure*} 117 | \begin{center} 118 | \includegraphics[width=0.8\linewidth]{figures/google-scholar.pdf} 119 | \end{center} 120 | \caption{Example of a 2-column figure.} 121 | \label{fig:google-scholar-2col} 122 | \end{figure*} 123 | 124 | \begin{figure*} 125 | \begin{center} 126 | \includegraphics[width=0.8\linewidth]{figures/not-own-figure.pdf} 127 | \end{center} 128 | \caption{Figure not created by yourself. Image source: \cite{Raschka2020PythonTrends}. (If the source is a website, not a paper, please use the URL link instead of the paper reference. Image source: \url{https://www.mdpi.com/2078-2489/11/4/193}.)} 129 | \label{fig:other-figure} 130 | \end{figure*} 131 | 132 | 133 | \section{Motivation} 134 | 135 | Describe why your project is interesting. E.g., you can describe why your project could have a broader societal impact. Or, you may describe the motivation from a personal learning perspective. 136 | 137 | \section{Evaluation} 138 | 139 | What would the successful outcome of your project look like?
In other words, under which circumstances would you consider your project to be “successful?” 140 | 141 | How do you measure success, specific to this project, from a technical standpoint? 142 | 143 | \section{Resources} 144 | 145 | What resources are you going to use (datasets, computer hardware, computational tools, etc.)? 146 | 147 | \section{Contributions} 148 | 149 | You are expected to share the workload evenly, and every group member is expected to participate in both the experiments and writing. (As a group, you only need to submit one proposal and one report, though. So you need to work together and coordinate your efforts.) 150 | 151 | Clearly indicate what computational and writing tasks each member of your group will be participating in. 152 | 153 | 154 | {\small 155 | \bibliographystyle{ieee} 156 | \bibliography{bibliography.bib} 157 | } 158 | 159 | \end{document} 160 | -------------------------------------------------------------------------------- /report-template/proposal-latex/statcourse.sty: -------------------------------------------------------------------------------- 1 | % --------------------------------------------------------------- 2 | % 3 | % $Id: statcourse.sty,v 1.3 2005/10/24 19:56:15 awf Exp $ 4 | % 5 | % by Paolo.Ienne@di.epfl.ch 6 | % some mods by awf@acm.org 7 | % 8 | % --------------------------------------------------------------- 9 | % 10 | % no guarantee is given that the format corresponds perfectly to 11 | % IEEE 8.5" x 11" Proceedings, but most features should be ok. 
12 | % 13 | % --------------------------------------------------------------- 14 | % with LaTeX2e: 15 | % ============= 16 | % 17 | % use as 18 | % \documentclass[times,10pt,twocolumn]{article} 19 | % \usepackage{latex8} 20 | % \usepackage{times} 21 | % 22 | % --------------------------------------------------------------- 23 | 24 | % with LaTeX 2.09: 25 | % ================ 26 | % 27 | % use as 28 | % \documentstyle[times,art10,twocolumn,latex8]{article} 29 | % 30 | % --------------------------------------------------------------- 31 | % with both versions: 32 | % =================== 33 | % 34 | % specify \statcoursefinalcopy to emit the final camera-ready copy 35 | % 36 | % specify references as 37 | % \bibliographystyle{ieee} 38 | % \bibliography{...your files...} 39 | % 40 | % --------------------------------------------------------------- 41 | 42 | \usepackage{eso-pic} 43 | \usepackage{xspace} 44 | 45 | \typeout{CVPR 8.5 x 11-Inch Proceedings Style `statcourse.sty'.} 46 | 47 | % ten point helvetica bold required for captions 48 | % eleven point times bold required for second-order headings 49 | % in some sites the name of the fonts may differ, 50 | % change the name here: 51 | \font\statcoursetenhv = phvb at 8pt % *** IF THIS FAILS, SEE statcourse.sty *** 52 | \font\elvbf = ptmb scaled 1100 53 | 54 | % If the above lines give an error message, try to comment them and 55 | % uncomment these: 56 | %\font\statcoursetenhv = phvb7t at 8pt 57 | %\font\elvbf = ptmb7t scaled 1100 58 | 59 | % set dimensions of columns, gap between columns, and paragraph indent 60 | \setlength{\textheight}{8.875in} 61 | \setlength{\textwidth}{6.875in} 62 | \setlength{\columnsep}{0.3125in} 63 | \setlength{\topmargin}{0in} 64 | \setlength{\headheight}{0in} 65 | \setlength{\headsep}{0in} 66 | \setlength{\parindent}{1pc} 67 | \setlength{\oddsidemargin}{-.304in} 68 | \setlength{\evensidemargin}{-.304in} 69 | 70 | \newif\ifstatcoursefinal 71 | \statcoursefinalfalse 72 | 
\def\statcoursefinalcopy{\global\statcoursefinaltrue} 73 | 74 | % memento from size10.clo 75 | % \normalsize{\@setfontsize\normalsize\@xpt\@xiipt} 76 | % \small{\@setfontsize\small\@ixpt{11}} 77 | % \footnotesize{\@setfontsize\footnotesize\@viiipt{9.5}} 78 | % \scriptsize{\@setfontsize\scriptsize\@viipt\@viiipt} 79 | % \tiny{\@setfontsize\tiny\@vpt\@vipt} 80 | % \large{\@setfontsize\large\@xiipt{14}} 81 | % \Large{\@setfontsize\Large\@xivpt{18}} 82 | % \LARGE{\@setfontsize\LARGE\@xviipt{22}} 83 | % \huge{\@setfontsize\huge\@xxpt{25}} 84 | % \Huge{\@setfontsize\Huge\@xxvpt{30}} 85 | 86 | \def\@maketitle 87 | { 88 | \newpage 89 | \null 90 | \vskip .375in 91 | \begin{center} 92 | {\Large \bf \@title \par} 93 | % additional two empty lines at the end of the title 94 | \vspace*{24pt} 95 | { 96 | \large 97 | \lineskip .5em 98 | \begin{tabular}[t]{c} 99 | \ifstatcoursefinal\@author\else Anonymous CVPR submission\\ 100 | \vspace*{1pt}\\%This space will need to be here in the final copy, so don't squeeze it out for the review copy. 
101 | Paper ID \statcoursePaperID \fi 102 | \end{tabular} 103 | \par 104 | } 105 | % additional small space at the end of the author name 106 | \vskip .5em 107 | % additional empty line at the end of the title block 108 | \vspace*{12pt} 109 | \end{center} 110 | } 111 | 112 | \def\abstract 113 | {% 114 | \centerline{\large\bf Abstract}% 115 | \vspace*{12pt}% 116 | \it% 117 | } 118 | 119 | \def\endabstract 120 | { 121 | % additional empty line at the end of the abstract 122 | \vspace*{12pt} 123 | } 124 | 125 | \def\affiliation#1{\gdef\@affiliation{#1}} \gdef\@affiliation{} 126 | 127 | \newlength{\@ctmp} 128 | \newlength{\@figindent} 129 | \setlength{\@figindent}{1pc} 130 | 131 | \long\def\@makecaption#1#2{ 132 | \setbox\@tempboxa\hbox{\small \noindent #1.~#2} 133 | \setlength{\@ctmp}{\hsize} 134 | \addtolength{\@ctmp}{-\@figindent}\addtolength{\@ctmp}{-\@figindent} 135 | % IF longer than one indented paragraph line 136 | \ifdim \wd\@tempboxa >\@ctmp 137 | % THEN DON'T set as an indented paragraph 138 | {\small #1.~#2\par} 139 | \else 140 | % ELSE center 141 | \hbox to\hsize{\hfil\box\@tempboxa\hfil} 142 | \fi} 143 | 144 | % correct heading spacing and type 145 | \def\statcoursesection{\@startsection {section}{1}{\z@} 146 | {10pt plus 2pt minus 2pt}{7pt} {\large\bf}} 147 | \def\statcoursessect#1{\statcoursesection*{#1}} 148 | \def\statcoursesect#1{\statcoursesection{\hskip -1em.~#1}} 149 | \def\section{\@ifstar\statcoursessect\statcoursesect} 150 | 151 | \def\statcoursesubsection{\@startsection {subsection}{2}{\z@} 152 | {8pt plus 2pt minus 2pt}{6pt} {\elvbf}} 153 | \def\statcoursessubsect#1{\statcoursesubsection*{#1}} 154 | \def\statcoursesubsect#1{\statcoursesubsection{\hskip -1em.~#1}} 155 | \def\subsection{\@ifstar\statcoursessubsect\statcoursesubsect} 156 | 157 | %% --------- Page background marks: Ruler and confidentiality 158 | 159 | % ----- define vruler 160 | \makeatletter 161 | \newbox\statcourserulerbox 162 | \newcount\statcourserulercount 163 | 
\newdimen\statcourseruleroffset 164 | \newdimen\cv@lineheight 165 | \newdimen\cv@boxheight 166 | \newbox\cv@tmpbox 167 | \newcount\cv@refno 168 | \newcount\cv@tot 169 | % NUMBER with left flushed zeros \fillzeros[] 170 | \newcount\cv@tmpc@ \newcount\cv@tmpc 171 | \def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi 172 | \cv@tmpc=1 % 173 | \loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi 174 | \ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat 175 | \ifnum#2<0\advance\cv@tmpc1\relax-\fi 176 | \loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat 177 | \cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}% 178 | % \makevruler[][][][][] 179 | \def\makevruler[#1][#2][#3][#4][#5]{\begingroup\offinterlineskip 180 | \textheight=#5\vbadness=10000\vfuzz=120ex\overfullrule=0pt% 181 | \global\setbox\statcourserulerbox=\vbox to \textheight{% 182 | {\parskip=0pt\hfuzz=150em\cv@boxheight=\textheight 183 | \cv@lineheight=#1\global\statcourserulercount=#2% 184 | \cv@tot\cv@boxheight\divide\cv@tot\cv@lineheight\advance\cv@tot2% 185 | \cv@refno1\vskip-\cv@lineheight\vskip1ex% 186 | \loop\setbox\cv@tmpbox=\hbox to0cm{{\statcoursetenhv\hfil\fillzeros[#4]\statcourserulercount}}% 187 | \ht\cv@tmpbox\cv@lineheight\dp\cv@tmpbox0pt\box\cv@tmpbox\break 188 | \advance\cv@refno1\global\advance\statcourserulercount#3\relax 189 | \ifnum\cv@refno<\cv@tot\repeat}}\endgroup}% 190 | \makeatother 191 | % ----- end of vruler 192 | 193 | % \makevruler[][][][][] 194 | \def\statcourseruler#1{\makevruler[12pt][#1][1][3][0.993\textheight]\usebox{\statcourserulerbox}} 195 | \AddToShipoutPicture{% 196 | \ifstatcoursefinal\else 197 | %\AtTextLowerLeft{% 198 | % \color[gray]{.15}\framebox(\LenToUnit{\textwidth},\LenToUnit{\textheight}){} 199 | %} 200 | \statcourseruleroffset=\textheight 201 | \advance\statcourseruleroffset by -3.7pt 202 | \color[rgb]{.5,.5,1} 203 | 
\AtTextUpperLeft{% 204 | \put(\LenToUnit{-35pt},\LenToUnit{-\statcourseruleroffset}){%left ruler 205 | \statcourseruler{\statcourserulercount}} 206 | \put(\LenToUnit{\textwidth\kern 30pt},\LenToUnit{-\statcourseruleroffset}){%right ruler 207 | \statcourseruler{\statcourserulercount}} 208 | } 209 | \def\pid{\parbox{1in}{\begin{center}\bf\sf{\small CVPR}\\\#\statcoursePaperID\end{center}}} 210 | \AtTextUpperLeft{%paperID in corners 211 | \put(\LenToUnit{-65pt},\LenToUnit{45pt}){\pid} 212 | \put(\LenToUnit{\textwidth\kern-8pt},\LenToUnit{45pt}){\pid} 213 | } 214 | \AtTextUpperLeft{%confidential 215 | \put(0,\LenToUnit{1cm}){\parbox{\textwidth}{\centering\statcoursetenhv 216 | CVPR 2018 Submission \#\statcoursePaperID. CONFIDENTIAL REVIEW COPY. DO NOT DISTRIBUTE.}} 217 | } 218 | \fi 219 | } 220 | 221 | %%% Make figure placement a little more predictable. 222 | % We trust the user to move figures if this results 223 | % in ugliness. 224 | % Minimize bad page breaks at figures 225 | \renewcommand{\textfraction}{0.01} 226 | \renewcommand{\floatpagefraction}{0.99} 227 | \renewcommand{\topfraction}{0.99} 228 | \renewcommand{\bottomfraction}{0.99} 229 | \renewcommand{\dblfloatpagefraction}{0.99} 230 | \renewcommand{\dbltopfraction}{0.99} 231 | \setcounter{totalnumber}{99} 232 | \setcounter{topnumber}{99} 233 | \setcounter{bottomnumber}{99} 234 | 235 | % Add a period to the end of an abbreviation unless there's one 236 | % already, then \xspace. 
237 | \makeatletter 238 | \DeclareRobustCommand\onedot{\futurelet\@let@token\@onedot} 239 | \def\@onedot{\ifx\@let@token.\else.\null\fi\xspace} 240 | 241 | \def\eg{\emph{e.g}\onedot} \def\Eg{\emph{E.g}\onedot} 242 | \def\ie{\emph{i.e}\onedot} \def\Ie{\emph{I.e}\onedot} 243 | \def\cf{\emph{c.f}\onedot} \def\Cf{\emph{C.f}\onedot} 244 | \def\etc{\emph{etc}\onedot} \def\vs{\emph{vs}\onedot} 245 | \def\wrt{w.r.t\onedot} \def\dof{d.o.f\onedot} 246 | \def\etal{\emph{et al}\onedot} 247 | \makeatother 248 | 249 | % --------------------------------------------------------------- 250 | -------------------------------------------------------------------------------- /report-template/report-latex/bibliography.bib: -------------------------------------------------------------------------------- 1 | @article{Raschka2020PythonTrends, 2 | title={Machine learning in python: Main developments and technology trends in data science, machine learning, and artificial intelligence}, 3 | author={Raschka, Sebastian and Patterson, Joshua and Nolet, Corey}, 4 | volume={11}, 5 | number={7}, 6 | pages={345}, 7 | year={2020}, 8 | journal={Information}, 9 | publisher={MDPI} 10 | } -------------------------------------------------------------------------------- /report-template/report-latex/figures/google-scholar.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/report-template/report-latex/figures/google-scholar.pdf -------------------------------------------------------------------------------- /report-template/report-latex/report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs20/51ae6db167ec9ccae555e973179c31be0d111804/report-template/report-latex/report.pdf -------------------------------------------------------------------------------- 
/report-template/report-latex/report.tex: -------------------------------------------------------------------------------- 1 | \documentclass[10pt,twocolumn,letterpaper]{article} 2 | 3 | \usepackage{statcourse} 4 | \usepackage{times} 5 | \usepackage{epsfig} 6 | \usepackage{graphicx} 7 | \usepackage{amsmath} 8 | \usepackage{amssymb} 9 | 10 | % Include other packages here, before hyperref. 11 | 12 | % If you comment hyperref and then uncomment it, you should delete 13 | % egpaper.aux before re-running latex. (Or just hit 'q' on the first latex 14 | % run, let it finish, and you should be clear). 15 | \usepackage[breaklinks=true,bookmarks=false]{hyperref} 16 | 17 | 18 | \statcoursefinalcopy 19 | 20 | 21 | \setcounter{page}{1} 22 | \begin{document} 23 | 24 | 25 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 26 | % DO NOT EDIT ANYTHING ABOVE THIS LINE 27 | % EXCEPT IF YOU LIKE TO USE ADDITIONAL PACKAGES 28 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 29 | 30 | 31 | 32 | %%%%%%%%% TITLE 33 | \title{\LaTeX\ Template for STAT451 Project Report (replace with your project title)} 34 | 35 | \author{First Author\\ 36 | {\tt\small firstauthor@wisc.edu} 37 | \and 38 | Second Author\\ 39 | {\tt\small secondauthor@wisc.edu} 40 | \and 41 | Third Author\\ 42 | {\tt\small thirdauthor@wisc.edu} 43 | } 44 | 45 | \maketitle 46 | %\thispagestyle{empty} 47 | 48 | 49 | 50 | % MAIN ARTICLE GOES BELOW 51 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 52 | 53 | 54 | %%%%%%%%% ABSTRACT 55 | \begin{abstract} 56 | The abstract for your project goes here. The length of the abstract 57 | should be between 200-250 words. Tips for writing a good abstract 58 | can be found at \url{https://writing.wisc.edu/Handbook/presentations_abstracts.html}. 
59 | \end{abstract} 60 | 61 | %%%%%%%%% BODY TEXT 62 | 63 | %------------------------------------------------- 64 | \section{Introduction} 65 | %------------------------------------------------- 66 | 67 | \noindent\textit{Recommended length: 1/2 to 1 pages.}\vspace{1cm} 68 | 69 | For the report, the same rules and guidelines apply as for the proposal. This is an example of a citation \cite{Raschka2020PythonTrends}; if you use the "cite{}" function in LaTeX, the References section will be created automatically at the end of the document. Please read through the {"proposal-latex/proposal.pdf"} for a refresher on how to use citations and figures properly. 70 | 71 | Note that the sections for this report are different, and some additional information is contained in this template document, so please read it carefully before you start writing. 72 | 73 | This is an example of a mathematical equation: 74 | 75 | $$f(\mathbf{x}; \mathbf{w}) = \sum_{i=1}^{n} w_ix_i.$$ 76 | 77 | This is a mathematical expression, $h(\mathbf{x}) = \hat{y}$ formatted in text. 78 | 79 | The project report should be 6-8 pages long (not counting references) 80 | and should contain the sections that are already provided in this paper. Please 81 | check out the text in these sections for further information. 82 | 83 | 84 | \subsection{Subsection} 85 | 86 | You can use paragraphs or subsections to further structure your 87 | main sections. This is an example of a subsection. 88 | 89 | \paragraph{This is a paragraph title.} This is an example of a paragraph. 90 | 91 | Ideally, your report should contain all the major sections provided in this report template. Please also consult the "report-template/project-report-assessment.md" for further information on these sections and grading. 
92 | 93 | 94 | 95 | %------------------------------------------------- 96 | \section{Related Work} 97 | %------------------------------------------------- 98 | 99 | \noindent\textit{Recommended length: 1/2 to 1 pages.}\vspace{1cm} 100 | 101 | Related work should be discussed here. This should be a short (1/2 to 1 page) discussion of work (from research papers and articles) that explored similar questions. For example, if you plan to predict COVID-19 from chest X-ray images, discuss previous work that was about a similar project. If the focus of your project is on analyzing the behavior of certain machine learning on a variety of different datasets, and the comparison itself (rather application) is the focus of your paper, discuss other papers that analyzed different algorithms. 102 | 103 | %------------------------------------------------- 104 | \section{Proposed Method} 105 | %------------------------------------------------- 106 | 107 | \noindent\textit{Recommended length: 1 to 2 pages.}\vspace{1cm} 108 | 109 | Describe the method(s) you are proposing, developing, or using. Most students will not propose new or modified machine learning methods or algorithms. In this case, describe how the main algorithms you are using work. This may include mathematical details. 110 | 111 | %------------------------------------------------- 112 | \section{Experiments} 113 | %------------------------------------------------- 114 | 115 | \noindent\textit{Recommended length: 1/2 to 1 pages.}\vspace{1cm} 116 | 117 | Describe the experiments you performed to address specific questions. This includes information about the dataset and software, which are listed as subsections below. Please do not remove these subsections. 118 | 119 | \subsection{Dataset} 120 | 121 | Briefly describe your dataset in a separate subsection. 122 | 123 | 124 | Table \ref{tab:some-table} shows an example for formatting a table. 
125 | 126 | \begin{table} 127 | \begin{center} 128 | \begin{tabular}{|l|c|} 129 | \hline 130 | Method & Accuracy \\ 131 | \hline\hline 132 | Method 1 & $70 \pm 3$ \% \\ 133 | Method 2 & $76 \pm 3$ \% \\ 134 | \hline 135 | \end{tabular} 136 | \end{center} 137 | \caption{This is an example of a table.} 138 | \label{tab:some-table} 139 | \end{table} 140 | 141 | 142 | 143 | 144 | \subsection{Software} 145 | 146 | Briefly list (and cite) the software you used. 147 | 148 | \subsection{Hardware} 149 | 150 | If relevant, list hardware resources you used. 151 | 152 | %------------------------------------------------- 153 | \section{Results and Discussion} 154 | %------------------------------------------------- 155 | 156 | \noindent\textit{Recommended length: 2 to 3 pages.}\vspace{1cm} 157 | 158 | Describe the results you obtained from the experiments and interpret them. 159 | Optionally, you could split "Results and Discussion" into two separate 160 | sections, but it is often easier to present the results and discuss them at the same time. In this section, you will likely want to create several subsections that address your specific research questions. As an example for structuring the Results and Discussion section, you can take a look at the following paper: \url{https://www.mdpi.com/2078-2489/11/7/345}. 161 | 162 | %------------------------------------------------- 163 | \section{Conclusions} 164 | %------------------------------------------------- 165 | 166 | \noindent\textit{Recommended length: 1/3 to 1/2 page.}\vspace{1cm} 167 | 168 | Describe your conclusions here. If there are any future directions, you can 169 | describe them here, or you can create a new section for future directions. 170 | 171 | %------------------------------------------------- 172 | \section{Acknowledgements} 173 | %------------------------------------------------- 174 | 175 | \noindent\textit{Recommended length: 2-4 sentences.}\vspace{1cm} 176 | 177 | List acknowledgements if any. 
For example, if someone provided you a dataset, or 178 | you used someone else's resources, this is a good place to acknowledge 179 | the help or support you received. 180 | 181 | %------------------------------------------------- 182 | \section{Contributions} 183 | %------------------------------------------------- 184 | 185 | \noindent\textit{Recommended length: 1/3 to 1/2 page.}\vspace{1cm} 186 | 187 | Describe the contributions of each team member who worked on this project. 188 | 189 | 190 | {\small 191 | \bibliographystyle{ieee} 192 | \bibliography{bibliography.bib} 193 | } 194 | 195 | \end{document} 196 | -------------------------------------------------------------------------------- /report-template/report-latex/statcourse.sty: -------------------------------------------------------------------------------- 1 | % --------------------------------------------------------------- 2 | % 3 | % $Id: statcourse.sty,v 1.3 2005/10/24 19:56:15 awf Exp $ 4 | % 5 | % by Paolo.Ienne@di.epfl.ch 6 | % some mods by awf@acm.org 7 | % 8 | % --------------------------------------------------------------- 9 | % 10 | % no guarantee is given that the format corresponds perfectly to 11 | % IEEE 8.5" x 11" Proceedings, but most features should be ok. 
12 | % 13 | % --------------------------------------------------------------- 14 | % with LaTeX2e: 15 | % ============= 16 | % 17 | % use as 18 | % \documentclass[times,10pt,twocolumn]{article} 19 | % \usepackage{latex8} 20 | % \usepackage{times} 21 | % 22 | % --------------------------------------------------------------- 23 | 24 | % with LaTeX 2.09: 25 | % ================ 26 | % 27 | % use as 28 | % \documentstyle[times,art10,twocolumn,latex8]{article} 29 | % 30 | % --------------------------------------------------------------- 31 | % with both versions: 32 | % =================== 33 | % 34 | % specify \statcoursefinalcopy to emit the final camera-ready copy 35 | % 36 | % specify references as 37 | % \bibliographystyle{ieee} 38 | % \bibliography{...your files...} 39 | % 40 | % --------------------------------------------------------------- 41 | 42 | \usepackage{eso-pic} 43 | \usepackage{xspace} 44 | 45 | \typeout{CVPR 8.5 x 11-Inch Proceedings Style `statcourse.sty'.} 46 | 47 | % ten point helvetica bold required for captions 48 | % eleven point times bold required for second-order headings 49 | % in some sites the name of the fonts may differ, 50 | % change the name here: 51 | \font\statcoursetenhv = phvb at 8pt % *** IF THIS FAILS, SEE statcourse.sty *** 52 | \font\elvbf = ptmb scaled 1100 53 | 54 | % If the above lines give an error message, try to comment them and 55 | % uncomment these: 56 | %\font\statcoursetenhv = phvb7t at 8pt 57 | %\font\elvbf = ptmb7t scaled 1100 58 | 59 | % set dimensions of columns, gap between columns, and paragraph indent 60 | \setlength{\textheight}{8.875in} 61 | \setlength{\textwidth}{6.875in} 62 | \setlength{\columnsep}{0.3125in} 63 | \setlength{\topmargin}{0in} 64 | \setlength{\headheight}{0in} 65 | \setlength{\headsep}{0in} 66 | \setlength{\parindent}{1pc} 67 | \setlength{\oddsidemargin}{-.304in} 68 | \setlength{\evensidemargin}{-.304in} 69 | 70 | \newif\ifstatcoursefinal 71 | \statcoursefinalfalse 72 | 
\def\statcoursefinalcopy{\global\statcoursefinaltrue} 73 | 74 | % memento from size10.clo 75 | % \normalsize{\@setfontsize\normalsize\@xpt\@xiipt} 76 | % \small{\@setfontsize\small\@ixpt{11}} 77 | % \footnotesize{\@setfontsize\footnotesize\@viiipt{9.5}} 78 | % \scriptsize{\@setfontsize\scriptsize\@viipt\@viiipt} 79 | % \tiny{\@setfontsize\tiny\@vpt\@vipt} 80 | % \large{\@setfontsize\large\@xiipt{14}} 81 | % \Large{\@setfontsize\Large\@xivpt{18}} 82 | % \LARGE{\@setfontsize\LARGE\@xviipt{22}} 83 | % \huge{\@setfontsize\huge\@xxpt{25}} 84 | % \Huge{\@setfontsize\Huge\@xxvpt{30}} 85 | 86 | \def\@maketitle 87 | { 88 | \newpage 89 | \null 90 | \vskip .375in 91 | \begin{center} 92 | {\Large \bf \@title \par} 93 | % additional two empty lines at the end of the title 94 | \vspace*{24pt} 95 | { 96 | \large 97 | \lineskip .5em 98 | \begin{tabular}[t]{c} 99 | \ifstatcoursefinal\@author\else Anonymous CVPR submission\\ 100 | \vspace*{1pt}\\%This space will need to be here in the final copy, so don't squeeze it out for the review copy. 
101 | Paper ID \statcoursePaperID \fi 102 | \end{tabular} 103 | \par 104 | } 105 | % additional small space at the end of the author name 106 | \vskip .5em 107 | % additional empty line at the end of the title block 108 | \vspace*{12pt} 109 | \end{center} 110 | } 111 | 112 | \def\abstract 113 | {% 114 | \centerline{\large\bf Abstract}% 115 | \vspace*{12pt}% 116 | \it% 117 | } 118 | 119 | \def\endabstract 120 | { 121 | % additional empty line at the end of the abstract 122 | \vspace*{12pt} 123 | } 124 | 125 | \def\affiliation#1{\gdef\@affiliation{#1}} \gdef\@affiliation{} 126 | 127 | \newlength{\@ctmp} 128 | \newlength{\@figindent} 129 | \setlength{\@figindent}{1pc} 130 | 131 | \long\def\@makecaption#1#2{ 132 | \setbox\@tempboxa\hbox{\small \noindent #1.~#2} 133 | \setlength{\@ctmp}{\hsize} 134 | \addtolength{\@ctmp}{-\@figindent}\addtolength{\@ctmp}{-\@figindent} 135 | % IF longer than one indented paragraph line 136 | \ifdim \wd\@tempboxa >\@ctmp 137 | % THEN DON'T set as an indented paragraph 138 | {\small #1.~#2\par} 139 | \else 140 | % ELSE center 141 | \hbox to\hsize{\hfil\box\@tempboxa\hfil} 142 | \fi} 143 | 144 | % correct heading spacing and type 145 | \def\statcoursesection{\@startsection {section}{1}{\z@} 146 | {10pt plus 2pt minus 2pt}{7pt} {\large\bf}} 147 | \def\statcoursessect#1{\statcoursesection*{#1}} 148 | \def\statcoursesect#1{\statcoursesection{\hskip -1em.~#1}} 149 | \def\section{\@ifstar\statcoursessect\statcoursesect} 150 | 151 | \def\statcoursesubsection{\@startsection {subsection}{2}{\z@} 152 | {8pt plus 2pt minus 2pt}{6pt} {\elvbf}} 153 | \def\statcoursessubsect#1{\statcoursesubsection*{#1}} 154 | \def\statcoursesubsect#1{\statcoursesubsection{\hskip -1em.~#1}} 155 | \def\subsection{\@ifstar\statcoursessubsect\statcoursesubsect} 156 | 157 | %% --------- Page background marks: Ruler and confidentiality 158 | 159 | % ----- define vruler 160 | \makeatletter 161 | \newbox\statcourserulerbox 162 | \newcount\statcourserulercount 163 | 
\newdimen\statcourseruleroffset 164 | \newdimen\cv@lineheight 165 | \newdimen\cv@boxheight 166 | \newbox\cv@tmpbox 167 | \newcount\cv@refno 168 | \newcount\cv@tot 169 | % NUMBER with left flushed zeros \fillzeros[] 170 | \newcount\cv@tmpc@ \newcount\cv@tmpc 171 | \def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi 172 | \cv@tmpc=1 % 173 | \loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi 174 | \ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat 175 | \ifnum#2<0\advance\cv@tmpc1\relax-\fi 176 | \loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat 177 | \cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}% 178 | % \makevruler[][][][][] 179 | \def\makevruler[#1][#2][#3][#4][#5]{\begingroup\offinterlineskip 180 | \textheight=#5\vbadness=10000\vfuzz=120ex\overfullrule=0pt% 181 | \global\setbox\statcourserulerbox=\vbox to \textheight{% 182 | {\parskip=0pt\hfuzz=150em\cv@boxheight=\textheight 183 | \cv@lineheight=#1\global\statcourserulercount=#2% 184 | \cv@tot\cv@boxheight\divide\cv@tot\cv@lineheight\advance\cv@tot2% 185 | \cv@refno1\vskip-\cv@lineheight\vskip1ex% 186 | \loop\setbox\cv@tmpbox=\hbox to0cm{{\statcoursetenhv\hfil\fillzeros[#4]\statcourserulercount}}% 187 | \ht\cv@tmpbox\cv@lineheight\dp\cv@tmpbox0pt\box\cv@tmpbox\break 188 | \advance\cv@refno1\global\advance\statcourserulercount#3\relax 189 | \ifnum\cv@refno<\cv@tot\repeat}}\endgroup}% 190 | \makeatother 191 | % ----- end of vruler 192 | 193 | % \makevruler[][][][][] 194 | \def\statcourseruler#1{\makevruler[12pt][#1][1][3][0.993\textheight]\usebox{\statcourserulerbox}} 195 | \AddToShipoutPicture{% 196 | \ifstatcoursefinal\else 197 | %\AtTextLowerLeft{% 198 | % \color[gray]{.15}\framebox(\LenToUnit{\textwidth},\LenToUnit{\textheight}){} 199 | %} 200 | \statcourseruleroffset=\textheight 201 | \advance\statcourseruleroffset by -3.7pt 202 | \color[rgb]{.5,.5,1} 203 | 
\AtTextUpperLeft{% 204 | \put(\LenToUnit{-35pt},\LenToUnit{-\statcourseruleroffset}){%left ruler 205 | \statcourseruler{\statcourserulercount}} 206 | \put(\LenToUnit{\textwidth\kern 30pt},\LenToUnit{-\statcourseruleroffset}){%right ruler 207 | \statcourseruler{\statcourserulercount}} 208 | } 209 | \def\pid{\parbox{1in}{\begin{center}\bf\sf{\small CVPR}\\\#\statcoursePaperID\end{center}}} 210 | \AtTextUpperLeft{%paperID in corners 211 | \put(\LenToUnit{-65pt},\LenToUnit{45pt}){\pid} 212 | \put(\LenToUnit{\textwidth\kern-8pt},\LenToUnit{45pt}){\pid} 213 | } 214 | \AtTextUpperLeft{%confidential 215 | \put(0,\LenToUnit{1cm}){\parbox{\textwidth}{\centering\statcoursetenhv 216 | CVPR 2018 Submission \#\statcoursePaperID. CONFIDENTIAL REVIEW COPY. DO NOT DISTRIBUTE.}} 217 | } 218 | \fi 219 | } 220 | 221 | %%% Make figure placement a little more predictable. 222 | % We trust the user to move figures if this results 223 | % in ugliness. 224 | % Minimize bad page breaks at figures 225 | \renewcommand{\textfraction}{0.01} 226 | \renewcommand{\floatpagefraction}{0.99} 227 | \renewcommand{\topfraction}{0.99} 228 | \renewcommand{\bottomfraction}{0.99} 229 | \renewcommand{\dblfloatpagefraction}{0.99} 230 | \renewcommand{\dbltopfraction}{0.99} 231 | \setcounter{totalnumber}{99} 232 | \setcounter{topnumber}{99} 233 | \setcounter{bottomnumber}{99} 234 | 235 | % Add a period to the end of an abbreviation unless there's one 236 | % already, then \xspace. 
237 | \makeatletter 238 | \DeclareRobustCommand\onedot{\futurelet\@let@token\@onedot} 239 | \def\@onedot{\ifx\@let@token.\else.\null\fi\xspace} 240 | 241 | \def\eg{\emph{e.g}\onedot} \def\Eg{\emph{E.g}\onedot} 242 | \def\ie{\emph{i.e}\onedot} \def\Ie{\emph{I.e}\onedot} 243 | \def\cf{\emph{c.f}\onedot} \def\Cf{\emph{C.f}\onedot} 244 | \def\etc{\emph{etc}\onedot} \def\vs{\emph{vs}\onedot} 245 | \def\wrt{w.r.t\onedot} \def\dof{d.o.f\onedot} 246 | \def\etal{\emph{et al}\onedot} 247 | \makeatother 248 | 249 | % --------------------------------------------------------------- 250 | --------------------------------------------------------------------------------