├── dl-course-info.md
├── hw_02
│   ├── some_digit.png
│   ├── tree-viz-1.png
│   └── hw02.ipynb
├── 02_knn
│   ├── 02_knn_notes.pdf
│   ├── 02_knn_slides.pdf
│   └── iris.csv
├── hw_03
│   ├── images
│   │   ├── conf-1.png
│   │   ├── conf-2.png
│   │   └── hint-1.png
│   ├── helper.py
│   ├── data
│   │   └── wine.data
│   └── hw3.ipynb
├── 05_sklearn
│   ├── images
│   │   ├── eda.pdf
│   │   ├── decisionreg.pdf
│   │   ├── estimator-api.pdf
│   │   ├── estimator-api.png
│   │   ├── holdout-tuning.pdf
│   │   ├── holdout-tuning.png
│   │   ├── iris-subsampling.pdf
│   │   ├── iris-subsampling.png
│   │   ├── sklearn-pipeline.pdf
│   │   ├── sklearn-pipeline.png
│   │   ├── transformer-api.pdf
│   │   └── transformer-api.png
│   └── 05_sklearn_slides.pdf
├── report-template
│   ├── report.pdf
│   ├── figures
│   │   └── google-scholar.pdf
│   ├── bibliography.bib
│   ├── project-presentation-assessment.md
│   ├── project-report-assessment.md
│   ├── report.tex
│   ├── statcourse.sty
│   └── ieee.bst
├── 06_trees
│   ├── 06_trees_notes.pdf
│   └── 06_trees_slides.pdf
├── 03_python
│   └── 03_python_notes.pdf
├── 09_eval-ci
│   ├── 09_eval-ci_notes.pdf
│   └── 09_eval-ci_slides.pdf
├── 10_eval-cv
│   ├── 10_eval-cv_notes.pdf
│   └── 10_eval-cv_slides.pdf
├── other
│   ├── stat479-fs18-awards.jpg
│   └── dl-course-info.md
├── 01_overview
│   ├── 01_ml-overview_notes.pdf
│   └── 01_ml-overview_slides.pdf
├── 04_scipython
│   ├── 04_scipython_notes.pdf
│   └── images
│       └── numpy-intro
│           ├── ufunc.png
│           ├── array_1.png
│           ├── array_2.png
│           ├── matmul.png
│           ├── matmatmul.png
│           ├── random_1.png
│           ├── random_2.png
│           ├── transpose.png
│           ├── broadcasting-1.png
│           └── broadcasting-2.png
├── 07_ensembles
│   ├── 07_ensembles_notes.pdf
│   └── 07_ensembles_slides.pdf
├── 11_eval-algo
│   ├── 11_eval-algo_notes.pdf
│   ├── 11_eval-algo_slides.pdf
│   └── 11_eval-algo_code.ipynb
├── 13_feat-sele
│   ├── 13_feat-sele_slides.pdf
│   └── code-figures
│       ├── logreg.png
│       └── multinomial-logreg.png
├── 08_eval-intro
│   ├── 08_eval-intro_notes.pdf
│   └── 08_eval-intro_slides.pdf
├── 12_eval-metrics
│   └── 12_eval-metrics_slides.pdf
├── 14_feat-extract
│   └── 14_feat-extract_slides.pdf
├── .gitignore
├── README.md
└── hw_01
    ├── test_data.txt
    └── train_data.txt
/dl-course-info.md:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/hw_02/some_digit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/hw_02/some_digit.png
--------------------------------------------------------------------------------
/hw_02/tree-viz-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/hw_02/tree-viz-1.png
--------------------------------------------------------------------------------
/02_knn/02_knn_notes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/02_knn/02_knn_notes.pdf
--------------------------------------------------------------------------------
/hw_03/images/conf-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/hw_03/images/conf-1.png
--------------------------------------------------------------------------------
/hw_03/images/conf-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/hw_03/images/conf-2.png
--------------------------------------------------------------------------------
/hw_03/images/hint-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/hw_03/images/hint-1.png
--------------------------------------------------------------------------------
/02_knn/02_knn_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/02_knn/02_knn_slides.pdf
--------------------------------------------------------------------------------
/05_sklearn/images/eda.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/eda.pdf
--------------------------------------------------------------------------------
/report-template/report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/report-template/report.pdf
--------------------------------------------------------------------------------
/06_trees/06_trees_notes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/06_trees/06_trees_notes.pdf
--------------------------------------------------------------------------------
/06_trees/06_trees_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/06_trees/06_trees_slides.pdf
--------------------------------------------------------------------------------
/03_python/03_python_notes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/03_python/03_python_notes.pdf
--------------------------------------------------------------------------------
/09_eval-ci/09_eval-ci_notes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/09_eval-ci/09_eval-ci_notes.pdf
--------------------------------------------------------------------------------
/10_eval-cv/10_eval-cv_notes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/10_eval-cv/10_eval-cv_notes.pdf
--------------------------------------------------------------------------------
/other/stat479-fs18-awards.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/other/stat479-fs18-awards.jpg
--------------------------------------------------------------------------------
/05_sklearn/05_sklearn_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/05_sklearn_slides.pdf
--------------------------------------------------------------------------------
/05_sklearn/images/decisionreg.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/decisionreg.pdf
--------------------------------------------------------------------------------
/09_eval-ci/09_eval-ci_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/09_eval-ci/09_eval-ci_slides.pdf
--------------------------------------------------------------------------------
/10_eval-cv/10_eval-cv_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/10_eval-cv/10_eval-cv_slides.pdf
--------------------------------------------------------------------------------
/01_overview/01_ml-overview_notes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/01_overview/01_ml-overview_notes.pdf
--------------------------------------------------------------------------------
/04_scipython/04_scipython_notes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/04_scipython_notes.pdf
--------------------------------------------------------------------------------
/05_sklearn/images/estimator-api.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/estimator-api.pdf
--------------------------------------------------------------------------------
/05_sklearn/images/estimator-api.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/estimator-api.png
--------------------------------------------------------------------------------
/05_sklearn/images/holdout-tuning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/holdout-tuning.pdf
--------------------------------------------------------------------------------
/05_sklearn/images/holdout-tuning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/holdout-tuning.png
--------------------------------------------------------------------------------
/07_ensembles/07_ensembles_notes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/07_ensembles/07_ensembles_notes.pdf
--------------------------------------------------------------------------------
/07_ensembles/07_ensembles_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/07_ensembles/07_ensembles_slides.pdf
--------------------------------------------------------------------------------
/11_eval-algo/11_eval-algo_notes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/11_eval-algo/11_eval-algo_notes.pdf
--------------------------------------------------------------------------------
/11_eval-algo/11_eval-algo_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/11_eval-algo/11_eval-algo_slides.pdf
--------------------------------------------------------------------------------
/13_feat-sele/13_feat-sele_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/13_feat-sele/13_feat-sele_slides.pdf
--------------------------------------------------------------------------------
/13_feat-sele/code-figures/logreg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/13_feat-sele/code-figures/logreg.png
--------------------------------------------------------------------------------
/01_overview/01_ml-overview_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/01_overview/01_ml-overview_slides.pdf
--------------------------------------------------------------------------------
/05_sklearn/images/iris-subsampling.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/iris-subsampling.pdf
--------------------------------------------------------------------------------
/05_sklearn/images/iris-subsampling.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/iris-subsampling.png
--------------------------------------------------------------------------------
/05_sklearn/images/sklearn-pipeline.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/sklearn-pipeline.pdf
--------------------------------------------------------------------------------
/05_sklearn/images/sklearn-pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/sklearn-pipeline.png
--------------------------------------------------------------------------------
/05_sklearn/images/transformer-api.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/transformer-api.pdf
--------------------------------------------------------------------------------
/05_sklearn/images/transformer-api.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/transformer-api.png
--------------------------------------------------------------------------------
/08_eval-intro/08_eval-intro_notes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/08_eval-intro/08_eval-intro_notes.pdf
--------------------------------------------------------------------------------
/08_eval-intro/08_eval-intro_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/08_eval-intro/08_eval-intro_slides.pdf
--------------------------------------------------------------------------------
/04_scipython/images/numpy-intro/ufunc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/ufunc.png
--------------------------------------------------------------------------------
/04_scipython/images/numpy-intro/array_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/array_1.png
--------------------------------------------------------------------------------
/04_scipython/images/numpy-intro/array_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/array_2.png
--------------------------------------------------------------------------------
/04_scipython/images/numpy-intro/matmul.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/matmul.png
--------------------------------------------------------------------------------
/12_eval-metrics/12_eval-metrics_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/12_eval-metrics/12_eval-metrics_slides.pdf
--------------------------------------------------------------------------------
/14_feat-extract/14_feat-extract_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/14_feat-extract/14_feat-extract_slides.pdf
--------------------------------------------------------------------------------
/report-template/figures/google-scholar.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/report-template/figures/google-scholar.pdf
--------------------------------------------------------------------------------
/04_scipython/images/numpy-intro/matmatmul.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/matmatmul.png
--------------------------------------------------------------------------------
/04_scipython/images/numpy-intro/random_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/random_1.png
--------------------------------------------------------------------------------
/04_scipython/images/numpy-intro/random_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/random_2.png
--------------------------------------------------------------------------------
/04_scipython/images/numpy-intro/transpose.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/transpose.png
--------------------------------------------------------------------------------
/13_feat-sele/code-figures/multinomial-logreg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/13_feat-sele/code-figures/multinomial-logreg.png
--------------------------------------------------------------------------------
/04_scipython/images/numpy-intro/broadcasting-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/broadcasting-1.png
--------------------------------------------------------------------------------
/04_scipython/images/numpy-intro/broadcasting-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/broadcasting-2.png
--------------------------------------------------------------------------------
/report-template/bibliography.bib:
--------------------------------------------------------------------------------
1 | @article{mirjalili2018gender,
2 | title={Gender Privacy: An Ensemble of Semi Adversarial Networks for Confounding Arbitrary Gender Classifiers},
3 | author={Mirjalili, Vahid and Raschka, Sebastian and Ross, Arun},
4 | journal={arXiv preprint arXiv:1807.11936},
5 | year={2018}
6 | }
--------------------------------------------------------------------------------
/report-template/project-presentation-assessment.md:
--------------------------------------------------------------------------------
1 | # Project Presentation Assessment
2 |
3 | - 10 pts: Is there a motivation for the project given?
4 | - 40 pts: Is the project described well enough that a general audience, familiar with machine learning, can understand the project?
5 | - 20 pts: Are all figures legible and explained well?
6 | - 20 pts: Are the results presented adequately discussed?
7 | - 10 pts: Did all team members contribute to the presentation?
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Datasets
2 | list_attr_celeba.txt
3 | *.zip
4 | *.npz
5 | *.npy
6 | *ubyte.gz
7 | *archive.ics.uci.edu*
8 |
9 | # Binary PyTorch models
10 | *.pt
11 |
12 | # Temporary OS files
13 | .DS_Store
14 |
15 | # TensorFlow Checkpoint files
16 | checkpoint
17 | code/*/*.data-?????-of-?????
18 | code/*/*.index
19 | code/*/*.meta
20 | code/model_zoo/tensorflow_ipynb/*.data-?????-of-?????
21 | code/model_zoo/tensorflow_ipynb/*.index
22 | code/model_zoo/tensorflow_ipynb/*.meta
23 | code/model_zoo/tensorflow_ipynb/cifar-10/*
24 |
25 | # Byte-compiled / optimized / DLL files
26 | __pycache__/
27 | *.py[cod]
28 | *$py.class
29 |
30 | # C extensions
31 | *.so
32 |
33 | # Distribution / packaging
34 | .Python
35 | env/
36 | build/
37 | develop-eggs/
38 | dist/
39 | downloads/
40 | eggs/
41 | .eggs/
42 | lib/
43 | lib64/
44 | parts/
45 | sdist/
46 | var/
47 | *.egg-info/
48 | .installed.cfg
49 | *.egg
50 |
51 | # PyInstaller
52 | # Usually these files are written by a python script from a template
53 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
54 | *.manifest
55 | *.spec
56 |
57 | # Installer logs
58 | pip-log.txt
59 | pip-delete-this-directory.txt
60 |
61 | # Unit test / coverage reports
62 | htmlcov/
63 | .tox/
64 | .coverage
65 | .coverage.*
66 | .cache
67 | nosetests.xml
68 | coverage.xml
69 | *.cover
70 | .hypothesis/
71 |
72 | # Translations
73 | *.mo
74 | *.pot
75 |
76 | # Django stuff:
77 | *.log
78 | local_settings.py
79 |
80 | # Flask stuff:
81 | instance/
82 | .webassets-cache
83 |
84 | # Scrapy stuff:
85 | .scrapy
86 |
87 | # Sphinx documentation
88 | docs/_build/
89 |
90 | # PyBuilder
91 | target/
92 |
93 | # IPython Notebook
94 | .ipynb_checkpoints
95 |
96 | # pyenv
97 | .python-version
98 |
99 | # celery beat schedule file
100 | celerybeat-schedule
101 |
102 | # dotenv
103 | .env
104 |
105 | # virtualenv
106 | venv/
107 | ENV/
108 |
109 | # Spyder project settings
110 | .spyderproject
111 |
112 | # Rope project settings
113 | .ropeproject
114 |
115 | # Datasets
116 | MNIST*
117 |
--------------------------------------------------------------------------------
/report-template/project-report-assessment.md:
--------------------------------------------------------------------------------
1 | # Project Report Assessment
2 |
3 |
4 | ### Abstract: 15 pts
5 |
6 | - Is enough information provided to get a clear idea about the subject matter?
7 | - Does the abstract convey the findings?
8 | - Are the main points of the report described succinctly?
9 |
10 | ### Introduction: 15 pts
11 |
12 | - Does the introduction cover the required background information to understand the work?
13 | - Is the introduction well organized: it starts out general and becomes more specific towards the end?
14 | - Is there a motivation explaining why this project is relevant, important, and/or interesting?
15 |
16 | ### Related Work: 15 pts
17 |
18 | - Is similar and related work discussed adequately?
19 | - Are references cited properly (here, but also throughout the whole paper)?
20 | - Is the discussion or paragraph comparing this project with other people's work adequate?
21 |
22 |
23 | ### Proposed Method: 25 pts
24 |
25 | - Are there any missing descriptions of symbols used in mathematical notations (if applicable)?
26 | - Are the main algorithms described well enough so that they can be implemented by a knowledgeable reader?
27 |
28 | ### Experiments: 25 pts
29 |
30 | - Is the experimental setup and methodology described well enough so that it can be repeated?
31 | - If datasets are used, are they referenced appropriately?
32 |
33 | ### Results and Discussion: 30 pts
34 |
35 | - Are the results described clearly?
36 | - Is the data analyzed well, and are the results logical?
37 | - Are the figures clear and have no missing labels?
38 | - Do the figure captions have sufficient information to understand the figure?
39 | - Is each figure referenced in the text?
40 | - Is the discussion critical/honest, and are potential weaknesses/shortcomings discussed as well?
41 |
42 | ### Conclusions: 15 pts
43 |
44 | - Do the authors describe whether the initial motivation/task was accomplished or not based on the results?
45 | - Is it discussed adequately how the results relate to previous work?
46 | - If applicable, are potential future directions given?
47 |
48 | ### Contributions: 10 pts
49 |
50 | - Are all contributions listed clearly?
51 | - Did each member contribute approximately equally to the project?
52 |
53 |
--------------------------------------------------------------------------------
/hw_03/helper.py:
--------------------------------------------------------------------------------
1 | # Copyright Sebastian Raschka 2018
2 |
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 |
6 |
7 | def plot_confusion_matrix(conf_mat,
8 | hide_spines=False,
9 | hide_ticks=False,
10 | figsize=None,
11 | cmap=None,
12 | colorbar=False,
13 | show_absolute=True,
14 | show_normed=False):
15 |
16 | if not (show_absolute or show_normed):
17 | raise AssertionError('Both show_absolute and show_normed are False')
18 |
19 | total_samples = conf_mat.sum(axis=1)[:, np.newaxis]
20 | normed_conf_mat = conf_mat.astype('float') / total_samples
21 |
22 |     if figsize is None:  # default size must be set before creating the figure
23 |         figsize = (len(conf_mat)*1.25, len(conf_mat)*1.25)
24 | 
25 |     fig, ax = plt.subplots(figsize=figsize)
26 |     ax.grid(False)
27 |     if cmap is None:
28 |         cmap = plt.cm.Blues
29 |
30 | if show_absolute:
31 | matshow = ax.matshow(conf_mat, cmap=cmap)
32 | else:
33 | matshow = ax.matshow(normed_conf_mat, cmap=cmap)
34 |
35 | if colorbar:
36 | fig.colorbar(matshow)
37 |
38 | for i in range(conf_mat.shape[0]):
39 | for j in range(conf_mat.shape[1]):
40 | cell_text = ""
41 | if show_absolute:
42 | cell_text += format(conf_mat[i, j], 'd')
43 | if show_normed:
44 | cell_text += "\n" + '('
45 | cell_text += format(normed_conf_mat[i, j], '.2f') + ')'
46 | else:
47 | cell_text += format(normed_conf_mat[i, j], '.2f')
48 | ax.text(x=j,
49 | y=i,
50 | s=cell_text,
51 | va='center',
52 | ha='center',
53 | color="white" if normed_conf_mat[i, j] > 0.5 else "black")
54 |
55 | if hide_spines:
56 | ax.spines['right'].set_visible(False)
57 | ax.spines['top'].set_visible(False)
58 | ax.spines['left'].set_visible(False)
59 | ax.spines['bottom'].set_visible(False)
60 | ax.yaxis.set_ticks_position('left')
61 | ax.xaxis.set_ticks_position('bottom')
62 | if hide_ticks:
63 | ax.axes.get_yaxis().set_ticks([])
64 | ax.axes.get_xaxis().set_ticks([])
65 |
66 | plt.xlabel('predicted label')
67 | plt.ylabel('true label')
68 | return fig, ax
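69 | 
70 | 
71 | # Minimal usage sketch (hypothetical 2x2 confusion matrix, not taken
72 | # from any homework set): rows are true labels, columns are predictions.
73 | if __name__ == '__main__':
74 |     cm = np.array([[13, 2],
75 |                    [1, 14]])
76 |     fig, ax = plot_confusion_matrix(cm, show_absolute=True, show_normed=True)
77 |     plt.show()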
--------------------------------------------------------------------------------
/other/dl-course-info.md:
--------------------------------------------------------------------------------
1 | # STAT 479 SS 2019: Deep Learning
2 |
3 | ## Abstract
4 |
5 | Deep learning is an exciting, young field that specializes in discovering and extracting intricate structures in large, unstructured datasets for parameterizing artificial neural networks with many layers. Since deep learning has pushed the state-of-the-art in many applications, it's become indispensable for modern technology. This is owed to the vast utility of deep learning for tackling complex tasks in the fields of computer vision and natural language processing -- tasks that humans are good at but are traditionally challenging for computers. This includes tasks such as image classification, object detection, and speech recognition.
6 |
7 | The focus of this course will be on understanding artificial neural networks and deep learning algorithmically (discussing the math behind these methods on a basic level) and implementing network models in code as well as applying these to real-world datasets. Some of the topics that will be covered include convolutional neural networks for image classification and object detection, recurrent neural networks for modeling text, and generative adversarial networks for generating new data.
8 |
9 | Familiarity with general machine learning concepts (such as the FS2018 STAT479: Machine Learning course) is recommended but not required. We will review some relevant background concepts, which include general machine learning concepts such as supervised learning, classification, model evaluation, etc. Furthermore, some lectures will focus on reviewing the use of Python's stack for scientific computing (NumPy, SciPy, matplotlib) prior to the introduction of PyTorch as the main computational deep learning library that we are going to use in this course.
10 |
11 |
12 | ## Tentative List of Topics
13 |
14 | - brief history of neural networks and what makes deep learning different from "classic machine learning"
15 | - introducing the concept of neural networks by connecting it to familiar concepts such as logistic regression and multinomial logistic regression (which can be seen as special cases: single-layer neural nets; a minimal code sketch follows below)
16 | - modeling and deriving non-convex loss functions through computation graphs
17 | - introduction to automatic differentiation and PyTorch for efficient data manipulation using GPUs
18 | - convolutional neural networks for analyzing unstructured data (image analysis)
19 | - using 1D convolutions for sequence analysis
20 | - more advanced sequence analysis using recurrent neural networks
21 | - introducing generative models to sample from input distributions: autoencoders, variational autoencoders, and generative adversarial neural networks
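22 | 
23 | As a minimal sketch of the "single-layer neural net" view of logistic regression mentioned above (assuming PyTorch; the toy data and variable names are hypothetical and for illustration only):
24 | 
25 | ```python
26 | import torch
27 | import torch.nn as nn
28 | 
29 | # Binary logistic regression: one linear layer followed by a sigmoid.
30 | model = nn.Sequential(nn.Linear(2, 1), nn.Sigmoid())
31 | loss_fn = nn.BCELoss()
32 | optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
33 | 
34 | # Toy data: 4 examples with 2 features each, binary labels.
35 | X = torch.tensor([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
36 | y = torch.tensor([[0.], [0.], [0.], [1.]])
37 | 
38 | for _ in range(100):
39 |     optimizer.zero_grad()
40 |     loss = loss_fn(model(X), y)  # forward pass builds the computation graph
41 |     loss.backward()              # gradients via automatic differentiation
42 |     optimizer.step()
43 | ```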
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # STAT479: Machine Learning (Fall 2018)
2 |
3 | Instructor: Sebastian Raschka
4 |
5 | Lecture material for the Machine Learning course (STAT 479) at the University of Wisconsin-Madison. For details, please see the course website at http://pages.stat.wisc.edu/~sraschka/teaching/stat479-fs2018/
6 |
7 |
8 |
9 | **Part I: Introduction**
10 |
11 | - [Lecture 1](01_overview): What is Machine Learning? An Overview.
12 | - [Lecture 2](02_knn): Intro to Supervised Learning: KNN
13 |
14 | **Part II: Computational Foundations**
15 |
16 | - [Lecture 3](03_python): Using Python, Anaconda, IPython, Jupyter Notebooks
17 | - [Lecture 4](04_scipython): Scientific Computing with NumPy, SciPy, and Matplotlib
18 | - [Lecture 5](05_sklearn): Data Preprocessing and Machine Learning with Scikit-Learn
19 |
20 | **Part III: Tree-Based Methods**
21 |
22 | - [Lecture 6](06_trees): Decision Trees
23 | - [Lecture 7](07_ensembles): Ensemble Methods
24 |
25 | **Part IV: Evaluation**
26 |
27 | - [Lecture 8](08_eval-intro): Model Evaluation 1: Introduction to Overfitting and Underfitting
28 | - [Lecture 9](09_eval-ci): Model Evaluation 2: Uncertainty Estimates and Resampling
29 | - [Lecture 10](10_eval-cv): Model Evaluation 3: Model Selection and Cross-Validation
30 | - [Lecture 11](11_eval-algo): Model Evaluation 4: Algorithm Selection and Statistical Tests
31 | - [Lecture 12](12_eval-metrics): Model Evaluation 5: Performance Metrics
32 |
33 | **Part V: Dimensionality Reduction**
34 |
35 | - [Lecture 13](13_feat-sele): Feature Selection
36 | - [Lecture 14](14_feat-extract): Feature Extraction
37 |
38 | **Due to time constraints, the following topics could unfortunately not be covered:**
39 |
40 | **Part VI: Bayesian Learning**
41 |
42 | - Bayes Classifiers
43 | - Text Data & Sentiment Analysis
44 | - Naive Bayes Classification
45 |
46 | **Part VII: Regression and Unsupervised Learning**
47 |
48 | - Regression Analysis
49 | - Clustering
50 |
51 | **The following topics will be covered at the beginning of the
52 | Deep Learning class next Spring.** [Tentative outline of the DL course](./other/dl-course-info.md).
53 |
54 | **Part VIII: Introduction to Artificial Neural Networks**
55 |
56 | - Perceptron
57 | - Adaline & Logistic Regression
58 | - SVM
59 | - Multilayer Perceptron
60 |
61 |
62 | This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.
63 |
64 |
65 |
66 |
67 |
68 |
69 | Teaching this class was a pleasure, and I am especially happy about how awesome the class projects turned out. Listed below are the winners of the three award categories as determined by ~210 votes. Congratulations!
70 |
71 | 
--------------------------------------------------------------------------------
/hw_01/test_data.txt:
--------------------------------------------------------------------------------
1 | x1 x2 y
2 | -5.75 -6.83 0
3 | 5.51 3.67 1
4 | 5.11 5.32 1
5 | 0.85 -4.11 0
6 | -0.50 -0.45 1
7 | -12.65 -12.05 0
8 | -4.22 -6.39 0
9 | -0.56 -10.23 0
10 | 2.82 1.68 1
11 | 3.44 -7.70 0
12 | 9.56 -7.29 1
13 | 11.22 5.10 1
14 | -2.90 -8.44 0
15 | 3.65 -10.13 0
16 | -5.95 -6.79 0
17 | 10.30 6.20 1
18 | 11.59 5.99 1
19 | -8.87 -2.64 0
20 | -2.63 -6.28 0
21 | 14.82 5.55 1
22 | 4.70 2.81 1
23 | -5.90 2.11 0
24 | -3.98 -8.53 0
25 | 10.52 -0.67 1
26 | -6.96 -3.70 0
27 | -4.06 -1.97 1
28 | 7.40 -0.49 1
29 | -2.08 -3.87 0
30 | -4.07 -2.24 0
31 | 7.31 0.19 1
32 | 2.26 3.73 1
33 | -6.76 -9.25 0
34 | 2.80 0.13 0
35 | -6.79 -5.64 0
36 | 5.54 9.07 1
37 | 0.36 3.12 1
38 | -0.09 -5.57 0
39 | -2.43 -8.09 0
40 | -0.77 7.97 1
41 | -2.36 -3.81 0
42 | -2.96 -1.82 0
43 | -7.74 -4.67 0
44 | -4.85 -12.71 0
45 | 1.07 -4.86 0
46 | -4.71 -2.16 0
47 | -5.00 -6.76 0
48 | -11.60 4.64 0
49 | 4.39 0.39 1
50 | 0.14 0.06 1
51 | 7.64 5.08 1
52 | 8.37 3.39 1
53 | 1.59 9.37 1
54 | 7.96 7.02 1
55 | 3.73 -4.61 0
56 | -8.17 -9.61 0
57 | -1.95 -4.46 0
58 | 0.93 -1.05 1
59 | -14.65 -1.69 0
60 | -7.93 -7.95 0
61 | 7.68 9.08 1
62 | 9.50 -2.88 1
63 | 5.17 7.50 1
64 | -4.86 -6.51 0
65 | 1.94 1.10 1
66 | -0.32 -12.92 0
67 | 7.44 -0.90 1
68 | 10.65 3.87 1
69 | -10.45 -2.66 0
70 | 7.48 -2.95 1
71 | 0.28 -0.52 0
72 | 3.18 -13.24 0
73 | 8.39 0.84 1
74 | 8.86 4.78 1
75 | 0.49 10.36 1
76 | 2.36 -12.78 0
77 | -1.97 -7.52 0
78 | 1.87 -8.03 0
79 | 3.50 5.48 1
80 | -5.58 -2.99 0
81 | 6.99 -8.59 1
82 | -6.34 -3.89 0
83 | 11.34 2.99 1
84 | -0.56 -10.16 0
85 | 8.08 6.18 1
86 | 8.94 2.05 1
87 | -11.12 -2.71 0
88 | 10.76 2.59 1
89 | 0.03 1.11 1
90 | 0.84 2.83 1
91 | 8.36 8.34 1
92 | -4.38 -4.40 0
93 | -6.94 -8.48 0
94 | -11.82 1.06 0
95 | -7.66 -5.78 0
96 | 3.29 -0.30 1
97 | 6.47 7.38 1
98 | 2.08 -6.21 0
99 | 5.97 4.18 1
100 | -1.57 -6.36 0
101 | -1.53 -3.74 0
102 | -2.84 -0.15 0
103 | 12.69 -4.20 1
104 | -7.43 -4.21 0
105 | 3.81 -8.34 0
106 | 4.76 0.32 1
107 | 11.87 6.52 1
108 | -2.01 3.78 0
109 | 1.95 0.55 1
110 | 3.51 -6.28 1
111 | -3.27 -2.19 0
112 | -5.74 1.53 0
113 | 6.98 2.86 1
114 | -7.02 -7.18 0
115 | 2.49 8.94 1
116 | -3.52 1.14 0
117 | 9.68 0.98 1
118 | -13.70 -7.31 0
119 | 11.38 4.25 1
120 | -5.46 -4.15 0
121 | -0.68 -8.03 0
122 | 0.10 -3.51 0
123 | 10.43 6.93 1
124 | 2.74 -4.24 0
125 | -2.99 -6.52 0
126 | -4.69 1.39 0
127 | 6.87 9.68 1
128 | 6.20 4.20 1
129 | 6.75 -1.85 1
130 | 6.32 9.44 1
131 | -6.92 -8.03 0
132 | 12.44 2.15 1
133 | -7.26 -1.17 0
134 | -11.95 1.21 0
135 | -3.93 -5.76 0
136 | 0.84 8.70 1
137 | 0.45 -0.26 1
138 | -0.82 -8.39 0
139 | -7.75 -12.57 0
140 | 7.03 -2.10 1
141 | -4.95 -13.39 0
142 | 5.64 1.28 1
143 | 5.47 6.38 1
144 | 3.04 -4.91 1
145 | -3.33 -3.80 0
146 | -5.89 0.18 0
147 | 8.61 10.52 1
148 | -1.91 -2.04 1
149 | 3.86 5.78 1
150 | -3.50 -5.25 0
151 | 0.78 2.49 1
152 | 8.84 3.60 1
153 | -3.50 0.86 0
154 | -7.13 -8.24 0
155 | 2.82 -8.17 0
156 | 6.67 3.99 1
157 | 10.19 3.48 1
158 | 9.79 -2.40 1
159 | 2.12 -3.79 0
160 | 11.98 5.16 1
161 | 10.65 7.99 1
162 | 9.95 0.36 1
163 | 6.19 0.89 1
164 | -3.94 -10.17 0
165 | -4.30 -9.05 0
166 | 12.59 -3.56 1
167 | 5.04 2.32 1
168 | -9.20 -14.65 0
169 | -8.35 -0.15 0
170 | -5.98 -4.62 0
171 | 4.39 1.88 1
172 | 1.01 8.72 1
173 | 0.25 5.29 1
174 | 7.30 -1.07 1
175 | -2.65 -5.44 0
176 | 12.10 -6.39 1
177 | 8.95 -1.73 1
178 | 8.79 3.18 1
179 | 3.42 12.11 1
180 | 8.71 6.47 1
181 | -15.19 -2.76 0
182 | -3.15 -9.35 0
183 | -3.26 -7.77 0
184 | 12.06 -1.95 1
185 | -1.07 -2.64 0
186 | 0.80 5.37 1
187 | 4.76 -7.93 0
188 | -2.68 -16.15 0
189 | -2.63 -8.02 0
190 | 13.31 -3.46 1
191 | 8.58 -4.67 1
192 | 4.69 2.50 1
193 | 3.25 5.99 1
194 | 1.29 6.16 1
195 | -3.17 -5.06 0
196 | -2.64 -3.66 0
197 | -3.89 -12.56 0
198 | 3.14 5.05 1
199 | 8.05 7.63 1
200 | -4.87 -6.22 0
201 | -12.42 -6.33 0
202 |
--------------------------------------------------------------------------------
/02_knn/iris.csv:
--------------------------------------------------------------------------------
1 | Id,SepalLength[cm],SepalWidth[cm],PetalLength[cm],PetalWidth[cm],Species
2 | 1,5.1,3.5,1.4,0.2,Iris-setosa
3 | 2,4.9,3.0,1.4,0.2,Iris-setosa
4 | 3,4.7,3.2,1.3,0.2,Iris-setosa
5 | 4,4.6,3.1,1.5,0.2,Iris-setosa
6 | 5,5.0,3.6,1.4,0.2,Iris-setosa
7 | 6,5.4,3.9,1.7,0.4,Iris-setosa
8 | 7,4.6,3.4,1.4,0.3,Iris-setosa
9 | 8,5.0,3.4,1.5,0.2,Iris-setosa
10 | 9,4.4,2.9,1.4,0.2,Iris-setosa
11 | 10,4.9,3.1,1.5,0.1,Iris-setosa
12 | 11,5.4,3.7,1.5,0.2,Iris-setosa
13 | 12,4.8,3.4,1.6,0.2,Iris-setosa
14 | 13,4.8,3.0,1.4,0.1,Iris-setosa
15 | 14,4.3,3.0,1.1,0.1,Iris-setosa
16 | 15,5.8,4.0,1.2,0.2,Iris-setosa
17 | 16,5.7,4.4,1.5,0.4,Iris-setosa
18 | 17,5.4,3.9,1.3,0.4,Iris-setosa
19 | 18,5.1,3.5,1.4,0.3,Iris-setosa
20 | 19,5.7,3.8,1.7,0.3,Iris-setosa
21 | 20,5.1,3.8,1.5,0.3,Iris-setosa
22 | 21,5.4,3.4,1.7,0.2,Iris-setosa
23 | 22,5.1,3.7,1.5,0.4,Iris-setosa
24 | 23,4.6,3.6,1.0,0.2,Iris-setosa
25 | 24,5.1,3.3,1.7,0.5,Iris-setosa
26 | 25,4.8,3.4,1.9,0.2,Iris-setosa
27 | 26,5.0,3.0,1.6,0.2,Iris-setosa
28 | 27,5.0,3.4,1.6,0.4,Iris-setosa
29 | 28,5.2,3.5,1.5,0.2,Iris-setosa
30 | 29,5.2,3.4,1.4,0.2,Iris-setosa
31 | 30,4.7,3.2,1.6,0.2,Iris-setosa
32 | 31,4.8,3.1,1.6,0.2,Iris-setosa
33 | 32,5.4,3.4,1.5,0.4,Iris-setosa
34 | 33,5.2,4.1,1.5,0.1,Iris-setosa
35 | 34,5.5,4.2,1.4,0.2,Iris-setosa
36 | 35,4.9,3.1,1.5,0.1,Iris-setosa
37 | 36,5.0,3.2,1.2,0.2,Iris-setosa
38 | 37,5.5,3.5,1.3,0.2,Iris-setosa
39 | 38,4.9,3.1,1.5,0.1,Iris-setosa
40 | 39,4.4,3.0,1.3,0.2,Iris-setosa
41 | 40,5.1,3.4,1.5,0.2,Iris-setosa
42 | 41,5.0,3.5,1.3,0.3,Iris-setosa
43 | 42,4.5,2.3,1.3,0.3,Iris-setosa
44 | 43,4.4,3.2,1.3,0.2,Iris-setosa
45 | 44,5.0,3.5,1.6,0.6,Iris-setosa
46 | 45,5.1,3.8,1.9,0.4,Iris-setosa
47 | 46,4.8,3.0,1.4,0.3,Iris-setosa
48 | 47,5.1,3.8,1.6,0.2,Iris-setosa
49 | 48,4.6,3.2,1.4,0.2,Iris-setosa
50 | 49,5.3,3.7,1.5,0.2,Iris-setosa
51 | 50,5.0,3.3,1.4,0.2,Iris-setosa
52 | 51,7.0,3.2,4.7,1.4,Iris-versicolor
53 | 52,6.4,3.2,4.5,1.5,Iris-versicolor
54 | 53,6.9,3.1,4.9,1.5,Iris-versicolor
55 | 54,5.5,2.3,4.0,1.3,Iris-versicolor
56 | 55,6.5,2.8,4.6,1.5,Iris-versicolor
57 | 56,5.7,2.8,4.5,1.3,Iris-versicolor
58 | 57,6.3,3.3,4.7,1.6,Iris-versicolor
59 | 58,4.9,2.4,3.3,1.0,Iris-versicolor
60 | 59,6.6,2.9,4.6,1.3,Iris-versicolor
61 | 60,5.2,2.7,3.9,1.4,Iris-versicolor
62 | 61,5.0,2.0,3.5,1.0,Iris-versicolor
63 | 62,5.9,3.0,4.2,1.5,Iris-versicolor
64 | 63,6.0,2.2,4.0,1.0,Iris-versicolor
65 | 64,6.1,2.9,4.7,1.4,Iris-versicolor
66 | 65,5.6,2.9,3.6,1.3,Iris-versicolor
67 | 66,6.7,3.1,4.4,1.4,Iris-versicolor
68 | 67,5.6,3.0,4.5,1.5,Iris-versicolor
69 | 68,5.8,2.7,4.1,1.0,Iris-versicolor
70 | 69,6.2,2.2,4.5,1.5,Iris-versicolor
71 | 70,5.6,2.5,3.9,1.1,Iris-versicolor
72 | 71,5.9,3.2,4.8,1.8,Iris-versicolor
73 | 72,6.1,2.8,4.0,1.3,Iris-versicolor
74 | 73,6.3,2.5,4.9,1.5,Iris-versicolor
75 | 74,6.1,2.8,4.7,1.2,Iris-versicolor
76 | 75,6.4,2.9,4.3,1.3,Iris-versicolor
77 | 76,6.6,3.0,4.4,1.4,Iris-versicolor
78 | 77,6.8,2.8,4.8,1.4,Iris-versicolor
79 | 78,6.7,3.0,5.0,1.7,Iris-versicolor
80 | 79,6.0,2.9,4.5,1.5,Iris-versicolor
81 | 80,5.7,2.6,3.5,1.0,Iris-versicolor
82 | 81,5.5,2.4,3.8,1.1,Iris-versicolor
83 | 82,5.5,2.4,3.7,1.0,Iris-versicolor
84 | 83,5.8,2.7,3.9,1.2,Iris-versicolor
85 | 84,6.0,2.7,5.1,1.6,Iris-versicolor
86 | 85,5.4,3.0,4.5,1.5,Iris-versicolor
87 | 86,6.0,3.4,4.5,1.6,Iris-versicolor
88 | 87,6.7,3.1,4.7,1.5,Iris-versicolor
89 | 88,6.3,2.3,4.4,1.3,Iris-versicolor
90 | 89,5.6,3.0,4.1,1.3,Iris-versicolor
91 | 90,5.5,2.5,4.0,1.3,Iris-versicolor
92 | 91,5.5,2.6,4.4,1.2,Iris-versicolor
93 | 92,6.1,3.0,4.6,1.4,Iris-versicolor
94 | 93,5.8,2.6,4.0,1.2,Iris-versicolor
95 | 94,5.0,2.3,3.3,1.0,Iris-versicolor
96 | 95,5.6,2.7,4.2,1.3,Iris-versicolor
97 | 96,5.7,3.0,4.2,1.2,Iris-versicolor
98 | 97,5.7,2.9,4.2,1.3,Iris-versicolor
99 | 98,6.2,2.9,4.3,1.3,Iris-versicolor
100 | 99,5.1,2.5,3.0,1.1,Iris-versicolor
101 | 100,5.7,2.8,4.1,1.3,Iris-versicolor
102 | 101,6.3,3.3,6.0,2.5,Iris-virginica
103 | 102,5.8,2.7,5.1,1.9,Iris-virginica
104 | 103,7.1,3.0,5.9,2.1,Iris-virginica
105 | 104,6.3,2.9,5.6,1.8,Iris-virginica
106 | 105,6.5,3.0,5.8,2.2,Iris-virginica
107 | 106,7.6,3.0,6.6,2.1,Iris-virginica
108 | 107,4.9,2.5,4.5,1.7,Iris-virginica
109 | 108,7.3,2.9,6.3,1.8,Iris-virginica
110 | 109,6.7,2.5,5.8,1.8,Iris-virginica
111 | 110,7.2,3.6,6.1,2.5,Iris-virginica
112 | 111,6.5,3.2,5.1,2.0,Iris-virginica
113 | 112,6.4,2.7,5.3,1.9,Iris-virginica
114 | 113,6.8,3.0,5.5,2.1,Iris-virginica
115 | 114,5.7,2.5,5.0,2.0,Iris-virginica
116 | 115,5.8,2.8,5.1,2.4,Iris-virginica
117 | 116,6.4,3.2,5.3,2.3,Iris-virginica
118 | 117,6.5,3.0,5.5,1.8,Iris-virginica
119 | 118,7.7,3.8,6.7,2.2,Iris-virginica
120 | 119,7.7,2.6,6.9,2.3,Iris-virginica
121 | 120,6.0,2.2,5.0,1.5,Iris-virginica
122 | 121,6.9,3.2,5.7,2.3,Iris-virginica
123 | 122,5.6,2.8,4.9,2.0,Iris-virginica
124 | 123,7.7,2.8,6.7,2.0,Iris-virginica
125 | 124,6.3,2.7,4.9,1.8,Iris-virginica
126 | 125,6.7,3.3,5.7,2.1,Iris-virginica
127 | 126,7.2,3.2,6.0,1.8,Iris-virginica
128 | 127,6.2,2.8,4.8,1.8,Iris-virginica
129 | 128,6.1,3.0,4.9,1.8,Iris-virginica
130 | 129,6.4,2.8,5.6,2.1,Iris-virginica
131 | 130,7.2,3.0,5.8,1.6,Iris-virginica
132 | 131,7.4,2.8,6.1,1.9,Iris-virginica
133 | 132,7.9,3.8,6.4,2.0,Iris-virginica
134 | 133,6.4,2.8,5.6,2.2,Iris-virginica
135 | 134,6.3,2.8,5.1,1.5,Iris-virginica
136 | 135,6.1,2.6,5.6,1.4,Iris-virginica
137 | 136,7.7,3.0,6.1,2.3,Iris-virginica
138 | 137,6.3,3.4,5.6,2.4,Iris-virginica
139 | 138,6.4,3.1,5.5,1.8,Iris-virginica
140 | 139,6.0,3.0,4.8,1.8,Iris-virginica
141 | 140,6.9,3.1,5.4,2.1,Iris-virginica
142 | 141,6.7,3.1,5.6,2.4,Iris-virginica
143 | 142,6.9,3.1,5.1,2.3,Iris-virginica
144 | 143,5.8,2.7,5.1,1.9,Iris-virginica
145 | 144,6.8,3.2,5.9,2.3,Iris-virginica
146 | 145,6.7,3.3,5.7,2.5,Iris-virginica
147 | 146,6.7,3.0,5.2,2.3,Iris-virginica
148 | 147,6.3,2.5,5.0,1.9,Iris-virginica
149 | 148,6.5,3.0,5.2,2.0,Iris-virginica
150 | 149,6.2,3.4,5.4,2.3,Iris-virginica
151 | 150,5.9,3.0,5.1,1.8,Iris-virginica
--------------------------------------------------------------------------------
/report-template/report.tex:
--------------------------------------------------------------------------------
1 | \documentclass[10pt,twocolumn,letterpaper]{article}
2 |
3 | \usepackage{statcourse}
4 | \usepackage{times}
5 | \usepackage{epsfig}
6 | \usepackage{graphicx}
7 | \usepackage{amsmath}
8 | \usepackage{amssymb}
9 |
10 | % Include other packages here, before hyperref.
11 |
12 | % If you comment hyperref and then uncomment it, you should delete
13 | % egpaper.aux before re-running latex. (Or just hit 'q' on the first latex
14 | % run, let it finish, and you should be clear).
15 | \usepackage[breaklinks=true,bookmarks=false]{hyperref}
16 |
17 |
18 | \statcoursefinalcopy
19 |
20 |
21 | \setcounter{page}{1}
22 | \begin{document}
23 |
24 |
25 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
26 | % DO NOT EDIT ANYTHING ABOVE THIS LINE
27 | % EXCEPT IF YOU LIKE TO USE ADDITIONAL PACKAGES
28 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
29 |
30 |
31 |
32 | %%%%%%%%% TITLE
33 | \title{\LaTeX\ Template for STAT479 Project Report}
34 |
35 | \author{First Author\\
36 | {\tt\small firstauthor@wisc.edu}
37 | \and
38 | Second Author\\
39 | {\tt\small secondauthor@wisc.edu}
40 | \and
41 | Third Author\\
42 | {\tt\small thirdauthor@wisc.edu}
43 | }
44 |
45 | \maketitle
46 | %\thispagestyle{empty}
47 |
48 |
49 |
50 | % MAIN ARTICLE GOES BELOW
51 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
52 |
53 |
54 | %%%%%%%%% ABSTRACT
55 | \begin{abstract}
56 | The abstract for your project goes here. The length of the abstract
57 | should be between 200 and 250 words. Tips for writing a good abstract
58 | can be found at \url{https://writing.wisc.edu/Handbook/presentations_abstracts.html}.
59 | \end{abstract}
60 |
61 | %%%%%%%%% BODY TEXT
62 | \section{Introduction}
63 |
64 | This template is based on the CVPR conference template\footnote{\url{http://statcourse2018.thecvf.com/submission/main_conference/author_guidelines}}.
65 |
66 | The information in this template is very minimal, and this file should serve you as a framework for writing your report. You may prefer to use a more collaboration-friendly tool while drafting the report with your classmates before you prepare the final report for submission. Remember that you should \textbf{submit both the report and code} you used for this project via Canvas. Also, \textbf{only one member per team} needs to submit the project material.
67 |
68 |
69 | This is an example of a mathematical equation:
70 |
71 | $$f(\mathbf{x}; \mathbf{w}) = \sum_{i=1}^{n} w_ix_i.$$
72 |
73 | This is a mathematical expression, $h(\mathbf{x}) = \hat{y}$ formatted in text.
74 |
75 | The project report should be 6-8 pages long (not counting references)
76 | and should contain the sections that are already provided in this paper. Please
77 | check out the text in these sections for further information.
78 |
79 |
80 | \subsection{Subsection}
81 |
82 | You can use paragraphs or subsections to further structure your
83 | main sections. This is an example of a subsection.
84 |
85 | \paragraph{This is a paragraph title.} This is an example of a paragraph.
86 |
87 | \section{Related Work}
88 |
89 | Related work should be discussed here. This is an example of a citation \cite{mirjalili2018gender}. To format the citations properly, put the
90 | corresponding references into the bibliography.bib file. You can obtain
91 | BibTeX-formatted references for the "bib" file from Google Scholar
92 | (\url{https://scholar.google.com}), for example, by clicking on the
93 | double-quote character under a citation and then selecting \mbox{"BibTeX"} as
94 | shown in Figure \ref{fig:google-scholar-1col} and
95 | Figure \ref{fig:google-scholar-2col}.
96 |
97 | \begin{figure}[t]
98 | \begin{center}
99 | \includegraphics[width=0.8\linewidth]{figures/google-scholar.pdf}
100 | \end{center}
101 | \caption{Example illustrating how to get BibTeX references from
102 | Google Scholar as a 1-column figure.}
103 | \label{fig:google-scholar-1col}
104 | \end{figure}
105 |
106 |
107 | \begin{figure*}
108 | \begin{center}
109 | \includegraphics[width=0.8\linewidth]{figures/google-scholar.pdf}
110 | \end{center}
111 | \caption{Example illustrating how to get BibTeX references from
112 | Google Scholar as a 2-column figure.}
113 | \label{fig:google-scholar-2col}
114 | \end{figure*}
115 |
116 | Table \ref{tab:some-table} shows an example for formatting a table.
117 |
118 | \begin{table}
119 | \begin{center}
120 | \begin{tabular}{|l|c|}
121 | \hline
122 | Method & Accuracy \\
123 | \hline\hline
124 | Method 1 & $70 \pm 3$ \% \\
125 | Method 2 & $76 \pm 3$ \% \\
126 | \hline
127 | \end{tabular}
128 | \end{center}
129 | \caption{This is an example of a table.}
130 | \label{tab:some-table}
131 | \end{table}
132 |
133 |
134 | \section{Proposed Method}
135 |
136 | Describe the method(s) you are proposing, developing, or using. I.e., details
137 | of the algorithms may be included here.
138 |
139 | \section{Experiments}
140 |
141 | Describe the experiments you performed. You may want to create separate
142 | subsections to further structure this section.
143 |
144 | \subsection{Dataset}
145 |
146 | Briefly describe your dataset in a separate subsection.
147 |
148 |
149 | \subsection{Software}
150 |
151 | Briefly list (and cite) the software you used.
152 |
153 | \subsection{Hardware}
154 |
155 | If relevant, list hardware resources you used.
156 |
157 |
158 | \section{Results and Discussion}
159 |
160 | Describe the results you obtained from the experiments and interpret them.
161 | Optionally, you could split "Results and Discussion" into two separate
162 | sections.
163 |
164 | \section{Conclusions}
165 |
166 | Describe your conclusions here. If there are any future directions, you can
167 | describe them here, or you can create a new section for future directions.
168 |
169 | \section{Acknowledgements}
170 |
171 | List acknowledgements if any. For example, if someone provided you a dataset, or
172 | you used someone else's resources, this is a good place to acknowledge
173 | the help or support you received.
174 |
175 | \section{Contributions}
176 |
177 | Describe the contributions of each team member who worked on this project.
178 |
179 |
180 | {\small
181 | \bibliographystyle{ieee}
182 | \bibliography{bibliography.bib}
183 | }
184 |
185 | \end{document}
186 |
--------------------------------------------------------------------------------
/11_eval-algo/11_eval-algo_code.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "STAT 479: Machine Learning (Fall 2018) \n",
8 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) \n",
9 | "Course website: http://pages.stat.wisc.edu/~sraschka/teaching/stat479-fs2018/"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "# L11: Model Evaluation 4 -- Algorithm Comparison"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "name": "stdout",
26 | "output_type": "stream",
27 | "text": [
28 | "Sebastian Raschka 2018-11-07 \n",
29 | "\n",
30 | "CPython 3.6.7\n",
31 | "IPython 6.5.0\n",
32 | "\n",
33 | "sklearn 0.20.0\n",
34 | "mlxtend 0.14.0dev\n"
35 | ]
36 | }
37 | ],
38 | "source": [
39 | "%load_ext watermark\n",
40 | "%watermark -a 'Sebastian Raschka' -d -p sklearn,mlxtend -v"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 2,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "import numpy as np\n",
50 | "from sklearn.model_selection import GridSearchCV\n",
51 | "from sklearn.model_selection import train_test_split\n",
52 | "from sklearn.model_selection import StratifiedKFold\n",
53 | "from sklearn.model_selection import cross_val_score\n",
54 | "from sklearn.pipeline import Pipeline\n",
55 | "from sklearn.preprocessing import StandardScaler\n",
56 | "from sklearn.linear_model import LogisticRegression\n",
57 | "from sklearn.neighbors import KNeighborsClassifier\n",
58 | "from sklearn.tree import DecisionTreeClassifier\n",
59 | "from sklearn.svm import SVC\n",
60 | "from mlxtend.data import mnist_data\n",
61 | "from sklearn.metrics import accuracy_score\n",
62 | "\n",
63 | "# Loading and splitting the dataset\n",
64 | "# Note that this is a small (stratified) subset\n",
65 | "# of MNIST; it consists of 5000 samples only, that is,\n",
66 | "# 10% of the original MNIST dataset\n",
67 | "# http://yann.lecun.com/exdb/mnist/\n",
68 | "X, y = mnist_data()\n",
69 | "X = X.astype(np.float32)\n",
70 | "X_train, X_test, y_train, y_test = train_test_split(X, y,\n",
71 | " test_size=0.2,\n",
72 | " random_state=1,\n",
73 | " stratify=y)\n",
74 | "\n",
75 | "# Initializing Classifiers\n",
76 | "clf1 = LogisticRegression(multi_class='multinomial',\n",
77 | " solver='newton-cg',\n",
78 | " random_state=1)\n",
79 | "clf2 = KNeighborsClassifier(algorithm='ball_tree',\n",
80 | " leaf_size=50)\n",
81 | "clf3 = DecisionTreeClassifier(random_state=1)\n",
82 | "clf4 = SVC(random_state=1)\n",
83 | "\n",
84 | "# Building the pipelines\n",
85 | "pipe1 = Pipeline([('std', StandardScaler()),\n",
86 | " ('clf1', clf1)])\n",
87 | "\n",
88 | "pipe2 = Pipeline([('std', StandardScaler()),\n",
89 | " ('clf2', clf2)])\n",
90 | "\n",
91 | "pipe4 = Pipeline([('std', StandardScaler()),\n",
92 | " ('clf4', clf4)])\n",
93 | "\n",
94 | "\n",
95 | "# Setting up the parameter grids\n",
96 | "param_grid1 = [{'clf1__penalty': ['l2'],\n",
97 | " 'clf1__C': np.power(10., np.arange(-4, 4))}]\n",
98 | "\n",
99 | "param_grid2 = [{'clf2__n_neighbors': list(range(1, 10)),\n",
100 | " 'clf2__p': [1, 2]}]\n",
101 | "\n",
102 | "param_grid3 = [{'max_depth': list(range(1, 10)) + [None],\n",
103 | " 'criterion': ['gini', 'entropy']}]\n",
104 | "\n",
105 | "param_grid4 = [{'clf4__kernel': ['rbf'],\n",
106 | " 'clf4__C': np.power(10., np.arange(-4, 4)),\n",
107 | " 'clf4__gamma': np.power(10., np.arange(-5, 0))},\n",
108 | " {'clf4__kernel': ['linear'],\n",
109 | " 'clf4__C': np.power(10., np.arange(-4, 4))}]\n",
110 | "\n",
111 | "# Setting up multiple GridSearchCV objects, 1 for each algorithm\n",
112 | "gridcvs = {}\n",
113 | "inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=1)\n",
114 | "\n",
115 | "for pgrid, est, name in zip((param_grid1, param_grid2,\n",
116 | " param_grid3, param_grid4),\n",
117 | " (pipe1, pipe2, clf3, pipe4),\n",
118 | " ('Softmax', 'KNN', 'DTree', 'SVM')):\n",
119 | " gcv = GridSearchCV(estimator=est,\n",
120 | " param_grid=pgrid,\n",
121 | " scoring='accuracy',\n",
122 | " n_jobs=1,\n",
123 | " cv=inner_cv,\n",
124 | " verbose=0,\n",
125 | " refit=True)\n",
126 | " gridcvs[name] = gcv"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 3,
132 | "metadata": {},
133 | "outputs": [
134 | {
135 | "name": "stdout",
136 | "output_type": "stream",
137 | "text": [
138 | "DTree | outer ACC 77.25% +/- 2.05\n",
139 | "KNN | outer ACC 91.17% +/- 1.07\n",
140 | "SVM | outer ACC 91.93% +/- 1.38\n",
141 | "Softmax | outer ACC 90.25% +/- 1.31\n"
142 | ]
143 | }
144 | ],
145 | "source": [
146 | "outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)\n",
147 | "\n",
148 | "for name, gs_est in sorted(gridcvs.items()):\n",
149 | " nested_score = cross_val_score(gs_est, \n",
150 | " X=X_train, \n",
151 | " y=y_train, \n",
152 | " cv=outer_cv,\n",
153 | " n_jobs=-1)\n",
154 | " print('%s | outer ACC %.2f%% +/- %.2f' % \n",
155 | " (name, nested_score.mean() * 100, nested_score.std() * 100))"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": 4,
161 | "metadata": {},
162 | "outputs": [
163 | {
164 | "name": "stdout",
165 | "output_type": "stream",
166 | "text": [
167 | "Accuracy 91.30% (average over CV test folds)\n",
168 | "Best Parameters: {'clf4__C': 100.0, 'clf4__gamma': 0.001, 'clf4__kernel': 'rbf'}\n",
169 | "Training Accuracy: 100.00%\n",
170 | "Test Accuracy: 93.00%\n"
171 | ]
172 | }
173 | ],
174 | "source": [
175 | "# Fitting a model to the whole training set\n",
176 | "# using the \"best\" algorithm\n",
177 | "best_algo = gridcvs['SVM']\n",
178 | "\n",
179 | "best_algo.fit(X_train, y_train)\n",
180 | "train_acc = accuracy_score(y_true=y_train, y_pred=best_algo.predict(X_train))\n",
181 | "test_acc = accuracy_score(y_true=y_test, y_pred=best_algo.predict(X_test))\n",
182 | "\n",
183 | "print('Accuracy %.2f%% (average over CV test folds)' %\n",
184 | " (100 * best_algo.best_score_))\n",
185 | "print('Best Parameters: %s' % gridcvs['SVM'].best_params_)\n",
186 | "print('Training Accuracy: %.2f%%' % (100 * train_acc))\n",
187 | "print('Test Accuracy: %.2f%%' % (100 * test_acc))"
188 | ]
189 | }
190 | ],
191 | "metadata": {
192 | "anaconda-cloud": {},
193 | "kernelspec": {
194 | "display_name": "Python 3",
195 | "language": "python",
196 | "name": "python3"
197 | },
198 | "language_info": {
199 | "codemirror_mode": {
200 | "name": "ipython",
201 | "version": 3
202 | },
203 | "file_extension": ".py",
204 | "mimetype": "text/x-python",
205 | "name": "python",
206 | "nbconvert_exporter": "python",
207 | "pygments_lexer": "ipython3",
208 | "version": "3.6.7"
209 | }
210 | },
211 | "nbformat": 4,
212 | "nbformat_minor": 1
213 | }
214 |
--------------------------------------------------------------------------------
/report-template/statcourse.sty:
--------------------------------------------------------------------------------
1 | % ---------------------------------------------------------------
2 | %
3 | % $Id: statcourse.sty,v 1.3 2005/10/24 19:56:15 awf Exp $
4 | %
5 | % by Paolo.Ienne@di.epfl.ch
6 | % some mods by awf@acm.org
7 | %
8 | % ---------------------------------------------------------------
9 | %
10 | % no guarantee is given that the format corresponds perfectly to
11 | % IEEE 8.5" x 11" Proceedings, but most features should be ok.
12 | %
13 | % ---------------------------------------------------------------
14 | % with LaTeX2e:
15 | % =============
16 | %
17 | % use as
18 | % \documentclass[times,10pt,twocolumn]{article}
19 | % \usepackage{latex8}
20 | % \usepackage{times}
21 | %
22 | % ---------------------------------------------------------------
23 |
24 | % with LaTeX 2.09:
25 | % ================
26 | %
27 | % use as
28 | % \documentstyle[times,art10,twocolumn,statcourse]{article}
29 | %
30 | % ---------------------------------------------------------------
31 | % with both versions:
32 | % ===================
33 | %
34 | % specify \statcoursefinalcopy to emit the final camera-ready copy
35 | %
36 | % specify references as
37 | % \bibliographystyle{ieee}
38 | % \bibliography{...your files...}
39 | %
40 | % ---------------------------------------------------------------
41 |
42 | \usepackage{eso-pic}
43 | \usepackage{xspace}
44 |
45 | \typeout{CVPR 8.5 x 11-Inch Proceedings Style `statcourse.sty'.}
46 |
47 | % ten point helvetica bold required for captions
48 | % eleven point times bold required for second-order headings
49 | % in some sites the name of the fonts may differ,
50 | % change the name here:
51 | \font\statcoursetenhv = phvb at 8pt % *** IF THIS FAILS, SEE statcourse.sty ***
52 | \font\elvbf = ptmb scaled 1100
53 |
54 | % If the above lines give an error message, try to comment them and
55 | % uncomment these:
56 | %\font\statcoursetenhv = phvb7t at 8pt
57 | %\font\elvbf = ptmb7t scaled 1100
58 |
59 | % set dimensions of columns, gap between columns, and paragraph indent
60 | \setlength{\textheight}{8.875in}
61 | \setlength{\textwidth}{6.875in}
62 | \setlength{\columnsep}{0.3125in}
63 | \setlength{\topmargin}{0in}
64 | \setlength{\headheight}{0in}
65 | \setlength{\headsep}{0in}
66 | \setlength{\parindent}{1pc}
67 | \setlength{\oddsidemargin}{-.304in}
68 | \setlength{\evensidemargin}{-.304in}
69 |
70 | \newif\ifstatcoursefinal
71 | \statcoursefinalfalse
72 | \def\statcoursefinalcopy{\global\statcoursefinaltrue}
73 |
74 | % memento from size10.clo
75 | % \normalsize{\@setfontsize\normalsize\@xpt\@xiipt}
76 | % \small{\@setfontsize\small\@ixpt{11}}
77 | % \footnotesize{\@setfontsize\footnotesize\@viiipt{9.5}}
78 | % \scriptsize{\@setfontsize\scriptsize\@viipt\@viiipt}
79 | % \tiny{\@setfontsize\tiny\@vpt\@vipt}
80 | % \large{\@setfontsize\large\@xiipt{14}}
81 | % \Large{\@setfontsize\Large\@xivpt{18}}
82 | % \LARGE{\@setfontsize\LARGE\@xviipt{22}}
83 | % \huge{\@setfontsize\huge\@xxpt{25}}
84 | % \Huge{\@setfontsize\Huge\@xxvpt{30}}
85 |
86 | \def\@maketitle
87 | {
88 | \newpage
89 | \null
90 | \vskip .375in
91 | \begin{center}
92 | {\Large \bf \@title \par}
93 | % additional two empty lines at the end of the title
94 | \vspace*{24pt}
95 | {
96 | \large
97 | \lineskip .5em
98 | \begin{tabular}[t]{c}
99 | \ifstatcoursefinal\@author\else Anonymous CVPR submission\\
100 | \vspace*{1pt}\\%This space will need to be here in the final copy, so don't squeeze it out for the review copy.
101 | Paper ID \statcoursePaperID \fi
102 | \end{tabular}
103 | \par
104 | }
105 | % additional small space at the end of the author name
106 | \vskip .5em
107 | % additional empty line at the end of the title block
108 | \vspace*{12pt}
109 | \end{center}
110 | }
111 |
112 | \def\abstract
113 | {%
114 | \centerline{\large\bf Abstract}%
115 | \vspace*{12pt}%
116 | \it%
117 | }
118 |
119 | \def\endabstract
120 | {
121 | % additional empty line at the end of the abstract
122 | \vspace*{12pt}
123 | }
124 |
125 | \def\affiliation#1{\gdef\@affiliation{#1}} \gdef\@affiliation{}
126 |
127 | \newlength{\@ctmp}
128 | \newlength{\@figindent}
129 | \setlength{\@figindent}{1pc}
130 |
131 | \long\def\@makecaption#1#2{
132 | \setbox\@tempboxa\hbox{\small \noindent #1.~#2}
133 | \setlength{\@ctmp}{\hsize}
134 | \addtolength{\@ctmp}{-\@figindent}\addtolength{\@ctmp}{-\@figindent}
135 | % IF longer than one indented paragraph line
136 | \ifdim \wd\@tempboxa >\@ctmp
137 | % THEN DON'T set as an indented paragraph
138 | {\small #1.~#2\par}
139 | \else
140 | % ELSE center
141 | \hbox to\hsize{\hfil\box\@tempboxa\hfil}
142 | \fi}
143 |
144 | % correct heading spacing and type
145 | \def\statcoursesection{\@startsection {section}{1}{\z@}
146 | {10pt plus 2pt minus 2pt}{7pt} {\large\bf}}
147 | \def\statcoursessect#1{\statcoursesection*{#1}}
148 | \def\statcoursesect#1{\statcoursesection{\hskip -1em.~#1}}
149 | \def\section{\@ifstar\statcoursessect\statcoursesect}
150 |
151 | \def\statcoursesubsection{\@startsection {subsection}{2}{\z@}
152 | {8pt plus 2pt minus 2pt}{6pt} {\elvbf}}
153 | \def\statcoursessubsect#1{\statcoursesubsection*{#1}}
154 | \def\statcoursesubsect#1{\statcoursesubsection{\hskip -1em.~#1}}
155 | \def\subsection{\@ifstar\statcoursessubsect\statcoursesubsect}
156 |
157 | %% --------- Page background marks: Ruler and confidentiality
158 |
159 | % ----- define vruler
160 | \makeatletter
161 | \newbox\statcourserulerbox
162 | \newcount\statcourserulercount
163 | \newdimen\statcourseruleroffset
164 | \newdimen\cv@lineheight
165 | \newdimen\cv@boxheight
166 | \newbox\cv@tmpbox
167 | \newcount\cv@refno
168 | \newcount\cv@tot
169 | % NUMBER with left flushed zeros \fillzeros[]
170 | \newcount\cv@tmpc@ \newcount\cv@tmpc
171 | \def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi
172 | \cv@tmpc=1 %
173 | \loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi
174 | \ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat
175 | \ifnum#2<0\advance\cv@tmpc1\relax-\fi
176 | \loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat
177 | \cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}%
178 | % \makevruler[][][][][]
179 | \def\makevruler[#1][#2][#3][#4][#5]{\begingroup\offinterlineskip
180 | \textheight=#5\vbadness=10000\vfuzz=120ex\overfullrule=0pt%
181 | \global\setbox\statcourserulerbox=\vbox to \textheight{%
182 | {\parskip=0pt\hfuzz=150em\cv@boxheight=\textheight
183 | \cv@lineheight=#1\global\statcourserulercount=#2%
184 | \cv@tot\cv@boxheight\divide\cv@tot\cv@lineheight\advance\cv@tot2%
185 | \cv@refno1\vskip-\cv@lineheight\vskip1ex%
186 | \loop\setbox\cv@tmpbox=\hbox to0cm{{\statcoursetenhv\hfil\fillzeros[#4]\statcourserulercount}}%
187 | \ht\cv@tmpbox\cv@lineheight\dp\cv@tmpbox0pt\box\cv@tmpbox\break
188 | \advance\cv@refno1\global\advance\statcourserulercount#3\relax
189 | \ifnum\cv@refno<\cv@tot\repeat}}\endgroup}%
190 | \makeatother
191 | % ----- end of vruler
192 |
193 | % \makevruler[][][][][]
194 | \def\statcourseruler#1{\makevruler[12pt][#1][1][3][0.993\textheight]\usebox{\statcourserulerbox}}
195 | \AddToShipoutPicture{%
196 | \ifstatcoursefinal\else
197 | %\AtTextLowerLeft{%
198 | % \color[gray]{.15}\framebox(\LenToUnit{\textwidth},\LenToUnit{\textheight}){}
199 | %}
200 | \statcourseruleroffset=\textheight
201 | \advance\statcourseruleroffset by -3.7pt
202 | \color[rgb]{.5,.5,1}
203 | \AtTextUpperLeft{%
204 | \put(\LenToUnit{-35pt},\LenToUnit{-\statcourseruleroffset}){%left ruler
205 | \statcourseruler{\statcourserulercount}}
206 | \put(\LenToUnit{\textwidth\kern 30pt},\LenToUnit{-\statcourseruleroffset}){%right ruler
207 | \statcourseruler{\statcourserulercount}}
208 | }
209 | \def\pid{\parbox{1in}{\begin{center}\bf\sf{\small CVPR}\\\#\statcoursePaperID\end{center}}}
210 | \AtTextUpperLeft{%paperID in corners
211 | \put(\LenToUnit{-65pt},\LenToUnit{45pt}){\pid}
212 | \put(\LenToUnit{\textwidth\kern-8pt},\LenToUnit{45pt}){\pid}
213 | }
214 | \AtTextUpperLeft{%confidential
215 | \put(0,\LenToUnit{1cm}){\parbox{\textwidth}{\centering\statcoursetenhv
216 | CVPR 2018 Submission \#\statcoursePaperID. CONFIDENTIAL REVIEW COPY. DO NOT DISTRIBUTE.}}
217 | }
218 | \fi
219 | }
220 |
221 | %%% Make figure placement a little more predictable.
222 | % We trust the user to move figures if this results
223 | % in ugliness.
224 | % Minimize bad page breaks at figures
225 | \renewcommand{\textfraction}{0.01}
226 | \renewcommand{\floatpagefraction}{0.99}
227 | \renewcommand{\topfraction}{0.99}
228 | \renewcommand{\bottomfraction}{0.99}
229 | \renewcommand{\dblfloatpagefraction}{0.99}
230 | \renewcommand{\dbltopfraction}{0.99}
231 | \setcounter{totalnumber}{99}
232 | \setcounter{topnumber}{99}
233 | \setcounter{bottomnumber}{99}
234 |
235 | % Add a period to the end of an abbreviation unless there's one
236 | % already, then \xspace.
237 | \makeatletter
238 | \DeclareRobustCommand\onedot{\futurelet\@let@token\@onedot}
239 | \def\@onedot{\ifx\@let@token.\else.\null\fi\xspace}
240 |
241 | \def\eg{\emph{e.g}\onedot} \def\Eg{\emph{E.g}\onedot}
242 | \def\ie{\emph{i.e}\onedot} \def\Ie{\emph{I.e}\onedot}
243 | \def\cf{\emph{c.f}\onedot} \def\Cf{\emph{C.f}\onedot}
244 | \def\etc{\emph{etc}\onedot} \def\vs{\emph{vs}\onedot}
245 | \def\wrt{w.r.t\onedot} \def\dof{d.o.f\onedot}
246 | \def\etal{\emph{et al}\onedot}
247 | \makeatother
248 |
249 | % ---------------------------------------------------------------
250 |
--------------------------------------------------------------------------------
/hw_01/train_data.txt:
--------------------------------------------------------------------------------
1 | x1 x2 y
2 | -3.84 -4.40 0
3 | 16.36 6.54 1
4 | -2.73 -5.13 0
5 | 4.83 7.22 1
6 | 3.66 -5.34 0
7 | -0.25 3.12 1
8 | -4.05 -5.13 0
9 | 5.92 4.12 1
10 | 5.55 -1.74 1
11 | 5.68 3.40 1
12 | 10.18 8.89 1
13 | -5.23 -6.67 0
14 | -2.94 -7.10 0
15 | 3.17 6.16 1
16 | 1.82 -1.63 1
17 | -9.18 -1.19 0
18 | 1.28 -4.73 0
19 | -1.49 -2.72 0
20 | 7.21 1.48 1
21 | 0.83 6.78 1
22 | -13.54 -1.02 0
23 | 3.14 1.96 1
24 | 0.94 0.11 1
25 | -4.76 -8.73 0
26 | 5.20 7.22 1
27 | 4.49 4.01 1
28 | 5.28 -2.48 1
29 | 6.70 -6.34 0
30 | 5.42 -2.77 1
31 | -0.43 -3.38 0
32 | -5.37 -3.82 0
33 | -0.09 -8.31 0
34 | -10.86 -9.11 0
35 | 2.16 4.69 1
36 | -1.67 0.07 0
37 | 0.18 -9.78 0
38 | 4.27 -13.91 0
39 | 3.71 9.04 1
40 | 9.27 1.85 1
41 | 1.80 4.61 1
42 | -7.37 -11.87 0
43 | -0.37 -7.59 0
44 | -0.96 -5.23 0
45 | -3.35 -6.77 0
46 | 4.13 -7.18 0
47 | 10.44 -6.05 1
48 | -4.22 -7.05 0
49 | 3.72 8.25 1
50 | 2.76 -0.68 1
51 | -3.50 -5.68 0
52 | 5.95 -10.07 0
53 | -5.17 -4.59 0
54 | -1.76 -0.97 0
55 | -7.83 -3.18 0
56 | -1.57 0.57 1
57 | 9.14 4.46 1
58 | -10.80 -4.57 0
59 | -0.08 3.66 0
60 | -3.28 -1.54 1
61 | -1.04 -5.42 0
62 | 10.21 3.82 1
63 | 3.71 2.54 1
64 | 12.28 -0.10 1
65 | -0.84 -3.87 0
66 | 6.53 0.10 1
67 | 8.97 2.10 1
68 | -3.97 -4.71 0
69 | 2.84 -7.89 0
70 | -4.31 -2.16 0
71 | -2.30 -4.22 0
72 | -3.62 -7.97 0
73 | 11.72 -3.33 1
74 | 0.79 -4.98 0
75 | 11.03 -7.03 0
76 | -3.30 -2.64 0
77 | 7.84 -5.64 0
78 | -5.49 -1.57 0
79 | -8.69 -9.69 0
80 | -5.89 -5.96 0
81 | 5.36 2.73 1
82 | 1.53 -4.95 0
83 | -1.05 4.01 1
84 | -4.65 -7.61 0
85 | -4.66 -0.78 0
86 | 1.18 -9.71 0
87 | 4.03 5.24 1
88 | 4.09 4.61 1
89 | -0.88 -4.48 0
90 | 0.56 -5.17 0
91 | 12.29 -2.51 1
92 | 9.77 6.69 1
93 | -4.52 -11.13 0
94 | 0.80 -8.83 0
95 | -4.89 -8.58 0
96 | 3.40 -2.12 1
97 | 3.25 3.71 1
98 | 4.78 0.08 1
99 | 6.11 4.34 1
100 | -7.67 -10.05 0
101 | 2.69 -0.84 1
102 | -3.69 -10.78 0
103 | 0.04 -2.91 1
104 | 8.93 7.30 1
105 | 2.85 1.86 1
106 | 10.66 -2.37 1
107 | 4.36 -2.10 1
108 | 2.53 1.89 1
109 | 8.36 10.60 1
110 | 9.12 -1.53 1
111 | 2.06 -8.03 0
112 | 0.02 -5.39 0
113 | 12.79 8.90 1
114 | -5.52 -9.25 0
115 | 3.61 5.99 1
116 | -5.45 -5.48 0
117 | 2.74 11.48 1
118 | -8.05 1.79 0
119 | 8.87 -3.80 1
120 | 2.33 7.95 1
121 | 5.22 7.43 1
122 | 4.34 0.68 1
123 | 6.33 3.30 1
124 | 9.39 3.89 1
125 | 6.83 2.22 1
126 | 5.69 6.50 1
127 | -6.70 -10.23 0
128 | 0.89 3.70 1
129 | 2.74 -9.34 0
130 | -0.40 6.67 1
131 | 0.63 -0.58 0
132 | -0.97 -0.19 0
133 | -0.38 -13.55 0
134 | 7.35 1.79 1
135 | 3.10 -11.50 0
136 | -1.53 -7.31 0
137 | -5.52 -4.68 0
138 | 4.38 -5.04 0
139 | 2.22 -0.00 1
140 | -1.05 -3.75 0
141 | 1.53 -12.24 0
142 | 6.83 -2.38 1
143 | -3.96 -9.17 0
144 | 3.77 1.20 1
145 | 10.50 -1.03 1
146 | 7.93 0.80 1
147 | 7.26 -6.40 0
148 | 4.84 3.15 1
149 | 10.10 2.34 1
150 | -4.68 -8.24 0
151 | 14.16 2.35 1
152 | -3.83 -0.51 0
153 | -1.74 -7.86 0
154 | 7.38 7.20 1
155 | -5.17 -1.23 0
156 | 3.13 3.11 1
157 | -5.92 -10.49 0
158 | 15.94 9.48 1
159 | -3.12 -9.22 0
160 | 11.43 -4.44 1
161 | -0.05 -4.04 0
162 | 4.63 6.95 1
163 | 4.13 5.42 1
164 | 4.24 -6.61 0
165 | 14.14 -6.83 1
166 | -14.85 -2.24 0
167 | 11.43 1.90 1
168 | 12.33 1.21 1
169 | 4.59 4.69 1
170 | 4.03 0.40 1
171 | 1.64 -2.76 0
172 | 5.90 1.57 1
173 | 2.83 6.11 1
174 | -2.02 -3.45 0
175 | 7.11 8.73 1
176 | 7.76 3.95 1
177 | 5.94 3.97 1
178 | 7.00 4.18 1
179 | -8.12 -12.72 0
180 | -3.11 -4.88 0
181 | 6.72 5.81 1
182 | -8.97 -4.16 0
183 | 6.42 0.60 1
184 | -8.41 -5.61 0
185 | -4.09 -2.59 0
186 | -0.63 -2.20 1
187 | -0.02 -12.95 0
188 | -1.45 -12.04 0
189 | -10.99 4.08 0
190 | 14.14 2.09 1
191 | 1.37 3.49 1
192 | -11.21 -12.60 0
193 | -6.72 -2.12 0
194 | 9.90 2.87 1
195 | 1.43 -10.15 0
196 | -4.91 -8.80 0
197 | -0.15 -6.41 0
198 | -1.50 -5.15 0
199 | -3.31 -6.48 0
200 | 4.82 -2.20 1
201 | 4.88 4.83 1
202 | -4.89 -0.84 0
203 | -2.56 -1.44 0
204 | -5.38 -3.27 0
205 | 5.31 1.29 1
206 | 2.40 -8.01 0
207 | -3.84 1.85 0
208 | -8.64 0.75 0
209 | 6.58 6.45 1
210 | -6.61 -7.82 0
211 | -2.16 -5.64 0
212 | 7.00 1.84 1
213 | 3.56 -7.63 0
214 | 4.14 -3.39 1
215 | 1.21 -5.49 0
216 | 9.53 0.58 1
217 | -8.63 -3.64 0
218 | 10.51 0.32 1
219 | 12.28 8.25 1
220 | 6.30 9.16 1
221 | -8.06 -7.50 0
222 | -8.03 -9.91 0
223 | 6.51 6.24 1
224 | -6.99 -12.41 0
225 | -7.52 -1.73 0
226 | -3.81 -6.57 0
227 | -8.33 0.31 0
228 | -3.07 -0.45 0
229 | 6.49 4.80 1
230 | -2.00 -0.73 0
231 | 5.91 3.55 1
232 | 4.41 -5.24 1
233 | 4.69 -2.42 1
234 | -0.44 -0.16 1
235 | 4.42 3.53 1
236 | 1.96 -2.66 0
237 | 7.35 -1.35 1
238 | -6.70 -1.99 0
239 | -2.80 -2.71 0
240 | -4.58 -6.58 0
241 | -3.40 -3.48 0
242 | -1.53 -0.63 0
243 | -5.97 -2.88 0
244 | 4.52 -3.56 0
245 | -2.74 -3.33 0
246 | -8.16 -0.73 0
247 | 2.88 -1.97 1
248 | -0.15 -5.59 0
249 | 7.59 3.10 1
250 | 5.66 2.11 1
251 | 6.66 1.61 1
252 | -6.10 -8.85 0
253 | 8.85 4.87 1
254 | -0.23 -2.25 0
255 | 5.42 6.79 1
256 | 3.95 -1.02 0
257 | -1.68 6.95 1
258 | 9.08 1.09 1
259 | -6.78 -6.66 0
260 | -2.70 -2.01 0
261 | 8.34 0.42 1
262 | 1.72 0.18 1
263 | 7.00 8.32 1
264 | 7.93 8.65 1
265 | 5.25 8.99 1
266 | 8.60 8.71 1
267 | 6.35 3.75 1
268 | 11.18 -7.69 1
269 | 4.05 7.97 1
270 | -6.92 3.60 0
271 | 9.77 1.08 1
272 | 1.00 -4.85 1
273 | -3.50 -3.90 0
274 | -5.00 -6.54 0
275 | 9.92 8.11 1
276 | 10.27 2.32 1
277 | 12.08 2.77 1
278 | -8.65 -3.61 0
279 | 6.10 -3.14 0
280 | 12.19 1.87 1
281 | 11.21 -0.54 1
282 | 2.47 -2.72 1
283 | 5.38 -2.78 1
284 | 5.18 1.96 1
285 | 10.55 0.84 1
286 | 3.82 9.14 1
287 | -6.08 -14.13 0
288 | -2.09 -2.07 0
289 | 0.05 0.24 1
290 | -3.57 -3.27 0
291 | 0.50 -6.19 0
292 | -5.03 0.37 0
293 | -9.77 -6.21 0
294 | -2.97 -5.53 0
295 | -5.04 -12.17 0
296 | 2.59 -4.90 0
297 | 6.53 0.61 1
298 | 5.29 3.97 1
299 | 1.32 0.07 1
300 | 3.03 7.38 1
301 | -5.93 1.51 0
302 | -0.79 -12.55 0
303 | -4.89 -3.07 0
304 | -2.02 -8.23 0
305 | -1.91 0.51 0
306 | 1.28 -8.06 0
307 | -2.17 -0.35 0
308 | -5.11 -0.12 0
309 | -0.39 -3.54 0
310 | -2.81 -11.67 0
311 | 5.85 5.42 1
312 | 5.46 10.15 1
313 | -3.51 -7.83 0
314 | 3.84 8.11 1
315 | -4.96 -4.69 0
316 | 1.93 9.17 1
317 | 15.33 4.70 1
318 | 7.52 8.67 1
319 | -2.23 -8.06 0
320 | -6.72 -10.20 0
321 | -6.04 -4.30 0
322 | 1.96 -7.93 0
323 | 7.78 -5.09 1
324 | 5.82 3.20 1
325 | 0.76 5.85 1
326 | -6.11 -9.28 0
327 | 3.83 10.35 1
328 | -8.57 -4.99 0
329 | 8.56 5.87 1
330 | 6.15 0.12 1
331 | 4.00 1.99 1
332 | 3.48 -0.73 1
333 | -11.02 -5.98 0
334 | 6.14 5.43 1
335 | -3.27 -2.94 0
336 | 2.18 3.36 1
337 | 0.49 3.84 1
338 | 2.08 1.81 1
339 | 17.31 0.60 1
340 | 2.98 8.29 1
341 | 2.05 5.49 1
342 | 2.29 0.69 0
343 | -3.56 0.85 0
344 | 8.20 -1.62 1
345 | -5.60 -3.07 0
346 | 6.52 3.71 1
347 | -7.34 -3.16 0
348 | -6.43 -7.56 0
349 | -8.50 -7.98 0
350 | 1.36 -0.27 1
351 | 7.82 -3.16 1
352 | 4.59 -1.90 1
353 | 7.24 -5.03 1
354 | -5.51 -6.32 0
355 | 0.34 -4.44 0
356 | 2.02 -2.24 0
357 | -7.31 -4.34 0
358 | -0.46 8.11 1
359 | -1.79 -1.83 0
360 | -11.32 -6.57 0
361 | 2.50 4.13 1
362 | 2.92 8.44 1
363 | 0.69 0.32 1
364 | 10.97 -0.40 1
365 | -1.04 -12.37 0
366 | 3.66 3.09 1
367 | -2.28 -6.20 0
368 | 3.73 -1.49 1
369 | -1.29 -7.59 0
370 | 5.97 -1.52 1
371 | -1.93 0.49 0
372 | 3.40 -2.34 1
373 | 8.66 4.40 1
374 | -2.75 13.66 1
375 | 1.60 -13.26 0
376 | 14.95 4.36 1
377 | 3.86 -1.50 1
378 | 13.71 2.04 1
379 | 2.72 4.63 1
380 | 6.24 -0.43 1
381 | 4.38 -1.27 1
382 | 9.06 9.67 1
383 | 3.83 5.15 1
384 | 4.14 -11.07 0
385 | -4.44 -6.76 0
386 | -6.64 -9.32 0
387 | -4.65 1.24 0
388 | 4.55 0.21 1
389 | 5.57 8.57 1
390 | -4.79 -5.34 0
391 | 2.97 -4.13 1
392 | 5.99 -2.15 1
393 | -4.93 -3.56 0
394 | -8.14 -12.20 0
395 | -0.14 -6.42 0
396 | -4.79 -3.73 0
397 | 0.68 -3.48 0
398 | -4.16 -3.25 0
399 | 10.64 2.00 1
400 | -8.16 -7.55 0
401 | 5.96 5.37 1
402 | 11.09 -3.39 1
403 | 7.46 -4.72 1
404 | -0.42 2.09 0
405 | -1.40 1.66 0
406 | 9.24 -0.16 1
407 | -2.97 -11.87 0
408 | 2.60 -10.34 0
409 | -1.24 -7.76 0
410 | -2.84 -7.49 0
411 | 10.89 9.67 1
412 | 1.16 -5.77 1
413 | 1.94 -5.81 0
414 | 10.42 -0.43 1
415 | -2.81 -3.98 0
416 | 3.73 -4.75 1
417 | 6.19 -2.02 1
418 | 10.06 3.45 1
419 | -1.59 -3.61 0
420 | -0.19 6.68 1
421 | 7.74 5.71 1
422 | 4.56 3.95 1
423 | -3.00 0.04 0
424 | 5.94 1.09 1
425 | -7.53 -2.33 0
426 | 4.57 5.36 1
427 | 5.10 1.44 1
428 | 0.20 -6.57 0
429 | 1.37 8.58 1
430 | -1.90 -12.73 0
431 | -4.96 -9.93 0
432 | -1.05 4.67 1
433 | 0.52 6.56 1
434 | -1.27 -5.65 0
435 | -0.93 0.78 1
436 | -2.12 3.12 1
437 | -3.87 -2.52 0
438 | 3.61 5.72 1
439 | -1.07 -8.50 0
440 | -1.38 -2.40 0
441 | 13.24 1.52 1
442 | -5.94 -6.61 0
443 | 7.74 -6.51 1
444 | 2.35 2.45 1
445 | -1.94 -4.15 0
446 | -6.16 -5.45 0
447 | 6.09 -0.46 1
448 | 1.99 -10.66 0
449 | -4.25 -5.11 0
450 | 4.65 1.91 1
451 | 2.85 5.48 1
452 | -1.24 -10.13 0
453 | 0.93 -12.92 0
454 | 7.44 -4.40 1
455 | 4.18 2.07 1
456 | -1.03 1.92 1
457 | -9.23 -5.69 0
458 | -8.26 -5.02 0
459 | 5.56 -0.05 1
460 | 11.94 5.48 1
461 | 3.57 0.19 1
462 | -4.58 -1.32 0
463 | 2.34 5.58 1
464 | 0.71 -14.05 0
465 | -0.77 -6.71 0
466 | -8.67 -3.51 0
467 | 8.42 -2.26 1
468 | -0.81 -11.36 0
469 | 0.15 -12.24 0
470 | 1.08 2.51 1
471 | 3.28 4.80 1
472 | -0.77 -3.27 1
473 | 4.47 0.62 1
474 | -4.60 -4.60 0
475 | -0.24 1.90 1
476 | -2.33 -5.57 0
477 | -7.92 -7.43 0
478 | 2.76 -5.48 0
479 | 10.90 7.48 1
480 | -4.81 -2.50 0
481 | -9.87 0.80 0
482 | 14.55 3.38 1
483 | -3.02 -5.36 0
484 | -5.06 -10.03 0
485 | 2.62 10.62 1
486 | 6.75 1.53 1
487 | 6.57 4.42 1
488 | -5.56 -3.18 0
489 | -3.70 -7.45 0
490 | 8.03 3.40 1
491 | -3.10 -2.89 0
492 | -3.57 -11.72 0
493 | 2.87 -0.17 1
494 | 5.93 2.04 1
495 | -9.25 -2.29 0
496 | 5.21 10.32 1
497 | -5.71 -2.44 0
498 | -0.46 2.13 0
499 | -1.83 -6.59 0
500 | 4.24 -0.65 1
501 | 5.84 2.89 1
502 | -4.12 -3.02 0
503 | 5.04 6.66 1
504 | -8.33 3.65 0
505 | 6.01 5.23 1
506 | 6.95 -0.67 1
507 | 4.10 2.33 1
508 | -3.21 -9.92 0
509 | -9.60 -9.94 0
510 | -0.52 -0.78 0
511 | 7.93 8.80 1
512 | -9.67 -5.47 0
513 | 3.25 -1.47 1
514 | 10.65 5.04 1
515 | -5.51 2.58 1
516 | -1.96 -1.92 0
517 | -7.86 0.78 0
518 | -1.39 -8.28 0
519 | -2.48 -9.59 0
520 | 10.16 3.82 1
521 | 2.95 3.52 1
522 | 5.94 6.22 1
523 | -2.61 -3.42 0
524 | -10.44 -0.81 0
525 | -3.32 1.46 0
526 | 9.07 0.55 1
527 | 4.19 3.70 1
528 | 1.46 0.04 1
529 | 7.85 3.80 1
530 | 0.84 -5.74 0
531 | -0.22 1.19 1
532 | 9.63 9.58 1
533 | 9.67 2.25 1
534 | 4.58 11.08 1
535 | -8.67 -3.77 0
536 | 8.11 5.11 1
537 | -0.07 -0.68 0
538 | -1.64 -2.83 0
539 | 3.16 0.57 1
540 | -10.26 -12.83 0
541 | -6.24 -3.93 0
542 | -9.27 -7.59 0
543 | 9.04 -4.97 1
544 | -2.17 -9.35 0
545 | -6.71 -6.63 0
546 | 3.85 7.37 1
547 | 1.86 6.00 1
548 | 9.99 6.05 1
549 | -0.42 -4.97 0
550 | -8.11 -8.39 0
551 | -4.51 0.34 0
552 | -4.18 -3.82 0
553 | -6.86 -5.77 0
554 | 9.11 -0.19 1
555 | -1.96 0.63 0
556 | 14.16 -5.06 1
557 | -2.73 -11.75 0
558 | 6.44 3.08 1
559 | 1.01 -2.94 0
560 | -0.31 -0.05 0
561 | -0.63 -6.24 1
562 | -7.52 -4.67 0
563 | -2.70 -2.01 0
564 | 6.00 1.02 1
565 | -5.10 -4.24 0
566 | 11.22 2.00 1
567 | 5.33 -0.79 0
568 | -2.94 0.57 0
569 | 1.86 -2.52 0
570 | -7.77 -4.05 0
571 | 4.95 4.44 1
572 | -10.64 -5.98 0
573 | 8.72 -1.71 1
574 | -0.91 -9.54 0
575 | -2.29 -2.71 1
576 | -7.20 -15.09 0
577 | -4.73 -2.52 0
578 | 5.78 7.52 1
579 | 8.60 2.52 1
580 | 5.55 4.51 1
581 | 2.44 -4.08 0
582 | 0.75 -8.00 0
583 | 12.48 5.19 1
584 | -4.74 3.36 0
585 | 1.39 2.07 1
586 | 5.83 2.80 1
587 | -6.47 -0.05 0
588 | 6.04 5.53 1
589 | -0.94 -12.89 0
590 | 1.00 -10.54 0
591 | -12.01 -0.84 0
592 | 4.10 6.69 1
593 | 6.33 9.37 1
594 | -10.23 -0.92 0
595 | 6.39 -3.54 1
596 | -0.75 -0.03 1
597 | -1.03 -5.81 0
598 | 1.11 4.33 1
599 | -3.33 -5.00 0
600 | 3.58 1.97 1
601 | 5.41 4.52 1
602 |
--------------------------------------------------------------------------------
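`train_data.txt` (and the matching `test_data.txt`) is whitespace-delimited with a one-line `x1 x2 y` header. A minimal loading sketch, assuming the file layout shown above:

    import numpy as np

    data = np.loadtxt('hw_01/train_data.txt', skiprows=1)  # skip the 'x1 x2 y' header
    X_train, y_train = data[:, :2], data[:, 2].astype(int)
    print(X_train.shape, y_train.shape)  # (600, 2) (600,)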
/hw_03/data/wine.data:
--------------------------------------------------------------------------------
1 | 1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
2 | 1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050
3 | 1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185
4 | 1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480
5 | 1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735
6 | 1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450
7 | 1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290
8 | 1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295
9 | 1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045
10 | 1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045
11 | 1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510
12 | 1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280
13 | 1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320
14 | 1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150
15 | 1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547
16 | 1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310
17 | 1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280
18 | 1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130
19 | 1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680
20 | 1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845
21 | 1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780
22 | 1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770
23 | 1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035
24 | 1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015
25 | 1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845
26 | 1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830
27 | 1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195
28 | 1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285
29 | 1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915
30 | 1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035
31 | 1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285
32 | 1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515
33 | 1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990
34 | 1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235
35 | 1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095
36 | 1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920
37 | 1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880
38 | 1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105
39 | 1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020
40 | 1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760
41 | 1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795
42 | 1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035
43 | 1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095
44 | 1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680
45 | 1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885
46 | 1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080
47 | 1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065
48 | 1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985
49 | 1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060
50 | 1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260
51 | 1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150
52 | 1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265
53 | 1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190
54 | 1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375
55 | 1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060
56 | 1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120
57 | 1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970
58 | 1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270
59 | 1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285
60 | 2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520
61 | 2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680
62 | 2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450
63 | 2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630
64 | 2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420
65 | 2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355
66 | 2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678
67 | 2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502
68 | 2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510
69 | 2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750
70 | 2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718
71 | 2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870
72 | 2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410
73 | 2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472
74 | 2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985
75 | 2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886
76 | 2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428
77 | 2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392
78 | 2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500
79 | 2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750
80 | 2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463
81 | 2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278
82 | 2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714
83 | 2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630
84 | 2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515
85 | 2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520
86 | 2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450
87 | 2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495
88 | 2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562
89 | 2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680
90 | 2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625
91 | 2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480
92 | 2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450
93 | 2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495
94 | 2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290
95 | 2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345
96 | 2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937
97 | 2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625
98 | 2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428
99 | 2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660
100 | 2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406
101 | 2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710
102 | 2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562
103 | 2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438
104 | 2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415
105 | 2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672
106 | 2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315
107 | 2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510
108 | 2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488
109 | 2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312
110 | 2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680
111 | 2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562
112 | 2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325
113 | 2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607
114 | 2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434
115 | 2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385
116 | 2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407
117 | 2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495
118 | 2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345
119 | 2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372
120 | 2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564
121 | 2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625
122 | 2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465
123 | 2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365
124 | 2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380
125 | 2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380
126 | 2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378
127 | 2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352
128 | 2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466
129 | 2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342
130 | 2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580
131 | 3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630
132 | 3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530
133 | 3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560
134 | 3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600
135 | 3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650
136 | 3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695
137 | 3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720
138 | 3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515
139 | 3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580
140 | 3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590
141 | 3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600
142 | 3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780
143 | 3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520
144 | 3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550
145 | 3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855
146 | 3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830
147 | 3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415
148 | 3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625
149 | 3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650
150 | 3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550
151 | 3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500
152 | 3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480
153 | 3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425
154 | 3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675
155 | 3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640
156 | 3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725
157 | 3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480
158 | 3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880
159 | 3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660
160 | 3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620
161 | 3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520
162 | 3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680
163 | 3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570
164 | 3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675
165 | 3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615
166 | 3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520
167 | 3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695
168 | 3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685
169 | 3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750
170 | 3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630
171 | 3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510
172 | 3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470
173 | 3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660
174 | 3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740
175 | 3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750
176 | 3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835
177 | 3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840
178 | 3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560
179 |
--------------------------------------------------------------------------------
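`wine.data` follows the UCI convention visible above: comma-separated, no header row, with the class label (1-3) in the first column followed by 13 numeric features. A minimal loading sketch under that assumption:

    import pandas as pd

    df = pd.read_csv('hw_03/data/wine.data', header=None)
    y = df.iloc[:, 0].values   # class labels 1, 2, 3
    X = df.iloc[:, 1:].values  # 13 continuous features
    print(X.shape, y.shape)    # (178, 13) (178,)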
/report-template/ieee.bst:
--------------------------------------------------------------------------------
1 |
2 | % ---------------------------------------------------------------
3 | %
4 | % ieee.bst,v 1.0 2002/04/16
5 | %
6 | % by Glenn Paulley (paulley@acm.org)
7 | %
8 | % Modified from latex8.bst 1995/09/15 15:13:49 ienne Exp $
9 | %
10 | % by Paolo.Ienne@di.epfl.ch
11 | %
12 | %
13 | % ---------------------------------------------------------------
14 | %
15 | % no guarantee is given that the format corresponds perfectly to
16 | % IEEE 8.5" x 11" Proceedings, but most features should be ok.
17 | %
18 | % ---------------------------------------------------------------
19 | %
20 | % `ieee' from BibTeX standard bibliography style `abbrv'
21 | % version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09.
22 | % Copyright (C) 1985, all rights reserved.
23 | % Copying of this file is authorized only if either
24 | % (1) you make absolutely no changes to your copy, including name, or
25 | % (2) if you do make changes, you name it something other than
26 | % btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst.
27 | % This restriction helps ensure that all standard styles are identical.
28 | % The file btxbst.doc has the documentation for this style.
29 |
30 | ENTRY
31 | { address
32 | author
33 | booktitle
34 | chapter
35 | edition
36 | editor
37 | howpublished
38 | institution
39 | journal
40 | key
41 | month
42 | note
43 | number
44 | organization
45 | pages
46 | publisher
47 | school
48 | series
49 | title
50 | type
51 | volume
52 | year
53 | }
54 | {}
55 | { label }
56 |
57 | INTEGERS { output.state before.all mid.sentence after.sentence after.block }
58 |
59 | FUNCTION {init.state.consts}
60 | { #0 'before.all :=
61 | #1 'mid.sentence :=
62 | #2 'after.sentence :=
63 | #3 'after.block :=
64 | }
65 |
66 | STRINGS { s t }
67 |
68 | FUNCTION {output.nonnull}
69 | { 's :=
70 | output.state mid.sentence =
71 | { ", " * write$ }
72 | { output.state after.block =
73 | { add.period$ write$
74 | newline$
75 | "\newblock " write$
76 | }
77 | { output.state before.all =
78 | 'write$
79 | { add.period$ " " * write$ }
80 | if$
81 | }
82 | if$
83 | mid.sentence 'output.state :=
84 | }
85 | if$
86 | s
87 | }
88 |
89 | FUNCTION {output}
90 | { duplicate$ empty$
91 | 'pop$
92 | 'output.nonnull
93 | if$
94 | }
95 |
96 | FUNCTION {output.check}
97 | { 't :=
98 | duplicate$ empty$
99 | { pop$ "empty " t * " in " * cite$ * warning$ }
100 | 'output.nonnull
101 | if$
102 | }
103 |
104 | FUNCTION {output.bibitem}
105 | { newline$
106 | "\bibitem{" write$
107 | cite$ write$
108 | "}" write$
109 | newline$
110 | ""
111 | before.all 'output.state :=
112 | }
113 |
114 | FUNCTION {fin.entry}
115 | { add.period$
116 | write$
117 | newline$
118 | }
119 |
120 | FUNCTION {new.block}
121 | { output.state before.all =
122 | 'skip$
123 | { after.block 'output.state := }
124 | if$
125 | }
126 |
127 | FUNCTION {new.sentence}
128 | { output.state after.block =
129 | 'skip$
130 | { output.state before.all =
131 | 'skip$
132 | { after.sentence 'output.state := }
133 | if$
134 | }
135 | if$
136 | }
137 |
138 | FUNCTION {not}
139 | { { #0 }
140 | { #1 }
141 | if$
142 | }
143 |
144 | FUNCTION {and}
145 | { 'skip$
146 | { pop$ #0 }
147 | if$
148 | }
149 |
150 | FUNCTION {or}
151 | { { pop$ #1 }
152 | 'skip$
153 | if$
154 | }
155 |
156 | FUNCTION {new.block.checka}
157 | { empty$
158 | 'skip$
159 | 'new.block
160 | if$
161 | }
162 |
163 | FUNCTION {new.block.checkb}
164 | { empty$
165 | swap$ empty$
166 | and
167 | 'skip$
168 | 'new.block
169 | if$
170 | }
171 |
172 | FUNCTION {new.sentence.checka}
173 | { empty$
174 | 'skip$
175 | 'new.sentence
176 | if$
177 | }
178 |
179 | FUNCTION {new.sentence.checkb}
180 | { empty$
181 | swap$ empty$
182 | and
183 | 'skip$
184 | 'new.sentence
185 | if$
186 | }
187 |
188 | FUNCTION {field.or.null}
189 | { duplicate$ empty$
190 | { pop$ "" }
191 | 'skip$
192 | if$
193 | }
194 |
195 | FUNCTION {emphasize}
196 | { duplicate$ empty$
197 | { pop$ "" }
198 | { "{\em " swap$ * "}" * }
199 | if$
200 | }
201 |
202 | INTEGERS { nameptr namesleft numnames }
203 |
204 | FUNCTION {format.names}
205 | { 's :=
206 | #1 'nameptr :=
207 | s num.names$ 'numnames :=
208 | numnames 'namesleft :=
209 | { namesleft #0 > }
210 | { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't :=
211 | nameptr #1 >
212 | { namesleft #1 >
213 | { ", " * t * }
214 | { numnames #2 >
215 | { "," * }
216 | 'skip$
217 | if$
218 | t "others" =
219 | { " et~al." * }
220 | { " and " * t * }
221 | if$
222 | }
223 | if$
224 | }
225 | 't
226 | if$
227 | nameptr #1 + 'nameptr :=
228 |
229 | namesleft #1 - 'namesleft :=
230 | }
231 | while$
232 | }
233 |
234 | FUNCTION {format.authors}
235 | { author empty$
236 | { "" }
237 | { author format.names }
238 | if$
239 | }
240 |
241 | FUNCTION {format.editors}
242 | { editor empty$
243 | { "" }
244 | { editor format.names
245 | editor num.names$ #1 >
246 | { ", editors" * }
247 | { ", editor" * }
248 | if$
249 | }
250 | if$
251 | }
252 |
253 | FUNCTION {format.title}
254 | { title empty$
255 | { "" }
256 | { title "t" change.case$ }
257 | if$
258 | }
259 |
260 | FUNCTION {n.dashify}
261 | { 't :=
262 | ""
263 | { t empty$ not }
264 | { t #1 #1 substring$ "-" =
265 | { t #1 #2 substring$ "--" = not
266 | { "--" *
267 | t #2 global.max$ substring$ 't :=
268 | }
269 | { { t #1 #1 substring$ "-" = }
270 | { "-" *
271 | t #2 global.max$ substring$ 't :=
272 | }
273 | while$
274 | }
275 | if$
276 | }
277 | { t #1 #1 substring$ *
278 | t #2 global.max$ substring$ 't :=
279 | }
280 | if$
281 | }
282 | while$
283 | }
284 |
285 | FUNCTION {format.date}
286 | { year empty$
287 | { month empty$
288 | { "" }
289 | { "there's a month but no year in " cite$ * warning$
290 | month
291 | }
292 | if$
293 | }
294 | { month empty$
295 | 'year
296 | { month " " * year * }
297 | if$
298 | }
299 | if$
300 | }
301 |
302 | FUNCTION {format.btitle}
303 | { title emphasize
304 | }
305 |
306 | FUNCTION {tie.or.space.connect}
307 | { duplicate$ text.length$ #3 <
308 | { "~" }
309 | { " " }
310 | if$
311 | swap$ * *
312 | }
313 |
314 | FUNCTION {either.or.check}
315 | { empty$
316 | 'pop$
317 | { "can't use both " swap$ * " fields in " * cite$ * warning$ }
318 | if$
319 | }
320 |
321 | FUNCTION {format.bvolume}
322 | { volume empty$
323 | { "" }
324 | { "volume" volume tie.or.space.connect
325 | series empty$
326 | 'skip$
327 | { " of " * series emphasize * }
328 | if$
329 | "volume and number" number either.or.check
330 | }
331 | if$
332 | }
333 |
334 | FUNCTION {format.number.series}
335 | { volume empty$
336 | { number empty$
337 | { series field.or.null }
338 | { output.state mid.sentence =
339 | { "number" }
340 | { "Number" }
341 | if$
342 | number tie.or.space.connect
343 | series empty$
344 | { "there's a number but no series in " cite$ * warning$ }
345 | { " in " * series * }
346 | if$
347 | }
348 | if$
349 | }
350 | { "" }
351 | if$
352 | }
353 |
354 | FUNCTION {format.edition}
355 | { edition empty$
356 | { "" }
357 | { output.state mid.sentence =
358 | { edition "l" change.case$ " edition" * }
359 | { edition "t" change.case$ " edition" * }
360 | if$
361 | }
362 | if$
363 | }
364 |
365 | INTEGERS { multiresult }
366 |
367 | FUNCTION {multi.page.check}
368 | { 't :=
369 | #0 'multiresult :=
370 | { multiresult not
371 | t empty$ not
372 | and
373 | }
374 | { t #1 #1 substring$
375 | duplicate$ "-" =
376 | swap$ duplicate$ "," =
377 | swap$ "+" =
378 | or or
379 | { #1 'multiresult := }
380 | { t #2 global.max$ substring$ 't := }
381 | if$
382 | }
383 | while$
384 | multiresult
385 | }
386 |
387 | FUNCTION {format.pages}
388 | { pages empty$
389 | { "" }
390 | { pages multi.page.check
391 | { "pages" pages n.dashify tie.or.space.connect }
392 | { "page" pages tie.or.space.connect }
393 | if$
394 | }
395 | if$
396 | }
397 |
398 | FUNCTION {format.vol.num.pages}
399 | { volume field.or.null
400 | number empty$
401 | 'skip$
402 | { "(" number * ")" * *
403 | volume empty$
404 | { "there's a number but no volume in " cite$ * warning$ }
405 | 'skip$
406 | if$
407 | }
408 | if$
409 | pages empty$
410 | 'skip$
411 | { duplicate$ empty$
412 | { pop$ format.pages }
413 | { ":" * pages n.dashify * }
414 | if$
415 | }
416 | if$
417 | }
418 |
419 | FUNCTION {format.chapter.pages}
420 | { chapter empty$
421 | 'format.pages
422 | { type empty$
423 | { "chapter" }
424 | { type "l" change.case$ }
425 | if$
426 | chapter tie.or.space.connect
427 | pages empty$
428 | 'skip$
429 | { ", " * format.pages * }
430 | if$
431 | }
432 | if$
433 | }
434 |
435 | FUNCTION {format.in.ed.booktitle}
436 | { booktitle empty$
437 | { "" }
438 | { editor empty$
439 | { "In " booktitle emphasize * }
440 | { "In " format.editors * ", " * booktitle emphasize * }
441 | if$
442 | }
443 | if$
444 | }
445 |
446 | FUNCTION {empty.misc.check}
447 |
448 | { author empty$ title empty$ howpublished empty$
449 | month empty$ year empty$ note empty$
450 | and and and and and
451 | key empty$ not and
452 | { "all relevant fields are empty in " cite$ * warning$ }
453 | 'skip$
454 | if$
455 | }
456 |
457 | FUNCTION {format.thesis.type}
458 | { type empty$
459 | 'skip$
460 | { pop$
461 | type "t" change.case$
462 | }
463 | if$
464 | }
465 |
466 | FUNCTION {format.tr.number}
467 | { type empty$
468 | { "Technical Report" }
469 | 'type
470 | if$
471 | number empty$
472 | { "t" change.case$ }
473 | { number tie.or.space.connect }
474 | if$
475 | }
476 |
477 | FUNCTION {format.article.crossref}
478 | { key empty$
479 | { journal empty$
480 | { "need key or journal for " cite$ * " to crossref " * crossref *
481 | warning$
482 | ""
483 | }
484 | { "In {\em " journal * "\/}" * }
485 | if$
486 | }
487 | { "In " key * }
488 | if$
489 | " \cite{" * crossref * "}" *
490 | }
491 |
492 | FUNCTION {format.crossref.editor}
493 | { editor #1 "{vv~}{ll}" format.name$
494 | editor num.names$ duplicate$
495 | #2 >
496 | { pop$ " et~al." * }
497 | { #2 <
498 | 'skip$
499 | { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
500 | { " et~al." * }
501 | { " and " * editor #2 "{vv~}{ll}" format.name$ * }
502 | if$
503 | }
504 | if$
505 | }
506 | if$
507 | }
508 |
509 | FUNCTION {format.book.crossref}
510 | { volume empty$
511 | { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
512 | "In "
513 | }
514 | { "Volume" volume tie.or.space.connect
515 | " of " *
516 | }
517 | if$
518 | editor empty$
519 | editor field.or.null author field.or.null =
520 | or
521 | { key empty$
522 | { series empty$
523 | { "need editor, key, or series for " cite$ * " to crossref " *
524 | crossref * warning$
525 | "" *
526 | }
527 | { "{\em " * series * "\/}" * }
528 | if$
529 | }
530 | { key * }
531 | if$
532 | }
533 | { format.crossref.editor * }
534 | if$
535 | " \cite{" * crossref * "}" *
536 | }
537 |
538 | FUNCTION {format.incoll.inproc.crossref}
539 | { editor empty$
540 | editor field.or.null author field.or.null =
541 | or
542 | { key empty$
543 | { booktitle empty$
544 | { "need editor, key, or booktitle for " cite$ * " to crossref " *
545 | crossref * warning$
546 | ""
547 | }
548 | { "In {\em " booktitle * "\/}" * }
549 | if$
550 | }
551 | { "In " key * }
552 | if$
553 | }
554 | { "In " format.crossref.editor * }
555 | if$
556 | " \cite{" * crossref * "}" *
557 | }
558 |
559 | FUNCTION {article}
560 | { output.bibitem
561 | format.authors "author" output.check
562 | new.block
563 | format.title "title" output.check
564 | new.block
565 | crossref missing$
566 | { journal emphasize "journal" output.check
567 | format.vol.num.pages output
568 | format.date "year" output.check
569 | }
570 | { format.article.crossref output.nonnull
571 | format.pages output
572 | }
573 | if$
574 | new.block
575 | note output
576 | fin.entry
577 | }
578 |
579 | FUNCTION {book}
580 | { output.bibitem
581 | author empty$
582 | { format.editors "author and editor" output.check }
583 | { format.authors output.nonnull
584 | crossref missing$
585 | { "author and editor" editor either.or.check }
586 | 'skip$
587 | if$
588 | }
589 | if$
590 | new.block
591 | format.btitle "title" output.check
592 | crossref missing$
593 | { format.bvolume output
594 | new.block
595 | format.number.series output
596 | new.sentence
597 | publisher "publisher" output.check
598 | address output
599 | }
600 | { new.block
601 | format.book.crossref output.nonnull
602 | }
603 | if$
604 | format.edition output
605 | format.date "year" output.check
606 | new.block
607 | note output
608 | fin.entry
609 | }
610 |
611 | FUNCTION {booklet}
612 | { output.bibitem
613 | format.authors output
614 | new.block
615 | format.title "title" output.check
616 | howpublished address new.block.checkb
617 | howpublished output
618 | address output
619 | format.date output
620 | new.block
621 | note output
622 | fin.entry
623 | }
624 |
625 | FUNCTION {inbook}
626 | { output.bibitem
627 | author empty$
628 | { format.editors "author and editor" output.check }
629 | { format.authors output.nonnull
630 |
631 | crossref missing$
632 | { "author and editor" editor either.or.check }
633 | 'skip$
634 | if$
635 | }
636 | if$
637 | new.block
638 | format.btitle "title" output.check
639 | crossref missing$
640 | { format.bvolume output
641 | format.chapter.pages "chapter and pages" output.check
642 | new.block
643 | format.number.series output
644 | new.sentence
645 | publisher "publisher" output.check
646 | address output
647 | }
648 | { format.chapter.pages "chapter and pages" output.check
649 | new.block
650 | format.book.crossref output.nonnull
651 | }
652 | if$
653 | format.edition output
654 | format.date "year" output.check
655 | new.block
656 | note output
657 | fin.entry
658 | }
659 |
660 | FUNCTION {incollection}
661 | { output.bibitem
662 | format.authors "author" output.check
663 | new.block
664 | format.title "title" output.check
665 | new.block
666 | crossref missing$
667 | { format.in.ed.booktitle "booktitle" output.check
668 | format.bvolume output
669 | format.number.series output
670 | format.chapter.pages output
671 | new.sentence
672 | publisher "publisher" output.check
673 | address output
674 | format.edition output
675 | format.date "year" output.check
676 | }
677 | { format.incoll.inproc.crossref output.nonnull
678 | format.chapter.pages output
679 | }
680 | if$
681 | new.block
682 | note output
683 | fin.entry
684 | }
685 |
686 | FUNCTION {inproceedings}
687 | { output.bibitem
688 | format.authors "author" output.check
689 | new.block
690 | format.title "title" output.check
691 | new.block
692 | crossref missing$
693 | { format.in.ed.booktitle "booktitle" output.check
694 | format.bvolume output
695 | format.number.series output
696 | format.pages output
697 | address empty$
698 | { organization publisher new.sentence.checkb
699 | organization output
700 | publisher output
701 | format.date "year" output.check
702 | }
703 | { address output.nonnull
704 | format.date "year" output.check
705 | new.sentence
706 | organization output
707 | publisher output
708 | }
709 | if$
710 | }
711 | { format.incoll.inproc.crossref output.nonnull
712 | format.pages output
713 | }
714 | if$
715 | new.block
716 | note output
717 | fin.entry
718 | }
719 |
720 | FUNCTION {conference} { inproceedings }
721 |
722 | FUNCTION {manual}
723 | { output.bibitem
724 | author empty$
725 | { organization empty$
726 | 'skip$
727 | { organization output.nonnull
728 | address output
729 | }
730 | if$
731 | }
732 | { format.authors output.nonnull }
733 | if$
734 | new.block
735 | format.btitle "title" output.check
736 | author empty$
737 | { organization empty$
738 | { address new.block.checka
739 | address output
740 | }
741 | 'skip$
742 | if$
743 | }
744 | { organization address new.block.checkb
745 | organization output
746 | address output
747 | }
748 | if$
749 | format.edition output
750 | format.date output
751 | new.block
752 | note output
753 | fin.entry
754 | }
755 |
756 | FUNCTION {mastersthesis}
757 | { output.bibitem
758 | format.authors "author" output.check
759 | new.block
760 | format.title "title" output.check
761 | new.block
762 | "Master's thesis" format.thesis.type output.nonnull
763 | school "school" output.check
764 | address output
765 | format.date "year" output.check
766 | new.block
767 | note output
768 | fin.entry
769 | }
770 |
771 | FUNCTION {misc}
772 | { output.bibitem
773 | format.authors output
774 | title howpublished new.block.checkb
775 | format.title output
776 | howpublished new.block.checka
777 | howpublished output
778 | format.date output
779 | new.block
780 | note output
781 | fin.entry
782 | empty.misc.check
783 | }
784 |
785 | FUNCTION {phdthesis}
786 | { output.bibitem
787 | format.authors "author" output.check
788 | new.block
789 | format.btitle "title" output.check
790 | new.block
791 | "PhD thesis" format.thesis.type output.nonnull
792 | school "school" output.check
793 | address output
794 | format.date "year" output.check
795 | new.block
796 | note output
797 | fin.entry
798 | }
799 |
800 | FUNCTION {proceedings}
801 | { output.bibitem
802 | editor empty$
803 | { organization output }
804 | { format.editors output.nonnull }
805 |
806 | if$
807 | new.block
808 | format.btitle "title" output.check
809 | format.bvolume output
810 | format.number.series output
811 | address empty$
812 | { editor empty$
813 | { publisher new.sentence.checka }
814 | { organization publisher new.sentence.checkb
815 | organization output
816 | }
817 | if$
818 | publisher output
819 | format.date "year" output.check
820 | }
821 | { address output.nonnull
822 | format.date "year" output.check
823 | new.sentence
824 | editor empty$
825 | 'skip$
826 | { organization output }
827 | if$
828 | publisher output
829 | }
830 | if$
831 | new.block
832 | note output
833 | fin.entry
834 | }
835 |
836 | FUNCTION {techreport}
837 | { output.bibitem
838 | format.authors "author" output.check
839 | new.block
840 | format.title "title" output.check
841 | new.block
842 | format.tr.number output.nonnull
843 | institution "institution" output.check
844 | address output
845 | format.date "year" output.check
846 | new.block
847 | note output
848 | fin.entry
849 | }
850 |
851 | FUNCTION {unpublished}
852 | { output.bibitem
853 | format.authors "author" output.check
854 | new.block
855 | format.title "title" output.check
856 | new.block
857 | note "note" output.check
858 | format.date output
859 | fin.entry
860 | }
861 |
862 | FUNCTION {default.type} { misc }
863 |
864 | MACRO {jan} {"Jan."}
865 |
866 | MACRO {feb} {"Feb."}
867 |
868 | MACRO {mar} {"Mar."}
869 |
870 | MACRO {apr} {"Apr."}
871 |
872 | MACRO {may} {"May"}
873 |
874 | MACRO {jun} {"June"}
875 |
876 | MACRO {jul} {"July"}
877 |
878 | MACRO {aug} {"Aug."}
879 |
880 | MACRO {sep} {"Sept."}
881 |
882 | MACRO {oct} {"Oct."}
883 |
884 | MACRO {nov} {"Nov."}
885 |
886 | MACRO {dec} {"Dec."}
887 |
888 | MACRO {acmcs} {"ACM Comput. Surv."}
889 |
890 | MACRO {acta} {"Acta Inf."}
891 |
892 | MACRO {cacm} {"Commun. ACM"}
893 |
894 | MACRO {ibmjrd} {"IBM J. Res. Dev."}
895 |
896 | MACRO {ibmsj} {"IBM Syst.~J."}
897 |
898 | MACRO {ieeese} {"IEEE Trans. Softw. Eng."}
899 |
900 | MACRO {ieeetc} {"IEEE Trans. Comput."}
901 |
902 | MACRO {ieeetcad}
903 | {"IEEE Trans. Comput.-Aided Design Integrated Circuits"}
904 |
905 | MACRO {ipl} {"Inf. Process. Lett."}
906 |
907 | MACRO {jacm} {"J.~ACM"}
908 |
909 | MACRO {jcss} {"J.~Comput. Syst. Sci."}
910 |
911 | MACRO {scp} {"Sci. Comput. Programming"}
912 |
913 | MACRO {sicomp} {"SIAM J. Comput."}
914 |
915 | MACRO {tocs} {"ACM Trans. Comput. Syst."}
916 |
917 | MACRO {tods} {"ACM Trans. Database Syst."}
918 |
919 | MACRO {tog} {"ACM Trans. Gr."}
920 |
921 | MACRO {toms} {"ACM Trans. Math. Softw."}
922 |
923 | MACRO {toois} {"ACM Trans. Office Inf. Syst."}
924 |
925 | MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."}
926 |
927 | MACRO {tcs} {"Theoretical Comput. Sci."}
928 |
929 | READ
930 |
931 | FUNCTION {sortify}
932 | { purify$
933 | "l" change.case$
934 | }
935 |
936 | INTEGERS { len }
937 |
938 | FUNCTION {chop.word}
939 | { 's :=
940 | 'len :=
941 | s #1 len substring$ =
942 | { s len #1 + global.max$ substring$ }
943 | 's
944 | if$
945 | }
946 |
947 | FUNCTION {sort.format.names}
948 | { 's :=
949 | #1 'nameptr :=
950 | ""
951 | s num.names$ 'numnames :=
952 | numnames 'namesleft :=
953 | { namesleft #0 > }
954 | { nameptr #1 >
955 | { " " * }
956 | 'skip$
957 | if$
958 | s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't :=
959 | nameptr numnames = t "others" = and
960 | { "et al" * }
961 | { t sortify * }
962 | if$
963 | nameptr #1 + 'nameptr :=
964 | namesleft #1 - 'namesleft :=
965 | }
966 | while$
967 | }
968 |
969 | FUNCTION {sort.format.title}
970 | { 't :=
971 | "A " #2
972 | "An " #3
973 | "The " #4 t chop.word
974 | chop.word
975 | chop.word
976 | sortify
977 | #1 global.max$ substring$
978 | }
979 |
980 | FUNCTION {author.sort}
981 | { author empty$
982 | { key empty$
983 | { "to sort, need author or key in " cite$ * warning$
984 | ""
985 | }
986 | { key sortify }
987 | if$
988 | }
989 | { author sort.format.names }
990 | if$
991 | }
992 |
993 | FUNCTION {author.editor.sort}
994 | { author empty$
995 | { editor empty$
996 | { key empty$
997 | { "to sort, need author, editor, or key in " cite$ * warning$
998 | ""
999 | }
1000 | { key sortify }
1001 | if$
1002 | }
1003 | { editor sort.format.names }
1004 | if$
1005 | }
1006 | { author sort.format.names }
1007 | if$
1008 | }
1009 |
1010 | FUNCTION {author.organization.sort}
1011 | { author empty$
1012 |
1013 | { organization empty$
1014 | { key empty$
1015 | { "to sort, need author, organization, or key in " cite$ * warning$
1016 | ""
1017 | }
1018 | { key sortify }
1019 | if$
1020 | }
1021 | { "The " #4 organization chop.word sortify }
1022 | if$
1023 | }
1024 | { author sort.format.names }
1025 | if$
1026 | }
1027 |
1028 | FUNCTION {editor.organization.sort}
1029 | { editor empty$
1030 | { organization empty$
1031 | { key empty$
1032 | { "to sort, need editor, organization, or key in " cite$ * warning$
1033 | ""
1034 | }
1035 | { key sortify }
1036 | if$
1037 | }
1038 | { "The " #4 organization chop.word sortify }
1039 | if$
1040 | }
1041 | { editor sort.format.names }
1042 | if$
1043 | }
1044 |
1045 | FUNCTION {presort}
1046 | { type$ "book" =
1047 | type$ "inbook" =
1048 | or
1049 | 'author.editor.sort
1050 | { type$ "proceedings" =
1051 | 'editor.organization.sort
1052 | { type$ "manual" =
1053 | 'author.organization.sort
1054 | 'author.sort
1055 | if$
1056 | }
1057 | if$
1058 | }
1059 | if$
1060 | " "
1061 | *
1062 | year field.or.null sortify
1063 | *
1064 | " "
1065 | *
1066 | title field.or.null
1067 | sort.format.title
1068 | *
1069 | #1 entry.max$ substring$
1070 | 'sort.key$ :=
1071 | }
1072 |
1073 | ITERATE {presort}
1074 |
1075 | SORT
1076 |
1077 | STRINGS { longest.label }
1078 |
1079 | INTEGERS { number.label longest.label.width }
1080 |
1081 | FUNCTION {initialize.longest.label}
1082 | { "" 'longest.label :=
1083 | #1 'number.label :=
1084 | #0 'longest.label.width :=
1085 | }
1086 |
1087 | FUNCTION {longest.label.pass}
1088 | { number.label int.to.str$ 'label :=
1089 | number.label #1 + 'number.label :=
1090 | label width$ longest.label.width >
1091 | { label 'longest.label :=
1092 | label width$ 'longest.label.width :=
1093 | }
1094 | 'skip$
1095 | if$
1096 | }
1097 |
1098 | EXECUTE {initialize.longest.label}
1099 |
1100 | ITERATE {longest.label.pass}
1101 |
1102 | FUNCTION {begin.bib}
1103 | { preamble$ empty$
1104 | 'skip$
1105 | { preamble$ write$ newline$ }
1106 | if$
1107 | "\begin{thebibliography}{" longest.label * "}" *
1108 | "\itemsep=-1pt" * % Compact the entries a little.
1109 | write$ newline$
1110 | }
1111 |
1112 | EXECUTE {begin.bib}
1113 |
1114 | EXECUTE {init.state.consts}
1115 |
1116 | ITERATE {call.type$}
1117 |
1118 | FUNCTION {end.bib}
1119 | { newline$
1120 | "\end{thebibliography}" write$ newline$
1121 | }
1122 |
1123 | EXECUTE {end.bib}
1124 |
1125 | % end of file ieee.bst
1126 | % ---------------------------------------------------------------
1127 |
1128 |
1129 |
1130 |
--------------------------------------------------------------------------------
/hw_02/hw02.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Problem Set 2"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "STAT 479: Machine Learning (Fall 2018) \n",
15 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) \n",
16 | "Course website: http://pages.stat.wisc.edu/~sraschka/teaching/stat479-fs2018/\n",
17 | "\n",
18 | "**Due**: Nov 08, before class (before 8:00 am).\n",
19 | "\n",
20 | "**How to submit**\n",
21 | "\n",
22 | "As mentioned in the lecture, you need to submit the `.ipynb` file with your answers plus an `.html` file, which will serve as a backup for us in case the `.ipynb` file cannot be opened on my or the TA's computer. In addition, you may also export the notebook as PDF and upload it as well.\n",
23 | "\n",
24 | "This time, we will be using the Canvas platform, so you need to submit your homework there. You should be able to resubmit the homework as many times as you like before the due date."
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {},
30 | "source": [
31 | "**You are highly encouraged to use Piazza to ask questions and help each other while working on the homework. However, do not share any solutions with other students as this would be a violation of the Academic Integrity guidelines (for more info, see http://pages.stat.wisc.edu/~sraschka/teaching/stat479-fs2018/#other-important-course-information)**\n",
32 | "\n",
33 | "\n",
34 | "For example, a resonable question & answer would be:\n",
35 | "\n",
36 | "- Q: When I am asked to implement the code for majority voting, my code produces an array that has the wrong dimensions (I get the following dimensions ...). \n",
37 | "- A: Hm, I suspect you compute the `argmax` over rows, not columns. Maybe check that you specify the correct dimension for the `axis` parameter in the `argmax` function.\n",
38 | "\n",
39 | "Not ok would be:\n",
40 | "\n",
41 | "- Q: Here is my code and solution for exercise XXX. Is this correct? "
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 1,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "name": "stdout",
51 | "output_type": "stream",
52 | "text": [
53 | " \n",
54 | "last updated: 2018-10-21 \n",
55 | "\n",
56 | "CPython 3.6.6\n",
57 | "IPython 6.5.0\n",
58 | "\n",
59 | "numpy 1.15.1\n",
60 | "scipy 1.1.0\n",
61 | "matplotlib 2.2.3\n",
62 | "sklearn 0.20.0\n"
63 | ]
64 | }
65 | ],
66 | "source": [
67 | "%load_ext watermark\n",
68 | "%watermark -d -u -a '' -v -p numpy,scipy,matplotlib,sklearn"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": 2,
74 | "metadata": {},
75 | "outputs": [],
76 | "source": [
77 | "import numpy as np"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": [
84 | "
\n",
85 | "
\n",
86 | "
\n",
87 | "
\n",
88 | "
\n",
89 | "
"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "## 1) Implementing an ID3 Decision Tree"
97 | ]
98 | },
99 | {
100 | "cell_type": "markdown",
101 | "metadata": {},
102 | "source": [
103 | "In this first part of the homework, you are going to implement the ID3 decision tree algorithm we discussed in class. This decision tree algorithm will support multi-category splits, but just like the original ID3 algorithm, it will only support categorical feature values for simplicity. Here, categorical feature values will be represented by integer numbers. \n",
104 | "\n",
105 | "\n",
106 | "Implementing machine learning algorithms from scratch is a very important skill, and this homework will provide exercises that will help you to develop this skill. Even if you are interested in the more theoretical aspects of machine learning, being comfortable with implementing and trying out algorithms is vital for doing research, since even the more theoretical papers in machine learning are usually accompanied by experiments or simulations to a) verify results and b) to compare algorithms with the state-of-the art.\n",
107 | "\n",
108 | "Since many students are not expert Python programmers (yet), I will provide partial solutions to the homework tasks such that you have a framework or guide to implement the solutions. Areas that you need to fill in will be marked with comments (e.g., `# your code`). For these partial solutions, I first implemented the functions myself, and then I deleted parts you need to fill in by these comments. However, note that you can, of course, use more or fewer lines of code than I did. In other words, all that matter is that the function you write can create the same outputs as the ones I provide. How many lines of code you need to implement that function, and how efficient it is, does not matter here. The expected outputs for the respective functions will be provided so that you can double-check your solutions. "
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "metadata": {},
114 | "source": [
115 | "### 1.1) Splitting a node (10 pts)"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {},
121 | "source": [
122 | "First, we are going to implement a function that splits a dataset along a feature axis into sub-datasets. Since we are going to implement a decision tree that only supports categorical features (like ID3) for simplicity, you do not need to account for continuous feature variables. In other words, the splitting function only needs to support integer NumPy arrays. \n",
123 | "\n",
124 | "To provide an intuitive example, suppose you are given the following NumPy array with four feature values, feature values 0-3:\n",
125 | "\n",
126 | " np.array([0, 1, 2, 1, 0, 3, 1, 0, 1, 2])\n",
127 | " \n",
128 | "The function you are going to implement should return a dictionary, where each dictionary key represents a unique value in the array, and the values are the indices in that array that map to the respective feature value. Hence, based on the feature array above, your `split` function should return the following dictionary:\n",
129 | "\n",
130 | " {0: array([0, 4, 7]), \n",
131 | " 1: array([1, 3, 6, 8]), \n",
132 | " 2: array([2, 9]), \n",
133 | " 3: array([5])}"
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "metadata": {},
139 | "source": [
140 | "Tip: I recommend you to use `np.where` and `np.unique` functions to make the implementation easier. If you do not remember these functions from the \"computational foundations\" lectures, you can either look up those functions in the NumPy documentation online, or you can execute `np.where?` and `np.unique?` in a new code cell to get more information."
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": null,
146 | "metadata": {},
147 | "outputs": [],
148 | "source": [
149 | "def split(array):\n",
150 | " # your code to generate dictionary\n",
151 | " return # return the dictionary variable"
152 | ]
153 | },
154 | {
155 | "cell_type": "markdown",
156 | "metadata": {},
157 | "source": [
158 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your implementation."
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 4,
164 | "metadata": {},
165 | "outputs": [
166 | {
167 | "name": "stdout",
168 | "output_type": "stream",
169 | "text": [
170 | "{0: array([0]), 1: array([1]), 2: array([2])}\n",
171 | "{0: array([1, 3, 4, 6]), 1: array([0, 2, 5])}\n",
172 | "{0: array([1, 4]), 1: array([0, 5, 6]), 2: array([3]), 3: array([2])}\n"
173 | ]
174 | }
175 | ],
176 | "source": [
177 | "# DO NOT EDIT OR DELETE THIS CELL\n",
178 | "\n",
179 | "print(split(np.array([0, 1, 2])))\n",
180 | "print(split(np.array([1, 0, 1, 0, 0, 1, 0])))\n",
181 | "print(split(np.array([1, 0, 3, 2, 0, 1, 1])))"
182 | ]
183 | },
184 | {
185 | "cell_type": "markdown",
186 | "metadata": {},
187 | "source": [
188 | "### 1.2) Implement Entropy (10 pts)"
189 | ]
190 | },
191 | {
192 | "cell_type": "markdown",
193 | "metadata": {},
194 | "source": [
195 | "After implementing the splitting function, we are now have to implement a criterion function so that we can compare splits on different features, to decide which feature is the best feature to split for growing the decision tree. As discussed in class, our splitting criterion will be Information Gain. However, before we implement an Information Gain function, we need to implement a function that computes the entropy at each node, which we need to compute Information Gain.\n",
196 | "\n",
197 | "For your reference, we defined entropy (i.e., Shannon Entropy) as follows:\n",
198 | "\n",
199 | "$$H(p) = \\sum_i p_i \\log_2 (1/p_i) = - \\sum_i p_i \\log_2 (p_i)$$\n",
200 | "\n",
201 | "where you can think of $p_i$ as the proportion of examples with class label $i$ at a given node."
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": null,
207 | "metadata": {},
208 | "outputs": [],
209 | "source": [
210 | "def entropy(array):\n",
211 | " # your code\n",
212 | " # your code\n",
213 | " return # return a scalar"
214 | ]
215 | },
216 | {
217 | "cell_type": "markdown",
218 | "metadata": {},
219 | "source": [
220 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your implementation of the `entropy` function."
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": 8,
226 | "metadata": {},
227 | "outputs": [
228 | {
229 | "name": "stdout",
230 | "output_type": "stream",
231 | "text": [
232 | "1.0\n",
233 | "1.0\n",
234 | "0.0\n",
235 | "0.4395\n",
236 | "0.0\n",
237 | "1.6577\n"
238 | ]
239 | }
240 | ],
241 | "source": [
242 | "# DO NOT EDIT OR DELETE THIS CELL\n",
243 | "\n",
244 | "print(round(entropy(np.array([0, 1, 0, 1, 1, 0])), 4))\n",
245 | "print(round(entropy(np.array([1, 2])), 4))\n",
246 | "print(round(entropy(np.array([1, 1])), 4))\n",
247 | "print(round(entropy(np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), 4))\n",
248 | "print(round(entropy(np.array([0, 0, 0])), 4))\n",
249 | "print(round(entropy(np.array([1, 1, 1, 0, 1, 4, 4, 2, 1])), 4))"
250 | ]
251 | },
252 | {
253 | "cell_type": "markdown",
254 | "metadata": {},
255 | "source": [
256 | "### 1.3) Implement Information Gain (10 pts)"
257 | ]
258 | },
259 | {
260 | "cell_type": "markdown",
261 | "metadata": {},
262 | "source": [
263 | "Now that you have a working solution for the `entropy` function, the next step is to compute the Information Gain. For your reference, information gain is computed as\n",
264 | "\n",
265 | "$$GAIN(\\mathcal{D}, x_j) = H(\\mathcal{D}) - \\sum_{v \\in Values(x_j)} \\frac{|\\mathcal{D}_v|}{|\\mathcal{D}|} H(\\mathcal{D}_v).$$"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": null,
271 | "metadata": {},
272 | "outputs": [],
273 | "source": [
274 | "def information_gain(x_array, y_array):\n",
275 | " parent_entropy = # your code\n",
276 | "\n",
277 | " split_dict = # your code\n",
278 | " \n",
279 | " for val in split_dict:\n",
280 | " freq = # your code\n",
281 | " child_entropy = # your code\n",
282 | " parent_entropy -= # your code\n",
283 | " \n",
284 | " return parent_entropy"
285 | ]
286 | },
287 | {
288 | "cell_type": "markdown",
289 | "metadata": {},
290 | "source": [
291 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your implementation of the `information_gain` function."
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": 11,
297 | "metadata": {},
298 | "outputs": [
299 | {
300 | "name": "stdout",
301 | "output_type": "stream",
302 | "text": [
303 | "0.4591\n",
304 | "0.2516\n"
305 | ]
306 | }
307 | ],
308 | "source": [
309 | "# DO NOT EDIT OR DELETE THIS CELL\n",
310 | "\n",
311 | "x = np.array([0, 1, 0, 1, 0, 1])\n",
312 | "y = np.array([0, 1, 0, 1, 1, 1])\n",
313 | "print(round(information_gain(x, y), 4))\n",
314 | "\n",
315 | "x = np.array([0, 0, 1, 1, 2, 2])\n",
316 | "y = np.array([0, 1, 0, 1, 1, 1])\n",
317 | "print(round(information_gain(x, y), 4))"
318 | ]
319 | },
320 | {
321 | "cell_type": "markdown",
322 | "metadata": {},
323 | "source": [
324 | "(You may notice that these are actually the feature arrays from the midterm exam, Q 14.)"
325 | ]
326 | },
327 | {
328 | "cell_type": "markdown",
329 | "metadata": {},
330 | "source": [
331 | "### 1.4) Decision Tree Splitting (10 pts)"
332 | ]
333 | },
334 | {
335 | "cell_type": "markdown",
336 | "metadata": {},
337 | "source": [
338 | "Now, we should have all the main components that we need for implementing the ID3 decision tree algorithm: a `split` function, an `entropy` function, and an `information_gain` function based on the `entropy` function. \n",
339 | "\n",
340 | "The next task is combine these functions to recursively split a dataset on its different features to construct a decision tree that separate the examples from different classes well. We will call this function `make_tree`. \n",
341 | "\n",
342 | "For simplicity, the decision tree returned by the `make_tree` function will be represented by a Python dictionary. To illustrate this, consider the following dataset:\n",
343 | "\n",
344 | "```\n",
345 | "Inputs:\n",
346 | " [[0 0]\n",
347 | " [0 1]\n",
348 | " [1 0]\n",
349 | " [1 1]\n",
350 | " [2 0]\n",
351 | " [2 1]]\n",
352 | "\n",
353 | "Labels:\n",
354 | " [0 1 0 1 1 1]\n",
355 | "```\n",
356 | " \n",
357 | "This is a dataset with 6 training examples and two features. (Again, this is an example from the midterm exam.) The decision tree in form of the Python dictionary should look like as follows:\n",
358 | "\n",
359 | "\n",
360 | "\n",
361 | "You should return a dictionary with the following form:\n",
362 | "\n",
363 | "```\n",
364 | "{'X_1 = 0': {'X_0 = 0': array([0]),\n",
365 | " 'X_0 = 1': array([0]),\n",
366 | " 'X_0 = 2': array([1])},\n",
367 | " 'X_1 = 1': array([1, 1, 1])}\n",
368 | " ```\n",
369 | " \n",
370 | "Let me further illustrate what the different parts of the dictionary mean. Here, the `'X_1'` in `'X_1 = 0'` refers feature 2 (the first column of the NumPy array; remember that Python starts the index at 0, in contrast to R). \n",
371 | "\n",
372 | "- 'X_1 = 0': For training examples stored in this node, the second feature has the value 0\n",
373 | "- 'X_1 = 1': For training examples stored in this node, the second feature has the value 1\n",
374 | "\n",
375 | "The \"array\" is a NumPy array that stores the class labels of the training examples at that node. In the case of `'X_1 = 0'` we actually store actually a sub-dictionary, because this node can be split further. If you have trouble understanding this dictionary representation, the following illustration might help:\n",
376 | "\n",
377 | "\n",
378 | ""
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": null,
384 | "metadata": {},
385 | "outputs": [],
386 | "source": [
387 | "def make_tree(X, y):\n",
388 | " \n",
389 | " # Return array if node is empty or pure (1 example in leaf node)\n",
390 | " if y.shape[0] == 1 or y.shape[0] == 0:\n",
391 | " return y\n",
392 | "\n",
393 | " # Compute information gain for each feature\n",
394 | " gains = # YOUR CODE\n",
395 | "\n",
396 | " # Early stopping if there is no information gain\n",
397 | " if (gains <= 1e-05).all():\n",
398 | " return # YOUR CODE\n",
399 | " \n",
400 | " # Else, get best feature\n",
401 | " best_feature = np.argmax(gains)\n",
402 | "\n",
403 | " \n",
404 | " results = {}\n",
405 | " \n",
406 | " # Use the `split` function to split on the best feature\n",
407 | " subset_dict = split(X[:, best_feature])\n",
408 | "\n",
409 | " # Note that each entry in the dictionary returned by \n",
410 | " # split is an attribute_value:array_indices pair.\n",
411 | " # here, we are going to iterate over these key-value\n",
412 | " # pairs and select the respective examples for the\n",
413 | " # new child nodes\n",
414 | " \n",
415 | " for feature_value, train_example_indices in subset_dict.items():\n",
416 | " child_y_subset = # YOUR CODE\n",
417 | " child_x_subset = # YOUR CODE\n",
418 | "\n",
419 | " # Next, we are using \"recursion,\" that is, calling the same\n",
420 | " # tree_split function on the child subset(s)\n",
421 | " \n",
422 | " results[\"X_%d = %d\" % (best_feature, feature_value)] = \\\n",
423 | " make_tree(child_x_subset, child_y_subset)\n",
424 | "\n",
425 | " \n",
426 | " return results"
427 | ]
428 | },
429 | {
430 | "cell_type": "markdown",
431 | "metadata": {},
432 | "source": [
433 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your implementation of the `make_tree` function."
434 | ]
435 | },
436 | {
437 | "cell_type": "code",
438 | "execution_count": 10,
439 | "metadata": {},
440 | "outputs": [
441 | {
442 | "name": "stdout",
443 | "output_type": "stream",
444 | "text": [
445 | "Inputs:\n",
446 | " [[0 0]\n",
447 | " [0 1]\n",
448 | " [1 0]\n",
449 | " [1 1]\n",
450 | " [2 0]\n",
451 | " [2 1]]\n",
452 | "\n",
453 | "Labels:\n",
454 | " [0 1 0 1 1 1]\n",
455 | "\n",
456 | "Decision tree:\n",
457 | " {'X_1 = 0': {'X_0 = 0': array([0]), 'X_0 = 1': array([0]), 'X_0 = 2': array([1])}, 'X_1 = 1': array([1, 1, 1])}\n"
458 | ]
459 | }
460 | ],
461 | "source": [
462 | "# DO NOT EDIT OR DELETE THIS CELL\n",
463 | "\n",
464 | "x1 = np.array([0, 0, 1, 1, 2, 2])\n",
465 | "x2 = np.array([0, 1, 0, 1, 0, 1])\n",
466 | "X = np.array([x1, x2]).T\n",
467 | "y = np.array([0, 1, 0, 1, 1, 1])\n",
468 | "\n",
469 | "print('Inputs:\\n', X)\n",
470 | "print('\\nLabels:\\n', y)\n",
471 | "\n",
472 | "print('\\nDecision tree:\\n', make_tree(X, y))"
473 | ]
474 | },
475 | {
476 | "cell_type": "markdown",
477 | "metadata": {},
478 | "source": [
479 | "### 1.5) Building a Decision Tree API (10 pts)"
480 | ]
481 | },
482 | {
483 | "cell_type": "markdown",
484 | "metadata": {},
485 | "source": [
486 | "The final step of this part of the homework is now to write an API around our decision tree code so that we can use is for making predictions. Here, we will use the common convention, established by scikit-learn, to implement the decision tree as a Python class with \n",
487 | "\n",
488 | "- a `fit` method that learns the decision tree model from a training set via the `make_tree` function we already implemented;\n",
489 | "- a `predict` method to predict the class labels of training examples or any unseen data points.\n",
490 | "\n",
491 | "For making predictions, since not all leaf nodes are guaranteed to be single training examples, we will use a majority voting function to predict the class label as discussed in class. I already implemented a `_traverse` method, which will recursively traverse a decision tree dictionary that is produced by the `make_tree` function.\n",
492 | "\n",
493 | "Note that for simplicity, the `predict` method will only be able to accept one data point at a time (instead of a collection of data points). Hence `x` is a vector of size $\\mathbb{R}^m$, where $m$ is the number of features. I use capital letters `X` to denote a matrix of size $\\mathbb{R}^{n\\times m}$, where $n$ is the number of training examples."
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "execution_count": null,
499 | "metadata": {},
500 | "outputs": [],
501 | "source": [
502 | "class ID3DecisionTreeClassifer(object):\n",
503 | " \n",
504 | " def __init__(self):\n",
505 | " pass\n",
506 | " \n",
507 | " def fit(self, X, y):\n",
508 | " self.splits_ = # YOUR CODE to generate the decision tree dictionary\n",
509 | " \n",
510 | " def _majority_vote(self, label_array):\n",
511 | " return # YOUR CODE\n",
512 | " \n",
513 | " def _traverse(self, x, d):\n",
514 | " if isinstance(d, np.ndarray):\n",
515 | " return d\n",
516 | " for key in d:\n",
517 | " name, value = key.split(' = ')\n",
518 | " feature_idx = int(name.split('_')[-1])\n",
519 | " value = int(value)\n",
520 | " if x[feature_idx] == value:\n",
521 | " return self._traverse(x, d[key])\n",
522 | " \n",
523 | " def predict(self, x):\n",
524 | " \n",
525 | " label_array = # YOUR CODE to get class labels from the target node\n",
526 | " return #YOUR CODE to predict the class label via majority voting from label_array"
527 | ]
528 | },
529 | {
530 | "cell_type": "markdown",
531 | "metadata": {},
532 | "source": [
533 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your implementation of the `make_tree` function."
534 | ]
535 | },
536 | {
537 | "cell_type": "code",
538 | "execution_count": 12,
539 | "metadata": {},
540 | "outputs": [
541 | {
542 | "name": "stdout",
543 | "output_type": "stream",
544 | "text": [
545 | "0\n",
546 | "1\n",
547 | "0\n",
548 | "0\n",
549 | "1\n",
550 | "1\n",
551 | "1\n"
552 | ]
553 | }
554 | ],
555 | "source": [
556 | "# DO NOT EDIT OR DELETE THIS CELL\n",
557 | "\n",
558 | "tree = ID3DecisionTreeClassifer()\n",
559 | "tree.fit(X, y)\n",
560 | "\n",
561 | "print(tree.predict(np.array([0, 0])))\n",
562 | "print(tree.predict(np.array([0, 1])))\n",
563 | "print(tree.predict(np.array([1, 0])))\n",
564 | "print(tree.predict(np.array([1, 0])))\n",
565 | "print(tree.predict(np.array([1, 1])))\n",
566 | "print(tree.predict(np.array([2, 0])))\n",
567 | "print(tree.predict(np.array([2, 1])))"
568 | ]
569 | },
570 | {
571 | "cell_type": "markdown",
572 | "metadata": {},
573 | "source": [
574 | "
\n",
575 | "
\n",
576 | "
\n",
577 | "
\n",
578 | "
\n",
579 | "
"
580 | ]
581 | },
582 | {
583 | "cell_type": "markdown",
584 | "metadata": {},
585 | "source": [
586 | "## 2) Bagging"
587 | ]
588 | },
589 | {
590 | "cell_type": "markdown",
591 | "metadata": {},
592 | "source": [
593 | "In this second part of this homework, you will be combining multiple decision trees to a bagging classifier. This time, we will be using the decision tree algorithm implemented in scikit-learn (which is some variant of the CART algorithm for binary splits, as discussed in class)."
594 | ]
595 | },
596 | {
597 | "cell_type": "markdown",
598 | "metadata": {},
599 | "source": [
600 | "### 2.1 Bootrapping (10 pts)"
601 | ]
602 | },
603 | {
604 | "cell_type": "markdown",
605 | "metadata": {},
606 | "source": [
607 | "As you remember, bagging relies on bootstrap sampling. So, as a first step, your task is to implement a function for generating bootstrap samples. In this exercise, for simplicity, we will perform the computations based on the Iris dataset.\n",
608 | "\n",
609 | "On an interesting side note, scikit-learn recently updated their version of the Iris dataset since it was discovered that the Iris version hosted on the UCI machine learning repository (https://archive.ics.uci.edu/ml/datasets/Iris/) has two data points that are different from R. Fisher's original paper (Fisher,R.A. \"The use of multiple measurements in taxonomic problems\" Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to Mathematical Statistics\" (John Wiley, NY, 1950).) and changed it in their most recent version. Since most students may not have the latest scikit-learn version installed, we will be working with the Iris dataset that is deposited on UCI, which has become quite the standard in the Python machine learning community for benchmarking algorithms. Instead of manually downloading it, we will be fetching it through the `mlxtend` (http://rasbt.github.io/mlxtend/) library that you installed in the last homework."
610 | ]
611 | },
612 | {
613 | "cell_type": "code",
614 | "execution_count": 13,
615 | "metadata": {},
616 | "outputs": [
617 | {
618 | "name": "stdout",
619 | "output_type": "stream",
620 | "text": [
621 | "Number of examples: 150\n",
622 | "Number of features: 4\n",
623 | "Unique class labels: [0 1 2]\n"
624 | ]
625 | }
626 | ],
627 | "source": [
628 | "# DO NOT EDIT OR DELETE THIS CELL\n",
629 | "\n",
630 | "from mlxtend.data import iris_data\n",
631 | "X, y = iris_data()\n",
632 | "\n",
633 | "print('Number of examples:', X.shape[0])\n",
634 | "print('Number of features:', X.shape[1])\n",
635 | "print('Unique class labels:', np.unique(y))"
636 | ]
637 | },
638 | {
639 | "cell_type": "markdown",
640 | "metadata": {},
641 | "source": [
642 | "Use scikit-learn's `train_test_split` function to divide the dataset into a training and a test set.\n",
643 | "\n",
644 | "- The test set should contain 45 examples, and the training set should contain 105 examples.\n",
645 | "- To ensure reproducible results, use `123` as a random seed.\n",
646 | "- Perform a stratified split."
647 | ]
648 | },
649 | {
650 | "cell_type": "code",
651 | "execution_count": null,
652 | "metadata": {},
653 | "outputs": [],
654 | "source": [
655 | "from sklearn.model_selection import # YOUR CODE\n",
656 | "\n",
657 | "\n",
658 | "X_train, X_test, y_train, y_test = # YOUR CODE\n",
659 | "\n",
660 | "print('Number of training examples:', X_train.shape[0])\n",
661 | "print('Number of test examples:', X_test.shape[0])"
662 | ]
663 | },
664 | {
665 | "cell_type": "markdown",
666 | "metadata": {},
667 | "source": [
668 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your implementation of the `make_tree` function."
669 | ]
670 | },
671 | {
672 | "cell_type": "code",
673 | "execution_count": 15,
674 | "metadata": {},
675 | "outputs": [
676 | {
677 | "name": "stdout",
678 | "output_type": "stream",
679 | "text": [
680 | "Number of training examples: 105\n",
681 | "Number of test examples: 45\n"
682 | ]
683 | }
684 | ],
685 | "source": [
686 | "# DO NOT EDIT OR DELETE THIS CELL\n",
687 | "\n",
688 | "print('Number of training examples:', X_train.shape[0])\n",
689 | "print('Number of test examples:', X_test.shape[0])"
690 | ]
691 | },
692 | {
693 | "cell_type": "markdown",
694 | "metadata": {},
695 | "source": [
696 | "Next we are implementing a function to generate bootstrap samples of the training set. In particular, we will perform the bootstrapping as follows:\n",
697 | "\n",
698 | "- Create an index array with values 0, ..., 104.\n",
699 | "- Draw a random sample (with replacement) from this index array using the `choice` method of a NumPy `RandomState` object that is passed to the function as `rng`. \n",
700 | "- Select training examples from the X array and labels from the y array using the new sample of indices."
701 | ]
702 | },
703 | {
704 | "cell_type": "code",
705 | "execution_count": null,
706 | "metadata": {},
707 | "outputs": [],
708 | "source": [
709 | "def draw_bootstrap_sample(rng, X, y):\n",
710 | " sample_indices = # YOUR CODE\n",
711 | " bootstrap_indices = rng.choice( # YOUR CODE )\n",
712 | " return X[# YOUR CODE], y[# YOUR CODE]"
713 | ]
714 | },
715 | {
716 | "cell_type": "markdown",
717 | "metadata": {},
718 | "source": [
719 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your implementation of the `draw_bootstrap_sample` function."
720 | ]
721 | },
722 | {
723 | "cell_type": "code",
724 | "execution_count": 17,
725 | "metadata": {},
726 | "outputs": [
727 | {
728 | "name": "stdout",
729 | "output_type": "stream",
730 | "text": [
731 | "Number of training inputs from bootstrap round: 105\n",
732 | "Number of training labels from bootstrap round: 105\n",
733 | "Labels:\n",
734 | " [0 0 1 0 0 1 2 0 2 1 0 0 2 1 1 1 1 2 1 1 2 0 2 1 2 1 1 1 0 1 0 0 1 2 0 0 0\n",
735 | " 0 2 1 1 2 1 2 1 1 2 1 2 0 1 1 2 2 1 0 1 0 2 2 0 1 0 2 0 0 0 0 1 2 0 0 1 0\n",
736 | " 1 1 0 1 1 2 2 0 2 0 2 0 1 1 2 2 0 2 2 2 0 1 0 1 2 2 2 1 0 0 0]\n"
737 | ]
738 | }
739 | ],
740 | "source": [
741 | "# DO NOT EDIT OR DELETE THIS CELL\n",
742 | "\n",
743 | "rng = np.random.RandomState(123)\n",
744 | "X_boot, y_boot = draw_bootstrap_sample(rng, X_train, y_train)\n",
745 | "\n",
746 | "print('Number of training inputs from bootstrap round:', X_boot.shape[0])\n",
747 | "print('Number of training labels from bootstrap round:', y_boot.shape[0])\n",
748 | "print('Labels:\\n', y_boot)"
749 | ]
750 | },
751 | {
752 | "cell_type": "markdown",
753 | "metadata": {},
754 | "source": [
755 | "### 2.2 Baggging classifier from decision trees (10 pts)"
756 | ]
757 | },
758 | {
759 | "cell_type": "markdown",
760 | "metadata": {},
761 | "source": [
762 | "In this section, you will implement a Bagging algorithm based on the `DecisionTreeClassifier`. I provided a partial solution for you. "
763 | ]
764 | },
765 | {
766 | "cell_type": "code",
767 | "execution_count": null,
768 | "metadata": {},
769 | "outputs": [],
770 | "source": [
771 | "from sklearn.tree import DecisionTreeClassifier\n",
772 | "\n",
773 | "\n",
774 | "class BaggingClassifier(object):\n",
775 | " \n",
776 | " def __init__(self, num_trees=10, random_state=123):\n",
777 | " self.num_trees = num_trees\n",
778 | " self.rng = np.random.RandomState(random_state)\n",
779 | " \n",
780 | " \n",
781 | " def fit(self, X, y):\n",
782 | " self.trees_ = [DecisionTreeClassifier(random_state=self.rng) for i in range(self.num_trees)]\n",
783 | " for i in range(self.num_trees):\n",
784 | " X_boot, y_boot = # YOUR CODE to draw a bootstrap sample\n",
785 | " # YOUR CODE to\n",
786 | " # fit the trees in self.trees_ on the bootstrap samples\n",
787 | " \n",
788 | " def predict(self, X):\n",
789 | " ary = np.zeros((X.shape[0], len(self.trees_)), dtype=np.int)\n",
790 | " for i in range(len(self.trees_)):\n",
791 | " ary[:, i] = self.trees_[i].predict(X)\n",
792 | "\n",
793 | " maj = np.apply_along_axis(lambda x:\n",
794 | " np.argmax(np.bincount(x)),\n",
795 | " axis=1,\n",
796 | " arr=ary)\n",
797 | " return maj"
798 | ]
799 | },
800 | {
801 | "cell_type": "markdown",
802 | "metadata": {},
803 | "source": [
804 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your implementation of the `BaggingClassifier()`."
805 | ]
806 | },
807 | {
808 | "cell_type": "code",
809 | "execution_count": 29,
810 | "metadata": {},
811 | "outputs": [
812 | {
813 | "name": "stdout",
814 | "output_type": "stream",
815 | "text": [
816 | "Individual Tree Accuracies:\n",
817 | "88.9%\n",
818 | "93.3%\n",
819 | "97.8%\n",
820 | "93.3%\n",
821 | "93.3%\n",
822 | "93.3%\n",
823 | "91.1%\n",
824 | "97.8%\n",
825 | "97.8%\n",
826 | "97.8%\n",
827 | "\n",
828 | "Bagging Test Accuracy: 97.8%\n"
829 | ]
830 | }
831 | ],
832 | "source": [
833 | "# DO NOT EDIT OR DELETE THIS CELL\n",
834 | "\n",
835 | "model = BaggingClassifier()\n",
836 | "model.fit(X_train, y_train)\n",
837 | "\n",
838 | "predictions = model.predict(X_test)\n",
839 | "\n",
840 | "print('Individual Tree Accuracies:')\n",
841 | "for tree in model.trees_:\n",
842 | " predictions = tree.predict(X_test) \n",
843 | " print('%.1f%%' % ((predictions == y_test).sum() / X_test.shape[0] * 100))\n",
844 | "\n",
845 | "print('\\nBagging Test Accuracy: %.1f%%' % ((predictions == y_test).sum() / X_test.shape[0] * 100))"
846 | ]
847 | },
848 | {
849 | "cell_type": "markdown",
850 | "metadata": {},
851 | "source": [
852 | "
\n",
853 | "
\n",
854 | "
\n",
855 | "
\n",
856 | "
\n",
857 | "
"
858 | ]
859 | },
860 | {
861 | "cell_type": "markdown",
862 | "metadata": {},
863 | "source": [
864 | "## 3) Bias-Variance Decomposition"
865 | ]
866 | },
867 | {
868 | "cell_type": "markdown",
869 | "metadata": {},
870 | "source": [
871 | "In this exercise you will be asked to compute the variance and bias components of the 0-1 loss that we discussed in class. \n",
872 | "\n",
873 | "- In particular, you will compute the average bias and the average variance over all test examples (instead of a single test example. \n",
874 | "\n",
875 | "- The dataset you will be using as training set(s) and test set is the Iris dataset that you already divided into `X_train` / `y_train` and `X_test` / `y_test` earlier.\n",
876 | "\n",
877 | "- Since we do not have unlimited training datasets to estimate the parameters (think back of the estimation over the training sets), we will use bootstrapping to simulate \"new\" training sets. \n"
878 | ]
879 | },
880 | {
881 | "cell_type": "markdown",
882 | "metadata": {},
883 | "source": [
884 | "### 3.1 Bias-Variance decomposition of the 0-1 Loss for Decision Trees (10 pts)"
885 | ]
886 | },
887 | {
888 | "cell_type": "markdown",
889 | "metadata": {},
890 | "source": [
891 | "In this first part, you will be computing the averaged bias and variance components over the test set examples for the decision tree algorithm implemented in scikit-learn on the Iris data. \n",
892 | "\n",
893 | "I already implemented the code for computing the \"main prediction\" for you:"
894 | ]
895 | },
896 | {
897 | "cell_type": "code",
898 | "execution_count": 20,
899 | "metadata": {},
900 | "outputs": [],
901 | "source": [
902 | "# DO NOT EDIT OR DELETE THIS CELL\n",
903 | "\n",
904 | "rng = np.random.RandomState(123)\n",
905 | "\n",
906 | "num_bootstrap = 200\n",
907 | "\n",
908 | "all_pred = np.zeros((num_bootstrap, y_test.shape[0]), dtype=np.int)\n",
909 | "\n",
910 | "for i in range(num_bootstrap):\n",
911 | " X_boot, y_boot = draw_bootstrap_sample(rng, X_train, y_train)\n",
912 | " pred = DecisionTreeClassifier(random_state=66).fit(X_boot, y_boot).predict(X_test)\n",
913 | " all_pred[i] = pred\n",
914 | " \n",
915 | "main_predictions = np.apply_along_axis(lambda x:\n",
916 | " np.argmax(np.bincount(x)),\n",
917 | " axis=0,\n",
918 | " arr=all_pred)"
919 | ]
920 | },
921 | {
922 | "cell_type": "markdown",
923 | "metadata": {},
924 | "source": [
925 | "Note that `all_pred` is a 2D array of dimension $\\mathbb{R}^{b \\times n_{test}}$, where $m$ is the number of bootstrap rounds and $n_{test}$ is the number of test examples in the test set. In other words, each of the 200 rows in this array stores the predictions of one particular decision tree hypothesis for all 45 test data points.\n",
926 | "\n",
927 | "Your first task is to compute the average bias over all test examples:"
928 | ]
929 | },
930 | {
931 | "cell_type": "code",
932 | "execution_count": null,
933 | "metadata": {},
934 | "outputs": [],
935 | "source": [
936 | "# YOUR CODE\n",
937 | "\n",
938 | "\n",
939 | "print('Average bias:', bias)"
940 | ]
941 | },
942 | {
943 | "cell_type": "markdown",
944 | "metadata": {},
945 | "source": [
946 | "Your second task is to compute the average variance over all test examples:"
947 | ]
948 | },
949 | {
950 | "cell_type": "code",
951 | "execution_count": null,
952 | "metadata": {},
953 | "outputs": [],
954 | "source": [
955 | "# YOUR CODE\n",
956 | "# you probably need multiple\n",
957 | "# lines of code and a for-loop\n",
958 | "\n",
959 | "print('Average variance:', var)"
960 | ]
961 | },
962 | {
963 | "cell_type": "markdown",
964 | "metadata": {},
965 | "source": [
966 | "Hint: The average bias and variance values are both scalars, not vectors or matrices. In other words, for each of the code cells above, you should return a real number (float)."
967 | ]
968 | },
969 | {
970 | "cell_type": "markdown",
971 | "metadata": {},
972 | "source": [
973 | "### 3.2 Bias-Variance decomposition of the 0-1 Loss for Bagging (10 pts)"
974 | ]
975 | },
976 | {
977 | "cell_type": "markdown",
978 | "metadata": {},
979 | "source": [
980 | "Use the code from the previous section, 3.1, to compare the decision tree algorithm with a BaggingClassifier from scikit-learn.\n",
981 | "\n",
982 | "- Report both the average bias and average variance just like before, but use the `BaggingClassifier` in scikit-learn instead of the `DecisionTreeClassifier`. You can use the default values of `BaggingClassifier`."
983 | ]
984 | },
985 | {
986 | "cell_type": "code",
987 | "execution_count": null,
988 | "metadata": {},
989 | "outputs": [],
990 | "source": [
991 | "# YOUR SOLUTION\n",
992 | "# Many lines of code (which you may copy and modify from 3.1)\n",
993 | "\n",
994 | "\n",
995 | "print('Average bias:', bias)\n",
996 | "print('Average variance:', var)"
997 | ]
998 | },
999 | {
1000 | "cell_type": "markdown",
1001 | "metadata": {},
1002 | "source": [
1003 | "Is the average variance higher or lower than the avergage of the decision tree in 3.1? And what about the average bias?"
1004 | ]
1005 | },
1006 | {
1007 | "cell_type": "markdown",
1008 | "metadata": {},
1009 | "source": [
1010 | "!!! TYPE YOUR ANSWER HERE !!!"
1011 | ]
1012 | },
1013 | {
1014 | "cell_type": "markdown",
1015 | "metadata": {},
1016 | "source": [
1017 | "### 3.3 Bias-Variance decomposition of the 0-1 Loss for AdaBoost (10 pts)"
1018 | ]
1019 | },
1020 | {
1021 | "cell_type": "markdown",
1022 | "metadata": {},
1023 | "source": [
1024 | "Use the code from the previous section, 3.1, to compare the decision tree algorithm with a AdaBoostClassifier from scikit-learn.\n",
1025 | "\n",
1026 | "- Report both the average bias and average variance just like before, but use the `AdaboostClassifier` in scikit-learn instead of the `DecisionTreeClassifier`. You can use the default values of `AdaboostClassifier`."
1027 | ]
1028 | },
1029 | {
1030 | "cell_type": "code",
1031 | "execution_count": null,
1032 | "metadata": {},
1033 | "outputs": [],
1034 | "source": [
1035 | "# YOUR SOLUTION\n",
1036 | "# Many lines of code (which you may copy and modify from 3.1)\n",
1037 | "\n",
1038 | "\n",
1039 | "\n",
1040 | "print('Average bias:', bias)\n",
1041 | "print('Average variance:', var)"
1042 | ]
1043 | },
1044 | {
1045 | "cell_type": "markdown",
1046 | "metadata": {},
1047 | "source": [
1048 | "Is the average variance higher or lower than the avergage of the decision tree in 3.1? And what about the average bias?"
1049 | ]
1050 | },
1051 | {
1052 | "cell_type": "markdown",
1053 | "metadata": {},
1054 | "source": [
1055 | "!!! TYPE YOUR ANSWER HERE !!!"
1056 | ]
1057 | },
1058 | {
1059 | "cell_type": "markdown",
1060 | "metadata": {},
1061 | "source": [
1062 | "
\n",
1063 | "
\n",
1064 | "
\n",
1065 | "
\n",
1066 | "
\n",
1067 | "
"
1068 | ]
1069 | },
1070 | {
1071 | "cell_type": "markdown",
1072 | "metadata": {},
1073 | "source": [
1074 | "## Bonus Exercise (10 pts)"
1075 | ]
1076 | },
1077 | {
1078 | "cell_type": "markdown",
1079 | "metadata": {},
1080 | "source": [
1081 | "In this bonus exercise, you will be asked to fit a `RandomForestClassifier` on a small subset (10%) of the MNIST handwritten digits dataset (http://yann.lecun.com/exdb/mnist/). For convenience, the following code loads this small subset via mlxtend:"
1082 | ]
1083 | },
1084 | {
1085 | "cell_type": "code",
1086 | "execution_count": 2,
1087 | "metadata": {},
1088 | "outputs": [
1089 | {
1090 | "name": "stdout",
1091 | "output_type": "stream",
1092 | "text": [
1093 | "Dimensions: 5000 x 784\n",
1094 | "1st row [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1095 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1096 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1097 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1098 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1099 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1100 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1101 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1102 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1103 | " 0. 51. 159. 253. 159. 50. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1104 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1105 | " 48. 238. 252. 252. 252. 237. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1106 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 54.\n",
1107 | " 227. 253. 252. 239. 233. 252. 57. 6. 0. 0. 0. 0. 0. 0.\n",
1108 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 10. 60. 224.\n",
1109 | " 252. 253. 252. 202. 84. 252. 253. 122. 0. 0. 0. 0. 0. 0.\n",
1110 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 163. 252. 252.\n",
1111 | " 252. 253. 252. 252. 96. 189. 253. 167. 0. 0. 0. 0. 0. 0.\n",
1112 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 51. 238. 253. 253.\n",
1113 | " 190. 114. 253. 228. 47. 79. 255. 168. 0. 0. 0. 0. 0. 0.\n",
1114 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 48. 238. 252. 252. 179.\n",
1115 | " 12. 75. 121. 21. 0. 0. 253. 243. 50. 0. 0. 0. 0. 0.\n",
1116 | " 0. 0. 0. 0. 0. 0. 0. 0. 38. 165. 253. 233. 208. 84.\n",
1117 | " 0. 0. 0. 0. 0. 0. 253. 252. 165. 0. 0. 0. 0. 0.\n",
1118 | " 0. 0. 0. 0. 0. 0. 0. 7. 178. 252. 240. 71. 19. 28.\n",
1119 | " 0. 0. 0. 0. 0. 0. 253. 252. 195. 0. 0. 0. 0. 0.\n",
1120 | " 0. 0. 0. 0. 0. 0. 0. 57. 252. 252. 63. 0. 0. 0.\n",
1121 | " 0. 0. 0. 0. 0. 0. 253. 252. 195. 0. 0. 0. 0. 0.\n",
1122 | " 0. 0. 0. 0. 0. 0. 0. 198. 253. 190. 0. 0. 0. 0.\n",
1123 | " 0. 0. 0. 0. 0. 0. 255. 253. 196. 0. 0. 0. 0. 0.\n",
1124 | " 0. 0. 0. 0. 0. 0. 76. 246. 252. 112. 0. 0. 0. 0.\n",
1125 | " 0. 0. 0. 0. 0. 0. 253. 252. 148. 0. 0. 0. 0. 0.\n",
1126 | " 0. 0. 0. 0. 0. 0. 85. 252. 230. 25. 0. 0. 0. 0.\n",
1127 | " 0. 0. 0. 0. 7. 135. 253. 186. 12. 0. 0. 0. 0. 0.\n",
1128 | " 0. 0. 0. 0. 0. 0. 85. 252. 223. 0. 0. 0. 0. 0.\n",
1129 | " 0. 0. 0. 7. 131. 252. 225. 71. 0. 0. 0. 0. 0. 0.\n",
1130 | " 0. 0. 0. 0. 0. 0. 85. 252. 145. 0. 0. 0. 0. 0.\n",
1131 | " 0. 0. 48. 165. 252. 173. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1132 | " 0. 0. 0. 0. 0. 0. 86. 253. 225. 0. 0. 0. 0. 0.\n",
1133 | " 0. 114. 238. 253. 162. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1134 | " 0. 0. 0. 0. 0. 0. 85. 252. 249. 146. 48. 29. 85. 178.\n",
1135 | " 225. 253. 223. 167. 56. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1136 | " 0. 0. 0. 0. 0. 0. 85. 252. 252. 252. 229. 215. 252. 252.\n",
1137 | " 252. 196. 130. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1138 | " 0. 0. 0. 0. 0. 0. 28. 199. 252. 252. 253. 252. 252. 233.\n",
1139 | " 145. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1140 | " 0. 0. 0. 0. 0. 0. 0. 25. 128. 252. 253. 252. 141. 37.\n",
1141 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1142 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1143 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1144 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1145 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1146 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1147 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1148 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
1149 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n"
1150 | ]
1151 | }
1152 | ],
1153 | "source": [
1154 | "from mlxtend.data import mnist_data\n",
1155 | "X, y = mnist_data()\n",
1156 | "\n",
1157 | "print('Dimensions: %s x %s' % (X.shape[0], X.shape[1]))\n",
1158 | "print('1st row', X[0])"
1159 | ]
1160 | },
1161 | {
1162 | "cell_type": "markdown",
1163 | "metadata": {},
1164 | "source": [
1165 | "The next code cell shuffles the dataset and divides it into 4500 training examples and 500 test examples, respectively."
1166 | ]
1167 | },
1168 | {
1169 | "cell_type": "code",
1170 | "execution_count": 3,
1171 | "metadata": {},
1172 | "outputs": [],
1173 | "source": [
1174 | "from mlxtend.preprocessing import shuffle_arrays_unison\n",
1175 | "\n",
1176 | "\n",
1177 | "X, y = shuffle_arrays_unison((X, y), random_seed=1)\n",
1178 | "X_train, y_train = X[:4500], y[:4500]\n",
1179 | "X_test, y_test = X[4500:], y[4500:]"
1180 | ]
1181 | },
1182 | {
1183 | "cell_type": "markdown",
1184 | "metadata": {},
1185 | "source": [
1186 | "Now, your task is to fit a RandomForest classifier on the training set and evaluate it's predictive accuracy on the test set. "
1187 | ]
1188 | },
1189 | {
1190 | "cell_type": "code",
1191 | "execution_count": 5,
1192 | "metadata": {},
1193 | "outputs": [
1194 | {
1195 | "name": "stdout",
1196 | "output_type": "stream",
1197 | "text": [
1198 | "Accuracy 93.6%\n"
1199 | ]
1200 | }
1201 | ],
1202 | "source": [
1203 | "from sklearn.ensemble import RandomForestClassifier\n",
1204 | "\n",
1205 | "model = RandomForestClassifier(n_estimators=100, random_state=123)\n",
1206 | "model.fit(#YOUR CODE)\n",
1207 | "\n",
1208 | "acc = # YOUR CODE\n",
1209 | "print('Accuracy %.1f%%' % acc)"
1210 | ]
1211 | },
1212 | {
1213 | "cell_type": "markdown",
1214 | "metadata": {},
1215 | "source": [
1216 | "Next, your task is to load an image of a digit (some_digit.png) from this directory into a Python array and classify it using the random forest model. The some_digit.png image is displayed below:"
1217 | ]
1218 | },
1219 | {
1220 | "cell_type": "markdown",
1221 | "metadata": {},
1222 | "source": [
1223 | ""
1224 | ]
1225 | },
1226 | {
1227 | "cell_type": "markdown",
1228 | "metadata": {},
1229 | "source": [
1230 | "Note: For loading the image, you need to install the Python imaging library PIL. Actually, Pillow, a more up-to-date fork is recommended. Execute one of the following two if you haven't installed Pillow already.\n",
1231 | " \n",
1232 | "- `conda install Pillow`\n",
1233 | "\n",
1234 | "- `pip install Pillow`"
1235 | ]
1236 | },
1237 | {
1238 | "cell_type": "markdown",
1239 | "metadata": {},
1240 | "source": [
1241 | "Again, I have partially pre-written the code for you."
1242 | ]
1243 | },
1244 | {
1245 | "cell_type": "code",
1246 | "execution_count": null,
1247 | "metadata": {},
1248 | "outputs": [],
1249 | "source": [
1250 | "from PIL import Image\n",
1251 | "import numpy as np\n",
1252 | "\n",
1253 | "def load_image(file_name):\n",
1254 | " img = Image.open(file_name)\n",
1255 | " img.load()\n",
1256 | " data = np.asarray(img, dtype=np.float)\n",
1257 | " return data\n",
1258 | "\n",
1259 | "x_image = # YOUR CODE"
1260 | ]
1261 | },
1262 | {
1263 | "cell_type": "code",
1264 | "execution_count": 5,
1265 | "metadata": {},
1266 | "outputs": [
1267 | {
1268 | "name": "stdout",
1269 | "output_type": "stream",
1270 | "text": [
1271 | "Digit: 5\n"
1272 | ]
1273 | }
1274 | ],
1275 | "source": [
1276 | "# The data needs to be represented as a vector (1 position for each feature)\n",
1277 | "x_transf = # YOUR CODE\n",
1278 | "\n",
1279 | "# Also, scikit-learn expects 2D arrays, so we need to add a dimension\n",
1280 | "x_transf = # YOUR CODE\n",
1281 | "\n",
1282 | "print('Digit:', model.predict(x_transf)[0])"
1283 | ]
1284 | }
1285 | ],
1286 | "metadata": {
1287 | "kernelspec": {
1288 | "display_name": "Python 3",
1289 | "language": "python",
1290 | "name": "python3"
1291 | },
1292 | "language_info": {
1293 | "codemirror_mode": {
1294 | "name": "ipython",
1295 | "version": 3
1296 | },
1297 | "file_extension": ".py",
1298 | "mimetype": "text/x-python",
1299 | "name": "python",
1300 | "nbconvert_exporter": "python",
1301 | "pygments_lexer": "ipython3",
1302 | "version": "3.6.6"
1303 | }
1304 | },
1305 | "nbformat": 4,
1306 | "nbformat_minor": 2
1307 | }
1308 |
--------------------------------------------------------------------------------
/hw_03/hw3.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Problem Set 3"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "STAT 479: Machine Learning (Fall 2018) \n",
15 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) \n",
16 | "Course website: http://pages.stat.wisc.edu/~sraschka/teaching/stat479-fs2018/\n",
17 | "\n",
18 | "**Due**: Dec 03 (before 11:59 pm).\n",
19 | "\n",
20 | "**How to submit**\n",
21 | "\n",
22 | "As mentioned in the lecture, you need to submit the `.ipynb` file with your answers plus an `.html` file, which will serve as a backup for us in case the `.ipynb` file cannot be opened on my or the TA's computer. In addition, you may also export the notebook as PDF and upload it as well.\n",
23 | "\n",
24 | "Again, we will be using the Canvas platform, so you need to submit your homework there. You should be able to resubmit the homework as many times as you like before the due date."
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {},
30 | "source": [
31 | "As usual, you do not write the whole code from scratch, and I provided you with a skeleton of code where you need to add the lines that I indicated. Not, however, that everyone's coding style is different. Where I use only one line of code, you may want to use multiple ones. Also, where you use one line of code, I may use multiple ones."
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "%load_ext watermark\n",
41 | "%watermark -d -u -a '' -v -p numpy,scipy,matplotlib,sklearn,mlxtend"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "\n",
49 | "
\n",
50 | "
\n",
51 | "
\n",
52 | "
\n",
53 | "
\n",
54 | "
\n",
55 | "
"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "## 1. Hyperparameter Tuning and Model Selection"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "### 1.1 [10 pts] Using Grid Search for Hyperparameter Tuning"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "In this exercise, you will be working with the Breast Cancer Wisconsin dataset,\n",
77 | "which contains 569 samples of malignant and benign tumor cells. \n",
78 | "\n",
79 | "The first two columns in the dataset store the unique ID numbers of the samples and the corresponding diagnoses (M = malignant, B = benign), respectively. Columns 3-32 contain 30 real-valued features that have been computed from digitized images of the cell nuclei, which can be used to build a model to predict whether a tumor is benign or malignant. The Breast Cancer Wisconsin dataset has been deposited in the UCI Machine Learning Repository, and more detailed information about this dataset can be found at https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wi sconsin+(Diagnostic).\n",
80 | "\n",
81 | "The next cell loads the datasets and converts the class label M (malignant) to a integer 1 and the label B (benign) to class label 0."
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n",
91 | "\n",
92 | "import pandas as pd\n",
93 | "\n",
94 | "\n",
95 | "df = pd.read_csv('data/wdbc.data', header=None)\n",
96 | "\n",
97 | "# convert class label \"M\"->1 and label \"B\"->0\n",
98 | "df[1] = df[1].apply(lambda x: 1 if x == 'M' else 0)\n",
99 | "\n",
100 | "\n",
101 | "df.head()"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {},
108 | "outputs": [],
109 | "source": [
110 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n",
111 | "\n",
112 | "\n",
113 | "from sklearn.model_selection import train_test_split\n",
114 | "\n",
115 | "\n",
116 | "y = df[1].values\n",
117 | "X = df.loc[:, 2:].values\n",
118 | "\n",
119 | "X_train, X_test, y_train, y_test = \\\n",
120 | " train_test_split(X, y, test_size=0.3, shuffle=True, random_state=0, stratify=y)"
121 | ]
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "metadata": {},
126 | "source": [
127 | "Now, your task is to use `GridSearchCV` from scikit-learn to find the best parameter for `n_neighbors` of a `KNearestNeighborClassifier`\n",
128 | "\n",
129 | "As hyperparameter values, you only need to consider the number of `n_neighbors` within the range 1-16 (including 16)."
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": null,
135 | "metadata": {},
136 | "outputs": [],
137 | "source": [
138 | "# MODIFY THIS CELL\n",
139 | "\n",
140 | "from sklearn.pipeline import make_pipeline\n",
141 | "from sklearn.preprocessing import StandardScaler\n",
142 | "from sklearn.neighbors import KNeighborsClassifier\n",
143 | "from sklearn.model_selection import GridSearchCV\n",
144 | "\n",
145 | "\n",
146 | "pipe = make_pipeline(# YOUR CODE HERE\n",
147 | " # YOUR CODE HERE\n",
148 | ")\n",
149 | "\n",
150 | "param_grid = [{ # YOUR CODE HERE }]\n",
151 | "\n",
152 | "\n",
153 | "gs = GridSearchCV(# YOUR CODE HERE \n",
154 | " # YOUR CODE HERE \n",
155 | " iid=False,\n",
156 | " n_jobs=-1,\n",
157 | " refit=True,\n",
158 | " scoring='accuracy',\n",
159 | " cv=10)\n",
160 | "\n",
161 | "gs.fit(X_train, y_train)\n",
162 | "\n",
163 | "print('Best Accuracy: %.2f%%' % (gs.best_score_*100))"
164 | ]
165 | },
166 | {
167 | "cell_type": "markdown",
168 | "metadata": {},
169 | "source": [
170 | "Next, print the best parameters obtained from the `GridSearchCV` run and compute the accuracy a `KNearestNeighborClassifier` would achieve with these settings on the test set (`X_test`, `y_test`)."
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "metadata": {},
177 | "outputs": [],
178 | "source": [
179 | "# MODIFY THIS CELL\n",
180 | "\n",
181 | "print('Best Params: %s' % # YOUR CODE HERE)\n",
182 | "print('Test Accuracy: %.2f%%' % # YOUR CODE HERE)"
183 | ]
184 | },
185 | {
186 | "cell_type": "markdown",
187 | "metadata": {},
188 | "source": [
189 | "\n",
190 | "
\n",
191 | "
\n",
192 | "
\n",
193 | "
\n",
194 | "
\n",
195 | "
\n",
196 | "
"
197 | ]
198 | },
199 | {
200 | "cell_type": "markdown",
201 | "metadata": {},
202 | "source": [
203 | "### 1.2 [10 pts] Estimate the Generalization Performance using the '.632+' Bootstrap"
204 | ]
205 | },
206 | {
207 | "cell_type": "markdown",
208 | "metadata": {},
209 | "source": [
210 | "In this exercise, you are asked to compute the accuracy of the model from the previous exercise (1.1) on the test set (`X_test`, `y_test`) using the .632+ Bootstrap method. For this you can use the `bootstrap_point632_score` function implemented in MLxtend for this: \n",
211 | "http://rasbt.github.io/mlxtend/user_guide/evaluate/bootstrap_point632_score/"
212 | ]
213 | },
214 | {
215 | "cell_type": "markdown",
216 | "metadata": {},
217 | "source": [
218 | "- use 200 bootstrap rounds\n",
219 | "- set the random seed to 1\n",
220 | "\n",
221 | "The accruacy should be the mean accuracy over the 200 bootstrap values that the `bootstrap_point632_score` method returns."
222 | ]
223 | },
224 | {
225 | "cell_type": "code",
226 | "execution_count": null,
227 | "metadata": {},
228 | "outputs": [],
229 | "source": [
230 | "# MODIFY THIS CELL\n",
231 | "\n",
232 | "from mlxtend.evaluate import bootstrap_point632_score\n",
233 | "import numpy as np\n",
234 | "\n",
235 | "\n",
236 | "scores = bootstrap_point632_score(# YOUR CODE HERE)\n",
237 | "\n",
238 | "acc = # YOUR CODE HERE\n",
239 | "print('Accuracy: %.2f%%' % (100*acc))"
240 | ]
241 | },
242 | {
243 | "cell_type": "markdown",
244 | "metadata": {},
245 | "source": [
246 | "Next, compute the lower and upper bound on the mean accuracy via a 95% confidence interval. For that, you should use the `scores` you computed in the cell above."
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": null,
252 | "metadata": {},
253 | "outputs": [],
254 | "source": [
255 | "# MODIFY THIS CELL\n",
256 | "\n",
257 | "lower = # YOUR CODE\n",
258 | "upper = # YOUR CODE\n",
259 | "\n",
260 | "print('95%% Confidence interval: [%.2f, %.2f]' % (100*lower, 100*upper))"
261 | ]
262 | },
263 | {
264 | "cell_type": "markdown",
265 | "metadata": {},
266 | "source": [
267 | "\n",
268 | "
\n",
269 | "
\n",
270 | "
\n",
271 | "
\n",
272 | "
\n",
273 | "
\n",
274 | "
"
275 | ]
276 | },
277 | {
278 | "cell_type": "markdown",
279 | "metadata": {},
280 | "source": [
281 | "## 2. Confusion Matrices"
282 | ]
283 | },
284 | {
285 | "cell_type": "markdown",
286 | "metadata": {},
287 | "source": [
288 | "### 2.1 [10 pts] Contructing a Binary Confusion Matrix"
289 | ]
290 | },
291 | {
292 | "cell_type": "markdown",
293 | "metadata": {},
294 | "source": [
295 | "The task of this execise is to construct a binary confusion matrix based of the following form:\n",
296 | "\n",
297 | "\n",
298 | "\n",
299 | "Here, assume that the positive class is the class with label 0, and the negative class is the class with label 1. You are given an array of the actual class labels, `y_true`, as well as an array of the predicted class labels, `y_predicted`. The output should be a numpy array, like shown below\n",
300 | "\n",
301 | "```\n",
302 | "array([[101, 21],\n",
303 | " [41, 121]])\n",
304 | "``` \n",
305 | " \n",
306 | "(Note that these number in the array are not the actual, expected or correct values.)\n",
307 | "\n",
308 | "Using the `plot_confusion_matrix` from the `helper.py` script (which should be in the same directory as this notebook) the example array/confusion matrix is visualized as follows:"
309 | ]
310 | },
311 | {
312 | "cell_type": "code",
313 | "execution_count": null,
314 | "metadata": {},
315 | "outputs": [],
316 | "source": [
317 | "%matplotlib inline"
318 | ]
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": null,
323 | "metadata": {},
324 | "outputs": [],
325 | "source": [
326 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n",
327 | "\n",
328 | "import numpy as np\n",
329 | "from helper import plot_confusion_matrix\n",
330 | "import matplotlib.pyplot as plt\n",
331 | "\n",
332 | "\n",
333 | "example_cm = np.array([[101, 21],\n",
334 | " [41, 121]])\n",
335 | "\n",
336 | "plot_confusion_matrix(example_cm)\n",
337 | "plt.show()"
338 | ]
339 | },
340 | {
341 | "cell_type": "markdown",
342 | "metadata": {},
343 | "source": [
344 | "Now, your task is to complete the `confusion_matrix_binary` below in order to construct a confusion matrix from 2 label arrays:\n",
345 | "\n",
346 | "- `y_true` (true or actual class labels)\n",
347 | "- `y_predicted` (class labels predicted by a classifier)\n",
348 | "\n",
349 | "To make it easier for you, you only need to replace the `???`'s with the right variable name (`tp`, `fn`, `fp`, or `tn`)."
350 | ]
351 | },
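{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a sanity check for your counts, note that a quantity like `tp` can also be computed with boolean masks; a minimal sketch on toy arrays (remember that label 0 is the positive class here):\n",
"\n",
"```python\n",
"import numpy as np\n",
"\n",
"a = np.array([0, 0, 1, 1])  # toy actual labels\n",
"b = np.array([0, 1, 1, 0])  # toy predicted labels\n",
"\n",
"tp_check = np.sum((a == 0) & (b == 0))  # positive (label 0) in both arrays\n",
"print(tp_check)  # 1\n",
"```"
]
},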
352 | {
353 | "cell_type": "code",
354 | "execution_count": null,
355 | "metadata": {},
356 | "outputs": [],
357 | "source": [
358 | "# MODIFY THIS CELL\n",
359 | "\n",
360 | "\n",
361 | "y_true = np.array([1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0])\n",
362 | "y_predicted = np.array([1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0])\n",
363 | "\n",
364 | "\n",
365 | "def confusion_matrix_binary(y_true, y_predicted):\n",
366 | "\n",
367 | " tp, fn, fp, tn = 0, 0, 0, 0\n",
368 | " \n",
369 | " for i, j in zip(y_true, y_predicted):\n",
370 | " if i == j:\n",
371 | " if i == 0:\n",
372 | " ??? += 1\n",
373 | " else:\n",
374 | " ??? += 1\n",
375 | " else:\n",
376 | " if i == 0:\n",
377 | " ??? += 1\n",
378 | " else:\n",
379 | " ??? += 1\n",
380 | " \n",
381 | " conf_matrix = np.zeros(4).reshape(2, 2).astype(int)\n",
382 | " conf_matrix[0, 0] = ???\n",
383 | " conf_matrix[0, 1] = ???\n",
384 | " conf_matrix[1, 0] = ???\n",
385 | " conf_matrix[1, 1] = ??? \n",
386 | " \n",
387 | " return conf_matrix\n",
388 | "\n",
389 | "result_matrix = confusion_matrix_binary(y_true, y_predicted)"
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "execution_count": null,
395 | "metadata": {},
396 | "outputs": [],
397 | "source": [
398 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n",
399 | "\n",
400 | "print('Conusion matrix array:\\n', result_matrix)"
401 | ]
402 | },
403 | {
404 | "cell_type": "code",
405 | "execution_count": null,
406 | "metadata": {},
407 | "outputs": [],
408 | "source": [
409 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n",
410 | "\n",
411 | "plot_confusion_matrix(result_matrix)\n",
412 | "plt.show()"
413 | ]
414 | },
415 | {
416 | "cell_type": "markdown",
417 | "metadata": {},
418 | "source": [
419 | "\n",
420 | "
\n",
421 | "
\n",
422 | "
\n",
423 | "
\n",
424 | "
\n",
425 | "
\n",
426 | "
"
427 | ]
428 | },
429 | {
430 | "cell_type": "markdown",
431 | "metadata": {},
432 | "source": [
433 | "### 2.2 [10 pts] Constructing a Multiclass Confusion Matrix"
434 | ]
435 | },
436 | {
437 | "cell_type": "markdown",
438 | "metadata": {},
439 | "source": [
440 | "Next, write a version of this confusion matrix that generalizes to multi-class settings as shown in the figure below:\n",
441 | "\n",
442 | " \n",
443 | "\n",
444 | "\n",
445 | "\n",
446 | "Again, the output should be a 2D NumPy array:\n",
447 | "\n",
448 | "```\n",
449 | "array([[3, 0, 0],\n",
450 | " [7, 50, 12],\n",
451 | " [0, 0, 18]])\n",
452 | "```\n",
453 | " \n",
454 | "(Note that these number in the array are not the actual, expected or correct values for this exercise.)\n",
455 | "\n",
456 | "\n",
457 | "There are many different ways to implement a function to construct a multi-class confusion matrix, and in this exercise, you are given the freedom to implement it however way you prefer. Please note though that you should not import confusion matrix code from other packages but implement it by your self in Python (and NumPy)."
458 | ]
459 | },
460 | {
461 | "cell_type": "markdown",
462 | "metadata": {},
463 | "source": [
464 | "Note that if there are 5 different class labels (0, ..., 4), then the result should be a 5x5 confusion matrix."
465 | ]
466 | },
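{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a starting point, one common approach (a sketch, by no means the only valid one) is to allocate an all-zero matrix and increment the entry indexed by each (actual, predicted) label pair; this sketch assumes consecutive integer labels 0, ..., n-1, as in this exercise:\n",
"\n",
"```python\n",
"import numpy as np\n",
"\n",
"toy_true = [0, 1, 1, 2]\n",
"toy_pred = [0, 1, 2, 2]\n",
"\n",
"n_labels = np.unique(np.concatenate((toy_true, toy_pred))).shape[0]\n",
"mat = np.zeros((n_labels, n_labels), dtype=int)\n",
"for t, p in zip(toy_true, toy_pred):\n",
"    mat[t, p] += 1  # row = actual label, column = predicted label\n",
"print(mat)\n",
"```"
]
},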
467 | {
468 | "cell_type": "code",
469 | "execution_count": null,
470 | "metadata": {},
471 | "outputs": [],
472 | "source": [
473 | "## FOR STUDENTS\n",
474 | "\n",
475 | "\n",
476 | "import numpy as np\n",
477 | "\n",
478 | "\n",
479 | "def confusion_matrix_multiclass(y_true, y_predicted):\n",
480 | "\n",
481 | " # YOUR CODE (As many lines of code as you like)\n",
482 | " \n",
483 | " return matrix\n",
484 | "\n",
485 | "\n",
486 | "y_true = [1, 1, 1, 1, 0, 2, 0, 3, 4, 2, 1, 2, 2, 1, 2, 1, 0, 1, 1, 0]\n",
487 | "y_predicted = [1, 0, 1, 1, 0, 2, 1, 3, 4, 2, 2, 0, 2, 1, 2, 1, 0, 3, 1, 1]\n",
488 | "\n",
489 | "result_matrix = confusion_matrix_multiclass(y_true, y_predicted)\n",
490 | "result_matrix"
491 | ]
492 | },
493 | {
494 | "cell_type": "code",
495 | "execution_count": null,
496 | "metadata": {},
497 | "outputs": [],
498 | "source": [
499 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n",
500 | "\n",
501 | "from helper import plot_confusion_matrix\n",
502 | "\n",
503 | "\n",
504 | "plot_confusion_matrix(result_matrix)\n",
505 | "plt.show()"
506 | ]
507 | },
508 | {
509 | "cell_type": "markdown",
510 | "metadata": {},
511 | "source": [
512 | "\n",
513 | "
\n",
514 | "
\n",
515 | "
\n",
516 | "
\n",
517 | "
\n",
518 | "
\n",
519 | "
"
520 | ]
521 | },
522 | {
523 | "cell_type": "markdown",
524 | "metadata": {},
525 | "source": [
526 | "### 2.3 [10 pts] Binary Confusion Matrices for Multiclass Problems"
527 | ]
528 | },
529 | {
530 | "cell_type": "markdown",
531 | "metadata": {},
532 | "source": [
533 | "In this exercise, you will be building binary confusion matrices for multiclass problems as discussed in class when we talked about computing the balanced accuracy. Here, you can reuse the `confusion_matrix_binary` function you implemented in 2.1. \n",
534 | "\n",
535 | "Remember, if we are given 5 class labels (0, ..., 4) then we can construct 5 binary confusion matrices, where each time one of the 5 classes is assigned the positive class where all other classes will be considered as the negative class. The `positive_label` argument in the `binary_cm_from_multiclass` function below can be used to determine which class label refers to the positive class.\n",
536 | "\n",
537 | "Implementing the function below is actually very easy and should only require you to add 2 lines of code with the help of the `np.where` function. "
538 | ]
539 | },
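{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a reminder of how `np.where` can remap labels, here is a minimal sketch on toy values (label 2 is treated as the positive label purely for illustration); recall from 2.1 that the positive class maps to label 0 and the negative class to label 1:\n",
"\n",
"```python\n",
"import numpy as np\n",
"\n",
"ary = np.array([0, 2, 1, 2, 4])\n",
"print(np.where(ary == 2, 0, 1))  # [1 0 1 0 1]\n",
"```"
]
},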
540 | {
541 | "cell_type": "code",
542 | "execution_count": null,
543 | "metadata": {},
544 | "outputs": [],
545 | "source": [
546 | "# MODIFY THIS CELL\n",
547 | "\n",
548 | "def binary_cm_from_multiclass(y_true, y_predicted, positive_label):\n",
549 | " \n",
550 | " y_true_ary = np.array(y_true)\n",
551 | " y_predicted_ary = np.array(y_predicted)\n",
552 | " \n",
553 | " y_true_mod = np.where( # YOUR CODE\n",
554 | " y_predicted_mod = np.where( # YOUR CODE\n",
555 | " \n",
556 | " cm = confusion_matrix_binary(y_true_mod, y_predicted_mod)\n",
557 | " return cm"
558 | ]
559 | },
560 | {
561 | "cell_type": "markdown",
562 | "metadata": {},
563 | "source": [
564 | "As a hint, the expected output for label 0 as positive label is shown below:"
565 | ]
566 | },
567 | {
568 | "cell_type": "markdown",
569 | "metadata": {},
570 | "source": [
571 | ""
572 | ]
573 | },
574 | {
575 | "cell_type": "code",
576 | "execution_count": null,
577 | "metadata": {},
578 | "outputs": [],
579 | "source": [
580 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n",
581 | "\n",
582 | "\n",
583 | "y_true = [1, 1, 1, 1, 0, 2, 0, 3, 4, 2, 1, 2, 2, 1, 2, 1, 0, 1, 1, 0]\n",
584 | "y_predicted = [1, 0, 1, 1, 0, 2, 1, 3, 4, 2, 2, 0, 2, 1, 2, 1, 0, 3, 1, 1]\n",
585 | "\n",
586 | "\n",
587 | "mat_pos0 = binary_cm_from_multiclass(y_true, y_predicted, positive_label=0)\n",
588 | "print('Positive Label 0:\\n', mat_pos0)\n",
589 | "\n",
590 | "fig, ax = plot_confusion_matrix(mat_pos0)\n",
591 | "ax.set_xticklabels(['', 'Pos Class (0)', 'Neg Class (Rest)'])\n",
592 | "ax.set_yticklabels(['', 'Pos Class (0)', 'Neg Class (Rest)']);"
593 | ]
594 | },
595 | {
596 | "cell_type": "code",
597 | "execution_count": null,
598 | "metadata": {},
599 | "outputs": [],
600 | "source": [
601 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n",
602 | "\n",
603 | "mat_pos1 = binary_cm_from_multiclass(y_true, y_predicted, positive_label=1)\n",
604 | "print('\\n\\nPositive Label 1:\\n', mat_pos1)\n",
605 | "\n",
606 | "fig, ax = plot_confusion_matrix(mat_pos1)\n",
607 | "ax.set_xticklabels(['', 'Pos Class (1)', 'Neg Class (Rest)'])\n",
608 | "ax.set_yticklabels(['', 'Pos Class (1)', 'Neg Class (Rest)']);\n",
609 | "\n",
610 | "plt.show()"
611 | ]
612 | },
613 | {
614 | "cell_type": "markdown",
615 | "metadata": {},
616 | "source": [
617 | "\n",
618 | "
\n",
619 | "
\n",
620 | "
\n",
621 | "
\n",
622 | "
\n",
623 | "
\n",
624 | "
"
625 | ]
626 | },
627 | {
628 | "cell_type": "markdown",
629 | "metadata": {},
630 | "source": [
631 | "## 3. [10 pts] Balanced Accuracy"
632 | ]
633 | },
634 | {
635 | "cell_type": "markdown",
636 | "metadata": {},
637 | "source": [
638 | "Based on our discussion in class, implement a function that computes the balanced accuracy. You can implement the accuracy whatever way you like using Python and NumPy. Note that you can also re-use the binary confusion matrix code and the `binary_cm_from_multiclass` code if you like (but you don't have to).\n",
639 | "\n",
640 | "Below is a template that you can use that does not require code from the previous exercises (but you can write the function in a different way if you like as long as it gives the correct results)."
641 | ]
642 | },
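{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick numeric check with made-up numbers: if a classifier gets 9 of 10 examples of class 0 right and 30 of 60 examples of class 1 right, the balanced accuracy is the mean of the per-class accuracies:\n",
"\n",
"```python\n",
"per_class = [9/10, 30/60]  # per-class accuracies: 0.9 and 0.5\n",
"print(sum(per_class) / len(per_class))  # 0.7\n",
"```"
]
},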
643 | {
644 | "cell_type": "code",
645 | "execution_count": null,
646 | "metadata": {},
647 | "outputs": [],
648 | "source": [
649 | "# MODIFY THIS CELL\n",
650 | "\n",
651 | "import numpy as np\n",
652 | "\n",
653 | "\n",
654 | "def balanced_accuracy(y_true, y_predicted):\n",
655 | " \n",
656 | " y_true_ary = np.array(y_true)\n",
657 | " y_predicted_ary = np.array(y_predicted)\n",
658 | " \n",
659 | " unique_labels = np.unique(np.concatenate((y_true_ary, y_predicted_ary)))\n",
660 | " class_accuracies = []\n",
661 | " for l in unique_labels:\n",
662 | " # YOUR CODE HERE\n",
663 | " # YOUR CODE HERE\n",
664 | " # YOUR CODE HERE\n",
665 | " class_accuracies.append(acc)\n",
666 | " return np.mean(class_accuracies)"
667 | ]
668 | },
669 | {
670 | "cell_type": "code",
671 | "execution_count": null,
672 | "metadata": {},
673 | "outputs": [],
674 | "source": [
675 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n",
676 | "\n",
677 | "y_targ = [1, 1, 2, 1, 1, 2, 0, 3]\n",
678 | "y_pred = [0, 0, 2, 1, 1, 2, 1, 3]\n",
679 | " \n",
680 | "balanced_accuracy(y_targ, y_pred)"
681 | ]
682 | },
683 | {
684 | "cell_type": "markdown",
685 | "metadata": {},
686 | "source": [
687 | "\n",
688 | "
\n",
689 | "
\n",
690 | "
\n",
691 | "
\n",
692 | "
\n",
693 | "
\n",
694 | "
"
695 | ]
696 | },
697 | {
698 | "cell_type": "markdown",
699 | "metadata": {},
700 | "source": [
701 | "## 4. Receiver Operater Characteristic (ROC)"
702 | ]
703 | },
704 | {
705 | "cell_type": "markdown",
706 | "metadata": {},
707 | "source": [
708 | "### 4.1 [10 pts] Plotting a ROC Curve"
709 | ]
710 | },
711 | {
712 | "cell_type": "markdown",
713 | "metadata": {},
714 | "source": [
715 | "In this exercise, you are asked to plot a ROC curve. You are given a 2D array of probability values (`y_probabilities`; see next code cells) where \n",
716 | "- a value in the first column refer to the probability that a given test example (each row is one test example) belongs to class 0\n",
717 | "- a value in the second column refer to the probability that a given test example belongs to class 1"
718 | ]
719 | },
720 | {
721 | "cell_type": "code",
722 | "execution_count": null,
723 | "metadata": {},
724 | "outputs": [],
725 | "source": [
726 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n",
727 | "\n",
728 | "\n",
729 | "from mlxtend.data import iris_data\n",
730 | "from sklearn.model_selection import train_test_split\n",
731 | "from sklearn.linear_model import LogisticRegression\n",
732 | "\n",
733 | "\n",
734 | "X, y = iris_data()\n",
735 | "X, y = X[:100, [1]], y[:100]\n",
736 | "X_train, X_test, y_train, y_test = \\\n",
737 | " train_test_split(X, y, test_size=0.5, shuffle=True, random_state=0, stratify=y)\n",
738 | "\n",
739 | "model = LogisticRegression(solver='lbfgs', random_state=123)\n",
740 | "model.fit(X_train, y_train)\n",
741 | "\n",
742 | "y_probabilities = model.predict_proba(X_test)\n",
743 | "\n",
744 | "print(y_probabilities)"
745 | ]
746 | },
747 | {
748 | "cell_type": "markdown",
749 | "metadata": {},
750 | "source": [
751 | "For this exercise, these scores are probabilities here, but scores can be obtained from an arbitrary classifier (ROC curves are not limited to logistic regression classifiers). For instance, in k-nearest neighbor classifiers, we can consider the fraction of the majority class labels and number of neighbors as the score. In decision tree classifiers, the score can be calculated as the ratio of the majority class labels and number of data points at a given node.\n",
752 | "\n",
753 | "(In case you are curious, 'lbfgs' stands for Limited-memory BFGS, which is an optimization algorithm in the family of quasi-Newton methods that approximates the Broyden–Fletcher–Goldfarb–Shanno; not important to know here though.) "
754 | ]
755 | },
756 | {
757 | "cell_type": "markdown",
758 | "metadata": {},
759 | "source": [
760 | "**Note: You should only use Python base functions, NumPy, and matplotlib to get full points (do not use other external libraries)**"
761 | ]
762 | },
763 | {
764 | "cell_type": "markdown",
765 | "metadata": {},
766 | "source": [
767 | "The `pos_label` argument is used to specify the positive label and the threshold. For instance, if we are given score\n",
768 | "0.8, this score refers to the \"probability\" of the positive label. Assuming that the positive label is 1, this refers to a 80% probability that the true class label is 1. \n",
769 | "\n",
770 | "- Note that in the `y_probabilities` array, the second column refers to the probabilities of class label 1.\n",
771 | "- The `plot_roc_curve` function should only receive a 1D array for `y_score`. E.g., \n",
772 | "\n",
773 | "if `y_probabilities` is \n",
774 | "\n",
775 | "```\n",
776 | "[[0.44001556 0.55998444]\n",
777 | " [0.69026364 0.30973636]\n",
778 | " [0.31814182 0.68185818]\n",
779 | " [0.56957726 0.43042274]\n",
780 | " [0.86339788 0.13660212]\n",
781 | " [0.56957726 0.43042274]\n",
782 | " [0.86339788 0.13660212]\n",
783 | " [0.44001556 0.55998444]\n",
784 | " [0.08899234 0.91100766]\n",
785 | " [0.50487831 0.49512169]\n",
786 | " [0.74306586 0.25693414]\n",
787 | "```\n",
788 | " \n",
789 | "The `y_score` array is expected to be \n",
790 | "\n",
791 | "a) `y_score = [0.5599..., 0.3097..., 0.6818..., 0.4304..., ...]` for `pos_label=1`\n",
792 | "\n",
793 | "and \n",
794 | "\n",
795 | "b) `y_score = [0.4400..., 0.6902..., 0.3181..., 0.5695..., ...]` for `pos_label=0`"
796 | ]
797 | },
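{
"cell_type": "markdown",
"metadata": {},
"source": [
"To make one iteration of the threshold loop concrete, here is a minimal sketch on made-up values (the variable names are hypothetical). Recall that the true positive rate is the number of true positives divided by the number of actual positives, and the false positive rate is the number of false positives divided by the number of actual negatives:\n",
"\n",
"```python\n",
"import numpy as np\n",
"\n",
"toy_true = np.array([1, 0, 1, 0])\n",
"toy_score = np.array([0.9, 0.6, 0.4, 0.2])  # scores for positive class 1\n",
"\n",
"thr = 0.5\n",
"binarized = np.where(toy_score >= thr, 1, 0)  # [1, 1, 0, 0]\n",
"\n",
"tp = np.sum((binarized == 1) & (toy_true == 1))  # 1\n",
"fp = np.sum((binarized == 1) & (toy_true == 0))  # 1\n",
"print(tp / np.sum(toy_true == 1))  # TPR = 0.5\n",
"print(fp / np.sum(toy_true == 0))  # FPR = 0.5\n",
"```"
]
},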
798 | {
799 | "cell_type": "code",
800 | "execution_count": null,
801 | "metadata": {},
802 | "outputs": [],
803 | "source": [
804 | "# MODIFY THIS CELL\n",
805 | "\n",
806 | "\n",
807 | "import matplotlib.pyplot as plt\n",
808 | "import numpy as np\n",
809 | "\n",
810 | "\n",
811 | "def plot_roc_curve(y_true, y_score, pos_label=1, num_thresholds=100):\n",
812 | "\n",
813 | " y_true_ary = np.array(y_true)\n",
814 | " y_score_ary = np.array(y_score)\n",
815 | " x_axis_values = []\n",
816 | " y_axis_values = []\n",
817 | " thresholds = np.linspace(0., 1., num_thresholds)\n",
818 | "\n",
819 | " num_positives = # YOUR CODE\n",
820 | " num_negatives = # YOUR CODE\n",
821 | "\n",
822 | " for i, thr in enumerate(thresholds):\n",
823 | " \n",
824 | " binarized_scores = np.where(y_score >= thr, pos_label, int(not pos_label))\n",
825 | " \n",
826 | " positive_predictions = # YOUR CODE\n",
827 | " num_true_positives = # YOUR CODE\n",
828 | " num_false_positives = # YOUR CODE\n",
829 | " \n",
830 | " x_axis_values.append(# YOUR CODE)\n",
831 | " y_axis_values.append(# YOUR CODE)\n",
832 | "\n",
833 | " plt.step(x_axis_values, y_axis_values, where='post')\n",
834 | " \n",
835 | " plt.xlim([0., 1.01])\n",
836 | " plt.ylim([0., 1.01])\n",
837 | " plt.ylabel('True Positive Rate')\n",
838 | " plt.xlabel('False Positive Rate')\n",
839 | " \n",
840 | " return None"
841 | ]
842 | },
843 | {
844 | "cell_type": "code",
845 | "execution_count": null,
846 | "metadata": {
847 | "scrolled": true
848 | },
849 | "outputs": [],
850 | "source": [
851 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n",
852 | "\n",
853 | "plot_roc_curve(y_test, y_probabilities[:, 1], pos_label=1)\n",
854 | "plt.show()"
855 | ]
856 | },
857 | {
858 | "cell_type": "code",
859 | "execution_count": null,
860 | "metadata": {},
861 | "outputs": [],
862 | "source": [
863 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n",
864 | "\n",
865 | "plot_roc_curve(y_test, y_probabilities[:, 0], pos_label=0)\n",
866 | "plt.show()"
867 | ]
868 | },
869 | {
870 | "cell_type": "markdown",
871 | "metadata": {},
872 | "source": [
873 | "\n",
874 | "
\n",
875 | "
\n",
876 | "
\n",
877 | "
\n",
878 | "
\n",
879 | "
\n",
880 | "
"
881 | ]
882 | },
883 | {
884 | "cell_type": "markdown",
885 | "metadata": {},
886 | "source": [
887 | "### 4.2 [10 pts] Calculating the ROC AUC"
888 | ]
889 | },
890 | {
891 | "cell_type": "markdown",
892 | "metadata": {},
893 | "source": [
894 | "In this exercise, you are asked to modify your previous `plot_roc_curve` function to compute the ROC area under the curve (ROC AUC). To compute the ROC AUC, you can use NumPy's `trapz` function for your convenience (https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.trapz.html).\n",
895 | "\n",
896 | "- As before, you should only use basic Python functions, NumPy, and matplotlib to get full points for this exercise (do not use other external libraries)"
897 | ]
898 | },
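{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a reminder, `np.trapz` integrates a sequence of y-values over the corresponding x-values using the trapezoidal rule; a minimal sketch:\n",
"\n",
"```python\n",
"import numpy as np\n",
"\n",
"# area under the piecewise-linear curve through (0, 0), (0.5, 1), (1, 1)\n",
"print(np.trapz(y=[0., 1., 1.], x=[0., 0.5, 1.]))  # 0.75\n",
"```"
]
},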
899 | {
900 | "cell_type": "code",
901 | "execution_count": null,
902 | "metadata": {},
903 | "outputs": [],
904 | "source": [
905 | "# MODIFY THIS CELL\n",
906 | "\n",
907 | "\n",
908 | "def plot_roc_curve_plus_auc(y_true, y_score, pos_label=1, num_thresholds=100):\n",
909 | "\n",
910 | " # INSERT YOUR CODE FROM THE PREVIOUS EXERCISE HERE\n",
911 | " # BUT MODIFY IT SUCH THAT IT ALSO RETURNS THE\n",
912 | " # ROC Area Under the Curve\n",
913 | " return roc_auc"
914 | ]
915 | },
916 | {
917 | "cell_type": "markdown",
918 | "metadata": {},
919 | "source": [
920 | "1) Calculate the ROC AUC for the positive class label 0"
921 | ]
922 | },
923 | {
924 | "cell_type": "code",
925 | "execution_count": null,
926 | "metadata": {},
927 | "outputs": [],
928 | "source": [
929 | "# DON'T MODIFY BUT EXECUTE THIS CELL TO SHOW YOUR SOLUTION\n",
930 | "\n",
931 | "auc = plot_roc_curve_plus_auc(y_test, y_probabilities[:, 0], pos_label=0)\n",
932 | "print('ROC AUC: %.4f' % auc)"
933 | ]
934 | },
935 | {
936 | "cell_type": "markdown",
937 | "metadata": {},
938 | "source": [
939 | "2) Calculate the ROC AUC for the positive class label 1"
940 | ]
941 | },
942 | {
943 | "cell_type": "code",
944 | "execution_count": null,
945 | "metadata": {},
946 | "outputs": [],
947 | "source": [
948 | "# DON'T MODIFY BUT EXECUTE THIS CELL TO SHOW YOUR SOLUTION\n",
949 | "\n",
950 | "auc = plot_roc_curve_plus_auc(y_test, y_probabilities[:, 1], pos_label=1)\n",
951 | "print('ROC AUC: %.4f' % auc)"
952 | ]
953 | },
954 | {
955 | "cell_type": "markdown",
956 | "metadata": {},
957 | "source": [
958 | "\n",
959 | "
\n",
960 | "
\n",
961 | "
\n",
962 | "
\n",
963 | "
\n",
964 | "
\n",
965 | "
"
966 | ]
967 | },
968 | {
969 | "cell_type": "markdown",
970 | "metadata": {},
971 | "source": [
972 | "## 5. Feature Importance"
973 | ]
974 | },
975 | {
976 | "cell_type": "markdown",
977 | "metadata": {},
978 | "source": [
979 | "### [10 pts] 5.1 Drop-Column Feature Importance"
980 | ]
981 | },
982 | {
983 | "cell_type": "markdown",
984 | "metadata": {},
985 | "source": [
986 | "In this exercise, you are asked to implement the \"drop-column feature importance\" method discussed in class, to measure the importance of individual features present in a dataset.\n",
987 | "\n",
988 | "\n",
989 | "- You will be using regular accuracy measure as performance metric\n",
990 | "- Use 5 fold cross-validation to compute the accuracies\n",
991 | "\n",
992 | "The dataset you will be using for this exercise is the so-called \"Wine\" dataset. \n",
993 | "\n",
994 | "The Wine dataset is another open-source dataset that is available from the UCI machine learning repository (https://archive.ics.uci.edu/ml/datasets/Wine); it consists of 178 wine samples with 13 features describing their different chemical properties.\n",
995 | "\n",
996 | "The 13 different features in the Wine dataset, describing the chemical properties of the 178 wine samples, are listed in the following table that you will see after executing the next code cell.\n"
997 | ]
998 | },
999 | {
1000 | "cell_type": "code",
1001 | "execution_count": null,
1002 | "metadata": {},
1003 | "outputs": [],
1004 | "source": [
1005 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n",
1006 | "\n",
1007 | "\n",
1008 | "import pandas as pd\n",
1009 | "\n",
1010 | "df_wine = pd.read_csv('data/wine.data',\n",
1011 | " header=None)\n",
1012 | "\n",
1013 | "df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',\n",
1014 | " 'Alcalinity of ash', 'Magnesium', 'Total phenols',\n",
1015 | " 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',\n",
1016 | " 'Color intensity', 'Hue',\n",
1017 | " 'OD280/OD315 of diluted wines', 'Proline']\n",
1018 | "\n",
1019 | "df_wine.head()"
1020 | ]
1021 | },
1022 | {
1023 | "cell_type": "markdown",
1024 | "metadata": {},
1025 | "source": [
1026 | "The samples belong to one of three different classes, 1, 2, and 3, which refer to the three different types of grape grown in the same region in Italy but derived from different wine cultivars, as described in the dataset summary (https://archive. ics.uci.edu/ml/machine-learning-databases/wine/wine.names)."
1027 | ]
1028 | },
1029 | {
1030 | "cell_type": "code",
1031 | "execution_count": null,
1032 | "metadata": {},
1033 | "outputs": [],
1034 | "source": [
1035 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n",
1036 | "\n",
1037 | "\n",
1038 | "from sklearn.model_selection import train_test_split\n",
1039 | "\n",
1040 | "X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values\n",
1041 | "\n",
1042 | "X_train, X_test, y_train, y_test = \\\n",
1043 | " train_test_split(X, y, test_size=0.3, \n",
1044 | " stratify=y,\n",
1045 | " random_state=0)"
1046 | ]
1047 | },
1048 | {
1049 | "cell_type": "markdown",
1050 | "metadata": {},
1051 | "source": [
1052 | "Now the task is to implement the `feature_importance_dropcolumn` function to compute the feature importance according the Drop-Column method discussed in class. Here, use the `cross_val_score` function from scikit-learn to compute the acccuracy as the average accuracy from 5-fold cross-validation."
1053 | ]
1054 | },
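{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a reminder, `cross_val_score` returns one score per fold, so the average 5-fold accuracy is the mean of the returned array. A minimal sketch of the call, using an unscaled `KNeighborsClassifier` purely for illustration (this is not part of the required solution):\n",
"\n",
"```python\n",
"from sklearn.model_selection import cross_val_score\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"\n",
"knn = KNeighborsClassifier(n_neighbors=5)\n",
"print(cross_val_score(knn, X_train, y_train, cv=5).mean())\n",
"```"
]
},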
1055 | {
1056 | "cell_type": "code",
1057 | "execution_count": null,
1058 | "metadata": {},
1059 | "outputs": [],
1060 | "source": [
1061 | "# MODIFY THIS CELL\n",
1062 | "\n",
1063 | "\n",
1064 | "import numpy as np\n",
1065 | "from sklearn.model_selection import cross_val_score\n",
1066 | "\n",
1067 | "\n",
1068 | "def feature_importance_dropcolumn(estimator, X, y, cv=5):\n",
1069 | "\n",
1070 | " base_accuracy = # YOUR CODE\n",
1071 | " column_indices = np.arange(X.shape[1]).astype(int)\n",
1072 | " drop_accuracies = np.zeros(column_indices.shape[0])\n",
1073 | " \n",
1074 | " for idx in column_indices:\n",
1075 | " mask = np.ones(column_indices.shape[0]).astype(bool)\n",
1076 | " mask[idx] = False\n",
1077 | " drop_accuracy = # YOUR CODE\n",
1078 | " drop_accuracies[idx] = # YOUR CODE\n",
1079 | " \n",
1080 | " return drop_accuracies"
1081 | ]
1082 | },
1083 | {
1084 | "cell_type": "markdown",
1085 | "metadata": {},
1086 | "source": [
1087 | "Next, apply the `feature_importance_dropcolumn` function to the Wine training dataset (`X_train`, `y_train`) on a `KNeighborsClassifier` (you should use the `make_pipeline` function to create an estimator where the features are scaled to z-scores via the `StandardScaler`, since `KNeighborsClassifier` is very sensitive to feature scales).\n",
1088 | "\n",
1089 | "- You should use a `KNeighborsClassifier` with 5 nearest neighbors."
1090 | ]
1091 | },
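{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a reminder of the `make_pipeline` pattern, here is a sketch assembled from the pieces named above (steps execute from left to right):\n",
"\n",
"```python\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"\n",
"# scale to z-scores first, then classify on the scaled features\n",
"example_pipe = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))\n",
"```"
]
},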
1092 | {
1093 | "cell_type": "code",
1094 | "execution_count": null,
1095 | "metadata": {},
1096 | "outputs": [],
1097 | "source": [
1098 | "# MODIFY THIS CELL\n",
1099 | "\n",
1100 | "from sklearn.pipeline import make_pipeline\n",
1101 | "from sklearn.preprocessing import StandardScaler\n",
1102 | "from sklearn.neighbors import KNeighborsClassifier\n",
1103 | "\n",
1104 | "\n",
1105 | "\n",
1106 | "pipe = make_pipeline(\n",
1107 | " # YOUR CODE\n",
1108 | " # YOUE CODE\n",
1109 | ")\n",
1110 | "\n",
1111 | "\n",
1112 | "feature_importance_dropcolumn(# YOUR CODE)"
1113 | ]
1114 | },
1115 | {
1116 | "cell_type": "markdown",
1117 | "metadata": {},
1118 | "source": [
1119 | "\n",
1120 | "
\n",
1121 | "
\n",
1122 | "
\n",
1123 | "
\n",
1124 | "
\n",
1125 | "
\n",
1126 | "
"
1127 | ]
1128 | },
1129 | {
1130 | "cell_type": "markdown",
1131 | "metadata": {},
1132 | "source": [
1133 | "### [10 pts] 5.2 Random Forest Feature Importance"
1134 | ]
1135 | },
1136 | {
1137 | "cell_type": "markdown",
1138 | "metadata": {},
1139 | "source": [
1140 | "First, use a `RandomForestClassifier` in your `feature_importance_dropcolumn` from the previous exercise, 5.1. Use a random forest \n",
1141 | "\n",
1142 | "- with 200 estimators and \n",
1143 | "- random seed 0. "
1144 | ]
1145 | },
1146 | {
1147 | "cell_type": "code",
1148 | "execution_count": null,
1149 | "metadata": {},
1150 | "outputs": [],
1151 | "source": [
1152 | "# MODIFY THIS CELL\n",
1153 | "\n",
1154 | "\n",
1155 | "from sklearn.ensemble import RandomForestClassifier\n",
1156 | "\n",
1157 | "\n",
1158 | "drop_importances = feature_importance_dropcolumn(\n",
1159 | " # YOUR CODE]\n",
1160 | " X=X_train, \n",
1161 | " y=y_train,\n",
1162 | " cv=5)\n",
1163 | "\n",
1164 | "\n",
1165 | "print('Drop Importance from RF:', drop_importances)"
1166 | ]
1167 | },
1168 | {
1169 | "cell_type": "markdown",
1170 | "metadata": {},
1171 | "source": [
1172 | "Next, compute the ranking among the features as determined by the outputs of the previous code cell, saved under `drop_importances`. You may use `np.argsort` in your computation, to compute the ranking, where the highest number should correspond to the most important feature."
1173 | ]
1174 | },
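{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a reminder, `np.argsort` returns the indices that would sort an array in ascending order; a minimal sketch:\n",
"\n",
"```python\n",
"import numpy as np\n",
"\n",
"vals = np.array([0.2, 0.7, 0.1])\n",
"order = np.argsort(vals)  # [2, 0, 1]: indices from smallest to largest value\n",
"print(order[::-1])        # [1, 0, 2]: indices from largest to smallest value\n",
"```"
]
},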
1175 | {
1176 | "cell_type": "code",
1177 | "execution_count": null,
1178 | "metadata": {},
1179 | "outputs": [],
1180 | "source": [
1181 | "# MODIFY THIS CELL\n",
1182 | "\n",
1183 | "\n",
1184 | "# YOUR CODE"
1185 | ]
1186 | },
1187 | {
1188 | "cell_type": "markdown",
1189 | "metadata": {},
1190 | "source": [
1191 | "Which are the 3 most important features? You can either write the feature indices below that correspond to the most important features or write out the full column names (you can see the column names in the pandas `DataFrame` in 5.1)."
1192 | ]
1193 | },
1194 | {
1195 | "cell_type": "markdown",
1196 | "metadata": {},
1197 | "source": [
1198 | "!!! **EDIT THIS CELL TO ENTER YOUR ANSWER** !!!"
1199 | ]
1200 | },
1201 | {
1202 | "cell_type": "markdown",
1203 | "metadata": {},
1204 | "source": [
1205 | "\n",
1206 | "
\n",
1207 | "
\n",
1208 | "
\n",
1209 | "
\n",
1210 | "
\n",
1211 | "
\n",
1212 | "
"
1213 | ]
1214 | },
1215 | {
1216 | "cell_type": "markdown",
1217 | "metadata": {},
1218 | "source": [
1219 | "Next, obtain the feature importance from the random forest classifier directly and compute the ranking as before."
1220 | ]
1221 | },
1222 | {
1223 | "cell_type": "code",
1224 | "execution_count": null,
1225 | "metadata": {},
1226 | "outputs": [],
1227 | "source": [
1228 | "# MODIFY THIS CELL\n",
1229 | "\n",
1230 | "forest = RandomForestClassifier(n_estimators=100, random_state=0)\n",
1231 | "forest.fit(X_train, y_train)\n",
1232 | "\n",
1233 | "print('Random Forest Feature Importance:\\n', # YOUR CODE)"
1234 | ]
1235 | },
1236 | {
1237 | "cell_type": "code",
1238 | "execution_count": null,
1239 | "metadata": {},
1240 | "outputs": [],
1241 | "source": [
1242 | "# MODIFY THIS CELL\n",
1243 | "\n",
1244 | "\n",
1245 | "# YOUR CODE TO RANK THE FEATURES"
1246 | ]
1247 | },
1248 | {
1249 | "cell_type": "markdown",
1250 | "metadata": {},
1251 | "source": [
1252 | "Which are the 3 most important features now? You can either write the feature indices below that correspond to the most important features or write out the full column names (you can see the column names in the pandas `DataFrame` in 5.1)."
1253 | ]
1254 | },
1255 | {
1256 | "cell_type": "markdown",
1257 | "metadata": {},
1258 | "source": [
1259 | "!!! **EDIT THIS CELL TO ENTER YOUR ANSWER** !!!"
1260 | ]
1261 | },
1262 | {
1263 | "cell_type": "markdown",
1264 | "metadata": {},
1265 | "source": [
1266 | "\n",
1267 | "
\n",
1268 | "
\n",
1269 | "
\n",
1270 | "
\n",
1271 | "
\n",
1272 | "
\n",
1273 | "
"
1274 | ]
1275 | },
1276 | {
1277 | "cell_type": "markdown",
1278 | "metadata": {},
1279 | "source": [
1280 | "Finally, use the `feature_importance_permutation` function from mlxtend (http://rasbt.github.io/mlxtend/user_guide/evaluate/feature_importance_permutation/) to compute the most important features. Inside `the feature_importance_permutation` function,\n",
1281 | "\n",
1282 | "- use a random seed of 0\n",
1283 | "- use 50 permutation rounds\n",
1284 | "\n",
1285 | "then print the importance values."
1286 | ]
1287 | },
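{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the calling pattern looks roughly as follows (a sketch, not the definitive solution; the parameter names follow the documentation linked above, and the data arguments are placeholders, so choose the split according to our class discussion). It assumes a fitted `forest` as in the cell below:\n",
"\n",
"```python\n",
"from mlxtend.evaluate import feature_importance_permutation\n",
"\n",
"imp_vals, imp_all = feature_importance_permutation(\n",
"    predict_method=forest.predict,  # assumes a fitted forest\n",
"    X=X_test,  # placeholder split\n",
"    y=y_test,  # placeholder split\n",
"    metric='accuracy',\n",
"    num_rounds=50,\n",
"    seed=0)\n",
"```"
]
},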
1288 | {
1289 | "cell_type": "code",
1290 | "execution_count": null,
1291 | "metadata": {},
1292 | "outputs": [],
1293 | "source": [
1294 | "# MODIFY THIS CELL\n",
1295 | "\n",
1296 | "\n",
1297 | "from mlxtend.evaluate import feature_importance_permutation\n",
1298 | "\n",
1299 | "\n",
1300 | "forest = RandomForestClassifier(n_estimators=100,\n",
1301 | " random_state=0)\n",
1302 | "\n",
1303 | "forest.fit(X_train, y_train)\n",
1304 | "\n",
1305 | "# YOUR CODE"
1306 | ]
1307 | },
1308 | {
1309 | "cell_type": "code",
1310 | "execution_count": null,
1311 | "metadata": {},
1312 | "outputs": [],
1313 | "source": [
1314 | "# MODIFY THIS CELL\n",
1315 | "\n",
1316 | "\n",
1317 | "# YOUR CODE TO RANK THE FEATURES"
1318 | ]
1319 | },
1320 | {
1321 | "cell_type": "markdown",
1322 | "metadata": {},
1323 | "source": [
1324 | "Which are the 3 most important features now? You can either write the feature indices below that correspond to the most important features or write out the full column names (you can see the column names in the pandas `DataFrame` in 5.1)."
1325 | ]
1326 | },
1327 | {
1328 | "cell_type": "markdown",
1329 | "metadata": {},
1330 | "source": [
1331 | "!!! **EDIT THIS CELL TO ENTER YOUR ANSWER** !!!"
1332 | ]
1333 | },
1334 | {
1335 | "cell_type": "markdown",
1336 | "metadata": {},
1337 | "source": [
1338 | "\n",
1339 | "
\n",
1340 | "
\n",
1341 | "
\n",
1342 | "
\n",
1343 | "
\n",
1344 | "
\n",
1345 | "
"
1346 | ]
1347 | },
1348 | {
1349 | "cell_type": "markdown",
1350 | "metadata": {},
1351 | "source": [
1352 | "### [10 pts] 5.3 Creating your Own Feature Selection Transformer Class"
1353 | ]
1354 | },
1355 | {
1356 | "cell_type": "markdown",
1357 | "metadata": {},
1358 | "source": [
1359 | "This section will help you understand how you can implement your own feature selection method in a way that is compatible with scikit-learn.\n",
1360 | "\n",
1361 | "The following code (`ColumnSelector`) implements a feature selector that works similarly to the feature selctors implemented in scikit-learn. However, this `ColumnSelector` does not do anything automatically."
1362 | ]
1363 | },
1364 | {
1365 | "cell_type": "code",
1366 | "execution_count": null,
1367 | "metadata": {},
1368 | "outputs": [],
1369 | "source": [
1370 | "# EXECUTE BUT DO NOT EDIT THIS CELL\n",
1371 | "\n",
1372 | "from sklearn.base import BaseEstimator\n",
1373 | "import numpy as np\n",
1374 | "\n",
1375 | "\n",
1376 | "class ColumnSelector(BaseEstimator):\n",
1377 | "\n",
1378 | " def __init__(self, cols=None):\n",
1379 | " self.cols = cols\n",
1380 | "\n",
1381 | " def fit_transform(self, X, y=None):\n",
1382 | " return self.transform(X=X, y=y)\n",
1383 | "\n",
1384 | " def transform(self, X, y=None):\n",
1385 | " feature_subset = X[:, self.cols]\n",
1386 | " if len(feature_subset.shape) == 1:\n",
1387 | " feature_subset = feature_subset[:, np.newaxis]\n",
1388 | " return feature_subset\n",
1389 | "\n",
1390 | " def fit(self, X, y=None):\n",
1391 | " return self"
1392 | ]
1393 | },
1394 | {
1395 | "cell_type": "markdown",
1396 | "metadata": {},
1397 | "source": [
1398 | "As the name implies, we `ColumnSelector` selects specific columns that we as the user need to specify. For example, consider the Wine dataset from earlier:"
1399 | ]
1400 | },
1401 | {
1402 | "cell_type": "code",
1403 | "execution_count": null,
1404 | "metadata": {},
1405 | "outputs": [],
1406 | "source": [
1407 | "# EXECUTE BUT DO NOT EDIT THIS CELL\n",
1408 | "\n",
1409 | "import pandas as pd\n",
1410 | "\n",
1411 | "df_wine = pd.read_csv('data/wine.data',\n",
1412 | " header=None)\n",
1413 | "\n",
1414 | "df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',\n",
1415 | " 'Alcalinity of ash', 'Magnesium', 'Total phenols',\n",
1416 | " 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',\n",
1417 | " 'Color intensity', 'Hue',\n",
1418 | " 'OD280/OD315 of diluted wines', 'Proline']\n",
1419 | "\n",
1420 | "df_wine.head()"
1421 | ]
1422 | },
1423 | {
1424 | "cell_type": "code",
1425 | "execution_count": null,
1426 | "metadata": {},
1427 | "outputs": [],
1428 | "source": [
1429 | "# EXECUTE BUT DO NOT EDIT THIS CELL\n",
1430 | "\n",
1431 | "from sklearn.model_selection import train_test_split\n",
1432 | "\n",
1433 | "X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values\n",
1434 | "\n",
1435 | "X_train, X_test, y_train, y_test = \\\n",
1436 | " train_test_split(X, y, test_size=0.3, \n",
1437 | " stratify=y,\n",
1438 | " random_state=0)"
1439 | ]
1440 | },
1441 | {
1442 | "cell_type": "markdown",
1443 | "metadata": {},
1444 | "source": [
1445 | "Via the `ColumnSelector`, we can select select specific columns from the dataset. E.g., to select the 1st, 6th, and 9th column, and 12th column, we can initialize the `ColumnSelector` with the argument `cols=[0, 5, 8, 11]` and use the transform method as shown below:"
1446 | ]
1447 | },
1448 | {
1449 | "cell_type": "code",
1450 | "execution_count": null,
1451 | "metadata": {},
1452 | "outputs": [],
1453 | "source": [
1454 | "# EXECUTE BUT DO NOT EDIT THIS CELL\n",
1455 | "\n",
1456 | "col_sele = ColumnSelector(cols=[0, 5, 8, 11])\n",
1457 | "reduced_subset = col_sele.transform(X_train)\n",
1458 | "\n",
1459 | "print('Original feature set size:', X_train.shape)\n",
1460 | "print('Selected feature set size:', reduced_subset.shape)"
1461 | ]
1462 | },
1463 | {
1464 | "cell_type": "markdown",
1465 | "metadata": {},
1466 | "source": [
1467 | "Your task now is to use the `feature_importances_` attribute from a fitted random forest model inside a custom feature selector. Using this feature selector, you should be able to select features as follows:\n",
1468 | "\n",
1469 | "\n",
1470 | "```python\n",
1471 | "\n",
1472 | "forest = RandomForestClassifier(n_estimators=100, random_state=123)\n",
1473 | "\n",
1474 | "selector = ImportanceSelector(num_features=3, random_forest_estimator=forest)\n",
1475 | "selector.fit(X_train, y_train)\n",
1476 | "reduced_train_features = selector.transform(X_train, y_train)\n",
1477 | "```\n",
1478 | "\n",
1479 | "- If `num_features=3` as shown above, this means that we are interested to select the top 3 most important features from a dataset based on the random forest feature importance values.\n",
1480 | "\n",
1481 | "\n",
1482 | "- Actually, while it might be more interesting to implement a feature selctor based on the column-drop performance (which would then be somewhat related to sequential feature selection), we use the feature importance values from a `RandomForest`'s `feature_importances_` attribute for simplicity here, to allow you to implement this method in case your `feature_importance_dropcolumn` function does not work correctly."
1483 | ]
1484 | },
1485 | {
1486 | "cell_type": "code",
1487 | "execution_count": null,
1488 | "metadata": {},
1489 | "outputs": [],
1490 | "source": [
1491 | "# MODIFY THIS CELL\n",
1492 | "\n",
1493 | "from sklearn.base import BaseEstimator\n",
1494 | "import numpy as np\n",
1495 | "\n",
1496 | "\n",
1497 | "class ImportanceSelector(BaseEstimator):\n",
1498 | "\n",
1499 | " def __init__(self, num_features, random_forest_estimator):\n",
1500 | " self.num_features = num_features\n",
1501 | " self.forest = random_forest_estimator\n",
1502 | "\n",
1503 | " def transform(self, X, y=None):\n",
1504 | " \n",
1505 | " # Feature by increasing feature importance:\n",
1506 | " features_by_importance = # YOUR CODE\n",
1507 | " top_k_feature_indices = # YOUR CODE\n",
1508 | " \n",
1509 | " feature_subset = X[:, top_k_feature_indices]\n",
1510 | " if len(feature_subset.shape) == 1:\n",
1511 | " feature_subset = feature_subset[:, np.newaxis]\n",
1512 | " return feature_subset\n",
1513 | "\n",
1514 | " def fit(self, X, y=None):\n",
1515 | " self.forest.fit(X, y)\n",
1516 | " return self"
1517 | ]
1518 | },
1519 | {
1520 | "cell_type": "markdown",
1521 | "metadata": {},
1522 | "source": [
1523 | "Now, use the `ImportanceSelector` to select the 3 most important features in the dataset:"
1524 | ]
1525 | },
1526 | {
1527 | "cell_type": "code",
1528 | "execution_count": null,
1529 | "metadata": {},
1530 | "outputs": [],
1531 | "source": [
1532 | "# MODIFY THIS CELL\n",
1533 | "\n",
1534 | "from sklearn.ensemble import RandomForestClassifier\n",
1535 | "\n",
1536 | "\n",
1537 | "forest = RandomForestClassifier(n_estimators=100, random_state=123)\n",
1538 | "\n",
1539 | "selector = # YOUR CODE\n",
1540 | "# YOUR CODE\n",
1541 | "reduced_train_features = # YOUR CODE\n",
1542 | "\n",
1543 | "print('Original feature set size:', X_train.shape)\n",
1544 | "print('Selected feature set size:', reduced_train_features.shape)\n",
1545 | "print('First 5 rows:\\n', reduced_train_features[:5])"
1546 | ]
1547 | },
1548 | {
1549 | "cell_type": "markdown",
1550 | "metadata": {},
1551 | "source": [
1552 | "\n",
1553 | "
\n",
1554 | "
\n",
1555 | "
\n",
1556 | "
\n",
1557 | "
\n",
1558 | "
\n",
1559 | "
"
1560 | ]
1561 | },
1562 | {
1563 | "cell_type": "markdown",
1564 | "metadata": {},
1565 | "source": [
1566 | "## (5 pts) Bonus Exercise: Evaluating a KNN Classifier on Different Feature Subsets"
1567 | ]
1568 | },
1569 | {
1570 | "cell_type": "markdown",
1571 | "metadata": {},
1572 | "source": [
1573 | "In this *Bonus Exercise*, your task is to use a scikit-learn pipeline to fit a KNN classifier based on different 2-feature combinations and different values of *k* (number of neighbors) via grid search. More specifically,\n",
1574 | "\n",
1575 | "1. Create a scikit-learn pipeline that consists of a `StandardScaler`, a `ColumnSelector`, and a `KNeighborsClassifeir` (think about the right way to order these elements in the pipeline);\n",
1576 | "2. Using this pipeline, find the best value for `k` in the KNN classifier as well as the best feature combination (restricted to 2-feature subsets for simplicity) using `GridSearchCV`;\n",
1577 | "3. Fit the best model determined via grid search on the whole training set and evaluate the performance on the test set."
1578 | ]
1579 | },
1580 | {
1581 | "cell_type": "code",
1582 | "execution_count": null,
1583 | "metadata": {},
1584 | "outputs": [],
1585 | "source": [
1586 | "# EXECUTE BUT DO NOT EDIT\n",
1587 | "\n",
1588 | "\n",
1589 | "import pandas as pd\n",
1590 | "\n",
1591 | "\n",
1592 | "df_wine = pd.read_csv('data/wine.data',\n",
1593 | " header=None)\n",
1594 | "\n",
1595 | "df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',\n",
1596 | " 'Alcalinity of ash', 'Magnesium', 'Total phenols',\n",
1597 | " 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',\n",
1598 | " 'Color intensity', 'Hue',\n",
1599 | " 'OD280/OD315 of diluted wines', 'Proline']\n",
1600 | "\n",
1601 | "df_wine.head()"
1602 | ]
1603 | },
1604 | {
1605 | "cell_type": "code",
1606 | "execution_count": null,
1607 | "metadata": {},
1608 | "outputs": [],
1609 | "source": [
1610 | "# EXECUTE BUT DO NOT EDIT\n",
1611 | "\n",
1612 | "from sklearn.model_selection import train_test_split\n",
1613 | "\n",
1614 | "\n",
1615 | "X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values\n",
1616 | "\n",
1617 | "X_train, X_test, y_train, y_test = \\\n",
1618 | " train_test_split(X, y, test_size=0.3, \n",
1619 | " stratify=y,\n",
1620 | " random_state=0)"
1621 | ]
1622 | },
1623 | {
1624 | "cell_type": "code",
1625 | "execution_count": null,
1626 | "metadata": {},
1627 | "outputs": [],
1628 | "source": [
1629 | "# EXECUTE BUT DO NOT EDIT THIS CELL\n",
1630 | "\n",
1631 | "from sklearn.base import BaseEstimator\n",
1632 | "import numpy as np\n",
1633 | "\n",
1634 | "\n",
1635 | "class ColumnSelector(BaseEstimator):\n",
1636 | "\n",
1637 | " def __init__(self, cols=None):\n",
1638 | " self.cols = cols\n",
1639 | "\n",
1640 | " def fit_transform(self, X, y=None):\n",
1641 | " return self.transform(X=X, y=y)\n",
1642 | "\n",
1643 | " def transform(self, X, y=None):\n",
1644 | " feature_subset = X[:, self.cols]\n",
1645 | " if len(feature_subset.shape) == 1:\n",
1646 | " feature_subset = feature_subset[:, np.newaxis]\n",
1647 | " return feature_subset\n",
1648 | "\n",
1649 | " def fit(self, X, y=None):\n",
1650 | " return self"
1651 | ]
1652 | },
1653 | {
1654 | "cell_type": "markdown",
1655 | "metadata": {},
1656 | "source": [
1657 | "Modify the following code cell to create a list of all possible 2-feature combinations:"
1658 | ]
1659 | },
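{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a reminder, `itertools.combinations(iterable, r)` yields all length-`r` combinations without repetition, in sorted order; a minimal sketch:\n",
"\n",
"```python\n",
"import itertools\n",
"\n",
"print(list(itertools.combinations(range(4), 2)))\n",
"# [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]\n",
"```"
]
},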
1660 | {
1661 | "cell_type": "code",
1662 | "execution_count": null,
1663 | "metadata": {},
1664 | "outputs": [],
1665 | "source": [
1666 | "# MODIFY THIS CELL\n",
1667 | "\n",
1668 | "import itertools\n",
1669 | "\n",
1670 | "\n",
1671 | "all_combin_2 = list(itertools.combinations( # YOUR CODE)\n",
1672 | "\n",
1673 | "\n",
1674 | "print('Number of all possible 2-feature combinations:', len(all_combin_2))"
1675 | ]
1676 | },
1677 | {
1678 | "cell_type": "markdown",
1679 | "metadata": {},
1680 | "source": [
1681 | "Modify the following code cell to create a `pipeline` (as explained at the beginning of this section), and use the given `param_grid` to fit the `GridSearchCV` to obtain the best parameters settings and a classifier fit to `X_train` and `y_train` based on these best hyperparameter values.\n",
1682 | "\n",
1683 | "(Note that the code may take 10-30 seconds to execute.)"
1684 | ]
1685 | },
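{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that the keys in the given `param_grid` rely on `make_pipeline`'s naming convention: each step is named after its lowercased class name, and `__` separates the step name from the parameter name. A minimal sketch:\n",
"\n",
"```python\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"\n",
"demo_pipe = make_pipeline(StandardScaler(), KNeighborsClassifier())\n",
"print(demo_pipe.named_steps.keys())\n",
"# dict_keys(['standardscaler', 'kneighborsclassifier'])\n",
"```"
]
},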
1686 | {
1687 | "cell_type": "code",
1688 | "execution_count": null,
1689 | "metadata": {},
1690 | "outputs": [],
1691 | "source": [
1692 | "# MODIFY THIS CELL\n",
1693 | "\n",
1694 | "from sklearn.pipeline import make_pipeline\n",
1695 | "from sklearn.preprocessing import StandardScaler\n",
1696 | "from sklearn.neighbors import KNeighborsClassifier\n",
1697 | "from sklearn.model_selection import GridSearchCV\n",
1698 | "\n",
1699 | "\n",
1700 | "pipe = make_pipeline(\n",
1701 | "# YOUR CODE\n",
1702 | "# YOUR CODE\n",
1703 | "# YOUR CODE\n",
1704 | ")\n",
1705 | "\n",
1706 | "\n",
1707 | "param_grid = {'kneighborsclassifier__n_neighbors': list(range(1, 8)),\n",
1708 | " 'columnselector__cols': all_combin_2}\n",
1709 | "\n",
1710 | "gsearch = GridSearchCV(pipe,\n",
1711 | " param_grid=param_grid,\n",
1712 | " refit=True,\n",
1713 | " iid=False,\n",
1714 | " cv=5)\n",
1715 | "\n",
1716 | "gsearch.fit(X_train, y_train)"
1717 | ]
1718 | },
1719 | {
1720 | "cell_type": "code",
1721 | "execution_count": null,
1722 | "metadata": {},
1723 | "outputs": [],
1724 | "source": [
1725 | "# EXECUTE BUT DO NOT EDIT\n",
1726 | "\n",
1727 | "\n",
1728 | "print(gsearch.best_params_)"
1729 | ]
1730 | },
1731 | {
1732 | "cell_type": "markdown",
1733 | "metadata": {},
1734 | "source": [
1735 | "Based on the best combination of a 2-feature subset and the number of `n_neigbors` your model should be fit the the training dataset now. Use the fitted model and compute its classification accuracy on the test set (`X_test`, `y_test`)."
1736 | ]
1737 | },
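{
"cell_type": "markdown",
"metadata": {},
"source": [
"Since `refit=True` was set above, `gsearch.best_estimator_` is already fit to the full training set; one possible way to obtain the test accuracy (a sketch):\n",
"\n",
"```python\n",
"# score() reports the mean classification accuracy for classifiers\n",
"print(gsearch.best_estimator_.score(X_test, y_test))\n",
"```"
]
},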
1738 | {
1739 | "cell_type": "code",
1740 | "execution_count": null,
1741 | "metadata": {},
1742 | "outputs": [],
1743 | "source": [
1744 | "# MODIFY THIS CELL\n",
1745 | "\n",
1746 | "# YOUR CODE TO COMPUTE THE TEST ACCURACY"
1747 | ]
1748 | }
1749 | ],
1750 | "metadata": {
1751 | "kernelspec": {
1752 | "display_name": "Python 3",
1753 | "language": "python",
1754 | "name": "python3"
1755 | },
1756 | "language_info": {
1757 | "codemirror_mode": {
1758 | "name": "ipython",
1759 | "version": 3
1760 | },
1761 | "file_extension": ".py",
1762 | "mimetype": "text/x-python",
1763 | "name": "python",
1764 | "nbconvert_exporter": "python",
1765 | "pygments_lexer": "ipython3",
1766 | "version": "3.6.5"
1767 | }
1768 | },
1769 | "nbformat": 4,
1770 | "nbformat_minor": 2
1771 | }
1772 |
--------------------------------------------------------------------------------