├── dl-course-info.md ├── hw_02 ├── some_digit.png ├── tree-viz-1.png └── hw02.ipynb ├── 02_knn ├── 02_knn_notes.pdf ├── 02_knn_slides.pdf └── iris.csv ├── hw_03 ├── images │ ├── conf-1.png │ ├── conf-2.png │ └── hint-1.png ├── helper.py ├── data │ └── wine.data └── hw3.ipynb ├── 05_sklearn ├── images │ ├── eda.pdf │ ├── decisionreg.pdf │ ├── estimator-api.pdf │ ├── estimator-api.png │ ├── holdout-tuning.pdf │ ├── holdout-tuning.png │ ├── iris-subsampling.pdf │ ├── iris-subsampling.png │ ├── sklearn-pipeline.pdf │ ├── sklearn-pipeline.png │ ├── transformer-api.pdf │ └── transformer-api.png └── 05_sklearn_slides.pdf ├── report-template ├── report.pdf ├── figures │ └── google-scholar.pdf ├── bibliography.bib ├── project-presentation-assessment.md ├── project-report-assessment.md ├── report.tex ├── statcourse.sty └── ieee.bst ├── 06_trees ├── 06_trees_notes.pdf └── 06_trees_slides.pdf ├── 03_python └── 03_python_notes.pdf ├── 09_eval-ci ├── 09_eval-ci_notes.pdf └── 09_eval-ci_slides.pdf ├── 10_eval-cv ├── 10_eval-cv_notes.pdf └── 10_eval-cv_slides.pdf ├── other ├── stat479-fs18-awards.jpg └── dl-course-info.md ├── 01_overview ├── 01_ml-overview_notes.pdf └── 01_ml-overview_slides.pdf ├── 04_scipython ├── 04_scipython_notes.pdf └── images │ └── numpy-intro │ ├── ufunc.png │ ├── array_1.png │ ├── array_2.png │ ├── matmul.png │ ├── matmatmul.png │ ├── random_1.png │ ├── random_2.png │ ├── transpose.png │ ├── broadcasting-1.png │ └── broadcasting-2.png ├── 07_ensembles ├── 07_ensembles_notes.pdf └── 07_ensembles_slides.pdf ├── 11_eval-algo ├── 11_eval-algo_notes.pdf ├── 11_eval-algo_slides.pdf └── 11_eval-algo_code.ipynb ├── 13_feat-sele ├── 13_feat-sele_slides.pdf └── code-figures │ ├── logreg.png │ └── multinomial-logreg.png ├── 08_eval-intro ├── 08_eval-intro_notes.pdf └── 08_eval-intro_slides.pdf ├── 12_eval-metrics └── 12_eval-metrics_slides.pdf ├── 14_feat-extract └── 14_feat-extract_slides.pdf ├── .gitignore ├── README.md └── hw_01 ├── test_data.txt └── 
train_data.txt /dl-course-info.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hw_02/some_digit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/hw_02/some_digit.png -------------------------------------------------------------------------------- /hw_02/tree-viz-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/hw_02/tree-viz-1.png -------------------------------------------------------------------------------- /02_knn/02_knn_notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/02_knn/02_knn_notes.pdf -------------------------------------------------------------------------------- /hw_03/images/conf-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/hw_03/images/conf-1.png -------------------------------------------------------------------------------- /hw_03/images/conf-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/hw_03/images/conf-2.png -------------------------------------------------------------------------------- /hw_03/images/hint-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/hw_03/images/hint-1.png -------------------------------------------------------------------------------- /02_knn/02_knn_slides.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/02_knn/02_knn_slides.pdf -------------------------------------------------------------------------------- /05_sklearn/images/eda.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/eda.pdf -------------------------------------------------------------------------------- /report-template/report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/report-template/report.pdf -------------------------------------------------------------------------------- /06_trees/06_trees_notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/06_trees/06_trees_notes.pdf -------------------------------------------------------------------------------- /06_trees/06_trees_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/06_trees/06_trees_slides.pdf -------------------------------------------------------------------------------- /03_python/03_python_notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/03_python/03_python_notes.pdf -------------------------------------------------------------------------------- /09_eval-ci/09_eval-ci_notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/09_eval-ci/09_eval-ci_notes.pdf 
-------------------------------------------------------------------------------- /10_eval-cv/10_eval-cv_notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/10_eval-cv/10_eval-cv_notes.pdf -------------------------------------------------------------------------------- /other/stat479-fs18-awards.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/other/stat479-fs18-awards.jpg -------------------------------------------------------------------------------- /05_sklearn/05_sklearn_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/05_sklearn_slides.pdf -------------------------------------------------------------------------------- /05_sklearn/images/decisionreg.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/decisionreg.pdf -------------------------------------------------------------------------------- /09_eval-ci/09_eval-ci_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/09_eval-ci/09_eval-ci_slides.pdf -------------------------------------------------------------------------------- /10_eval-cv/10_eval-cv_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/10_eval-cv/10_eval-cv_slides.pdf -------------------------------------------------------------------------------- /01_overview/01_ml-overview_notes.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/01_overview/01_ml-overview_notes.pdf -------------------------------------------------------------------------------- /04_scipython/04_scipython_notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/04_scipython_notes.pdf -------------------------------------------------------------------------------- /05_sklearn/images/estimator-api.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/estimator-api.pdf -------------------------------------------------------------------------------- /05_sklearn/images/estimator-api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/estimator-api.png -------------------------------------------------------------------------------- /05_sklearn/images/holdout-tuning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/holdout-tuning.pdf -------------------------------------------------------------------------------- /05_sklearn/images/holdout-tuning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/holdout-tuning.png -------------------------------------------------------------------------------- /07_ensembles/07_ensembles_notes.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/07_ensembles/07_ensembles_notes.pdf -------------------------------------------------------------------------------- /07_ensembles/07_ensembles_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/07_ensembles/07_ensembles_slides.pdf -------------------------------------------------------------------------------- /11_eval-algo/11_eval-algo_notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/11_eval-algo/11_eval-algo_notes.pdf -------------------------------------------------------------------------------- /11_eval-algo/11_eval-algo_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/11_eval-algo/11_eval-algo_slides.pdf -------------------------------------------------------------------------------- /13_feat-sele/13_feat-sele_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/13_feat-sele/13_feat-sele_slides.pdf -------------------------------------------------------------------------------- /13_feat-sele/code-figures/logreg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/13_feat-sele/code-figures/logreg.png -------------------------------------------------------------------------------- /01_overview/01_ml-overview_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/01_overview/01_ml-overview_slides.pdf 
-------------------------------------------------------------------------------- /05_sklearn/images/iris-subsampling.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/iris-subsampling.pdf -------------------------------------------------------------------------------- /05_sklearn/images/iris-subsampling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/iris-subsampling.png -------------------------------------------------------------------------------- /05_sklearn/images/sklearn-pipeline.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/sklearn-pipeline.pdf -------------------------------------------------------------------------------- /05_sklearn/images/sklearn-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/sklearn-pipeline.png -------------------------------------------------------------------------------- /05_sklearn/images/transformer-api.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/transformer-api.pdf -------------------------------------------------------------------------------- /05_sklearn/images/transformer-api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/05_sklearn/images/transformer-api.png -------------------------------------------------------------------------------- 
/08_eval-intro/08_eval-intro_notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/08_eval-intro/08_eval-intro_notes.pdf -------------------------------------------------------------------------------- /08_eval-intro/08_eval-intro_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/08_eval-intro/08_eval-intro_slides.pdf -------------------------------------------------------------------------------- /04_scipython/images/numpy-intro/ufunc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/ufunc.png -------------------------------------------------------------------------------- /04_scipython/images/numpy-intro/array_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/array_1.png -------------------------------------------------------------------------------- /04_scipython/images/numpy-intro/array_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/array_2.png -------------------------------------------------------------------------------- /04_scipython/images/numpy-intro/matmul.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/matmul.png -------------------------------------------------------------------------------- /12_eval-metrics/12_eval-metrics_slides.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/12_eval-metrics/12_eval-metrics_slides.pdf -------------------------------------------------------------------------------- /14_feat-extract/14_feat-extract_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/14_feat-extract/14_feat-extract_slides.pdf -------------------------------------------------------------------------------- /report-template/figures/google-scholar.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/report-template/figures/google-scholar.pdf -------------------------------------------------------------------------------- /04_scipython/images/numpy-intro/matmatmul.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/matmatmul.png -------------------------------------------------------------------------------- /04_scipython/images/numpy-intro/random_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/random_1.png -------------------------------------------------------------------------------- /04_scipython/images/numpy-intro/random_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/random_2.png -------------------------------------------------------------------------------- /04_scipython/images/numpy-intro/transpose.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/transpose.png -------------------------------------------------------------------------------- /13_feat-sele/code-figures/multinomial-logreg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/13_feat-sele/code-figures/multinomial-logreg.png -------------------------------------------------------------------------------- /04_scipython/images/numpy-intro/broadcasting-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/broadcasting-1.png -------------------------------------------------------------------------------- /04_scipython/images/numpy-intro/broadcasting-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/stat479-machine-learning-fs18/HEAD/04_scipython/images/numpy-intro/broadcasting-2.png -------------------------------------------------------------------------------- /report-template/bibliography.bib: -------------------------------------------------------------------------------- 1 | @article{mirjalili2018gender, 2 | title={Gender Privacy: An Ensemble of Semi Adversarial Networks for Confounding Arbitrary Gender Classifiers}, 3 | author={Mirjalili, Vahid and Raschka, Sebastian and Ross, Arun}, 4 | journal={arXiv preprint arXiv:1807.11936}, 5 | year={2018} 6 | } -------------------------------------------------------------------------------- /report-template/project-presentation-assessment.md: -------------------------------------------------------------------------------- 1 | # Project Presentation Assessment 2 | 3 | - 10 pts: Is there a motivation 
for the project given? 4 | - 40 pts: Is the project described well enough that a general audience, familiar with machine learning, can understand the project? 5 | - 20 pts: Figures are all legible and explained well 6 | - 20 pts: Are the results presented adequately discussed? 7 | - 10 pts: Did all team members contribute to the presentation? -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Datasets 2 | list_attr_celeba.txt 3 | *.zip 4 | *.npz 5 | *.npy 6 | *ubyte.gz 7 | *archive.ics.uci.edu* 8 | 9 | # Binary PyTorch models 10 | *.pt 11 | 12 | # Temporary OS files 13 | .DS_Store 14 | 15 | # TensorFlow Checkpoint files 16 | checkpoint 17 | code/*/*.data-?????-of-????? 18 | code/*/*.index 19 | code/*/*.meta 20 | code/model_zoo/tensorflow_ipynb/*.data-?????-of-????? 21 | code/model_zoo/tensorflow_ipynb/*.index 22 | code/model_zoo/tensorflow_ipynb/*.meta 23 | code/model_zoo/tensorflow_ipynb/cifar-10/* 24 | 25 | # Byte-compiled / optimized / DLL files 26 | __pycache__/ 27 | *.py[cod] 28 | *$py.class 29 | 30 | # C extensions 31 | *.so 32 | 33 | # Distribution / packaging 34 | .Python 35 | env/ 36 | build/ 37 | develop-eggs/ 38 | dist/ 39 | downloads/ 40 | eggs/ 41 | .eggs/ 42 | lib/ 43 | lib64/ 44 | parts/ 45 | sdist/ 46 | var/ 47 | *.egg-info/ 48 | .installed.cfg 49 | *.egg 50 | 51 | # PyInstaller 52 | # Usually these files are written by a python script from a template 53 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
54 | *.manifest 55 | *.spec 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .coverage 65 | .coverage.* 66 | .cache 67 | nosetests.xml 68 | coverage.xml 69 | *,cover 70 | .hypothesis/ 71 | 72 | # Translations 73 | *.mo 74 | *.pot 75 | 76 | # Django stuff: 77 | *.log 78 | local_settings.py 79 | 80 | # Flask stuff: 81 | instance/ 82 | .webassets-cache 83 | 84 | # Scrapy stuff: 85 | .scrapy 86 | 87 | # Sphinx documentation 88 | docs/_build/ 89 | 90 | # PyBuilder 91 | target/ 92 | 93 | # IPython Notebook 94 | .ipynb_checkpoints 95 | 96 | # pyenv 97 | .python-version 98 | 99 | # celery beat schedule file 100 | celerybeat-schedule 101 | 102 | # dotenv 103 | .env 104 | 105 | # virtualenv 106 | venv/ 107 | ENV/ 108 | 109 | # Spyder project settings 110 | .spyderproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # Datasets 116 | MNIST* 117 | -------------------------------------------------------------------------------- /report-template/project-report-assessment.md: -------------------------------------------------------------------------------- 1 | # Project Report Assessment 2 | 3 | 4 | ### Abstract: 15 pts 5 | 6 | - Is enough information provided to get a clear idea about the subject matter? 7 | - Is the abstract conveying the findings? 8 | - Are the main points of the report described succinctly? 9 | 10 | ### Introduction: 15 pts 11 | 12 | - Does the introduction cover the required background information to understand the work? 13 | - Is the introduction well organized: it starts out general and becomes more specific towards the end? 14 | - Is there a motivation explaining why this project is relevant, important, and/or interesting? 15 | 16 | ### Related Work: 15 pts 17 | 18 | - Is similar and related work discussed adequately? 19 | - Are references cited properly (here, but also throughout the whole paper)?
20 | - Is the discussion or paragraph comparing this project with other people's work adequate? 21 | 22 | 23 | ### Proposed Method: 25 pts 24 | 25 | - Are there any missing descriptions of symbols used in mathematical notations (if applicable)? 26 | - Are the main algorithms described well enough so that they can be implemented by a knowledgeable reader? 27 | 28 | ### Experiments: 25 pts 29 | 30 | - Are the experimental setup and methodology described well enough so that the experiments can be repeated? 31 | - If datasets are used, are they referenced appropriately? 32 | 33 | ### Results and Discussion: 30 pts 34 | 35 | - Are the results described clearly? 36 | - Is the data analyzed well, and are the results logical? 37 | - Are the figures clear and have no missing labels? 38 | - Do the figure captions have sufficient information to understand the figure? 39 | - Is each figure referenced in the text? 40 | - Is the discussion critical/honest, and are potential weaknesses/shortcomings discussed as well? 41 | 42 | ### Conclusions: 15 pts 43 | 44 | - Do the authors describe whether the initial motivation/task was accomplished or not based on the results? 45 | - Is it discussed adequately how the results relate to previous work? 46 | - If applicable, are potential future directions given? 47 | 48 | ### Contributions: 10 pts 49 | 50 | - Are all contributions listed clearly? 51 | - Did each member contribute approximately equally to the project?
52 | 53 | -------------------------------------------------------------------------------- /hw_03/helper.py: -------------------------------------------------------------------------------- 1 | # Copyright Sebastian Raschka 2018 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | 7 | def plot_confusion_matrix(conf_mat, 8 | hide_spines=False, 9 | hide_ticks=False, 10 | figsize=None, 11 | cmap=None, 12 | colorbar=False, 13 | show_absolute=True, 14 | show_normed=False): 15 | 16 | if not (show_absolute or show_normed): 17 | raise AssertionError('Both show_absolute and show_normed are False') 18 | 19 | total_samples = conf_mat.sum(axis=1)[:, np.newaxis] 20 | normed_conf_mat = conf_mat.astype('float') / total_samples 21 | 22 | fig, ax = plt.subplots(figsize=figsize) 23 | ax.grid(False) 24 | if cmap is None: 25 | cmap = plt.cm.Blues 26 | 27 | if figsize is None: 28 | figsize = (len(conf_mat)*1.25, len(conf_mat)*1.25) 29 | 30 | if show_absolute: 31 | matshow = ax.matshow(conf_mat, cmap=cmap) 32 | else: 33 | matshow = ax.matshow(normed_conf_mat, cmap=cmap) 34 | 35 | if colorbar: 36 | fig.colorbar(matshow) 37 | 38 | for i in range(conf_mat.shape[0]): 39 | for j in range(conf_mat.shape[1]): 40 | cell_text = "" 41 | if show_absolute: 42 | cell_text += format(conf_mat[i, j], 'd') 43 | if show_normed: 44 | cell_text += "\n" + '(' 45 | cell_text += format(normed_conf_mat[i, j], '.2f') + ')' 46 | else: 47 | cell_text += format(normed_conf_mat[i, j], '.2f') 48 | ax.text(x=j, 49 | y=i, 50 | s=cell_text, 51 | va='center', 52 | ha='center', 53 | color="white" if normed_conf_mat[i, j] > 0.5 else "black") 54 | 55 | if hide_spines: 56 | ax.spines['right'].set_visible(False) 57 | ax.spines['top'].set_visible(False) 58 | ax.spines['left'].set_visible(False) 59 | ax.spines['bottom'].set_visible(False) 60 | ax.yaxis.set_ticks_position('left') 61 | ax.xaxis.set_ticks_position('bottom') 62 | if hide_ticks: 63 | ax.axes.get_yaxis().set_ticks([]) 64 | 
ax.axes.get_xaxis().set_ticks([]) 65 | 66 | plt.xlabel('predicted label') 67 | plt.ylabel('true label') 68 | return fig, ax -------------------------------------------------------------------------------- /other/dl-course-info.md: -------------------------------------------------------------------------------- 1 | # STAT 479 SS 2019: Deep Learning 2 | 3 | ## Abstract 4 | 5 | Deep learning is an exciting, young field that specializes in discovering and extracting intricate structures in large, unstructured datasets for parameterizing artificial neural networks with many layers. Since deep learning has pushed the state-of-the-art in many applications, it's become indispensable for modern technology. This is owed to the vast utility of deep learning for tackling complex tasks in the fields of computer vision and natural language processing -- tasks that humans are good at but are traditionally challenging for computers. This includes tasks such as image classification, object detection, and speech recognition. 6 | 7 | The focus of this course will be on understanding artificial neural networks and deep learning algorithmically (discussing the math behind these methods on a basic level) and implementing network models in code as well as applying these to real-world datasets. Some of the topics that will be covered include convolutional neural networks for image classification and object detection, recurrent neural networks for modeling text, and generative adversarial networks for generating new data. 8 | 9 | Familiarity with general machine learning concepts (such as the FS2018 STAT479: Machine Learning course) is recommended but not required. We will review some relevant background concepts, which include general machine learning concepts such as supervised learning, classification, model evaluation, etc. 
Furthermore, some lectures will focus on reviewing the use of Python's stack for scientific computing (NumPy, SciPy, matplotlib) prior to the introduction of PyTorch as the main computational deep learning library that we are going to use in this course. 10 | 11 | 12 | ## Tentative List of Topics 13 | 14 | - brief history of neural networks and what makes deep learning different from "classic machine learning" 15 | - introducing the concept of neural networks by connecting it to familiar concepts such as logistic regression and multinomial logistic regression (which can be seen as special cases: single-layer neural nets) 16 | - modeling and deriving non-convex loss functions through computation graphs 17 | - introduction to automatic differentiation and PyTorch for efficient data manipulation using GPUs 18 | - convolutional neural networks for analyzing unstructured data (image analysis) 19 | - using 1D convolutions for sequence analysis 20 | - more advanced sequence analysis using recurrent neural networks 21 | - introducing generative models to sample from input distributions: autoencoders, variational autoencoders, and generative adversarial neural networks -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # STAT479: Machine Learning (Fall 2018) 2 | 3 | Instructor: Sebastian Raschka 4 | 5 | Lecture material for the Machine Learning course (STAT 479) at the University of Wisconsin-Madison. For details, please see the course website at http://pages.stat.wisc.edu/~sraschka/teaching/stat479-fs2018/ 6 | 7 | 8 | 9 | **Part I: Introduction** 10 | 11 | - [Lecture 1](01_overview): What is Machine Learning? An Overview.
12 | - [Lecture 2](02_knn): Intro to Supervised Learning: KNN 13 | 14 | **Part II: Computational Foundations** 15 | 16 | - [Lecture 3](03_python): Using Python, Anaconda, IPython, Jupyter Notebooks 17 | - [Lecture 4](04_scipython): Scientific Computing with NumPy, SciPy, and Matplotlib 18 | - [Lecture 5](05_sklearn): Data Preprocessing and Machine Learning with Scikit-Learn 19 | 20 | **Part III: Tree-Based Methods** 21 | 22 | - [Lecture 6](06_trees): Decision Trees 23 | - [Lecture 7](07_ensembles): Ensemble Methods 24 | 25 | **Part IV: Evaluation** 26 | 27 | - [Lecture 8](08_eval-intro): Model Evaluation 1: Introduction to Overfitting and Underfitting 28 | - [Lecture 9](09_eval-ci): Model Evaluation 2: Uncertainty Estimates and Resampling 29 | - [Lecture 10](10_eval-cv): Model Evaluation 3: Model Selection and Cross-Validation 30 | - [Lecture 11](11_eval-algo): Model Evaluation 4: Algorithm Selection and Statistical Tests 31 | - [Lecture 12](12_eval-metrics): Model Evaluation 5: Performance Metrics 32 | 33 | **Part V: Dimensionality Reduction** 34 | 35 | - [Lecture 13](13_feat-sele): Feature Selection 36 | - [Lecture 14](14_feat-extract): Feature Extraction 37 | 38 | **Due to time constraints, the following topics could unfortunately not be covered:** 39 | 40 | **Part VI: Bayesian Learning** 41 | 42 | - Bayes Classifiers 43 | - Text Data & Sentiment Analysis 44 | - Naive Bayes Classification 45 | 46 | **Part VII: Regression and Unsupervised Learning** 47 | 48 | - Regression Analysis 49 | - Clustering 50 | 51 | **The following topics will be covered at the beginning of the 52 | Deep Learning class next Spring.** [Tentative outline of the DL course](./other/dl-course-info.md). 53 | 54 | **Part VIII: Introduction to Artificial Neural Networks** 55 | 56 | - Perceptron 57 | - Adaline & Logistic Regression 58 | - SVM 59 | - Multilayer Perceptron 60 | 61 | 62 | Creative Commons License
This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License. 63 | 64 | 65 |
66 |
67 |
68 | 69 | Teaching this class was a pleasure, and I am especially happy about how awesome the class projects turned out. Listed below are the winners of the three award categories as determined by ~210 votes. Congratulations! 70 | 71 | ![](other/stat479-fs18-awards.jpg) -------------------------------------------------------------------------------- /hw_01/test_data.txt: -------------------------------------------------------------------------------- 1 | x1 x2 y 2 | -5.75 -6.83 0 3 | 5.51 3.67 1 4 | 5.11 5.32 1 5 | 0.85 -4.11 0 6 | -0.50 -0.45 1 7 | -12.65 -12.05 0 8 | -4.22 -6.39 0 9 | -0.56 -10.23 0 10 | 2.82 1.68 1 11 | 3.44 -7.70 0 12 | 9.56 -7.29 1 13 | 11.22 5.10 1 14 | -2.90 -8.44 0 15 | 3.65 -10.13 0 16 | -5.95 -6.79 0 17 | 10.30 6.20 1 18 | 11.59 5.99 1 19 | -8.87 -2.64 0 20 | -2.63 -6.28 0 21 | 14.82 5.55 1 22 | 4.70 2.81 1 23 | -5.90 2.11 0 24 | -3.98 -8.53 0 25 | 10.52 -0.67 1 26 | -6.96 -3.70 0 27 | -4.06 -1.97 1 28 | 7.40 -0.49 1 29 | -2.08 -3.87 0 30 | -4.07 -2.24 0 31 | 7.31 0.19 1 32 | 2.26 3.73 1 33 | -6.76 -9.25 0 34 | 2.80 0.13 0 35 | -6.79 -5.64 0 36 | 5.54 9.07 1 37 | 0.36 3.12 1 38 | -0.09 -5.57 0 39 | -2.43 -8.09 0 40 | -0.77 7.97 1 41 | -2.36 -3.81 0 42 | -2.96 -1.82 0 43 | -7.74 -4.67 0 44 | -4.85 -12.71 0 45 | 1.07 -4.86 0 46 | -4.71 -2.16 0 47 | -5.00 -6.76 0 48 | -11.60 4.64 0 49 | 4.39 0.39 1 50 | 0.14 0.06 1 51 | 7.64 5.08 1 52 | 8.37 3.39 1 53 | 1.59 9.37 1 54 | 7.96 7.02 1 55 | 3.73 -4.61 0 56 | -8.17 -9.61 0 57 | -1.95 -4.46 0 58 | 0.93 -1.05 1 59 | -14.65 -1.69 0 60 | -7.93 -7.95 0 61 | 7.68 9.08 1 62 | 9.50 -2.88 1 63 | 5.17 7.50 1 64 | -4.86 -6.51 0 65 | 1.94 1.10 1 66 | -0.32 -12.92 0 67 | 7.44 -0.90 1 68 | 10.65 3.87 1 69 | -10.45 -2.66 0 70 | 7.48 -2.95 1 71 | 0.28 -0.52 0 72 | 3.18 -13.24 0 73 | 8.39 0.84 1 74 | 8.86 4.78 1 75 | 0.49 10.36 1 76 | 2.36 -12.78 0 77 | -1.97 -7.52 0 78 | 1.87 -8.03 0 79 | 3.50 5.48 1 80 | -5.58 -2.99 0 81 | 6.99 -8.59 1 82 | -6.34 -3.89 0 83 | 11.34 2.99 1 84 | -0.56 -10.16 0 85 | 8.08 6.18 1 86 
| 8.94 2.05 1 87 | -11.12 -2.71 0 88 | 10.76 2.59 1 89 | 0.03 1.11 1 90 | 0.84 2.83 1 91 | 8.36 8.34 1 92 | -4.38 -4.40 0 93 | -6.94 -8.48 0 94 | -11.82 1.06 0 95 | -7.66 -5.78 0 96 | 3.29 -0.30 1 97 | 6.47 7.38 1 98 | 2.08 -6.21 0 99 | 5.97 4.18 1 100 | -1.57 -6.36 0 101 | -1.53 -3.74 0 102 | -2.84 -0.15 0 103 | 12.69 -4.20 1 104 | -7.43 -4.21 0 105 | 3.81 -8.34 0 106 | 4.76 0.32 1 107 | 11.87 6.52 1 108 | -2.01 3.78 0 109 | 1.95 0.55 1 110 | 3.51 -6.28 1 111 | -3.27 -2.19 0 112 | -5.74 1.53 0 113 | 6.98 2.86 1 114 | -7.02 -7.18 0 115 | 2.49 8.94 1 116 | -3.52 1.14 0 117 | 9.68 0.98 1 118 | -13.70 -7.31 0 119 | 11.38 4.25 1 120 | -5.46 -4.15 0 121 | -0.68 -8.03 0 122 | 0.10 -3.51 0 123 | 10.43 6.93 1 124 | 2.74 -4.24 0 125 | -2.99 -6.52 0 126 | -4.69 1.39 0 127 | 6.87 9.68 1 128 | 6.20 4.20 1 129 | 6.75 -1.85 1 130 | 6.32 9.44 1 131 | -6.92 -8.03 0 132 | 12.44 2.15 1 133 | -7.26 -1.17 0 134 | -11.95 1.21 0 135 | -3.93 -5.76 0 136 | 0.84 8.70 1 137 | 0.45 -0.26 1 138 | -0.82 -8.39 0 139 | -7.75 -12.57 0 140 | 7.03 -2.10 1 141 | -4.95 -13.39 0 142 | 5.64 1.28 1 143 | 5.47 6.38 1 144 | 3.04 -4.91 1 145 | -3.33 -3.80 0 146 | -5.89 0.18 0 147 | 8.61 10.52 1 148 | -1.91 -2.04 1 149 | 3.86 5.78 1 150 | -3.50 -5.25 0 151 | 0.78 2.49 1 152 | 8.84 3.60 1 153 | -3.50 0.86 0 154 | -7.13 -8.24 0 155 | 2.82 -8.17 0 156 | 6.67 3.99 1 157 | 10.19 3.48 1 158 | 9.79 -2.40 1 159 | 2.12 -3.79 0 160 | 11.98 5.16 1 161 | 10.65 7.99 1 162 | 9.95 0.36 1 163 | 6.19 0.89 1 164 | -3.94 -10.17 0 165 | -4.30 -9.05 0 166 | 12.59 -3.56 1 167 | 5.04 2.32 1 168 | -9.20 -14.65 0 169 | -8.35 -0.15 0 170 | -5.98 -4.62 0 171 | 4.39 1.88 1 172 | 1.01 8.72 1 173 | 0.25 5.29 1 174 | 7.30 -1.07 1 175 | -2.65 -5.44 0 176 | 12.10 -6.39 1 177 | 8.95 -1.73 1 178 | 8.79 3.18 1 179 | 3.42 12.11 1 180 | 8.71 6.47 1 181 | -15.19 -2.76 0 182 | -3.15 -9.35 0 183 | -3.26 -7.77 0 184 | 12.06 -1.95 1 185 | -1.07 -2.64 0 186 | 0.80 5.37 1 187 | 4.76 -7.93 0 188 | -2.68 -16.15 0 189 | -2.63 -8.02 0 190 | 13.31 -3.46 1 
191 | 8.58 -4.67 1 192 | 4.69 2.50 1 193 | 3.25 5.99 1 194 | 1.29 6.16 1 195 | -3.17 -5.06 0 196 | -2.64 -3.66 0 197 | -3.89 -12.56 0 198 | 3.14 5.05 1 199 | 8.05 7.63 1 200 | -4.87 -6.22 0 201 | -12.42 -6.33 0 202 | -------------------------------------------------------------------------------- /02_knn/iris.csv: -------------------------------------------------------------------------------- 1 | Id,SepalLength[cm],SepalWidth[cm],PetalLength[cm],PetalWidth[cm],Species 2 | 1,5.1,3.5,1.4,0.2,Iris-setosa 3 | 2,4.9,3.0,1.4,0.2,Iris-setosa 4 | 3,4.7,3.2,1.3,0.2,Iris-setosa 5 | 4,4.6,3.1,1.5,0.2,Iris-setosa 6 | 5,5.0,3.6,1.4,0.2,Iris-setosa 7 | 6,5.4,3.9,1.7,0.4,Iris-setosa 8 | 7,4.6,3.4,1.4,0.3,Iris-setosa 9 | 8,5.0,3.4,1.5,0.2,Iris-setosa 10 | 9,4.4,2.9,1.4,0.2,Iris-setosa 11 | 10,4.9,3.1,1.5,0.1,Iris-setosa 12 | 11,5.4,3.7,1.5,0.2,Iris-setosa 13 | 12,4.8,3.4,1.6,0.2,Iris-setosa 14 | 13,4.8,3.0,1.4,0.1,Iris-setosa 15 | 14,4.3,3.0,1.1,0.1,Iris-setosa 16 | 15,5.8,4.0,1.2,0.2,Iris-setosa 17 | 16,5.7,4.4,1.5,0.4,Iris-setosa 18 | 17,5.4,3.9,1.3,0.4,Iris-setosa 19 | 18,5.1,3.5,1.4,0.3,Iris-setosa 20 | 19,5.7,3.8,1.7,0.3,Iris-setosa 21 | 20,5.1,3.8,1.5,0.3,Iris-setosa 22 | 21,5.4,3.4,1.7,0.2,Iris-setosa 23 | 22,5.1,3.7,1.5,0.4,Iris-setosa 24 | 23,4.6,3.6,1.0,0.2,Iris-setosa 25 | 24,5.1,3.3,1.7,0.5,Iris-setosa 26 | 25,4.8,3.4,1.9,0.2,Iris-setosa 27 | 26,5.0,3.0,1.6,0.2,Iris-setosa 28 | 27,5.0,3.4,1.6,0.4,Iris-setosa 29 | 28,5.2,3.5,1.5,0.2,Iris-setosa 30 | 29,5.2,3.4,1.4,0.2,Iris-setosa 31 | 30,4.7,3.2,1.6,0.2,Iris-setosa 32 | 31,4.8,3.1,1.6,0.2,Iris-setosa 33 | 32,5.4,3.4,1.5,0.4,Iris-setosa 34 | 33,5.2,4.1,1.5,0.1,Iris-setosa 35 | 34,5.5,4.2,1.4,0.2,Iris-setosa 36 | 35,4.9,3.1,1.5,0.1,Iris-setosa 37 | 36,5.0,3.2,1.2,0.2,Iris-setosa 38 | 37,5.5,3.5,1.3,0.2,Iris-setosa 39 | 38,4.9,3.1,1.5,0.1,Iris-setosa 40 | 39,4.4,3.0,1.3,0.2,Iris-setosa 41 | 40,5.1,3.4,1.5,0.2,Iris-setosa 42 | 41,5.0,3.5,1.3,0.3,Iris-setosa 43 | 42,4.5,2.3,1.3,0.3,Iris-setosa 44 | 
43,4.4,3.2,1.3,0.2,Iris-setosa 45 | 44,5.0,3.5,1.6,0.6,Iris-setosa 46 | 45,5.1,3.8,1.9,0.4,Iris-setosa 47 | 46,4.8,3.0,1.4,0.3,Iris-setosa 48 | 47,5.1,3.8,1.6,0.2,Iris-setosa 49 | 48,4.6,3.2,1.4,0.2,Iris-setosa 50 | 49,5.3,3.7,1.5,0.2,Iris-setosa 51 | 50,5.0,3.3,1.4,0.2,Iris-setosa 52 | 51,7.0,3.2,4.7,1.4,Iris-versicolor 53 | 52,6.4,3.2,4.5,1.5,Iris-versicolor 54 | 53,6.9,3.1,4.9,1.5,Iris-versicolor 55 | 54,5.5,2.3,4.0,1.3,Iris-versicolor 56 | 55,6.5,2.8,4.6,1.5,Iris-versicolor 57 | 56,5.7,2.8,4.5,1.3,Iris-versicolor 58 | 57,6.3,3.3,4.7,1.6,Iris-versicolor 59 | 58,4.9,2.4,3.3,1.0,Iris-versicolor 60 | 59,6.6,2.9,4.6,1.3,Iris-versicolor 61 | 60,5.2,2.7,3.9,1.4,Iris-versicolor 62 | 61,5.0,2.0,3.5,1.0,Iris-versicolor 63 | 62,5.9,3.0,4.2,1.5,Iris-versicolor 64 | 63,6.0,2.2,4.0,1.0,Iris-versicolor 65 | 64,6.1,2.9,4.7,1.4,Iris-versicolor 66 | 65,5.6,2.9,3.6,1.3,Iris-versicolor 67 | 66,6.7,3.1,4.4,1.4,Iris-versicolor 68 | 67,5.6,3.0,4.5,1.5,Iris-versicolor 69 | 68,5.8,2.7,4.1,1.0,Iris-versicolor 70 | 69,6.2,2.2,4.5,1.5,Iris-versicolor 71 | 70,5.6,2.5,3.9,1.1,Iris-versicolor 72 | 71,5.9,3.2,4.8,1.8,Iris-versicolor 73 | 72,6.1,2.8,4.0,1.3,Iris-versicolor 74 | 73,6.3,2.5,4.9,1.5,Iris-versicolor 75 | 74,6.1,2.8,4.7,1.2,Iris-versicolor 76 | 75,6.4,2.9,4.3,1.3,Iris-versicolor 77 | 76,6.6,3.0,4.4,1.4,Iris-versicolor 78 | 77,6.8,2.8,4.8,1.4,Iris-versicolor 79 | 78,6.7,3.0,5.0,1.7,Iris-versicolor 80 | 79,6.0,2.9,4.5,1.5,Iris-versicolor 81 | 80,5.7,2.6,3.5,1.0,Iris-versicolor 82 | 81,5.5,2.4,3.8,1.1,Iris-versicolor 83 | 82,5.5,2.4,3.7,1.0,Iris-versicolor 84 | 83,5.8,2.7,3.9,1.2,Iris-versicolor 85 | 84,6.0,2.7,5.1,1.6,Iris-versicolor 86 | 85,5.4,3.0,4.5,1.5,Iris-versicolor 87 | 86,6.0,3.4,4.5,1.6,Iris-versicolor 88 | 87,6.7,3.1,4.7,1.5,Iris-versicolor 89 | 88,6.3,2.3,4.4,1.3,Iris-versicolor 90 | 89,5.6,3.0,4.1,1.3,Iris-versicolor 91 | 90,5.5,2.5,4.0,1.3,Iris-versicolor 92 | 91,5.5,2.6,4.4,1.2,Iris-versicolor 93 | 92,6.1,3.0,4.6,1.4,Iris-versicolor 94 | 
93,5.8,2.6,4.0,1.2,Iris-versicolor 95 | 94,5.0,2.3,3.3,1.0,Iris-versicolor 96 | 95,5.6,2.7,4.2,1.3,Iris-versicolor 97 | 96,5.7,3.0,4.2,1.2,Iris-versicolor 98 | 97,5.7,2.9,4.2,1.3,Iris-versicolor 99 | 98,6.2,2.9,4.3,1.3,Iris-versicolor 100 | 99,5.1,2.5,3.0,1.1,Iris-versicolor 101 | 100,5.7,2.8,4.1,1.3,Iris-versicolor 102 | 101,6.3,3.3,6.0,2.5,Iris-virginica 103 | 102,5.8,2.7,5.1,1.9,Iris-virginica 104 | 103,7.1,3.0,5.9,2.1,Iris-virginica 105 | 104,6.3,2.9,5.6,1.8,Iris-virginica 106 | 105,6.5,3.0,5.8,2.2,Iris-virginica 107 | 106,7.6,3.0,6.6,2.1,Iris-virginica 108 | 107,4.9,2.5,4.5,1.7,Iris-virginica 109 | 108,7.3,2.9,6.3,1.8,Iris-virginica 110 | 109,6.7,2.5,5.8,1.8,Iris-virginica 111 | 110,7.2,3.6,6.1,2.5,Iris-virginica 112 | 111,6.5,3.2,5.1,2.0,Iris-virginica 113 | 112,6.4,2.7,5.3,1.9,Iris-virginica 114 | 113,6.8,3.0,5.5,2.1,Iris-virginica 115 | 114,5.7,2.5,5.0,2.0,Iris-virginica 116 | 115,5.8,2.8,5.1,2.4,Iris-virginica 117 | 116,6.4,3.2,5.3,2.3,Iris-virginica 118 | 117,6.5,3.0,5.5,1.8,Iris-virginica 119 | 118,7.7,3.8,6.7,2.2,Iris-virginica 120 | 119,7.7,2.6,6.9,2.3,Iris-virginica 121 | 120,6.0,2.2,5.0,1.5,Iris-virginica 122 | 121,6.9,3.2,5.7,2.3,Iris-virginica 123 | 122,5.6,2.8,4.9,2.0,Iris-virginica 124 | 123,7.7,2.8,6.7,2.0,Iris-virginica 125 | 124,6.3,2.7,4.9,1.8,Iris-virginica 126 | 125,6.7,3.3,5.7,2.1,Iris-virginica 127 | 126,7.2,3.2,6.0,1.8,Iris-virginica 128 | 127,6.2,2.8,4.8,1.8,Iris-virginica 129 | 128,6.1,3.0,4.9,1.8,Iris-virginica 130 | 129,6.4,2.8,5.6,2.1,Iris-virginica 131 | 130,7.2,3.0,5.8,1.6,Iris-virginica 132 | 131,7.4,2.8,6.1,1.9,Iris-virginica 133 | 132,7.9,3.8,6.4,2.0,Iris-virginica 134 | 133,6.4,2.8,5.6,2.2,Iris-virginica 135 | 134,6.3,2.8,5.1,1.5,Iris-virginica 136 | 135,6.1,2.6,5.6,1.4,Iris-virginica 137 | 136,7.7,3.0,6.1,2.3,Iris-virginica 138 | 137,6.3,3.4,5.6,2.4,Iris-virginica 139 | 138,6.4,3.1,5.5,1.8,Iris-virginica 140 | 139,6.0,3.0,4.8,1.8,Iris-virginica 141 | 140,6.9,3.1,5.4,2.1,Iris-virginica 142 | 141,6.7,3.1,5.6,2.4,Iris-virginica 
143 | 142,6.9,3.1,5.1,2.3,Iris-virginica 144 | 143,5.8,2.7,5.1,1.9,Iris-virginica 145 | 144,6.8,3.2,5.9,2.3,Iris-virginica 146 | 145,6.7,3.3,5.7,2.5,Iris-virginica 147 | 146,6.7,3.0,5.2,2.3,Iris-virginica 148 | 147,6.3,2.5,5.0,1.9,Iris-virginica 149 | 148,6.5,3.0,5.2,2.0,Iris-virginica 150 | 149,6.2,3.4,5.4,2.3,Iris-virginica 151 | 150,5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /report-template/report.tex: -------------------------------------------------------------------------------- 1 | \documentclass[10pt,twocolumn,letterpaper]{article} 2 | 3 | \usepackage{statcourse} 4 | \usepackage{times} 5 | \usepackage{epsfig} 6 | \usepackage{graphicx} 7 | \usepackage{amsmath} 8 | \usepackage{amssymb} 9 | 10 | % Include other packages here, before hyperref. 11 | 12 | % If you comment hyperref and then uncomment it, you should delete 13 | % egpaper.aux before re-running latex. (Or just hit 'q' on the first latex 14 | % run, let it finish, and you should be clear). 
15 | \usepackage[breaklinks=true,bookmarks=false]{hyperref} 16 | 17 | 18 | \statcoursefinalcopy 19 | 20 | 21 | \setcounter{page}{1} 22 | \begin{document} 23 | 24 | 25 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 26 | % DO NOT EDIT ANYTHING ABOVE THIS LINE 27 | % EXCEPT IF YOU LIKE TO USE ADDITIONAL PACKAGES 28 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 29 | 30 | 31 | 32 | %%%%%%%%% TITLE 33 | \title{\LaTeX\ Template for STAT479 Project Report} 34 | 35 | \author{First Author\\ 36 | {\tt\small firstauthor@wisc.edu} 37 | \and 38 | Second Author\\ 39 | {\tt\small secondauthor@wisc.edu} 40 | \and 41 | Third Author\\ 42 | {\tt\small thirdauthor@wisc.edu} 43 | } 44 | 45 | \maketitle 46 | %\thispagestyle{empty} 47 | 48 | 49 | 50 | % MAIN ARTICLE GOES BELOW 51 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 52 | 53 | 54 | %%%%%%%%% ABSTRACT 55 | \begin{abstract} 56 | The abstract for your project goes here. The length of the abstract 57 | should be between 200-250 words. Tips for writing a good abstract 58 | can be found at \url{https://writing.wisc.edu/Handbook/presentations_abstracts.html}. 59 | \end{abstract} 60 | 61 | %%%%%%%%% BODY TEXT 62 | \section{Introduction} 63 | 64 | This template is based on the CVPR conference template\footnote{\url{http://statcourse2018.thecvf.com/submission/main_conference/author_guidelines}}. 65 | 66 | The information in this template is very minimal, and this file should serve you as a framework for writing your report. You may prefer to use a more collaboration-friendly tool while drafting the report with your class mates before you prepare the final report for submission. Remember that you should \textbf{submit both the report and code} you used for this project via Canvas. Also, \textbf{only one member per team} needs to submit the project material. 
67 | 68 | 69 | This is an example of a mathematical equation: 70 | 71 | $$f(\mathbf{x}; \mathbf{w}) = \sum_{i=1}^{n} w_ix_i.$$ 72 | 73 | This is a mathematical expression, $h(\mathbf{x}) = \hat{y}$ formatted in text. 74 | 75 | The project report should be 6-8 pages long (not counting references) 76 | and should contain the sections that are already provided in this paper. Please 77 | check out the text in these sections for further information. 78 | 79 | 80 | \subsection{Subsection} 81 | 82 | You can use paragraphs or subsections to further structure your 83 | main sections. This is an example of a subsection. 84 | 85 | \paragraph{This is a paragraph title.} This is an example of a paragraph. 86 | 87 | \section{Related Work} 88 | 89 | Related work should be discussed here. This is an example of a citation \cite{mirjalili2018gender}. To format the citations properly, put the 90 | corresponding references into the bibliography.bib file. You can obtain 91 | BibTeX-formatted references for the "bib" file from Google Scholar 92 | (\url{https://scholar.google.com}), for example, by clicking on the 93 | double-quote character under a citation and then selecting \mbox{"BibTeX"} as 94 | shown in Figure \ref{fig:google-scholar-1col} and 95 | Figure \ref{fig:google-scholar-2col}. 96 | 97 | \begin{figure}[t] 98 | \begin{center} 99 | \includegraphics[width=0.8\linewidth]{figures/google-scholar.pdf} 100 | \end{center} 101 | \caption{Example illustrating how to get BibTeX references from 102 | Google Scholar as a 1-column figure.} 103 | \label{fig:google-scholar-1col} 104 | \end{figure} 105 | 106 | 107 | \begin{figure*} 108 | \begin{center} 109 | \includegraphics[width=0.8\linewidth]{figures/google-scholar.pdf} 110 | \end{center} 111 | \caption{Example illustrating how to get BibTeX references from 112 | Google Scholar as a 2-column figure.} 113 | \label{fig:google-scholar-2col} 114 | \end{figure*} 115 | 116 | Table \ref{tab:some-table} shows an example for formatting a table. 
117 | 118 | \begin{table} 119 | \begin{center} 120 | \begin{tabular}{|l|c|} 121 | \hline 122 | Method & Accuracy \\ 123 | \hline\hline 124 | Method 1 & $70 \pm 3$ \% \\ 125 | Method 2 & $76 \pm 3$ \% \\ 126 | \hline 127 | \end{tabular} 128 | \end{center} 129 | \label{tab:some-table} 130 | \caption{This is an example of a table.} 131 | \end{table} 132 | 133 | 134 | \section{Proposed Method} 135 | 136 | Describe the method(s) you are proposing, developing, or using. I.e., details 137 | of the algorithms may be included here. 138 | 139 | \section{Experiments} 140 | 141 | Describe the experiments you performed. You may want to create separate 142 | subsections to further structure this section. 143 | 144 | \subsection{Dataset} 145 | 146 | Briefly describe your dataset in a separate subsection. 147 | 148 | 149 | \subsection{Software} 150 | 151 | Briefly list (and cite) software software you used. 152 | 153 | \subsection{Hardware} 154 | 155 | If relevant, list hardware resources you used. 156 | 157 | 158 | \section{Results and Discussion} 159 | 160 | Describe the results you obtained from the experiments and interpret them. 161 | Optionally, you could split "Results and Discussion" into two separate 162 | sections. 163 | 164 | \section{Conclusions} 165 | 166 | Describe your conclusions here. If there are any future directions, you can 167 | describe them here, or you can create a new section for future directions. 168 | 169 | \section{Acknowledgements} 170 | 171 | List acknowledgements if any. For example, if someone provided you a dataset, or 172 | you used someone else's resources, this is a good place to acknowledge 173 | the help or support you received. 174 | 175 | \section{Contributions} 176 | 177 | Describe the contributions of each team member who worked on this project. 
178 | 179 | 180 | {\small 181 | \bibliographystyle{ieee} 182 | \bibliography{bibliography.bib} 183 | } 184 | 185 | \end{document} 186 | -------------------------------------------------------------------------------- /11_eval-algo/11_eval-algo_code.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "STAT 479: Machine Learning (Fall 2018) \n", 8 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) \n", 9 | "Course website: http://pages.stat.wisc.edu/~sraschka/teaching/stat479-fs2018/" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# L11: Model Evaluation 4 -- Algorithm Comparison" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "Sebastian Raschka 2018-11-07 \n", 29 | "\n", 30 | "CPython 3.6.7\n", 31 | "IPython 6.5.0\n", 32 | "\n", 33 | "sklearn 0.20.0\n", 34 | "mlxtend 0.14.0dev\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "%load_ext watermark\n", 40 | "%watermark -a 'Sebastian Raschka' -d -p sklearn,mlxtend -v" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "import numpy as np\n", 50 | "from sklearn.model_selection import GridSearchCV\n", 51 | "from sklearn.model_selection import train_test_split\n", 52 | "from sklearn.model_selection import StratifiedKFold\n", 53 | "from sklearn.model_selection import cross_val_score\n", 54 | "from sklearn.pipeline import Pipeline\n", 55 | "from sklearn.preprocessing import StandardScaler\n", 56 | "from sklearn.linear_model import LogisticRegression\n", 57 | "from sklearn.neighbors import KNeighborsClassifier\n", 58 | "from sklearn.tree import DecisionTreeClassifier\n", 59 | "from sklearn.svm import SVC\n", 60 | "from 
mlxtend.data import mnist_data\n", 61 | "from sklearn.metrics import accuracy_score\n", 62 | "\n", 63 | "# Loading and splitting the dataset\n", 64 | "# Note that this is a small (stratified) subset\n", 65 | "# of MNIST; it consists of 5000 samples only, that is,\n", 66 | "# 10% of the original MNIST dataset\n", 67 | "# http://yann.lecun.com/exdb/mnist/\n", 68 | "X, y = mnist_data()\n", 69 | "X = X.astype(np.float32)\n", 70 | "X_train, X_test, y_train, y_test = train_test_split(X, y,\n", 71 | " test_size=0.2,\n", 72 | " random_state=1,\n", 73 | " stratify=y)\n", 74 | "\n", 75 | "# Initializing Classifiers\n", 76 | "clf1 = LogisticRegression(multi_class='multinomial',\n", 77 | " solver='newton-cg',\n", 78 | " random_state=1)\n", 79 | "clf2 = KNeighborsClassifier(algorithm='ball_tree',\n", 80 | " leaf_size=50)\n", 81 | "clf3 = DecisionTreeClassifier(random_state=1)\n", 82 | "clf4 = SVC(random_state=1)\n", 83 | "\n", 84 | "# Building the pipelines\n", 85 | "pipe1 = Pipeline([('std', StandardScaler()),\n", 86 | " ('clf1', clf1)])\n", 87 | "\n", 88 | "pipe2 = Pipeline([('std', StandardScaler()),\n", 89 | " ('clf2', clf2)])\n", 90 | "\n", 91 | "pipe4 = Pipeline([('std', StandardScaler()),\n", 92 | " ('clf4', clf4)])\n", 93 | "\n", 94 | "\n", 95 | "# Setting up the parameter grids\n", 96 | "param_grid1 = [{'clf1__penalty': ['l2'],\n", 97 | " 'clf1__C': np.power(10., np.arange(-4, 4))}]\n", 98 | "\n", 99 | "param_grid2 = [{'clf2__n_neighbors': list(range(1, 10)),\n", 100 | " 'clf2__p': [1, 2]}]\n", 101 | "\n", 102 | "param_grid3 = [{'max_depth': list(range(1, 10)) + [None],\n", 103 | " 'criterion': ['gini', 'entropy']}]\n", 104 | "\n", 105 | "param_grid4 = [{'clf4__kernel': ['rbf'],\n", 106 | " 'clf4__C': np.power(10., np.arange(-4, 4)),\n", 107 | " 'clf4__gamma': np.power(10., np.arange(-5, 0))},\n", 108 | " {'clf4__kernel': ['linear'],\n", 109 | " 'clf4__C': np.power(10., np.arange(-4, 4))}]\n", 110 | "\n", 111 | "# Setting up multiple GridSearchCV objects, 1 for each 
algorithm\n", 112 | "gridcvs = {}\n", 113 | "inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=1)\n", 114 | "\n", 115 | "for pgrid, est, name in zip((param_grid1, param_grid2,\n", 116 | " param_grid3, param_grid4),\n", 117 | " (pipe1, pipe2, clf3, pipe4),\n", 118 | " ('Softmax', 'KNN', 'DTree', 'SVM')):\n", 119 | " gcv = GridSearchCV(estimator=est,\n", 120 | " param_grid=pgrid,\n", 121 | " scoring='accuracy',\n", 122 | " n_jobs=1,\n", 123 | " cv=inner_cv,\n", 124 | " verbose=0,\n", 125 | " refit=True)\n", 126 | " gridcvs[name] = gcv" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 3, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "DTree | outer ACC 77.25% +/- 2.05\n", 139 | "KNN | outer ACC 91.17% +/- 1.07\n", 140 | "SVM | outer ACC 91.93% +/- 1.38\n", 141 | "Softmax | outer ACC 90.25% +/- 1.31\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)\n", 147 | "\n", 148 | "for name, gs_est in sorted(gridcvs.items()):\n", 149 | " nested_score = cross_val_score(gs_est, \n", 150 | " X=X_train, \n", 151 | " y=y_train, \n", 152 | " cv=outer_cv,\n", 153 | " n_jobs=-1)\n", 154 | " print('%s | outer ACC %.2f%% +/- %.2f' % \n", 155 | " (name, nested_score.mean() * 100, nested_score.std() * 100))" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 4, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "Accuracy 91.30% (average over CV test folds)\n", 168 | "Best Parameters: {'clf4__C': 100.0, 'clf4__gamma': 0.001, 'clf4__kernel': 'rbf'}\n", 169 | "Training Accuracy: 100.00%\n", 170 | "Test Accuracy: 93.00%\n" 171 | ] 172 | } 173 | ], 174 | "source": [ 175 | "# Fitting a model to the whole training set\n", 176 | "# using the \"best\" algorithm\n", 177 | "best_algo = gridcvs['SVM']\n", 178 | 
"\n", 179 | "best_algo.fit(X_train, y_train)\n", 180 | "train_acc = accuracy_score(y_true=y_train, y_pred=best_algo.predict(X_train))\n", 181 | "test_acc = accuracy_score(y_true=y_test, y_pred=best_algo.predict(X_test))\n", 182 | "\n", 183 | "print('Accuracy %.2f%% (average over CV test folds)' %\n", 184 | " (100 * best_algo.best_score_))\n", 185 | "print('Best Parameters: %s' % gridcvs['SVM'].best_params_)\n", 186 | "print('Training Accuracy: %.2f%%' % (100 * train_acc))\n", 187 | "print('Test Accuracy: %.2f%%' % (100 * test_acc))" 188 | ] 189 | } 190 | ], 191 | "metadata": { 192 | "anaconda-cloud": {}, 193 | "kernelspec": { 194 | "display_name": "Python 3", 195 | "language": "python", 196 | "name": "python3" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.6.7" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 1 213 | } 214 | -------------------------------------------------------------------------------- /report-template/statcourse.sty: -------------------------------------------------------------------------------- 1 | % --------------------------------------------------------------- 2 | % 3 | % $Id: statcourse.sty,v 1.3 2005/10/24 19:56:15 awf Exp $ 4 | % 5 | % by Paolo.Ienne@di.epfl.ch 6 | % some mods by awf@acm.org 7 | % 8 | % --------------------------------------------------------------- 9 | % 10 | % no guarantee is given that the format corresponds perfectly to 11 | % IEEE 8.5" x 11" Proceedings, but most features should be ok. 
12 | % 13 | % --------------------------------------------------------------- 14 | % with LaTeX2e: 15 | % ============= 16 | % 17 | % use as 18 | % \documentclass[times,10pt,twocolumn]{article} 19 | % \usepackage{latex8} 20 | % \usepackage{times} 21 | % 22 | % --------------------------------------------------------------- 23 | 24 | % with LaTeX 2.09: 25 | % ================ 26 | % 27 | % use as 28 | % \documentstyle[times,art10,twocolumn,latex8]{article} 29 | % 30 | % --------------------------------------------------------------- 31 | % with both versions: 32 | % =================== 33 | % 34 | % specify \statcoursefinalcopy to emit the final camera-ready copy 35 | % 36 | % specify references as 37 | % \bibliographystyle{ieee} 38 | % \bibliography{...your files...} 39 | % 40 | % --------------------------------------------------------------- 41 | 42 | \usepackage{eso-pic} 43 | \usepackage{xspace} 44 | 45 | \typeout{CVPR 8.5 x 11-Inch Proceedings Style `statcourse.sty'.} 46 | 47 | % ten point helvetica bold required for captions 48 | % eleven point times bold required for second-order headings 49 | % in some sites the name of the fonts may differ, 50 | % change the name here: 51 | \font\statcoursetenhv = phvb at 8pt % *** IF THIS FAILS, SEE statcourse.sty *** 52 | \font\elvbf = ptmb scaled 1100 53 | 54 | % If the above lines give an error message, try to comment them and 55 | % uncomment these: 56 | %\font\statcoursetenhv = phvb7t at 8pt 57 | %\font\elvbf = ptmb7t scaled 1100 58 | 59 | % set dimensions of columns, gap between columns, and paragraph indent 60 | \setlength{\textheight}{8.875in} 61 | \setlength{\textwidth}{6.875in} 62 | \setlength{\columnsep}{0.3125in} 63 | \setlength{\topmargin}{0in} 64 | \setlength{\headheight}{0in} 65 | \setlength{\headsep}{0in} 66 | \setlength{\parindent}{1pc} 67 | \setlength{\oddsidemargin}{-.304in} 68 | \setlength{\evensidemargin}{-.304in} 69 | 70 | \newif\ifstatcoursefinal 71 | \statcoursefinalfalse 72 | 
\def\statcoursefinalcopy{\global\statcoursefinaltrue} 73 | 74 | % memento from size10.clo 75 | % \normalsize{\@setfontsize\normalsize\@xpt\@xiipt} 76 | % \small{\@setfontsize\small\@ixpt{11}} 77 | % \footnotesize{\@setfontsize\footnotesize\@viiipt{9.5}} 78 | % \scriptsize{\@setfontsize\scriptsize\@viipt\@viiipt} 79 | % \tiny{\@setfontsize\tiny\@vpt\@vipt} 80 | % \large{\@setfontsize\large\@xiipt{14}} 81 | % \Large{\@setfontsize\Large\@xivpt{18}} 82 | % \LARGE{\@setfontsize\LARGE\@xviipt{22}} 83 | % \huge{\@setfontsize\huge\@xxpt{25}} 84 | % \Huge{\@setfontsize\Huge\@xxvpt{30}} 85 | 86 | \def\@maketitle 87 | { 88 | \newpage 89 | \null 90 | \vskip .375in 91 | \begin{center} 92 | {\Large \bf \@title \par} 93 | % additional two empty lines at the end of the title 94 | \vspace*{24pt} 95 | { 96 | \large 97 | \lineskip .5em 98 | \begin{tabular}[t]{c} 99 | \ifstatcoursefinal\@author\else Anonymous CVPR submission\\ 100 | \vspace*{1pt}\\%This space will need to be here in the final copy, so don't squeeze it out for the review copy. 
101 | Paper ID \statcoursePaperID \fi 102 | \end{tabular} 103 | \par 104 | } 105 | % additional small space at the end of the author name 106 | \vskip .5em 107 | % additional empty line at the end of the title block 108 | \vspace*{12pt} 109 | \end{center} 110 | } 111 | 112 | \def\abstract 113 | {% 114 | \centerline{\large\bf Abstract}% 115 | \vspace*{12pt}% 116 | \it% 117 | } 118 | 119 | \def\endabstract 120 | { 121 | % additional empty line at the end of the abstract 122 | \vspace*{12pt} 123 | } 124 | 125 | \def\affiliation#1{\gdef\@affiliation{#1}} \gdef\@affiliation{} 126 | 127 | \newlength{\@ctmp} 128 | \newlength{\@figindent} 129 | \setlength{\@figindent}{1pc} 130 | 131 | \long\def\@makecaption#1#2{ 132 | \setbox\@tempboxa\hbox{\small \noindent #1.~#2} 133 | \setlength{\@ctmp}{\hsize} 134 | \addtolength{\@ctmp}{-\@figindent}\addtolength{\@ctmp}{-\@figindent} 135 | % IF longer than one indented paragraph line 136 | \ifdim \wd\@tempboxa >\@ctmp 137 | % THEN DON'T set as an indented paragraph 138 | {\small #1.~#2\par} 139 | \else 140 | % ELSE center 141 | \hbox to\hsize{\hfil\box\@tempboxa\hfil} 142 | \fi} 143 | 144 | % correct heading spacing and type 145 | \def\statcoursesection{\@startsection {section}{1}{\z@} 146 | {10pt plus 2pt minus 2pt}{7pt} {\large\bf}} 147 | \def\statcoursessect#1{\statcoursesection*{#1}} 148 | \def\statcoursesect#1{\statcoursesection{\hskip -1em.~#1}} 149 | \def\section{\@ifstar\statcoursessect\statcoursesect} 150 | 151 | \def\statcoursesubsection{\@startsection {subsection}{2}{\z@} 152 | {8pt plus 2pt minus 2pt}{6pt} {\elvbf}} 153 | \def\statcoursessubsect#1{\statcoursesubsection*{#1}} 154 | \def\statcoursesubsect#1{\statcoursesubsection{\hskip -1em.~#1}} 155 | \def\subsection{\@ifstar\statcoursessubsect\statcoursesubsect} 156 | 157 | %% --------- Page background marks: Ruler and confidentiality 158 | 159 | % ----- define vruler 160 | \makeatletter 161 | \newbox\statcourserulerbox 162 | \newcount\statcourserulercount 163 | 
\newdimen\statcourseruleroffset 164 | \newdimen\cv@lineheight 165 | \newdimen\cv@boxheight 166 | \newbox\cv@tmpbox 167 | \newcount\cv@refno 168 | \newcount\cv@tot 169 | % NUMBER with left flushed zeros \fillzeros[] 170 | \newcount\cv@tmpc@ \newcount\cv@tmpc 171 | \def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi 172 | \cv@tmpc=1 % 173 | \loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi 174 | \ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat 175 | \ifnum#2<0\advance\cv@tmpc1\relax-\fi 176 | \loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat 177 | \cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}% 178 | % \makevruler[][][][][] 179 | \def\makevruler[#1][#2][#3][#4][#5]{\begingroup\offinterlineskip 180 | \textheight=#5\vbadness=10000\vfuzz=120ex\overfullrule=0pt% 181 | \global\setbox\statcourserulerbox=\vbox to \textheight{% 182 | {\parskip=0pt\hfuzz=150em\cv@boxheight=\textheight 183 | \cv@lineheight=#1\global\statcourserulercount=#2% 184 | \cv@tot\cv@boxheight\divide\cv@tot\cv@lineheight\advance\cv@tot2% 185 | \cv@refno1\vskip-\cv@lineheight\vskip1ex% 186 | \loop\setbox\cv@tmpbox=\hbox to0cm{{\statcoursetenhv\hfil\fillzeros[#4]\statcourserulercount}}% 187 | \ht\cv@tmpbox\cv@lineheight\dp\cv@tmpbox0pt\box\cv@tmpbox\break 188 | \advance\cv@refno1\global\advance\statcourserulercount#3\relax 189 | \ifnum\cv@refno<\cv@tot\repeat}}\endgroup}% 190 | \makeatother 191 | % ----- end of vruler 192 | 193 | % \makevruler[][][][][] 194 | \def\statcourseruler#1{\makevruler[12pt][#1][1][3][0.993\textheight]\usebox{\statcourserulerbox}} 195 | \AddToShipoutPicture{% 196 | \ifstatcoursefinal\else 197 | %\AtTextLowerLeft{% 198 | % \color[gray]{.15}\framebox(\LenToUnit{\textwidth},\LenToUnit{\textheight}){} 199 | %} 200 | \statcourseruleroffset=\textheight 201 | \advance\statcourseruleroffset by -3.7pt 202 | \color[rgb]{.5,.5,1} 203 | 
\AtTextUpperLeft{% 204 | \put(\LenToUnit{-35pt},\LenToUnit{-\statcourseruleroffset}){%left ruler 205 | \statcourseruler{\statcourserulercount}} 206 | \put(\LenToUnit{\textwidth\kern 30pt},\LenToUnit{-\statcourseruleroffset}){%right ruler 207 | \statcourseruler{\statcourserulercount}} 208 | } 209 | \def\pid{\parbox{1in}{\begin{center}\bf\sf{\small CVPR}\\\#\statcoursePaperID\end{center}}} 210 | \AtTextUpperLeft{%paperID in corners 211 | \put(\LenToUnit{-65pt},\LenToUnit{45pt}){\pid} 212 | \put(\LenToUnit{\textwidth\kern-8pt},\LenToUnit{45pt}){\pid} 213 | } 214 | \AtTextUpperLeft{%confidential 215 | \put(0,\LenToUnit{1cm}){\parbox{\textwidth}{\centering\statcoursetenhv 216 | CVPR 2018 Submission \#\statcoursePaperID. CONFIDENTIAL REVIEW COPY. DO NOT DISTRIBUTE.}} 217 | } 218 | \fi 219 | } 220 | 221 | %%% Make figure placement a little more predictable. 222 | % We trust the user to move figures if this results 223 | % in ugliness. 224 | % Minimize bad page breaks at figures 225 | \renewcommand{\textfraction}{0.01} 226 | \renewcommand{\floatpagefraction}{0.99} 227 | \renewcommand{\topfraction}{0.99} 228 | \renewcommand{\bottomfraction}{0.99} 229 | \renewcommand{\dblfloatpagefraction}{0.99} 230 | \renewcommand{\dbltopfraction}{0.99} 231 | \setcounter{totalnumber}{99} 232 | \setcounter{topnumber}{99} 233 | \setcounter{bottomnumber}{99} 234 | 235 | % Add a period to the end of an abbreviation unless there's one 236 | % already, then \xspace. 
\makeatletter
% \onedot peeks at the next token (via \futurelet) without consuming it.
% \@onedot then emits "." followed by \null unless that next token is already
% a period, so "\eg." and "\eg," both end up with exactly one period.
% \xspace is called afterwards in either case.
% NOTE(review): \xspace is presumably provided by the xspace package loaded
% earlier in this file -- confirm.
\DeclareRobustCommand\onedot{\futurelet\@let@token\@onedot}
\def\@onedot{\ifx\@let@token.\else.\null\fi\xspace}

% Abbreviation shortcuts.  Each one omits its final period and lets \onedot
% supply it.
\def\eg{\emph{e.g}\onedot} \def\Eg{\emph{E.g}\onedot}
\def\ie{\emph{i.e}\onedot} \def\Ie{\emph{I.e}\onedot}
% "cf." abbreviates the single Latin word "confer", so it has no inner period
% (the previous definitions typeset "c.f." / "C.f.").
\def\cf{\emph{cf}\onedot} \def\Cf{\emph{Cf}\onedot}
\def\etc{\emph{etc}\onedot} \def\vs{\emph{vs}\onedot}
\def\wrt{w.r.t\onedot} \def\dof{d.o.f\onedot}
\def\etal{\emph{et al}\onedot}
\makeatother

% ---------------------------------------------------------------

-------------------------------------------------------------------------------- /hw_01/train_data.txt: -------------------------------------------------------------------------------- 1 | x1 x2 y 2 | -3.84 -4.40 0 3 | 16.36 6.54 1 4 | -2.73 -5.13 0 5 | 4.83 7.22 1 6 | 3.66 -5.34 0 7 | -0.25 3.12 1 8 | -4.05 -5.13 0 9 | 5.92 4.12 1 10 | 5.55 -1.74 1 11 | 5.68 3.40 1 12 | 10.18 8.89 1 13 | -5.23 -6.67 0 14 | -2.94 -7.10 0 15 | 3.17 6.16 1 16 | 1.82 -1.63 1 17 | -9.18 -1.19 0 18 | 1.28 -4.73 0 19 | -1.49 -2.72 0 20 | 7.21 1.48 1 21 | 0.83 6.78 1 22 | -13.54 -1.02 0 23 | 3.14 1.96 1 24 | 0.94 0.11 1 25 | -4.76 -8.73 0 26 | 5.20 7.22 1 27 | 4.49 4.01 1 28 | 5.28 -2.48 1 29 | 6.70 -6.34 0 30 | 5.42 -2.77 1 31 | -0.43 -3.38 0 32 | -5.37 -3.82 0 33 | -0.09 -8.31 0 34 | -10.86 -9.11 0 35 | 2.16 4.69 1 36 | -1.67 0.07 0 37 | 0.18 -9.78 0 38 | 4.27 -13.91 0 39 | 3.71 9.04 1 40 | 9.27 1.85 1 41 | 1.80 4.61 1 42 | -7.37 -11.87 0 43 | -0.37 -7.59 0 44 | -0.96 -5.23 0 45 | -3.35 -6.77 0 46 | 4.13 -7.18 0 47 | 10.44 -6.05 1 48 | -4.22 -7.05 0 49 | 3.72 8.25 1 50 | 2.76 -0.68 1 51 | -3.50 -5.68 0 52 | 5.95 -10.07 0 53 | -5.17 -4.59 0 54 | -1.76 -0.97 0 55 | -7.83 -3.18 0 56 | -1.57 0.57 1 57 | 9.14 4.46 1 58 | -10.80 -4.57 0 59 | -0.08 3.66 0 60 | -3.28 -1.54 1 61 | -1.04 -5.42 0 62 | 10.21 3.82 1 63 | 3.71 2.54 1 64 | 12.28 -0.10 1 65 | -0.84 -3.87 0 66 | 6.53 0.10 1 67 | 8.97 2.10 1 68 | -3.97 -4.71 0 69 |
2.84 -7.89 0 70 | -4.31 -2.16 0 71 | -2.30 -4.22 0 72 | -3.62 -7.97 0 73 | 11.72 -3.33 1 74 | 0.79 -4.98 0 75 | 11.03 -7.03 0 76 | -3.30 -2.64 0 77 | 7.84 -5.64 0 78 | -5.49 -1.57 0 79 | -8.69 -9.69 0 80 | -5.89 -5.96 0 81 | 5.36 2.73 1 82 | 1.53 -4.95 0 83 | -1.05 4.01 1 84 | -4.65 -7.61 0 85 | -4.66 -0.78 0 86 | 1.18 -9.71 0 87 | 4.03 5.24 1 88 | 4.09 4.61 1 89 | -0.88 -4.48 0 90 | 0.56 -5.17 0 91 | 12.29 -2.51 1 92 | 9.77 6.69 1 93 | -4.52 -11.13 0 94 | 0.80 -8.83 0 95 | -4.89 -8.58 0 96 | 3.40 -2.12 1 97 | 3.25 3.71 1 98 | 4.78 0.08 1 99 | 6.11 4.34 1 100 | -7.67 -10.05 0 101 | 2.69 -0.84 1 102 | -3.69 -10.78 0 103 | 0.04 -2.91 1 104 | 8.93 7.30 1 105 | 2.85 1.86 1 106 | 10.66 -2.37 1 107 | 4.36 -2.10 1 108 | 2.53 1.89 1 109 | 8.36 10.60 1 110 | 9.12 -1.53 1 111 | 2.06 -8.03 0 112 | 0.02 -5.39 0 113 | 12.79 8.90 1 114 | -5.52 -9.25 0 115 | 3.61 5.99 1 116 | -5.45 -5.48 0 117 | 2.74 11.48 1 118 | -8.05 1.79 0 119 | 8.87 -3.80 1 120 | 2.33 7.95 1 121 | 5.22 7.43 1 122 | 4.34 0.68 1 123 | 6.33 3.30 1 124 | 9.39 3.89 1 125 | 6.83 2.22 1 126 | 5.69 6.50 1 127 | -6.70 -10.23 0 128 | 0.89 3.70 1 129 | 2.74 -9.34 0 130 | -0.40 6.67 1 131 | 0.63 -0.58 0 132 | -0.97 -0.19 0 133 | -0.38 -13.55 0 134 | 7.35 1.79 1 135 | 3.10 -11.50 0 136 | -1.53 -7.31 0 137 | -5.52 -4.68 0 138 | 4.38 -5.04 0 139 | 2.22 -0.00 1 140 | -1.05 -3.75 0 141 | 1.53 -12.24 0 142 | 6.83 -2.38 1 143 | -3.96 -9.17 0 144 | 3.77 1.20 1 145 | 10.50 -1.03 1 146 | 7.93 0.80 1 147 | 7.26 -6.40 0 148 | 4.84 3.15 1 149 | 10.10 2.34 1 150 | -4.68 -8.24 0 151 | 14.16 2.35 1 152 | -3.83 -0.51 0 153 | -1.74 -7.86 0 154 | 7.38 7.20 1 155 | -5.17 -1.23 0 156 | 3.13 3.11 1 157 | -5.92 -10.49 0 158 | 15.94 9.48 1 159 | -3.12 -9.22 0 160 | 11.43 -4.44 1 161 | -0.05 -4.04 0 162 | 4.63 6.95 1 163 | 4.13 5.42 1 164 | 4.24 -6.61 0 165 | 14.14 -6.83 1 166 | -14.85 -2.24 0 167 | 11.43 1.90 1 168 | 12.33 1.21 1 169 | 4.59 4.69 1 170 | 4.03 0.40 1 171 | 1.64 -2.76 0 172 | 5.90 1.57 1 173 | 2.83 6.11 1 174 | -2.02 -3.45 0 175 
| 7.11 8.73 1 176 | 7.76 3.95 1 177 | 5.94 3.97 1 178 | 7.00 4.18 1 179 | -8.12 -12.72 0 180 | -3.11 -4.88 0 181 | 6.72 5.81 1 182 | -8.97 -4.16 0 183 | 6.42 0.60 1 184 | -8.41 -5.61 0 185 | -4.09 -2.59 0 186 | -0.63 -2.20 1 187 | -0.02 -12.95 0 188 | -1.45 -12.04 0 189 | -10.99 4.08 0 190 | 14.14 2.09 1 191 | 1.37 3.49 1 192 | -11.21 -12.60 0 193 | -6.72 -2.12 0 194 | 9.90 2.87 1 195 | 1.43 -10.15 0 196 | -4.91 -8.80 0 197 | -0.15 -6.41 0 198 | -1.50 -5.15 0 199 | -3.31 -6.48 0 200 | 4.82 -2.20 1 201 | 4.88 4.83 1 202 | -4.89 -0.84 0 203 | -2.56 -1.44 0 204 | -5.38 -3.27 0 205 | 5.31 1.29 1 206 | 2.40 -8.01 0 207 | -3.84 1.85 0 208 | -8.64 0.75 0 209 | 6.58 6.45 1 210 | -6.61 -7.82 0 211 | -2.16 -5.64 0 212 | 7.00 1.84 1 213 | 3.56 -7.63 0 214 | 4.14 -3.39 1 215 | 1.21 -5.49 0 216 | 9.53 0.58 1 217 | -8.63 -3.64 0 218 | 10.51 0.32 1 219 | 12.28 8.25 1 220 | 6.30 9.16 1 221 | -8.06 -7.50 0 222 | -8.03 -9.91 0 223 | 6.51 6.24 1 224 | -6.99 -12.41 0 225 | -7.52 -1.73 0 226 | -3.81 -6.57 0 227 | -8.33 0.31 0 228 | -3.07 -0.45 0 229 | 6.49 4.80 1 230 | -2.00 -0.73 0 231 | 5.91 3.55 1 232 | 4.41 -5.24 1 233 | 4.69 -2.42 1 234 | -0.44 -0.16 1 235 | 4.42 3.53 1 236 | 1.96 -2.66 0 237 | 7.35 -1.35 1 238 | -6.70 -1.99 0 239 | -2.80 -2.71 0 240 | -4.58 -6.58 0 241 | -3.40 -3.48 0 242 | -1.53 -0.63 0 243 | -5.97 -2.88 0 244 | 4.52 -3.56 0 245 | -2.74 -3.33 0 246 | -8.16 -0.73 0 247 | 2.88 -1.97 1 248 | -0.15 -5.59 0 249 | 7.59 3.10 1 250 | 5.66 2.11 1 251 | 6.66 1.61 1 252 | -6.10 -8.85 0 253 | 8.85 4.87 1 254 | -0.23 -2.25 0 255 | 5.42 6.79 1 256 | 3.95 -1.02 0 257 | -1.68 6.95 1 258 | 9.08 1.09 1 259 | -6.78 -6.66 0 260 | -2.70 -2.01 0 261 | 8.34 0.42 1 262 | 1.72 0.18 1 263 | 7.00 8.32 1 264 | 7.93 8.65 1 265 | 5.25 8.99 1 266 | 8.60 8.71 1 267 | 6.35 3.75 1 268 | 11.18 -7.69 1 269 | 4.05 7.97 1 270 | -6.92 3.60 0 271 | 9.77 1.08 1 272 | 1.00 -4.85 1 273 | -3.50 -3.90 0 274 | -5.00 -6.54 0 275 | 9.92 8.11 1 276 | 10.27 2.32 1 277 | 12.08 2.77 1 278 | -8.65 -3.61 0 279 | 
6.10 -3.14 0 280 | 12.19 1.87 1 281 | 11.21 -0.54 1 282 | 2.47 -2.72 1 283 | 5.38 -2.78 1 284 | 5.18 1.96 1 285 | 10.55 0.84 1 286 | 3.82 9.14 1 287 | -6.08 -14.13 0 288 | -2.09 -2.07 0 289 | 0.05 0.24 1 290 | -3.57 -3.27 0 291 | 0.50 -6.19 0 292 | -5.03 0.37 0 293 | -9.77 -6.21 0 294 | -2.97 -5.53 0 295 | -5.04 -12.17 0 296 | 2.59 -4.90 0 297 | 6.53 0.61 1 298 | 5.29 3.97 1 299 | 1.32 0.07 1 300 | 3.03 7.38 1 301 | -5.93 1.51 0 302 | -0.79 -12.55 0 303 | -4.89 -3.07 0 304 | -2.02 -8.23 0 305 | -1.91 0.51 0 306 | 1.28 -8.06 0 307 | -2.17 -0.35 0 308 | -5.11 -0.12 0 309 | -0.39 -3.54 0 310 | -2.81 -11.67 0 311 | 5.85 5.42 1 312 | 5.46 10.15 1 313 | -3.51 -7.83 0 314 | 3.84 8.11 1 315 | -4.96 -4.69 0 316 | 1.93 9.17 1 317 | 15.33 4.70 1 318 | 7.52 8.67 1 319 | -2.23 -8.06 0 320 | -6.72 -10.20 0 321 | -6.04 -4.30 0 322 | 1.96 -7.93 0 323 | 7.78 -5.09 1 324 | 5.82 3.20 1 325 | 0.76 5.85 1 326 | -6.11 -9.28 0 327 | 3.83 10.35 1 328 | -8.57 -4.99 0 329 | 8.56 5.87 1 330 | 6.15 0.12 1 331 | 4.00 1.99 1 332 | 3.48 -0.73 1 333 | -11.02 -5.98 0 334 | 6.14 5.43 1 335 | -3.27 -2.94 0 336 | 2.18 3.36 1 337 | 0.49 3.84 1 338 | 2.08 1.81 1 339 | 17.31 0.60 1 340 | 2.98 8.29 1 341 | 2.05 5.49 1 342 | 2.29 0.69 0 343 | -3.56 0.85 0 344 | 8.20 -1.62 1 345 | -5.60 -3.07 0 346 | 6.52 3.71 1 347 | -7.34 -3.16 0 348 | -6.43 -7.56 0 349 | -8.50 -7.98 0 350 | 1.36 -0.27 1 351 | 7.82 -3.16 1 352 | 4.59 -1.90 1 353 | 7.24 -5.03 1 354 | -5.51 -6.32 0 355 | 0.34 -4.44 0 356 | 2.02 -2.24 0 357 | -7.31 -4.34 0 358 | -0.46 8.11 1 359 | -1.79 -1.83 0 360 | -11.32 -6.57 0 361 | 2.50 4.13 1 362 | 2.92 8.44 1 363 | 0.69 0.32 1 364 | 10.97 -0.40 1 365 | -1.04 -12.37 0 366 | 3.66 3.09 1 367 | -2.28 -6.20 0 368 | 3.73 -1.49 1 369 | -1.29 -7.59 0 370 | 5.97 -1.52 1 371 | -1.93 0.49 0 372 | 3.40 -2.34 1 373 | 8.66 4.40 1 374 | -2.75 13.66 1 375 | 1.60 -13.26 0 376 | 14.95 4.36 1 377 | 3.86 -1.50 1 378 | 13.71 2.04 1 379 | 2.72 4.63 1 380 | 6.24 -0.43 1 381 | 4.38 -1.27 1 382 | 9.06 9.67 1 383 | 3.83 5.15 
1 384 | 4.14 -11.07 0 385 | -4.44 -6.76 0 386 | -6.64 -9.32 0 387 | -4.65 1.24 0 388 | 4.55 0.21 1 389 | 5.57 8.57 1 390 | -4.79 -5.34 0 391 | 2.97 -4.13 1 392 | 5.99 -2.15 1 393 | -4.93 -3.56 0 394 | -8.14 -12.20 0 395 | -0.14 -6.42 0 396 | -4.79 -3.73 0 397 | 0.68 -3.48 0 398 | -4.16 -3.25 0 399 | 10.64 2.00 1 400 | -8.16 -7.55 0 401 | 5.96 5.37 1 402 | 11.09 -3.39 1 403 | 7.46 -4.72 1 404 | -0.42 2.09 0 405 | -1.40 1.66 0 406 | 9.24 -0.16 1 407 | -2.97 -11.87 0 408 | 2.60 -10.34 0 409 | -1.24 -7.76 0 410 | -2.84 -7.49 0 411 | 10.89 9.67 1 412 | 1.16 -5.77 1 413 | 1.94 -5.81 0 414 | 10.42 -0.43 1 415 | -2.81 -3.98 0 416 | 3.73 -4.75 1 417 | 6.19 -2.02 1 418 | 10.06 3.45 1 419 | -1.59 -3.61 0 420 | -0.19 6.68 1 421 | 7.74 5.71 1 422 | 4.56 3.95 1 423 | -3.00 0.04 0 424 | 5.94 1.09 1 425 | -7.53 -2.33 0 426 | 4.57 5.36 1 427 | 5.10 1.44 1 428 | 0.20 -6.57 0 429 | 1.37 8.58 1 430 | -1.90 -12.73 0 431 | -4.96 -9.93 0 432 | -1.05 4.67 1 433 | 0.52 6.56 1 434 | -1.27 -5.65 0 435 | -0.93 0.78 1 436 | -2.12 3.12 1 437 | -3.87 -2.52 0 438 | 3.61 5.72 1 439 | -1.07 -8.50 0 440 | -1.38 -2.40 0 441 | 13.24 1.52 1 442 | -5.94 -6.61 0 443 | 7.74 -6.51 1 444 | 2.35 2.45 1 445 | -1.94 -4.15 0 446 | -6.16 -5.45 0 447 | 6.09 -0.46 1 448 | 1.99 -10.66 0 449 | -4.25 -5.11 0 450 | 4.65 1.91 1 451 | 2.85 5.48 1 452 | -1.24 -10.13 0 453 | 0.93 -12.92 0 454 | 7.44 -4.40 1 455 | 4.18 2.07 1 456 | -1.03 1.92 1 457 | -9.23 -5.69 0 458 | -8.26 -5.02 0 459 | 5.56 -0.05 1 460 | 11.94 5.48 1 461 | 3.57 0.19 1 462 | -4.58 -1.32 0 463 | 2.34 5.58 1 464 | 0.71 -14.05 0 465 | -0.77 -6.71 0 466 | -8.67 -3.51 0 467 | 8.42 -2.26 1 468 | -0.81 -11.36 0 469 | 0.15 -12.24 0 470 | 1.08 2.51 1 471 | 3.28 4.80 1 472 | -0.77 -3.27 1 473 | 4.47 0.62 1 474 | -4.60 -4.60 0 475 | -0.24 1.90 1 476 | -2.33 -5.57 0 477 | -7.92 -7.43 0 478 | 2.76 -5.48 0 479 | 10.90 7.48 1 480 | -4.81 -2.50 0 481 | -9.87 0.80 0 482 | 14.55 3.38 1 483 | -3.02 -5.36 0 484 | -5.06 -10.03 0 485 | 2.62 10.62 1 486 | 6.75 1.53 1 487 | 
6.57 4.42 1 488 | -5.56 -3.18 0 489 | -3.70 -7.45 0 490 | 8.03 3.40 1 491 | -3.10 -2.89 0 492 | -3.57 -11.72 0 493 | 2.87 -0.17 1 494 | 5.93 2.04 1 495 | -9.25 -2.29 0 496 | 5.21 10.32 1 497 | -5.71 -2.44 0 498 | -0.46 2.13 0 499 | -1.83 -6.59 0 500 | 4.24 -0.65 1 501 | 5.84 2.89 1 502 | -4.12 -3.02 0 503 | 5.04 6.66 1 504 | -8.33 3.65 0 505 | 6.01 5.23 1 506 | 6.95 -0.67 1 507 | 4.10 2.33 1 508 | -3.21 -9.92 0 509 | -9.60 -9.94 0 510 | -0.52 -0.78 0 511 | 7.93 8.80 1 512 | -9.67 -5.47 0 513 | 3.25 -1.47 1 514 | 10.65 5.04 1 515 | -5.51 2.58 1 516 | -1.96 -1.92 0 517 | -7.86 0.78 0 518 | -1.39 -8.28 0 519 | -2.48 -9.59 0 520 | 10.16 3.82 1 521 | 2.95 3.52 1 522 | 5.94 6.22 1 523 | -2.61 -3.42 0 524 | -10.44 -0.81 0 525 | -3.32 1.46 0 526 | 9.07 0.55 1 527 | 4.19 3.70 1 528 | 1.46 0.04 1 529 | 7.85 3.80 1 530 | 0.84 -5.74 0 531 | -0.22 1.19 1 532 | 9.63 9.58 1 533 | 9.67 2.25 1 534 | 4.58 11.08 1 535 | -8.67 -3.77 0 536 | 8.11 5.11 1 537 | -0.07 -0.68 0 538 | -1.64 -2.83 0 539 | 3.16 0.57 1 540 | -10.26 -12.83 0 541 | -6.24 -3.93 0 542 | -9.27 -7.59 0 543 | 9.04 -4.97 1 544 | -2.17 -9.35 0 545 | -6.71 -6.63 0 546 | 3.85 7.37 1 547 | 1.86 6.00 1 548 | 9.99 6.05 1 549 | -0.42 -4.97 0 550 | -8.11 -8.39 0 551 | -4.51 0.34 0 552 | -4.18 -3.82 0 553 | -6.86 -5.77 0 554 | 9.11 -0.19 1 555 | -1.96 0.63 0 556 | 14.16 -5.06 1 557 | -2.73 -11.75 0 558 | 6.44 3.08 1 559 | 1.01 -2.94 0 560 | -0.31 -0.05 0 561 | -0.63 -6.24 1 562 | -7.52 -4.67 0 563 | -2.70 -2.01 0 564 | 6.00 1.02 1 565 | -5.10 -4.24 0 566 | 11.22 2.00 1 567 | 5.33 -0.79 0 568 | -2.94 0.57 0 569 | 1.86 -2.52 0 570 | -7.77 -4.05 0 571 | 4.95 4.44 1 572 | -10.64 -5.98 0 573 | 8.72 -1.71 1 574 | -0.91 -9.54 0 575 | -2.29 -2.71 1 576 | -7.20 -15.09 0 577 | -4.73 -2.52 0 578 | 5.78 7.52 1 579 | 8.60 2.52 1 580 | 5.55 4.51 1 581 | 2.44 -4.08 0 582 | 0.75 -8.00 0 583 | 12.48 5.19 1 584 | -4.74 3.36 0 585 | 1.39 2.07 1 586 | 5.83 2.80 1 587 | -6.47 -0.05 0 588 | 6.04 5.53 1 589 | -0.94 -12.89 0 590 | 1.00 -10.54 0 591 | 
-12.01 -0.84 0 592 | 4.10 6.69 1 593 | 6.33 9.37 1 594 | -10.23 -0.92 0 595 | 6.39 -3.54 1 596 | -0.75 -0.03 1 597 | -1.03 -5.81 0 598 | 1.11 4.33 1 599 | -3.33 -5.00 0 600 | 3.58 1.97 1 601 | 5.41 4.52 1 602 | -------------------------------------------------------------------------------- /hw_03/data/wine.data: -------------------------------------------------------------------------------- 1 | 1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065 2 | 1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050 3 | 1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185 4 | 1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480 5 | 1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735 6 | 1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450 7 | 1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290 8 | 1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295 9 | 1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045 10 | 1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045 11 | 1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510 12 | 1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280 13 | 1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320 14 | 1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150 15 | 1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547 16 | 1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310 17 | 1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280 18 | 1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130 19 | 1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680 20 | 1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845 21 | 1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780 22 | 1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770 23 | 1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035 24 | 
1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015 25 | 1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845 26 | 1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830 27 | 1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195 28 | 1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285 29 | 1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915 30 | 1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035 31 | 1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285 32 | 1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515 33 | 1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990 34 | 1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235 35 | 1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095 36 | 1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920 37 | 1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880 38 | 1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105 39 | 1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020 40 | 1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760 41 | 1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795 42 | 1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035 43 | 1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095 44 | 1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680 45 | 1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885 46 | 1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080 47 | 1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065 48 | 1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985 49 | 1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060 50 | 1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260 51 | 1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150 52 | 1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265 53 | 
1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190 54 | 1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375 55 | 1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060 56 | 1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120 57 | 1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970 58 | 1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270 59 | 1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285 60 | 2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520 61 | 2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680 62 | 2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450 63 | 2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630 64 | 2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420 65 | 2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355 66 | 2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678 67 | 2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502 68 | 2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510 69 | 2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750 70 | 2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718 71 | 2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870 72 | 2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410 73 | 2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472 74 | 2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985 75 | 2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886 76 | 2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428 77 | 2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392 78 | 2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500 79 | 2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750 80 | 2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463 81 | 2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278 82 | 2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714 83 | 
2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630 84 | 2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515 85 | 2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520 86 | 2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450 87 | 2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495 88 | 2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562 89 | 2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680 90 | 2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625 91 | 2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480 92 | 2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450 93 | 2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495 94 | 2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290 95 | 2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345 96 | 2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937 97 | 2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625 98 | 2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428 99 | 2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660 100 | 2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406 101 | 2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710 102 | 2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562 103 | 2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438 104 | 2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415 105 | 2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672 106 | 2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315 107 | 2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510 108 | 2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488 109 | 2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312 110 | 2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680 111 | 2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562 112 | 2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325 113 | 
2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607 114 | 2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434 115 | 2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385 116 | 2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407 117 | 2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495 118 | 2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345 119 | 2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372 120 | 2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564 121 | 2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625 122 | 2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465 123 | 2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365 124 | 2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380 125 | 2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380 126 | 2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378 127 | 2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352 128 | 2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466 129 | 2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342 130 | 2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580 131 | 3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630 132 | 3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530 133 | 3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560 134 | 3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600 135 | 3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650 136 | 3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695 137 | 3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720 138 | 3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515 139 | 3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580 140 | 3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590 141 | 3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600 142 | 3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780 143 | 
3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520 144 | 3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550 145 | 3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855 146 | 3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830 147 | 3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415 148 | 3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625 149 | 3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650 150 | 3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550 151 | 3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500 152 | 3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480 153 | 3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425 154 | 3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675 155 | 3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640 156 | 3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725 157 | 3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480 158 | 3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880 159 | 3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660 160 | 3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620 161 | 3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520 162 | 3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680 163 | 3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570 164 | 3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675 165 | 3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615 166 | 3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520 167 | 3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695 168 | 3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685 169 | 3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750 170 | 3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630 171 | 3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510 172 | 3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470 173 | 
3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660 174 | 3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740 175 | 3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750 176 | 3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835 177 | 3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840 178 | 3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560 179 | -------------------------------------------------------------------------------- /report-template/ieee.bst: -------------------------------------------------------------------------------- 1 | 2 | % --------------------------------------------------------------- 3 | % 4 | % ieee.bst,v 1.0 2002/04/16 5 | % 6 | % by Glenn Paulley (paulley@acm.org) 7 | % 8 | % Modified from latex8.bst 1995/09/15 15:13:49 ienne Exp $ 9 | % 10 | % by Paolo.Ienne@di.epfl.ch 11 | % 12 | % 13 | % --------------------------------------------------------------- 14 | % 15 | % no guarantee is given that the format corresponds perfectly to 16 | % IEEE 8.5" x 11" Proceedings, but most features should be ok. 17 | % 18 | % --------------------------------------------------------------- 19 | % 20 | % `ieee' from BibTeX standard bibliography style `abbrv' 21 | % version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09. 22 | % Copyright (C) 1985, all rights reserved. 23 | % Copying of this file is authorized only if either 24 | % (1) you make absolutely no changes to your copy, including name, or 25 | % (2) if you do make changes, you name it something other than 26 | % btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst. 27 | % This restriction helps ensure that all standard styles are identical. 28 | % The file btxbst.doc has the documentation for this style. 

% Fields every entry may carry (first list), no integer entry variables
% (second list), and one string entry variable, `label' (third list).
ENTRY
  { address
    author
    booktitle
    chapter
    edition
    editor
    howpublished
    institution
    journal
    key
    month
    note
    number
    organization
    pages
    publisher
    school
    series
    title
    type
    volume
    year
  }
  {}
  { label }

% output.state records where we are inside the current entry; the other four
% integers are the values it can take (assigned in init.state.consts).
INTEGERS { output.state before.all mid.sentence after.sentence after.block }

% Assign the four state constants: 0 = before.all, 1 = mid.sentence,
% 2 = after.sentence, 3 = after.block.
FUNCTION {init.state.consts}
{ #0 'before.all :=
  #1 'mid.sentence :=
  #2 'after.sentence :=
  #3 'after.block :=
}

% General-purpose string scratch variables shared by the functions below.
STRINGS { s t }

% output.nonnull: stack = <pending string> <new string>.
% Saves the new string in s, writes the pending string followed by the
% separator that matches output.state, then pushes s back as the new pending
% string:
%   mid.sentence    -> ", "
%   after.block     -> period, newline, "\newblock "
%   before.all      -> nothing
%   otherwise       -> period and a space (the after.sentence case)
% In every case except mid.sentence the state is then reset to mid.sentence.
FUNCTION {output.nonnull}
{ 's :=
  output.state mid.sentence =
    { ", " * write$ }
    { output.state after.block =
        { add.period$ write$
          newline$
          "\newblock " write$
        }
        { output.state before.all =
            'write$
            { add.period$ " " * write$ }
          if$
        }
      if$
      mid.sentence 'output.state :=
    }
  if$
  s
}

% output: like output.nonnull, but silently drops an empty string.
FUNCTION {output}
{ duplicate$ empty$
    'pop$
    'output.nonnull
  if$
}

% output.check: stack = ... <string> <field name>.
% Like output, but an empty string triggers the warning
% "empty <field name> in <cite key>".
FUNCTION {output.check}
{ 't :=
  duplicate$ empty$
    { pop$ "empty " t * " in " * cite$ * warning$ }
    'output.nonnull
  if$
}

% output.bibitem: start a new entry -- writes "\bibitem{<cite key>}" on its own
% line, pushes an empty pending string and sets the state to before.all.
FUNCTION {output.bibitem}
{ newline$
  "\bibitem{" write$
  cite$ write$
  "}" write$
  newline$
  ""
  before.all 'output.state :=
}

% fin.entry: finish the entry -- add a final period to the pending string,
% write it, and end the line.
FUNCTION {fin.entry}
{ add.period$
  write$
  newline$
}

% new.block: mark that the next output starts a new block, unless nothing has
% been output yet for this entry.
FUNCTION {new.block}
{ output.state before.all =
    'skip$
    { after.block 'output.state := }
  if$
}

% new.sentence: mark that the next output starts a new sentence, unless a new
% block is already pending or nothing has been output yet.
FUNCTION {new.sentence}
{ output.state after.block =
    'skip$
    { output.state before.all =
        'skip$
        { after.sentence 'output.state := }
      if$
    }
  if$
}

% not: logical negation of the integer on top of the stack (nonzero -> 0, 0 -> 1).
FUNCTION {not}
{   { #0 }
    { #1 }
  if$
}

% and: stack = <a> <b>.  If b is true the result is a, otherwise 0.
FUNCTION {and}
{   'skip$
    { pop$ #0 }
  if$
}

% or: stack = <a> <b>.  If b is true the result is 1, otherwise a.
FUNCTION {or}
{   { pop$ #1 }
    'skip$
  if$
}

% new.block.checka: pops one string; starts a new block if it is non-empty.
FUNCTION {new.block.checka}
{ empty$
    'skip$
    'new.block
  if$
}

% new.block.checkb: pops two strings; starts a new block unless both are empty.
FUNCTION {new.block.checkb}
{ empty$
  swap$ empty$
  and
    'skip$
    'new.block
  if$
}

% new.sentence.checka: pops one string; starts a new sentence if it is non-empty.
FUNCTION {new.sentence.checka}
{ empty$
    'skip$
    'new.sentence
  if$
}

% new.sentence.checkb: pops two strings; starts a new sentence unless both are
% empty.
FUNCTION {new.sentence.checkb}
{ empty$
  swap$ empty$
  and
    'skip$
    'new.sentence
  if$
}

% field.or.null: replaces an empty (or missing) value on top of the stack by
% the empty string "".
FUNCTION {field.or.null}
{ duplicate$ empty$
    { pop$ "" }
    'skip$
  if$
}

% emphasize: wraps a non-empty string in "{\em ...}"; an empty one becomes "".
FUNCTION {emphasize}
{ duplicate$ empty$
    { pop$ "" }
    { "{\em " swap$ * "}" * }
  if$
}

% Loop variables of format.names: index of the current name, number of names
% still to process, total number of names.
INTEGERS { nameptr namesleft numnames }

% format.names: pops a name list and pushes it formatted.
% Each name is rendered as "{f.~}{vv~}{ll}{, jj}" (initials, von part, last
% name, junior part).  Names are joined with ", "; the last name is joined
% with " and ", preceded by a comma when there are more than two names.
% A final name of "others" is rendered as " et~al." instead.
FUNCTION {format.names}
{ 's :=
  #1 'nameptr :=
  s num.names$ 'numnames :=
  numnames 'namesleft :=
    { namesleft #0 > }
    { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't :=
      nameptr #1 >
        { namesleft #1 >
            { ", " * t * }
            { numnames #2 >
                { "," * }
                'skip$
              if$
              t "others" =
                { " et~al." * }
                { " and " * t * }
              if$
            }
          if$
        }
        't
      if$
      nameptr #1 + 'nameptr :=

      namesleft #1 - 'namesleft :=
    }
  while$
}

% format.authors: formatted author list, or "" when there is no author.
FUNCTION {format.authors}
{ author empty$
    { "" }
    { author format.names }
  if$
}

% format.editors: formatted editor list followed by ", editor" or ", editors"
% (plural when there is more than one name), or "" when there is no editor.
FUNCTION {format.editors}
{ editor empty$
    { "" }
    { editor format.names
      editor num.names$ #1 >
        { ", editors" * }
        { ", editor" * }
      if$
    }
  if$
}

% format.title: the title converted with change.case$ "t", or "" when empty.
FUNCTION {format.title}
{ title empty$
    { "" }
    { title "t" change.case$ }
  if$
}

% n.dashify: pops a string and pushes a copy in which every isolated "-" is
% replaced by "--"; runs of two or more hyphens are copied unchanged.
FUNCTION {n.dashify}
{ 't :=
  ""
    { t empty$ not }
    { t #1 #1 substring$ "-" =
        { t #1 #2 substring$ "--" = not
            { "--" *
              t #2 global.max$ substring$ 't :=
            }
            {   { t #1 #1 substring$ "-" = }
                { "-" *
                  t #2 global.max$ substring$ 't :=
                }
              while$
            }
          if$
        }
        { t #1 #1 substring$ *
          t #2 global.max$ substring$ 't :=
        }
      if$
    }
  while$
}

% format.date: "<month> <year>", or the year alone, or "" when both are empty.
% A month without a year yields the month plus a warning.
FUNCTION {format.date}
{ year empty$
    { month empty$
        { "" }
        { "there's a month but no year in " cite$ * warning$
          month
        }
      if$
    }
    { month empty$
        'year
        { month " " * year * }
      if$
    }
  if$
}

% format.btitle: the title, emphasized (no case change).
FUNCTION {format.btitle}
{ title emphasize
}

% tie.or.space.connect: stack = <a> <b>.  Pushes a and b joined by a tie "~"
% when b is shorter than three text characters, otherwise by a space.
FUNCTION {tie.or.space.connect}
{ duplicate$ text.length$ #3 <
    { "~" }
    { " " }
  if$
  swap$ * *
}

% either.or.check: stack = <"x and y"> <field value>.
% Warns "can't use both x and y fields in <cite key>" when the field value is
% non-empty; otherwise just discards the description string.
FUNCTION {either.or.check}
{ empty$
    'pop$
    { "can't use both " swap$ * " fields in " * cite$ * warning$ }
  if$
}

% format.bvolume: "volume <volume>", followed by " of {\em <series>}" when a
% series is given; "" when there is no volume.  Warns if number is also set.
FUNCTION {format.bvolume}
{ volume empty$
    { "" }
    { "volume" volume tie.or.space.connect
      series empty$
        'skip$
        { " of " * series emphasize * }
      if$
      "volume and number" number either.or.check
    }
  if$
}

% format.number.series: only produces output when volume is empty.
% Without a number it yields the series (or ""); with a number it yields
% "number <number>" ("Number ..." when not mid-sentence) followed by
% " in <series>", warning when the series is missing.
FUNCTION {format.number.series}
{ volume empty$
    { number empty$
        { series field.or.null }
        { output.state mid.sentence =
            { "number" }
            { "Number" }
          if$
          number tie.or.space.connect
          series empty$
            { "there's a number but no series in " cite$ * warning$ }
            { " in " * series * }
          if$
        }
      if$
    }
    { "" }
  if$
}

% format.edition: "<edition> edition"; the edition text is lower-cased
% mid-sentence and converted with change.case$ "t" otherwise.  "" when empty.
FUNCTION {format.edition}
{ edition empty$
    { "" }
    { output.state mid.sentence =
        { edition "l" change.case$ " edition" * }
        { edition "t" change.case$ " edition" * }
      if$
    }
  if$
}

% Result flag of multi.page.check.
INTEGERS { multiresult }

% multi.page.check: pops a pages string and pushes 1 if it contains "-", ","
% or "+", otherwise 0.  Scanning stops at the first such character.
FUNCTION {multi.page.check}
{ 't :=
  #0 'multiresult :=
    { multiresult not
      t empty$ not
      and
    }
    { t #1 #1 substring$
      duplicate$ "-" =
      swap$ duplicate$ "," =
      swap$ "+" =
      or or
        { #1 'multiresult := }
        { t #2 global.max$ substring$ 't := }
      if$
    }
  while$
  multiresult
}

% format.pages: "pages <dashified pages>" when multi.page.check reports a
% range or list, "page <pages>" otherwise, "" when pages is empty.
FUNCTION {format.pages}
{ pages empty$
    { "" }
    { pages multi.page.check
        { "pages" pages n.dashify tie.or.space.connect }
        { "page" pages tie.or.space.connect }
      if$
    }
  if$
}

% format.vol.num.pages: "<volume>(<number>):<pages>".
% Warns when there is a number but no volume.  When neither volume nor number
% produced any text, the result of format.pages is used instead.
FUNCTION {format.vol.num.pages}
{ volume field.or.null
  number empty$
    'skip$
    { "(" number * ")" * *
      volume empty$
        { "there's a number but no volume in " cite$ * warning$ }
        'skip$
      if$
    }
  if$
  pages empty$
    'skip$
    { duplicate$ empty$
        { pop$ format.pages }
        { ":" * pages n.dashify * }
      if$
    }
  if$
}

% format.chapter.pages: without a chapter this is just format.pages.
% Otherwise "chapter <chapter>" (the lower-cased type field replaces the word
% "chapter" when given), followed by ", " and format.pages when pages is set.
FUNCTION {format.chapter.pages}
{ chapter empty$
    'format.pages
    { type empty$
        { "chapter" }
        { type "l" change.case$ }
      if$
      chapter tie.or.space.connect
      pages empty$
        'skip$
        { ", " * format.pages * }
      if$
    }
  if$
}

% format.in.ed.booktitle: "In {\em <booktitle>}", with the formatted editors
% and ", " inserted after "In " when an editor is given; "" without booktitle.
FUNCTION {format.in.ed.booktitle}
{ booktitle empty$
    { "" }
    { editor empty$
        { "In " booktitle emphasize * }
        { "In " format.editors * ", " * booktitle emphasize * }
      if$
    }
  if$
}

% empty.misc.check: warns when author, title, howpublished, month, year and
% note are all empty while key is non-empty.
FUNCTION {empty.misc.check}

{ author empty$ title empty$ howpublished empty$
  month empty$ year empty$ note empty$
  and and and and and
  key empty$ not and
    { "all relevant fields are empty in " cite$ * warning$ }
    'skip$
  if$
}

% format.thesis.type: stack = <default thesis description>.
% When the type field is set, the default is replaced by the type field
% converted with change.case$ "t"; otherwise the default is left in place.
FUNCTION {format.thesis.type}
{ type empty$
    'skip$
    { pop$
      type "t" change.case$
    }
  if$
}

% format.tr.number: the type field (default "Technical Report"), followed by
% the number when one is given; without a number the text is converted with
% change.case$ "t" instead.
FUNCTION {format.tr.number}
{ type empty$
    { "Technical Report" }
    'type
  if$
  number empty$
    { "t" change.case$ }
    { number tie.or.space.connect }
  if$
}

% format.article.crossref: "In <key>" or, without a key,
% "In {\em <journal>\/}", followed by " \cite{<crossref>}".
% Warns (and uses "") when neither key nor journal is available.
FUNCTION {format.article.crossref}
{ key empty$
    { journal empty$
        { "need key or journal for " cite$ * " to crossref " * crossref *
          warning$
          ""
        }
        { "In {\em " journal * "\/}" * }
      if$
    }
    { "In " key * }
  if$
  " \cite{" * crossref * "}" *
}

% format.crossref.editor: short editor description for cross references --
% the first editor's von and last name, then " et~al." for more than two
% editors, or " and <second editor>" for exactly two (" et~al." when the
% second "editor" is the literal "others").
FUNCTION {format.crossref.editor}
{ editor #1 "{vv~}{ll}" format.name$
  editor num.names$ duplicate$
  #2 >
    { pop$ " et~al." * }
    { #2 <
        'skip$
        { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
            { " et~al." * }
            { " and " * editor #2 "{vv~}{ll}" format.name$ * }
          if$
        }
      if$
    }
  if$
}

% format.book.crossref: "Volume <volume> of " (or "In " plus a warning when
% volume is empty), then a description of the referenced book, then
% " \cite{<crossref>}".  The description is the editor (format.crossref.editor)
% unless editor is empty or equal to author; in that case key is used, then
% the emphasized series, and a warning is issued when neither is available.
FUNCTION {format.book.crossref}
{ volume empty$
    { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
      "In "
    }
    { "Volume" volume tie.or.space.connect
      " of " *
    }
  if$
  editor empty$
  editor field.or.null author field.or.null =
  or
    { key empty$
        { series empty$
            { "need editor, key, or series for " cite$ * " to crossref " *
              crossref * warning$
              "" *
            }
            { "{\em " * series * "\/}" * }
          if$
        }
        { key * }
      if$
    }
    { format.crossref.editor * }
  if$
  " \cite{" * crossref * "}" *
}

% format.incoll.inproc.crossref: "In " plus a description of the referenced
% collection or proceedings, then " \cite{<crossref>}".  The description is the
% editor (format.crossref.editor) unless editor is empty or equal to author;
% in that case key is used, then the emphasized booktitle, and a warning is
% issued when neither is available.
FUNCTION {format.incoll.inproc.crossref}
{ editor empty$
  editor field.or.null author field.or.null =
  or
    { key empty$
        { booktitle empty$
            { "need editor, key, or booktitle for " cite$ * " to crossref " *
              crossref * warning$
              ""
            }
            { "In {\em " booktitle * "\/}" * }
          if$
        }
        { "In " key * }
      if$
    }
    { "In " format.crossref.editor * }
  if$
  " \cite{" * crossref * "}" *
}

% article: entry-type function for @article.
% Blocks: authors; title; then either journal, volume(number):pages and date,
% or -- when crossref is present -- the cross reference and pages; finally the
% note.  Missing author, title, journal or year produce warnings via
% output.check.
FUNCTION {article}
{ output.bibitem
  format.authors "author" output.check
  new.block
  format.title "title" output.check
  new.block
  crossref missing$
    { journal emphasize "journal" output.check
      format.vol.num.pages output
      format.date "year" output.check
    }
    { format.article.crossref output.nonnull
      format.pages output
    }
  if$
  new.block
  note output
  fin.entry
}

FUNCTION {book}
{ output.bibitem
  author empty$
    { format.editors "author and editor" output.check }
    { format.authors output.nonnull
      crossref missing$
        { "author and editor" editor either.or.check }
        'skip$
      if$
    }
589 | if$ 590 | new.block 591 | format.btitle "title" output.check 592 | crossref missing$ 593 | { format.bvolume output 594 | new.block 595 | format.number.series output 596 | new.sentence 597 | publisher "publisher" output.check 598 | address output 599 | } 600 | { new.block 601 | format.book.crossref output.nonnull 602 | } 603 | if$ 604 | format.edition output 605 | format.date "year" output.check 606 | new.block 607 | note output 608 | fin.entry 609 | } 610 | 611 | FUNCTION {booklet} 612 | { output.bibitem 613 | format.authors output 614 | new.block 615 | format.title "title" output.check 616 | howpublished address new.block.checkb 617 | howpublished output 618 | address output 619 | format.date output 620 | new.block 621 | note output 622 | fin.entry 623 | } 624 | 625 | FUNCTION {inbook} 626 | { output.bibitem 627 | author empty$ 628 | { format.editors "author and editor" output.check } 629 | { format.authors output.nonnull 630 | 631 | crossref missing$ 632 | { "author and editor" editor either.or.check } 633 | 'skip$ 634 | if$ 635 | } 636 | if$ 637 | new.block 638 | format.btitle "title" output.check 639 | crossref missing$ 640 | { format.bvolume output 641 | format.chapter.pages "chapter and pages" output.check 642 | new.block 643 | format.number.series output 644 | new.sentence 645 | publisher "publisher" output.check 646 | address output 647 | } 648 | { format.chapter.pages "chapter and pages" output.check 649 | new.block 650 | format.book.crossref output.nonnull 651 | } 652 | if$ 653 | format.edition output 654 | format.date "year" output.check 655 | new.block 656 | note output 657 | fin.entry 658 | } 659 | 660 | FUNCTION {incollection} 661 | { output.bibitem 662 | format.authors "author" output.check 663 | new.block 664 | format.title "title" output.check 665 | new.block 666 | crossref missing$ 667 | { format.in.ed.booktitle "booktitle" output.check 668 | format.bvolume output 669 | format.number.series output 670 | format.chapter.pages output 671 | 
new.sentence 672 | publisher "publisher" output.check 673 | address output 674 | format.edition output 675 | format.date "year" output.check 676 | } 677 | { format.incoll.inproc.crossref output.nonnull 678 | format.chapter.pages output 679 | } 680 | if$ 681 | new.block 682 | note output 683 | fin.entry 684 | } 685 | 686 | FUNCTION {inproceedings} 687 | { output.bibitem 688 | format.authors "author" output.check 689 | new.block 690 | format.title "title" output.check 691 | new.block 692 | crossref missing$ 693 | { format.in.ed.booktitle "booktitle" output.check 694 | format.bvolume output 695 | format.number.series output 696 | format.pages output 697 | address empty$ 698 | { organization publisher new.sentence.checkb 699 | organization output 700 | publisher output 701 | format.date "year" output.check 702 | } 703 | { address output.nonnull 704 | format.date "year" output.check 705 | new.sentence 706 | organization output 707 | publisher output 708 | } 709 | if$ 710 | } 711 | { format.incoll.inproc.crossref output.nonnull 712 | format.pages output 713 | } 714 | if$ 715 | new.block 716 | note output 717 | fin.entry 718 | } 719 | 720 | FUNCTION {conference} { inproceedings } 721 | 722 | FUNCTION {manual} 723 | { output.bibitem 724 | author empty$ 725 | { organization empty$ 726 | 'skip$ 727 | { organization output.nonnull 728 | address output 729 | } 730 | if$ 731 | } 732 | { format.authors output.nonnull } 733 | if$ 734 | new.block 735 | format.btitle "title" output.check 736 | author empty$ 737 | { organization empty$ 738 | { address new.block.checka 739 | address output 740 | } 741 | 'skip$ 742 | if$ 743 | } 744 | { organization address new.block.checkb 745 | organization output 746 | address output 747 | } 748 | if$ 749 | format.edition output 750 | format.date output 751 | new.block 752 | note output 753 | fin.entry 754 | } 755 | 756 | FUNCTION {mastersthesis} 757 | { output.bibitem 758 | format.authors "author" output.check 759 | new.block 760 | format.title 
"title" output.check 761 | new.block 762 | "Master's thesis" format.thesis.type output.nonnull 763 | school "school" output.check 764 | address output 765 | format.date "year" output.check 766 | new.block 767 | note output 768 | fin.entry 769 | } 770 | 771 | FUNCTION {misc} 772 | { output.bibitem 773 | format.authors output 774 | title howpublished new.block.checkb 775 | format.title output 776 | howpublished new.block.checka 777 | howpublished output 778 | format.date output 779 | new.block 780 | note output 781 | fin.entry 782 | empty.misc.check 783 | } 784 | 785 | FUNCTION {phdthesis} 786 | { output.bibitem 787 | format.authors "author" output.check 788 | new.block 789 | format.btitle "title" output.check 790 | new.block 791 | "PhD thesis" format.thesis.type output.nonnull 792 | school "school" output.check 793 | address output 794 | format.date "year" output.check 795 | new.block 796 | note output 797 | fin.entry 798 | } 799 | 800 | FUNCTION {proceedings} 801 | { output.bibitem 802 | editor empty$ 803 | { organization output } 804 | { format.editors output.nonnull } 805 | 806 | if$ 807 | new.block 808 | format.btitle "title" output.check 809 | format.bvolume output 810 | format.number.series output 811 | address empty$ 812 | { editor empty$ 813 | { publisher new.sentence.checka } 814 | { organization publisher new.sentence.checkb 815 | organization output 816 | } 817 | if$ 818 | publisher output 819 | format.date "year" output.check 820 | } 821 | { address output.nonnull 822 | format.date "year" output.check 823 | new.sentence 824 | editor empty$ 825 | 'skip$ 826 | { organization output } 827 | if$ 828 | publisher output 829 | } 830 | if$ 831 | new.block 832 | note output 833 | fin.entry 834 | } 835 | 836 | FUNCTION {techreport} 837 | { output.bibitem 838 | format.authors "author" output.check 839 | new.block 840 | format.title "title" output.check 841 | new.block 842 | format.tr.number output.nonnull 843 | institution "institution" output.check 844 | address 
output 845 | format.date "year" output.check 846 | new.block 847 | note output 848 | fin.entry 849 | } 850 | 851 | FUNCTION {unpublished} 852 | { output.bibitem 853 | format.authors "author" output.check 854 | new.block 855 | format.title "title" output.check 856 | new.block 857 | note "note" output.check 858 | format.date output 859 | fin.entry 860 | } 861 | 862 | FUNCTION {default.type} { misc } 863 | 864 | MACRO {jan} {"Jan."} 865 | 866 | MACRO {feb} {"Feb."} 867 | 868 | MACRO {mar} {"Mar."} 869 | 870 | MACRO {apr} {"Apr."} 871 | 872 | MACRO {may} {"May"} 873 | 874 | MACRO {jun} {"June"} 875 | 876 | MACRO {jul} {"July"} 877 | 878 | MACRO {aug} {"Aug."} 879 | 880 | MACRO {sep} {"Sept."} 881 | 882 | MACRO {oct} {"Oct."} 883 | 884 | MACRO {nov} {"Nov."} 885 | 886 | MACRO {dec} {"Dec."} 887 | 888 | MACRO {acmcs} {"ACM Comput. Surv."} 889 | 890 | MACRO {acta} {"Acta Inf."} 891 | 892 | MACRO {cacm} {"Commun. ACM"} 893 | 894 | MACRO {ibmjrd} {"IBM J. Res. Dev."} 895 | 896 | MACRO {ibmsj} {"IBM Syst.~J."} 897 | 898 | MACRO {ieeese} {"IEEE Trans. Softw. Eng."} 899 | 900 | MACRO {ieeetc} {"IEEE Trans. Comput."} 901 | 902 | MACRO {ieeetcad} 903 | {"IEEE Trans. Comput.-Aided Design Integrated Circuits"} 904 | 905 | MACRO {ipl} {"Inf. Process. Lett."} 906 | 907 | MACRO {jacm} {"J.~ACM"} 908 | 909 | MACRO {jcss} {"J.~Comput. Syst. Sci."} 910 | 911 | MACRO {scp} {"Sci. Comput. Programming"} 912 | 913 | MACRO {sicomp} {"SIAM J. Comput."} 914 | 915 | MACRO {tocs} {"ACM Trans. Comput. Syst."} 916 | 917 | MACRO {tods} {"ACM Trans. Database Syst."} 918 | 919 | MACRO {tog} {"ACM Trans. Gr."} 920 | 921 | MACRO {toms} {"ACM Trans. Math. Softw."} 922 | 923 | MACRO {toois} {"ACM Trans. Office Inf. Syst."} 924 | 925 | MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."} 926 | 927 | MACRO {tcs} {"Theoretical Comput. 
Sci."} 928 | 929 | READ 930 | 931 | FUNCTION {sortify} 932 | { purify$ 933 | "l" change.case$ 934 | } 935 | 936 | INTEGERS { len } 937 | 938 | FUNCTION {chop.word} 939 | { 's := 940 | 'len := 941 | s #1 len substring$ = 942 | { s len #1 + global.max$ substring$ } 943 | 's 944 | if$ 945 | } 946 | 947 | FUNCTION {sort.format.names} 948 | { 's := 949 | #1 'nameptr := 950 | "" 951 | s num.names$ 'numnames := 952 | numnames 'namesleft := 953 | { namesleft #0 > } 954 | { nameptr #1 > 955 | { " " * } 956 | 'skip$ 957 | if$ 958 | s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't := 959 | nameptr numnames = t "others" = and 960 | { "et al" * } 961 | { t sortify * } 962 | if$ 963 | nameptr #1 + 'nameptr := 964 | namesleft #1 - 'namesleft := 965 | } 966 | while$ 967 | } 968 | 969 | FUNCTION {sort.format.title} 970 | { 't := 971 | "A " #2 972 | "An " #3 973 | "The " #4 t chop.word 974 | chop.word 975 | chop.word 976 | sortify 977 | #1 global.max$ substring$ 978 | } 979 | 980 | FUNCTION {author.sort} 981 | { author empty$ 982 | { key empty$ 983 | { "to sort, need author or key in " cite$ * warning$ 984 | "" 985 | } 986 | { key sortify } 987 | if$ 988 | } 989 | { author sort.format.names } 990 | if$ 991 | } 992 | 993 | FUNCTION {author.editor.sort} 994 | { author empty$ 995 | { editor empty$ 996 | { key empty$ 997 | { "to sort, need author, editor, or key in " cite$ * warning$ 998 | "" 999 | } 1000 | { key sortify } 1001 | if$ 1002 | } 1003 | { editor sort.format.names } 1004 | if$ 1005 | } 1006 | { author sort.format.names } 1007 | if$ 1008 | } 1009 | 1010 | FUNCTION {author.organization.sort} 1011 | { author empty$ 1012 | 1013 | { organization empty$ 1014 | { key empty$ 1015 | { "to sort, need author, organization, or key in " cite$ * warning$ 1016 | "" 1017 | } 1018 | { key sortify } 1019 | if$ 1020 | } 1021 | { "The " #4 organization chop.word sortify } 1022 | if$ 1023 | } 1024 | { author sort.format.names } 1025 | if$ 1026 | } 1027 | 1028 | FUNCTION 
{editor.organization.sort} 1029 | { editor empty$ 1030 | { organization empty$ 1031 | { key empty$ 1032 | { "to sort, need editor, organization, or key in " cite$ * warning$ 1033 | "" 1034 | } 1035 | { key sortify } 1036 | if$ 1037 | } 1038 | { "The " #4 organization chop.word sortify } 1039 | if$ 1040 | } 1041 | { editor sort.format.names } 1042 | if$ 1043 | } 1044 | 1045 | FUNCTION {presort} 1046 | { type$ "book" = 1047 | type$ "inbook" = 1048 | or 1049 | 'author.editor.sort 1050 | { type$ "proceedings" = 1051 | 'editor.organization.sort 1052 | { type$ "manual" = 1053 | 'author.organization.sort 1054 | 'author.sort 1055 | if$ 1056 | } 1057 | if$ 1058 | } 1059 | if$ 1060 | " " 1061 | * 1062 | year field.or.null sortify 1063 | * 1064 | " " 1065 | * 1066 | title field.or.null 1067 | sort.format.title 1068 | * 1069 | #1 entry.max$ substring$ 1070 | 'sort.key$ := 1071 | } 1072 | 1073 | ITERATE {presort} 1074 | 1075 | SORT 1076 | 1077 | STRINGS { longest.label } 1078 | 1079 | INTEGERS { number.label longest.label.width } 1080 | 1081 | FUNCTION {initialize.longest.label} 1082 | { "" 'longest.label := 1083 | #1 'number.label := 1084 | #0 'longest.label.width := 1085 | } 1086 | 1087 | FUNCTION {longest.label.pass} 1088 | { number.label int.to.str$ 'label := 1089 | number.label #1 + 'number.label := 1090 | label width$ longest.label.width > 1091 | { label 'longest.label := 1092 | label width$ 'longest.label.width := 1093 | } 1094 | 'skip$ 1095 | if$ 1096 | } 1097 | 1098 | EXECUTE {initialize.longest.label} 1099 | 1100 | ITERATE {longest.label.pass} 1101 | 1102 | FUNCTION {begin.bib} 1103 | { preamble$ empty$ 1104 | 'skip$ 1105 | { preamble$ write$ newline$ } 1106 | if$ 1107 | "\begin{thebibliography}{" longest.label * "}" * 1108 | "\itemsep=-1pt" * % Compact the entries a little. 
1109 | write$ newline$ 1110 | } 1111 | 1112 | EXECUTE {begin.bib} 1113 | 1114 | EXECUTE {init.state.consts} 1115 | 1116 | ITERATE {call.type$} 1117 | 1118 | FUNCTION {end.bib} 1119 | { newline$ 1120 | "\end{thebibliography}" write$ newline$ 1121 | } 1122 | 1123 | EXECUTE {end.bib} 1124 | 1125 | % end of file ieee.bst 1126 | % --------------------------------------------------------------- 1127 | 1128 | 1129 | 1130 | -------------------------------------------------------------------------------- /hw_02/hw02.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Problem Set 2" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "STAT 479: Machine Learning (Fall 2018) \n", 15 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) \n", 16 | "Course website: http://pages.stat.wisc.edu/~sraschka/teaching/stat479-fs2018/\n", 17 | "\n", 18 | "**Due**: Nov 08, before class (before 8:00 am).\n", 19 | "\n", 20 | "**How to submit**\n", 21 | "\n", 22 | "As mentioned in the lecture, you need to submit the `.ipynb` file with your answers plus an `.html` file, which will serve as a backup for us in case the `.ipynb` file cannot be opened on my or the TA's computer. In addition, you may also export the notebook as PDF and upload it as well.\n", 23 | "\n", 24 | "This time, we will be using the Canvas platform, so you need to submit your homework there. You should be able to resubmit the homework as many times as you like before the due date." 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "**You are highly encouraged to use Piazza to ask questions and help each other while working on the homework. 
However, do not share any solutions with other students as this would be a violation of the Academic Integrity guidelines (for more info, see http://pages.stat.wisc.edu/~sraschka/teaching/stat479-fs2018/#other-important-course-information)**\n", 32 | "\n", 33 | "\n", 34 | "For example, a reasonable question & answer would be:\n", 35 | "\n", 36 | "- Q: When I am asked to implement the code for majority voting, my code produces an array that has the wrong dimensions (I get the following dimensions ...). \n", 37 | "- A: Hm, I suspect you compute the `argmax` over rows, not columns. Maybe check that you specify the correct dimension for the `axis` parameter in the `argmax` function.\n", 38 | "\n", 39 | "Not ok would be:\n", 40 | "\n", 41 | "- Q: Here is my code and solution for exercise XXX. Is this correct? " 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 1, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | " \n", 54 | "last updated: 2018-10-21 \n", 55 | "\n", 56 | "CPython 3.6.6\n", 57 | "IPython 6.5.0\n", 58 | "\n", 59 | "numpy 1.15.1\n", 60 | "scipy 1.1.0\n", 61 | "matplotlib 2.2.3\n", 62 | "sklearn 0.20.0\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "%load_ext watermark\n", 68 | "%watermark -d -u -a '' -v -p numpy,scipy,matplotlib,sklearn" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 2, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "import numpy as np" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "
\n", 85 | "
\n", 86 | "
\n", 87 | "
\n", 88 | "
\n", 89 | "
" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## 1) Implementing an ID3 Decision Tree" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "In this first part of the homework, you are going to implement the ID3 decision tree algorithm we discussed in class. This decision tree algorithm will support multi-category splits, but just like the original ID3 algorithm, it will only support categorical feature values for simplicity. Here, categorical feature values will be represented by integer numbers. \n", 104 | "\n", 105 | "\n", 106 | "Implementing machine learning algorithms from scratch is a very important skill, and this homework will provide exercises that will help you to develop this skill. Even if you are interested in the more theoretical aspects of machine learning, being comfortable with implementing and trying out algorithms is vital for doing research, since even the more theoretical papers in machine learning are usually accompanied by experiments or simulations to a) verify results and b) compare algorithms with the state of the art.\n", 107 | "\n", 108 | "Since many students are not expert Python programmers (yet), I will provide partial solutions to the homework tasks such that you have a framework or guide to implement the solutions. Areas that you need to fill in will be marked with comments (e.g., `# your code`). For these partial solutions, I first implemented the functions myself, and then I replaced the parts you need to fill in with these comments. However, note that you can, of course, use more or fewer lines of code than I did. In other words, all that matters is that the function you write can create the same outputs as the ones I provide. How many lines of code you need to implement that function, and how efficient it is, does not matter here.
The expected outputs for the respective functions will be provided so that you can double-check your solutions. " 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "### 1.1) Splitting a node (10 pts)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "First, we are going to implement a function that splits a dataset along a feature axis into sub-datasets. Since we are going to implement a decision tree that only supports categorical features (like ID3) for simplicity, you do not need to account for continuous feature variables. In other words, the splitting function only needs to support integer NumPy arrays. \n", 123 | "\n", 124 | "To provide an intuitive example, suppose you are given the following NumPy array with four feature values, feature values 0-3:\n", 125 | "\n", 126 | " np.array([0, 1, 2, 1, 0, 3, 1, 0, 1, 2])\n", 127 | " \n", 128 | "The function you are going to implement should return a dictionary, where each dictionary key represents a unique value in the array, and the values are the indices in that array that map to the respective feature value. Hence, based on the feature array above, your `split` function should return the following dictionary:\n", 129 | "\n", 130 | " {0: array([0, 4, 7]), \n", 131 | " 1: array([1, 3, 6, 8]), \n", 132 | " 2: array([2, 9]), \n", 133 | " 3: array([5])}" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "Tip: I recommend that you use the `np.where` and `np.unique` functions to make the implementation easier. If you do not remember these functions from the \"computational foundations\" lectures, you can either look up those functions in the NumPy documentation online, or you can execute `np.where?` and `np.unique?` in a new code cell to get more information."
141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "def split(array):\n", 150 | " # your code to generate dictionary\n", 151 | " return # return the dictionary variable" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your implementation." 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 4, 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "name": "stdout", 168 | "output_type": "stream", 169 | "text": [ 170 | "{0: array([0]), 1: array([1]), 2: array([2])}\n", 171 | "{0: array([1, 3, 4, 6]), 1: array([0, 2, 5])}\n", 172 | "{0: array([1, 4]), 1: array([0, 5, 6]), 2: array([3]), 3: array([2])}\n" 173 | ] 174 | } 175 | ], 176 | "source": [ 177 | "# DO NOT EDIT OR DELETE THIS CELL\n", 178 | "\n", 179 | "print(split(np.array([0, 1, 2])))\n", 180 | "print(split(np.array([1, 0, 1, 0, 0, 1, 0])))\n", 181 | "print(split(np.array([1, 0, 3, 2, 0, 1, 1])))" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "### 1.2) Implement Entropy (10 pts)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "After implementing the splitting function, we now have to implement a criterion function so that we can compare splits on different features, to decide which feature is the best feature to split on for growing the decision tree. As discussed in class, our splitting criterion will be Information Gain.
However, before we implement an Information Gain function, we need to implement a function that computes the entropy at each node, which we need to compute Information Gain.\n", 196 | "\n", 197 | "For your reference, we defined entropy (i.e., Shannon Entropy) as follows:\n", 198 | "\n", 199 | "$$H(p) = \\sum_i p_i \\log_2 (1/p_i) = - \\sum_i p_i \\log_2 (p_i)$$\n", 200 | "\n", 201 | "where you can think of $p_i$ as the proportion of examples with class label $i$ at a given node." 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "def entropy(array):\n", 211 | " # your code\n", 212 | " # your code\n", 213 | " return # return a scalar" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your implementation of the `entropy` function." 
221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 8, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "name": "stdout", 230 | "output_type": "stream", 231 | "text": [ 232 | "1.0\n", 233 | "1.0\n", 234 | "0.0\n", 235 | "0.4395\n", 236 | "0.0\n", 237 | "1.6577\n" 238 | ] 239 | } 240 | ], 241 | "source": [ 242 | "# DO NOT EDIT OR DELETE THIS CELL\n", 243 | "\n", 244 | "print(round(entropy(np.array([0, 1, 0, 1, 1, 0])), 4))\n", 245 | "print(round(entropy(np.array([1, 2])), 4))\n", 246 | "print(round(entropy(np.array([1, 1])), 4))\n", 247 | "print(round(entropy(np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), 4))\n", 248 | "print(round(entropy(np.array([0, 0, 0])), 4))\n", 249 | "print(round(entropy(np.array([1, 1, 1, 0, 1, 4, 4, 2, 1])), 4))" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "### 1.3) Implement Information Gain (10 pts)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "Now that you have a working solution for the `entropy` function, the next step is to compute the Information Gain. 
For your reference, information gain is computed as\n", 264 | "\n", 265 | "$$GAIN(\\mathcal{D}, x_j) = H(\\mathcal{D}) - \\sum_{v \\in Values(x_j)} \\frac{|\\mathcal{D}_v|}{|\\mathcal{D}|} H(\\mathcal{D}_v).$$" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "def information_gain(x_array, y_array):\n", 275 | " parent_entropy = # your code\n", 276 | "\n", 277 | " split_dict = # your code\n", 278 | " \n", 279 | " for val in split_dict:\n", 280 | " freq = # your code\n", 281 | " child_entropy = # your code\n", 282 | " parent_entropy -= # your code\n", 283 | " \n", 284 | " return parent_entropy" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your implementation of the `information_gain` function." 
292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 11, 297 | "metadata": {}, 298 | "outputs": [ 299 | { 300 | "name": "stdout", 301 | "output_type": "stream", 302 | "text": [ 303 | "0.4591\n", 304 | "0.2516\n" 305 | ] 306 | } 307 | ], 308 | "source": [ 309 | "# DO NOT EDIT OR DELETE THIS CELL\n", 310 | "\n", 311 | "x = np.array([0, 1, 0, 1, 0, 1])\n", 312 | "y = np.array([0, 1, 0, 1, 1, 1])\n", 313 | "print(round(information_gain(x, y), 4))\n", 314 | "\n", 315 | "x = np.array([0, 0, 1, 1, 2, 2])\n", 316 | "y = np.array([0, 1, 0, 1, 1, 1])\n", 317 | "print(round(information_gain(x, y), 4))" 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "(You may notice that these are actually the feature arrays from the midterm exam, Q 14.)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "### 1.4) Decision Tree Splitting (10 pts)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": [ 338 | "Now, we should have all the main components that we need for implementing the ID3 decision tree algorithm: a `split` function, an `entropy` function, and an `information_gain` function based on the `entropy` function. \n", 339 | "\n", 340 | "The next task is to combine these functions to recursively split a dataset on its different features to construct a decision tree that separates the examples from different classes well. We will call this function `make_tree`. \n", 341 | "\n", 342 | "For simplicity, the decision tree returned by the `make_tree` function will be represented by a Python dictionary.
To illustrate this, consider the following dataset:\n", 343 | "\n", 344 | "```\n", 345 | "Inputs:\n", 346 | " [[0 0]\n", 347 | " [0 1]\n", 348 | " [1 0]\n", 349 | " [1 1]\n", 350 | " [2 0]\n", 351 | " [2 1]]\n", 352 | "\n", 353 | "Labels:\n", 354 | " [0 1 0 1 1 1]\n", 355 | "```\n", 356 | " \n", 357 | "This is a dataset with 6 training examples and two features. (Again, this is an example from the midterm exam.) The decision tree in the form of a Python dictionary should look as follows:\n", 358 | "\n", 359 | "\n", 360 | "\n", 361 | "You should return a dictionary with the following form:\n", 362 | "\n", 363 | "```\n", 364 | "{'X_1 = 0': {'X_0 = 0': array([0]),\n", 365 | " 'X_0 = 1': array([0]),\n", 366 | " 'X_0 = 2': array([1])},\n", 367 | " 'X_1 = 1': array([1, 1, 1])}\n", 368 | " ```\n", 369 | " \n", 370 | "Let me further illustrate what the different parts of the dictionary mean. Here, the `'X_1'` in `'X_1 = 0'` refers to feature 2 (the second column of the NumPy array; remember that Python starts the index at 0, in contrast to R). \n", 371 | "\n", 372 | "- 'X_1 = 0': For training examples stored in this node, the second feature has the value 0\n", 373 | "- 'X_1 = 1': For training examples stored in this node, the second feature has the value 1\n", 374 | "\n", 375 | "The \"array\" is a NumPy array that stores the class labels of the training examples at that node. In the case of `'X_1 = 0'` we actually store a sub-dictionary, because this node can be split further.
If you have trouble understanding this dictionary representation, the following illustration might help:\n", 376 | "\n", 377 | "\n", 378 | "![](tree-viz-1.png)" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": {}, 385 | "outputs": [], 386 | "source": [ 387 | "def make_tree(X, y):\n", 388 | " \n", 389 | " # Return array if node is empty or pure (1 example in leaf node)\n", 390 | " if y.shape[0] == 1 or y.shape[0] == 0:\n", 391 | " return y\n", 392 | "\n", 393 | " # Compute information gain for each feature\n", 394 | " gains = # YOUR CODE\n", 395 | "\n", 396 | " # Early stopping if there is no information gain\n", 397 | " if (gains <= 1e-05).all():\n", 398 | " return # YOUR CODE\n", 399 | " \n", 400 | " # Else, get best feature\n", 401 | " best_feature = np.argmax(gains)\n", 402 | "\n", 403 | " \n", 404 | " results = {}\n", 405 | " \n", 406 | " # Use the `split` function to split on the best feature\n", 407 | " subset_dict = split(X[:, best_feature])\n", 408 | "\n", 409 | " # Note that each entry in the dictionary returned by \n", 410 | " # split is an attribute_value:array_indices pair.\n", 411 | " # here, we are going to iterate over these key-value\n", 412 | " # pairs and select the respective examples for the\n", 413 | " # new child nodes\n", 414 | " \n", 415 | " for feature_value, train_example_indices in subset_dict.items():\n", 416 | " child_y_subset = # YOUR CODE\n", 417 | " child_x_subset = # YOUR CODE\n", 418 | "\n", 419 | " # Next, we are using \"recursion,\" that is, calling the same\n", 420 | " # tree_split function on the child subset(s)\n", 421 | " \n", 422 | " results[\"X_%d = %d\" % (best_feature, feature_value)] = \\\n", 423 | " make_tree(child_x_subset, child_y_subset)\n", 424 | "\n", 425 | " \n", 426 | " return results" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "I added the following code cell for your convenience to double-check your 
solution. If your results don't match the results shown below, there is a bug in your implementation of the `make_tree` function." 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 10, 439 | "metadata": {}, 440 | "outputs": [ 441 | { 442 | "name": "stdout", 443 | "output_type": "stream", 444 | "text": [ 445 | "Inputs:\n", 446 | " [[0 0]\n", 447 | " [0 1]\n", 448 | " [1 0]\n", 449 | " [1 1]\n", 450 | " [2 0]\n", 451 | " [2 1]]\n", 452 | "\n", 453 | "Labels:\n", 454 | " [0 1 0 1 1 1]\n", 455 | "\n", 456 | "Decision tree:\n", 457 | " {'X_1 = 0': {'X_0 = 0': array([0]), 'X_0 = 1': array([0]), 'X_0 = 2': array([1])}, 'X_1 = 1': array([1, 1, 1])}\n" 458 | ] 459 | } 460 | ], 461 | "source": [ 462 | "# DO NOT EDIT OR DELETE THIS CELL\n", 463 | "\n", 464 | "x1 = np.array([0, 0, 1, 1, 2, 2])\n", 465 | "x2 = np.array([0, 1, 0, 1, 0, 1])\n", 466 | "X = np.array([x1, x2]).T\n", 467 | "y = np.array([0, 1, 0, 1, 1, 1])\n", 468 | "\n", 469 | "print('Inputs:\\n', X)\n", 470 | "print('\\nLabels:\\n', y)\n", 471 | "\n", 472 | "print('\\nDecision tree:\\n', make_tree(X, y))" 473 | ] 474 | }, 475 | { 476 | "cell_type": "markdown", 477 | "metadata": {}, 478 | "source": [ 479 | "### 1.5) Building a Decision Tree API (10 pts)" 480 | ] 481 | }, 482 | { 483 | "cell_type": "markdown", 484 | "metadata": {}, 485 | "source": [ 486 | "The final step of this part of the homework is now to write an API around our decision tree code so that we can use it for making predictions. 
Here, we will use the common convention, established by scikit-learn, to implement the decision tree as a Python class with \n", 487 | "\n", 488 | "- a `fit` method that learns the decision tree model from a training set via the `make_tree` function we already implemented;\n", 489 | "- a `predict` method to predict the class labels of training examples or any unseen data points.\n", 490 | "\n", 491 | "For making predictions, since not all leaf nodes are guaranteed to be single training examples, we will use a majority voting function to predict the class label as discussed in class. I already implemented a `_traverse` method, which will recursively traverse a decision tree dictionary that is produced by the `make_tree` function.\n", 492 | "\n", 493 | "Note that for simplicity, the `predict` method will only be able to accept one data point at a time (instead of a collection of data points). Hence `x` is a vector of size $\\mathbb{R}^m$, where $m$ is the number of features. I use capital letters `X` to denote a matrix of size $\\mathbb{R}^{n\\times m}$, where $n$ is the number of training examples." 
494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "metadata": {}, 500 | "outputs": [], 501 | "source": [ 502 | "class ID3DecisionTreeClassifer(object):\n", 503 | " \n", 504 | " def __init__(self):\n", 505 | " pass\n", 506 | " \n", 507 | " def fit(self, X, y):\n", 508 | " self.splits_ = # YOUR CODE to generate the decision tree dictionary\n", 509 | " \n", 510 | " def _majority_vote(self, label_array):\n", 511 | " return # YOUR CODE\n", 512 | " \n", 513 | " def _traverse(self, x, d):\n", 514 | " if isinstance(d, np.ndarray):\n", 515 | " return d\n", 516 | " for key in d:\n", 517 | " name, value = key.split(' = ')\n", 518 | " feature_idx = int(name.split('_')[-1])\n", 519 | " value = int(value)\n", 520 | " if x[feature_idx] == value:\n", 521 | " return self._traverse(x, d[key])\n", 522 | " \n", 523 | " def predict(self, x):\n", 524 | " \n", 525 | " label_array = # YOUR CODE to get class labels from the target node\n", 526 | " return #YOUR CODE to predict the class label via majority voting from label_array" 527 | ] 528 | }, 529 | { 530 | "cell_type": "markdown", 531 | "metadata": {}, 532 | "source": [ 533 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your implementation of the `ID3DecisionTreeClassifer` class." 
534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": 12, 539 | "metadata": {}, 540 | "outputs": [ 541 | { 542 | "name": "stdout", 543 | "output_type": "stream", 544 | "text": [ 545 | "0\n", 546 | "1\n", 547 | "0\n", 548 | "0\n", 549 | "1\n", 550 | "1\n", 551 | "1\n" 552 | ] 553 | } 554 | ], 555 | "source": [ 556 | "# DO NOT EDIT OR DELETE THIS CELL\n", 557 | "\n", 558 | "tree = ID3DecisionTreeClassifer()\n", 559 | "tree.fit(X, y)\n", 560 | "\n", 561 | "print(tree.predict(np.array([0, 0])))\n", 562 | "print(tree.predict(np.array([0, 1])))\n", 563 | "print(tree.predict(np.array([1, 0])))\n", 564 | "print(tree.predict(np.array([1, 0])))\n", 565 | "print(tree.predict(np.array([1, 1])))\n", 566 | "print(tree.predict(np.array([2, 0])))\n", 567 | "print(tree.predict(np.array([2, 1])))" 568 | ] 569 | }, 570 | { 571 | "cell_type": "markdown", 572 | "metadata": {}, 573 | "source": [ 574 | "
\n", 575 | "
\n", 576 | "
\n", 577 | "
\n", 578 | "
\n", 579 | "
" 580 | ] 581 | }, 582 | { 583 | "cell_type": "markdown", 584 | "metadata": {}, 585 | "source": [ 586 | "## 2) Bagging" 587 | ] 588 | }, 589 | { 590 | "cell_type": "markdown", 591 | "metadata": {}, 592 | "source": [ 593 | "In this second part of this homework, you will be combining multiple decision trees to a bagging classifier. This time, we will be using the decision tree algorithm implemented in scikit-learn (which is some variant of the CART algorithm for binary splits, as discussed in class)." 594 | ] 595 | }, 596 | { 597 | "cell_type": "markdown", 598 | "metadata": {}, 599 | "source": [ 600 | "### 2.1 Bootstrapping (10 pts)" 601 | ] 602 | }, 603 | { 604 | "cell_type": "markdown", 605 | "metadata": {}, 606 | "source": [ 607 | "As you remember, bagging relies on bootstrap sampling. So, as a first step, your task is to implement a function for generating bootstrap samples. In this exercise, for simplicity, we will perform the computations based on the Iris dataset.\n", 608 | "\n", 609 | "On an interesting side note, scikit-learn recently updated their version of the Iris dataset since it was discovered that the Iris version hosted on the UCI machine learning repository (https://archive.ics.uci.edu/ml/datasets/Iris/) has two data points that are different from R. Fisher's original paper (Fisher,R.A. \"The use of multiple measurements in taxonomic problems\" Annals of Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to Mathematical Statistics\" (John Wiley, NY, 1950).) and changed it in their most recent version. Since most students may not have the latest scikit-learn version installed, we will be working with the Iris dataset that is deposited on UCI, which has become quite the standard in the Python machine learning community for benchmarking algorithms. Instead of manually downloading it, we will be fetching it through the `mlxtend` (http://rasbt.github.io/mlxtend/) library that you installed in the last homework." 
610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": 13, 615 | "metadata": {}, 616 | "outputs": [ 617 | { 618 | "name": "stdout", 619 | "output_type": "stream", 620 | "text": [ 621 | "Number of examples: 150\n", 622 | "Number of features: 4\n", 623 | "Unique class labels: [0 1 2]\n" 624 | ] 625 | } 626 | ], 627 | "source": [ 628 | "# DO NOT EDIT OR DELETE THIS CELL\n", 629 | "\n", 630 | "from mlxtend.data import iris_data\n", 631 | "X, y = iris_data()\n", 632 | "\n", 633 | "print('Number of examples:', X.shape[0])\n", 634 | "print('Number of features:', X.shape[1])\n", 635 | "print('Unique class labels:', np.unique(y))" 636 | ] 637 | }, 638 | { 639 | "cell_type": "markdown", 640 | "metadata": {}, 641 | "source": [ 642 | "Use scikit-learn's `train_test_split` function to divide the dataset into a training and a test set.\n", 643 | "\n", 644 | "- The test set should contain 45 examples, and the training set should contain 105 examples.\n", 645 | "- To ensure reproducible results, use `123` as a random seed.\n", 646 | "- Perform a stratified split." 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": null, 652 | "metadata": {}, 653 | "outputs": [], 654 | "source": [ 655 | "from sklearn.model_selection import # YOUR CODE\n", 656 | "\n", 657 | "\n", 658 | "X_train, X_test, y_train, y_test = # YOUR CODE\n", 659 | "\n", 660 | "print('Number of training examples:', X_train.shape[0])\n", 661 | "print('Number of test examples:', X_test.shape[0])" 662 | ] 663 | }, 664 | { 665 | "cell_type": "markdown", 666 | "metadata": {}, 667 | "source": [ 668 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your `train_test_split` call." 
669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": 15, 674 | "metadata": {}, 675 | "outputs": [ 676 | { 677 | "name": "stdout", 678 | "output_type": "stream", 679 | "text": [ 680 | "Number of training examples: 105\n", 681 | "Number of test examples: 45\n" 682 | ] 683 | } 684 | ], 685 | "source": [ 686 | "# DO NOT EDIT OR DELETE THIS CELL\n", 687 | "\n", 688 | "print('Number of training examples:', X_train.shape[0])\n", 689 | "print('Number of test examples:', X_test.shape[0])" 690 | ] 691 | }, 692 | { 693 | "cell_type": "markdown", 694 | "metadata": {}, 695 | "source": [ 696 | "Next we are implementing a function to generate bootstrap samples of the training set. In particular, we will perform the bootstrapping as follows:\n", 697 | "\n", 698 | "- Create an index array with values 0, ..., 104.\n", 699 | "- Draw a random sample (with replacement) from this index array using the `choice` method of a NumPy `RandomState` object that is passed to the function as `rng`. \n", 700 | "- Select training examples from the X array and labels from the y array using the new sample of indices." 701 | ] 702 | }, 703 | { 704 | "cell_type": "code", 705 | "execution_count": null, 706 | "metadata": {}, 707 | "outputs": [], 708 | "source": [ 709 | "def draw_bootstrap_sample(rng, X, y):\n", 710 | " sample_indices = # YOUR CODE\n", 711 | " bootstrap_indices = rng.choice( # YOUR CODE )\n", 712 | " return X[# YOUR CODE], y[# YOUR CODE]" 713 | ] 714 | }, 715 | { 716 | "cell_type": "markdown", 717 | "metadata": {}, 718 | "source": [ 719 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your implementation of the `draw_bootstrap_sample` function." 
720 | ] 721 | }, 722 | { 723 | "cell_type": "code", 724 | "execution_count": 17, 725 | "metadata": {}, 726 | "outputs": [ 727 | { 728 | "name": "stdout", 729 | "output_type": "stream", 730 | "text": [ 731 | "Number of training inputs from bootstrap round: 105\n", 732 | "Number of training labels from bootstrap round: 105\n", 733 | "Labels:\n", 734 | " [0 0 1 0 0 1 2 0 2 1 0 0 2 1 1 1 1 2 1 1 2 0 2 1 2 1 1 1 0 1 0 0 1 2 0 0 0\n", 735 | " 0 2 1 1 2 1 2 1 1 2 1 2 0 1 1 2 2 1 0 1 0 2 2 0 1 0 2 0 0 0 0 1 2 0 0 1 0\n", 736 | " 1 1 0 1 1 2 2 0 2 0 2 0 1 1 2 2 0 2 2 2 0 1 0 1 2 2 2 1 0 0 0]\n" 737 | ] 738 | } 739 | ], 740 | "source": [ 741 | "# DO NOT EDIT OR DELETE THIS CELL\n", 742 | "\n", 743 | "rng = np.random.RandomState(123)\n", 744 | "X_boot, y_boot = draw_bootstrap_sample(rng, X_train, y_train)\n", 745 | "\n", 746 | "print('Number of training inputs from bootstrap round:', X_boot.shape[0])\n", 747 | "print('Number of training labels from bootstrap round:', y_boot.shape[0])\n", 748 | "print('Labels:\\n', y_boot)" 749 | ] 750 | }, 751 | { 752 | "cell_type": "markdown", 753 | "metadata": {}, 754 | "source": [ 755 | "### 2.2 Bagging classifier from decision trees (10 pts)" 756 | ] 757 | }, 758 | { 759 | "cell_type": "markdown", 760 | "metadata": {}, 761 | "source": [ 762 | "In this section, you will implement a Bagging algorithm based on the `DecisionTreeClassifier`. I provided a partial solution for you. 
" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": null, 768 | "metadata": {}, 769 | "outputs": [], 770 | "source": [ 771 | "from sklearn.tree import DecisionTreeClassifier\n", 772 | "\n", 773 | "\n", 774 | "class BaggingClassifier(object):\n", 775 | " \n", 776 | " def __init__(self, num_trees=10, random_state=123):\n", 777 | " self.num_trees = num_trees\n", 778 | " self.rng = np.random.RandomState(random_state)\n", 779 | " \n", 780 | " \n", 781 | " def fit(self, X, y):\n", 782 | " self.trees_ = [DecisionTreeClassifier(random_state=self.rng) for i in range(self.num_trees)]\n", 783 | " for i in range(self.num_trees):\n", 784 | " X_boot, y_boot = # YOUR CODE to draw a bootstrap sample\n", 785 | " # YOUR CODE to\n", 786 | " # fit the trees in self.trees_ on the bootstrap samples\n", 787 | " \n", 788 | " def predict(self, X):\n", 789 | " ary = np.zeros((X.shape[0], len(self.trees_)), dtype=np.int)\n", 790 | " for i in range(len(self.trees_)):\n", 791 | " ary[:, i] = self.trees_[i].predict(X)\n", 792 | "\n", 793 | " maj = np.apply_along_axis(lambda x:\n", 794 | " np.argmax(np.bincount(x)),\n", 795 | " axis=1,\n", 796 | " arr=ary)\n", 797 | " return maj" 798 | ] 799 | }, 800 | { 801 | "cell_type": "markdown", 802 | "metadata": {}, 803 | "source": [ 804 | "I added the following code cell for your convenience to double-check your solution. If your results don't match the results shown below, there is a bug in your implementation of the `BaggingClassifier()`." 
805 | ] 806 | }, 807 | { 808 | "cell_type": "code", 809 | "execution_count": 29, 810 | "metadata": {}, 811 | "outputs": [ 812 | { 813 | "name": "stdout", 814 | "output_type": "stream", 815 | "text": [ 816 | "Individual Tree Accuracies:\n", 817 | "88.9%\n", 818 | "93.3%\n", 819 | "97.8%\n", 820 | "93.3%\n", 821 | "93.3%\n", 822 | "93.3%\n", 823 | "91.1%\n", 824 | "97.8%\n", 825 | "97.8%\n", 826 | "97.8%\n", 827 | "\n", 828 | "Bagging Test Accuracy: 97.8%\n" 829 | ] 830 | } 831 | ], 832 | "source": [ 833 | "# DO NOT EDIT OR DELETE THIS CELL\n", 834 | "\n", 835 | "model = BaggingClassifier()\n", 836 | "model.fit(X_train, y_train)\n", 837 | "\n", 838 | "predictions = model.predict(X_test)\n", 839 | "\n", 840 | "print('Individual Tree Accuracies:')\n", 841 | "for tree in model.trees_:\n", 842 | " predictions = tree.predict(X_test) \n", 843 | " print('%.1f%%' % ((predictions == y_test).sum() / X_test.shape[0] * 100))\n", 844 | "\n", 845 | "print('\\nBagging Test Accuracy: %.1f%%' % ((predictions == y_test).sum() / X_test.shape[0] * 100))" 846 | ] 847 | }, 848 | { 849 | "cell_type": "markdown", 850 | "metadata": {}, 851 | "source": [ 852 | "
\n", 853 | "
\n", 854 | "
\n", 855 | "
\n", 856 | "
\n", 857 | "
" 858 | ] 859 | }, 860 | { 861 | "cell_type": "markdown", 862 | "metadata": {}, 863 | "source": [ 864 | "## 3) Bias-Variance Decomposition" 865 | ] 866 | }, 867 | { 868 | "cell_type": "markdown", 869 | "metadata": {}, 870 | "source": [ 871 | "In this exercise you will be asked to compute the variance and bias components of the 0-1 loss that we discussed in class. \n", 872 | "\n", 873 | "- In particular, you will compute the average bias and the average variance over all test examples (instead of a single test example). \n", 874 | "\n", 875 | "- The dataset you will be using as training set(s) and test set is the Iris dataset that you already divided into `X_train` / `y_train` and `X_test` / `y_test` earlier.\n", 876 | "\n", 877 | "- Since we do not have unlimited training datasets to estimate the parameters (think back of the estimation over the training sets), we will use bootstrapping to simulate \"new\" training sets. \n" 878 | ] 879 | }, 880 | { 881 | "cell_type": "markdown", 882 | "metadata": {}, 883 | "source": [ 884 | "### 3.1 Bias-Variance decomposition of the 0-1 Loss for Decision Trees (10 pts)" 885 | ] 886 | }, 887 | { 888 | "cell_type": "markdown", 889 | "metadata": {}, 890 | "source": [ 891 | "In this first part, you will be computing the averaged bias and variance components over the test set examples for the decision tree algorithm implemented in scikit-learn on the Iris data. 
\n", 892 | "\n", 893 | "I already implemented the code for computing the \"main prediction\" for you:" 894 | ] 895 | }, 896 | { 897 | "cell_type": "code", 898 | "execution_count": 20, 899 | "metadata": {}, 900 | "outputs": [], 901 | "source": [ 902 | "# DO NOT EDIT OR DELETE THIS CELL\n", 903 | "\n", 904 | "rng = np.random.RandomState(123)\n", 905 | "\n", 906 | "num_bootstrap = 200\n", 907 | "\n", 908 | "all_pred = np.zeros((num_bootstrap, y_test.shape[0]), dtype=np.int)\n", 909 | "\n", 910 | "for i in range(num_bootstrap):\n", 911 | " X_boot, y_boot = draw_bootstrap_sample(rng, X_train, y_train)\n", 912 | " pred = DecisionTreeClassifier(random_state=66).fit(X_boot, y_boot).predict(X_test)\n", 913 | " all_pred[i] = pred\n", 914 | " \n", 915 | "main_predictions = np.apply_along_axis(lambda x:\n", 916 | " np.argmax(np.bincount(x)),\n", 917 | " axis=0,\n", 918 | " arr=all_pred)" 919 | ] 920 | }, 921 | { 922 | "cell_type": "markdown", 923 | "metadata": {}, 924 | "source": [ 925 | "Note that `all_pred` is a 2D array of dimension $\\mathbb{R}^{b \\times n_{test}}$, where $b$ is the number of bootstrap rounds and $n_{test}$ is the number of test examples in the test set. 
In other words, each of the 200 rows in this array stores the predictions of one particular decision tree hypothesis for all 45 test data points.\n", 926 | "\n", 927 | "Your first task is to compute the average bias over all test examples:" 928 | ] 929 | }, 930 | { 931 | "cell_type": "code", 932 | "execution_count": null, 933 | "metadata": {}, 934 | "outputs": [], 935 | "source": [ 936 | "# YOUR CODE\n", 937 | "\n", 938 | "\n", 939 | "print('Average bias:', bias)" 940 | ] 941 | }, 942 | { 943 | "cell_type": "markdown", 944 | "metadata": {}, 945 | "source": [ 946 | "Your second task is to compute the average variance over all test examples:" 947 | ] 948 | }, 949 | { 950 | "cell_type": "code", 951 | "execution_count": null, 952 | "metadata": {}, 953 | "outputs": [], 954 | "source": [ 955 | "# YOUR CODE\n", 956 | "# you probably need multiple\n", 957 | "# lines of code and a for-loop\n", 958 | "\n", 959 | "print('Average variance:', var)" 960 | ] 961 | }, 962 | { 963 | "cell_type": "markdown", 964 | "metadata": {}, 965 | "source": [ 966 | "Hint: The average bias and variance values are both scalars, not vectors or matrices. In other words, for each of the code cells above, you should return a real number (float)." 967 | ] 968 | }, 969 | { 970 | "cell_type": "markdown", 971 | "metadata": {}, 972 | "source": [ 973 | "### 3.2 Bias-Variance decomposition of the 0-1 Loss for Bagging (10 pts)" 974 | ] 975 | }, 976 | { 977 | "cell_type": "markdown", 978 | "metadata": {}, 979 | "source": [ 980 | "Use the code from the previous section, 3.1, to compare the decision tree algorithm with a BaggingClassifier from scikit-learn.\n", 981 | "\n", 982 | "- Report both the average bias and average variance just like before, but use the `BaggingClassifier` in scikit-learn instead of the `DecisionTreeClassifier`. You can use the default values of `BaggingClassifier`." 
983 | ] 984 | }, 985 | { 986 | "cell_type": "code", 987 | "execution_count": null, 988 | "metadata": {}, 989 | "outputs": [], 990 | "source": [ 991 | "# YOUR SOLUTION\n", 992 | "# Many lines of code (which you may copy and modify from 3.1)\n", 993 | "\n", 994 | "\n", 995 | "print('Average bias:', bias)\n", 996 | "print('Average variance:', var)" 997 | ] 998 | }, 999 | { 1000 | "cell_type": "markdown", 1001 | "metadata": {}, 1002 | "source": [ 1003 | "Is the average variance higher or lower than the average variance of the decision tree in 3.1? And what about the average bias?" 1004 | ] 1005 | }, 1006 | { 1007 | "cell_type": "markdown", 1008 | "metadata": {}, 1009 | "source": [ 1010 | "!!! TYPE YOUR ANSWER HERE !!!" 1011 | ] 1012 | }, 1013 | { 1014 | "cell_type": "markdown", 1015 | "metadata": {}, 1016 | "source": [ 1017 | "### 3.3 Bias-Variance decomposition of the 0-1 Loss for AdaBoost (10 pts)" 1018 | ] 1019 | }, 1020 | { 1021 | "cell_type": "markdown", 1022 | "metadata": {}, 1023 | "source": [ 1024 | "Use the code from the previous section, 3.1, to compare the decision tree algorithm with an AdaBoostClassifier from scikit-learn.\n", 1025 | "\n", 1026 | "- Report both the average bias and average variance just like before, but use the `AdaBoostClassifier` in scikit-learn instead of the `DecisionTreeClassifier`. You can use the default values of `AdaBoostClassifier`." 1027 | ] 1028 | }, 1029 | { 1030 | "cell_type": "code", 1031 | "execution_count": null, 1032 | "metadata": {}, 1033 | "outputs": [], 1034 | "source": [ 1035 | "# YOUR SOLUTION\n", 1036 | "# Many lines of code (which you may copy and modify from 3.1)\n", 1037 | "\n", 1038 | "\n", 1039 | "\n", 1040 | "print('Average bias:', bias)\n", 1041 | "print('Average variance:', var)" 1042 | ] 1043 | }, 1044 | { 1045 | "cell_type": "markdown", 1046 | "metadata": {}, 1047 | "source": [ 1048 | "Is the average variance higher or lower than the average variance of the decision tree in 3.1? 
And what about the average bias?" 
1049 | ] 1050 | }, 1051 | { 1052 | "cell_type": "markdown", 1053 | "metadata": {}, 1054 | "source": [ 1055 | "!!! TYPE YOUR ANSWER HERE !!!" 1056 | ] 1057 | }, 1058 | { 1059 | "cell_type": "markdown", 1060 | "metadata": {}, 1061 | "source": [ 1062 | "
\n", 1063 | "
\n", 1064 | "
\n", 1065 | "
\n", 1066 | "
\n", 1067 | "
" 1068 | ] 1069 | }, 1070 | { 1071 | "cell_type": "markdown", 1072 | "metadata": {}, 1073 | "source": [ 1074 | "## Bonus Exercise (10 pts)" 1075 | ] 1076 | }, 1077 | { 1078 | "cell_type": "markdown", 1079 | "metadata": {}, 1080 | "source": [ 1081 | "In this bonus exercise, you will be asked to fit a `RandomForestClassifier` on a small subset (10%) of the MNIST handwritten digits dataset (http://yann.lecun.com/exdb/mnist/). For convenience, the following code loads this small subset via mlxtend:" 1082 | ] 1083 | }, 1084 | { 1085 | "cell_type": "code", 1086 | "execution_count": 2, 1087 | "metadata": {}, 1088 | "outputs": [ 1089 | { 1090 | "name": "stdout", 1091 | "output_type": "stream", 1092 | "text": [ 1093 | "Dimensions: 5000 x 784\n", 1094 | "1st row [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1095 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1096 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1097 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1098 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1099 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1100 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1101 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1102 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1103 | " 0. 51. 159. 253. 159. 50. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1104 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1105 | " 48. 238. 252. 252. 252. 237. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1106 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 54.\n", 1107 | " 227. 253. 252. 239. 233. 252. 57. 6. 0. 0. 0. 0. 0. 0.\n", 1108 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 10. 60. 224.\n", 1109 | " 252. 253. 252. 202. 84. 252. 253. 122. 0. 0. 0. 0. 0. 0.\n", 1110 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 163. 252. 252.\n", 1111 | " 252. 253. 252. 252. 96. 189. 253. 167. 0. 0. 0. 0. 0. 0.\n", 1112 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 51. 238. 253. 253.\n", 1113 | " 190. 114. 253. 228. 47. 79. 255. 168. 0. 0. 0. 0. 0. 0.\n", 1114 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 48. 238. 252. 
252. 179.\n", 1115 | " 12. 75. 121. 21. 0. 0. 253. 243. 50. 0. 0. 0. 0. 0.\n", 1116 | " 0. 0. 0. 0. 0. 0. 0. 0. 38. 165. 253. 233. 208. 84.\n", 1117 | " 0. 0. 0. 0. 0. 0. 253. 252. 165. 0. 0. 0. 0. 0.\n", 1118 | " 0. 0. 0. 0. 0. 0. 0. 7. 178. 252. 240. 71. 19. 28.\n", 1119 | " 0. 0. 0. 0. 0. 0. 253. 252. 195. 0. 0. 0. 0. 0.\n", 1120 | " 0. 0. 0. 0. 0. 0. 0. 57. 252. 252. 63. 0. 0. 0.\n", 1121 | " 0. 0. 0. 0. 0. 0. 253. 252. 195. 0. 0. 0. 0. 0.\n", 1122 | " 0. 0. 0. 0. 0. 0. 0. 198. 253. 190. 0. 0. 0. 0.\n", 1123 | " 0. 0. 0. 0. 0. 0. 255. 253. 196. 0. 0. 0. 0. 0.\n", 1124 | " 0. 0. 0. 0. 0. 0. 76. 246. 252. 112. 0. 0. 0. 0.\n", 1125 | " 0. 0. 0. 0. 0. 0. 253. 252. 148. 0. 0. 0. 0. 0.\n", 1126 | " 0. 0. 0. 0. 0. 0. 85. 252. 230. 25. 0. 0. 0. 0.\n", 1127 | " 0. 0. 0. 0. 7. 135. 253. 186. 12. 0. 0. 0. 0. 0.\n", 1128 | " 0. 0. 0. 0. 0. 0. 85. 252. 223. 0. 0. 0. 0. 0.\n", 1129 | " 0. 0. 0. 7. 131. 252. 225. 71. 0. 0. 0. 0. 0. 0.\n", 1130 | " 0. 0. 0. 0. 0. 0. 85. 252. 145. 0. 0. 0. 0. 0.\n", 1131 | " 0. 0. 48. 165. 252. 173. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1132 | " 0. 0. 0. 0. 0. 0. 86. 253. 225. 0. 0. 0. 0. 0.\n", 1133 | " 0. 114. 238. 253. 162. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1134 | " 0. 0. 0. 0. 0. 0. 85. 252. 249. 146. 48. 29. 85. 178.\n", 1135 | " 225. 253. 223. 167. 56. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1136 | " 0. 0. 0. 0. 0. 0. 85. 252. 252. 252. 229. 215. 252. 252.\n", 1137 | " 252. 196. 130. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1138 | " 0. 0. 0. 0. 0. 0. 28. 199. 252. 252. 253. 252. 252. 233.\n", 1139 | " 145. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1140 | " 0. 0. 0. 0. 0. 0. 0. 25. 128. 252. 253. 252. 141. 37.\n", 1141 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1142 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1143 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1144 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1145 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1146 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1147 | " 0. 0. 0. 0. 0. 0. 
0. 0. 0. 0. 0. 0. 0. 0.\n", 1148 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 1149 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n" 1150 | ] 1151 | } 1152 | ], 1153 | "source": [ 1154 | "from mlxtend.data import mnist_data\n", 1155 | "X, y = mnist_data()\n", 1156 | "\n", 1157 | "print('Dimensions: %s x %s' % (X.shape[0], X.shape[1]))\n", 1158 | "print('1st row', X[0])" 1159 | ] 1160 | }, 1161 | { 1162 | "cell_type": "markdown", 1163 | "metadata": {}, 1164 | "source": [ 1165 | "The next code cell shuffles the dataset and divides it into 4500 training examples and 500 test examples, respectively." 1166 | ] 1167 | }, 1168 | { 1169 | "cell_type": "code", 1170 | "execution_count": 3, 1171 | "metadata": {}, 1172 | "outputs": [], 1173 | "source": [ 1174 | "from mlxtend.preprocessing import shuffle_arrays_unison\n", 1175 | "\n", 1176 | "\n", 1177 | "X, y = shuffle_arrays_unison((X, y), random_seed=1)\n", 1178 | "X_train, y_train = X[:4500], y[:4500]\n", 1179 | "X_test, y_test = X[4500:], y[4500:]" 1180 | ] 1181 | }, 1182 | { 1183 | "cell_type": "markdown", 1184 | "metadata": {}, 1185 | "source": [ 1186 | "Now, your task is to fit a RandomForest classifier on the training set and evaluate its predictive accuracy on the test set. 
" 1187 | ] 1188 | }, 1189 | { 1190 | "cell_type": "code", 1191 | "execution_count": 5, 1192 | "metadata": {}, 1193 | "outputs": [ 1194 | { 1195 | "name": "stdout", 1196 | "output_type": "stream", 1197 | "text": [ 1198 | "Accuracy 93.6%\n" 1199 | ] 1200 | } 1201 | ], 1202 | "source": [ 1203 | "from sklearn.ensemble import RandomForestClassifier\n", 1204 | "\n", 1205 | "model = RandomForestClassifier(n_estimators=100, random_state=123)\n", 1206 | "model.fit(#YOUR CODE)\n", 1207 | "\n", 1208 | "acc = # YOUR CODE\n", 1209 | "print('Accuracy %.1f%%' % acc)" 1210 | ] 1211 | }, 1212 | { 1213 | "cell_type": "markdown", 1214 | "metadata": {}, 1215 | "source": [ 1216 | "Next, your task is to load an image of a digit (some_digit.png) from this directory into a Python array and classify it using the random forest model. The some_digit.png image is displayed below:" 1217 | ] 1218 | }, 1219 | { 1220 | "cell_type": "markdown", 1221 | "metadata": {}, 1222 | "source": [ 1223 | "![](some_digit.png)" 1224 | ] 1225 | }, 1226 | { 1227 | "cell_type": "markdown", 1228 | "metadata": {}, 1229 | "source": [ 1230 | "Note: For loading the image, you need to install the Python imaging library PIL. Actually, Pillow, a more up-to-date fork is recommended. Execute one of the following two if you haven't installed Pillow already.\n", 1231 | " \n", 1232 | "- `conda install Pillow`\n", 1233 | "\n", 1234 | "- `pip install Pillow`" 1235 | ] 1236 | }, 1237 | { 1238 | "cell_type": "markdown", 1239 | "metadata": {}, 1240 | "source": [ 1241 | "Again, I have partially pre-written the code for you." 
1242 | ] 1243 | }, 1244 | { 1245 | "cell_type": "code", 1246 | "execution_count": null, 1247 | "metadata": {}, 1248 | "outputs": [], 1249 | "source": [ 1250 | "from PIL import Image\n", 1251 | "import numpy as np\n", 1252 | "\n", 1253 | "def load_image(file_name):\n", 1254 | " img = Image.open(file_name)\n", 1255 | " img.load()\n", 1256 | " data = np.asarray(img, dtype=np.float)\n", 1257 | " return data\n", 1258 | "\n", 1259 | "x_image = # YOUR CODE" 1260 | ] 1261 | }, 1262 | { 1263 | "cell_type": "code", 1264 | "execution_count": 5, 1265 | "metadata": {}, 1266 | "outputs": [ 1267 | { 1268 | "name": "stdout", 1269 | "output_type": "stream", 1270 | "text": [ 1271 | "Digit: 5\n" 1272 | ] 1273 | } 1274 | ], 1275 | "source": [ 1276 | "# The data needs to be represented as a vector (1 position for each feature)\n", 1277 | "x_transf = # YOUR CODE\n", 1278 | "\n", 1279 | "# Also, scikit-learn expects 2D arrays, so we need to add a dimension\n", 1280 | "x_transf = # YOUR CODE\n", 1281 | "\n", 1282 | "print('Digit:', model.predict(x_transf)[0])" 1283 | ] 1284 | } 1285 | ], 1286 | "metadata": { 1287 | "kernelspec": { 1288 | "display_name": "Python 3", 1289 | "language": "python", 1290 | "name": "python3" 1291 | }, 1292 | "language_info": { 1293 | "codemirror_mode": { 1294 | "name": "ipython", 1295 | "version": 3 1296 | }, 1297 | "file_extension": ".py", 1298 | "mimetype": "text/x-python", 1299 | "name": "python", 1300 | "nbconvert_exporter": "python", 1301 | "pygments_lexer": "ipython3", 1302 | "version": "3.6.6" 1303 | } 1304 | }, 1305 | "nbformat": 4, 1306 | "nbformat_minor": 2 1307 | } 1308 | -------------------------------------------------------------------------------- /hw_03/hw3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Problem Set 3" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | 
"STAT 479: Machine Learning (Fall 2018) \n", 15 | "Instructor: Sebastian Raschka (sraschka@wisc.edu) \n", 16 | "Course website: http://pages.stat.wisc.edu/~sraschka/teaching/stat479-fs2018/\n", 17 | "\n", 18 | "**Due**: Dec 03 (before 11:59 pm).\n", 19 | "\n", 20 | "**How to submit**\n", 21 | "\n", 22 | "As mentioned in the lecture, you need to submit the `.ipynb` file with your answers plus an `.html` file, which will serve as a backup for us in case the `.ipynb` file cannot be opened on my or the TA's computer. In addition, you may also export the notebook as PDF and upload it as well.\n", 23 | "\n", 24 | "Again, we will be using the Canvas platform, so you need to submit your homework there. You should be able to resubmit the homework as many times as you like before the due date." 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "As usual, you do not write the whole code from scratch, and I provided you with a skeleton of code where you need to add the lines that I indicated. Not, however, that everyone's coding style is different. Where I use only one line of code, you may want to use multiple ones. Also, where you use one line of code, I may use multiple ones." 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "%load_ext watermark\n", 41 | "%watermark -d -u -a '' -v -p numpy,scipy,matplotlib,sklearn,mlxtend" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "
\n", 49 | "


\n", 50 | "


\n", 51 | "


\n", 52 | "


\n", 53 | "


\n", 54 | "


\n", 55 | "
" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "## 1. Hyperparameter Tuning and Model Selection" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "### 1.1 [10 pts] Using Grid Search for Hyperparameter Tuning" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "In this exercise, you will be working with the Breast Cancer Wisconsin dataset,\n", 77 | "which contains 569 samples of malignant and benign tumor cells. \n", 78 | "\n", 79 | "The first two columns in the dataset store the unique ID numbers of the samples and the corresponding diagnoses (M = malignant, B = benign), respectively. Columns 3-32 contain 30 real-valued features that have been computed from digitized images of the cell nuclei, which can be used to build a model to predict whether a tumor is benign or malignant. The Breast Cancer Wisconsin dataset has been deposited in the UCI Machine Learning Repository, and more detailed information about this dataset can be found at https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+(Diagnostic).\n", 80 | "\n", 81 | "The next cell loads the dataset and converts the class label M (malignant) to an integer 1 and the label B (benign) to class label 0." 
82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n", 91 | "\n", 92 | "import pandas as pd\n", 93 | "\n", 94 | "\n", 95 | "df = pd.read_csv('data/wdbc.data', header=None)\n", 96 | "\n", 97 | "# convert class label \"M\"->1 and label \"B\"->0\n", 98 | "df[1] = df[1].apply(lambda x: 1 if x == 'M' else 0)\n", 99 | "\n", 100 | "\n", 101 | "df.head()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n", 111 | "\n", 112 | "\n", 113 | "from sklearn.model_selection import train_test_split\n", 114 | "\n", 115 | "\n", 116 | "y = df[1].values\n", 117 | "X = df.loc[:, 2:].values\n", 118 | "\n", 119 | "X_train, X_test, y_train, y_test = \\\n", 120 | " train_test_split(X, y, test_size=0.3, shuffle=True, random_state=0, stratify=y)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "Now, your task is to use `GridSearchCV` from scikit-learn to find the best parameter for `n_neighbors` of a `KNeighborsClassifier`.\n", 128 | "\n", 129 | "As hyperparameter values, you only need to consider the number of `n_neighbors` within the range 1-16 (including 16)." 
130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "# MODIFY THIS CELL\n", 139 | "\n", 140 | "from sklearn.pipeline import make_pipeline\n", 141 | "from sklearn.preprocessing import StandardScaler\n", 142 | "from sklearn.neighbors import KNeighborsClassifier\n", 143 | "from sklearn.model_selection import GridSearchCV\n", 144 | "\n", 145 | "\n", 146 | "pipe = make_pipeline(# YOUR CODE HERE\n", 147 | " # YOUR CODE HERE\n", 148 | ")\n", 149 | "\n", 150 | "param_grid = [{ # YOUR CODE HERE }]\n", 151 | "\n", 152 | "\n", 153 | "gs = GridSearchCV(# YOUR CODE HERE \n", 154 | " # YOUR CODE HERE \n", 155 | " iid=False,\n", 156 | " n_jobs=-1,\n", 157 | " refit=True,\n", 158 | " scoring='accuracy',\n", 159 | " cv=10)\n", 160 | "\n", 161 | "gs.fit(X_train, y_train)\n", 162 | "\n", 163 | "print('Best Accuracy: %.2f%%' % (gs.best_score_*100))" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "Next, print the best parameters obtained from the `GridSearchCV` run and compute the accuracy a `KNeighborsClassifier` would achieve with these settings on the test set (`X_test`, `y_test`)." 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "# MODIFY THIS CELL\n", 180 | "\n", 181 | "print('Best Params: %s' % # YOUR CODE HERE)\n", 182 | "print('Test Accuracy: %.2f%%' % # YOUR CODE HERE)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "
\n", 190 | "


\n", 191 | "


\n", 192 | "


\n", 193 | "


\n", 194 | "


\n", 195 | "


\n", 196 | "
" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "### 1.2 [10 pts] Estimate the Generalization Performance using the '.632+' Bootstrap" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "In this exercise, you are asked to compute the accuracy of the model from the previous exercise (1.1) on the test set (`X_test`, `y_test`) using the .632+ Bootstrap method. For this you can use the `bootstrap_point632_score` function implemented in MLxtend for this: \n", 211 | "http://rasbt.github.io/mlxtend/user_guide/evaluate/bootstrap_point632_score/" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "- use 200 bootstrap rounds\n", 219 | "- set the random seed to 1\n", 220 | "\n", 221 | "The accruacy should be the mean accuracy over the 200 bootstrap values that the `bootstrap_point632_score` method returns." 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "# MODIFY THIS CELL\n", 231 | "\n", 232 | "from mlxtend.evaluate import bootstrap_point632_score\n", 233 | "import numpy as np\n", 234 | "\n", 235 | "\n", 236 | "scores = bootstrap_point632_score(# YOUR CODE HERE)\n", 237 | "\n", 238 | "acc = # YOUR CODE HERE\n", 239 | "print('Accuracy: %.2f%%' % (100*acc))" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "Next, compute the lower and upper bound on the mean accuracy via a 95% confidence interval. For that, you should use the `scores` you computed in the cell above." 
247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "# MODIFY THIS CELL\n", 256 | "\n", 257 | "lower = # YOUR CODE\n", 258 | "upper = # YOUR CODE\n", 259 | "\n", 260 | "print('95%% Confidence interval: [%.2f, %.2f]' % (100*lower, 100*upper))" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "
\n", 268 | "


\n", 269 | "


\n", 270 | "


\n", 271 | "


\n", 272 | "


\n", 273 | "


\n", 274 | "
" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "## 2. Confusion Matrices" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "### 2.1 [10 pts] Contructing a Binary Confusion Matrix" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "The task of this execise is to construct a binary confusion matrix based of the following form:\n", 296 | "\n", 297 | "![](images/conf-1.png)\n", 298 | "\n", 299 | "Here, assume that the positive class is the class with label 0, and the negative class is the class with label 1. You are given an array of the actual class labels, `y_true`, as well as an array of the predicted class labels, `y_predicted`. The output should be a numpy array, like shown below\n", 300 | "\n", 301 | "```\n", 302 | "array([[101, 21],\n", 303 | " [41, 121]])\n", 304 | "``` \n", 305 | " \n", 306 | "(Note that these number in the array are not the actual, expected or correct values.)\n", 307 | "\n", 308 | "Using the `plot_confusion_matrix` from the `helper.py` script (which should be in the same directory as this notebook) the example array/confusion matrix is visualized as follows:" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "%matplotlib inline" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n", 327 | "\n", 328 | "import numpy as np\n", 329 | "from helper import plot_confusion_matrix\n", 330 | "import matplotlib.pyplot as plt\n", 331 | "\n", 332 | "\n", 333 | "example_cm = np.array([[101, 21],\n", 334 | " [41, 121]])\n", 335 | "\n", 336 | "plot_confusion_matrix(example_cm)\n", 337 | "plt.show()" 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | 
"metadata": {}, 343 | "source": [ 344 | "Now, your task is to complete the `confusion_matrix_binary` below in order to construct a confusion matrix from 2 label arrays:\n", 345 | "\n", 346 | "- `y_true` (true or actual class labels)\n", 347 | "- `y_predicted` (class labels predicted by a classifier)\n", 348 | "\n", 349 | "To make it easier for you, you only need to replace the `???`'s with the right variable name (`tp`, `fn`, `fp`, or `tn`)." 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "# MODIFY THIS CELL\n", 359 | "\n", 360 | "\n", 361 | "y_true = np.array([1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0])\n", 362 | "y_predicted = np.array([1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0])\n", 363 | "\n", 364 | "\n", 365 | "def confusion_matrix_binary(y_true, y_predicted):\n", 366 | "\n", 367 | " tp, fn, fp, tn = 0, 0, 0, 0\n", 368 | " \n", 369 | " for i, j in zip(y_true, y_predicted):\n", 370 | " if i == j:\n", 371 | " if i == 0:\n", 372 | " ??? += 1\n", 373 | " else:\n", 374 | " ??? += 1\n", 375 | " else:\n", 376 | " if i == 0:\n", 377 | " ??? += 1\n", 378 | " else:\n", 379 | " ??? += 1\n", 380 | " \n", 381 | " conf_matrix = np.zeros(4).reshape(2, 2).astype(int)\n", 382 | " conf_matrix[0, 0] = ???\n", 383 | " conf_matrix[0, 1] = ???\n", 384 | " conf_matrix[1, 0] = ???\n", 385 | " conf_matrix[1, 1] = ??? 
\n", 386 | " \n", 387 | " return conf_matrix\n", 388 | "\n", 389 | "result_matrix = confusion_matrix_binary(y_true, y_predicted)" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": null, 395 | "metadata": {}, 396 | "outputs": [], 397 | "source": [ 398 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n", 399 | "\n", 400 | "print('Conusion matrix array:\\n', result_matrix)" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": {}, 407 | "outputs": [], 408 | "source": [ 409 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n", 410 | "\n", 411 | "plot_confusion_matrix(result_matrix)\n", 412 | "plt.show()" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "
\n", 420 | "


\n", 421 | "


\n", 422 | "


\n", 423 | "


\n", 424 | "


\n", 425 | "


\n", 426 | "
" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "### 2.2 [10 pts] Constructing a Multiclass Confusion Matrix" 434 | ] 435 | }, 436 | { 437 | "cell_type": "markdown", 438 | "metadata": {}, 439 | "source": [ 440 | "Next, write a version of this confusion matrix that generalizes to multi-class settings as shown in the figure below:\n", 441 | "\n", 442 | " \n", 443 | "![](images/conf-2.png)\n", 444 | "\n", 445 | "\n", 446 | "Again, the output should be a 2D NumPy array:\n", 447 | "\n", 448 | "```\n", 449 | "array([[3, 0, 0],\n", 450 | " [7, 50, 12],\n", 451 | " [0, 0, 18]])\n", 452 | "```\n", 453 | " \n", 454 | "(Note that these number in the array are not the actual, expected or correct values for this exercise.)\n", 455 | "\n", 456 | "\n", 457 | "There are many different ways to implement a function to construct a multi-class confusion matrix, and in this exercise, you are given the freedom to implement it however way you prefer. Please note though that you should not import confusion matrix code from other packages but implement it by your self in Python (and NumPy)." 458 | ] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "Note that if there are 5 different class labels (0, ..., 4), then the result should be a 5x5 confusion matrix." 
465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": null, 470 | "metadata": {}, 471 | "outputs": [], 472 | "source": [ 473 | "## FOR STUDENTS\n", 474 | "\n", 475 | "\n", 476 | "import numpy as np\n", 477 | "\n", 478 | "\n", 479 | "def confusion_matrix_multiclass(y_true, y_predicted):\n", 480 | "\n", 481 | " # YOUR CODE (As many lines of code as you like)\n", 482 | " \n", 483 | " return matrix\n", 484 | "\n", 485 | "\n", 486 | "y_true = [1, 1, 1, 1, 0, 2, 0, 3, 4, 2, 1, 2, 2, 1, 2, 1, 0, 1, 1, 0]\n", 487 | "y_predicted = [1, 0, 1, 1, 0, 2, 1, 3, 4, 2, 2, 0, 2, 1, 2, 1, 0, 3, 1, 1]\n", 488 | "\n", 489 | "result_matrix = confusion_matrix_multiclass(y_true, y_predicted)\n", 490 | "result_matrix" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [ 499 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n", 500 | "\n", 501 | "from helper import plot_confusion_matrix\n", 502 | "\n", 503 | "\n", 504 | "plot_confusion_matrix(result_matrix)\n", 505 | "plt.show()" 506 | ] 507 | }, 508 | { 509 | "cell_type": "markdown", 510 | "metadata": {}, 511 | "source": [ 512 | "
\n", 513 | "


\n", 514 | "


\n", 515 | "


\n", 516 | "


\n", 517 | "


\n", 518 | "


\n", 519 | "
" 520 | ] 521 | }, 522 | { 523 | "cell_type": "markdown", 524 | "metadata": {}, 525 | "source": [ 526 | "### 2.3 [10 pts] Binary Confusion Matrices for Multiclass Problems" 527 | ] 528 | }, 529 | { 530 | "cell_type": "markdown", 531 | "metadata": {}, 532 | "source": [ 533 | "In this exercise, you will be building binary confusion matrices for multiclass problems as discussed in class when we talked about computing the balanced accuracy. Here, you can reuse the `confusion_matrix_binary` function you implemented in 2.1. \n", 534 | "\n", 535 | "Remember, if we are given 5 class labels (0, ..., 4) then we can construct 5 binary confusion matrices, where each time one of the 5 classes is assigned the positive class where all other classes will be considered as the negative class. The `positive_label` argument in the `binary_cm_from_multiclass` function below can be used to determine which class label refers to the positive class.\n", 536 | "\n", 537 | "Implementing the function below is actually very easy and should only require you to add 2 lines of code with the help of the `np.where` function. 
" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": null, 543 | "metadata": {}, 544 | "outputs": [], 545 | "source": [ 546 | "# MODIFY THIS CELL\n", 547 | "\n", 548 | "def binary_cm_from_multiclass(y_true, y_predicted, positive_label):\n", 549 | " \n", 550 | " y_true_ary = np.array(y_true)\n", 551 | " y_predicted_ary = np.array(y_predicted)\n", 552 | " \n", 553 | " y_true_mod = np.where( # YOUR CODE\n", 554 | " y_predicted_mod = np.where( # YOUR CODE\n", 555 | " \n", 556 | " cm = confusion_matrix_binary(y_true_mod, y_predicted_mod)\n", 557 | " return cm" 558 | ] 559 | }, 560 | { 561 | "cell_type": "markdown", 562 | "metadata": {}, 563 | "source": [ 564 | "As a hint, the expected output for label 0 as positive label is shown below:" 565 | ] 566 | }, 567 | { 568 | "cell_type": "markdown", 569 | "metadata": {}, 570 | "source": [ 571 | "![](images/hint-1.png)" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": {}, 578 | "outputs": [], 579 | "source": [ 580 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n", 581 | "\n", 582 | "\n", 583 | "y_true = [1, 1, 1, 1, 0, 2, 0, 3, 4, 2, 1, 2, 2, 1, 2, 1, 0, 1, 1, 0]\n", 584 | "y_predicted = [1, 0, 1, 1, 0, 2, 1, 3, 4, 2, 2, 0, 2, 1, 2, 1, 0, 3, 1, 1]\n", 585 | "\n", 586 | "\n", 587 | "mat_pos0 = binary_cm_from_multiclass(y_true, y_predicted, positive_label=0)\n", 588 | "print('Positive Label 0:\\n', mat_pos0)\n", 589 | "\n", 590 | "fig, ax = plot_confusion_matrix(mat_pos0)\n", 591 | "ax.set_xticklabels(['', 'Pos Class (0)', 'Neg Class (Rest)'])\n", 592 | "ax.set_yticklabels(['', 'Pos Class (0)', 'Neg Class (Rest)']);" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "metadata": {}, 599 | "outputs": [], 600 | "source": [ 601 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n", 602 | "\n", 603 | "mat_pos1 = binary_cm_from_multiclass(y_true, y_predicted, positive_label=1)\n", 604 | "print('\\n\\nPositive Label 1:\\n', mat_pos1)\n", 
605 | "\n", 606 | "fig, ax = plot_confusion_matrix(mat_pos1)\n", 607 | "ax.set_xticklabels(['', 'Pos Class (1)', 'Neg Class (Rest)'])\n", 608 | "ax.set_yticklabels(['', 'Pos Class (1)', 'Neg Class (Rest)']);\n", 609 | "\n", 610 | "plt.show()" 611 | ] 612 | }, 613 | { 614 | "cell_type": "markdown", 615 | "metadata": {}, 616 | "source": [ 617 | "
\n", 618 | "


\n", 619 | "


\n", 620 | "


\n", 621 | "


\n", 622 | "


\n", 623 | "


\n", 624 | "
" 625 | ] 626 | }, 627 | { 628 | "cell_type": "markdown", 629 | "metadata": {}, 630 | "source": [ 631 | "## 3. [10 pts] Balanced Accuracy" 632 | ] 633 | }, 634 | { 635 | "cell_type": "markdown", 636 | "metadata": {}, 637 | "source": [ 638 | "Based on our discussion in class, implement a function that computes the balanced accuracy. You can implement the accuracy whatever way you like using Python and NumPy. Note that you can also re-use the binary confusion matrix code and the `binary_cm_from_multiclass` code if you like (but you don't have to).\n", 639 | "\n", 640 | "Below is a template that you can use that does not require code from the previous exercises (but you can write the function in a different way if you like as long as it gives the correct results)." 641 | ] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "execution_count": null, 646 | "metadata": {}, 647 | "outputs": [], 648 | "source": [ 649 | "# MODIFY THIS CELL\n", 650 | "\n", 651 | "import numpy as np\n", 652 | "\n", 653 | "\n", 654 | "def balanced_accuracy(y_true, y_predicted):\n", 655 | " \n", 656 | " y_true_ary = np.array(y_true)\n", 657 | " y_predicted_ary = np.array(y_predicted)\n", 658 | " \n", 659 | " unique_labels = np.unique(np.concatenate((y_true_ary, y_predicted_ary)))\n", 660 | " class_accuracies = []\n", 661 | " for l in unique_labels:\n", 662 | " # YOUR CODE HERE\n", 663 | " # YOUR CODE HERE\n", 664 | " # YOUR CODE HERE\n", 665 | " class_accuracies.append(acc)\n", 666 | " return np.mean(class_accuracies)" 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": null, 672 | "metadata": {}, 673 | "outputs": [], 674 | "source": [ 675 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n", 676 | "\n", 677 | "y_targ = [1, 1, 2, 1, 1, 2, 0, 3]\n", 678 | "y_pred = [0, 0, 2, 1, 1, 2, 1, 3]\n", 679 | " \n", 680 | "balanced_accuracy(y_targ, y_pred)" 681 | ] 682 | }, 683 | { 684 | "cell_type": "markdown", 685 | "metadata": {}, 686 | "source": [ 687 | "
\n", 688 | "


\n", 689 | "


\n", 690 | "


\n", 691 | "


\n", 692 | "


\n", 693 | "


\n", 694 | "
" 695 | ] 696 | }, 697 | { 698 | "cell_type": "markdown", 699 | "metadata": {}, 700 | "source": [ 701 | "## 4. Receiver Operater Characteristic (ROC)" 702 | ] 703 | }, 704 | { 705 | "cell_type": "markdown", 706 | "metadata": {}, 707 | "source": [ 708 | "### 4.1 [10 pts] Plotting a ROC Curve" 709 | ] 710 | }, 711 | { 712 | "cell_type": "markdown", 713 | "metadata": {}, 714 | "source": [ 715 | "In this exercise, you are asked to plot a ROC curve. You are given a 2D array of probability values (`y_probabilities`; see next code cells) where \n", 716 | "- a value in the first column refer to the probability that a given test example (each row is one test example) belongs to class 0\n", 717 | "- a value in the second column refer to the probability that a given test example belongs to class 1" 718 | ] 719 | }, 720 | { 721 | "cell_type": "code", 722 | "execution_count": null, 723 | "metadata": {}, 724 | "outputs": [], 725 | "source": [ 726 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n", 727 | "\n", 728 | "\n", 729 | "from mlxtend.data import iris_data\n", 730 | "from sklearn.model_selection import train_test_split\n", 731 | "from sklearn.linear_model import LogisticRegression\n", 732 | "\n", 733 | "\n", 734 | "X, y = iris_data()\n", 735 | "X, y = X[:100, [1]], y[:100]\n", 736 | "X_train, X_test, y_train, y_test = \\\n", 737 | " train_test_split(X, y, test_size=0.5, shuffle=True, random_state=0, stratify=y)\n", 738 | "\n", 739 | "model = LogisticRegression(solver='lbfgs', random_state=123)\n", 740 | "model.fit(X_train, y_train)\n", 741 | "\n", 742 | "y_probabilities = model.predict_proba(X_test)\n", 743 | "\n", 744 | "print(y_probabilities)" 745 | ] 746 | }, 747 | { 748 | "cell_type": "markdown", 749 | "metadata": {}, 750 | "source": [ 751 | "For this exercise, these scores are probabilities here, but scores can be obtained from an arbitrary classifier (ROC curves are not limited to logistic regression classifiers). 
For instance, in k-nearest neighbor classifiers, we can consider the fraction of the majority class labels and number of neighbors as the score. In decision tree classifiers, the score can be calculated as the ratio of the majority class labels and number of data points at a given node.\n", 752 | "\n", 753 | "(In case you are curious, 'lbfgs' stands for Limited-memory BFGS, which is an optimization algorithm in the family of quasi-Newton methods that approximates the Broyden–Fletcher–Goldfarb–Shanno; not important to know here though.) " 754 | ] 755 | }, 756 | { 757 | "cell_type": "markdown", 758 | "metadata": {}, 759 | "source": [ 760 | "**Note: You should only use Python base functions, NumPy, and matplotlib to get full points (do not use other external libraries)**" 761 | ] 762 | }, 763 | { 764 | "cell_type": "markdown", 765 | "metadata": {}, 766 | "source": [ 767 | "The `pos_label` argument is used to specify the positive label and the threshold. For instance, if we are given score\n", 768 | "0.8, this score refers to the \"probability\" of the positive label. Assuming that the positive label is 1, this refers to a 80% probability that the true class label is 1. \n", 769 | "\n", 770 | "- Note that in the `y_probabilities` array, the second column refers to the probabilities of class label 1.\n", 771 | "- The `plot_roc_curve` function should only receive a 1D array for `y_score`. 
E.g., \n", 772 | "\n", 773 | "if `y_probabilities` is \n", 774 | "\n", 775 | "```\n", 776 | "[[0.44001556 0.55998444]\n", 777 | " [0.69026364 0.30973636]\n", 778 | " [0.31814182 0.68185818]\n", 779 | " [0.56957726 0.43042274]\n", 780 | " [0.86339788 0.13660212]\n", 781 | " [0.56957726 0.43042274]\n", 782 | " [0.86339788 0.13660212]\n", 783 | " [0.44001556 0.55998444]\n", 784 | " [0.08899234 0.91100766]\n", 785 | " [0.50487831 0.49512169]\n", 786 | " [0.74306586 0.25693414]\n", 787 | "```\n", 788 | " \n", 789 | "The `y_score` array is expected to be \n", 790 | "\n", 791 | "a) `y_score = [0.5599..., 0.3097..., 0.6818..., 0.4304..., ...]` for `pos_label=1`\n", 792 | "\n", 793 | "and \n", 794 | "\n", 795 | "b) `y_score = [0.4400..., 0.6902..., 0.3181..., 0.5695..., ...]` for `pos_label=0`" 796 | ] 797 | }, 798 | { 799 | "cell_type": "code", 800 | "execution_count": null, 801 | "metadata": {}, 802 | "outputs": [], 803 | "source": [ 804 | "# MODIFY THIS CELL\n", 805 | "\n", 806 | "\n", 807 | "import matplotlib.pyplot as plt\n", 808 | "import numpy as np\n", 809 | "\n", 810 | "\n", 811 | "def plot_roc_curve(y_true, y_score, pos_label=1, num_thresholds=100):\n", 812 | "\n", 813 | " y_true_ary = np.array(y_true)\n", 814 | " y_score_ary = np.array(y_score)\n", 815 | " x_axis_values = []\n", 816 | " y_axis_values = []\n", 817 | " thresholds = np.linspace(0., 1., num_thresholds)\n", 818 | "\n", 819 | " num_positives = # YOUR CODE\n", 820 | " num_negatives = # YOUR CODE\n", 821 | "\n", 822 | " for i, thr in enumerate(thresholds):\n", 823 | " \n", 824 | " binarized_scores = np.where(y_score >= thr, pos_label, int(not pos_label))\n", 825 | " \n", 826 | " positive_predictions = # YOUR CODE\n", 827 | " num_true_positives = # YOUR CODE\n", 828 | " num_false_positives = # YOUR CODE\n", 829 | " \n", 830 | " x_axis_values.append(# YOUR CODE)\n", 831 | " y_axis_values.append(# YOUR CODE)\n", 832 | "\n", 833 | " plt.step(x_axis_values, y_axis_values, where='post')\n", 834 | " \n", 835 | 
" plt.xlim([0., 1.01])\n", 836 | " plt.ylim([0., 1.01])\n", 837 | " plt.ylabel('True Positive Rate')\n", 838 | " plt.xlabel('False Positive Rate')\n", 839 | " \n", 840 | " return None" 841 | ] 842 | }, 843 | { 844 | "cell_type": "code", 845 | "execution_count": null, 846 | "metadata": { 847 | "scrolled": true 848 | }, 849 | "outputs": [], 850 | "source": [ 851 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n", 852 | "\n", 853 | "plot_roc_curve(y_test, y_probabilities[:, 1], pos_label=1)\n", 854 | "plt.show()" 855 | ] 856 | }, 857 | { 858 | "cell_type": "code", 859 | "execution_count": null, 860 | "metadata": {}, 861 | "outputs": [], 862 | "source": [ 863 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n", 864 | "\n", 865 | "plot_roc_curve(y_test, y_probabilities[:, 0], pos_label=0)\n", 866 | "plt.show()" 867 | ] 868 | }, 869 | { 870 | "cell_type": "markdown", 871 | "metadata": {}, 872 | "source": [ 873 | "
\n", 874 | "


\n", 875 | "


\n", 876 | "


\n", 877 | "


\n", 878 | "


\n", 879 | "


\n", 880 | "
" 881 | ] 882 | }, 883 | { 884 | "cell_type": "markdown", 885 | "metadata": {}, 886 | "source": [ 887 | "### 4.2 [10 pts] Calculating the ROC AUC" 888 | ] 889 | }, 890 | { 891 | "cell_type": "markdown", 892 | "metadata": {}, 893 | "source": [ 894 | "In this exercise, you are asked to modify your previous `plot_roc_curve` function to compute the ROC area under the curve (ROC AUC). To compute the ROC AUC, you can use NumPy's `trapz` function for your convenience (https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.trapz.html).\n", 895 | "\n", 896 | "- As before, you should only use basic Python functions, NumPy, and matplotlib to get full points for this exercise (do not use other external libraries)" 897 | ] 898 | }, 899 | { 900 | "cell_type": "code", 901 | "execution_count": null, 902 | "metadata": {}, 903 | "outputs": [], 904 | "source": [ 905 | "# MODIFY THIS CELL\n", 906 | "\n", 907 | "\n", 908 | "def plot_roc_curve_plus_auc(y_true, y_score, pos_label=1, num_thresholds=100):\n", 909 | "\n", 910 | " # INSERT YOUR CODE FROM THE PREVIOUS EXERCISE HERE\n", 911 | " # BUT MODIFY IT SUCH THAT IT ALSO RETURNS THE\n", 912 | " # ROC Area Under the Curve\n", 913 | " return roc_auc" 914 | ] 915 | }, 916 | { 917 | "cell_type": "markdown", 918 | "metadata": {}, 919 | "source": [ 920 | "1) Calculate the ROC AUC for the positive class label 0" 921 | ] 922 | }, 923 | { 924 | "cell_type": "code", 925 | "execution_count": null, 926 | "metadata": {}, 927 | "outputs": [], 928 | "source": [ 929 | "# DON'T MODIFY BUT EXECUTE THIS CELL TO SHOW YOUR SOLUTION\n", 930 | "\n", 931 | "auc = plot_roc_curve_plus_auc(y_test, y_probabilities[:, 0], pos_label=0)\n", 932 | "print('ROC AUC: %.4f' % auc)" 933 | ] 934 | }, 935 | { 936 | "cell_type": "markdown", 937 | "metadata": {}, 938 | "source": [ 939 | "2) Calculate the ROC AUC for the positive class label 1" 940 | ] 941 | }, 942 | { 943 | "cell_type": "code", 944 | "execution_count": null, 945 | "metadata": {}, 946 | "outputs": 
[], 947 | "source": [ 948 | "# DON'T MODIFY BUT EXECUTE THIS CELL TO SHOW YOUR SOLUTION\n", 949 | "\n", 950 | "auc = plot_roc_curve_plus_auc(y_test, y_probabilities[:, 1], pos_label=1)\n", 951 | "print('ROC AUC: %.4f' % auc)" 952 | ] 953 | }, 954 | { 955 | "cell_type": "markdown", 956 | "metadata": {}, 957 | "source": [ 958 | "
\n", 959 | "


\n", 960 | "


\n", 961 | "


\n", 962 | "


\n", 963 | "


\n", 964 | "


\n", 965 | "
" 966 | ] 967 | }, 968 | { 969 | "cell_type": "markdown", 970 | "metadata": {}, 971 | "source": [ 972 | "## 5. Feature Importance" 973 | ] 974 | }, 975 | { 976 | "cell_type": "markdown", 977 | "metadata": {}, 978 | "source": [ 979 | "### [10 pts] 5.1 Drop-Column Feature Importance" 980 | ] 981 | }, 982 | { 983 | "cell_type": "markdown", 984 | "metadata": {}, 985 | "source": [ 986 | "In this exercise, you are asked to implement the \"drop-column feature importance\" method discussed in class, to measure the importance of individual features present in a dataset.\n", 987 | "\n", 988 | "\n", 989 | "- You will be using regular accuracy measure as performance metric\n", 990 | "- Use 5 fold cross-validation to compute the accuracies\n", 991 | "\n", 992 | "The dataset you will be using for this exercise is the so-called \"Wine\" dataset. \n", 993 | "\n", 994 | "The Wine dataset is another open-source dataset that is available from the UCI machine learning repository (https://archive.ics.uci.edu/ml/datasets/Wine); it consists of 178 wine samples with 13 features describing their different chemical properties.\n", 995 | "\n", 996 | "The 13 different features in the Wine dataset, describing the chemical properties of the 178 wine samples, are listed in the following table that you will see after executing the next code cell.\n" 997 | ] 998 | }, 999 | { 1000 | "cell_type": "code", 1001 | "execution_count": null, 1002 | "metadata": {}, 1003 | "outputs": [], 1004 | "source": [ 1005 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n", 1006 | "\n", 1007 | "\n", 1008 | "import pandas as pd\n", 1009 | "\n", 1010 | "df_wine = pd.read_csv('data/wine.data',\n", 1011 | " header=None)\n", 1012 | "\n", 1013 | "df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',\n", 1014 | " 'Alcalinity of ash', 'Magnesium', 'Total phenols',\n", 1015 | " 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',\n", 1016 | " 'Color intensity', 'Hue',\n", 1017 | " 'OD280/OD315 of diluted wines', 
'Proline']\n", 1018 | "\n", 1019 | "df_wine.head()" 1020 | ] 1021 | }, 1022 | { 1023 | "cell_type": "markdown", 1024 | "metadata": {}, 1025 | "source": [ 1026 | "The samples belong to one of three different classes, 1, 2, and 3, which refer to the three different types of grape grown in the same region in Italy but derived from different wine cultivars, as described in the dataset summary (https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.names)." 1027 | ] 1028 | }, 1029 | { 1030 | "cell_type": "code", 1031 | "execution_count": null, 1032 | "metadata": {}, 1033 | "outputs": [], 1034 | "source": [ 1035 | "# EXECUTE BUT DO NOT MODIFY THIS CELL\n", 1036 | "\n", 1037 | "\n", 1038 | "from sklearn.model_selection import train_test_split\n", 1039 | "\n", 1040 | "X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values\n", 1041 | "\n", 1042 | "X_train, X_test, y_train, y_test = \\\n", 1043 | " train_test_split(X, y, test_size=0.3, \n", 1044 | " stratify=y,\n", 1045 | " random_state=0)" 1046 | ] 1047 | }, 1048 | { 1049 | "cell_type": "markdown", 1050 | "metadata": {}, 1051 | "source": [ 1052 | "Now the task is to implement the `feature_importance_dropcolumn` function to compute the feature importance according to the Drop-Column method discussed in class. Here, use the `cross_val_score` function from scikit-learn to compute the accuracy as the average accuracy from 5-fold cross-validation." 
1053 | ] 1054 | }, 1055 | { 1056 | "cell_type": "code", 1057 | "execution_count": null, 1058 | "metadata": {}, 1059 | "outputs": [], 1060 | "source": [ 1061 | "# MODIFY THIS CELL\n", 1062 | "\n", 1063 | "\n", 1064 | "import numpy as np\n", 1065 | "from sklearn.model_selection import cross_val_score\n", 1066 | "\n", 1067 | "\n", 1068 | "def feature_importance_dropcolumn(estimator, X, y, cv=5):\n", 1069 | "\n", 1070 | " base_accuracy = # YOUR CODE\n", 1071 | " column_indices = np.arange(X.shape[1]).astype(int)\n", 1072 | " drop_accuracies = np.zeros(column_indices.shape[0])\n", 1073 | " \n", 1074 | " for idx in column_indices:\n", 1075 | " mask = np.ones(column_indices.shape[0]).astype(bool)\n", 1076 | " mask[idx] = False\n", 1077 | " drop_accuracy = # YOUR CODE\n", 1078 | " drop_accuracies[idx] = # YOUR CODE\n", 1079 | " \n", 1080 | " return drop_accuracies" 1081 | ] 1082 | }, 1083 | { 1084 | "cell_type": "markdown", 1085 | "metadata": {}, 1086 | "source": [ 1087 | "Next, apply the `feature_importance_dropcolumn` function to the Wine training dataset (`X_train`, `y_train`) on a `KNeighborsClassifier` (you should use the `make_pipeline` function to create an estimator where the features are scaled to z-scores via the `StandardScaler`, since `KNeighborsClassifier` is very sensitive to feature scales).\n", 1088 | "\n", 1089 | "- You should use a `KNeighborsClassifier` with 5 nearest neighbors." 
1090 | ] 1091 | }, 1092 | { 1093 | "cell_type": "code", 1094 | "execution_count": null, 1095 | "metadata": {}, 1096 | "outputs": [], 1097 | "source": [ 1098 | "# MODIFY THIS CELL\n", 1099 | "\n", 1100 | "from sklearn.pipeline import make_pipeline\n", 1101 | "from sklearn.preprocessing import StandardScaler\n", 1102 | "from sklearn.neighbors import KNeighborsClassifier\n", 1103 | "\n", 1104 | "\n", 1105 | "\n", 1106 | "pipe = make_pipeline(\n", 1107 | " # YOUR CODE\n", 1108 | " # YOUE CODE\n", 1109 | ")\n", 1110 | "\n", 1111 | "\n", 1112 | "feature_importance_dropcolumn(# YOUR CODE)" 1113 | ] 1114 | }, 1115 | { 1116 | "cell_type": "markdown", 1117 | "metadata": {}, 1118 | "source": [ 1119 | "
\n", 1120 | "


\n", 1121 | "


\n", 1122 | "


\n", 1123 | "


\n", 1124 | "


\n", 1125 | "


\n", 1126 | "
" 1127 | ] 1128 | }, 1129 | { 1130 | "cell_type": "markdown", 1131 | "metadata": {}, 1132 | "source": [ 1133 | "### [10 pts] 5.2 Random Forest Feature Importance" 1134 | ] 1135 | }, 1136 | { 1137 | "cell_type": "markdown", 1138 | "metadata": {}, 1139 | "source": [ 1140 | "First, use a `RandomForestClassifier` in your `feature_importance_dropcolumn` from the previous exercise, 5.1. Use a random forest \n", 1141 | "\n", 1142 | "- with 200 estimators and \n", 1143 | "- random seed 0. " 1144 | ] 1145 | }, 1146 | { 1147 | "cell_type": "code", 1148 | "execution_count": null, 1149 | "metadata": {}, 1150 | "outputs": [], 1151 | "source": [ 1152 | "# MODIFY THIS CELL\n", 1153 | "\n", 1154 | "\n", 1155 | "from sklearn.ensemble import RandomForestClassifier\n", 1156 | "\n", 1157 | "\n", 1158 | "drop_importances = feature_importance_dropcolumn(\n", 1159 | " # YOUR CODE]\n", 1160 | " X=X_train, \n", 1161 | " y=y_train,\n", 1162 | " cv=5)\n", 1163 | "\n", 1164 | "\n", 1165 | "print('Drop Importance from RF:', drop_importances)" 1166 | ] 1167 | }, 1168 | { 1169 | "cell_type": "markdown", 1170 | "metadata": {}, 1171 | "source": [ 1172 | "Next, compute the ranking among the features as determined by the outputs of the previous code cell, saved under `drop_importances`. You may use `np.argsort` in your computation, to compute the ranking, where the highest number should correspond to the most important feature." 1173 | ] 1174 | }, 1175 | { 1176 | "cell_type": "code", 1177 | "execution_count": null, 1178 | "metadata": {}, 1179 | "outputs": [], 1180 | "source": [ 1181 | "# MODIFY THIS CELL\n", 1182 | "\n", 1183 | "\n", 1184 | "# YOUR CODE" 1185 | ] 1186 | }, 1187 | { 1188 | "cell_type": "markdown", 1189 | "metadata": {}, 1190 | "source": [ 1191 | "Which are the 3 most important features? You can either write the feature indices below that correspond to the most important features or write out the full column names (you can see the column names in the pandas `DataFrame` in 5.1)." 
1192 | ] 1193 | }, 1194 | { 1195 | "cell_type": "markdown", 1196 | "metadata": {}, 1197 | "source": [ 1198 | "!!! **EDIT THIS CELL TO ENTER YOUR ANSWER** !!!" 1199 | ] 1200 | }, 1201 | { 1202 | "cell_type": "markdown", 1203 | "metadata": {}, 1204 | "source": [ 1205 | "
\n", 1206 | "


\n", 1207 | "


\n", 1208 | "


\n", 1209 | "


\n", 1210 | "


\n", 1211 | "


\n", 1212 | "
" 1213 | ] 1214 | }, 1215 | { 1216 | "cell_type": "markdown", 1217 | "metadata": {}, 1218 | "source": [ 1219 | "Next, obtain the feature importance from the random forest classifier directly and compute the ranking as before." 1220 | ] 1221 | }, 1222 | { 1223 | "cell_type": "code", 1224 | "execution_count": null, 1225 | "metadata": {}, 1226 | "outputs": [], 1227 | "source": [ 1228 | "# MODIFY THIS CELL\n", 1229 | "\n", 1230 | "forest = RandomForestClassifier(n_estimators=100, random_state=0)\n", 1231 | "forest.fit(X_train, y_train)\n", 1232 | "\n", 1233 | "print('Random Forest Feature Importance:\\n', # YOUR CODE)" 1234 | ] 1235 | }, 1236 | { 1237 | "cell_type": "code", 1238 | "execution_count": null, 1239 | "metadata": {}, 1240 | "outputs": [], 1241 | "source": [ 1242 | "# MODIFY THIS CELL\n", 1243 | "\n", 1244 | "\n", 1245 | "# YOUR CODE TO RANK THE FEATURES" 1246 | ] 1247 | }, 1248 | { 1249 | "cell_type": "markdown", 1250 | "metadata": {}, 1251 | "source": [ 1252 | "Which are the 3 most important features now? You can either write the feature indices below that correspond to the most important features or write out the full column names (you can see the column names in the pandas `DataFrame` in 5.1)." 1253 | ] 1254 | }, 1255 | { 1256 | "cell_type": "markdown", 1257 | "metadata": {}, 1258 | "source": [ 1259 | "!!! **EDIT THIS CELL TO ENTER YOUR ANSWER** !!!" 1260 | ] 1261 | }, 1262 | { 1263 | "cell_type": "markdown", 1264 | "metadata": {}, 1265 | "source": [ 1266 | "
\n", 1267 | "


\n", 1268 | "


\n", 1269 | "


\n", 1270 | "


\n", 1271 | "


\n", 1272 | "


\n", 1273 | "
" 1274 | ] 1275 | }, 1276 | { 1277 | "cell_type": "markdown", 1278 | "metadata": {}, 1279 | "source": [ 1280 | "Finally, use the `feature_importance_permutation` function from mlxtend (http://rasbt.github.io/mlxtend/user_guide/evaluate/feature_importance_permutation/) to compute the most important features. Inside the `feature_importance_permutation` function,\n", 1281 | "\n", 1282 | "- use a random seed of 0\n", 1283 | "- use 50 permutation rounds\n", 1284 | "\n", 1285 | "then print the importance values." 1286 | ] 1287 | }, 1288 | { 1289 | "cell_type": "code", 1290 | "execution_count": null, 1291 | "metadata": {}, 1292 | "outputs": [], 1293 | "source": [ 1294 | "# MODIFY THIS CELL\n", 1295 | "\n", 1296 | "\n", 1297 | "from mlxtend.evaluate import feature_importance_permutation\n", 1298 | "\n", 1299 | "\n", 1300 | "forest = RandomForestClassifier(n_estimators=100,\n", 1301 | " random_state=0)\n", 1302 | "\n", 1303 | "forest.fit(X_train, y_train)\n", 1304 | "\n", 1305 | "# YOUR CODE" 1306 | ] 1307 | }, 1308 | { 1309 | "cell_type": "code", 1310 | "execution_count": null, 1311 | "metadata": {}, 1312 | "outputs": [], 1313 | "source": [ 1314 | "# MODIFY THIS CELL\n", 1315 | "\n", 1316 | "\n", 1317 | "# YOUR CODE TO RANK THE FEATURES" 1318 | ] 1319 | }, 1320 | { 1321 | "cell_type": "markdown", 1322 | "metadata": {}, 1323 | "source": [ 1324 | "Which are the 3 most important features now? You can either write the feature indices below that correspond to the most important features or write out the full column names (you can see the column names in the pandas `DataFrame` in 5.1)." 1325 | ] 1326 | }, 1327 | { 1328 | "cell_type": "markdown", 1329 | "metadata": {}, 1330 | "source": [ 1331 | "!!! **EDIT THIS CELL TO ENTER YOUR ANSWER** !!!" 1332 | ] 1333 | }, 1334 | { 1335 | "cell_type": "markdown", 1336 | "metadata": {}, 1337 | "source": [ 1338 | "
\n", 1339 | "


\n", 1340 | "


\n", 1341 | "


\n", 1342 | "


\n", 1343 | "


\n", 1344 | "


\n", 1345 | "
" 1346 | ] 1347 | }, 1348 | { 1349 | "cell_type": "markdown", 1350 | "metadata": {}, 1351 | "source": [ 1352 | "### [10 pts] 5.3 Creating your Own Feature Selection Transformer Class" 1353 | ] 1354 | }, 1355 | { 1356 | "cell_type": "markdown", 1357 | "metadata": {}, 1358 | "source": [ 1359 | "This section will help you understand how you can implement your own feature selection method in a way that is compatible with scikit-learn.\n", 1360 | "\n", 1361 | "The following code (`ColumnSelector`) implements a feature selector that works similarly to the feature selectors implemented in scikit-learn. However, this `ColumnSelector` does not do anything automatically." 1362 | ] 1363 | }, 1364 | { 1365 | "cell_type": "code", 1366 | "execution_count": null, 1367 | "metadata": {}, 1368 | "outputs": [], 1369 | "source": [ 1370 | "# EXECUTE BUT DO NOT EDIT THIS CELL\n", 1371 | "\n", 1372 | "from sklearn.base import BaseEstimator\n", 1373 | "import numpy as np\n", 1374 | "\n", 1375 | "\n", 1376 | "class ColumnSelector(BaseEstimator):\n", 1377 | "\n", 1378 | " def __init__(self, cols=None):\n", 1379 | " self.cols = cols\n", 1380 | "\n", 1381 | " def fit_transform(self, X, y=None):\n", 1382 | " return self.transform(X=X, y=y)\n", 1383 | "\n", 1384 | " def transform(self, X, y=None):\n", 1385 | " feature_subset = X[:, self.cols]\n", 1386 | " if len(feature_subset.shape) == 1:\n", 1387 | " feature_subset = feature_subset[:, np.newaxis]\n", 1388 | " return feature_subset\n", 1389 | "\n", 1390 | " def fit(self, X, y=None):\n", 1391 | " return self" 1392 | ] 1393 | }, 1394 | { 1395 | "cell_type": "markdown", 1396 | "metadata": {}, 1397 | "source": [ 1398 | "As the name implies, the `ColumnSelector` selects specific columns that we as the user need to specify. 
For example, consider the Wine dataset from earlier:" 1399 | ] 1400 | }, 1401 | { 1402 | "cell_type": "code", 1403 | "execution_count": null, 1404 | "metadata": {}, 1405 | "outputs": [], 1406 | "source": [ 1407 | "# EXECUTE BUT DO NOT EDIT THIS CELL\n", 1408 | "\n", 1409 | "import pandas as pd\n", 1410 | "\n", 1411 | "df_wine = pd.read_csv('data/wine.data',\n", 1412 | " header=None)\n", 1413 | "\n", 1414 | "df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',\n", 1415 | " 'Alcalinity of ash', 'Magnesium', 'Total phenols',\n", 1416 | " 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',\n", 1417 | " 'Color intensity', 'Hue',\n", 1418 | " 'OD280/OD315 of diluted wines', 'Proline']\n", 1419 | "\n", 1420 | "df_wine.head()" 1421 | ] 1422 | }, 1423 | { 1424 | "cell_type": "code", 1425 | "execution_count": null, 1426 | "metadata": {}, 1427 | "outputs": [], 1428 | "source": [ 1429 | "# EXECUTE BUT DO NOT EDIT THIS CELL\n", 1430 | "\n", 1431 | "from sklearn.model_selection import train_test_split\n", 1432 | "\n", 1433 | "X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values\n", 1434 | "\n", 1435 | "X_train, X_test, y_train, y_test = \\\n", 1436 | " train_test_split(X, y, test_size=0.3, \n", 1437 | " stratify=y,\n", 1438 | " random_state=0)" 1439 | ] 1440 | }, 1441 | { 1442 | "cell_type": "markdown", 1443 | "metadata": {}, 1444 | "source": [ 1445 | "Via the `ColumnSelector`, we can select specific columns from the dataset. 
E.g., to select the 1st, 6th, 9th, and 12th columns, we can initialize the `ColumnSelector` with the argument `cols=[0, 5, 8, 11]` and use the transform method as shown below:" 1446 | ] 1447 | }, 1448 | { 1449 | "cell_type": "code", 1450 | "execution_count": null, 1451 | "metadata": {}, 1452 | "outputs": [], 1453 | "source": [ 1454 | "# EXECUTE BUT DO NOT EDIT THIS CELL\n", 1455 | "\n", 1456 | "col_sele = ColumnSelector(cols=[0, 5, 8, 11])\n", 1457 | "reduced_subset = col_sele.transform(X_train)\n", 1458 | "\n", 1459 | "print('Original feature set size:', X_train.shape)\n", 1460 | "print('Selected feature set size:', reduced_subset.shape)" 1461 | ] 1462 | }, 1463 | { 1464 | "cell_type": "markdown", 1465 | "metadata": {}, 1466 | "source": [ 1467 | "Your task now is to use the `feature_importances_` attribute from a fitted random forest model inside a custom feature selector. Using this feature selector, you should be able to select features as follows:\n", 1468 | "\n", 1469 | "\n", 1470 | "```python\n", 1471 | "\n", 1472 | "forest = RandomForestClassifier(n_estimators=100, random_state=123)\n", 1473 | "\n", 1474 | "selector = ImportanceSelector(num_features=3, random_forest_estimator=forest)\n", 1475 | "selector.fit(X_train, y_train)\n", 1476 | "reduced_train_features = selector.transform(X_train, y_train)\n", 1477 | "```\n", 1478 | "\n", 1479 | "- If `num_features=3` as shown above, this means that we are interested in selecting the top 3 most important features from a dataset based on the random forest feature importance values.\n", 1480 | "\n", 1481 | "\n", 1482 | "- Actually, while it might be more interesting to implement a feature selector based on the column-drop performance (which would then be somewhat related to sequential feature selection), we use the feature importance values from a `RandomForest`'s `feature_importances_` attribute for simplicity here, to allow you to implement this method in case your `feature_importance_dropcolumn` function does 
not work correctly." 1483 | ] 1484 | }, 1485 | { 1486 | "cell_type": "code", 1487 | "execution_count": null, 1488 | "metadata": {}, 1489 | "outputs": [], 1490 | "source": [ 1491 | "# MODIFY THIS CELL\n", 1492 | "\n", 1493 | "from sklearn.base import BaseEstimator\n", 1494 | "import numpy as np\n", 1495 | "\n", 1496 | "\n", 1497 | "class ImportanceSelector(BaseEstimator):\n", 1498 | "\n", 1499 | " def __init__(self, num_features, random_forest_estimator):\n", 1500 | " self.num_features = num_features\n", 1501 | " self.forest = random_forest_estimator\n", 1502 | "\n", 1503 | " def transform(self, X, y=None):\n", 1504 | " \n", 1505 | " # Feature by increasing feature importance:\n", 1506 | " features_by_importance = # YOUR CODE\n", 1507 | " top_k_feature_indices = # YOUR CODE\n", 1508 | " \n", 1509 | " feature_subset = X[:, top_k_feature_indices]\n", 1510 | " if len(feature_subset.shape) == 1:\n", 1511 | " feature_subset = feature_subset[:, np.newaxis]\n", 1512 | " return feature_subset\n", 1513 | "\n", 1514 | " def fit(self, X, y=None):\n", 1515 | " self.forest.fit(X, y)\n", 1516 | " return self" 1517 | ] 1518 | }, 1519 | { 1520 | "cell_type": "markdown", 1521 | "metadata": {}, 1522 | "source": [ 1523 | "Now, use the `ImportanceSelector` to select the 3 most important features in the dataset:" 1524 | ] 1525 | }, 1526 | { 1527 | "cell_type": "code", 1528 | "execution_count": null, 1529 | "metadata": {}, 1530 | "outputs": [], 1531 | "source": [ 1532 | "# MODIFY THIS CELL\n", 1533 | "\n", 1534 | "from sklearn.ensemble import RandomForestClassifier\n", 1535 | "\n", 1536 | "\n", 1537 | "forest = RandomForestClassifier(n_estimators=100, random_state=123)\n", 1538 | "\n", 1539 | "selector = # YOUR CODE\n", 1540 | "# YOUR CODE\n", 1541 | "reduced_train_features = # YOUR CODE\n", 1542 | "\n", 1543 | "print('Original feature set size:', X_train.shape)\n", 1544 | "print('Selected feature set size:', reduced_train_features.shape)\n", 1545 | "print('First 5 rows:\\n', 
reduced_train_features[:5])" 1546 | ] 1547 | }, 1548 | { 1549 | "cell_type": "markdown", 1550 | "metadata": {}, 1551 | "source": [ 1552 | "
\n", 1553 | "


\n", 1554 | "


\n", 1555 | "


\n", 1556 | "


\n", 1557 | "


\n", 1558 | "


\n", 1559 | "
" 1560 | ] 1561 | }, 1562 | { 1563 | "cell_type": "markdown", 1564 | "metadata": {}, 1565 | "source": [ 1566 | "## (5 pts) Bonus Exercise: Evaluating a KNN Classifier on Different Feature Subsets" 1567 | ] 1568 | }, 1569 | { 1570 | "cell_type": "markdown", 1571 | "metadata": {}, 1572 | "source": [ 1573 | "In this *Bonus Exercise*, your task is to use a scikit-learn pipeline to fit a KNN classifier based on different 2-feature combinations and different values of *k* (number of neighbors) via grid search. More specifically,\n", 1574 | "\n", 1575 | "1. Create a scikit-learn pipeline that consists of a `StandardScaler`, a `ColumnSelector`, and a `KNeighborsClassifier` (think about the right way to order these elements in the pipeline);\n", 1576 | "2. Using this pipeline, find the best value for `k` in the KNN classifier as well as the best feature combination (restricted to 2-feature subsets for simplicity) using `GridSearchCV`;\n", 1577 | "3. Fit the best model determined via grid search on the whole training set and evaluate the performance on the test set." 
1578 | ] 1579 | }, 1580 | { 1581 | "cell_type": "code", 1582 | "execution_count": null, 1583 | "metadata": {}, 1584 | "outputs": [], 1585 | "source": [ 1586 | "# EXECUTE BUT DO NOT EDIT\n", 1587 | "\n", 1588 | "\n", 1589 | "import pandas as pd\n", 1590 | "\n", 1591 | "\n", 1592 | "df_wine = pd.read_csv('data/wine.data',\n", 1593 | " header=None)\n", 1594 | "\n", 1595 | "df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',\n", 1596 | " 'Alcalinity of ash', 'Magnesium', 'Total phenols',\n", 1597 | " 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',\n", 1598 | " 'Color intensity', 'Hue',\n", 1599 | " 'OD280/OD315 of diluted wines', 'Proline']\n", 1600 | "\n", 1601 | "df_wine.head()" 1602 | ] 1603 | }, 1604 | { 1605 | "cell_type": "code", 1606 | "execution_count": null, 1607 | "metadata": {}, 1608 | "outputs": [], 1609 | "source": [ 1610 | "# EXECUTE BUT DO NOT EDIT\n", 1611 | "\n", 1612 | "from sklearn.model_selection import train_test_split\n", 1613 | "\n", 1614 | "\n", 1615 | "X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values\n", 1616 | "\n", 1617 | "X_train, X_test, y_train, y_test = \\\n", 1618 | " train_test_split(X, y, test_size=0.3, \n", 1619 | " stratify=y,\n", 1620 | " random_state=0)" 1621 | ] 1622 | }, 1623 | { 1624 | "cell_type": "code", 1625 | "execution_count": null, 1626 | "metadata": {}, 1627 | "outputs": [], 1628 | "source": [ 1629 | "# EXECUTE BUT DO NOT EDIT THIS CELL\n", 1630 | "\n", 1631 | "from sklearn.base import BaseEstimator\n", 1632 | "import numpy as np\n", 1633 | "\n", 1634 | "\n", 1635 | "class ColumnSelector(BaseEstimator):\n", 1636 | "\n", 1637 | " def __init__(self, cols=None):\n", 1638 | " self.cols = cols\n", 1639 | "\n", 1640 | " def fit_transform(self, X, y=None):\n", 1641 | " return self.transform(X=X, y=y)\n", 1642 | "\n", 1643 | " def transform(self, X, y=None):\n", 1644 | " feature_subset = X[:, self.cols]\n", 1645 | " if len(feature_subset.shape) == 1:\n", 1646 | " feature_subset = 
feature_subset[:, np.newaxis]\n", 1647 | " return feature_subset\n", 1648 | "\n", 1649 | " def fit(self, X, y=None):\n", 1650 | " return self" 1651 | ] 1652 | }, 1653 | { 1654 | "cell_type": "markdown", 1655 | "metadata": {}, 1656 | "source": [ 1657 | "Modify the following code cell to create a list of all possible 2-feature combinations:" 1658 | ] 1659 | }, 1660 | { 1661 | "cell_type": "code", 1662 | "execution_count": null, 1663 | "metadata": {}, 1664 | "outputs": [], 1665 | "source": [ 1666 | "# MODIFY THIS CELL\n", 1667 | "\n", 1668 | "import itertools\n", 1669 | "\n", 1670 | "\n", 1671 | "all_combin_2 = list(itertools.combinations( # YOUR CODE)\n", 1672 | "\n", 1673 | "\n", 1674 | "print('Number of all possible 2-feature combinations:', len(all_combin_2))" 1675 | ] 1676 | }, 1677 | { 1678 | "cell_type": "markdown", 1679 | "metadata": {}, 1680 | "source": [ 1681 | "Modify the following code cell to create a `pipeline` (as explained at the beginning of this section), and use the given `param_grid` to fit the `GridSearchCV` to obtain the best parameters settings and a classifier fit to `X_train` and `y_train` based on these best hyperparameter values.\n", 1682 | "\n", 1683 | "(Note that the code may take 10-30 seconds to execute.)" 1684 | ] 1685 | }, 1686 | { 1687 | "cell_type": "code", 1688 | "execution_count": null, 1689 | "metadata": {}, 1690 | "outputs": [], 1691 | "source": [ 1692 | "# MODIFY THIS CELL\n", 1693 | "\n", 1694 | "from sklearn.pipeline import make_pipeline\n", 1695 | "from sklearn.preprocessing import StandardScaler\n", 1696 | "from sklearn.neighbors import KNeighborsClassifier\n", 1697 | "from sklearn.model_selection import GridSearchCV\n", 1698 | "\n", 1699 | "\n", 1700 | "pipe = make_pipeline(\n", 1701 | "# YOUR CODE\n", 1702 | "# YOUR CODE\n", 1703 | "# YOUR CODE\n", 1704 | ")\n", 1705 | "\n", 1706 | "\n", 1707 | "param_grid = {'kneighborsclassifier__n_neighbors': list(range(1, 8)),\n", 1708 | " 'columnselector__cols': all_combin_2}\n", 1709 
| "\n", 1710 | "gsearch = GridSearchCV(pipe,\n", 1711 | " param_grid=param_grid,\n", 1712 | " refit=True,\n", 1713 | " iid=False,\n", 1714 | " cv=5)\n", 1715 | "\n", 1716 | "gsearch.fit(X_train, y_train)" 1717 | ] 1718 | }, 1719 | { 1720 | "cell_type": "code", 1721 | "execution_count": null, 1722 | "metadata": {}, 1723 | "outputs": [], 1724 | "source": [ 1725 | "# EXECUTE BUT DO NOT EDIT\n", 1726 | "\n", 1727 | "\n", 1728 | "print(gsearch.best_params_)" 1729 | ] 1730 | }, 1731 | { 1732 | "cell_type": "markdown", 1733 | "metadata": {}, 1734 | "source": [ 1735 | "Based on the best combination of a 2-feature subset and the number of `n_neighbors`, your model should be fit to the training dataset now. Use the fitted model and compute its classification accuracy on the test set (`X_test`, `y_test`)." 1736 | ] 1737 | }, 1738 | { 1739 | "cell_type": "code", 1740 | "execution_count": null, 1741 | "metadata": {}, 1742 | "outputs": [], 1743 | "source": [ 1744 | "# MODIFY THIS CELL\n", 1745 | "\n", 1746 | "# YOUR CODE TO COMPUTE THE TEST ACCURACY" 1747 | ] 1748 | } 1749 | ], 1750 | "metadata": { 1751 | "kernelspec": { 1752 | "display_name": "Python 3", 1753 | "language": "python", 1754 | "name": "python3" 1755 | }, 1756 | "language_info": { 1757 | "codemirror_mode": { 1758 | "name": "ipython", 1759 | "version": 3 1760 | }, 1761 | "file_extension": ".py", 1762 | "mimetype": "text/x-python", 1763 | "name": "python", 1764 | "nbconvert_exporter": "python", 1765 | "pygments_lexer": "ipython3", 1766 | "version": "3.6.5" 1767 | } 1768 | }, 1769 | "nbformat": 4, 1770 | "nbformat_minor": 2 1771 | } 1772 | --------------------------------------------------------------------------------