├── img ├── bv.png ├── forest.png ├── kfold.jpg ├── bagging.jpg ├── recall.jpeg ├── classifiers.png ├── motivation.png ├── numpy-logo.png ├── pandas-logo.png ├── precision.png ├── scipy-logo.png ├── tree-simple.png ├── highVariance.jpg ├── ipython-logo.jpg ├── cross-validation.png ├── decisiontree_1.png ├── decisiontree_2.png ├── learningcurve.jpeg ├── matplotlib-logo.png ├── modelComplexity.jpg ├── trainingerror.jpeg ├── tree-partition.png └── scikit-learn-logo.png ├── readme.md ├── .gitignore └── tutorial.py /img/bv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/bv.png -------------------------------------------------------------------------------- /img/forest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/forest.png -------------------------------------------------------------------------------- /img/kfold.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/kfold.jpg -------------------------------------------------------------------------------- /img/bagging.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/bagging.jpg -------------------------------------------------------------------------------- /img/recall.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/recall.jpeg -------------------------------------------------------------------------------- /img/classifiers.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/classifiers.png -------------------------------------------------------------------------------- /img/motivation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/motivation.png -------------------------------------------------------------------------------- /img/numpy-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/numpy-logo.png -------------------------------------------------------------------------------- /img/pandas-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/pandas-logo.png -------------------------------------------------------------------------------- /img/precision.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/precision.png -------------------------------------------------------------------------------- /img/scipy-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/scipy-logo.png -------------------------------------------------------------------------------- /img/tree-simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/tree-simple.png -------------------------------------------------------------------------------- /img/highVariance.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/highVariance.jpg 
-------------------------------------------------------------------------------- /img/ipython-logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/ipython-logo.jpg -------------------------------------------------------------------------------- /img/cross-validation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/cross-validation.png -------------------------------------------------------------------------------- /img/decisiontree_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/decisiontree_1.png -------------------------------------------------------------------------------- /img/decisiontree_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/decisiontree_2.png -------------------------------------------------------------------------------- /img/learningcurve.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/learningcurve.jpeg -------------------------------------------------------------------------------- /img/matplotlib-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/matplotlib-logo.png -------------------------------------------------------------------------------- /img/modelComplexity.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/modelComplexity.jpg 
-------------------------------------------------------------------------------- /img/trainingerror.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/trainingerror.jpeg -------------------------------------------------------------------------------- /img/tree-partition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/tree-partition.png -------------------------------------------------------------------------------- /img/scikit-learn-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/omoju/tutorial-bagging-models/HEAD/img/scikit-learn-logo.png -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Bagging Models with Scikit-Learn tutorial 2 | 3 | 1. Tutorial on bagging models using SciKit-Learn (beginner level). 4 | 5 | Materials modified from [glouppe/tutorials-scikit-learn](https://github.com/glouppe/tutorials-scikit-learn)|BSD 3-clause license 6 | 7 | ## Installation instructions 8 | 9 | 1) [Download](https://www.continuum.io/downloads) and install the latest Anaconda distribution, coming with Python 2.7 and the full scientific Python stack. 
10 | 11 | 2) Install dependencies: 12 | ``` 13 | conda install numpy scipy scikit-learn jupyter matplotlib 14 | ``` 15 | 16 | 3) Clone this repository and start Jupyter 17 | ``` 18 | git clone https://github.com/omoju/tutorial-bagging-models.git 19 | cd tutorial-bagging-models 20 | jupyter notebook 21 | ``` 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Data Stuff # 2 | ################### 3 | *.dat 4 | *.csv 5 | *.tsv 6 | *.pkl 7 | 8 | 9 | # Jupyter Notebook # 10 | ################### 11 | *.ipynb_checkpoints/ 12 | 13 | 14 | # Python # 15 | ################### 16 | *.pyc 17 | 18 | # Compiled source # 19 | ################### 20 | *.com 21 | *.class 22 | *.dll 23 | *.exe 24 | *.o 25 | *.so 26 | 27 | # Packages # 28 | ############ 29 | # it's better to unpack these files and commit the raw source 30 | # git has its own built in compression methods 31 | *.7z 32 | *.dmg 33 | *.gz 34 | *.iso 35 | *.jar 36 | *.rar 37 | *.tar 38 | *.zip 39 | 40 | # Logs and databases # 41 | ###################### 42 | *.log 43 | *.sql 44 | *.sqlite 45 | 46 | # OS generated files # 47 | ###################### 48 | .DS_Store 49 | .DS_Store? 
## Written by Giles Louppe
## https://github.com/glouppe


from matplotlib import pyplot as plt
import numpy as np


def _activate_axes(subplot):
    """Open a new figure when *subplot* is None, else select that subplot.

    *subplot* is unpacked into ``plt.subplot`` (e.g. ``(1, 2, 1)``).
    """
    if subplot is None:
        plt.figure()
    else:
        plt.subplot(*subplot)


def _scores(clf, X):
    """Return the scores *clf* assigns to the rows of *X*.

    Uses ``clf.decision_function`` when that attribute exists; otherwise
    falls back to column 1 of ``clf.predict_proba(X)``.
    """
    if hasattr(clf, "decision_function"):
        return clf.decision_function(X)
    return clf.predict_proba(X)[:, 1]


def plot_surface(clf, X, y,
                 xlim=(-10, 10), ylim=(-10, 10), n_steps=250,
                 subplot=None, show=True):
    """Plot the score surface of *clf* on a 2-D grid and overlay the samples.

    Parameters
    ----------
    clf : object
        Must expose ``decision_function`` or ``predict_proba``.
    X : array
        Samples; columns 0 and 1 are used as the scatter coordinates.
    y : sequence
        Passed to ``plt.scatter`` as the per-sample colors (``c=y``).
    xlim, ylim : pair of numbers
        Extent of the evaluation grid and of the plotted axes.
    n_steps : int
        Number of grid points along each axis.
    subplot : tuple or None
        ``None`` opens a new figure; otherwise the tuple is unpacked into
        ``plt.subplot``.
    show : bool
        When true, ``plt.show()`` is called after drawing.
    """
    _activate_axes(subplot)

    xx, yy = np.meshgrid(np.linspace(xlim[0], xlim[1], n_steps),
                         np.linspace(ylim[0], ylim[1], n_steps))

    # Score every grid point in one call, then fold the flat result back
    # into the grid shape expected by contourf.
    z = _scores(clf, np.c_[xx.ravel(), yy.ravel()])
    z = z.reshape(xx.shape)

    plt.contourf(xx, yy, z, alpha=0.8, cmap=plt.cm.RdBu_r)
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.xlim(*xlim)
    plt.ylim(*ylim)

    if show:
        plt.show()


def plot_histogram(clf, X, y, subplot=None, show=True):
    """Plot normalized histograms of the scores of *clf*, one per color label.

    Samples are split by comparing *y* against the strings ``"dodgerblue"``
    and ``"hotpink"``; each group is drawn in the matching color.

    Parameters
    ----------
    clf : object
        Must expose ``decision_function`` or ``predict_proba``.
    X : array
        Samples to score.
    y : sequence of str
        Per-sample color labels; only ``"dodgerblue"`` and ``"hotpink"``
        entries are plotted.
    subplot : tuple or None
        ``None`` opens a new figure; otherwise the tuple is unpacked into
        ``plt.subplot``.
    show : bool
        When true, ``plt.show()`` is called after drawing.
    """
    _activate_axes(subplot)

    d = _scores(clf, X)
    # Coerce to an array so that ``y == "<color>"`` yields an element-wise
    # boolean mask even when the caller passes a plain list.
    y = np.asarray(y)

    # ``density=True`` replaces the ``normed=True`` keyword, which
    # matplotlib deprecated in 2.1 and removed in 3.1.
    for color in ("dodgerblue", "hotpink"):
        plt.hist(d[y == color], bins=50, density=True, color=color, alpha=0.5)

    if show:
        plt.show()


def plot_clf(clf, X, y):
    """Show the score surface and the score histograms of *clf* side by side.

    Creates a 16x8 figure, draws ``plot_surface`` in the left subplot and
    ``plot_histogram`` in the right one, then displays the figure.
    """
    plt.figure(figsize=(16, 8))
    plot_surface(clf, X, y, subplot=(1, 2, 1), show=False)
    plot_histogram(clf, X, y, subplot=(1, 2, 2), show=True)