├── README.md ├── __init__.py ├── ch10-linear-regression.py ├── ch10-liner_reg-decisionTrees.py ├── ch10-liner_reg-ploynomial.py ├── ch10-scikit-linear-regression.py ├── ch10-scikit-linear_reg-RANSAC.py ├── ch10-scikit-linear_reg-model_eval.py ├── ch11-clustering-DBSCAN.py ├── ch11-clustering-agglomerative.py ├── ch11-clustering-kmeans-analysis.py ├── ch11-clustering-kmeans.py ├── ch12-NeuralNet-MLP.py ├── ch12-import-MNIST.py ├── ch13-keras.py ├── ch13-theano.py ├── ch2-Adaline-BatchGD.py ├── ch2-Adaline-stochasticGD.py ├── ch2-perceptron.py ├── ch3-SVM-Kernel-on-Iris.py ├── ch3-SVM-Kernel.py ├── ch3-SVM.py ├── ch3-decisionTree-RandomForests.py ├── ch3-decisionTrees-InformationGain.py ├── ch3-decisionTrees.py ├── ch3-k-nearest-neighbors.py ├── ch3-logisticRegression.py ├── ch3-logisticregression-cost.py ├── ch3-scikit-learn-perceptron.py ├── ch3-sigmoid.py ├── ch4-categoricalData.py ├── ch4-featureSelection-randomForest.py ├── ch4-imputation.py ├── ch4-missingData.py ├── ch4-partitioningData.py ├── ch4-seq-feature-selection.py ├── ch5-LDA-scikit.py ├── ch5-PCA-Kernel-ex1.py ├── ch5-PCA-Kernel-ex2.py ├── ch5-PCA-Kernel-newRBF.py ├── ch5-PCA-Kernel-scikit.py ├── ch5-PCA-scikit.py ├── ch5-PCA1.py ├── ch6-F1-score.py ├── ch6-Kfold-CrossValidation.py ├── ch6-ModelSelect-ParamTune-Nested-Kfold-CrossValidation.py ├── ch6-ROC-curve.py ├── ch6-confusion-matrix.py ├── ch6-hyperparameterTuning-gridSearch.py ├── ch6-learningCurve.py ├── ch6-pipeline.py ├── ch6-scikit-Kfold-CrossValidation.py ├── ch6-validationCurve.py ├── ch7-AdaBoost.py ├── ch7-BaggingClassifiers.py ├── ch7-majorityVote-Classifier.py ├── ch8-Online-Sentiment-Analysis.py ├── ch8-Sentiment-Analysis.py ├── ch8-bagOfWords.py ├── ch9-pickle-model.py ├── movieclassifier ├── __pycache__ │ ├── update.cpython-35.pyc │ └── vectorizer.cpython-35.pyc ├── app.py ├── ch9-ex.py ├── pkl_objects │ ├── classifier.pkl │ └── stopwords.pkl ├── reviews.sqlite ├── static │ └── style.css ├── templates │ ├── _formhelpers.html │ ├── results.html │ ├── reviewform.html │ └── thanks.html ├── update.py └── vectorizer.py └── neuralnet.py /README.md: -------------------------------------------------------------------------------- 1 | # python-ML-book-Raschka 2 | 3 | Code snippets from 2015 book "Python Machine Learning" by Sebastian Raschka 4 | 5 | https://www.amazon.com/Python-Machine-Learning-Sebastian-Raschka/dp/1783555130/ref=sr_1_1?s=books&ie=UTF8&qid=1476259049&sr=1-1&keywords=python+MAchine+learning 6 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rupskygill/python-ML-book-Raschka/3e69c6f9ee8514888b45e8a882c25bafafd7f3d5/__init__.py -------------------------------------------------------------------------------- /ch10-linear-regression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | 5 | class LinearRegressionGD(object): 6 | def __init__(self, eta=0.001, n_iter=20): 7 | self.eta = eta 8 | self.n_iter = n_iter 9 | 10 | def net_input(self, X): 11 | return np.dot(X, self.w_[1:]) + self.w_[0] 12 | 13 | def fit(self, X, y): 14 | self.w_ = np.zeros(1 + X.shape[1]) 15 | self.cost_ = [] 16 | 17 | for i in range(self.n_iter): 18 | output = self.net_input(X) 19 | errors = (y - output) 20 | self.w_[1:] += self.eta * X.T.dot(errors) 21 | self.w_[0] += self.eta * errors.sum() 22 | cost = 
(errors**2).sum() / 2.0 23 | self.cost_.append(cost) 24 | return self 25 | 26 | def predict(self, X): 27 | return self.net_input(X) 28 | 29 | def lin_regplot(X, y, model): 30 | plt.scatter(X, y, c='blue') 31 | plt.plot(X, model.predict(X), color='red') 32 | return None 33 | 34 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data', header=None, sep='\s+') 35 | df.columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 36 | 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 37 | 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV'] 38 | 39 | 40 | import seaborn as sns 41 | sns.set(style='whitegrid', context='notebook') 42 | cols = ['LSTAT', 'INDUS', 'NOX', 'RM', 'MEDV'] 43 | sns.pairplot(df[cols], size=2.5) 44 | plt.show() 45 | 46 | X = df[['RM']].values 47 | y = df['MEDV'].values 48 | from sklearn.preprocessing import StandardScaler 49 | sc_x = StandardScaler() 50 | sc_y = StandardScaler() 51 | X_std = sc_x.fit_transform(X) 52 | y_std = sc_y.fit_transform(y) 53 | lr = LinearRegressionGD() 54 | lr.fit(X_std, y_std) 55 | 56 | #plt.plot(range(1, lr.n_iter+1), lr.cost_) 57 | #plt.ylabel('SSE') 58 | #plt.xlabel('Epoch') 59 | #plt.show() 60 | 61 | lin_regplot(X_std, y_std, lr) 62 | plt.xlabel('Average number of rooms [RM] (standardized)') 63 | plt.ylabel('Price in $1000\'s [MEDV] (standardized)') 64 | plt.show() 65 | 66 | num_rooms_std = sc_x.transform([5.0]) 67 | price_std = lr.predict(num_rooms_std) 68 | print("Price in $1000's: %.3f" % sc_y.inverse_transform(price_std)) 69 | 70 | 71 | print('Slope: %.3f' % lr.w_[1]) 72 | print('Intercept: %.3f' % lr.w_[0]) 73 | 74 | -------------------------------------------------------------------------------- /ch10-liner_reg-decisionTrees.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | from sklearn.cross_validation import train_test_split 4 | 5 | def lin_regplot(X, y, model): 6 | plt.scatter(X, y, c='blue') 7 | plt.plot(X, model.predict(X), color='red') 8 | return None 9 | 10 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data', header=None, sep='\s+') 11 | df.columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 12 | 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 13 | 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV'] 14 | 15 | # Decision Tree Regression 16 | from sklearn.tree import DecisionTreeRegressor 17 | X = df[['LSTAT']].values 18 | y = df['MEDV'].values 19 | tree = DecisionTreeRegressor(max_depth=3) 20 | tree.fit(X, y) 21 | sort_idx = X.flatten().argsort() 22 | lin_regplot(X[sort_idx], y[sort_idx], tree) 23 | plt.xlabel('% lower status of the population [LSTAT]') 24 | plt.ylabel('Price in $1000\'s [MEDV]') 25 | plt.show() 26 | 27 | 28 | # Random Forest Regression 29 | X = df.iloc[:, :-1].values 30 | y = df['MEDV'].values 31 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1) 32 | 33 | from sklearn.ensemble import RandomForestRegressor 34 | from sklearn.metrics import mean_squared_error 35 | from sklearn.metrics import r2_score 36 | forest = RandomForestRegressor(n_estimators=1000, criterion='mse', random_state=1, n_jobs=-1) 37 | forest.fit(X_train, y_train) 38 | y_train_pred = forest.predict(X_train) 39 | y_test_pred = forest.predict(X_test) 40 | print('MSE train: %.3f, test: %.3f' % ( 41 | mean_squared_error(y_train, y_train_pred), 42 | mean_squared_error(y_test, y_test_pred))) 43 | 44 | print('R^2 train: %.3f, test: %.3f' % ( 45 | r2_score(y_train, y_train_pred), 46 | r2_score(y_test, 
y_test_pred))) 47 | 48 | plt.scatter(y_train_pred, 49 | y_train_pred - y_train, 50 | c='black', 51 | marker='o', 52 | s=35, 53 | alpha=0.5, 54 | label='Training data') 55 | 56 | plt.scatter(y_test_pred, 57 | y_test_pred - y_test, 58 | c='lightgreen', 59 | marker='s', 60 | s=35, 61 | alpha=0.7, 62 | label='Test data') 63 | 64 | plt.xlabel('Predicted values') 65 | plt.ylabel('Residuals') 66 | plt.legend(loc='upper left') 67 | plt.hlines(y=0, xmin=-10, xmax=50, lw=2, color='red') 68 | plt.xlim([-10, 50]) 69 | plt.show() 70 | 71 | -------------------------------------------------------------------------------- /ch10-liner_reg-ploynomial.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | from sklearn.linear_model import LinearRegression 4 | import numpy as np 5 | from sklearn.preprocessing import PolynomialFeatures 6 | from sklearn.metrics import r2_score 7 | 8 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data', header=None, sep='\s+') 9 | df.columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 10 | 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 11 | 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV'] 12 | 13 | X = df[['LSTAT']].values 14 | y = df['MEDV'].values 15 | regr = LinearRegression() 16 | 17 | # create polynomial features 18 | quadratic = PolynomialFeatures(degree=2) 19 | cubic = PolynomialFeatures(degree=3) 20 | X_quad = quadratic.fit_transform(X) 21 | X_cubic = cubic.fit_transform(X) 22 | 23 | # linear fit 24 | X_fit = np.arange(X.min(), X.max(), 1)[:, np.newaxis] 25 | regr = regr.fit(X, y) 26 | y_lin_fit = regr.predict(X_fit) 27 | linear_r2 = r2_score(y, regr.predict(X)) 28 | 29 | # quadratic fit 30 | regr = regr.fit(X_quad, y) 31 | y_quad_fit = regr.predict(quadratic.fit_transform(X_fit)) 32 | quadratic_r2 = r2_score(y, regr.predict(X_quad)) 33 | 34 | # cubic fit 35 | regr = regr.fit(X_cubic, y) 36 | y_cubic_fit = regr.predict(cubic.fit_transform(X_fit)) 37 | cubic_r2 = r2_score(y, regr.predict(X_cubic)) 38 | 39 | # plot results 40 | plt.scatter(X, y, 41 | label='training points', 42 | color='lightgray') 43 | 44 | plt.plot(X_fit, y_lin_fit, 45 | label='linear (d=1), $R^2=%.2f$' % linear_r2, 46 | color='blue', 47 | lw=2, 48 | linestyle=':') 49 | 50 | 51 | plt.plot(X_fit, y_quad_fit, 52 | label='quadratic (d=2), $R^2=%.2f$' % quadratic_r2, 53 | color='red', 54 | lw=2, 55 | linestyle='-') 56 | 57 | plt.plot(X_fit, y_cubic_fit, 58 | label='cubic (d=3), $R^2=%.2f$'% cubic_r2, 59 | color='green', 60 | lw=2, 61 | linestyle='--') 62 | 63 | plt.xlabel('% lower status of the population [LSTAT]') 64 | plt.ylabel('Price in $1000\'s [MEDV]') 65 | plt.legend(loc='upper right') 66 | plt.show() 67 | 68 | 69 | 70 | # transform features 71 | X_log = np.log(X) 72 | y_sqrt = np.sqrt(y) 73 | 74 | # fit features 75 | X_fit = np.arange(X_log.min()-1, X_log.max()+1, 1)[:, np.newaxis] 76 | regr = regr.fit(X_log, y_sqrt) 77 | y_lin_fit = regr.predict(X_fit) 78 | linear_r2 = r2_score(y_sqrt, regr.predict(X_log)) 79 | 80 | # plot results 81 | plt.scatter(X_log, y_sqrt, 82 | label='training points', 83 | color='lightgray') 84 | 85 | plt.plot(X_fit, y_lin_fit, 86 | label='linear (d=1), $R^2=%.2f$' % linear_r2, 87 | color='blue', 88 | lw=2) 89 | 90 | plt.xlabel('log(% lower status of the population [LSTAT])') 91 | plt.ylabel('$\sqrt{Price \; in \; \$1000\'s [MEDV]}$') 92 | plt.legend(loc='lower left') 93 | plt.show() 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 
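Note on the polynomial fits above: they work by expanding the single LSTAT column into higher-order terms with PolynomialFeatures and then fitting an ordinary LinearRegression on the expanded matrix. A minimal sketch of that expansion on a tiny made-up array (illustrative only; it assumes scikit-learn's standard PolynomialFeatures API and is not part of the original repository):

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X_toy = np.array([[2.0], [3.0]])        # one feature, two samples (made-up values)
quad = PolynomialFeatures(degree=2)
X_toy_quad = quad.fit_transform(X_toy)  # columns: bias term, x, x^2
print(X_toy_quad)                       # -> [[1. 2. 4.]
                                        #     [1. 3. 9.]]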
-------------------------------------------------------------------------------- /ch10-scikit-linear-regression.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def lin_regplot(X, y, model): 6 | plt.scatter(X, y, c='blue') 7 | plt.plot(X, model.predict(X), color='red') 8 | return None 9 | 10 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data', header=None, sep='\s+') 11 | df.columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 12 | 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 13 | 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV'] 14 | 15 | 16 | 17 | X = df[['RM']].values 18 | y = df['MEDV'].values 19 | from sklearn.preprocessing import StandardScaler 20 | sc_x = StandardScaler() 21 | sc_y = StandardScaler() 22 | X_std = sc_x.fit_transform(X) 23 | y_std = sc_y.fit_transform(y) 24 | 25 | 26 | from sklearn.linear_model import LinearRegression 27 | slr = LinearRegression() 28 | slr.fit(X, y) 29 | print('Slope: %.3f' % slr.coef_[0]) 30 | print('Intercept: %.3f' % slr.intercept_) 31 | 32 | 33 | 34 | lin_regplot(X, y, slr) 35 | plt.xlabel('Average number of rooms [RM]') 36 | plt.ylabel('Price in $1000\'s [MEDV]') 37 | plt.show() 38 | -------------------------------------------------------------------------------- /ch10-scikit-linear_reg-RANSAC.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | def lin_regplot(X, y, model): 5 | plt.scatter(X, y, c='blue') 6 | plt.plot(X, model.predict(X), color='red') 7 | return None 8 | 9 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data', header=None, sep='\s+') 10 | df.columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 11 | 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 12 | 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV'] 13 | 14 | X = df[['RM']].values 15 | y = df['MEDV'].values 16 | from sklearn.preprocessing import StandardScaler 17 | sc_x = StandardScaler() 18 | sc_y = StandardScaler() 19 | X_std = sc_x.fit_transform(X) 20 | y_std = sc_y.fit_transform(y) 21 | 22 | from sklearn.linear_model import LinearRegression 23 | from sklearn.linear_model import RANSACRegressor 24 | import numpy as np 25 | 26 | ransac = RANSACRegressor(LinearRegression(), 27 | max_trials=100, 28 | min_samples=50, 29 | residual_metric=lambda x: np.sum(np.abs(x), axis=1), 30 | residual_threshold=5.0, 31 | random_state=0) 32 | 33 | ransac.fit(X, y) 34 | 35 | inlier_mask = ransac.inlier_mask_ 36 | outlier_mask = np.logical_not(inlier_mask) 37 | line_X = np.arange(3, 10, 1) 38 | line_y_ransac = ransac.predict(line_X[:, np.newaxis]) 39 | 40 | plt.scatter(X[inlier_mask], y[inlier_mask], c='blue', marker='o', label='Inliers') 41 | plt.scatter(X[outlier_mask], y[outlier_mask], c='lightgreen', marker='s', label='Outliers') 42 | plt.plot(line_X, line_y_ransac, color='red') 43 | plt.xlabel('Average number of rooms [RM]') 44 | plt.ylabel('Price in $1000\'s [MEDV]') 45 | plt.legend(loc='upper left') 46 | plt.show() 47 | 48 | print('Slope: %.3f' % ransac.estimator_.coef_[0]) 49 | print('Intercept: %.3f' % ransac.estimator_.intercept_) 50 | 51 | 52 | 53 | 54 | 55 | from sklearn.cross_validation import train_test_split 56 | X = df.iloc[:, :-1].values 57 | y = df['MEDV'].values 58 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) 59 | slr = LinearRegression() 60 | slr.fit(X_train, y_train) 61 | y_train_pred = 
slr.predict(X_train) 62 | y_test_pred = slr.predict(X_test) 63 | 64 | plt.scatter(y_train_pred, y_train_pred - y_train, c='blue', marker='o', label='Training data') 65 | plt.scatter(y_test_pred, y_test_pred - y_test, c='lightgreen', marker='s', label='Test data') 66 | plt.xlabel('Predicted values') 67 | plt.ylabel('Residuals') 68 | plt.legend(loc='upper left') 69 | plt.hlines(y=0, xmin=-10, xmax=50, lw=2, color='red') 70 | plt.xlim([-10, 50]) 71 | plt.show() 72 | 73 | from sklearn.metrics import mean_squared_error 74 | print('MSE train: %.3f, test: %.3f' % ( 75 | mean_squared_error(y_train, y_train_pred), 76 | mean_squared_error(y_test, y_test_pred))) 77 | 78 | from sklearn.metrics import r2_score 79 | print('R^2 train: %.3f, test: %.3f' % 80 | (r2_score(y_train, y_train_pred), 81 | r2_score(y_test, y_test_pred))) -------------------------------------------------------------------------------- /ch10-scikit-linear_reg-model_eval.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | from sklearn.linear_model import LinearRegression 4 | import numpy as np 5 | from sklearn.cross_validation import train_test_split 6 | 7 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data', header=None, sep='\s+') 8 | df.columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 9 | 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 10 | 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV'] 11 | 12 | 13 | X = df.iloc[:, :-1].values 14 | y = df['MEDV'].values 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) 16 | slr = LinearRegression() 17 | slr.fit(X_train, y_train) 18 | y_train_pred = slr.predict(X_train) 19 | y_test_pred = slr.predict(X_test) 20 | 21 | plt.scatter(y_train_pred, y_train_pred - y_train, c='blue', marker='o', label='Training data') 22 | plt.scatter(y_test_pred, y_test_pred - y_test, c='lightgreen', marker='s', label='Test data') 23 | plt.xlabel('Predicted values') 24 | plt.ylabel('Residuals') 25 | plt.legend(loc='upper left') 26 | plt.hlines(y=0, xmin=-10, xmax=50, lw=2, color='red') 27 | plt.xlim([-10, 50]) 28 | plt.show() 29 | 30 | from sklearn.metrics import mean_squared_error 31 | print('MSE train: %.3f, test: %.3f' % ( 32 | mean_squared_error(y_train, y_train_pred), 33 | mean_squared_error(y_test, y_test_pred))) 34 | 35 | from sklearn.metrics import r2_score 36 | print('R^2 train: %.3f, test: %.3f' % 37 | (r2_score(y_train, y_train_pred), 38 | r2_score(y_test, y_test_pred))) 39 | 40 | 41 | 42 | # regularized regression models 43 | from sklearn.linear_model import Ridge 44 | ridge = Ridge(alpha=1.0) 45 | 46 | from sklearn.linear_model import Lasso 47 | lasso = Lasso(alpha=1.0) 48 | 49 | from sklearn.linear_model import ElasticNet 50 | lasso = ElasticNet(alpha=1.0, l1_ratio=0.5) 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /ch11-clustering-DBSCAN.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import make_moons 2 | import matplotlib.pyplot as plt 3 | 4 | # Generate data 5 | X, y = make_moons(n_samples=200, noise=0.05, random_state=0) 6 | plt.scatter(X[:,0], X[:,1]) 7 | plt.show() 8 | 9 | # Use Kmeans clustering 10 | f, (ax1, ax2) = plt.subplots(1, 2, figsize=(8,3)) 11 | from sklearn.cluster import KMeans 12 | km = KMeans(n_clusters=2, random_state=0) 13 | y_km = km.fit_predict(X) 14 | ax1.scatter(X[y_km==0,0], 
X[y_km==0,1], c='lightblue', marker='o', s=40, label='cluster 1') 15 | ax1.scatter(X[y_km==1,0], X[y_km==1,1], c='red', marker='s', s=40, label='cluster 2') 16 | ax1.set_title('K-means clustering') 17 | 18 | 19 | # Use Agglomerative Clustering 20 | from sklearn.cluster import AgglomerativeClustering 21 | ac = AgglomerativeClustering(n_clusters=2, affinity='euclidean', linkage='complete') 22 | y_ac = ac.fit_predict(X) 23 | ax2.scatter(X[y_ac==0,0], X[y_ac==0,1], c='lightblue', marker='o', s=40, label='cluster 1') 24 | ax2.scatter(X[y_ac==1,0], X[y_ac==1,1], c='red', marker='s', s=40, label='cluster 2') 25 | ax2.set_title('Agglomerative clustering') 26 | plt.legend() 27 | plt.show() 28 | 29 | 30 | # Use DBSCAN Clustering (Better at classifying non circular clusters) 31 | from sklearn.cluster import DBSCAN 32 | db = DBSCAN(eps=0.2, min_samples=5, metric='euclidean') 33 | y_db = db.fit_predict(X) 34 | plt.scatter(X[y_db==0,0], X[y_db==0,1], c='lightblue', marker='o', s=40, label='cluster 1') 35 | plt.scatter(X[y_db==1,0], X[y_db==1,1], c='red', marker='s', s=40, label='cluster 2') 36 | plt.legend() 37 | plt.show() 38 | -------------------------------------------------------------------------------- /ch11-clustering-agglomerative.py: -------------------------------------------------------------------------------- 1 | # Generate random data 2 | import pandas as pd 3 | import numpy as np 4 | np.random.seed(123) 5 | variables = ['X', 'Y', 'Z'] 6 | labels = ['ID_0','ID_1','ID_2','ID_3','ID_4'] 7 | X = np.random.random_sample([5,3])*10 8 | df = pd.DataFrame(X, columns=variables, index=labels) 9 | 10 | #from scipy.spatial.distance import pdist, squareform 11 | #row_dist = pd.DataFrame(squareform(pdist(df, metric='euclidean')), columns=labels, index=labels) 12 | 13 | 14 | # apply the complete linkage agglomeration 15 | from scipy.cluster.hierarchy import linkage 16 | row_clusters = linkage(df.values, method='complete', metric='euclidean') 17 | pd.DataFrame(row_clusters, 18 | columns=['row label 1', 19 | 'row label 2', 20 | 'distance', 21 | 'no. 
of items in clust.'], 22 | index=['cluster %d' %(i+1) for i in range(row_clusters.shape[0])]) 23 | 24 | from scipy.cluster.hierarchy import dendrogram 25 | import matplotlib.pyplot as plt 26 | # make dendrogram black (part 1/2) 27 | # from scipy.cluster.hierarchy import set_link_color_palette 28 | # set_link_color_palette(['black']) 29 | row_dendr = dendrogram(row_clusters, 30 | labels=labels, 31 | # make dendrogram black (part 2/2) 32 | # color_threshold=np.inf 33 | ) 34 | plt.tight_layout() 35 | plt.ylabel('Euclidean distance') 36 | plt.show() 37 | 38 | #Attach Dendograms to a heat-map 39 | fig = plt.figure(figsize=(8,8), facecolor='white') 40 | axd = fig.add_axes([0.09,0.1,0.2,0.6]) 41 | row_dendr = dendrogram(row_clusters, orientation='left') 42 | df_rowclust = df.ix[row_dendr['leaves'][::-1]] 43 | axm = fig.add_axes([0.23,0.1,0.6,0.6]) 44 | cax = axm.matshow(df_rowclust, interpolation='nearest', cmap='hot_r') 45 | 46 | axd.set_xticks([]) 47 | axd.set_yticks([]) 48 | 49 | for i in axd.spines.values(): 50 | i.set_visible(False) 51 | 52 | fig.colorbar(cax) 53 | axm.set_xticklabels([''] + list(df_rowclust.columns)) 54 | axm.set_yticklabels([''] + list(df_rowclust.index)) 55 | plt.show() 56 | 57 | 58 | # Use scikit-learn, you can define number of clusters (for pruning) 59 | from sklearn.cluster import AgglomerativeClustering 60 | ac = AgglomerativeClustering(n_clusters=2, affinity='euclidean', linkage='complete') 61 | labels = ac.fit_predict(X) 62 | print('Cluster labels: %s' % labels) 63 | -------------------------------------------------------------------------------- /ch11-clustering-kmeans-analysis.py: -------------------------------------------------------------------------------- 1 | # Generate random data 2 | from sklearn.datasets import make_blobs 3 | X, y = make_blobs(n_samples=150, n_features=2, centers=3, cluster_std=0.5, shuffle=True, random_state=0) 4 | 5 | # Use scikit Kmeans for clustering 6 | from sklearn.cluster import KMeans 7 | import matplotlib.pyplot as plt 8 | km = KMeans(n_clusters=3, init='k-means++', n_init=10, max_iter=300, tol=1e-04, random_state=0) 9 | y_km = km.fit_predict(X) 10 | 11 | import numpy as np 12 | from matplotlib import cm 13 | from sklearn.metrics import silhouette_samples 14 | cluster_labels = np.unique(y_km) 15 | n_clusters = cluster_labels.shape[0] 16 | silhouette_vals = silhouette_samples(X, y_km, metric='euclidean') 17 | y_ax_lower, y_ax_upper = 0, 0 18 | yticks = [] 19 | for i, c in enumerate(cluster_labels): 20 | c_silhouette_vals = silhouette_vals[y_km == c] 21 | c_silhouette_vals.sort() 22 | y_ax_upper += len(c_silhouette_vals) 23 | color = cm.jet(i / n_clusters) 24 | plt.barh(range(y_ax_lower, y_ax_upper), c_silhouette_vals, height=1.0, edgecolor='none', color=color) 25 | yticks.append((y_ax_lower + y_ax_upper) / 2) 26 | y_ax_lower += len(c_silhouette_vals) 27 | 28 | silhouette_avg = np.mean(silhouette_vals) 29 | plt.axvline(silhouette_avg,color="red", linestyle="--") 30 | plt.yticks(yticks, cluster_labels + 1) 31 | plt.ylabel('Cluster') 32 | plt.xlabel('Silhouette coefficient') 33 | plt.show() 34 | 35 | 36 | 37 | 38 | 39 | #Bad clustering example 40 | km = KMeans(n_clusters=2,init='k-means++', n_init=10, max_iter=300, tol=1e-04, random_state=0) 41 | y_km = km.fit_predict(X) 42 | plt.scatter(X[y_km==0,0], X[y_km==0,1],s=50, c='lightgreen',marker='s',label='cluster 1') 43 | plt.scatter(X[y_km==1,0], X[y_km==1,1], s=50, c='orange', marker='o', label='cluster 2') 44 | plt.scatter(km.cluster_centers_[:,0], km.cluster_centers_[:,1], s=250, 
marker='*', c='red', label='centroids') 45 | plt.legend() 46 | plt.grid() 47 | plt.show() 48 | cluster_labels = np.unique(y_km) 49 | n_clusters = cluster_labels.shape[0] 50 | silhouette_vals = silhouette_samples(X, y_km, metric='euclidean') 51 | y_ax_lower, y_ax_upper = 0, 0 52 | yticks = [] 53 | for i, c in enumerate(cluster_labels): 54 | c_silhouette_vals = silhouette_vals[y_km == c] 55 | c_silhouette_vals.sort() 56 | y_ax_upper += len(c_silhouette_vals) 57 | color = cm.jet(i / n_clusters) 58 | plt.barh(range(y_ax_lower, y_ax_upper), c_silhouette_vals, height=1.0, edgecolor='none', color=color) 59 | yticks.append((y_ax_lower + y_ax_upper) / 2) 60 | y_ax_lower += len(c_silhouette_vals) 61 | 62 | silhouette_avg = np.mean(silhouette_vals) 63 | plt.axvline(silhouette_avg, color="red", linestyle="--") 64 | plt.yticks(yticks, cluster_labels + 1) 65 | plt.ylabel('Cluster') 66 | plt.xlabel('Silhouette coefficient') 67 | plt.show() 68 | 69 | -------------------------------------------------------------------------------- /ch11-clustering-kmeans.py: -------------------------------------------------------------------------------- 1 | # Generate random data 2 | from sklearn.datasets import make_blobs 3 | X, y = make_blobs(n_samples=150, 4 | n_features=2, 5 | centers=3, 6 | cluster_std=0.5, 7 | shuffle=True, 8 | random_state=0) 9 | 10 | # plot data 11 | import matplotlib.pyplot as plt 12 | plt.scatter(X[:,0],X[:,1], c='white', marker='o', s=50) 13 | plt.grid() 14 | plt.show() 15 | 16 | 17 | 18 | # Use scikit Kmeans for clustering 19 | from sklearn.cluster import KMeans 20 | km = KMeans(n_clusters=3, 21 | init='random', # use init='kmeans++' to initialize centroids using kmeans++ 22 | n_init=10, 23 | max_iter=300, 24 | tol=1e-04, 25 | random_state=0) 26 | 27 | # cluster 28 | y_km = km.fit_predict(X) 29 | # print SSE 30 | print('Distortion: %.2f' % km.inertia_) 31 | 32 | # Plot clustering 33 | plt.scatter(X[y_km==0,0],X[y_km==0,1],s=50,c='lightgreen',marker='s',label='cluster 1') 34 | plt.scatter(X[y_km==1,0],X[y_km==1,1],s=50,c='orange',marker='o',label='cluster 2') 35 | plt.scatter(X[y_km==2,0],X[y_km==2,1],s=50,c='lightblue',marker='v',label='cluster 3') 36 | plt.scatter(km.cluster_centers_[:,0],km.cluster_centers_[:,1],s=250,marker='*',c='red',label='centroids') 37 | plt.legend() 38 | plt.grid() 39 | plt.show() 40 | 41 | # Use elbow method to decide on value of k (no of clusters) 42 | distortions = [] 43 | for i in range(1, 11): 44 | km = KMeans(n_clusters=i, 45 | init='k-means++', 46 | n_init=10, 47 | max_iter=300, 48 | random_state=0) 49 | km.fit(X) 50 | distortions.append(km.inertia_) 51 | # plot 52 | plt.plot(range(1,11), distortions, marker='o') 53 | plt.xlabel('Number of clusters') 54 | plt.ylabel('Distortion') 55 | plt.show() 56 | 57 | -------------------------------------------------------------------------------- /ch12-NeuralNet-MLP.py: -------------------------------------------------------------------------------- 1 | import os 2 | import struct 3 | import numpy as np 4 | 5 | def load_mnist(path, kind='train'): 6 | """Load MNIST data from `path`""" 7 | labels_path = os.path.join(path, '%s-labels.idx1-ubyte' % kind) 8 | images_path = os.path.join(path, '%s-images.idx3-ubyte' % kind) 9 | 10 | with open(labels_path, 'rb') as lbpath: 11 | magic, n = struct.unpack('>II', lbpath.read(8)) 12 | labels = np.fromfile(lbpath, dtype=np.uint8) 13 | 14 | with open(images_path, 'rb') as imgpath: 15 | magic, num, rows, cols = struct.unpack(">IIII", imgpath.read(16)) 16 | images = np.fromfile(imgpath, 
dtype=np.uint8).reshape(len(labels), 784) 17 | 18 | 19 | return images, labels 20 | 21 | 22 | X_train, y_train = load_mnist('mnist', kind='train') 23 | X_test, y_test = load_mnist('mnist', kind='t10k') 24 | 25 | 26 | from neuralnet import NeuralNetMLP 27 | nn = NeuralNetMLP(n_output=10, 28 | n_features=X_train.shape[1], 29 | n_hidden=50, 30 | l2=0.1, 31 | l1=0.0, 32 | epochs=1000, 33 | eta=0.001, 34 | alpha=0.001, 35 | decrease_const=0.00001, 36 | shuffle=True, 37 | minibatches=50, 38 | random_state=1) 39 | 40 | 41 | 42 | 43 | 44 | nn.fit(X_train, y_train, print_progress=True) 45 | 46 | import matplotlib.pyplot as plt 47 | plt.plot(range(len(nn.cost_)), nn.cost_) 48 | plt.ylim([0, 2000]) 49 | plt.ylabel('Cost') 50 | plt.xlabel('Epochs * 50') 51 | plt.tight_layout() 52 | plt.show() 53 | 54 | batches = np.array_split(range(len(nn.cost_)), 1000) 55 | cost_ary = np.array(nn.cost_) 56 | cost_avgs = [np.mean(cost_ary[i]) for i in batches] 57 | 58 | plt.plot(range(len(cost_avgs)), cost_avgs, color='red') 59 | plt.ylim([0, 2000]) 60 | plt.ylabel('Cost') 61 | plt.xlabel('Epochs') 62 | plt.tight_layout() 63 | plt.show() 64 | 65 | y_train_pred = nn.predict(X_train) 66 | acc = np.sum(y_train == y_train_pred, axis=0) / X_train.shape[0] 67 | print('Training accuracy: %.2f%%' % (acc * 100)) 68 | 69 | 70 | y_test_pred = nn.predict(X_test) 71 | acc = np.sum(y_test == y_test_pred, axis=0) / X_test.shape[0] 72 | print('Test accuracy: %.2f%%' % (acc * 100)) 73 | 74 | 75 | 76 | miscl_img = X_test[y_test != y_test_pred][:25] 77 | correct_lab = y_test[y_test != y_test_pred][:25] 78 | miscl_lab= y_test_pred[y_test != y_test_pred][:25] 79 | 80 | fig, ax = plt.subplots(nrows=5, ncols=5, sharex=True, sharey=True,) 81 | ax = ax.flatten() 82 | for i in range(25): 83 | img = miscl_img[i].reshape(28, 28) 84 | ax[i].imshow(img, cmap='Greys', interpolation='nearest') 85 | ax[i].set_title('%d) t: %d p: %d' % (i+1, correct_lab[i], miscl_lab[i])) 86 | 87 | ax[0].set_xticks([]) 88 | ax[0].set_yticks([]) 89 | plt.tight_layout() 90 | plt.show() 91 | 92 | 93 | # Perform gradient checking 94 | from neuralnet import MLPGradientCheck 95 | nn_check = MLPGradientCheck(n_output=10, 96 | n_features=X_train.shape[1], 97 | n_hidden=10, 98 | l2=0.0, 99 | l1=0.0, 100 | epochs=10, 101 | eta=0.001, 102 | alpha=0.0, 103 | decrease_const=0.0, 104 | minibatches=1, 105 | shuffle=False, 106 | random_state=1) 107 | 108 | 109 | # Perform check for handful samples 110 | nn_check.fit(X_train[:5], y_train[:5], print_progress=False) 111 | 112 | -------------------------------------------------------------------------------- /ch12-import-MNIST.py: -------------------------------------------------------------------------------- 1 | import os 2 | import struct 3 | import numpy as np 4 | 5 | def load_mnist(path, kind='train'): 6 | """Load MNIST data from `path`""" 7 | labels_path = os.path.join(path, '%s-labels.idx1-ubyte' % kind) 8 | images_path = os.path.join(path, '%s-images.idx3-ubyte' % kind) 9 | 10 | with open(labels_path, 'rb') as lbpath: 11 | magic, n = struct.unpack('>II', lbpath.read(8)) 12 | labels = np.fromfile(lbpath, dtype=np.uint8) 13 | 14 | with open(images_path, 'rb') as imgpath: 15 | magic, num, rows, cols = struct.unpack(">IIII", imgpath.read(16)) 16 | images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 784) 17 | 18 | 19 | return images, labels 20 | 21 | 22 | X_train, y_train = load_mnist('mnist', kind='train') 23 | print('Rows: %d, columns: %d' % (X_train.shape[0], X_train.shape[1])) 24 | 25 | X_test, y_test = 
load_mnist('mnist', kind='t10k') 26 | print('Rows: %d, columns: %d' % (X_test.shape[0], X_test.shape[1])) 27 | 28 | # visualize examples of MNIST data 29 | import matplotlib.pyplot as plt 30 | fig, ax = plt.subplots(nrows=2, ncols=5, sharex=True, sharey=True,) 31 | ax = ax.flatten() 32 | 33 | for i in range(10): 34 | img = X_train[y_train == i][0].reshape(28, 28) 35 | ax[i].imshow(img, cmap='Greys', interpolation='nearest') 36 | 37 | ax[0].set_xticks([]) 38 | ax[0].set_yticks([]) 39 | plt.tight_layout() 40 | plt.show() 41 | 42 | 43 | # plot multiple examples of the same digit to see how different those handwriting examples really are 44 | fig, ax = plt.subplots(nrows=5, ncols=5, sharex=True, sharey=True,) 45 | ax = ax.flatten() 46 | 47 | for i in range(25): 48 | img = X_train[y_train == 7][i].reshape(28, 28) 49 | ax[i].imshow(img, cmap='Greys', interpolation='nearest') 50 | 51 | ax[0].set_xticks([]) 52 | ax[0].set_yticks([]) 53 | plt.tight_layout() 54 | plt.show() 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /ch13-keras.py: -------------------------------------------------------------------------------- 1 | import os 2 | import struct 3 | import numpy as np 4 | 5 | def load_mnist(path, kind='train'): 6 | """Load MNIST data from `path`""" 7 | labels_path = os.path.join(path, 8 | '%s-labels.idx1-ubyte' 9 | % kind) 10 | images_path = os.path.join(path, 11 | '%s-images.idx3-ubyte' 12 | % kind) 13 | 14 | with open(labels_path, 'rb') as lbpath: 15 | magic, n = struct.unpack('>II', 16 | lbpath.read(8)) 17 | labels = np.fromfile(lbpath, 18 | dtype=np.uint8) 19 | 20 | with open(images_path, 'rb') as imgpath: 21 | magic, num, rows, cols = struct.unpack(">IIII", 22 | imgpath.read(16)) 23 | images = np.fromfile(imgpath, 24 | dtype=np.uint8).reshape(len(labels), 784) 25 | 26 | return images, labels 27 | 28 | X_train, y_train = load_mnist('mnist', kind='train') 29 | print('Rows: %d, columns: %d' % (X_train.shape[0], X_train.shape[1])) 30 | 31 | X_test, y_test = load_mnist('mnist', kind='t10k') 32 | print('Rows: %d, columns: %d' % (X_test.shape[0], X_test.shape[1])) 33 | 34 | import theano 35 | 36 | theano.config.floatX = 'float32' 37 | X_train = X_train.astype(theano.config.floatX) 38 | X_test = X_test.astype(theano.config.floatX) 39 | 40 | 41 | from keras.utils import np_utils 42 | 43 | print('First 3 labels: ', y_train[:3]) 44 | 45 | y_train_ohe = np_utils.to_categorical(y_train) 46 | print('\nFirst 3 labels (one-hot):\n', y_train_ohe[:3]) 47 | 48 | 49 | from keras.models import Sequential 50 | from keras.layers.core import Dense 51 | from keras.optimizers import SGD 52 | 53 | np.random.seed(1) 54 | 55 | model = Sequential() 56 | model.add(Dense(input_dim=X_train.shape[1], 57 | output_dim=50, 58 | init='uniform', 59 | activation='tanh')) 60 | 61 | model.add(Dense(input_dim=50, 62 | output_dim=50, 63 | init='uniform', 64 | activation='tanh')) 65 | 66 | model.add(Dense(input_dim=50, 67 | output_dim=y_train_ohe.shape[1], 68 | init='uniform', 69 | activation='softmax')) 70 | 71 | sgd = SGD(lr=0.001, decay=1e-7, momentum=.9) 72 | model.compile(loss='categorical_crossentropy', optimizer=sgd) 73 | 74 | model.fit(X_train, y_train_ohe, 75 | nb_epoch=50, 76 | batch_size=300, 77 | verbose=1, 78 | validation_split=0.1, 79 | show_accuracy=True) 80 | 81 | 82 | 83 | y_train_pred = model.predict_classes(X_train, verbose=0) 84 | print('First 3 predictions: ', y_train_pred[:3]) 85 | 86 | 87 | train_acc = np.sum(y_train == y_train_pred, axis=0) / X_train.shape[0] 
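# added note: predict_classes (older Keras Sequential API) returns integer class labels, so the accuracy is simply the count of matches with the integer targets divided by the number of samples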
88 | print('Training accuracy: %.2f%%' % (train_acc * 100)) 89 | 90 | 91 | y_test_pred = model.predict_classes(X_test, verbose=0) 92 | test_acc = np.sum(y_test == y_test_pred, axis=0) / X_test.shape[0] 93 | print('Test accuracy: %.2f%%' % (test_acc * 100)) 94 | 95 | 96 | -------------------------------------------------------------------------------- /ch13-theano.py: -------------------------------------------------------------------------------- 1 | import theano 2 | from theano import tensor as T 3 | 4 | # initialize 5 | x1 = T.scalar() 6 | w1 = T.scalar() 7 | w0 = T.scalar() 8 | z1 = w1 * x1 + w0 9 | 10 | # compile 11 | net_input = theano.function(inputs=[w1, x1, w0], outputs=z1) 12 | 13 | # execute 14 | net_input(2.0, 1.0, 0.5) 15 | 16 | 17 | 18 | 19 | import numpy as np 20 | 21 | # initialize 22 | # if you are running Theano on 64 bit mode, 23 | # you need to use dmatrix instead of fmatrix 24 | x = T.fmatrix(name='x') 25 | x_sum = T.sum(x, axis=0) 26 | 27 | # compile 28 | calc_sum = theano.function(inputs=[x], outputs=x_sum) 29 | 30 | # execute (Python list) 31 | ary = [[1, 2, 3], [1, 2, 3]] 32 | print('Column sum:', calc_sum(ary)) 33 | 34 | # execute (NumPy array) 35 | ary = np.array([[1, 2, 3], [1, 2, 3]], dtype=theano.config.floatX) 36 | print('Column sum:', calc_sum(ary)) 37 | 38 | 39 | 40 | 41 | # initialize 42 | x = T.fmatrix(name='x') 43 | w = theano.shared(np.asarray([[0.0, 0.0, 0.0]], dtype=theano.config.floatX)) 44 | z = x.dot(w.T) 45 | update = [[w, w + 1.0]] 46 | 47 | # compile 48 | net_input = theano.function(inputs=[x], updates=update, outputs=z) 49 | 50 | # execute 51 | data = np.array([[1, 2, 3]], dtype=theano.config.floatX) 52 | for i in range(5): 53 | print('z%d:' % i, net_input(data)) 54 | 55 | 56 | 57 | # initialize 58 | data = np.array([[1, 2, 3]], 59 | dtype=theano.config.floatX) 60 | x = T.fmatrix(name='x') 61 | w = theano.shared(np.asarray([[0.0, 0.0, 0.0]], 62 | dtype=theano.config.floatX)) 63 | z = x.dot(w.T) 64 | update = [[w, w + 1.0]] 65 | 66 | # compile 67 | net_input = theano.function(inputs=[], 68 | updates=update, 69 | givens={x: data}, 70 | outputs=z) 71 | 72 | # execute 73 | for i in range(5): 74 | print('z:', net_input()) 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | # Wrapping things up: A linear regression example 84 | 85 | import numpy as np 86 | X_train = np.asarray([[0.0], [1.0], [2.0], [3.0], [4.0], 87 | [5.0], [6.0], [7.0], [8.0], [9.0]], 88 | dtype=theano.config.floatX) 89 | 90 | y_train = np.asarray([1.0, 1.3, 3.1, 2.0, 5.0, 91 | 6.3, 6.6, 7.4, 8.0, 9.0], 92 | dtype=theano.config.floatX) 93 | 94 | 95 | import theano 96 | from theano import tensor as T 97 | import numpy as np 98 | 99 | def train_linreg(X_train, y_train, eta, epochs): 100 | 101 | costs = [] 102 | # Initialize arrays 103 | eta0 = T.fscalar('eta0') 104 | y = T.fvector(name='y') 105 | X = T.fmatrix(name='X') 106 | w = theano.shared(np.zeros( 107 | shape=(X_train.shape[1] + 1), 108 | dtype=theano.config.floatX), 109 | name='w') 110 | 111 | # calculate cost 112 | net_input = T.dot(X, w[1:]) + w[0] 113 | errors = y - net_input 114 | cost = T.sum(T.pow(errors, 2)) 115 | 116 | # perform gradient update 117 | gradient = T.grad(cost, wrt=w) 118 | update = [(w, w - eta0 * gradient)] 119 | 120 | # compile model 121 | train = theano.function(inputs=[eta0], 122 | outputs=cost, 123 | updates=update, 124 | givens={X: X_train, 125 | y: y_train,}) 126 | 127 | for _ in range(epochs): 128 | costs.append(train(eta)) 129 | 130 | return costs, w 131 | 132 | 133 | 134 | 135 | 136 | import 
matplotlib.pyplot as plt 137 | 138 | costs, w = train_linreg(X_train, y_train, eta=0.001, epochs=10) 139 | 140 | plt.plot(range(1, len(costs)+1), costs) 141 | 142 | plt.tight_layout() 143 | plt.xlabel('Epoch') 144 | plt.ylabel('Cost') 145 | plt.tight_layout() 146 | # plt.savefig('./figures/cost_convergence.png', dpi=300) 147 | plt.show() 148 | 149 | def predict_linreg(X, w): 150 | Xt = T.matrix(name='X') 151 | net_input = T.dot(Xt, w[1:]) + w[0] 152 | predict = theano.function(inputs=[Xt], givens={w: w}, outputs=net_input) 153 | return predict(X) 154 | 155 | plt.scatter(X_train, y_train, marker='s', s=50) 156 | plt.plot(range(X_train.shape[0]), 157 | predict_linreg(X_train, w), 158 | color='gray', 159 | marker='o', 160 | markersize=4, 161 | linewidth=3) 162 | 163 | plt.xlabel('x') 164 | plt.ylabel('y') 165 | 166 | plt.tight_layout() 167 | # plt.savefig('./figures/linreg.png', dpi=300) 168 | plt.show() -------------------------------------------------------------------------------- /ch2-Adaline-BatchGD.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | from matplotlib.colors import ListedColormap 5 | 6 | class AdalineGD(object): 7 | """ADAptive LInear NEuron classifier. 8 | 9 | Parameters 10 | ------------ 11 | eta : float 12 | Learning rate (between 0.0 and 1.0) 13 | n_iter : int 14 | Passes over the training dataset. 15 | 16 | Attributes 17 | ----------- 18 | w_ : 1d-array 19 | Weights after fitting. 20 | errors_ : list 21 | Number of misclassifications in every epoch. 22 | 23 | """ 24 | def __init__(self, eta=0.01, n_iter=50): 25 | self.eta = eta 26 | self.n_iter = n_iter 27 | 28 | def fit(self, X, y): 29 | """ Fit training data. 30 | 31 | Parameters 32 | ---------- 33 | X : {array-like}, shape = [n_samples, n_features] 34 | Training vectors, 35 | where n_samples is the number of samples and” 36 | 37 | where n_samples is the number of samples and 38 | n_features is the number of features. 39 | y : array-like, shape = [n_samples] 40 | Target values. 
41 | 42 | Returns 43 | ------- 44 | self : object 45 | 46 | """ 47 | self.w_ = np.zeros(1 + X.shape[1]) 48 | self.cost_ = [] 49 | 50 | for i in range(self.n_iter): 51 | output = self.net_input(X) 52 | errors = (y - output) 53 | self.w_[1:] += self.eta * X.T.dot(errors) 54 | self.w_[0] += self.eta * errors.sum() 55 | cost = (errors**2).sum() / 2.0 56 | self.cost_.append(cost) 57 | return self 58 | 59 | def net_input(self, X): 60 | """Calculate net input""" 61 | return np.dot(X, self.w_[1:]) + self.w_[0] 62 | 63 | def activation(self, X): 64 | """Compute linear activation""" 65 | return self.net_input(X) 66 | 67 | def predict(self, X): 68 | """Return class label after unit step""" 69 | return np.where(self.activation(X) >= 0.0, 1, -1) 70 | 71 | def plot_decision_regions(X, y, classifier, resolution=0.02): 72 | # setup marker generator and color map 73 | markers = ('s', 'x', 'o', '^', 'v') 74 | colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') 75 | cmap = ListedColormap(colors[:len(np.unique(y))]) 76 | 77 | # plot the decision surface 78 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 79 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 80 | xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), 81 | np.arange(x2_min, x2_max, resolution)) 82 | Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) 83 | Z = Z.reshape(xx1.shape) 84 | plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap) 85 | plt.xlim(xx1.min(), xx1.max()) 86 | plt.ylim(xx2.min(), xx2.max()) 87 | 88 | # plot class samples 89 | for idx, cl in enumerate(np.unique(y)): 90 | plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], 91 | alpha=0.8, c=cmap(idx), 92 | marker=markers[idx], label=cl) 93 | 94 | 95 | if __name__ == "__main__": 96 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None) 97 | y = df.iloc[0:100, 4].values 98 | y = np.where(y == 'Iris-setosa', -1, 1) 99 | X = df.iloc[0:100, [0, 2]].values 100 | fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4)) 101 | ada1 = AdalineGD(n_iter=10, eta=0.01).fit(X, y) 102 | ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o') 103 | ax[0].set_xlabel('Epochs') 104 | ax[0].set_ylabel('log(Sum-squared-error)') 105 | ax[0].set_title('Adaline - Learning rate 0.01') 106 | ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(X, y) 107 | ax[1].plot(range(1, len(ada2.cost_) + 1),ada2.cost_, marker='o') 108 | ax[1].set_xlabel('Epochs') 109 | ax[1].set_ylabel('Sum-squared-error') 110 | ax[1].set_title('Adaline - Learning rate 0.0001') 111 | plt.show() 112 | 113 | X_std = np.copy(X) 114 | X_std[:,0] = (X[:,0] - X[:,0].mean()) / X[:,0].std() 115 | X_std[:,1] = (X[:,1] - X[:,1].mean()) / X[:,1].std() 116 | 117 | ada = AdalineGD(n_iter=15, eta=0.01) 118 | ada.fit(X_std, y) 119 | plot_decision_regions(X_std, y, classifier=ada) 120 | plt.title('Adaline - Gradient Descent') 121 | plt.xlabel('sepal length [standardized]') 122 | plt.ylabel('petal length [standardized]') 123 | plt.legend(loc='upper left') 124 | plt.show() 125 | plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o') 126 | plt.xlabel('Epochs') 127 | plt.ylabel('Sum-squared-error') 128 | 129 | plt.show() 130 | 131 | 132 | 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /ch2-Adaline-stochasticGD.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | from matplotlib.colors 
import ListedColormap 5 | from numpy.random import seed 6 | 7 | class AdalineSGD(object): 8 | """ADAptive LInear NEuron classifier. 9 | 10 | Parameters 11 | ------------ 12 | eta : float 13 | Learning rate (between 0.0 and 1.0) 14 | n_iter : int 15 | Passes over the training dataset. 16 | 17 | Attributes 18 | ----------- 19 | w_ : 1d-array 20 | Weights after fitting. 21 | errors_ : list 22 | Number of misclassifications in every epoch. 23 | shuffle : bool (default: True) 24 | Shuffles training data every epoch 25 | if True to prevent cycles. 26 | random_state : int (default: None) 27 | Set random state for shuffling 28 | and initializing the weights. 29 | 30 | """ 31 | def __init__(self, eta=0.01, n_iter=10, 32 | shuffle=True, random_state=None): 33 | self.eta = eta 34 | self.n_iter = n_iter 35 | self.w_initialized = False 36 | self.shuffle = shuffle 37 | if random_state: 38 | seed(random_state) 39 | 40 | def fit(self, X, y): 41 | """ Fit training data. 42 | 43 | Parameters 44 | ---------- 45 | X : {array-like}, shape = [n_samples, n_features] 46 | Training vectors, where n_samples 47 | is the number of samples and 48 | n_features is the number of features. 49 | y : array-like, shape = [n_samples] 50 | Target values. 51 | 52 | Returns 53 | ------- 54 | self : object 55 | 56 | """ 57 | self._initialize_weights(X.shape[1]) 58 | self.cost_ = [] 59 | for i in range(self.n_iter): 60 | if self.shuffle: 61 | X, y = self._shuffle(X, y) 62 | cost = [] 63 | for xi, target in zip(X, y): 64 | cost.append(self._update_weights(xi, target)) 65 | avg_cost = sum(cost)/len(y) 66 | self.cost_.append(avg_cost) 67 | return self 68 | 69 | def partial_fit(self, X, y): 70 | """Fit training data without reinitializing the weights""" 71 | if not self.w_initialized: 72 | self._initialize_weights(X.shape[1]) 73 | if y.ravel().shape[0] > 1: 74 | for xi, target in zip(X, y): 75 | self._update_weights(xi, target) 76 | else: 77 | self._update_weights(X, y) 78 | return self 79 | 80 | def _shuffle(self, X, y): 81 | """Shuffle training data""" 82 | r = np.random.permutation(len(y)) 83 | return X[r], y[r] 84 | 85 | def _initialize_weights(self, m): 86 | """Initialize weights to zeros""" 87 | self.w_ = np.zeros(1 + m) 88 | self.w_initialized = True 89 | 90 | def _update_weights(self, xi, target): 91 | """Apply Adaline learning rule to update the weights (one update per sample)""" 92 | output = self.net_input(xi) 93 | error = (target - output) 94 | self.w_[1:] += self.eta * xi.dot(error) 95 | self.w_[0] += self.eta * error 96 | cost = 0.5 * error**2 97 | return cost 98 | 99 | 100 | 101 | def net_input(self, X): 102 | """Calculate net input""" 103 | return np.dot(X, self.w_[1:]) + self.w_[0] 104 | 105 | def activation(self, X): 106 | """Compute linear activation""" 107 | return self.net_input(X) 108 | 109 | def predict(self, X): 110 | """Return class label after unit step""" 111 | return np.where(self.activation(X) >= 0.0, 1, -1) 112 | 113 | 114 | def plot_decision_regions(X, y, classifier, resolution=0.02): 115 | # setup marker generator and color map 116 | markers = ('s', 'x', 'o', '^', 'v') 117 | colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') 118 | cmap = ListedColormap(colors[:len(np.unique(y))]) 119 | 120 | # plot the decision surface 121 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 122 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 123 | xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), 124 | np.arange(x2_min, x2_max, resolution)) 125 | Z = 
classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) 126 | Z = Z.reshape(xx1.shape) 127 | plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap) 128 | plt.xlim(xx1.min(), xx1.max()) 129 | plt.ylim(xx2.min(), xx2.max()) 130 | 131 | # plot class samples 132 | for idx, cl in enumerate(np.unique(y)): 133 | plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], 134 | alpha=0.8, c=cmap(idx), 135 | marker=markers[idx], label=cl) 136 | 137 | 138 | if __name__ == "__main__": 139 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None) 140 | y = df.iloc[0:100, 4].values 141 | y = np.where(y == 'Iris-setosa', -1, 1) 142 | X = df.iloc[0:100, [0, 2]].values 143 | X_std = np.copy(X) 144 | X_std[:,0] = (X[:,0] - X[:,0].mean()) / X[:,0].std() 145 | X_std[:,1] = (X[:,1] - X[:,1].mean()) / X[:,1].std() 146 | ada = AdalineSGD(n_iter=15, eta=0.01, random_state=1) 147 | ada.fit(X_std, y) 148 | plot_decision_regions(X_std, y, classifier=ada) 149 | plt.title('Adaline - Stochastic Gradient Descent') 150 | plt.xlabel('sepal length [standardized]') 151 | plt.ylabel('petal length [standardized]') 152 | plt.legend(loc='upper left') 153 | plt.show() 154 | plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o') 155 | plt.xlabel('Epochs') 156 | plt.ylabel('Average Cost') 157 | plt.show() 158 | 159 | -------------------------------------------------------------------------------- /ch2-perceptron.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | from matplotlib.colors import ListedColormap 5 | 6 | class Perceptron(object): 7 | """Perceptron classifier. 8 | 9 | Parameters 10 | ------------ 11 | eta : float 12 | Learning rate (between 0.0 and 1.0) 13 | n_iter : int 14 | Passes over the training dataset. 15 | 16 | Attributes 17 | ----------- 18 | w_ : 1d-array 19 | Weights after fitting. 20 | errors_ : list 21 | Number of misclassifications in every epoch. 22 | 23 | """ 24 | def __init__(self, eta=0.01, n_iter=10): 25 | self.eta = eta 26 | self.n_iter = n_iter 27 | 28 | def fit(self, X, y): 29 | """Fit training data. 30 | 31 | Parameters 32 | ---------- 33 | X : {array-like}, shape = [n_samples, n_features] 34 | Training vectors, where n_samples is the number of samples and 35 | n_features is the number of features. 36 | y : array-like, shape = [n_samples] 37 | Target values. 
38 | 39 | Returns 40 | ------- 41 | self : object 42 | 43 | """ 44 | self.w_ = np.zeros(1 + X.shape[1]) 45 | self.errors_ = [] 46 | 47 | for _ in range(self.n_iter): 48 | errors = 0 49 | for xi, target in zip(X, y): 50 | update = self.eta * (target - self.predict(xi)) 51 | self.w_[1:] += update * xi 52 | self.w_[0] += update 53 | errors += int(update != 0.0) 54 | self.errors_.append(errors) 55 | return self 56 | 57 | def net_input(self, X): 58 | """Calculate net input""" 59 | return np.dot(X, self.w_[1:]) + self.w_[0] 60 | 61 | def predict(self, X): 62 | """Return class label after unit step""" 63 | return np.where(self.net_input(X) >= 0.0, 1, -1) 64 | 65 | 66 | def plot_decision_regions(X, y, classifier, resolution=0.02): 67 | # setup marker generator and color map 68 | markers = ('s', 'x', 'o', '^', 'v') 69 | colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') 70 | cmap = ListedColormap(colors[:len(np.unique(y))]) 71 | 72 | # plot the decision surface 73 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 74 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 75 | xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), 76 | np.arange(x2_min, x2_max, resolution)) 77 | Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) 78 | Z = Z.reshape(xx1.shape) 79 | plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap) 80 | plt.xlim(xx1.min(), xx1.max()) 81 | plt.ylim(xx2.min(), xx2.max()) 82 | 83 | # plot class samples 84 | for idx, cl in enumerate(np.unique(y)): 85 | plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], 86 | alpha=0.8, c=cmap(idx), 87 | marker=markers[idx], label=cl) 88 | 89 | 90 | if __name__ == "__main__": 91 | 92 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None) 93 | y = df.iloc[0:100, 4].values 94 | y = np.where(y == 'Iris-setosa', -1, 1) 95 | X = df.iloc[0:100, [0, 2]].values 96 | plt.scatter(X[:50, 0], X[:50, 1],color='red', marker='o', label='setosa') 97 | plt.scatter(X[50:100, 0], X[50:100, 1],color='blue', marker='x', label='versicolor') 98 | plt.xlabel('sepal length') 99 | plt.ylabel('petal length') 100 | plt.legend(loc='upper left') 101 | plt.show() 102 | 103 | ppn = Perceptron(eta=0.1, n_iter=10) 104 | ppn.fit(X, y) 105 | plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_,marker='o') 106 | plt.xlabel('Epochs') 107 | plt.ylabel('Number of misclassifications') 108 | plt.show() 109 | 110 | plot_decision_regions(X, y, classifier=ppn) 111 | plt.xlabel('sepal length [cm]') 112 | plt.ylabel('petal length [cm]') 113 | plt.legend(loc='upper left') 114 | plt.show() -------------------------------------------------------------------------------- /ch3-SVM-Kernel-on-Iris.py: -------------------------------------------------------------------------------- 1 | from matplotlib.colors import ListedColormap 2 | import matplotlib.pyplot as plt 3 | import warnings 4 | 5 | def versiontuple(v): 6 | return tuple(map(int, (v.split(".")))) 7 | 8 | def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02): 9 | 10 | # setup marker generator and color map 11 | markers = ('s', 'x', 'o', '^', 'v') 12 | colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') 13 | cmap = ListedColormap(colors[:len(np.unique(y))]) 14 | 15 | # plot the decision surface 16 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 17 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 18 | xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), 19 | np.arange(x2_min, x2_max, resolution)) 20 | Z = 
classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) 21 | Z = Z.reshape(xx1.shape) 22 | plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap) 23 | plt.xlim(xx1.min(), xx1.max()) 24 | plt.ylim(xx2.min(), xx2.max()) 25 | 26 | for idx, cl in enumerate(np.unique(y)): 27 | plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], 28 | alpha=0.8, c=cmap(idx), 29 | marker=markers[idx], label=cl) 30 | 31 | # highlight test samples 32 | if test_idx: 33 | # plot all samples 34 | if not versiontuple(np.__version__) >= versiontuple('1.9.0'): 35 | X_test, y_test = X[list(test_idx), :], y[list(test_idx)] 36 | warnings.warn('Please update to NumPy 1.9.0 or newer') 37 | else: 38 | X_test, y_test = X[test_idx, :], y[test_idx] 39 | 40 | plt.scatter(X_test[:, 0], 41 | X_test[:, 1], 42 | c='', 43 | alpha=1.0, 44 | linewidths=1, 45 | marker='o', 46 | s=55, label='test set') 47 | 48 | 49 | if __name__ == "__main__": 50 | 51 | from sklearn import datasets 52 | import numpy as np 53 | 54 | # Loading Iris dataset 55 | iris = datasets.load_iris() 56 | X = iris.data[:, [2, 3]] 57 | y = iris.target 58 | print('Class labels:', np.unique(y)) 59 | 60 | # Splitting data into 70% training and 30% test data: 61 | from sklearn.cross_validation import train_test_split 62 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) 63 | 64 | # Standardizing the features: 65 | from sklearn.preprocessing import StandardScaler 66 | sc = StandardScaler() 67 | sc.fit(X_train) 68 | X_train_std = sc.transform(X_train) 69 | X_test_std = sc.transform(X_test) 70 | 71 | X_combined_std = np.vstack((X_train_std, X_test_std)) 72 | y_combined = np.hstack((y_train, y_test)) 73 | 74 | from sklearn.svm import SVC 75 | 76 | # Use low Gamma 77 | svm = SVC(kernel='rbf', random_state=0, gamma=0.2, C=1.0) 78 | svm.fit(X_train_std, y_train) 79 | plot_decision_regions(X_combined_std, y_combined, 80 | classifier=svm, test_idx=range(105,150)) 81 | plt.xlabel('petal length [standardized]') 82 | plt.ylabel('petal width [standardized]') 83 | plt.legend(loc='upper left') 84 | plt.tight_layout() 85 | # plt.savefig('./figures/support_vector_machine_rbf_iris_1.png', dpi=300) 86 | plt.show() 87 | 88 | # Use high Gamma 89 | svm = SVC(kernel='rbf', random_state=0, gamma=100.0, C=1.0) 90 | svm.fit(X_train_std, y_train) 91 | plot_decision_regions(X_combined_std, y_combined, 92 | classifier=svm, test_idx=range(105, 150)) 93 | plt.xlabel('petal length [standardized]') 94 | plt.ylabel('petal width [standardized]') 95 | plt.legend(loc='upper left') 96 | plt.tight_layout() 97 | # plt.savefig('./figures/support_vector_machine_rbf_iris_2.png', dpi=300) 98 | plt.show() 99 | 100 | 101 | -------------------------------------------------------------------------------- /ch3-SVM-Kernel.py: -------------------------------------------------------------------------------- 1 | from matplotlib.colors import ListedColormap 2 | import matplotlib.pyplot as plt 3 | import warnings 4 | import numpy as np 5 | 6 | 7 | def versiontuple(v): 8 | return tuple(map(int, (v.split(".")))) 9 | 10 | 11 | def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02): 12 | 13 | # setup marker generator and color map 14 | markers = ('s', 'x', 'o', '^', 'v') 15 | colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') 16 | cmap = ListedColormap(colors[:len(np.unique(y))]) 17 | 18 | # plot the decision surface 19 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 20 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 21 | xx1, xx2 = np.meshgrid(np.arange(x1_min, 
x1_max, resolution), 22 | np.arange(x2_min, x2_max, resolution)) 23 | Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) 24 | Z = Z.reshape(xx1.shape) 25 | plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap) 26 | plt.xlim(xx1.min(), xx1.max()) 27 | plt.ylim(xx2.min(), xx2.max()) 28 | 29 | for idx, cl in enumerate(np.unique(y)): 30 | plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], 31 | alpha=0.8, c=cmap(idx), 32 | marker=markers[idx], label=cl) 33 | 34 | # highlight test samples 35 | if test_idx: 36 | # plot all samples 37 | if not versiontuple(np.__version__) >= versiontuple('1.9.0'): 38 | X_test, y_test = X[list(test_idx), :], y[list(test_idx)] 39 | warnings.warn('Please update to NumPy 1.9.0 or newer') 40 | else: 41 | X_test, y_test = X[test_idx, :], y[test_idx] 42 | 43 | plt.scatter(X_test[:, 0], 44 | X_test[:, 1], 45 | c='', 46 | alpha=1.0, 47 | linewidths=1, 48 | marker='o', 49 | s=55, label='test set') 50 | 51 | 52 | 53 | if __name__ == "__main__": 54 | 55 | np.random.seed(0) 56 | X_xor = np.random.randn(200, 2) 57 | y_xor = np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0) 58 | y_xor = np.where(y_xor, 1, -1) 59 | plt.scatter(X_xor[y_xor == 1, 0], X_xor[y_xor == 1, 1], c='b', marker='x', label='1') 60 | plt.scatter(X_xor[y_xor == -1, 0], X_xor[y_xor == -1, 1], c='r', marker='s', label='-1') 61 | plt.xlim([-3, 3]) 62 | plt.ylim([-3, 3]) 63 | plt.legend(loc='best') 64 | plt.tight_layout() 65 | # plt.savefig('./figures/xor.png', dpi=300) 66 | plt.show() 67 | 68 | from sklearn.svm import SVC 69 | svm = SVC(kernel='rbf', random_state=0, gamma=0.10, C=10.0) 70 | svm.fit(X_xor, y_xor) 71 | plot_decision_regions(X_xor, y_xor, classifier=svm) 72 | plt.legend(loc='upper left') 73 | plt.tight_layout() 74 | # plt.savefig('./figures/support_vector_machine_rbf_xor.png', dpi=300) 75 | plt.show() -------------------------------------------------------------------------------- /ch3-SVM.py: -------------------------------------------------------------------------------- 1 | from matplotlib.colors import ListedColormap 2 | import matplotlib.pyplot as plt 3 | import warnings 4 | 5 | 6 | def versiontuple(v): 7 | return tuple(map(int, (v.split(".")))) 8 | 9 | 10 | def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02): 11 | 12 | # setup marker generator and color map 13 | markers = ('s', 'x', 'o', '^', 'v') 14 | colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') 15 | cmap = ListedColormap(colors[:len(np.unique(y))]) 16 | 17 | # plot the decision surface 18 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 19 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 20 | xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), 21 | np.arange(x2_min, x2_max, resolution)) 22 | Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) 23 | Z = Z.reshape(xx1.shape) 24 | plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap) 25 | plt.xlim(xx1.min(), xx1.max()) 26 | plt.ylim(xx2.min(), xx2.max()) 27 | 28 | for idx, cl in enumerate(np.unique(y)): 29 | plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], 30 | alpha=0.8, c=cmap(idx), 31 | marker=markers[idx], label=cl) 32 | 33 | # highlight test samples 34 | if test_idx: 35 | # plot all samples 36 | if not versiontuple(np.__version__) >= versiontuple('1.9.0'): 37 | X_test, y_test = X[list(test_idx), :], y[list(test_idx)] 38 | warnings.warn('Please update to NumPy 1.9.0 or newer') 39 | else: 40 | X_test, y_test = X[test_idx, :], y[test_idx] 41 | 42 | plt.scatter(X_test[:, 0], 43 | X_test[:, 1], 44 | c='', 45 | alpha=1.0, 46 | 
linewidths=1, 47 | marker='o', 48 | s=55, label='test set') 49 | 50 | 51 | if __name__ == "__main__": 52 | 53 | from sklearn import datasets 54 | import numpy as np 55 | 56 | # Loading Iris dataset 57 | iris = datasets.load_iris() 58 | X = iris.data[:, [2, 3]] 59 | y = iris.target 60 | print('Class labels:', np.unique(y)) 61 | 62 | # Splitting data into 70% training and 30% test data: 63 | from sklearn.cross_validation import train_test_split 64 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) 65 | 66 | # Standardizing the features: 67 | from sklearn.preprocessing import StandardScaler 68 | sc = StandardScaler() 69 | sc.fit(X_train) 70 | X_train_std = sc.transform(X_train) 71 | X_test_std = sc.transform(X_test) 72 | 73 | from sklearn.svm import SVC 74 | svm = SVC(kernel='linear', C=1.0, random_state=0) 75 | svm.fit(X_train_std, y_train) 76 | 77 | X_combined_std = np.vstack((X_train_std, X_test_std)) 78 | y_combined = np.hstack((y_train, y_test)) 79 | 80 | plot_decision_regions(X_combined_std, y_combined,classifier=svm, test_idx=range(105, 150)) 81 | plt.xlabel('petal length [standardized]') 82 | plt.ylabel('petal width [standardized]') 83 | plt.legend(loc='upper left') 84 | plt.tight_layout() 85 | # plt.savefig('./figures/support_vector_machine_linear.png', dpi=300) 86 | plt.show() 87 | 88 | 89 | ''' 90 | SGDClassifier can be used for online learning by calling partial_fit method 91 | 92 | from sklearn.linear_model import SGDClassifier 93 | ppn = SGDClassifier(loss='perceptron') 94 | lr = SGDClassifier(loss='log') 95 | svm = SGDClassifier(loss='hinge') 96 | 97 | ''' -------------------------------------------------------------------------------- /ch3-decisionTree-RandomForests.py: -------------------------------------------------------------------------------- 1 | from matplotlib.colors import ListedColormap 2 | import matplotlib.pyplot as plt 3 | import warnings 4 | 5 | def versiontuple(v): 6 | return tuple(map(int, (v.split(".")))) 7 | 8 | 9 | def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02): 10 | 11 | # setup marker generator and color map 12 | markers = ('s', 'x', 'o', '^', 'v') 13 | colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') 14 | cmap = ListedColormap(colors[:len(np.unique(y))]) 15 | 16 | # plot the decision surface 17 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 18 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 19 | xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), 20 | np.arange(x2_min, x2_max, resolution)) 21 | Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) 22 | Z = Z.reshape(xx1.shape) 23 | plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap) 24 | plt.xlim(xx1.min(), xx1.max()) 25 | plt.ylim(xx2.min(), xx2.max()) 26 | 27 | for idx, cl in enumerate(np.unique(y)): 28 | plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], 29 | alpha=0.8, c=cmap(idx), 30 | marker=markers[idx], label=cl) 31 | 32 | # highlight test samples 33 | if test_idx: 34 | # plot all samples 35 | if not versiontuple(np.__version__) >= versiontuple('1.9.0'): 36 | X_test, y_test = X[list(test_idx), :], y[list(test_idx)] 37 | warnings.warn('Please update to NumPy 1.9.0 or newer') 38 | else: 39 | X_test, y_test = X[test_idx, :], y[test_idx] 40 | 41 | plt.scatter(X_test[:, 0], 42 | X_test[:, 1], 43 | c='', 44 | alpha=1.0, 45 | linewidths=1, 46 | marker='o', 47 | s=55, label='test set') 48 | 49 | 50 | if __name__ == "__main__": 51 | 52 | from sklearn import datasets 53 | import numpy as np 54 | 55 | # 
Loading Iris dataset 56 | iris = datasets.load_iris() 57 | X = iris.data[:, [2, 3]] 58 | y = iris.target 59 | print('Class labels:', np.unique(y)) 60 | 61 | # Splitting data into 70% training and 30% test data: 62 | from sklearn.cross_validation import train_test_split 63 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) 64 | 65 | X_combined = np.vstack((X_train, X_test)) 66 | y_combined = np.hstack((y_train, y_test)) 67 | 68 | # Note: No feature scaling required 69 | 70 | from sklearn.ensemble import RandomForestClassifier 71 | 72 | forest = RandomForestClassifier(criterion='entropy', 73 | n_estimators=10, 74 | random_state=1, 75 | n_jobs=2) 76 | forest.fit(X_train, y_train) 77 | 78 | plot_decision_regions(X_combined, y_combined, classifier=forest, test_idx=range(105,150)) 79 | plt.xlabel('petal length [cm]') 80 | plt.ylabel('petal width [cm]') 81 | plt.legend(loc='upper left') 82 | plt.tight_layout() 83 | # plt.savefig('./figures/random_forest.png', dpi=300) 84 | plt.show() -------------------------------------------------------------------------------- /ch3-decisionTrees-InformationGain.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | 5 | def gini(p): 6 | return (p)*(1 - (p)) + (1-p)*(1 - (1-p)) 7 | 8 | 9 | def entropy(p): 10 | return - p*np.log2(p) - (1 - p)*np.log2((1 - p)) 11 | 12 | 13 | def error(p): 14 | return 1 - np.max([p, 1 - p]) 15 | 16 | x = np.arange(0.0, 1.0, 0.01) 17 | 18 | ent = [entropy(p) if p != 0 else None for p in x] 19 | sc_ent = [e*0.5 if e else None for e in ent] 20 | err = [error(i) for i in x] 21 | 22 | fig = plt.figure() 23 | ax = plt.subplot(111) 24 | for i, lab, ls, c, in zip([ent, sc_ent, gini(x), err], 25 | ['Entropy', 'Entropy (scaled)', 26 | 'Gini Impurity', 'Misclassification Error'], 27 | ['-', '-', '--', '-.'], 28 | ['black', 'lightgray', 'red', 'green', 'cyan']): 29 | line = ax.plot(x, i, label=lab, linestyle=ls, lw=2, color=c) 30 | 31 | ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), 32 | ncol=3, fancybox=True, shadow=False) 33 | 34 | ax.axhline(y=0.5, linewidth=1, color='k', linestyle='--') 35 | ax.axhline(y=1.0, linewidth=1, color='k', linestyle='--') 36 | plt.ylim([0, 1.1]) 37 | plt.xlabel('p(i=1)') 38 | plt.ylabel('Impurity Index') 39 | plt.tight_layout() 40 | #plt.savefig('./figures/impurity.png', dpi=300, bbox_inches='tight') 41 | plt.show() -------------------------------------------------------------------------------- /ch3-decisionTrees.py: -------------------------------------------------------------------------------- 1 | from matplotlib.colors import ListedColormap 2 | import matplotlib.pyplot as plt 3 | import warnings 4 | 5 | def versiontuple(v): 6 | return tuple(map(int, (v.split(".")))) 7 | 8 | 9 | def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02): 10 | 11 | # setup marker generator and color map 12 | markers = ('s', 'x', 'o', '^', 'v') 13 | colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') 14 | cmap = ListedColormap(colors[:len(np.unique(y))]) 15 | 16 | # plot the decision surface 17 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 18 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 19 | xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), 20 | np.arange(x2_min, x2_max, resolution)) 21 | Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) 22 | Z = Z.reshape(xx1.shape) 23 | plt.contourf(xx1, xx2, Z, alpha=0.4, 
cmap=cmap) 24 | plt.xlim(xx1.min(), xx1.max()) 25 | plt.ylim(xx2.min(), xx2.max()) 26 | 27 | for idx, cl in enumerate(np.unique(y)): 28 | plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], 29 | alpha=0.8, c=cmap(idx), 30 | marker=markers[idx], label=cl) 31 | 32 | # highlight test samples 33 | if test_idx: 34 | # plot all samples 35 | if not versiontuple(np.__version__) >= versiontuple('1.9.0'): 36 | X_test, y_test = X[list(test_idx), :], y[list(test_idx)] 37 | warnings.warn('Please update to NumPy 1.9.0 or newer') 38 | else: 39 | X_test, y_test = X[test_idx, :], y[test_idx] 40 | 41 | plt.scatter(X_test[:, 0], 42 | X_test[:, 1], 43 | c='', 44 | alpha=1.0, 45 | linewidths=1, 46 | marker='o', 47 | s=55, label='test set') 48 | 49 | 50 | if __name__ == "__main__": 51 | 52 | from sklearn import datasets 53 | import numpy as np 54 | 55 | # Loading Iris dataset 56 | iris = datasets.load_iris() 57 | X = iris.data[:, [2, 3]] 58 | y = iris.target 59 | print('Class labels:', np.unique(y)) 60 | 61 | # Splitting data into 70% training and 30% test data: 62 | from sklearn.cross_validation import train_test_split 63 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) 64 | 65 | # Note: No feature scaling required 66 | 67 | from sklearn.tree import DecisionTreeClassifier 68 | tree = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0) 69 | tree.fit(X_train, y_train) 70 | 71 | X_combined = np.vstack((X_train, X_test)) 72 | y_combined = np.hstack((y_train, y_test)) 73 | plot_decision_regions(X_combined, y_combined, classifier=tree, test_idx=range(105,150)) 74 | plt.xlabel('petal length [cm]') 75 | plt.ylabel('petal width [cm]') 76 | plt.legend(loc='upper left') 77 | plt.tight_layout() 78 | # plt.savefig('./figures/decision_tree_decision.png', dpi=300) 79 | plt.show() 80 | 81 | 82 | # export decision tree to an image 83 | from sklearn.tree import export_graphviz 84 | export_graphviz(tree, out_file='/tmp/tree.dot', feature_names=['petal length', 'petal width']) 85 | # Run below on your computer to create png from .dot file (need GraphViz installed) 86 | # “dot -Tpng /tmp/tree.dot -o /tmp/tree.png” 87 | -------------------------------------------------------------------------------- /ch3-k-nearest-neighbors.py: -------------------------------------------------------------------------------- 1 | from matplotlib.colors import ListedColormap 2 | import matplotlib.pyplot as plt 3 | import warnings 4 | 5 | def versiontuple(v): 6 | return tuple(map(int, (v.split(".")))) 7 | 8 | 9 | def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02): 10 | 11 | # setup marker generator and color map 12 | markers = ('s', 'x', 'o', '^', 'v') 13 | colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') 14 | cmap = ListedColormap(colors[:len(np.unique(y))]) 15 | 16 | # plot the decision surface 17 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 18 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 19 | xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), 20 | np.arange(x2_min, x2_max, resolution)) 21 | Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) 22 | Z = Z.reshape(xx1.shape) 23 | plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap) 24 | plt.xlim(xx1.min(), xx1.max()) 25 | plt.ylim(xx2.min(), xx2.max()) 26 | 27 | for idx, cl in enumerate(np.unique(y)): 28 | plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], 29 | alpha=0.8, c=cmap(idx), 30 | marker=markers[idx], label=cl) 31 | 32 | # highlight test samples 33 | if test_idx: 34 | # plot 
all samples 35 | if not versiontuple(np.__version__) >= versiontuple('1.9.0'): 36 | X_test, y_test = X[list(test_idx), :], y[list(test_idx)] 37 | warnings.warn('Please update to NumPy 1.9.0 or newer') 38 | else: 39 | X_test, y_test = X[test_idx, :], y[test_idx] 40 | 41 | plt.scatter(X_test[:, 0], 42 | X_test[:, 1], 43 | c='', 44 | alpha=1.0, 45 | linewidths=1, 46 | marker='o', 47 | s=55, label='test set') 48 | 49 | 50 | if __name__ == "__main__": 51 | 52 | from sklearn import datasets 53 | import numpy as np 54 | 55 | # Loading Iris dataset 56 | iris = datasets.load_iris() 57 | X = iris.data[:, [2, 3]] 58 | y = iris.target 59 | print('Class labels:', np.unique(y)) 60 | 61 | # Splitting data into 70% training and 30% test data: 62 | from sklearn.cross_validation import train_test_split 63 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) 64 | 65 | 66 | # Standardizing the features: 67 | from sklearn.preprocessing import StandardScaler 68 | sc = StandardScaler() 69 | sc.fit(X_train) 70 | X_train_std = sc.transform(X_train) 71 | X_test_std = sc.transform(X_test) 72 | 73 | X_combined_std = np.vstack((X_train_std, X_test_std)) 74 | y_combined = np.hstack((y_train, y_test)) 75 | 76 | from sklearn.neighbors import KNeighborsClassifier 77 | 78 | knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski') 79 | knn.fit(X_train_std, y_train) 80 | 81 | plot_decision_regions(X_combined_std, y_combined, classifier=knn, test_idx=range(105,150)) 82 | 83 | plt.xlabel('petal length [standardized]') 84 | plt.ylabel('petal width [standardized]') 85 | plt.legend(loc='upper left') 86 | plt.tight_layout() 87 | # plt.savefig('./figures/k_nearest_neighbors.png', dpi=300) 88 | plt.show() 89 | 90 | -------------------------------------------------------------------------------- /ch3-logisticRegression.py: -------------------------------------------------------------------------------- 1 | from matplotlib.colors import ListedColormap 2 | import matplotlib.pyplot as plt 3 | import warnings 4 | from sklearn.linear_model import LogisticRegression 5 | 6 | def versiontuple(v): 7 | return tuple(map(int, (v.split(".")))) 8 | 9 | 10 | def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02): 11 | 12 | # setup marker generator and color map 13 | markers = ('s', 'x', 'o', '^', 'v') 14 | colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') 15 | cmap = ListedColormap(colors[:len(np.unique(y))]) 16 | 17 | # plot the decision surface 18 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 19 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 20 | xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), 21 | np.arange(x2_min, x2_max, resolution)) 22 | Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) 23 | Z = Z.reshape(xx1.shape) 24 | plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap) 25 | plt.xlim(xx1.min(), xx1.max()) 26 | plt.ylim(xx2.min(), xx2.max()) 27 | 28 | for idx, cl in enumerate(np.unique(y)): 29 | plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], 30 | alpha=0.8, c=cmap(idx), 31 | marker=markers[idx], label=cl) 32 | 33 | # highlight test samples 34 | if test_idx: 35 | # plot all samples 36 | if not versiontuple(np.__version__) >= versiontuple('1.9.0'): 37 | X_test, y_test = X[list(test_idx), :], y[list(test_idx)] 38 | warnings.warn('Please update to NumPy 1.9.0 or newer') 39 | else: 40 | X_test, y_test = X[test_idx, :], y[test_idx] 41 | 42 | plt.scatter(X_test[:, 0], 43 | X_test[:, 1], 44 | c='', 45 | alpha=1.0, 46 | linewidths=1, 47 
| marker='o', 48 | s=55, label='test set') 49 | 50 | 51 | if __name__ == "__main__": 52 | 53 | from sklearn import datasets 54 | import numpy as np 55 | 56 | # Loading Iris dataset 57 | iris = datasets.load_iris() 58 | X = iris.data[:, [2, 3]] 59 | y = iris.target 60 | print('Class labels:', np.unique(y)) 61 | 62 | # Splitting data into 70% training and 30% test data: 63 | from sklearn.cross_validation import train_test_split 64 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) 65 | 66 | # Standardizing the features: 67 | from sklearn.preprocessing import StandardScaler 68 | sc = StandardScaler() 69 | sc.fit(X_train) 70 | X_train_std = sc.transform(X_train) 71 | X_test_std = sc.transform(X_test) 72 | 73 | lr = LogisticRegression(C=1000.0, random_state=0) 74 | lr.fit(X_train_std, y_train) 75 | 76 | X_combined_std = np.vstack((X_train_std, X_test_std)) 77 | y_combined = np.hstack((y_train, y_test)) 78 | 79 | plot_decision_regions(X_combined_std, y_combined, classifier=lr, test_idx=range(105,150)) 80 | plt.xlabel('petal length [standardized]') 81 | plt.ylabel('petal width [standardized]') 82 | plt.legend(loc='upper left') 83 | plt.show() 84 | 85 | # Display probability of x0 86 | lr.predict_proba(X_test_std[0,:]) -------------------------------------------------------------------------------- /ch3-logisticregression-cost.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | def sigmoid(z): 5 | return 1.0 / (1.0 + np.exp(-z)) 6 | 7 | def cost_1(z): 8 | return - np.log(sigmoid(z)) 9 | 10 | def cost_0(z): 11 | return - np.log(1 - sigmoid(z)) 12 | 13 | z = np.arange(-10, 10, 0.1) 14 | phi_z = sigmoid(z) 15 | 16 | c1 = [cost_1(x) for x in z] 17 | plt.plot(phi_z, c1, label='J(w) if y=1') 18 | 19 | c0 = [cost_0(x) for x in z] 20 | plt.plot(phi_z, c0, linestyle='--', label='J(w) if y=0') 21 | 22 | plt.ylim(0.0, 5.1) 23 | plt.xlim([0, 1]) 24 | plt.xlabel('$\phi$(z)') 25 | plt.ylabel('J(w)') 26 | plt.legend(loc='best') 27 | plt.tight_layout() 28 | # plt.savefig('./figures/log_cost.png', dpi=300) 29 | plt.show() -------------------------------------------------------------------------------- /ch3-scikit-learn-perceptron.py: -------------------------------------------------------------------------------- 1 | from matplotlib.colors import ListedColormap 2 | import matplotlib.pyplot as plt 3 | import warnings 4 | 5 | def versiontuple(v): 6 | return tuple(map(int, (v.split(".")))) 7 | 8 | 9 | def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02): 10 | 11 | # setup marker generator and color map 12 | markers = ('s', 'x', 'o', '^', 'v') 13 | colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') 14 | cmap = ListedColormap(colors[:len(np.unique(y))]) 15 | 16 | # plot the decision surface 17 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 18 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 19 | xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), 20 | np.arange(x2_min, x2_max, resolution)) 21 | Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) 22 | Z = Z.reshape(xx1.shape) 23 | plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap) 24 | plt.xlim(xx1.min(), xx1.max()) 25 | plt.ylim(xx2.min(), xx2.max()) 26 | 27 | for idx, cl in enumerate(np.unique(y)): 28 | plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], 29 | alpha=0.8, c=cmap(idx), 30 | marker=markers[idx], label=cl) 31 | 32 | # highlight test samples 33 | if test_idx: 34 | # 
plot all samples 35 | if not versiontuple(np.__version__) >= versiontuple('1.9.0'): 36 | X_test, y_test = X[list(test_idx), :], y[list(test_idx)] 37 | warnings.warn('Please update to NumPy 1.9.0 or newer') 38 | else: 39 | X_test, y_test = X[test_idx, :], y[test_idx] 40 | 41 | plt.scatter(X_test[:, 0], 42 | X_test[:, 1], 43 | c='', 44 | alpha=1.0, 45 | linewidths=1, 46 | marker='o', 47 | s=55, label='test set') 48 | 49 | 50 | if __name__ == "__main__": 51 | 52 | from sklearn import datasets 53 | import numpy as np 54 | 55 | # Loading Iris dataset 56 | iris = datasets.load_iris() 57 | X = iris.data[:, [2, 3]] 58 | y = iris.target 59 | print('Class labels:', np.unique(y)) 60 | 61 | # Splitting data into 70% training and 30% test data: 62 | from sklearn.cross_validation import train_test_split 63 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) 64 | 65 | # Standardizing the features: 66 | from sklearn.preprocessing import StandardScaler 67 | sc = StandardScaler() 68 | sc.fit(X_train) 69 | X_train_std = sc.transform(X_train) 70 | X_test_std = sc.transform(X_test) 71 | 72 | # Fit data to model 73 | from sklearn.linear_model import Perceptron 74 | ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0) 75 | ppn.fit(X_train_std, y_train) 76 | 77 | # Predict 78 | y_pred = ppn.predict(X_test_std) 79 | print('Misclassified samples: %d' % (y_test != y_pred).sum()) 80 | 81 | # Accuracy 82 | from sklearn.metrics import accuracy_score 83 | print('Accuracy: %.2f' % accuracy_score(y_test, y_pred)) 84 | 85 | X_combined_std = np.vstack((X_train_std, X_test_std)) 86 | y_combined = np.hstack((y_train, y_test)) 87 | 88 | plot_decision_regions(X=X_combined_std, y=y_combined, classifier=ppn, test_idx=range(105, 150)) 89 | plt.xlabel('petal length [standardized]') 90 | plt.ylabel('petal width [standardized]') 91 | plt.legend(loc='upper left') 92 | 93 | plt.tight_layout() 94 | # plt.savefig('./figures/iris_perceptron_scikit.png', dpi=300) 95 | plt.show() 96 | -------------------------------------------------------------------------------- /ch3-sigmoid.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | 5 | def sigmoid(z): 6 | return 1.0 / (1.0 + np.exp(-z)) 7 | 8 | z = np.arange(-7, 7, 0.1) 9 | phi_z = sigmoid(z) 10 | 11 | plt.plot(z, phi_z) 12 | plt.axvline(0.0, color='k') 13 | plt.ylim(-0.1, 1.1) 14 | plt.xlabel('z') 15 | plt.ylabel('$\phi (z)$') 16 | 17 | # y axis ticks and gridline 18 | plt.yticks([0.0, 0.5, 1.0]) 19 | ax = plt.gca() 20 | ax.yaxis.grid(True) 21 | 22 | plt.tight_layout() 23 | # plt.savefig('./figures/sigmoid.png', dpi=300) 24 | plt.show() -------------------------------------------------------------------------------- /ch4-categoricalData.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | df = pd.DataFrame([ 3 | ['green', 'M', 10.1, 'class1'], 4 | ['red', 'L', 13.5, 'class2'], 5 | ['blue', 'XL', 15.3, 'class1']]) 6 | 7 | df.columns = ['color', 'size', 'price', 'classlabel'] 8 | df 9 | 10 | # convert ordinal data: sizes to integers 11 | size_mapping = { 12 | 'XL': 3, 13 | 'L': 2, 14 | 'M': 1} 15 | 16 | df['size'] = df['size'].map(size_mapping) 17 | df 18 | inv_size_mapping = {v: k for k, v in size_mapping.items()} 19 | df['size'].map(inv_size_mapping) 20 | 21 | # class labels 22 | import numpy as np 23 | class_mapping = {label:idx for idx,label in enumerate(np.unique(df['classlabel']))} 24 | 
class_mapping 25 | df['classlabel'] = df['classlabel'].map(class_mapping) 26 | df 27 | inv_class_mapping = {v: k for k, v in class_mapping.items()} 28 | df['classlabel'] = df['classlabel'].map(inv_class_mapping) 29 | df 30 | 31 | 32 | # alternative way for class labels 33 | from sklearn.preprocessing import LabelEncoder 34 | class_le = LabelEncoder() 35 | y = class_le.fit_transform(df['classlabel'].values) 36 | y 37 | class_le.inverse_transform(y) 38 | 39 | 40 | 41 | X = df[['color', 'size', 'price']].values 42 | # Convert nominal data: color to numerical 43 | color_le = LabelEncoder() 44 | X[:, 0] = color_le.fit_transform(X[:, 0]) 45 | X 46 | 47 | 48 | # One hot encoding to derive extra features from color so color integer values 49 | # don't cause algorithm issues 50 | from sklearn.preprocessing import OneHotEncoder 51 | ohe = OneHotEncoder(categorical_features=[0]) 52 | ohe.fit_transform(X).toarray() 53 | 54 | # Alternative: One hot encoding using pandas 55 | pd.get_dummies(df[['price', 'color', 'size']]) 56 | -------------------------------------------------------------------------------- /ch4-featureSelection-randomForest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | 5 | if __name__ == "__main__": 6 | 7 | # Grab wines data set 8 | df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data', header=None) 9 | 10 | df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash', 11 | 'Alcalinity of ash', 'Magnesium', 'Total phenols', 12 | 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 13 | 'Color intensity', 'Hue', 'OD280/OD315 of diluted wines', 'Proline'] 14 | 15 | from sklearn.cross_validation import train_test_split 16 | X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values 17 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) 18 | 19 | from sklearn.preprocessing import StandardScaler 20 | stdsc = StandardScaler() 21 | X_train_std = stdsc.fit_transform(X_train) 22 | X_test_std = stdsc.transform(X_test) 23 | 24 | from sklearn.ensemble import RandomForestClassifier 25 | feat_labels = df_wine.columns[1:] 26 | forest = RandomForestClassifier(n_estimators=10000, 27 | random_state=0, 28 | n_jobs=-1) 29 | 30 | forest.fit(X_train, y_train) 31 | importances = forest.feature_importances_ 32 | 33 | indices = np.argsort(importances)[::-1] 34 | 35 | for f in range(X_train.shape[1]): 36 | print("%2d) %-*s %f" % (f + 1, 30, 37 | feat_labels[indices[f]], 38 | importances[indices[f]])) 39 | 40 | plt.title('Feature Importances') 41 | plt.bar(range(X_train.shape[1]), 42 | importances[indices], 43 | color='lightblue', 44 | align='center') 45 | 46 | plt.xticks(range(X_train.shape[1]), 47 | feat_labels[indices], rotation=90) 48 | plt.xlim([-1, X_train.shape[1]]) 49 | plt.tight_layout() 50 | #plt.savefig('./random_forest.png', dpi=300) 51 | plt.show() 52 | -------------------------------------------------------------------------------- /ch4-imputation.py: -------------------------------------------------------------------------------- 1 | from sklearn.preprocessing import Imputer 2 | import pandas as pd 3 | from io import StringIO 4 | 5 | csv_data = '''A,B,C,D 6 | 1.0,2.0,3.0,4.0 7 | 5.0,6.0,,8.0 8 | 10.0,11.0,12.0,''' 9 | 10 | # If you are using Python 2.7, you need 11 | # to convert the string to unicode: 12 | # csv_data = unicode(csv_data) 13 | 14 | df = pd.read_csv(StringIO(csv_data)) 
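# Note: scikit-learn 0.22+ removed the `Imputer` class used below in favour of
# `sklearn.impute.SimpleImputer`, which always works column-wise (the axis=0 behaviour here).
# A minimal sketch, assuming a newer scikit-learn release is installed:
#
#   import numpy as np
#   from sklearn.impute import SimpleImputer
#   imr = SimpleImputer(missing_values=np.nan, strategy='mean')
#   imputed_data = imr.fit_transform(df.values)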
15 | 16 | imr = Imputer(missing_values='NaN', strategy='mean', axis=0) 17 | imr = imr.fit(df) 18 | imputed_data = imr.transform(df.values) 19 | imputed_data 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /ch4-missingData.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from io import StringIO 3 | 4 | csv_data = '''A,B,C,D 5 | 1.0,2.0,3.0,4.0 6 | 5.0,6.0,,8.0 7 | 10.0,11.0,12.0,''' 8 | 9 | # If you are using Python 2.7, you need 10 | # to convert the string to unicode: 11 | # csv_data = unicode(csv_data) 12 | 13 | df = pd.read_csv(StringIO(csv_data)) 14 | 15 | # Show count of missing data 16 | df.isnull().sum() 17 | 18 | # Drop rows with missing data 19 | df.dropna() 20 | 21 | # Drop columns that contain a NaN 22 | df.dropna(axis=1) 23 | 24 | # only drop rows where all columns are NaN 25 | df.dropna(how='all') 26 | 27 | # drop rows that have fewer than 4 non-NaN values 28 | df.dropna(thresh=4) 29 | 30 | # only drop rows where NaN appear in specific columns (here: 'C') 31 | df.dropna(subset=['C']) 32 | 33 | -------------------------------------------------------------------------------- /ch4-partitioningData.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | # Grab wines data set 5 | df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data', header=None) 6 | 7 | df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash', 8 | 'Alcalinity of ash', 'Magnesium', 'Total phenols', 9 | 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 10 | 'Color intensity', 'Hue', 'OD280/OD315 of diluted wines', 'Proline'] 11 | 12 | print('Class labels', np.unique(df_wine['Class label'])) 13 | df_wine.head() 14 | 15 | from sklearn.cross_validation import train_test_split 16 | 17 | X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values 18 | 19 | X_train, X_test, y_train, y_test = \ 20 | train_test_split(X, y, test_size=0.3, random_state=0) 21 | 22 | 23 | # normalization 24 | from sklearn.preprocessing import MinMaxScaler 25 | mms = MinMaxScaler() 26 | X_train_norm = mms.fit_transform(X_train) 27 | X_test_norm = mms.transform(X_test) 28 | 29 | 30 | # standardization (generally better suited to machine learning algorithms) 31 | from sklearn.preprocessing import StandardScaler 32 | stdsc = StandardScaler() 33 | X_train_std = stdsc.fit_transform(X_train) 34 | X_test_std = stdsc.transform(X_test) 35 | 36 | from sklearn.linear_model import LogisticRegression 37 | 38 | # Using the L1 regularization penalty 39 | lr = LogisticRegression(penalty='l1', C=0.1) 40 | lr.fit(X_train_std, y_train) 41 | print('Training accuracy:', lr.score(X_train_std, y_train)) 42 | print('Test accuracy:', lr.score(X_test_std, y_test)) 43 | 44 | 45 | lr.intercept_ 46 | 47 | # show weights (3 rows for three classes) 48 | lr.coef_ 49 | ''' 50 | We notice that the weight vectors are sparse, which means that they only have a 51 | few non-zero entries. 
As a result of the L1 regularization, which serves as a method 52 | for feature selection, we just trained a model that is robust to the potentially 53 | irrelevant features in this dataset.''' 54 | 55 | 56 | import matplotlib.pyplot as plt 57 | 58 | fig = plt.figure() 59 | ax = plt.subplot(111) 60 | 61 | colors = ['blue', 'green', 'red', 'cyan', 62 | 'magenta', 'yellow', 'black', 63 | 'pink', 'lightgreen', 'lightblue', 64 | 'gray', 'indigo', 'orange'] 65 | 66 | weights, params = [], [] 67 | for c in np.arange(-4, 6): 68 | lr = LogisticRegression(penalty='l1', C=10**c, random_state=0) 69 | lr.fit(X_train_std, y_train) 70 | weights.append(lr.coef_[1]) 71 | params.append(10**c) 72 | 73 | weights = np.array(weights) 74 | 75 | for column, color in zip(range(weights.shape[1]), colors): 76 | plt.plot(params, weights[:, column], 77 | label=df_wine.columns[column+1], 78 | color=color) 79 | plt.axhline(0, color='black', linestyle='--', linewidth=3) 80 | plt.xlim([10**(-5), 10**5]) 81 | plt.ylabel('weight coefficient') 82 | plt.xlabel('C') 83 | plt.xscale('log') 84 | plt.legend(loc='upper left') 85 | ax.legend(loc='upper center', 86 | bbox_to_anchor=(1.38, 1.03), 87 | ncol=1, fancybox=True) 88 | # plt.savefig('./figures/l1_path.png', dpi=300) 89 | plt.show() 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /ch4-seq-feature-selection.py: -------------------------------------------------------------------------------- 1 | from sklearn.base import clone 2 | from itertools import combinations 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.metrics import accuracy_score 6 | 7 | # Sequential Backward Selection (SBS) 8 | class SBS(): 9 | def __init__(self, estimator, k_features, scoring=accuracy_score, 10 | test_size=0.25, random_state=1): 11 | self.scoring = scoring 12 | self.estimator = clone(estimator) 13 | self.k_features = k_features 14 | self.test_size = test_size 15 | self.random_state = random_state 16 | 17 | def fit(self, X, y): 18 | 19 | X_train, X_test, y_train, y_test = \ 20 | train_test_split(X, y, test_size=self.test_size, 21 | random_state=self.random_state) 22 | 23 | dim = X_train.shape[1] 24 | self.indices_ = tuple(range(dim)) 25 | self.subsets_ = [self.indices_] 26 | score = self._calc_score(X_train, y_train, 27 | X_test, y_test, self.indices_) 28 | self.scores_ = [score] 29 | 30 | while dim > self.k_features: 31 | scores = [] 32 | subsets = [] 33 | 34 | for p in combinations(self.indices_, r=dim-1): 35 | score = self._calc_score(X_train, y_train, 36 | X_test, y_test, p) 37 | scores.append(score) 38 | subsets.append(p) 39 | 40 | best = np.argmax(scores) 41 | self.indices_ = subsets[best] 42 | self.subsets_.append(self.indices_) 43 | dim -= 1 44 | 45 | self.scores_.append(scores[best]) 46 | self.k_score_ = self.scores_[-1] 47 | 48 | return self 49 | 50 | def transform(self, X): 51 | return X[:, self.indices_] 52 | 53 | def _calc_score(self, X_train, y_train, X_test, y_test, indices): 54 | self.estimator.fit(X_train[:, indices], y_train) 55 | y_pred = self.estimator.predict(X_test[:, indices]) 56 | score = self.scoring(y_test, y_pred) 57 | return score 58 | 59 | if __name__ == "__main__": 60 | 61 | # Grab wines data set 62 | df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data', header=None) 63 | 64 | df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash', 65 | 'Alcalinity of ash', 'Magnesium', 'Total phenols', 66 | 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 
67 | 'Color intensity', 'Hue', 'OD280/OD315 of diluted wines', 'Proline'] 68 | 69 | from sklearn.cross_validation import train_test_split 70 | X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values 71 | 72 | X_train, X_test, y_train, y_test = \ 73 | train_test_split(X, y, test_size=0.3, random_state=0) 74 | 75 | 76 | from sklearn.preprocessing import StandardScaler 77 | stdsc = StandardScaler() 78 | X_train_std = stdsc.fit_transform(X_train) 79 | X_test_std = stdsc.transform(X_test) 80 | 81 | from sklearn.neighbors import KNeighborsClassifier 82 | import matplotlib.pyplot as plt 83 | knn = KNeighborsClassifier(n_neighbors=2) 84 | 85 | # selecting features using SBS 86 | sbs = SBS(knn, k_features=1) 87 | sbs.fit(X_train_std, y_train) 88 | 89 | # plotting performance of feature subsets 90 | k_feat = [len(k) for k in sbs.subsets_] 91 | 92 | plt.plot(k_feat, sbs.scores_, marker='o') 93 | plt.ylim([0.7, 1.1]) 94 | plt.ylabel('Accuracy') 95 | plt.xlabel('Number of features') 96 | plt.grid() 97 | plt.tight_layout() 98 | # plt.savefig('./sbs.png', dpi=300) 99 | plt.show() 100 | 101 | -------------------------------------------------------------------------------- /ch5-LDA-scikit.py: -------------------------------------------------------------------------------- 1 | from matplotlib.colors import ListedColormap 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | def plot_decision_regions(X, y, classifier, resolution=0.02): 6 | # setup marker generator and color map 7 | markers = ('s', 'x', 'o', '^', 'v') 8 | colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') 9 | cmap = ListedColormap(colors[:len(np.unique(y))]) 10 | 11 | # plot the decision surface 12 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 13 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 14 | xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), 15 | np.arange(x2_min, x2_max, resolution)) 16 | Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) 17 | Z = Z.reshape(xx1.shape) 18 | plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap) 19 | plt.xlim(xx1.min(), xx1.max()) 20 | plt.ylim(xx2.min(), xx2.max()) 21 | 22 | # plot class samples 23 | for idx, cl in enumerate(np.unique(y)): 24 | plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], 25 | alpha=0.8, c=cmap(idx), 26 | marker=markers[idx], label=cl) 27 | 28 | import pandas as pd 29 | df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data', header=None) 30 | 31 | # Split and Normalize dataset 32 | from sklearn.cross_validation import train_test_split 33 | from sklearn.preprocessing import StandardScaler 34 | X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values 35 | X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.3, random_state=0) 36 | sc = StandardScaler() 37 | X_train_std = sc.fit_transform(X_train) 38 | X_test_std = sc.transform(X_test) 39 | 40 | from sklearn.linear_model import LogisticRegression 41 | from sklearn.lda import LDA 42 | 43 | 44 | lda = LDA(n_components=2) 45 | X_train_lda = lda.fit_transform(X_train_std, y_train) 46 | lr = LogisticRegression() 47 | lr = lr.fit(X_train_lda, y_train) 48 | 49 | 50 | plot_decision_regions(X_train_lda, y_train, classifier=lr) 51 | plt.xlabel('LD 1') 52 | plt.ylabel('LD 2') 53 | plt.legend(loc='lower left') 54 | plt.show() 55 | 56 | 57 | X_test_lda = lda.transform(X_test_std) 58 | plot_decision_regions(X_test_lda, y_test, classifier=lr) 59 | plt.xlabel('LD 1') 60 | plt.ylabel('LD 2') 61 | plt.legend(loc='lower left') 62 | 
plt.show() 63 | -------------------------------------------------------------------------------- /ch5-PCA-Kernel-ex1.py: -------------------------------------------------------------------------------- 1 | from scipy.spatial.distance import pdist, squareform 2 | from scipy import exp 3 | from scipy.linalg import eigh 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | def rbf_kernel_pca(X, gamma, n_components): 9 | """ 10 | RBF kernel PCA implementation. 11 | 12 | Parameters 13 | ------------ 14 | X: {NumPy ndarray}, shape = [n_samples, n_features] 15 | 16 | gamma: float 17 | Tuning parameter of the RBF kernel 18 | 19 | n_components: int 20 | Number of principal components to return 21 | 22 | Returns 23 | ------------ 24 | X_pc: {NumPy ndarray}, shape = [n_samples, k_features] 25 | Projected dataset 26 | 27 | """ 28 | # Calculate pairwise squared Euclidean distances 29 | # in the MxN dimensional dataset. 30 | sq_dists = pdist(X, 'sqeuclidean') 31 | 32 | # Convert pairwise distances into a square matrix. 33 | mat_sq_dists = squareform(sq_dists) 34 | 35 | # Compute the symmetric kernel matrix. 36 | K = exp(-gamma * mat_sq_dists) 37 | 38 | # Center the kernel matrix. 39 | N = K.shape[0] 40 | one_n = np.ones((N,N)) / N 41 | K = K - one_n.dot(K) - K.dot(one_n) + one_n.dot(K).dot(one_n) 42 | 43 | # Obtaining eigenpairs from the centered kernel matrix 44 | # numpy.eigh returns them in sorted order 45 | eigvals, eigvecs = eigh(K) 46 | 47 | # Collect the top k eigenvectors (projected samples) 48 | X_pc = np.column_stack((eigvecs[:, -i] 49 | for i in range(1, n_components + 1))) 50 | 51 | return X_pc 52 | 53 | 54 | from sklearn.datasets import make_moons 55 | X, y = make_moons(n_samples=100, random_state=123) 56 | plt.scatter(X[y==0, 0], X[y==0, 1], color='red', marker='^', alpha=0.5) 57 | plt.scatter(X[y==1, 0], X[y==1, 1], color='blue', marker='o', alpha=0.5) 58 | plt.show() 59 | 60 | from sklearn.decomposition import PCA 61 | scikit_pca = PCA(n_components=2) 62 | X_spca = scikit_pca.fit_transform(X) 63 | 64 | # Show effects of normal PCA 65 | fig, ax = plt.subplots(nrows=1,ncols=2, figsize=(7,3)) 66 | ax[0].scatter(X_spca[y==0, 0], X_spca[y==0, 1], color='red', marker='^', alpha=0.5) 67 | ax[0].scatter(X_spca[y==1, 0], X_spca[y==1, 1], color='blue', marker='o', alpha=0.5) 68 | ax[1].scatter(X_spca[y==0, 0], np.zeros((50,1))+0.02, color='red', marker='^', alpha=0.5) 69 | ax[1].scatter(X_spca[y==1, 0], np.zeros((50,1))-0.02, color='blue', marker='o', alpha=0.5) 70 | ax[0].set_xlabel('PC1') 71 | ax[0].set_ylabel('PC2') 72 | ax[1].set_ylim([-1, 1]) 73 | ax[1].set_yticks([]) 74 | ax[1].set_xlabel('PC1') 75 | plt.show() 76 | 77 | 78 | # Apply RBF Kernel 79 | from matplotlib.ticker import FormatStrFormatter 80 | X_kpca = rbf_kernel_pca(X, gamma=15, n_components=2) 81 | 82 | # Show effects of RBF PCA Kernel 83 | fig, ax = plt.subplots(nrows=1,ncols=2, figsize=(7,3)) 84 | ax[0].scatter(X_kpca[y==0, 0], X_kpca[y==0, 1], color='red', marker='^', alpha=0.5) 85 | ax[0].scatter(X_kpca[y==1, 0], X_kpca[y==1, 1], color='blue', marker='o', alpha=0.5) 86 | ax[1].scatter(X_kpca[y==0, 0], np.zeros((50,1))+0.02, color='red', marker='^', alpha=0.5) 87 | ax[1].scatter(X_kpca[y==1, 0], np.zeros((50,1))-0.02, color='blue', marker='o', alpha=0.5) 88 | ax[0].set_xlabel('PC1') 89 | ax[0].set_ylabel('PC2') 90 | ax[1].set_ylim([-1, 1]) 91 | ax[1].set_yticks([]) 92 | ax[1].set_xlabel('PC1') 93 | ax[0].xaxis.set_major_formatter(FormatStrFormatter('%0.1f')) 94 | 
ax[1].xaxis.set_major_formatter(FormatStrFormatter('%0.1f')) 95 | plt.show() 96 | 97 | 98 | -------------------------------------------------------------------------------- /ch5-PCA-Kernel-ex2.py: -------------------------------------------------------------------------------- 1 | from scipy.spatial.distance import pdist, squareform 2 | from scipy import exp 3 | from scipy.linalg import eigh 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | def rbf_kernel_pca(X, gamma, n_components): 9 | """ 10 | RBF kernel PCA implementation. 11 | 12 | Parameters 13 | ------------ 14 | X: {NumPy ndarray}, shape = [n_samples, n_features] 15 | 16 | gamma: float 17 | Tuning parameter of the RBF kernel 18 | 19 | n_components: int 20 | Number of principal components to return 21 | 22 | Returns 23 | ------------ 24 | X_pc: {NumPy ndarray}, shape = [n_samples, k_features] 25 | Projected dataset 26 | 27 | """ 28 | # Calculate pairwise squared Euclidean distances 29 | # in the MxN dimensional dataset. 30 | sq_dists = pdist(X, 'sqeuclidean') 31 | 32 | # Convert pairwise distances into a square matrix. 33 | mat_sq_dists = squareform(sq_dists) 34 | 35 | # Compute the symmetric kernel matrix. 36 | K = exp(-gamma * mat_sq_dists) 37 | 38 | # Center the kernel matrix. 39 | N = K.shape[0] 40 | one_n = np.ones((N,N)) / N 41 | K = K - one_n.dot(K) - K.dot(one_n) + one_n.dot(K).dot(one_n) 42 | 43 | # Obtaining eigenpairs from the centered kernel matrix 44 | # numpy.eigh returns them in sorted order 45 | eigvals, eigvecs = eigh(K) 46 | 47 | # Collect the top k eigenvectors (projected samples) 48 | X_pc = np.column_stack((eigvecs[:, -i] 49 | for i in range(1, n_components + 1))) 50 | 51 | return X_pc 52 | 53 | 54 | from sklearn.datasets import make_circles 55 | X, y = make_circles(n_samples=1000, random_state=123, noise=0.1, factor=0.2) 56 | plt.scatter(X[y==0, 0], X[y==0, 1], color='red', marker='^', alpha=0.5) 57 | plt.scatter(X[y==1, 0], X[y==1, 1], color='blue', marker='o', alpha=0.5) 58 | plt.show() 59 | 60 | from sklearn.decomposition import PCA 61 | scikit_pca = PCA(n_components=2) 62 | X_spca = scikit_pca.fit_transform(X) 63 | fig, ax = plt.subplots(nrows=1,ncols=2, figsize=(7,3)) 64 | ax[0].scatter(X_spca[y==0, 0], X_spca[y==0, 1], color='red', marker='^', alpha=0.5) 65 | ax[0].scatter(X_spca[y==1, 0], X_spca[y==1, 1], color='blue', marker='o', alpha=0.5) 66 | ax[1].scatter(X_spca[y==0, 0], np.zeros((500,1))+0.02, color='red', marker='^', alpha=0.5) 67 | ax[1].scatter(X_spca[y==1, 0], np.zeros((500,1))-0.02, color='blue', marker='o', alpha=0.5) 68 | ax[0].set_xlabel('PC1') 69 | ax[0].set_ylabel('PC2') 70 | ax[1].set_ylim([-1, 1]) 71 | ax[1].set_yticks([]) 72 | ax[1].set_xlabel('PC1') 73 | plt.show() 74 | 75 | 76 | X_kpca = rbf_kernel_pca(X, gamma=15, n_components=2) 77 | fig, ax = plt.subplots(nrows=1,ncols=2, figsize=(7,3)) 78 | ax[0].scatter(X_kpca[y==0, 0], X_kpca[y==0, 1], color='red', marker='^', alpha=0.5) 79 | ax[0].scatter(X_kpca[y==1, 0], X_kpca[y==1, 1], color='blue', marker='o', alpha=0.5) 80 | ax[1].scatter(X_kpca[y==0, 0], np.zeros((500,1))+0.02, color='red', marker='^', alpha=0.5) 81 | ax[1].scatter(X_kpca[y==1, 0], np.zeros((500,1))-0.02, color='blue', marker='o', alpha=0.5) 82 | ax[0].set_xlabel('PC1') 83 | ax[0].set_ylabel('PC2') 84 | ax[1].set_ylim([-1, 1]) 85 | ax[1].set_yticks([]) 86 | ax[1].set_xlabel('PC1') 87 | plt.show() -------------------------------------------------------------------------------- /ch5-PCA-Kernel-newRBF.py: 
-------------------------------------------------------------------------------- 1 | from scipy.spatial.distance import pdist, squareform 2 | from scipy import exp 3 | from scipy.linalg import eigh 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | def rbf_kernel_pca(X, gamma, n_components): 8 | """ 9 | RBF kernel PCA implementation. 10 | 11 | Parameters 12 | ------------ 13 | X: {NumPy ndarray}, shape = [n_samples, n_features] 14 | 15 | gamma: float 16 | Tuning parameter of the RBF kernel 17 | 18 | n_components: int 19 | Number of principal components to return 20 | 21 | Returns 22 | ------------ 23 | X_pc: {NumPy ndarray}, shape = [n_samples, k_features] 24 | Projected dataset 25 | 26 | lambdas: list 27 | Eigenvalues 28 | 29 | """ 30 | # Calculate pairwise squared Euclidean distances 31 | # in the MxN dimensional dataset. 32 | sq_dists = pdist(X, 'sqeuclidean') 33 | 34 | # Convert pairwise distances into a square matrix. 35 | mat_sq_dists = squareform(sq_dists) 36 | 37 | # Compute the symmetric kernel matrix. 38 | K = exp(-gamma * mat_sq_dists) 39 | 40 | # Center the kernel matrix. 41 | N = K.shape[0] 42 | one_n = np.ones((N,N)) / N 43 | K = K - one_n.dot(K) - K.dot(one_n) + one_n.dot(K).dot(one_n) 44 | 45 | # Obtaining eigenpairs from the centered kernel matrix 46 | # numpy.eigh returns them in sorted order 47 | eigvals, eigvecs = eigh(K) 48 | 49 | # Collect the top k eigenvectors (projected samples) 50 | alphas = np.column_stack((eigvecs[:,-i] 51 | for i in range(1,n_components+1))) 52 | 53 | # Collect the corresponding eigenvalues 54 | lambdas = [eigvals[-i] for i in range(1,n_components+1)] 55 | 56 | return alphas, lambdas 57 | 58 | 59 | def project_x(x_new, X, gamma, alphas, lambdas): 60 | pair_dist = np.array([np.sum((x_new-row)**2) for row in X]) 61 | k = np.exp(-gamma * pair_dist) 62 | return k.dot(alphas / lambdas) 63 | 64 | 65 | from sklearn.datasets import make_moons 66 | X, y = make_moons(n_samples=100, random_state=123) 67 | alphas, lambdas =rbf_kernel_pca(X, gamma=15, n_components=1) 68 | x_new = X[25] 69 | x_proj = alphas[25] # original projection 70 | 71 | x_reproj = project_x(x_new, X, gamma=15, alphas=alphas, lambdas=lambdas) 72 | 73 | 74 | plt.scatter(alphas[y==0, 0], np.zeros((50)), color='red', marker='^',alpha=0.5) 75 | plt.scatter(alphas[y==1, 0], np.zeros((50)), color='blue', marker='o', alpha=0.5) 76 | plt.scatter(x_proj, 0, color='black', label='original projection of point X[25]', marker='^', s=100) 77 | plt.scatter(x_reproj, 0, color='green', label='remapped point X[25]', marker='x', s=500) 78 | plt.legend(scatterpoints=1) 79 | plt.show() 80 | 81 | 82 | -------------------------------------------------------------------------------- /ch5-PCA-Kernel-scikit.py: -------------------------------------------------------------------------------- 1 | from sklearn.decomposition import KernelPCA 2 | from sklearn.datasets import make_moons 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | X, y = make_moons(n_samples=100, random_state=123) 7 | scikit_kpca = KernelPCA(n_components=2, kernel='rbf', gamma=15) 8 | X_skernpca = scikit_kpca.fit_transform(X) 9 | plt.scatter(X_skernpca[y==0, 0], X_skernpca[y==0, 1], color='red', marker='^', alpha=0.5) 10 | plt.scatter(X_skernpca[y==1, 0], X_skernpca[y==1, 1], color='blue', marker='o', alpha=0.5) 11 | plt.xlabel('PC1') 12 | plt.ylabel('PC2') 13 | plt.show() 14 | -------------------------------------------------------------------------------- /ch5-PCA-scikit.py: 
-------------------------------------------------------------------------------- 1 | from matplotlib.colors import ListedColormap 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | def plot_decision_regions(X, y, classifier, resolution=0.02): 6 | # setup marker generator and color map 7 | markers = ('s', 'x', 'o', '^', 'v') 8 | colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') 9 | cmap = ListedColormap(colors[:len(np.unique(y))]) 10 | 11 | # plot the decision surface 12 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 13 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 14 | xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), 15 | np.arange(x2_min, x2_max, resolution)) 16 | Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) 17 | Z = Z.reshape(xx1.shape) 18 | plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap) 19 | plt.xlim(xx1.min(), xx1.max()) 20 | plt.ylim(xx2.min(), xx2.max()) 21 | 22 | # plot class samples 23 | for idx, cl in enumerate(np.unique(y)): 24 | plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], 25 | alpha=0.8, c=cmap(idx), 26 | marker=markers[idx], label=cl) 27 | 28 | import pandas as pd 29 | df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data', header=None) 30 | 31 | # Split and Normalize dataset 32 | from sklearn.cross_validation import train_test_split 33 | from sklearn.preprocessing import StandardScaler 34 | X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values 35 | X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.3, random_state=0) 36 | sc = StandardScaler() 37 | X_train_std = sc.fit_transform(X_train) 38 | X_test_std = sc.transform(X_test) 39 | 40 | from sklearn.linear_model import LogisticRegression 41 | from sklearn.decomposition import PCA 42 | 43 | # set n_components equal to number of components (k). 
If set to None, all components are kept 44 | pca = PCA(n_components=2) 45 | lr = LogisticRegression() 46 | X_train_pca = pca.fit_transform(X_train_std) 47 | X_test_pca = pca.transform(X_test_std) 48 | lr.fit(X_train_pca, y_train) 49 | plot_decision_regions(X_train_pca, y_train, classifier=lr) 50 | plt.xlabel('PC1') 51 | plt.ylabel('PC2') 52 | plt.legend(loc='lower left') 53 | plt.show() 54 | -------------------------------------------------------------------------------- /ch5-PCA1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data', header=None) 3 | 4 | # Split and Normalize dataset 5 | from sklearn.cross_validation import train_test_split 6 | from sklearn.preprocessing import StandardScaler 7 | X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values 8 | X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.3, random_state=0) 9 | sc = StandardScaler() 10 | X_train_std = sc.fit_transform(X_train) 11 | X_test_std = sc.transform(X_test) 12 | 13 | 14 | # Calculate the covariance matrix and its eigenvalues/eigenvectors 15 | import numpy as np 16 | cov_mat = np.cov(X_train_std.T) 17 | eigen_vals, eigen_vecs = np.linalg.eig(cov_mat) 18 | print('\nEigenvalues \n%s' % eigen_vals) 19 | 20 | # Plot graph 21 | tot = sum(eigen_vals) 22 | var_exp = [(i / tot) for i in sorted(eigen_vals, reverse=True)] 23 | cum_var_exp = np.cumsum(var_exp) 24 | import matplotlib.pyplot as plt 25 | plt.bar(range(1,14), var_exp, alpha=0.5, align='center', label='individual explained variance') 26 | plt.step(range(1,14), cum_var_exp, where='mid',label='cumulative explained variance') 27 | plt.ylabel('Explained variance ratio') 28 | plt.xlabel('Principal components') 29 | plt.legend(loc='best') 30 | plt.show() 31 | 32 | # Sort the eigenpairs by decreasing order of the eigenvalues (sort on the eigenvalue only so the eigenvector arrays are never compared): 33 | eigen_pairs = [(np.abs(eigen_vals[i]), eigen_vecs[:, i]) for i in range(len(eigen_vals))] 34 | eigen_pairs.sort(key=lambda k: k[0], reverse=True) 35 | 36 | # Pick k eigenvectors (k=2 here) 37 | w = np.hstack((eigen_pairs[0][1][:, np.newaxis], eigen_pairs[1][1][:, np.newaxis])) 38 | 39 | # Perform PCA 40 | X_train_pca = X_train_std.dot(w) 41 | 42 | 43 | # Plot the PCA-transformed data 44 | colors = ['r', 'b', 'g'] 45 | markers = ['s', 'x', 'o'] 46 | for l, c, m in zip(np.unique(y_train), colors, markers): 47 | plt.scatter(X_train_pca[y_train==l, 0], X_train_pca[y_train==l, 1], c=c, label=l, marker=m) 48 | plt.xlabel('PC 1') 49 | plt.ylabel('PC 2') 50 | plt.legend(loc='lower left') 51 | plt.show() 52 | 53 | 54 | -------------------------------------------------------------------------------- /ch6-F1-score.py: -------------------------------------------------------------------------------- 1 | from sklearn.preprocessing import StandardScaler 2 | from sklearn.pipeline import Pipeline 3 | 4 | import pandas as pd 5 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None) 6 | 7 | from sklearn.preprocessing import LabelEncoder 8 | X = df.loc[:, 2:].values 9 | y = df.loc[:, 1].values 10 | le = LabelEncoder() 11 | y = le.fit_transform(y) 12 | 13 | from sklearn.cross_validation import train_test_split 14 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1) 15 | 16 | from sklearn.svm import SVC 17 | pipe_svc = Pipeline([('scl', StandardScaler()), ('clf', SVC(random_state=1))]) 18 | 19 | 20 | pipe_svc.fit(X_train, y_train) 
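# The scores printed below treat class 1 (label 'M' after LabelEncoder, i.e. malignant)
# as the positive class: precision = TP / (TP + FP), recall = TP / (TP + FN),
# and F1 = 2 * precision * recall / (precision + recall).
# A minimal, optional sketch (assuming the GridSearchCV setup from
# ch6-hyperparameterTuning-gridSearch.py) of using F1 instead of accuracy as the
# grid-search scoring metric, with a custom positive label:
#
#   from sklearn.metrics import make_scorer, f1_score
#   scorer = make_scorer(f1_score, pos_label=0)
#   gs = GridSearchCV(estimator=pipe_svc, param_grid=param_grid, scoring=scorer, cv=10)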
21 | y_pred = pipe_svc.predict(X_test) 22 | 23 | from sklearn.metrics import precision_score 24 | from sklearn.metrics import recall_score, f1_score 25 | 26 | print('Precision: %.3f' % precision_score(y_true=y_test, y_pred=y_pred)) 27 | print('Recall: %.3f' % recall_score(y_true=y_test, y_pred=y_pred)) 28 | print('F1: %.3f' % f1_score(y_true=y_test, y_pred=y_pred)) -------------------------------------------------------------------------------- /ch6-Kfold-CrossValidation.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None) 3 | 4 | from sklearn.preprocessing import LabelEncoder 5 | X = df.loc[:, 2:].values 6 | y = df.loc[:, 1].values 7 | le = LabelEncoder() 8 | y = le.fit_transform(y) 9 | 10 | from sklearn.cross_validation import train_test_split 11 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1) 12 | from sklearn.preprocessing import StandardScaler 13 | from sklearn.decomposition import PCA 14 | from sklearn.linear_model import LogisticRegression 15 | from sklearn.pipeline import Pipeline 16 | pipe_lr = Pipeline([('scl', StandardScaler()),('pca', PCA(n_components=2)),('clf', LogisticRegression(random_state=1))]) 17 | #pipe_lr.fit(X_train, y_train) 18 | 19 | 20 | import numpy as np 21 | from sklearn.cross_validation import StratifiedKFold 22 | kfold = StratifiedKFold(y=y_train, n_folds=10, random_state=1) 23 | scores = [] 24 | for k, (train, test) in enumerate(kfold): 25 | pipe_lr.fit(X_train[train], y_train[train]) 26 | score = pipe_lr.score(X_train[test], y_train[test]) 27 | scores.append(score) 28 | print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k+1, np.bincount(y_train[train]), score)) 29 | 30 | print('CV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores))) 31 | -------------------------------------------------------------------------------- /ch6-ModelSelect-ParamTune-Nested-Kfold-CrossValidation.py: -------------------------------------------------------------------------------- 1 | from sklearn.preprocessing import StandardScaler 2 | from sklearn.pipeline import Pipeline 3 | import numpy as np 4 | 5 | import pandas as pd 6 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None) 7 | 8 | from sklearn.preprocessing import LabelEncoder 9 | X = df.loc[:, 2:].values 10 | y = df.loc[:, 1].values 11 | le = LabelEncoder() 12 | y = le.fit_transform(y) 13 | 14 | from sklearn.cross_validation import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1) 16 | 17 | 18 | from sklearn.grid_search import GridSearchCV 19 | from sklearn.svm import SVC 20 | pipe_svc = Pipeline([('scl', StandardScaler()), ('clf', SVC(random_state=1))]) 21 | param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0] 22 | param_grid = [{'clf__C': param_range, 23 | 'clf__kernel': ['linear']}, 24 | {'clf__C': param_range, 25 | 'clf__gamma': param_range, 26 | 'clf__kernel': ['rbf']}] 27 | 28 | from sklearn.cross_validation import cross_val_score 29 | # Inner Loop for parameter tuning 30 | gs = GridSearchCV(estimator=pipe_svc, 31 | param_grid=param_grid, 32 | scoring='accuracy', 33 | cv=2, 34 | n_jobs=-1) 35 | 36 | #Outer Loop for model selection 37 | scores = cross_val_score(gs, X_train, y_train, scoring='accuracy', cv=5) 38 | print('CV accuracy: %.3f +/- 
%.3f' % (np.mean(scores), np.std(scores))) 39 | 40 | 41 | from sklearn.tree import DecisionTreeClassifier 42 | gs = GridSearchCV( 43 | estimator=DecisionTreeClassifier(random_state=0), 44 | param_grid=[{'max_depth': [1, 2, 3, 4, 5, 6, 7, None]}], 45 | scoring='accuracy', 46 | cv=2) 47 | 48 | scores = cross_val_score(gs, 49 | X_train, 50 | y_train, 51 | scoring='accuracy', 52 | cv=5) 53 | 54 | print('CV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores))) 55 | -------------------------------------------------------------------------------- /ch6-ROC-curve.py: -------------------------------------------------------------------------------- 1 | from sklearn.preprocessing import StandardScaler 2 | from sklearn.pipeline import Pipeline 3 | from sklearn.decomposition import PCA 4 | import numpy as np 5 | from sklearn.cross_validation import StratifiedKFold 6 | import matplotlib.pyplot as plt 7 | from sklearn.linear_model import LogisticRegression 8 | 9 | import pandas as pd 10 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None) 11 | 12 | from sklearn.preprocessing import LabelEncoder 13 | X = df.loc[:, 2:].values 14 | y = df.loc[:, 1].values 15 | le = LabelEncoder() 16 | y = le.fit_transform(y) 17 | 18 | from sklearn.cross_validation import train_test_split 19 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1) 20 | 21 | from sklearn.metrics import roc_curve, auc 22 | from scipy import interp 23 | pipe_lr = Pipeline([('scl', StandardScaler()), 24 | ('pca', PCA(n_components=2)), 25 | ('clf', LogisticRegression(penalty='l2', random_state=0, C=100.0))]) 26 | 27 | X_train2 = X_train[:, [4, 14]] 28 | cv = StratifiedKFold(y_train,n_folds=3,random_state=1) 29 | 30 | fig = plt.figure(figsize=(7, 5)) 31 | mean_tpr = 0.0 32 | mean_fpr = np.linspace(0, 1, 100) 33 | all_tpr = [] 34 | 35 | 36 | for i, (train, test) in enumerate(cv): 37 | probas = pipe_lr.fit(X_train2[train], y_train[train]).predict_proba(X_train2[test]) 38 | fpr, tpr, thresholds = roc_curve(y_train[test],probas[:, 1],pos_label=1) 39 | mean_tpr += interp(mean_fpr, fpr, tpr) 40 | mean_tpr[0] = 0.0 41 | roc_auc = auc(fpr, tpr) 42 | plt.plot(fpr,tpr,lw=1,label='ROC fold %d (area = %0.2f)'% (i+1, roc_auc)) 43 | 44 | 45 | plt.plot([0, 1],[0, 1],linestyle='--',color=(0.6, 0.6, 0.6),label='random guessing') 46 | mean_tpr /= len(cv) 47 | mean_tpr[-1] = 1.0 48 | mean_auc = auc(mean_fpr, mean_tpr) 49 | plt.plot(mean_fpr, mean_tpr, 'k--',label='mean ROC (area = %0.2f)' % mean_auc, lw=2) 50 | plt.plot([0, 0, 1],[0, 1, 1],lw=2,linestyle=':',color='black',label='perfect performance') 51 | 52 | plt.xlim([-0.05, 1.05]) 53 | plt.ylim([-0.05, 1.05]) 54 | plt.xlabel('false positive rate') 55 | plt.ylabel('true positive rate') 56 | plt.title('Receiver Operator Characteristic') 57 | plt.legend(loc="lower right") 58 | plt.show() 59 | 60 | 61 | pipe_lr = pipe_lr.fit(X_train2, y_train) 62 | y_pred2 = pipe_lr.predict(X_test[:, [4, 14]]) 63 | 64 | from sklearn.metrics import roc_auc_score 65 | from sklearn.metrics import accuracy_score 66 | print('ROC AUC: %.3f' % roc_auc_score(y_true=y_test, y_score=y_pred2)) 67 | 68 | print('Accuracy: %.3f' % accuracy_score(y_true=y_test, y_pred=y_pred2)) 69 | 70 | -------------------------------------------------------------------------------- /ch6-confusion-matrix.py: -------------------------------------------------------------------------------- 1 | from sklearn.preprocessing import StandardScaler 2 | from 
sklearn.pipeline import Pipeline 3 | 4 | import pandas as pd 5 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None) 6 | 7 | from sklearn.preprocessing import LabelEncoder 8 | X = df.loc[:, 2:].values 9 | y = df.loc[:, 1].values 10 | le = LabelEncoder() 11 | y = le.fit_transform(y) 12 | 13 | from sklearn.cross_validation import train_test_split 14 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1) 15 | 16 | from sklearn.svm import SVC 17 | pipe_svc = Pipeline([('scl', StandardScaler()), ('clf', SVC(random_state=1))]) 18 | 19 | from sklearn.metrics import confusion_matrix 20 | pipe_svc.fit(X_train, y_train) 21 | y_pred = pipe_svc.predict(X_test) 22 | confmat = confusion_matrix(y_true=y_test, y_pred=y_pred) 23 | # Print confusion Matrix 24 | print(confmat) 25 | 26 | 27 | # Plot 28 | import matplotlib.pyplot as plt 29 | fig, ax = plt.subplots(figsize=(2.5, 2.5)) 30 | ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3) 31 | for i in range(confmat.shape[0]): 32 | for j in range(confmat.shape[1]): 33 | ax.text(x=j, y=i,s=confmat[i, j], va='center', ha='center') 34 | 35 | plt.xlabel('predicted label') 36 | plt.ylabel('true label') 37 | plt.show() -------------------------------------------------------------------------------- /ch6-hyperparameterTuning-gridSearch.py: -------------------------------------------------------------------------------- 1 | from sklearn.preprocessing import StandardScaler 2 | from sklearn.pipeline import Pipeline 3 | 4 | 5 | import pandas as pd 6 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None) 7 | 8 | from sklearn.preprocessing import LabelEncoder 9 | X = df.loc[:, 2:].values 10 | y = df.loc[:, 1].values 11 | le = LabelEncoder() 12 | y = le.fit_transform(y) 13 | 14 | from sklearn.cross_validation import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1) 16 | 17 | 18 | from sklearn.grid_search import GridSearchCV 19 | from sklearn.svm import SVC 20 | pipe_svc = Pipeline([('scl', StandardScaler()), ('clf', SVC(random_state=1))]) 21 | param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0] 22 | param_grid = [{'clf__C': param_range, 23 | 'clf__kernel': ['linear']}, 24 | {'clf__C': param_range, 25 | 'clf__gamma': param_range, 26 | 'clf__kernel': ['rbf']}] 27 | 28 | gs = GridSearchCV(estimator=pipe_svc, 29 | param_grid=param_grid, 30 | scoring='accuracy', 31 | cv=10, 32 | n_jobs=-1) 33 | 34 | gs = gs.fit(X_train, y_train) 35 | 36 | print(gs.best_score_) 37 | print(gs.best_params_) 38 | 39 | clf = gs.best_estimator_ 40 | clf.fit(X_train, y_train) 41 | 42 | print('Test accuracy: %.3f' % clf.score(X_test, y_test)) 43 | 44 | -------------------------------------------------------------------------------- /ch6-learningCurve.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from sklearn.learning_curve import learning_curve 3 | from sklearn.preprocessing import StandardScaler 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn.pipeline import Pipeline 6 | import numpy as np 7 | 8 | import pandas as pd 9 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None) 10 | 11 | from sklearn.preprocessing import LabelEncoder 12 | X = df.loc[:, 2:].values 13 | y = df.loc[:, 
1].values 14 | le = LabelEncoder() 15 | y = le.fit_transform(y) 16 | 17 | from sklearn.cross_validation import train_test_split 18 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1) 19 | 20 | pipe_lr = Pipeline([('scl', StandardScaler()),('clf', LogisticRegression(penalty='l2', random_state=0))]) 21 | 22 | train_sizes, train_scores, test_scores = learning_curve(estimator=pipe_lr, 23 | X=X_train, 24 | y=y_train, 25 | train_sizes=np.linspace(0.1, 1.0, 10), 26 | cv=10, 27 | n_jobs=1) 28 | train_mean = np.mean(train_scores, axis=1) 29 | train_std = np.std(train_scores, axis=1) 30 | test_mean = np.mean(test_scores, axis=1) 31 | test_std = np.std(test_scores, axis=1) 32 | plt.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='training accuracy') 33 | 34 | plt.fill_between(train_sizes, train_mean + train_std, train_mean - train_std, alpha=0.15, color='blue') 35 | plt.plot(train_sizes, test_mean, color='green', linestyle='--',marker='s', markersize=5, label='validation accuracy') 36 | plt.fill_between(train_sizes,test_mean + test_std,test_mean - test_std,alpha=0.15, color='green') 37 | plt.grid() 38 | plt.xlabel('Number of training samples') 39 | plt.ylabel('Accuracy') 40 | plt.legend(loc='lower right') 41 | plt.ylim([0.8, 1.0]) 42 | plt.show() 43 | 44 | -------------------------------------------------------------------------------- /ch6-pipeline.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None) 3 | 4 | from sklearn.preprocessing import LabelEncoder 5 | X = df.loc[:, 2:].values 6 | y = df.loc[:, 1].values 7 | le = LabelEncoder() 8 | y = le.fit_transform(y) 9 | 10 | from sklearn.cross_validation import train_test_split 11 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1) 12 | 13 | from sklearn.preprocessing import StandardScaler 14 | from sklearn.decomposition import PCA 15 | from sklearn.linear_model import LogisticRegression 16 | from sklearn.pipeline import Pipeline 17 | pipe_lr = Pipeline([('scl', StandardScaler()),('pca', PCA(n_components=2)),('clf', LogisticRegression(random_state=1))]) 18 | pipe_lr.fit(X_train, y_train) 19 | print('Test Accuracy: %.3f' % pipe_lr.score(X_test, y_test)) 20 | 21 | -------------------------------------------------------------------------------- /ch6-scikit-Kfold-CrossValidation.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None) 3 | 4 | from sklearn.preprocessing import LabelEncoder 5 | X = df.loc[:, 2:].values 6 | y = df.loc[:, 1].values 7 | le = LabelEncoder() 8 | y = le.fit_transform(y) 9 | 10 | from sklearn.cross_validation import train_test_split 11 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1) 12 | from sklearn.preprocessing import StandardScaler 13 | from sklearn.decomposition import PCA 14 | from sklearn.linear_model import LogisticRegression 15 | from sklearn.pipeline import Pipeline 16 | pipe_lr = Pipeline([('scl', StandardScaler()),('pca', PCA(n_components=2)),('clf', LogisticRegression(random_state=1))]) 17 | #pipe_lr.fit(X_train, y_train) 18 | 19 | import numpy as np 20 | from sklearn.cross_validation import cross_val_score 21 | 
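# Annotation, not part of the original ch6-scikit-Kfold-CrossValidation.py: cross_val_score
# also accepts an n_jobs argument, so the ten fold evaluations below could be spread over
# all available CPU cores instead of one, e.g. (hedged sketch):
#   scores = cross_val_score(estimator=pipe_lr, X=X_train, y=y_train, cv=10, n_jobs=-1)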
# use 10 kfolds on 1 CPU 22 | scores = cross_val_score(estimator=pipe_lr, X=X_train, y=y_train, cv=10, n_jobs=1) 23 | print('CV accuracy scores: %s' % scores) 24 | print('CV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores))) 25 | -------------------------------------------------------------------------------- /ch6-validationCurve.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from sklearn.preprocessing import StandardScaler 3 | from sklearn.linear_model import LogisticRegression 4 | from sklearn.pipeline import Pipeline 5 | import numpy as np 6 | 7 | import pandas as pd 8 | df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None) 9 | 10 | from sklearn.preprocessing import LabelEncoder 11 | X = df.loc[:, 2:].values 12 | y = df.loc[:, 1].values 13 | le = LabelEncoder() 14 | y = le.fit_transform(y) 15 | 16 | from sklearn.cross_validation import train_test_split 17 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1) 18 | 19 | pipe_lr = Pipeline([('scl', StandardScaler()),('clf', LogisticRegression(penalty='l2', random_state=0))]) 20 | 21 | 22 | from sklearn.learning_curve import validation_curve 23 | param_range = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0] 24 | train_scores, test_scores = validation_curve( 25 | estimator=pipe_lr, 26 | X=X_train, 27 | y=y_train, 28 | param_name='clf__C', 29 | param_range=param_range, 30 | cv=10) 31 | 32 | train_mean = np.mean(train_scores, axis=1) 33 | train_std = np.std(train_scores, axis=1) 34 | test_mean = np.mean(test_scores, axis=1) 35 | test_std = np.std(test_scores, axis=1) 36 | plt.plot(param_range, train_mean, 37 | color='blue', marker='o', 38 | markersize=5, 39 | label='training accuracy') 40 | 41 | plt.fill_between(param_range, train_mean + train_std, 42 | train_mean - train_std, alpha=0.15, 43 | color='blue') 44 | 45 | plt.plot(param_range, test_mean, 46 | color='green', linestyle='--', 47 | marker='s', markersize=5, 48 | label='validation accuracy') 49 | 50 | plt.fill_between(param_range, 51 | test_mean + test_std, 52 | test_mean - test_std, 53 | alpha=0.15, color='green') 54 | 55 | plt.grid() 56 | plt.xscale('log') 57 | plt.legend(loc='lower right') 58 | plt.xlabel('Parameter C') 59 | plt.ylabel('Accuracy') 60 | plt.ylim([0.8, 1.0]) 61 | plt.show() 62 | 63 | -------------------------------------------------------------------------------- /ch7-AdaBoost.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data', header=None) 3 | df_wine.columns = ['Class label', 'Alcohol', 4 | 'Malic acid', 'Ash', 5 | 'Alcalinity of ash', 6 | 'Magnesium', 'Total phenols', 7 | 'Flavanoids', 'Nonflavanoid phenols', 8 | 'Proanthocyanins', 9 | 'Color intensity', 'Hue', 10 | 'OD280/OD315 of diluted wines', 11 | 'Proline'] 12 | 13 | df_wine = df_wine[df_wine['Class label'] != 1] 14 | y = df_wine['Class label'].values 15 | X = df_wine[['Alcohol', 'Hue']].values 16 | 17 | from sklearn.preprocessing import LabelEncoder 18 | from sklearn.cross_validation import train_test_split 19 | le = LabelEncoder() 20 | y = le.fit_transform(y) 21 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40, random_state=1) 22 | 23 | 24 | from sklearn.ensemble import AdaBoostClassifier 25 | from sklearn.tree import DecisionTreeClassifier 26 
| from sklearn.metrics import accuracy_score 27 | tree = DecisionTreeClassifier(criterion='entropy', 28 | max_depth=None, 29 | random_state=0) 30 | 31 | ada = AdaBoostClassifier(base_estimator=tree, 32 | n_estimators=500, 33 | learning_rate=0.1, 34 | random_state=0) 35 | 36 | tree = tree.fit(X_train, y_train) 37 | y_train_pred = tree.predict(X_train) 38 | y_test_pred = tree.predict(X_test) 39 | tree_train = accuracy_score(y_train, y_train_pred) 40 | tree_test = accuracy_score(y_test, y_test_pred) 41 | print('Decision tree train/test accuracies %.3f/%.3f'% (tree_train, tree_test)) 42 | 43 | ada = ada.fit(X_train, y_train) 44 | y_train_pred = ada.predict(X_train) 45 | y_test_pred = ada.predict(X_test) 46 | ada_train = accuracy_score(y_train, y_train_pred) 47 | ada_test = accuracy_score(y_test, y_test_pred) 48 | print('AdaBoost train/test accuracies %.3f/%.3f' % (ada_train, ada_test)) 49 | 50 | 51 | 52 | # Plot 53 | import numpy as np 54 | import matplotlib.pyplot as plt 55 | 56 | x_min = X_train[:, 0].min() - 1 57 | x_max = X_train[:, 0].max() + 1 58 | y_min = X_train[:, 1].min() - 1 59 | y_max = X_train[:, 1].max() + 1 60 | xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1)) 61 | f, axarr = plt.subplots(1, 2, sharex='col', sharey='row', figsize=(8, 3)) 62 | 63 | for idx, clf, tt in zip([0, 1], [tree, ada], ['Decision Tree', 'AdaBoost']): 64 | clf.fit(X_train, y_train) 65 | Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) 66 | Z = Z.reshape(xx.shape) 67 | axarr[idx].contourf(xx, yy, Z, alpha=0.3) 68 | axarr[idx].scatter(X_train[y_train==0, 0], 69 | X_train[y_train==0, 1], 70 | c='blue', 71 | marker='^') 72 | axarr[idx].scatter(X_train[y_train==1, 0], 73 | X_train[y_train==1, 1], 74 | c='red', 75 | marker='o') 76 | axarr[idx].set_title(tt) 77 | axarr[0].set_ylabel('Alcohol', fontsize=12) 78 | 79 | 80 | plt.text(10.2, -1.2, 81 | s='Hue', 82 | ha='center', 83 | va='center', 84 | fontsize=12) 85 | plt.show() 86 | -------------------------------------------------------------------------------- /ch7-BaggingClassifiers.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data', header=None) 3 | df_wine.columns = ['Class label', 'Alcohol', 4 | 'Malic acid', 'Ash', 5 | 'Alcalinity of ash', 6 | 'Magnesium', 'Total phenols', 7 | 'Flavanoids', 'Nonflavanoid phenols', 8 | 'Proanthocyanins', 9 | 'Color intensity', 'Hue', 10 | 'OD280/OD315 of diluted wines', 11 | 'Proline'] 12 | 13 | df_wine = df_wine[df_wine['Class label'] != 1] 14 | y = df_wine['Class label'].values 15 | X = df_wine[['Alcohol', 'Hue']].values 16 | 17 | from sklearn.preprocessing import LabelEncoder 18 | from sklearn.cross_validation import train_test_split 19 | le = LabelEncoder() 20 | y = le.fit_transform(y) 21 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40, random_state=1) 22 | 23 | from sklearn.ensemble import BaggingClassifier 24 | from sklearn.tree import DecisionTreeClassifier 25 | tree = DecisionTreeClassifier(criterion='entropy', max_depth=None, random_state=1) 26 | bag = BaggingClassifier(base_estimator=tree, 27 | n_estimators=500, 28 | max_samples=1.0, 29 | max_features=1.0, 30 | bootstrap=True, 31 | bootstrap_features=False, 32 | n_jobs=1, 33 | random_state=1) 34 | 35 | from sklearn.metrics import accuracy_score 36 | tree = tree.fit(X_train, y_train) 37 | y_train_pred = tree.predict(X_train) 38 | y_test_pred = 
tree.predict(X_test) 39 | tree_train = accuracy_score(y_train, y_train_pred) 40 | tree_test = accuracy_score(y_test, y_test_pred) 41 | print('Decision tree train/test accuracies %.3f/%.3f'% (tree_train, tree_test)) 42 | 43 | bag = bag.fit(X_train, y_train) 44 | y_train_pred = bag.predict(X_train) 45 | y_test_pred = bag.predict(X_test) 46 | bag_train = accuracy_score(y_train, y_train_pred) 47 | bag_test = accuracy_score(y_test, y_test_pred) 48 | print('Bagging train/test accuracies %.3f/%.3f'% (bag_train, bag_test)) 49 | 50 | 51 | #Plot 52 | import numpy as np 53 | import matplotlib.pyplot as plt 54 | x_min = X_train[:, 0].min() - 1 55 | x_max = X_train[:, 0].max() + 1 56 | y_min = X_train[:, 1].min() - 1 57 | y_max = X_train[:, 1].max() + 1 58 | xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),np.arange(y_min, y_max, 0.1)) 59 | f, axarr = plt.subplots(nrows=1, ncols=2, sharex='col', sharey='row',figsize=(8, 3)) 60 | 61 | for idx, clf, tt in zip([0, 1],[tree, bag],['Decision Tree', 'Bagging']): 62 | clf.fit(X_train, y_train) 63 | Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) 64 | Z = Z.reshape(xx.shape) 65 | axarr[idx].contourf(xx, yy, Z, alpha=0.3) 66 | axarr[idx].scatter(X_train[y_train==0, 0], X_train[y_train==0, 1], c='blue', marker='^') 67 | axarr[idx].scatter(X_train[y_train==1, 0], X_train[y_train==1, 1], c='red', marker='o') 68 | axarr[idx].set_title(tt) 69 | 70 | axarr[0].set_ylabel('Alcohol', fontsize=12) 71 | plt.text(10.2, -1.2, s='Hue',ha='center', va='center', fontsize=12) 72 | plt.show() -------------------------------------------------------------------------------- /ch7-majorityVote-Classifier.py: -------------------------------------------------------------------------------- 1 | from sklearn.base import BaseEstimator 2 | from sklearn.base import ClassifierMixin 3 | from sklearn.preprocessing import LabelEncoder 4 | from sklearn.externals import six 5 | from sklearn.base import clone 6 | from sklearn.pipeline import _name_estimators 7 | import numpy as np 8 | import operator 9 | 10 | 11 | class MajorityVoteClassifier(BaseEstimator, 12 | ClassifierMixin): 13 | """ A majority vote ensemble classifier 14 | 15 | Parameters 16 | ---------- 17 | classifiers : array-like, shape = [n_classifiers] 18 | Different classifiers for the ensemble 19 | 20 | vote : str, {'classlabel', 'probability'} 21 | Default: 'classlabel' 22 | If 'classlabel' the prediction is based on 23 | the argmax of class labels. Else if 24 | 'probability', the argmax of the sum of 25 | probabilities is used to predict the class label 26 | (recommended for calibrated classifiers). 27 | 28 | weights : array-like, shape = [n_classifiers] 29 | Optional, default: None 30 | If a list of `int` or `float` values are 31 | provided, the classifiers are weighted by 32 | importance; Uses uniform weights if `weights=None`. 33 | 34 | """ 35 | def __init__(self, classifiers, 36 | vote='classlabel', weights=None): 37 | 38 | self.classifiers = classifiers 39 | self.named_classifiers = {key: value for 40 | key, value in 41 | _name_estimators(classifiers)} 42 | self.vote = vote 43 | self.weights = weights 44 | 45 | def fit(self, X, y): 46 | """ Fit classifiers. 47 | 48 | Parameters 49 | ---------- 50 | X : {array-like, sparse matrix}, 51 | shape = [n_samples, n_features] 52 | Matrix of training samples. 53 | 54 | y : array-like, shape = [n_samples] 55 | Vector of target class labels. 
56 | 57 | Returns 58 | ------- 59 | self : object 60 | 61 | """ 62 | # Use LabelEncoder to ensure class labels start 63 | # with 0, which is important for np.argmax 64 | # call in self.predict 65 | self.lablenc_ = LabelEncoder() 66 | self.lablenc_.fit(y) 67 | self.classes_ = self.lablenc_.classes_ 68 | self.classifiers_ = [] 69 | for clf in self.classifiers: 70 | fitted_clf = clone(clf).fit(X, 71 | self.lablenc_.transform(y)) 72 | self.classifiers_.append(fitted_clf) 73 | return self 74 | 75 | 76 | def predict(self, X): 77 | """ Predict class labels for X. 78 | 79 | Parameters 80 | ---------- 81 | X : {array-like, sparse matrix}, 82 | Shape = [n_samples, n_features] 83 | Matrix of training samples. 84 | 85 | Returns 86 | ---------- 87 | maj_vote : array-like, shape = [n_samples] 88 | Predicted class labels. 89 | 90 | """ 91 | if self.vote == 'probability': 92 | maj_vote = np.argmax(self.predict_proba(X), 93 | axis=1) 94 | else: # 'classlabel' vote 95 | 96 | # Collect results from clf.predict calls 97 | predictions = np.asarray([clf.predict(X) 98 | for clf in 99 | self.classifiers_]).T 100 | 101 | maj_vote = np.apply_along_axis( 102 | lambda x: 103 | np.argmax(np.bincount(x, 104 | weights=self.weights)), 105 | axis=1, 106 | arr=predictions) 107 | maj_vote = self.lablenc_.inverse_transform(maj_vote) 108 | return maj_vote 109 | 110 | 111 | def predict_proba(self, X): 112 | """ Predict class probabilities for X. 113 | 114 | Parameters 115 | ---------- 116 | X : {array-like, sparse matrix}, 117 | shape = [n_samples, n_features] 118 | Training vectors, where n_samples is 119 | the number of samples and 120 | n_features is the number of features. 121 | 122 | Returns 123 | ---------- 124 | avg_proba : array-like, 125 | shape = [n_samples, n_classes] 126 | Weighted average probability for 127 | each class per sample. 
128 | 129 | """ 130 | probas = np.asarray([clf.predict_proba(X) 131 | for clf in self.classifiers_]) 132 | avg_proba = np.average(probas, 133 | axis=0, weights=self.weights) 134 | return avg_proba 135 | 136 | 137 | def get_params(self, deep=True): 138 | """ Get classifier parameter names for GridSearch""" 139 | if not deep: 140 | return super(MajorityVoteClassifier, 141 | self).get_params(deep=False) 142 | else: 143 | out = self.named_classifiers.copy() 144 | for name, step in\ 145 | six.iteritems(self.named_classifiers): 146 | for key, value in six.iteritems( 147 | step.get_params(deep=True)): 148 | out['%s__%s' % (name, key)] = value 149 | return out 150 | 151 | 152 | # Main 153 | 154 | from sklearn import datasets 155 | from sklearn.cross_validation import train_test_split 156 | from sklearn.preprocessing import StandardScaler 157 | from sklearn.preprocessing import LabelEncoder 158 | iris = datasets.load_iris() 159 | X, y = iris.data[50:, [1, 2]], iris.target[50:] 160 | le = LabelEncoder() 161 | y = le.fit_transform(y) 162 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=1) 163 | 164 | from sklearn.cross_validation import cross_val_score 165 | from sklearn.linear_model import LogisticRegression 166 | from sklearn.tree import DecisionTreeClassifier 167 | from sklearn.neighbors import KNeighborsClassifier 168 | from sklearn.pipeline import Pipeline 169 | 170 | import numpy as np 171 | clf1 = LogisticRegression(penalty='l2', C=0.001, random_state=0) 172 | clf2 = DecisionTreeClassifier(max_depth=1, criterion='entropy', random_state=0) 173 | clf3 = KNeighborsClassifier(n_neighbors=1, p=2, metric='minkowski') 174 | pipe1 = Pipeline([['sc', StandardScaler()],['clf', clf1]]) 175 | pipe3 = Pipeline([['sc', StandardScaler()],['clf', clf3]]) 176 | clf_labels = ['Logistic Regression', 'Decision Tree', 'KNN'] 177 | 178 | print('10-fold cross validation:\n') 179 | 180 | for clf, label in zip([pipe1, clf2, pipe3], clf_labels): 181 | scores = cross_val_score(estimator=clf, X=X_train, y=y_train, cv=10, scoring='roc_auc') 182 | print("ROC AUC: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label)) 183 | 184 | mv_clf = MajorityVoteClassifier(classifiers=[pipe1, clf2, pipe3]) 185 | clf_labels += ['Majority Voting'] 186 | all_clf = [pipe1, clf2, pipe3, mv_clf] 187 | for clf, label in zip(all_clf, clf_labels): 188 | scores = cross_val_score(estimator=clf, X=X_train, y=y_train, cv=10, scoring='roc_auc') 189 | print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label)) 190 | 191 | 192 | 193 | #plot 194 | from sklearn.metrics import roc_curve 195 | from sklearn.metrics import auc 196 | import matplotlib.pyplot as plt 197 | colors = ['black', 'orange', 'blue', 'green'] 198 | linestyles = [':', '--', '-.', '-'] 199 | for clf, label, clr, ls in zip(all_clf, clf_labels, colors, linestyles): 200 | # assuming the label of the positive class is 1 201 | y_pred = clf.fit(X_train, y_train).predict_proba(X_test)[:, 1] 202 | fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=y_pred) 203 | roc_auc = auc(x=fpr, y=tpr) 204 | plt.plot(fpr, tpr, color=clr, linestyle=ls, label='%s (auc = %0.2f)' % (label, roc_auc)) 205 | 206 | plt.legend(loc='lower right') 207 | plt.plot([0, 1], [0, 1], linestyle='--', color='gray', linewidth=2) 208 | plt.xlim([-0.1, 1.1]) 209 | plt.ylim([-0.1, 1.1]) 210 | plt.grid() 211 | plt.xlabel('False Positive Rate') 212 | plt.ylabel('True Positive Rate') 213 | plt.show() 214 | 215 | 216 | 217 | #grid search for tuning params for 
classifier 218 | from sklearn.grid_search import GridSearchCV 219 | params = {'decisiontreeclassifier__max_depth': [1, 2],'pipeline-1__clf__C': [0.001, 0.1, 100.0]} 220 | grid = GridSearchCV(estimator=mv_clf, param_grid=params, cv=10, scoring='roc_auc') 221 | grid.fit(X_train, y_train) 222 | print('Best parameters: %s' % grid.best_params_) 223 | print('Accuracy: %.2f' % grid.best_score_) -------------------------------------------------------------------------------- /ch8-Online-Sentiment-Analysis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import re 3 | from nltk.corpus import stopwords 4 | stop = stopwords.words('english') 5 | 6 | def tokenizer(text): 7 | text = re.sub('<[^>]*>', '', text) 8 | emoticons = re.findall('(?::|;|=)(?:-)?(?:\)|\(|D|P)',text.lower()) 9 | text = re.sub('[\W]+', ' ', text.lower()) + ' '.join(emoticons).replace('-', '') 10 | tokenized = [w for w in text.split() if w not in stop] 11 | return tokenized 12 | 13 | 14 | def stream_docs(path): 15 | with open(path, 'r', encoding='utf-8') as csv: 16 | next(csv) # skip header 17 | for line in csv: 18 | text, label = line[:-3], int(line[-2]) 19 | yield text, label 20 | 21 | 22 | def get_minibatch(doc_stream, size): 23 | docs, y = [], [] 24 | try: 25 | for _ in range(size): 26 | text, label = next(doc_stream) 27 | docs.append(text) 28 | y.append(label) 29 | except StopIteration: 30 | return None, None 31 | 32 | return docs, y 33 | 34 | 35 | from sklearn.feature_extraction.text import HashingVectorizer 36 | from sklearn.linear_model import SGDClassifier 37 | 38 | vect = HashingVectorizer(decode_error='ignore', 39 | n_features=2**21, 40 | preprocessor=None, 41 | tokenizer=tokenizer) 42 | 43 | # regularized linear models with stochastic gradient descent (SGD) 44 | clf = SGDClassifier(loss='log', random_state=1, n_iter=1) 45 | doc_stream = stream_docs(path='./movie_data.csv') 46 | 47 | import pyprind 48 | pbar = pyprind.ProgBar(45) 49 | classes = np.array([0, 1]) 50 | for _ in range(45): 51 | X_train, y_train = get_minibatch(doc_stream, size=1000) 52 | if not X_train: 53 | break 54 | 55 | X_train = vect.transform(X_train) 56 | clf.partial_fit(X_train, y_train, classes=classes) 57 | pbar.update() 58 | 59 | 60 | X_test, y_test = get_minibatch(doc_stream, size=5000) 61 | X_test = vect.transform(X_test) 62 | print('Accuracy: %.3f' % clf.score(X_test, y_test)) 63 | 64 | clf = clf.partial_fit(X_test, y_test) 65 | -------------------------------------------------------------------------------- /ch8-Sentiment-Analysis.py: -------------------------------------------------------------------------------- 1 | import pyprind 2 | import pandas as pd 3 | import os 4 | pbar = pyprind.ProgBar(50000) 5 | labels = {'pos':1, 'neg':0} 6 | df = pd.DataFrame() 7 | for s in ('test', 'train'): 8 | for l in ('pos', 'neg'): 9 | path ='./aclImdb/%s/%s' % (s, l) 10 | for file in os.listdir(path): 11 | with open(os.path.join(path, file), 'r') as infile: 12 | txt = infile.read() 13 | 14 | df = df.append([[txt, labels[l]]], ignore_index=True) 15 | pbar.update() 16 | 17 | 18 | df.columns = ['review', 'sentiment'] 19 | 20 | import numpy as np 21 | np.random.seed(0) 22 | df = df.reindex(np.random.permutation(df.index)) 23 | df.to_csv('./movie_data.csv', index=False) 24 | df = pd.read_csv('./movie_data.csv') 25 | 26 | X_train = df.loc[:25000, 'review'].values 27 | y_train = df.loc[:25000, 'sentiment'].values 28 | X_test = df.loc[25000:, 'review'].values 29 | y_test = df.loc[25000:, 
'sentiment'].values 30 | 31 | 32 | from sklearn.grid_search import GridSearchCV 33 | from sklearn.pipeline import Pipeline 34 | from sklearn.linear_model import LogisticRegression 35 | from sklearn.feature_extraction.text import TfidfVectorizer 36 | 37 | import nltk 38 | nltk.download('stopwords') 39 | from nltk.corpus import stopwords 40 | stop = stopwords.words('english') 41 | 42 | from nltk.stem.porter import PorterStemmer 43 | porter = PorterStemmer() 44 | 45 | def tokenizer(text): 46 | return text.split() 47 | 48 | def tokenizer_porter(text): 49 | return [porter.stem(word) for word in text.split()] 50 | 51 | tfidf = TfidfVectorizer(strip_accents=None, lowercase=False, preprocessor=None) 52 | 53 | param_grid = [{'vect__ngram_range': [(1,1)], 54 | 'vect__stop_words': [stop, None], 55 | 'vect__tokenizer': [tokenizer, 56 | tokenizer_porter], 57 | 'clf__penalty': ['l1', 'l2'], 58 | 'clf__C': [1.0, 10.0, 100.0]}, 59 | {'vect__ngram_range': [(1,1)], 60 | 'vect__stop_words': [stop, None], 61 | 'vect__tokenizer': [tokenizer, 62 | tokenizer_porter], 63 | 'vect__use_idf':[False], 64 | 'vect__norm':[None], 65 | 'clf__penalty': ['l1', 'l2'], 66 | 'clf__C': [1.0, 10.0, 100.0]} 67 | ] 68 | 69 | lr_tfidf = Pipeline([('vect', tfidf), 70 | ('clf', 71 | LogisticRegression(random_state=0))]) 72 | 73 | gs_lr_tfidf = GridSearchCV(lr_tfidf, param_grid, 74 | scoring='accuracy', 75 | cv=5, verbose=1, 76 | n_jobs=-1) 77 | 78 | gs_lr_tfidf.fit(X_train, y_train) 79 | 80 | print('Best parameter set: %s ' % gs_lr_tfidf.best_params_) 81 | print('CV Accuracy: %.3f'% gs_lr_tfidf.best_score_) 82 | 83 | clf = gs_lr_tfidf.best_estimator_ 84 | print('Test Accuracy: %.3f' % clf.score(X_test, y_test)) 85 | 86 | 87 | -------------------------------------------------------------------------------- /ch8-bagOfWords.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.feature_extraction.text import CountVectorizer 3 | 4 | # divide text into 1-grams (use ngram_range=2,2 for 2-grams) 5 | count = CountVectorizer(ngram_range=(1,1)) 6 | 7 | docs = np.array([ 8 | 'The sun is shining', 9 | 'The weather is sweet', 10 | 'The sun is shining and the weather is sweet']) 11 | 12 | bag = count.fit_transform(docs) 13 | print(count.vocabulary_) 14 | print(bag.toarray()) 15 | 16 | import re 17 | def preprocessor(text): 18 | text = re.sub('<[^>]*>', '', text) 19 | emoticons = re.findall('(?::|;|=)(?:-)?(?:\)|\(|D|P)', text) 20 | text = re.sub('[\W]+', ' ', text.lower()) + ''.join(emoticons).replace('-', '') 21 | return text 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /ch9-pickle-model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import re 3 | from nltk.corpus import stopwords 4 | stop = stopwords.words('english') 5 | 6 | def tokenizer(text): 7 | text = re.sub('<[^>]*>', '', text) 8 | emoticons = re.findall('(?::|;|=)(?:-)?(?:\)|\(|D|P)',text.lower()) 9 | text = re.sub('[\W]+', ' ', text.lower()) + ' '.join(emoticons).replace('-', '') 10 | tokenized = [w for w in text.split() if w not in stop] 11 | return tokenized 12 | 13 | 14 | def stream_docs(path): 15 | with open(path, 'r', encoding='utf-8') as csv: 16 | next(csv) # skip header 17 | for line in csv: 18 | text, label = line[:-3], int(line[-2]) 19 | yield text, label 20 | 21 | 22 | def get_minibatch(doc_stream, size): 23 | docs, y = [], [] 24 | try: 25 | for _ in range(size): 26 | text, label = 
next(doc_stream) 27 | docs.append(text) 28 | y.append(label) 29 | except StopIteration: 30 | return None, None 31 | 32 | return docs, y 33 | 34 | 35 | from sklearn.feature_extraction.text import HashingVectorizer 36 | from sklearn.linear_model import SGDClassifier 37 | 38 | vect = HashingVectorizer(decode_error='ignore', 39 | n_features=2**21, 40 | preprocessor=None, 41 | tokenizer=tokenizer) 42 | 43 | # regularized linear models with stochastic gradient descent (SGD) 44 | clf = SGDClassifier(loss='log', random_state=1, n_iter=1) 45 | doc_stream = stream_docs(path='./movie_data.csv') 46 | 47 | import pyprind 48 | pbar = pyprind.ProgBar(45) 49 | classes = np.array([0, 1]) 50 | for _ in range(45): 51 | X_train, y_train = get_minibatch(doc_stream, size=1000) 52 | if not X_train: 53 | break 54 | 55 | X_train = vect.transform(X_train) 56 | clf.partial_fit(X_train, y_train, classes=classes) 57 | pbar.update() 58 | 59 | 60 | X_test, y_test = get_minibatch(doc_stream, size=5000) 61 | X_test = vect.transform(X_test) 62 | print('Accuracy: %.3f' % clf.score(X_test, y_test)) 63 | 64 | clf = clf.partial_fit(X_test, y_test) 65 | 66 | 67 | 68 | import pickle 69 | import os 70 | 71 | dest = os.path.join('movieclassifier', 'pkl_objects') 72 | if not os.path.exists(dest): 73 | os.makedirs(dest) 74 | 75 | pickle.dump(stop, open(os.path.join(dest, 'stopwords.pkl'),'wb'), protocol=4) 76 | pickle.dump(clf, open(os.path.join(dest, 'classifier.pkl'), 'wb'),protocol=4) 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /movieclassifier/__pycache__/update.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rupskygill/python-ML-book-Raschka/3e69c6f9ee8514888b45e8a882c25bafafd7f3d5/movieclassifier/__pycache__/update.cpython-35.pyc -------------------------------------------------------------------------------- /movieclassifier/__pycache__/vectorizer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rupskygill/python-ML-book-Raschka/3e69c6f9ee8514888b45e8a882c25bafafd7f3d5/movieclassifier/__pycache__/vectorizer.cpython-35.pyc -------------------------------------------------------------------------------- /movieclassifier/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template, request 2 | from wtforms import Form, TextAreaField, validators 3 | import pickle 4 | import sqlite3 5 | import os 6 | import numpy as np 7 | 8 | # import HashingVectorizer from local dir 9 | from vectorizer import vect 10 | 11 | ######## Preparing the Classifier 12 | cur_dir = os.path.dirname(__file__) 13 | clf = pickle.load(open(os.path.join(cur_dir, 14 | 'pkl_objects/classifier.pkl'), 'rb')) 15 | db = os.path.join(cur_dir, 'reviews.sqlite') 16 | 17 | def classify(document): 18 | label = {0: 'negative', 1: 'positive'} 19 | X = vect.transform([document]) 20 | y = clf.predict(X)[0] 21 | proba = clf.predict_proba(X).max() 22 | return label[y], proba 23 | 24 | def train(document, y): 25 | X = vect.transform([document]) 26 | clf.partial_fit(X, [y]) 27 | 28 | def sqlite_entry(path, document, y): 29 | conn = sqlite3.connect(path) 30 | c = conn.cursor() 31 | c.execute("INSERT INTO review_db (review, sentiment, date)"\ 32 | " VALUES (?, ?, DATETIME('now'))", (document, y)) 33 | conn.commit() 34 | conn.close() 35 | 36 | 37 | app = Flask(__name__) 38 | class 
ReviewForm(Form): 39 | moviereview = TextAreaField('', 40 | [validators.DataRequired(), 41 | validators.length(min=15)]) 42 | 43 | @app.route('/') 44 | def index(): 45 | form = ReviewForm(request.form) 46 | return render_template('reviewform.html', form=form) 47 | 48 | @app.route('/results', methods=['POST']) 49 | def results(): 50 | form = ReviewForm(request.form) 51 | if request.method == 'POST' and form.validate(): 52 | review = request.form['moviereview'] 53 | y, proba = classify(review) 54 | return render_template('results.html', 55 | content=review, 56 | prediction=y, 57 | probability=round(proba*100, 2)) 58 | return render_template('reviewform.html', form=form) 59 | 60 | @app.route('/thanks', methods=['POST']) 61 | def feedback(): 62 | feedback = request.form['feedback_button'] 63 | review = request.form['review'] 64 | prediction = request.form['prediction'] 65 | 66 | inv_label = {'negative': 0, 'positive': 1} 67 | y = inv_label[prediction] 68 | if feedback == 'Incorrect': 69 | y = int(not(y)) 70 | train(review, y) 71 | sqlite_entry(db, review, y) 72 | return render_template('thanks.html') 73 | 74 | if __name__ == '__main__': 75 | # Update classifier from db on startup 76 | from update import update_model 77 | clf = update_model(db_path="reviews.sqlite", model=clf, batch_size=10000) 78 | 79 | app.run(debug=True) 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /movieclassifier/ch9-ex.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import re 3 | import os 4 | from vectorizer import vect 5 | clf = pickle.load(open(os.path.join('pkl_objects', 'classifier.pkl'), 'rb')) 6 | 7 | import numpy as np 8 | label = {0:'negative', 1:'positive'} 9 | example = ['I love this movie'] 10 | X = vect.transform(example) 11 | print('Prediction: %s\nProbability: %.2f%%' %(label[clf.predict(X)[0]], np.max(clf.predict_proba(X))*100)) 12 | 13 | 14 | 15 | import sqlite3 16 | import os 17 | os.unlink('reviews.sqlite') 18 | conn = sqlite3.connect('reviews.sqlite') 19 | c = conn.cursor() 20 | 21 | c.execute('CREATE TABLE review_db (review TEXT, sentiment INTEGER, date TEXT)') 22 | example1 = 'I love this movie' 23 | c.execute("INSERT INTO review_db (review, sentiment, date) VALUES (?, ?, DATETIME('now'))", (example1, 1)) 24 | 25 | example2 = 'I disliked this movie' 26 | c.execute("INSERT INTO review_db (review, sentiment, date) VALUES (?, ?, DATETIME('now'))", (example2, 0)) 27 | 28 | conn.commit() 29 | conn.close() 30 | 31 | 32 | conn = sqlite3.connect('reviews.sqlite') 33 | c = conn.cursor() 34 | c.execute("SELECT * FROM review_db WHERE date BETWEEN '2015-01-01 00:00:00' AND DATETIME('now')") 35 | results = c.fetchall() 36 | conn.close() 37 | print(results) 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /movieclassifier/pkl_objects/classifier.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rupskygill/python-ML-book-Raschka/3e69c6f9ee8514888b45e8a882c25bafafd7f3d5/movieclassifier/pkl_objects/classifier.pkl -------------------------------------------------------------------------------- /movieclassifier/pkl_objects/stopwords.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rupskygill/python-ML-book-Raschka/3e69c6f9ee8514888b45e8a882c25bafafd7f3d5/movieclassifier/pkl_objects/stopwords.pkl 
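The movieclassifier app above persists user feedback through sqlite_entry(); as a quick sanity check, the stored rows can be inspected directly. This is a minimal sketch, not part of the repository, and assumes reviews.sqlite with the review_db table from ch9-ex.py already exists in the working directory:

import sqlite3

# Connect to the feedback database written by app.py / ch9-ex.py
conn = sqlite3.connect('reviews.sqlite')
c = conn.cursor()
# Count all stored reviews and how many were labeled positive (sentiment = 1)
c.execute("SELECT COUNT(*), COALESCE(SUM(sentiment), 0) FROM review_db")
n_reviews, n_positive = c.fetchone()
conn.close()
print('%d stored reviews, %d labeled positive' % (n_reviews, n_positive))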
-------------------------------------------------------------------------------- /movieclassifier/reviews.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rupskygill/python-ML-book-Raschka/3e69c6f9ee8514888b45e8a882c25bafafd7f3d5/movieclassifier/reviews.sqlite -------------------------------------------------------------------------------- /movieclassifier/static/style.css: -------------------------------------------------------------------------------- 1 | body{ 2 | width:600px; 3 | } 4 | .button{ 5 | padding-top: 20px; 6 | } -------------------------------------------------------------------------------- /movieclassifier/templates/_formhelpers.html: -------------------------------------------------------------------------------- 1 | {% macro render_field(field) %} 2 |
{{ field.label }} 3 |
{{ field(**kwargs)|safe }} 4 | {% if field.errors %} 5 | 10 | {% endif %} 11 |
12 | 13 | {% endmacro %} -------------------------------------------------------------------------------- /movieclassifier/templates/results.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Movie Classification 5 | 6 | 7 | 8 | 9 |

Your movie review:

10 |
{{ content }}
11 | 12 |

Prediction:

13 |
This movie review is {{ prediction }} 14 | (probability: {{ probability }}%).
15 | 16 |
17 |
18 | 19 | 20 | 21 | 22 |
23 |
24 | 25 |
26 |
27 | 28 |
29 |
30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /movieclassifier/templates/reviewform.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Movie Classification 5 | 6 | 7 | 8 |

Please enter your movie review:

9 | 10 | {% from "_formhelpers.html" import render_field %} 11 | 12 |
13 |
14 | {{ render_field(form.moviereview, cols='30', rows='10') }} 15 |
16 |
17 | 18 |
19 |
20 | 21 | 22 | -------------------------------------------------------------------------------- /movieclassifier/templates/thanks.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Movie Classification 5 | 6 | 7 | 8 |

Thank you for your feedback!

9 |
10 |
11 | 12 |
13 |
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /movieclassifier/update.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import sqlite3 3 | import numpy as np 4 | import os 5 | 6 | # import HashingVectorizer from local dir 7 | from vectorizer import vect 8 | 9 | def update_model(db_path, model, batch_size=10000): 10 | 11 | conn = sqlite3.connect(db_path) 12 | c = conn.cursor() 13 | c.execute('SELECT * from review_db') 14 | 15 | results = c.fetchmany(batch_size) 16 | while results: 17 | data = np.array(results) 18 | X = data[:, 0] 19 | y = data[:, 1].astype(int) 20 | 21 | classes = np.array([0, 1]) 22 | X_train = vect.transform(X) 23 | model.partial_fit(X_train, y, classes=classes) 24 | results = c.fetchmany(batch_size) 25 | 26 | conn.close() 27 | return model 28 | 29 | -------------------------------------------------------------------------------- /movieclassifier/vectorizer.py: -------------------------------------------------------------------------------- 1 | from sklearn.feature_extraction.text import HashingVectorizer 2 | import re 3 | import os 4 | import pickle 5 | 6 | cur_dir = os.path.dirname(__file__) 7 | stop = pickle.load(open( 8 | os.path.join(cur_dir, 9 | 'pkl_objects', 10 | 'stopwords.pkl'), 'rb')) 11 | 12 | def tokenizer(text): 13 | text = re.sub('<[^>]*>', '', text) 14 | emoticons = re.findall('(?::|;|=)(?:-)?(?:\)|\(|D|P)', 15 | text.lower()) 16 | text = re.sub('[\W]+', ' ', text.lower()) \ 17 | + ' '.join(emoticons).replace('-', '') 18 | tokenized = [w for w in text.split() if w not in stop] 19 | return tokenized 20 | 21 | vect = HashingVectorizer(decode_error='ignore', 22 | n_features=2**21, 23 | preprocessor=None, 24 | tokenizer=tokenizer) 25 | 26 | -------------------------------------------------------------------------------- /neuralnet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.special import expit 3 | import sys 4 | 5 | 6 | class NeuralNetMLP(object): 7 | """ Feedforward neural network / Multi-layer perceptron classifier. 8 | 9 | Parameters 10 | ------------ 11 | n_output : int 12 | Number of output units, should be equal to the 13 | number of unique class labels. 14 | 15 | n_features : int 16 | Number of features (dimensions) in the target dataset. 17 | Should be equal to the number of columns in the X array. 18 | 19 | n_hidden : int (default: 30) 20 | Number of hidden units. 21 | 22 | l1 : float (default: 0.0) 23 | Lambda value for L1-regularization. 24 | No regularization if l1=0.0 (default) 25 | 26 | l2 : float (default: 0.0) 27 | Lambda value for L2-regularization. 28 | No regularization if l2=0.0 (default) 29 | 30 | epochs : int (default: 500) 31 | Number of passes over the training set. 32 | 33 | eta : float (default: 0.001) 34 | Learning rate. 35 | 36 | alpha : float (default: 0.0) 37 | Momentum constant. Factor multiplied with the 38 | gradient of the previous epoch t-1 to improve 39 | learning speed 40 | w(t) := w(t) - (grad(t) + alpha*grad(t-1)) 41 | 42 | decrease_const : float (default: 0.0) 43 | Decrease constant. Shrinks the learning rate 44 | after each epoch via eta / (1 + epoch*decrease_const) 45 | 46 | shuffle : bool (default: True) 47 | Shuffles training data every epoch if True to prevent circles. 48 | 49 | minibatches : int (default: 1) 50 | Divides training data into k minibatches for efficiency. 51 | Normal gradient descent learning if k=1 (default). 
52 | 53 | random_state : int (default: None) 54 | Set random state for shuffling and initializing the weights. 55 | 56 | Attributes 57 | ----------- 58 | cost_ : list 59 | Sum of squared errors after each epoch. 60 | 61 | """ 62 | def __init__(self, n_output, n_features, n_hidden=30, 63 | l1=0.0, l2=0.0, epochs=500, eta=0.001, 64 | alpha=0.0, decrease_const=0.0, shuffle=True, 65 | minibatches=1, random_state=None): 66 | 67 | np.random.seed(random_state) 68 | self.n_output = n_output 69 | self.n_features = n_features 70 | self.n_hidden = n_hidden 71 | self.w1, self.w2 = self._initialize_weights() 72 | self.l1 = l1 73 | self.l2 = l2 74 | self.epochs = epochs 75 | self.eta = eta 76 | self.alpha = alpha 77 | self.decrease_const = decrease_const 78 | self.shuffle = shuffle 79 | self.minibatches = minibatches 80 | 81 | def _encode_labels(self, y, k): 82 | """Encode labels into one-hot representation 83 | 84 | Parameters 85 | ------------ 86 | y : array, shape = [n_samples] 87 | Target values. 88 | 89 | Returns 90 | ----------- 91 | onehot : array, shape = (n_labels, n_samples) 92 | 93 | """ 94 | onehot = np.zeros((k, y.shape[0])) 95 | for idx, val in enumerate(y): 96 | onehot[val, idx] = 1.0 97 | return onehot 98 | 99 | def _initialize_weights(self): 100 | """Initialize weights with small random numbers.""" 101 | w1 = np.random.uniform(-1.0, 1.0, size=self.n_hidden*(self.n_features + 1)) 102 | w1 = w1.reshape(self.n_hidden, self.n_features + 1) 103 | w2 = np.random.uniform(-1.0, 1.0, size=self.n_output*(self.n_hidden + 1)) 104 | w2 = w2.reshape(self.n_output, self.n_hidden + 1) 105 | return w1, w2 106 | 107 | def _sigmoid(self, z): 108 | """Compute logistic function (sigmoid) 109 | 110 | Uses scipy.special.expit to avoid overflow 111 | error for very small input values z. 112 | 113 | """ 114 | # return 1.0 / (1.0 + np.exp(-z)) 115 | return expit(z) 116 | 117 | def _sigmoid_gradient(self, z): 118 | """Compute gradient of the logistic function""" 119 | sg = self._sigmoid(z) 120 | return sg * (1 - sg) 121 | 122 | def _add_bias_unit(self, X, how='column'): 123 | """Add bias unit (column or row of 1s) to array at index 0""" 124 | if how == 'column': 125 | X_new = np.ones((X.shape[0], X.shape[1]+1)) 126 | X_new[:, 1:] = X 127 | elif how == 'row': 128 | X_new = np.ones((X.shape[0]+1, X.shape[1])) 129 | X_new[1:, :] = X 130 | else: 131 | raise AttributeError('`how` must be `column` or `row`') 132 | return X_new 133 | 134 | def _feedforward(self, X, w1, w2): 135 | """Compute feedforward step 136 | 137 | Parameters 138 | ----------- 139 | X : array, shape = [n_samples, n_features] 140 | Input layer with original features. 141 | 142 | w1 : array, shape = [n_hidden_units, n_features] 143 | Weight matrix for input layer -> hidden layer. 144 | 145 | w2 : array, shape = [n_output_units, n_hidden_units] 146 | Weight matrix for hidden layer -> output layer. 147 | 148 | Returns 149 | ---------- 150 | a1 : array, shape = [n_samples, n_features+1] 151 | Input values with bias unit. 152 | 153 | z2 : array, shape = [n_hidden, n_samples] 154 | Net input of hidden layer. 155 | 156 | a2 : array, shape = [n_hidden+1, n_samples] 157 | Activation of hidden layer. 158 | 159 | z3 : array, shape = [n_output_units, n_samples] 160 | Net input of output layer. 161 | 162 | a3 : array, shape = [n_output_units, n_samples] 163 | Activation of output layer. 
164 | 165 | """ 166 | a1 = self._add_bias_unit(X, how='column') 167 | z2 = w1.dot(a1.T) 168 | a2 = self._sigmoid(z2) 169 | a2 = self._add_bias_unit(a2, how='row') 170 | z3 = w2.dot(a2) 171 | a3 = self._sigmoid(z3) 172 | return a1, z2, a2, z3, a3 173 | 174 | def _L2_reg(self, lambda_, w1, w2): 175 | """Compute L2-regularization cost""" 176 | return (lambda_/2.0) * (np.sum(w1[:, 1:] ** 2) + np.sum(w2[:, 1:] ** 2)) 177 | 178 | def _L1_reg(self, lambda_, w1, w2): 179 | """Compute L1-regularization cost""" 180 | return (lambda_/2.0) * (np.abs(w1[:, 1:]).sum() + np.abs(w2[:, 1:]).sum()) 181 | 182 | def _get_cost(self, y_enc, output, w1, w2): 183 | """Compute cost function. 184 | 185 | y_enc : array, shape = (n_labels, n_samples) 186 | one-hot encoded class labels. 187 | 188 | output : array, shape = [n_output_units, n_samples] 189 | Activation of the output layer (feedforward) 190 | 191 | w1 : array, shape = [n_hidden_units, n_features] 192 | Weight matrix for input layer -> hidden layer. 193 | 194 | w2 : array, shape = [n_output_units, n_hidden_units] 195 | Weight matrix for hidden layer -> output layer. 196 | 197 | Returns 198 | --------- 199 | cost : float 200 | Regularized cost. 201 | 202 | """ 203 | term1 = -y_enc * (np.log(output)) 204 | term2 = (1 - y_enc) * np.log(1 - output) 205 | cost = np.sum(term1 - term2) 206 | L1_term = self._L1_reg(self.l1, w1, w2) 207 | L2_term = self._L2_reg(self.l2, w1, w2) 208 | cost = cost + L1_term + L2_term 209 | return cost 210 | 211 | def _get_gradient(self, a1, a2, a3, z2, y_enc, w1, w2): 212 | """ Compute gradient step using backpropagation. 213 | 214 | Parameters 215 | ------------ 216 | a1 : array, shape = [n_samples, n_features+1] 217 | Input values with bias unit. 218 | 219 | a2 : array, shape = [n_hidden+1, n_samples] 220 | Activation of hidden layer. 221 | 222 | a3 : array, shape = [n_output_units, n_samples] 223 | Activation of output layer. 224 | 225 | z2 : array, shape = [n_hidden, n_samples] 226 | Net input of hidden layer. 227 | 228 | y_enc : array, shape = (n_labels, n_samples) 229 | one-hot encoded class labels. 230 | 231 | w1 : array, shape = [n_hidden_units, n_features] 232 | Weight matrix for input layer -> hidden layer. 233 | 234 | w2 : array, shape = [n_output_units, n_hidden_units] 235 | Weight matrix for hidden layer -> output layer. 236 | 237 | Returns 238 | --------- 239 | 240 | grad1 : array, shape = [n_hidden_units, n_features] 241 | Gradient of the weight matrix w1. 242 | 243 | grad2 : array, shape = [n_output_units, n_hidden_units] 244 | Gradient of the weight matrix w2. 245 | 246 | """ 247 | # backpropagation 248 | sigma3 = a3 - y_enc 249 | z2 = self._add_bias_unit(z2, how='row') 250 | sigma2 = w2.T.dot(sigma3) * self._sigmoid_gradient(z2) 251 | sigma2 = sigma2[1:, :] 252 | grad1 = sigma2.dot(a1) 253 | grad2 = sigma3.dot(a2.T) 254 | 255 | # regularize 256 | grad1[:, 1:] += (w1[:, 1:] * (self.l1 + self.l2)) 257 | grad2[:, 1:] += (w2[:, 1:] * (self.l1 + self.l2)) 258 | 259 | return grad1, grad2 260 | 261 | def predict(self, X): 262 | """Predict class labels 263 | 264 | Parameters 265 | ----------- 266 | X : array, shape = [n_samples, n_features] 267 | Input layer with original features. 268 | 269 | Returns: 270 | ---------- 271 | y_pred : array, shape = [n_samples] 272 | Predicted class labels. 
273 | 274 | """ 275 | if len(X.shape) != 2: 276 | raise AttributeError('X must be a [n_samples, n_features] array.\n' 277 | 'Use X[:,None] for 1-feature classification,' 278 | '\nor X[[i]] for 1-sample classification') 279 | 280 | a1, z2, a2, z3, a3 = self._feedforward(X, self.w1, self.w2) 281 | y_pred = np.argmax(z3, axis=0) 282 | return y_pred 283 | 284 | def fit(self, X, y, print_progress=False): 285 | """ Learn weights from training data. 286 | 287 | Parameters 288 | ----------- 289 | X : array, shape = [n_samples, n_features] 290 | Input layer with original features. 291 | 292 | y : array, shape = [n_samples] 293 | Target class labels. 294 | 295 | print_progress : bool (default: False) 296 | Prints progress as the number of epochs 297 | to stderr. 298 | 299 | Returns: 300 | ---------- 301 | self 302 | 303 | """ 304 | self.cost_ = [] 305 | X_data, y_data = X.copy(), y.copy() 306 | y_enc = self._encode_labels(y, self.n_output) 307 | 308 | delta_w1_prev = np.zeros(self.w1.shape) 309 | delta_w2_prev = np.zeros(self.w2.shape) 310 | 311 | for i in range(self.epochs): 312 | 313 | # adaptive learning rate 314 | self.eta /= (1 + self.decrease_const*i) 315 | 316 | if print_progress: 317 | sys.stderr.write('\rEpoch: %d/%d' % (i+1, self.epochs)) 318 | sys.stderr.flush() 319 | 320 | if self.shuffle: 321 | idx = np.random.permutation(y_data.shape[0]) 322 | X_data, y_enc = X_data[idx], y_enc[:, idx] 323 | 324 | mini = np.array_split(range(y_data.shape[0]), self.minibatches) 325 | for idx in mini: 326 | 327 | # feedforward 328 | a1, z2, a2, z3, a3 = self._feedforward(X_data[idx], self.w1, self.w2) 329 | cost = self._get_cost(y_enc=y_enc[:, idx], 330 | output=a3, 331 | w1=self.w1, 332 | w2=self.w2) 333 | self.cost_.append(cost) 334 | 335 | # compute gradient via backpropagation 336 | grad1, grad2 = self._get_gradient(a1=a1, a2=a2, 337 | a3=a3, z2=z2, 338 | y_enc=y_enc[:, idx], 339 | w1=self.w1, 340 | w2=self.w2) 341 | 342 | delta_w1, delta_w2 = self.eta * grad1, self.eta * grad2 343 | self.w1 -= (delta_w1 + (self.alpha * delta_w1_prev)) 344 | self.w2 -= (delta_w2 + (self.alpha * delta_w2_prev)) 345 | delta_w1_prev, delta_w2_prev = delta_w1, delta_w2 346 | 347 | return self 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | ### Improved Neural Net with back propogation 356 | 357 | class MLPGradientCheck(object): 358 | """ Feedforward neural network / Multi-layer perceptron classifier. 359 | 360 | Parameters 361 | ------------ 362 | n_output : int 363 | Number of output units, should be equal to the 364 | number of unique class labels. 365 | 366 | n_features : int 367 | Number of features (dimensions) in the target dataset. 368 | Should be equal to the number of columns in the X array. 369 | 370 | n_hidden : int (default: 30) 371 | Number of hidden units. 372 | 373 | l1 : float (default: 0.0) 374 | Lambda value for L1-regularization. 375 | No regularization if l1=0.0 (default) 376 | 377 | l2 : float (default: 0.0) 378 | Lambda value for L2-regularization. 379 | No regularization if l2=0.0 (default) 380 | 381 | epochs : int (default: 500) 382 | Number of passes over the training set. 383 | 384 | eta : float (default: 0.001) 385 | Learning rate. 386 | 387 | alpha : float (default: 0.0) 388 | Momentum constant. Factor multiplied with the 389 | gradient of the previous epoch t-1 to improve 390 | learning speed 391 | w(t) := w(t) - (grad(t) + alpha*grad(t-1)) 392 | 393 | decrease_const : float (default: 0.0) 394 | Decrease constant. 
Shrinks the learning rate 395 | after each epoch via eta / (1 + epoch*decrease_const) 396 | 397 | shuffle : bool (default: False) 398 | Shuffles training data every epoch if True to prevent circles. 399 | 400 | minibatches : int (default: 1) 401 | Divides training data into k minibatches for efficiency. 402 | Normal gradient descent learning if k=1 (default). 403 | 404 | random_state : int (default: None) 405 | Set random state for shuffling and initializing the weights. 406 | 407 | Attributes 408 | ----------- 409 | cost_ : list 410 | Sum of squared errors after each epoch. 411 | 412 | """ 413 | def __init__(self, n_output, n_features, n_hidden=30, 414 | l1=0.0, l2=0.0, epochs=500, eta=0.001, 415 | alpha=0.0, decrease_const=0.0, shuffle=True, 416 | minibatches=1, random_state=None): 417 | 418 | np.random.seed(random_state) 419 | self.n_output = n_output 420 | self.n_features = n_features 421 | self.n_hidden = n_hidden 422 | self.w1, self.w2 = self._initialize_weights() 423 | self.l1 = l1 424 | self.l2 = l2 425 | self.epochs = epochs 426 | self.eta = eta 427 | self.alpha = alpha 428 | self.decrease_const = decrease_const 429 | self.shuffle = shuffle 430 | self.minibatches = minibatches 431 | 432 | def _encode_labels(self, y, k): 433 | """Encode labels into one-hot representation 434 | 435 | Parameters 436 | ------------ 437 | y : array, shape = [n_samples] 438 | Target values. 439 | 440 | Returns 441 | ----------- 442 | onehot : array, shape = (n_labels, n_samples) 443 | 444 | """ 445 | onehot = np.zeros((k, y.shape[0])) 446 | for idx, val in enumerate(y): 447 | onehot[val, idx] = 1.0 448 | return onehot 449 | 450 | def _initialize_weights(self): 451 | """Initialize weights with small random numbers.""" 452 | w1 = np.random.uniform(-1.0, 1.0, size=self.n_hidden*(self.n_features + 1)) 453 | w1 = w1.reshape(self.n_hidden, self.n_features + 1) 454 | w2 = np.random.uniform(-1.0, 1.0, size=self.n_output*(self.n_hidden + 1)) 455 | w2 = w2.reshape(self.n_output, self.n_hidden + 1) 456 | return w1, w2 457 | 458 | def _sigmoid(self, z): 459 | """Compute logistic function (sigmoid) 460 | 461 | Uses scipy.special.expit to avoid overflow 462 | error for very small input values z. 463 | 464 | """ 465 | # return 1.0 / (1.0 + np.exp(-z)) 466 | return expit(z) 467 | 468 | def _sigmoid_gradient(self, z): 469 | """Compute gradient of the logistic function""" 470 | sg = self._sigmoid(z) 471 | return sg * (1 - sg) 472 | 473 | def _add_bias_unit(self, X, how='column'): 474 | """Add bias unit (column or row of 1s) to array at index 0""" 475 | if how == 'column': 476 | X_new = np.ones((X.shape[0], X.shape[1]+1)) 477 | X_new[:, 1:] = X 478 | elif how == 'row': 479 | X_new = np.ones((X.shape[0]+1, X.shape[1])) 480 | X_new[1:, :] = X 481 | else: 482 | raise AttributeError('`how` must be `column` or `row`') 483 | return X_new 484 | 485 | def _feedforward(self, X, w1, w2): 486 | """Compute feedforward step 487 | 488 | Parameters 489 | ----------- 490 | X : array, shape = [n_samples, n_features] 491 | Input layer with original features. 492 | 493 | w1 : array, shape = [n_hidden_units, n_features] 494 | Weight matrix for input layer -> hidden layer. 495 | 496 | w2 : array, shape = [n_output_units, n_hidden_units] 497 | Weight matrix for hidden layer -> output layer. 498 | 499 | Returns 500 | ---------- 501 | a1 : array, shape = [n_samples, n_features+1] 502 | Input values with bias unit. 503 | 504 | z2 : array, shape = [n_hidden, n_samples] 505 | Net input of hidden layer. 
506 | 507 | a2 : array, shape = [n_hidden+1, n_samples] 508 | Activation of hidden layer. 509 | 510 | z3 : array, shape = [n_output_units, n_samples] 511 | Net input of output layer. 512 | 513 | a3 : array, shape = [n_output_units, n_samples] 514 | Activation of output layer. 515 | 516 | """ 517 | a1 = self._add_bias_unit(X, how='column') 518 | z2 = w1.dot(a1.T) 519 | a2 = self._sigmoid(z2) 520 | a2 = self._add_bias_unit(a2, how='row') 521 | z3 = w2.dot(a2) 522 | a3 = self._sigmoid(z3) 523 | return a1, z2, a2, z3, a3 524 | 525 | def _L2_reg(self, lambda_, w1, w2): 526 | """Compute L2-regularization cost""" 527 | return (lambda_/2.0) * (np.sum(w1[:, 1:] ** 2) + np.sum(w2[:, 1:] ** 2)) 528 | 529 | def _L1_reg(self, lambda_, w1, w2): 530 | """Compute L1-regularization cost""" 531 | return (lambda_/2.0) * (np.abs(w1[:, 1:]).sum() + np.abs(w2[:, 1:]).sum()) 532 | 533 | def _get_cost(self, y_enc, output, w1, w2): 534 | """Compute cost function. 535 | 536 | y_enc : array, shape = (n_labels, n_samples) 537 | one-hot encoded class labels. 538 | 539 | output : array, shape = [n_output_units, n_samples] 540 | Activation of the output layer (feedforward) 541 | 542 | w1 : array, shape = [n_hidden_units, n_features] 543 | Weight matrix for input layer -> hidden layer. 544 | 545 | w2 : array, shape = [n_output_units, n_hidden_units] 546 | Weight matrix for hidden layer -> output layer. 547 | 548 | Returns 549 | --------- 550 | cost : float 551 | Regularized cost. 552 | 553 | """ 554 | term1 = -y_enc * (np.log(output)) 555 | term2 = (1 - y_enc) * np.log(1 - output) 556 | cost = np.sum(term1 - term2) 557 | L1_term = self._L1_reg(self.l1, w1, w2) 558 | L2_term = self._L2_reg(self.l2, w1, w2) 559 | cost = cost + L1_term + L2_term 560 | return cost 561 | 562 | def _get_gradient(self, a1, a2, a3, z2, y_enc, w1, w2): 563 | """ Compute gradient step using backpropagation. 564 | 565 | Parameters 566 | ------------ 567 | a1 : array, shape = [n_samples, n_features+1] 568 | Input values with bias unit. 569 | 570 | a2 : array, shape = [n_hidden+1, n_samples] 571 | Activation of hidden layer. 572 | 573 | a3 : array, shape = [n_output_units, n_samples] 574 | Activation of output layer. 575 | 576 | z2 : array, shape = [n_hidden, n_samples] 577 | Net input of hidden layer. 578 | 579 | y_enc : array, shape = (n_labels, n_samples) 580 | one-hot encoded class labels. 581 | 582 | w1 : array, shape = [n_hidden_units, n_features] 583 | Weight matrix for input layer -> hidden layer. 584 | 585 | w2 : array, shape = [n_output_units, n_hidden_units] 586 | Weight matrix for hidden layer -> output layer. 587 | 588 | Returns 589 | --------- 590 | 591 | grad1 : array, shape = [n_hidden_units, n_features] 592 | Gradient of the weight matrix w1. 593 | 594 | grad2 : array, shape = [n_output_units, n_hidden_units] 595 | Gradient of the weight matrix w2. 
    def _get_gradient(self, a1, a2, a3, z2, y_enc, w1, w2):
        """ Compute gradient step using backpropagation.

        Parameters
        ------------
        a1 : array, shape = [n_samples, n_features+1]
            Input values with bias unit.

        a2 : array, shape = [n_hidden+1, n_samples]
            Activation of hidden layer.

        a3 : array, shape = [n_output_units, n_samples]
            Activation of output layer.

        z2 : array, shape = [n_hidden, n_samples]
            Net input of hidden layer.

        y_enc : array, shape = (n_labels, n_samples)
            one-hot encoded class labels.

        w1 : array, shape = [n_hidden_units, n_features+1]
            Weight matrix for input layer -> hidden layer.

        w2 : array, shape = [n_output_units, n_hidden_units+1]
            Weight matrix for hidden layer -> output layer.

        Returns
        ---------
        grad1 : array, shape = [n_hidden_units, n_features+1]
            Gradient of the weight matrix w1.

        grad2 : array, shape = [n_output_units, n_hidden_units+1]
            Gradient of the weight matrix w2.

        """
        # backpropagation
        sigma3 = a3 - y_enc
        z2 = self._add_bias_unit(z2, how='row')
        sigma2 = w2.T.dot(sigma3) * self._sigmoid_gradient(z2)
        sigma2 = sigma2[1:, :]
        grad1 = sigma2.dot(a1)
        grad2 = sigma3.dot(a2.T)

        # regularize (bias columns are not regularized)
        grad1[:, 1:] += (w1[:, 1:] * (self.l1 + self.l2))
        grad2[:, 1:] += (w2[:, 1:] * (self.l1 + self.l2))

        return grad1, grad2

    def _gradient_checking(self, X, y_enc, w1, w2, epsilon, grad1, grad2):
        """ Apply gradient checking (for debugging only)

        Returns
        ---------
        relative_error : float
            Relative error between the numerically
            approximated gradients and the backpropagated gradients.

        """
        num_grad1 = np.zeros(np.shape(w1))
        epsilon_ary1 = np.zeros(np.shape(w1))
        for i in range(w1.shape[0]):
            for j in range(w1.shape[1]):
                epsilon_ary1[i, j] = epsilon
                a1, z2, a2, z3, a3 = self._feedforward(X, w1 - epsilon_ary1, w2)
                cost1 = self._get_cost(y_enc, a3, w1 - epsilon_ary1, w2)
                a1, z2, a2, z3, a3 = self._feedforward(X, w1 + epsilon_ary1, w2)
                cost2 = self._get_cost(y_enc, a3, w1 + epsilon_ary1, w2)
                num_grad1[i, j] = (cost2 - cost1) / (2 * epsilon)
                epsilon_ary1[i, j] = 0

        num_grad2 = np.zeros(np.shape(w2))
        epsilon_ary2 = np.zeros(np.shape(w2))
        for i in range(w2.shape[0]):
            for j in range(w2.shape[1]):
                epsilon_ary2[i, j] = epsilon
                a1, z2, a2, z3, a3 = self._feedforward(X, w1, w2 - epsilon_ary2)
                cost1 = self._get_cost(y_enc, a3, w1, w2 - epsilon_ary2)
                a1, z2, a2, z3, a3 = self._feedforward(X, w1, w2 + epsilon_ary2)
                cost2 = self._get_cost(y_enc, a3, w1, w2 + epsilon_ary2)
                num_grad2[i, j] = (cost2 - cost1) / (2 * epsilon)
                epsilon_ary2[i, j] = 0

        num_grad = np.hstack((num_grad1.flatten(), num_grad2.flatten()))
        grad = np.hstack((grad1.flatten(), grad2.flatten()))
        norm1 = np.linalg.norm(num_grad - grad)
        norm2 = np.linalg.norm(num_grad)
        norm3 = np.linalg.norm(grad)
        relative_error = norm1 / (norm2 + norm3)
        return relative_error

    def predict(self, X):
        """Predict class labels

        Parameters
        -----------
        X : array, shape = [n_samples, n_features]
            Input layer with original features.

        Returns:
        ----------
        y_pred : array, shape = [n_samples]
            Predicted class labels.

        """
        if len(X.shape) != 2:
            raise AttributeError('X must be a [n_samples, n_features] array.\n'
                                 'Use X[:,None] for 1-feature classification,'
                                 '\nor X[[i]] for 1-sample classification')

        a1, z2, a2, z3, a3 = self._feedforward(X, self.w1, self.w2)
        # sigmoid is monotonic, so argmax over the net input z3 yields the
        # same class as argmax over the activation a3
        y_pred = np.argmax(z3, axis=0)
        return y_pred
    def fit(self, X, y, print_progress=False):
        """ Learn weights from training data.

        Parameters
        -----------
        X : array, shape = [n_samples, n_features]
            Input layer with original features.

        y : array, shape = [n_samples]
            Target class labels.

        print_progress : bool (default: False)
            Prints progress as the number of epochs
            to stderr.

        Returns:
        ----------
        self

        """
        self.cost_ = []
        X_data, y_data = X.copy(), y.copy()
        y_enc = self._encode_labels(y, self.n_output)

        delta_w1_prev = np.zeros(self.w1.shape)
        delta_w2_prev = np.zeros(self.w2.shape)

        for i in range(self.epochs):

            # adaptive learning rate
            self.eta /= (1 + self.decrease_const*i)

            if print_progress:
                sys.stderr.write('\rEpoch: %d/%d' % (i+1, self.epochs))
                sys.stderr.flush()

            if self.shuffle:
                # permute samples and the matching one-hot columns together
                idx = np.random.permutation(y_data.shape[0])
                X_data, y_enc = X_data[idx], y_enc[:, idx]

            mini = np.array_split(range(y_data.shape[0]), self.minibatches)
            for idx in mini:

                # feedforward
                a1, z2, a2, z3, a3 = self._feedforward(X_data[idx],
                                                       self.w1, self.w2)
                cost = self._get_cost(y_enc=y_enc[:, idx],
                                      output=a3,
                                      w1=self.w1,
                                      w2=self.w2)
                self.cost_.append(cost)

                # compute gradient via backpropagation
                grad1, grad2 = self._get_gradient(a1=a1, a2=a2,
                                                  a3=a3, z2=z2,
                                                  y_enc=y_enc[:, idx],
                                                  w1=self.w1,
                                                  w2=self.w2)

                ## start gradient checking (debugging only; very slow)
                grad_diff = self._gradient_checking(X=X_data[idx], y_enc=y_enc[:, idx],
                                                    w1=self.w1, w2=self.w2,
                                                    epsilon=1e-5,
                                                    grad1=grad1, grad2=grad2)

                if grad_diff <= 1e-7:
                    print('Ok: %s' % grad_diff)
                elif grad_diff <= 1e-4:
                    print('Warning: %s' % grad_diff)
                else:
                    print('PROBLEM: %s' % grad_diff)

                # update weights; [alpha * delta_w_prev] for momentum learning
                delta_w1, delta_w2 = self.eta * grad1, self.eta * grad2
                self.w1 -= (delta_w1 + (self.alpha * delta_w1_prev))
                self.w2 -= (delta_w2 + (self.alpha * delta_w2_prev))
                delta_w1_prev, delta_w2_prev = delta_w1, delta_w2

        return self


--------------------------------------------------------------------------------
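A minimal usage sketch (added for illustration; not part of the repository). It assumes the class defined above is named NeuralNetMLP, as in the book, that the file is importable as neuralnet.py, and that the module's own imports (numpy, scipy.special.expit, sys) sit at the top of the file. Tiny random data keeps the built-in gradient checking in fit() fast; the repo's ch12-NeuralNet-MLP.py trains the same class on MNIST.

import numpy as np
from neuralnet import NeuralNetMLP  # assumed module and class name

rng = np.random.RandomState(1)
X = rng.rand(30, 4)             # 30 samples, 4 features (toy data)
y = rng.randint(0, 3, size=30)  # 3 class labels: 0, 1, 2

nn = NeuralNetMLP(n_output=3, n_features=X.shape[1], n_hidden=5,
                  l1=0.0, l2=0.1, epochs=5, eta=0.01,
                  alpha=0.001, decrease_const=0.0,
                  shuffle=True, minibatches=1, random_state=1)
nn.fit(X, y, print_progress=True)

y_pred = nn.predict(X)
print('Training accuracy: %.2f%%' % (100.0 * np.mean(y == y_pred)))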