├── .gitignore ├── README.md ├── ch01 ├── iris_class.ipynb ├── knn.py └── library.ipynb ├── ch02 ├── README.md ├── boosting_decision_tree.ipynb ├── decision_tree.ipynb ├── kmeans.ipynb ├── linear_model.ipynb ├── mlp.ipynb ├── mytree.dot ├── naive_bayes.ipynb ├── random_forest.ipynb ├── svm.ipynb ├── tmp ├── tmp.png ├── tree.dot └── uncertainty_estimates.ipynb ├── ch03 ├── README.md ├── applying_data_transformations.ipynb ├── clustering.ipynb └── dimensionality_reduction.ipynb ├── ch04 ├── README.md ├── automatic_feature_selection.ipynb ├── binning.ipynb ├── categorical_variables.ipynb ├── interactions_and_polynomials.ipynb ├── univariate_non-linear_transformations.ipynb └── utilizing_expert_knowledge.ipynb ├── ch05 ├── README.md ├── cross-validation.ipynb ├── evaluation-metrics-and-scoring.ipynb └── grid-search.ipynb ├── ch06 ├── README.md ├── algorithm_chains_and_pipelines.ipynb ├── building_pipelines.ipynb ├── grid-searching-preprocessing-steps-and-model-parameters.ipynb ├── parameter_selection_with_preprocessing .ipynb ├── selection-of-model-by-grid-searching.ipynb ├── the-general-pipeline-interface.ipynb └── using_pipelines_in_grid-searches.ipynb └── ch07 ├── README.md └── working-with-text-data.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | ch06/data/ 2 | ch07/data/ 3 | # Created by https://www.gitignore.io/api/python 4 | 5 | ### Python ### 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | env/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *,cover 52 | .hypothesis/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # dotenv 88 | .env 89 | 90 | # virtualenv 91 | .venv 92 | venv/ 93 | ENV/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | *cache/ 106 | # End of https://www.gitignore.io/api/python -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [Pythonではじめる機械学習](https://www.oreilly.co.jp/books/9784873117980/)のメモ 2 | 3 | - [第一章 はじめに] 4 | - [第二章 教師あり学習] 5 | - [第三章 教師なし学習と前処理] 6 | - [第四章 データの表現と特徴量エンジニアリング] 7 | - [第五章 モデルの評価と改良] 8 | - [第六章 アルゴリズムチェーンとパイプライン] 9 | - [第七章 テキストデータの処理] 10 | 11 | - 環境 12 | 13 | python3.5 14 | 15 | pip install numpy 16 | pip install scipy 17 | pip install pandas 18 | pip install matplotlib 19 | pip install seaborn 20 | pip install jupyter 21 | pip install mglearn 22 | pip install graphviz 23 | 24 | 25 | - tex 26 | 27 | https://www.codecogs.com/latex/eqneditor.php 28 | 29 | -------------------------------------------------------------------------------- /ch01/knn.py: 
from collections import Counter

import numpy as np


class Knn(object):
    """Brute-force k-nearest-neighbour classifier using Euclidean distance.

    The classifier memorises the training set in ``train`` and, at
    prediction time, labels each query point by majority vote among its
    ``k`` closest training points.
    """

    def train(self, X, y):
        """Store the training data.

        Args:
            X: Training samples, one row per sample
               (array-like of shape ``(num_train, num_features)``).
            y: Label of each training sample (array-like of length
               ``num_train``).
        """
        # np.asarray lets callers pass plain lists; ndarrays pass through
        # without being copied.
        self.X_train = np.asarray(X)
        self.y_train = np.asarray(y)

    def predict(self, X, k=1):
        """Predict a label for every row of ``X``.

        Args:
            X: Query samples, one row per sample.
            k: Number of nearest neighbours that take part in the vote.

        Returns:
            1-D float array holding the predicted label of each query row.
        """
        dists = self.compute_distances(X)
        return self.predict_labels(dists, k=k)

    def compute_distances(self, X):
        """Return the Euclidean distance between every query/training pair.

        Uses the expansion ``||x - t||^2 = ||x||^2 + ||t||^2 - 2 x.t`` so the
        whole matrix is computed with one matrix product and broadcasting.

        Args:
            X: Query samples, one row per sample.

        Returns:
            Array of shape ``(num_test, num_train)`` where entry ``[i, j]``
            is the distance between query ``i`` and training sample ``j``.
        """
        X = np.asarray(X)
        train_sq = np.sum(self.X_train * self.X_train, axis=1)  # (num_train,)
        test_sq = np.sum(X * X, axis=1)                         # (num_test,)
        cross = X.dot(self.X_train.T)                           # (num_test, num_train)

        sq_dists = test_sq[:, np.newaxis] + train_sq[np.newaxis, :] - 2 * cross
        # Round-off can push the squared distance of (near-)identical points
        # slightly below zero; sqrt would then return NaN, and argsort ranks
        # NaN last, i.e. the closest point would be treated as the farthest.
        sq_dists = np.maximum(sq_dists, 0)
        return np.sqrt(sq_dists)

    def predict_labels(self, dists, k=1):
        """Vote among the ``k`` nearest training labels for each query.

        Args:
            dists: Distance matrix as returned by ``compute_distances``.
            k: Number of nearest neighbours that take part in the vote.

        Returns:
            1-D float array with one predicted label per row of ``dists``.
            When several labels share the highest count, the one belonging
            to the nearest neighbour among them is chosen.
        """
        num_test = dists.shape[0]
        # Float result array kept for backward compatibility: labels are
        # stored as floats (e.g. class 1 becomes 1.0).
        y_pred = np.zeros(num_test)
        for i, row in enumerate(dists):
            closest_y = self.y_train[row.argsort()[:k]]
            y_pred[i] = Counter(closest_y).most_common(1)[0][0]
        return y_pred


def main():
    """Train on the iris data set and print the test accuracy for k=3."""
    # scikit-learn is needed only for this demo, so it is imported here;
    # the Knn class itself depends on nothing but NumPy.
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    iris_dataset = load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        iris_dataset['data'], iris_dataset['target'], random_state=0)
    num_test = len(y_test)

    knn = Knn()
    knn.train(X_train, y_train)
    dists = knn.compute_distances(X_test)
    y_test_pred = knn.predict_labels(dists, k=3)

    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))


if __name__ == '__main__':
    main()
/ch02/README.md: -------------------------------------------------------------------------------- 1 | # 教師あり学習 2 | 3 | ## アルゴリズム 4 | 5 | - [最近傍法](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch02/kmeans.ipynb) 6 | - [最強の当て馬] 7 | - 利点 8 | モデルが単純。 9 | パラメータ調整をあまりしなくても精度が高くなる。 10 | 多くの場合は非常に高速。(特徴量とサンプル個数が多くなると遅くなる) 11 | - 欠点 12 | データ前処理が重要。 13 | 数百以上の特徴量をもつデータではうまく機能しない。 14 | 疎なデータは特にうまく機能しない。 15 | - パラメータ 16 | 近傍点の数とデータポイント間の距離測度。 17 | 近傍点の数は3〜5で十分な場合が多いが調整する必要がある。 18 | 距離測度はユークリッド距離で多くの場合うまくいく。 19 | 20 | 21 | - [線形モデル](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch02/linear_model.ipynb) 22 | - [最初に試すべきアルゴリズム] 23 | - 利点 24 | 線形モデルは訓練,予測ともに高速.サンプル数が10万~100万点のデータに対しては,LogisticRegression,Ridgeにsolver='sag'オプションを用いると高速になる場合がある. 25 | 予測手法が理解しやすい 26 | 特徴量の数がサンプル数の個数よりも多いときに性能を発揮する 27 | - 欠点 28 | 係数の意味を理解するのが難しい 29 | - パラメータ 30 | 正則化パラメータ 31 | 回帰モデル(Ridge, Lasso): alpha, LinearSVC, LogisticRegression: C 32 | alphaが大きい場合, またはCが小さい場合は単純なモデルに対応する. 33 | 両方とも対数スケールで値を変更する 34 | 35 | - [ナイーブベイズ](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch02/naive_bayes.ipynb) 36 | - クラス分類にしか使えない.線形モデルより高速.線形モデルより精度が落ちる. 37 | 38 | - [決定木](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch02/decision_tree.ipynb) 39 | - 高速.可視化が可能で説明が容易. 40 | 41 | - [ランダムフォレスト](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch02/random_forest.ipynb) 42 | - 単一の決定木より高速で強力.スパースなデータには適さない. 43 | 44 | - [勾配ブースティング決定木](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch02/boosting_decision_tree.ipynb) 45 | - ランダムフォレストよりも精度が高いが時間がかかる.パラメータが重要. 46 | 47 | - [サポートベクターマシン](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch02/svm.ipynb) 48 | - 中規模なデータセットに対して強力.データのスケールを調整する必要がある.パラメータが重要. 
49 | 50 | - [ニューラルネットワーク](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch02/mlp.ipynb) 51 | - 複雑なモデルを構築出来る.データのスケールを調整する必要がある.パラメータが重要.時間がかかる. 52 | -------------------------------------------------------------------------------- /ch02/boosting_decision_tree.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### 勾配ブースティング回帰木(勾配ブースティングマシン)\n", 8 | "\n", 9 | "一つ前の決定木の誤りを次の決定木が修正するようにして、決定木を順番に作っていく。勾配ブースティングのポイントは浅い決定木のような、簡単なモデルを多数組み合わせることにある。回帰にも分類にも使える。\n", 10 | "ランダムフォレストに比べ、パラメータ設定の影響を受けやすいが正しく設定されていれば、こちらの方が性能がいい。\n", 11 | "\n", 12 | "デフォルトでは勾配ブースティングに乱数性はないが強力な事前枝刈りが用いられる。\n", 13 | "深さ1から5くらいの浅い決定木が用いられることにより、モデルの占めるメモリが小さくなり高速になる。\n", 14 | "\n", 15 | "勾配ブースティング回帰木には、事前枝刈りとアンサンブルに用いる決定木の数を設定するパラメータの他に学習率というパラメータがある。学習率を大きくすると個々の決定木が強く補正を行おうとし、モデルは複雑になる。" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "name": "stdout", 25 | "output_type": "stream", 26 | "text": [ 27 | "Accuracy on training set:1.000\n", 28 | "Accuracy on test set:0.958\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "from sklearn.ensemble import GradientBoostingClassifier\n", 34 | "from sklearn.model_selection import train_test_split\n", 35 | "from sklearn.datasets import load_breast_cancer\n", 36 | "\n", 37 | "cancer = load_breast_cancer()\n", 38 | "\n", 39 | "X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, random_state=0)\n", 40 | "\n", 41 | "gbrt = GradientBoostingClassifier(random_state=0)\n", 42 | "gbrt.fit(X_train, y_train)\n", 43 | "\n", 44 | "print(\"Accuracy on training set:{:.3f}\".format(gbrt.score(X_train, y_train)))\n", 45 | "print(\"Accuracy on test set:{:.3f}\".format(gbrt.score(X_test, y_test)))" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | 
"学習データが100%になってるので過剰適合している。深さを制限して事前枝刈りを行うか、学習率を下げるとよい。" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "深さ1に下げて見る" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 2, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "Accuracy on training set:0.991\n", 72 | "Accuracy on test set:0.972\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "gbrt = GradientBoostingClassifier(random_state=0, max_depth=1)\n", 78 | "gbrt.fit(X_train, y_train)\n", 79 | "\n", 80 | "print(\"Accuracy on training set:{:.3f}\".format(gbrt.score(X_train, y_train)))\n", 81 | "print(\"Accuracy on test set:{:.3f}\".format(gbrt.score(X_test, y_test)))" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "学習率をさげる" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 3, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "Accuracy on training set:0.988\n", 101 | "Accuracy on test set:0.965\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "gbrt = GradientBoostingClassifier(random_state=0, learning_rate=0.01)\n", 107 | "gbrt.fit(X_train, y_train)\n", 108 | "\n", 109 | "print(\"Accuracy on training set:{:.3f}\".format(gbrt.score(X_train, y_train)))\n", 110 | "print(\"Accuracy on test set:{:.3f}\".format(gbrt.score(X_test, y_test)))" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "どちらも訓練データのスコアはさがってるが、テスト精度はあがってる。" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 7, 123 | "metadata": { 124 | "collapsed": true 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "import numpy as np\n", 129 | "\n", 130 | "def plot_feature_importances_cancer(model):\n", 131 | " n_features = cancer.data.shape[1]\n", 132 | " plt.barh(range(n_features), model.feature_importances_, 
align=\"center\")\n", 133 | " plt.yticks(np.arange(n_features), cancer.feature_names)\n", 134 | " plt.xlabel(\"Feature importance\")\n", 135 | " plt.ylabel(\"Feature\")" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 9, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAewAAAEKCAYAAADQN2b/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXe0XVXVvp83oScSPiBqRDBINQYSkpsgTQIiFhCCgKh0\n/YCgVAXB8kOqUuQDQTpCKBGQXlQC0hJCSSHlhi5FpIjUQKgxmb8/5jy5+56cc+65ya3JfMa4I/us\nvfbaa+/DYJ211nznKzMjSZIkSZKuTY/O7kCSJEmSJC2TA3aSJEmSdANywE6SJEmSbkAO2EmSJEnS\nDcgBO0mSJEm6ATlgJ0mSJEk3IAfsJEmSJOkG5ICdJEmSJN2AHLCTJEmSpBuwVGd3IFl8WHXVVa1/\n//6d3Y0kSZJuxZQpU143s74t1csBO2kz+vfvz+TJkzu7G0mSJN0KSf+sp14uiSdJkiRJNyAH7CRJ\nkiTpBuSAnSRJkiTdgBywkyRJkqQbkAN2kiRJknQDcsBOkiRJkm5ADthJkiRJ0g3otgO2pJGSBlQ5\n11fSw5KmStpiEe/TX9L366w3s456oyXtEscXV3uG9kTSKEl7dfR9kyRJkoWnyydOkdTTzOZWODUS\nuA14rMK5rwCNZva/rWivGv2B7wN/asU1dVGpfx2BmZ3fHu02vjSL/kf/pT2aTropz5+8XWd3IUkW\nG9pthi3pSEmHxPEZku6O460ljYnj70lqlDRT0imFa2dLOl3SdGATSSdLekzSDEm/k7QpsANwmqRp\nktYqXDsYOBXYMc4tX6G9YyRNivteKElx7dqS/i5puqRHot2TgS2ircNjJj0+zj8Sfan1HiTpD5Ke\nlPR34JOFc/dKaig882mSHo0+DI/zz0raIer0jDqT4l0cEOUjou51kp6QNKbwTM3eXZQdK+mI0vuS\n9FCcv1HS/xT6doqkiZKeWtSViiRJkmTRaM8l8fFA6X/yDUBvSUtH2ThJnwFOAbYGBgPDJI2M+r2A\nh81sEPA4sBPwRTPbEDjRzB4AbgGONLPBZvZM6aZmNg04Brgmzn1QbM/M7gf+YGbDzGwgsDywfVw+\nBjgn7rsp8ApwNDA+2joD+A/wVTMbAuwGnNXCe9gJWA8YAOwV7VaiF3C3mX0ReBc4EfhqXH981Pkh\nMMvMhgHDgP0krRnnNgIOi/t8HthM0irl767CfS8HjorzjcCvC+eWMrPh0e6vK1ybJEmSdBDtOWBP\nAYZKWhH4CHgQH7i3wAfzYcC9Zvaamf0XHyy/HNfOBa6P41nAh8AfJX0beH8h+lJsD2Cr2ONuxH8w\nfFHSJ4DVzOxGADP70Mwq3Wtp4KK49lp8gKzFl4GrzGyumb0M3F2l3sfA7XHcCNxnZnPiuH+Ubwvs\nJWka8DCwCrBOnJtoZi+a2TxgWlxT891J6gOsZGb3RdFlNH0HADfEv1MKfWiGpP0lTZY0ee77s6q+\nhCRJkmTRaLcBOwab54B9gAfwQXorYG181lyLD0v7zDGYDweuw2fCt9e6sKX2JC0HnAvsYmYbABcB\ny7WircOBV4FB+A+QZRaiP5WYY2YWx/PwHznEAFyKNRBwcMz2B5vZmmZ2R5z7qNDWXHx2vKjvrtTm\nXKrEO5jZhWbWYGYNPVfo08rmkyR
Jknpp7yjx8cARwLg4HgVMjYFpIrClpFUl9QS+B9xX3oCk3kAf\nM/srPlgOilPvAp9YiD6VBufXo+1dAMzsXeDF0rK8pGUlrVDhPn2AV2Ig3RPo2cL9xgG7xf5zP/xH\ny8IyFjgwthaQtK6kXtUq13h3AJjZLOCtwv70nlT4DpIkSZLOp72jxMcDvwQeNLP3JH0YZZjZK5KO\nBu7BZ45/MbObK7TxCeDmmBkL+EmUX40vTR+Cz5afqXDtApjZ25IuAmYC/wYmFU7vCVwg6XhgDrAr\nMAOYGwFro/HZ+fVyWdTtwHvFvkoaYGbFyPUb8WX3x4AX8K2BheVifGn6kQgqew2Plq9GtXcHsJyk\nHwF7A+fHj5NngX0XtnMbrNaHyRkVnCRJ0i6oaRU2qRdVkYZJGg3cZmbXdXyvWoek/nhfB1Y4V1pO\nbxXL9lvH+u19Zhv0LkmSlMQtOUiaYmYNLdXrtolTFgZ1ktQsrt812pwuaVyUjZPL0Ep17pc0KGRX\nl8nlY/+U9G1Jp0a/bi8siT8v6bdxv8mShkgaK+kZSaPKnrskBTsuik8G1oprTwtp2HhJtwCPSTpe\n0mGFNk6SdGhbfh9JkiRJ/SxRAzadJDULjgG+FtfvEGV/xIPykLQusJyZTY9za0U/dgCuBO6JILkP\ngOJP7xfMbHA822h8T/5LwHHR7rZ4JPnweKahkr6My9Weib4eGW0NAQ41s3WBS3AZGpJ6AN+NfiRJ\nkiSdwJI2YHem1GwCMFrSfjQFql0LbB8/Gn6AD7gl/laQdfWkueSrf6HeLYXyh83sXTN7DfhI0kq4\nFGxbYCrwCLA+TVKwciaa2XMAZvY88IakjUrXm9kb5RekrCtJkqRj6PKpSdsSM5sjqSg1m0FzqVm1\ngQzKpGaShuMpUHcBDsJnw7XuPUrSxvjseIqkoWb2hqQ7gR2B7wBDC5fMl3VJKpd8LVVej4IUrKye\ngN+a2QXF/sQedjnvlX2+GH9Xn8Zn3JWe60LgQvA97Ep1kiRJkkVniRqwg5LU7Af4rPT/gClmZpIm\nAmdJWhV4C5eanV3eQMilVjCzv0qagEdXQw2pmaS1zOxh4GFJ3wBWB97AB8Vb8Wxqb7Xhc5YYC5wg\naYyZzZa0Gh4BX48s7kY8y9rSeD71mmSUeJIkSfuxxCyJq8ndazzQD5eavYovbc+XmuF7u/cA0/GB\nvJrU7DZJM4D7aS41O1LuErZW2TWnlYLZ8Nn99LjnFOAd4NK2e9omIrHKn4BnJD2LJ1H5RCxvT4hA\nuNOi+ioq5EY3s4/xd/HnVhqmJEmSJG3MYifr6m6Sqwh0uxdYP5KxdGZfjgVmm1nJJKQHvu+9q5k9\n3dL1KetKku5JSsg6l24n6+pkydWn5E5V0+Nv0yj/SdxrZkniJHfrelzSRXJnrTskLR/nFnD7ktRb\n0l3xuVHSjlH3ZEmX4TnBfwkcoyYHrUoyrPL3NTve06PRft8or+a+VfThfl7ScYU+rR972qOAw+Md\n7YGbn3wWuE4hRUuSJEk6hy4zYNO5kquzcLONQbi06VFJQ/GsXxvjMqn9ImIaPDjtnHDWehvYOcor\nuX19COwU7l5bAadLEnAN0N/MVjeza/Ggs2tqyLDK6QVMjj7cR5ObVi33rSKvR5/OA46IqPDzgTPi\nHV2JO5MNKpOiNSOjxJMkSTqGrjRgd6bkamt84CJctWYBmwM3mtl7ZjYbd64q/aB4ztzGs9Tv/qru\n9iXgN7Hf/XdgNeBTZjYV+KSkz0gaBLxlZv+ifhnWPHzQB9dHb66W3beKtOjERWUpWjPS/CNJkqRj\n6DJR4p0puVoIyp2xlq9Rd3egLzA0nvF5mgxIro0+fpqmwbeiDKsOWhuMUI8TV0UpWivvkyRJkrQB\nXWbADjpFcgXcBRwInCl3DusdfRkt6WR8EN0JNwepiJm9K+nFwjL9c8DTuLvXf2Kw3gr4XOGya3B7\
nz1WBLaOsogzLzP5Tdsse+GB/NS65ut/MZkl6S9IWZjaeFty3CtsMyFOkrga8XDhfTYpWkZR1JUmS\ntB9dccDuDHevQ4ELJf0Qn3EeaGYPRmT5xKhzsZlNVSHhSAzuRfYELsD3nt8Cvoov3d8qqRGYDDxR\nqmxmj8ZS+kshKcPM7pD0BeBB3+pmNrAHvp9c5D1guKRfxbndorxu9y0ze1nSUcDvos/LAztFYNzB\neADaOvEe7yKkaNVofGkW/Y/+S60qyRJGRh8nSdvRpQZsM7sLT9JR+rxu2fmrgKsqXNe7cPwKHrBV\nXmcCMADmR2F/ZGZnSToDD6zaWtLWwA9jsP4eTYPdX8yspFeaiVtwTgd+jM+Od4j98juAXwG34TPg\nG4CdzWyTUj8kfQofUD8fRQeY2QOSfoKvLID/ONggfhz8DTg6ItdfAnY0sw/wQXRD/AfGSsCKkj4A\nTgeWifOXmtlbsUowqSBpGw3sI+m6OB4C/AMfsF8CfotnNtvUzF6Ty7ueAk7ELT2TJEmSDqYrBZ11\nJItDRPpytC4i/TuFPnyHpj3zUoKUY4Bros/X4IFsu0eVbYDpkaM8SZIk6QSW1AF7cYhIf6UNItJr\nMd+tC5/5V8zElrKuJEmSjmGJHLDNXbCKEenjaR6RXotmEen48vt1wPY0OWq1JeUR6bW2MYoR6YOB\nV1kwIn03CrPrasSA/mpsEwzHl+Yr1UtZV5IkSQfQpfawO5jFIiLdzG6StCyuk25tRHqRSn2+GF8a\nv6KeXOIZJZ4kSdJ+LJEz7KCSCchbkgZY+5qAHApsFVHjU4ABZvYIHvw1EU9VenEsY9diT+CQuPej\n+LL6GKAh2t6Lsoj06O/8iPQy7gEGRFrSUsT5LfgPinYxJkmSJEnqZ7Ez/6gHdTODkJZor35LasBT\nlW7RYmWgoaHBJk+e3JZdSJIkWexRdzP/qAd1L4OQJ+SGG09JGiNpG0kTJD0tz8SGpGMlXSHpwSjf\nL8orGobEub2iz9Pj2gX6LeleSadImhj33yKu7SnpNDUZixwQ5f0kjYvrZ0raQtLPcRex1aIPh7f9\nN5okSZLUS3fbwx4P/BSXRjUAy1aRYw3F957vKO3z0iTH+qmkVYA/4paWJmklM3tb0i1Un6mW5Fg7\nlfaey+RYwjOC3Rf3XhvYFd8jn4RnI9scH1x/AZRkYhviUq5ewFRJf8EToexkZu/EPvpD0bcBuM57\nUzN7XdLKZvZmeb9dycVSZjZc0jdxA5BtgB8Cs8xsWOx7T5B0B/BtYKyZnRTPtgIe8b61mX012lyp\n0hciaX9gf4A11lij9reXJEmSLDTdaoZN95NjNZp7XD8K3GW+/9BIc7ONm83sAzN7Hd9HHk4VeVb0\n4dqoi5m9WaO/lcw9tgX2kjQN3ytfBdd5TwL2lfthb2Bm7+IBdJ+XdLakrwPvVLpJMUq8b9++NbqT\nJEmSLArdasDuxnKseYXP82i+slEeRGDUlme19v5FKZiAgyM5ymAzW9PM7jCzcfgPm5fwaPW9zOwt\nYBC+LD4KjxhPkiRJOoluNWAHJTnWuDgeBUyN2etEYEtJq8bS7veoYH4Rcqw+ZvZX4HB8YIIqcix5\nlrNJuByrtBfcJ+4/UtIKknrhcqzxrXyeHSUtF8v0I+I+1eRZnwC+G3WR9EIsmdeSkRUZCxwY2whI\nWldSL0mfA141s4vwgXlItNvDzK7Hl+GHtPK5kiRJkjaku+1hQ/sahPwZzxNebhAyEl9O30GtNAip\ngxnR31WBE8wNOaoZhmwLXA7cJ2kusHKUNzM2qXGvi/Hl8UfkG92vxbONwGVoc3Czkb3wZfhL5XnE\nAX7e0oOk+UeStB1pnJKU0+1m2GZ2l5ktbWbvxed1gZ4xWIEvdb9mZgOBsTH4gefnbpQ0EzjMzIZH\n/u81gQ3l0eOGa4+XBW4six7/FT7THRnLyQ9Gu6UUn3Px4C3wA
fAfku6U+19PBtaQNBUfXEv76p/G\nk5j0wk1FSsFun8V/SBhu7LEpvlffgAev/RcPVHsdd9U6O+7/vfiRcS/wI0n34j8m/i/e1TzgMeCD\naOMpfIC+Mvq4dNxvpJlNx7Xhy+D/nexd+5tJkiRJ2pNuN2BXoTPNPI4BvhbX71AoH4gP4MOAk4D3\nzWwjPFCulKN7J3wVYEM8GO3XUX45cFSxPCLAJwO7Rz8+iLqvh9nHefhWQYn1ga/hP2B+LWlpuW3n\nbsBmsTc+F98vHwysZmYDzWwDmhKlHA1sFP0YVe3lJ0mSJO3P4jJgd2b0+AQ8UGs/PD1oiXvM7F1z\nh6tZwK1R3ogbePQBPjKzH0X5ZcCXo3wlM7uvWF7j/pWiwcF/CHwUEeX/waPMv4JL3iZFpPhXgM9T\nPSJ8BjBG0h74jHwBlOYfSZIkHcJiMWB3ZvS4mY3Cl8tXB6aUAsKoP0p8UakUDV5+/9I5AZcVosTX\nM7Nja0SEbwecgwecTZK0QL/T/CNJkqRjWCwG7KDDo8fjmrXM7GEzOwYP4lq9ns6GjvutUhYyPDf4\nfdXKW+pHndwF7CLpk9H3lSV9rlJEeASbrW5m9wBH4ZHrvRfh3kmSJMki0B2jxKvRntHjzaKwy/ax\nT5O0TtS/CzcLGVxnn/cGzpe0Ar4svW8L5aOj/ANgEzwj2bp48FmLmNljkn6FZ4DrAcwBfowHoZVH\nhPcErowlegFnmdnbtdpPt64kSZL2Y4k0/+huqJ3MSiQtFdsAbcKy/daxfnuf2VbNJckSTcq6lhy0\nOJp/dDfUuWYl35L0sNzi8++SPhXlJcORCcAVqm4IUtWAJEmSJOl4Fqcl8a5IZ5qV3A98Ker/L/Cz\n6Au4icjmZvaB3LyjkiHIv6hgQGK5JJMkSdIp5IDdvpTLzR6hSW52CAW5GUDMur8M3ER1udltwG11\n3PuzwDWS+uHJT54rnLuloOPeFk8cU8qQ1gc3BHkRNyD5Mh7ZXjIg+XfxJiq4dfVcMc0/kiRJ2otc\nEm9HOtms5GzgD5EI5QCam4e8VziuaAhCnQYkKetKkiTpGHKG3f6U5GY/wJOm/B8wJZaqJwJnxZLz\nW7jc7OzyBkJutoKZ/TX2np+NU7VkXn1w9y2onVa0ZAhyd5iNrBvXVTMgqUpGiSdJkrQfOcNuYySN\nlDSgUDQe6IfLzV7Fl7bny83w9J/34HKwKTXkZrfJ/bFfprnc7MgILFur7JpjgWslTaGC7EvSiAhc\nuxjPL/5I5Fm/AP8hNwZokBuQ7EWTAUmSJEnSCaSsayFpL6lVjfu1qQRL0rHAbDP7XVv1IWVdyZJA\nyq2StiZlXVXoZKnVaEnny3NvPyVp+yivJq0aIWl8RIM/VupD4dx9km6W9Gz0ZXdJE6Pva0W9vpKu\nj7YnSdpMbv85Cjg8+rlFpXpxfTMZWDt9LUmSJEkLLIl72J0ptQI36BgOrAXcI2ltfMm5krQKPI/3\nQDN7rkJbg4AvAG/i+9oXm9lwSYfitpuHAb8HzjCz+yWtAYw1sy9IOp/CDFvSn8rrRdtQkIGVdyCj\nxJMkSTqGJXHA7kypFcCfw5f6aUnP4jaY1aRVHwMTqwzWAJNiHxxJzwClQb4Rj0YH2AYYIKl0zYoR\nxFZOrXq3VBqswaPEgQvBl8SrP3aSJEmyKCxxA3ZEPRelVjNoLrVap8blzaRWkobjFpW7AAfhftst\ndqHC55K0amzxhKQRNJdglVOPI1gPPIHKh2Vtl7dVq16tPiRJkiQdwBI3YAedJbUC2FXSZcCauBf1\nk7jG+WcVpFUAn5I0wMweW8hnfRo3Rfl/0e/BZjYt+rliod4d+DL6aWX16iZlXUmSJO3Hkjxgd4az\nl4AXcLvPFYFRZvah3JVrFi6tEm7TOTKu+TS+h7ywA/Y8YOOQhC2F24+OAm4FrpPnCD8Y3w44p0K9\numl8aRb9j/7LQnYzSZKke
9JRyoElcsA2s7uApQuf1y07fxVwVemzpCOBj8yst6QzgEFmtnUM7D80\ns91LkeX4oHyrmR0V187Gtc3b4LPmj3BbzA+BEZLexCPLZ8XfzqVBXtLHeKDbaXJbzJ2BQZJux7OQ\nvS9pfTN7Iq4dAEwG1gNmxZ74oLjvB3gg3eOSVjWzpyT9APidmY0PmdfH+Mz7BeDHkk4DRuCBee+a\n2QWL9OKTJEmShWaJHLAXgraKLL8KD2D7bD2R5Wb2QPk5SXfhM/OnJW0MnIvvne+PR5c/F339kpm9\nKekg4AgzmxzX13rOFk1BagTAJUmSJO1IDtj10VaR5XtGW62NLCfa7Q1simcwKxUvC2Bmr0o6Bl/K\n38nM3lyI56zHFKTZgJ2yriRJko4hB+w66AKR5SV6AG+HGUclNgDeAD5To43/0pQwp9zMo5IpyFhq\nkLKuJEmSjmGJy3S2CJQiy8fF8ShgavhDTwS2lLSqpJ54ZPl95Q3EDLmPmf0VOBzfX4bakeXzz5nZ\nO8BzknaN9iRpUBwPB74BbAQcIWnNKm0/jy/dg++JV6NkCrJ0tL+upF416idJkiTtSM6w66dDI8sl\njQSeKj+H216eF0FoSwNXS3oCuAjY18xelvRT4BJJWwOjgfMlfQC8A1wO/D7202ulGr0Yz8pWKXK9\nIinrSpIkaT/S/KOT0SKaiKgVpiCS7qUQgNbWpPlHUk4aZSRJyyjNP9oXdQ0TkYeBUyUNl/Sg3Gbz\nAUnrRb3lJV0t6XFJNwLLF9p4Ppbw+8ttNUvlR4TEC0mHFPp1dfu8ySRJkqQeckl84elsE5HPApua\n2dyIXt8igtq2AX6D708fCLwfZh8b4tHtreFoYE0z+0jSSq28NkmSJGlDcoa98JRLvR6kSeo1noLU\nK5asS1IvqG4i8m3g/Trvf21hKb0PLvWaCZwBfDHKvwxcCWBmM/Do9tYwAxgjaQ88unwBJO0vtwud\nPPf9Wa1sPkmSJKmXHLAXEjObg2uS98GlXuNpLvWqRTOpF263eR2wPXB7nV0oSrBOAO4xs4HAt1hQ\nrlWLosyLsmu3A87BLT4nSVpgRcbMLjSzBjNr6LlCn1bcNkmSJGkNuSS+aHSmiUiRPjSZhexTKB8H\nfB+4W9JAYMMK174KfDKW5mcTPxok9QBWN7N7JN0PfBfoDbxdrRMZJZ4kSdJ+dNgMOwKYHi8FZC1i\nW/vEHnFL9UYXMnVVqzM/6EpSg6SzWtGV8UA/XOr1Kr60PV/qhe8B3wNMxwfyalKv2yS9B9xPc6nX\nkRFItlaF64qcCvxW0lSa/wg7D+gt6XHgeHwZvxmxUnA8riW/E3giTvUErpTnR58KnGVmVQfrJEmS\npH3pMFlXaIW3MbMXy8rrliUVrrmXOuRJ9UijJPWPOgNb04fuTPk7r/c7aKleQ0ODTZ7cLoqxJEmS\nxZZ6ZV0dsiQu6Xzc+/lvki7Bl3DXirIXJP0cT+JRyqR1kJk9ENceBeyB20T+DXejasCDoT4ANgGO\nxPdul8f3kw+wGr9EJA0FLomPdxTKR+A/BLYPaVPJs3oNPDPZl/BsYi8B34qUpUPxpfDewOvAPpFI\n5V7gYXxfeyXc1Wu8pC8ClwLL4CscO4eRx+xwAxM+Y/4GYMCJZnZN9O3YuMdAfLa8R/lzxmz8HMLN\nC9jPzJ6IHy8f4pnQJkh6p+w72BefkTfg+9o/ieXwfYBvx/P1BLas9l6TJEmS9qNDlsTNbBTwMrCV\nmZ0RxQPwGff3gP8AXzWzIcBuuFQKSd8AdgQ2NrNBwKkxW54M7G5mg8Os4g9mNixmycvj+7C1uBTP\nkz2ohXpr4bm+d8Cjre8xsw1wq8rtQsZ1Np6drPQj4KTC9UuZ2XDgMODXUTYK+H3kA28Amq044IPj\nYDxt6Ta4FrtfnNso2hqAD7SbVejzhfFsQ/H99XML50pSsNKye/E7+DFg8XzfAy6TZ2QDDzr
bxcwW\nGKyLUeKvvfZahe4kSZIkbUFnBp0VnaGWBv4gaTAueSr5U28DXGpm7wPUcKDaStLPcJ/plYFHgVsr\nVQw98UpmNi6KrsBns5X4W8yiG/HZZSmCuxFP27kePtu90yfG9AReKVx/Q/w7JeqDy79+KemzwA1m\n9nTZPTcHrooo8lcl3YdLxN4BJpa2FCRNizbvLzxbVTevoCgFg+bfweZEUFzMyP9J0/dwZ7V3XzT/\naGhoyLR5SZIk7URnDthFWdLheLTyIHzW/2G9jcQs8Fygwcz+FUvZrZE11eIjADObJ2lOYfl5Hv7u\nBDxqZpvUuh7/EbJUtPWnyFC2HfBXSQeY2d2t6U95mwVacvN6r4XP1ai3XpIkSdJOdBUddh/gFTOb\nh3tG94zyO4F9Ja0AIGnlKC9KnkqD8+sxw6wZFR6Rzm9L2jyKdl+Efj8J9JW0SfRv6dijroqkzwPP\nmtlZwM0UpFZyw4/ngN0k9ZTUF09+MrGeztRy86qD8cS7kLQuvm//ZJ3XJkmSJO1MV9FhnwtcL2kv\nfNn5PQAzuz2WySdL+hj4K/ALmjtQbYI7Vc0E/g1MquN+++JuVkYh6Ky1mNnHIRs7S1If/H2eiS/J\nV+M7wJ6S5kR/f1M4NxK4Dc8wNh0POvuZmf27pR8CBRZw84q25lOeACU+nxvXNeJBZ/tEStJm9WpF\niTe+NIv+R/+lzm52D9K8IkmSrsIS69Yl93b+Mx6I1RPPFvYacIiZjYw6XwV+ZGY7SZqNR1F/E9+n\n/gUezb0GcJiZ3RIR1SPxaPd1gN/h0eB74svZ3zSzNytFcuN777fhqUpn4bnA/whMw/eXb8WToqwb\n++or4gPxuqGlLj1XX+D86BfRtwmxVTA/Khz3uy5Gf4+genT6CXjyl/XNrLSvvQCLo1tXDthJkrQ3\nXUrW1UX5OvCymW0HEDPkd4BzJfU1s9eImXjU7wXcbWZHyp2vTgS+ikdaXwbcEvUG4tHcywH/AI4y\ns40knQHshc/ALwRGhZxrY+BcM9taZYYfMbtdpvRFhmZ8O+AmPPPYDcXBOvg9cIaZ3S9pDXxg/kKc\nGwBsbmYfxI+LIcCG8SNiZ5qi01fFU5GWAvOGAAPN7LnWv+YkSZKkLViSB+xG4HS57eVtZjYeQNIV\nwB6SLsWX2/eK+h/TPEr8o0IEef9Cu/eY2bvAu5Jm0RSt3ghsWEckdznXFI4vBn6GD9j74jPzcrYB\nBhTaXjHuCc2jwqF59HdL0ekVB2tJ+wP7A/RcsW+Nx0iSJEkWhSV2wDazpyQNwZe4T5R0l5kdj2u0\nb8Uj1a8t7NmWR4kXI8iL77EYyT2v8LkUWd5SJHc58yO0Y2m7fyxT9zSzmRXq9wC+ZGbNIu1jAG/z\nKPGirGvZfussmfsrSZIkHUBXiRLvcOS5yN83syuB0/BlX8zsZTzJy6/wwbtNaSGSux7Dj8uBP9Xo\n2x3AwaUPEbRXD+NZyOj0JEmSpP1ZYmfYwAZ4FrF5wBzgwMK5MUBfM2vJJnNh2R2PUr8I/3FQiuS+\nGrhI0iE0ydP2xTO7Fft2InBVlbYPAc6RNAP/fsfh2dVa4kZ8C6A8On39eh8q3bqSJEnajyU2SrwW\nkv4ATDXpWbDIAAAgAElEQVSzP3aBvsw2s96Fz7sAO5rZnovQZpp/JEmSdBHqjRLvskvikvaSNEPS\n9AgEK1lh3h3ld0UUdMlG8yxJD0h6VgVLTUlHSWqMdk6Osv0kTYqy6yWtIKmPpH9KmoInM7lB0r8i\nGcpakm6XNEXS+EqzTknHSrpC0oOSnpa0X5RL0mmSZkY/dis8S8nWcx9JN8Q9npZ0apSfDCwvaZqk\nMZLOwyPSG6K93Sr0o2Jf4x2dL8+ydmqhvxOAKyQtJ+nS6ONUSVsV+naLpLuBu9rm202SJElaS5dc\nEpcnCfkVblTxupoynJ0NXGZml0n6AW4SMjLO9cMjndf
HJVbXqbl5yPuFdm4ws4viXifiTlpny/Nz\nnxkuVbsBYyMSfAEZFm4KUs6GuKNXL2CqpL/gy8zV5FJFBuNysI+AJyWdbWZHSzqoFKAml14tZWal\nHwN9KrRTq68l84+5cl12Ueb1U8L8Iwb5O+QZz6Ag/yq/mQpR4mussUb56SRJkqSNqGuGLWndmNGW\nZoQbyjNptRdb4xHar0Mz049N8IArcNOOzQvX3GRm88zsMeBTUVbNPGRgzD4b8f3kUhaxa3C3MHCd\n8zVqLsOaBlyA/zioxM1m9kH0+x5gOAW5lJm9CpTkUuXcZWazIrr7MeBzFeo0Al+VdIqkLcxsVvFk\nHX1tyfzjynhPTwB1m3+YWYOZNfTtm7KuJEmS9qLeJfGLgJ/jwVmY2Qx8QOtKFOVUqlrLGY17bm8A\nHEdTPvJbgK/HTHwocDcFGVbh7wuVGsWDtWp9rrf/lYw9MLOn8NluIy5FO6asSkt9TfOPJEmSbkq9\nA/YKZlYu8WkxSGkRuBvYVdIq0Mz04wGafijsjkuRalHNPOQTwCtyP+v55h9mNhvPRf57PJnK3FYa\nauwYe8Gr4Kk+J9FKuZSk4/GlcyQdBsyJflaVohX6n+YfSZIkiyn17mG/Ls9/bTA/UvmV2pcsPGb2\nqKSTgPskzQWm4nm0DwYulXQknvd73xbaqWYe8v+Ah6ONh2mufb4GuBYfcEu0aKgRzMCXwlcFTjCz\nl+VpTCvJpfpX6fMx8oQuAIfhS/8zJD2Ca7CrSdHq7quknmWf0/wj6dZkzvdkSaDeAfvHeDDT+pJe\nwi0gF8WWskXM7DI8IrpY9k8qBHuZ2T5ln3sXjk8GTi47fx5u5AE0RaTjA+oMM1NEcd+ND76vAfua\n2QuSRgOrSnoA+DQ+AF8XTa2Cm2m8jxttAPwvPvjPxXOL3xrBYvcBa0ada4ETJB2Obz+MxgPYPgNs\nhbt63QV8w8w2jD7vh38H5TqqdXC70rnAU8D/RfkIPOVoybRkBDBN0mRc0309vm9uwJvAM4XrPozo\n8gnAT0iSJEk6nBaXxCX1ABrMbBvcXWp9M9s8Bs9uTyEifWszGwQcGqdKEekb4slKzipcVopI356m\nHwNr48FrG0c7p0b5DWY2LMoexyPSZ+EuXFtGne2JiPTSDcz9sl8GtjKzrXBnsW+VlsdpbkxSepZV\n41m2MbMh+GBeHGDfMLMhZnZ1fF4mAsZOb+F5S9HlOVgnSZJ0Ei0O2GY2DzecwMzeC2OLxYm2ikh/\nFThpUSLSa3Uy9tfvBrYP2dXSZtZYVu1LuFRrQkSJ703zaPPyexQ/13re8ujy+UjaX9JkSZPnvj+r\nUpUkSZKkDah3Sfzvko7A/wdfNKOoKPVZAmhtRPpIM5sut7QcEeW3AL8pi0hviYvxPfgnqJxLXLgE\n63tVrk/zjyRJkm5KvVHiu+H72OOAKfG3uOSg7DIR6RXabGYGYmYPA6sD36dyLvGHgM0krR196FVI\nftISrX3eJEmSpAOpa4ZtZmu2XKt70k4R6XOBl4Cv0/qI9CIXArdLejn2scH3sgeb2VsV+vBazOKv\nklTy2P4VHnzWEq163kqk+UeSJEn7UZf5h6S9KpWb2eVt3qPFgJBs3WZmA9uh7duAM8xsofN6S+pZ\nnNGXf65xXZp/JEmStDFqY/OPYYW/LYBjgR0WundthFpvEHKepIfkBiEjJF0i6fGQapXanC3pDEmP\nxvV9o3wBw5Ao/5SkG6N8uqRN8cjxteSmHafFve6VdJ2kJ+RGHorrh0q6T27WMVZSvyg/RNJj8RxX\nS1pJ0r/wYLDT5QYdC3hnS9pD0sS49wUlzXU81+mSpgObSHpenuL0EXxLYHC8mxnxPP8T190r6cyQ\nfx1afr8kSZKkgzCzVv8BKwG3L8y1bfWHR1s/Bawan1eOf28F9o7jH+AR3eDBX1fjgVk7Au/gntg9\n8D35wVHPgN3j+Bj
gD3G8SuHeJwIHx/E1wGFx3BPXQPcHZhbqjwBm4fKoHsCD+MC7NL533Dfq7QZc\nEscvA8uW3nfh2TaL4964EUjxnXwh6iwdn88F9io813cKdZ/HNeSlzzOALeP4eNwEBeBe4Nx6vpOh\nQ4dakiRJ0jqAyVbH/2MX1q3rPZqSfnQWteRY347jK2jSQwPcamYWEqtXLWRRkh7FB9lpwDya5E5X\nAjfE8UC5s9dK+GA5ttCPvaIPc4FZpdlpGRPN7MW437S439vAQODOmHD3pCmD3AxgjKSbgJuibALw\nf5LG4PruF8vu8RU84nxStLc88J84NxdPjlLkmuhPH/xHwX1Rfhm+t96sXiWUbl1JkiQdQl0DtqRb\naTKy6IFrfa+tfkWXpSTHmkdzadY8qr+L0nOPprI8q7X3hiZzDwGPmtkmFepvh+cd/xbwS0kbmNnJ\ncsvOb+Ja66+ZO2uVEJ785OcV2vvQFtynblNZV0NDQ8q6kiRJ2ol697B/B5wef78FvmxmR7Vbr+qj\nreRY5fQAdonj7wP3x3FFeRaeMvTA6EPPmK02k2PV4Emgr6RN4vqlJX1Rnl1udTO7BzgKX2bvLWkt\nM2s0s1NwSdj6Ze3dBewi6ZPR3sqSKtl0NsM889pbkraIoj3x1KlJkiRJF6HeJfFvlg/Qkk7pzEHb\n2kiOVYH3gOFy84z/0JSNbAF5VkSD3wt8Q9IP8YF1opntImmC3D/8b0BFRwwz+1hupHJWDPRLAWfi\ne/NXRpmAs8zsbUknSNoKXxF4NNoutvdY9PuOGPTn4Pr5etLI7g2cH8F0z7IQsq4kSZKk/ahX1vWI\neW7qYtkMCyOKxQlJs61gHtJC3RHAEWa2fXzeB8+7flD79XDRUZk8q/xzvdeVs2y/dazf3me2VTeX\nONJxKkmWTNQWsi5JB0aA1noh9yn9PYcHRXU55Nm9/hISq5mSdovy5yX9NuROkyUNCRnVM5JGRR0B\ny8R1jYVrFfKsZuW4fGuLaPPwKPuMpNslPS3p1EK/Zks6Kfr1kKRPRXlfuUxsUvxtFuVbRrvTShIu\nSf0kjYuymYUl7OLzV5OJNZNnyWVu58tduE6N5fOb4vt9SFLJFexYSVdImoAH8SVJkiSdQEtL4n/C\nl11/CxxdKH/Xum4e8a8DL5vZdjA/ArrEC2Y2WNIZeBDZZsBywEzgfDy6/L5oY1U82nocsCkwGBhU\nVn40C86wBwMb4UFmT0o628z+BfQCHjKzX8ZAvh8uD/s9ngjlfrlmfCwuzzoC+LGZTZDUG/gQj8Ye\na2YnyfXVKxQfPPbXzwZ2NM96thtwEi5vg3DnirqjaXLhmivpbGCqmY2UtDXuvT04rhsAbG5mH5S/\nbBWixHuu2LfmF5MkSZIsPDUH7AhGmgV8DyCCmZbDA6B6m9kL7d/FVtOIJxY5Bc82Vgw6u6VQp7e5\n89i7kj6StBKujb4qoqlflXQfniymWvk7Fe5/V7w3JD2Gu2X9C/gYuC3qTAG+GsfbAAN8cg/AijFA\nLyDhkjQJuCQG5pvMbFrZvdejukwMFpRnFV24Ngd2BjCzuyWtImnF0nurNFhH3TT/SJIk6QDqihKX\n9C1JTwPP4TPQ5ykLeOoqmNlTwBB8UD5R0jGF0wsj62otleRbAHOsKWCgWN4D+JKZDY6/1cxstpmd\nDPwvrqWeIGl9MxuHS71eAkZrwZSxJZlYqa0NzGzbwvk2d+tKkiRJOoZ6B6kTca/lv5vZRhGpvEf7\ndWvhkfQZ4E0zu1LS2/igVy/jgQMkXQasjA+OR+LvqVL5atQn36rFHXhk+2nR/8FmNk0h4QIaJQ0D\n1pf0AfCimV0kN/cYgi9dl5gvEzOzB2Mmvq6ZPVrns+8OnCAPpnvdzN4pzPxbJM0/kiRJ2o96B+w5\nZvaGpB6SepjZPZK6ajjwBsBpkubhsqYDW3HtjXimtOl4wpSfmdm/JVUrfwOYK8/PP
RpYwEGrDg4B\nzpE0A/8+xgMHAIdpQQnXd4EjJc0BZhMZ1krUkIk1G7AlLVXh87H4cvsM4H1c5kVZPeHKgnmVHqTx\npVn0P7qigi1JklaSqoGknHoH7LdjX3U8ni7zP3TRZVIzG0tT2tBief/C8Wh8gF3gHD5zPrLsWqtS\nPgdPTVqk2O72hePe8jSjq+NxAL+P8tclbQdcgO9nXyFpKLAhvrz+OvATM/tI0jLAB8AyuEb81Qqv\n4GngH/he9jyaUpOOBo6J77En8Gt8Nr0XsL6ZrRuBdGvjqVe3xpUAo4GxETU+FM+yVo+uO0mSJGlD\n6s10tiM+6zoMuB14Bk+ZmbSOH5jZUKABOESRpQ2PIH/YzAbhSVnOBnaJupfgkd7gwWfDot7jwA8r\n3OOXwN1mNhzYCl9t6BXnhkS7WxY+HxqD9VA8WcrG+PbHfpI2inrr4AYgXzSzHKyTJEk6gbpm2Gb2\nnjzF5Tpmdpk8G1bP9u3aYskhknaK49XxgfANmhtz1Ir0rmZAUmRbYAdJR8Tn5YCSK8edZXK8iWb2\nXBxvDtxoZu8BSLoBt1K9BfinmT1U6YFS1pUkSdIx1Gv+sR/+P+WVgbXwYKvzcXeopA4ikGsbYBMz\ne1/SvfhgCs2NOWoZgoymZQMSATub2ZNl99+YdogST1lXkiRJx1DvkviP8SQj7wCY2dPAJ9urU4sp\nfYC3YrBeH192rkRFQ5A4V82ApMhY4OAIEKOwrN0S44GRklaIJfSdaL1xSpIkSdJO1Bt09lFEIAPz\no4pzNgXI05q+b2aXt1D1dmCUpMfxQXmBJWZJvzCz39SI9F7AgKTCfU6I+jPkBiDPAdtXqNcMM3sk\nsp9NjKKLzWyq3OCkLlLWlSRJ0n7Ua/5xKvA2LiM6GPgR8JiZ/bJ9u9e1UZ2mGa1or27jkcI1PSv4\nXLd0TbuYfzQ0NNjkyZNb05UkSZIlHrWF+UeBo/FZXSOuEf4r8KuF717XQFJ/SU9IGiPpcUnXRUBd\na0w0ji0FeMW5M+TmIo9LGibpBrkRyImF++4haaLcxOMCuY/2ycDyUTamWr0ony3p9NB/b1L2TGvJ\nzUemSBofy+9oQbOPZqYekpaTdKnc3GRqaMCRtI+kWyTdjfttJ0mSJJ1AS25dawCY2Twzu8jMdjWz\nXeJ4cVkSXw+XLH0B36P/kZpMNCpJqyBMNMzs9ArtfRy/lM4Hbsb3/wcC+8jzc38B99jezMwG4xHi\nu5vZ0cAHkVJ092r14h7zZWBmdn/Z/S8EDo5+HwGcWzhXMvv4SXweAGxjZt+LfpqZbYDnjr9MUiko\nrlwOliRJknQwLe1h34T/zxpJ15vZzu3fpQ7nX2Y2IY6vxDOP3U7rTDSKFA1GHjWzVwAkPYtLuTbH\nE5BMiraXpym5SZGv1KhXlIHNJ5KibApcq6aUossWqhTNPqC5qcfm+I8UzOwJSf8E1o1z5XKw4j3n\ny7rWWGONSlWSJEmSNqClAbuYSPrz7dmRTqR8pcCoLa2C2nKolgxGBFxmZj9voV+16n1olfetewBv\nx4y8Eu0q62poaFhcVl2SJEm6HC3tYVuV48WJNUoSKuD7wP3UllYtKncBu8itSpG0ciSlAZgTy/Et\n1auImb0DPCdp17hGkgbV2a+S+QeS1sWTrTxZ84okSZKkw2hpwB4k6R1J7wIbxvE7kt6VVMkLujvy\nJPDjkFv9D3CemX0M7AKcEoFd0/ClZvBZ8i6LcL/3cSOPO+RGG3cC/eLchbgca4yZPYYH9lWqV4vd\ngR9Gvx/F08rWw7lAD0mN+JL/Pmb2UQvXJEmSJB1EXbKuxZXQGN9mZgPb85qy60cARxSNQeq8rtXy\nrRpttYusa9l+61i/vdvfxC1djJIkWZxoa1lX0sTJwFohtSp5WB8paZKkGZKOi7Jh8Xk5Sb0kPSpp\nYFy/RVx/eMim/lBqXNJtMagvIN9SFalZEUl9J
V0f/ZkkabMoL5dxNZNrxfL5aZJmhrRrt7huRMjD\nbgEea88XmyRJklSn3kxniyVm9jweDd4ajgYGlgK7JG2Lm3gMxwPFbpH0ZTMbF4PciXiE95VmNlPS\n0RRm2PKc4NUoybd+Gnvb9wE7mtlrMaCeBPyg7JrfA2eY2f0hyxsLfCHODQA2N7MP4r5DgA3N7E1J\nOwODgUHAqnh0+ri4bkg883OUoTT/SJIk6RCW6AG7jdg2/qbG5974AD4OOB6YBHyIy8VaS70uXkW2\nAQYUZF0rhtwLmsu4oLlca3Pgqlh2f1XSfcAwXJs+sdJgDWn+kSRJ0lHkgL3oCPitmV1Q4dwq+AC+\nNO7MVUke9V+ab00sVziu18WrSA/gS2b2YbNO+gDe5rKuJEmSpGPIAbv1vEtz042xwAkR2T1b0mrA\nHDP7D3ABbtixJnAKcFDx+tirPtAPtSPu4DW8yn3nS83M7MFYIl/XzB4tq3cHnu+9tL8+2Mym1fFc\n44EDJF2G26h+GTgSWL+Oa4E0/0iSJGlPcsBuJWb2hqQJkmYCfzOzIyON6IMxi50N7Cnp6/jA/Sd5\nDvAHJG2ND4xzI5BsPPAWMAsf0B8HHqly349V3cWryCHAOSEFWwpfmh9VrCB3Wyv/fCOel3w6rrn/\nmZn9W5GLvFSvLc1OkiRJkvrJAXshMLPvlxXdjDuYPYynE52DD36fk/QocJ2ZbQwQA/lncD32vGhv\n9wgCazCzneRGHauWnLvU5OL1aqkL8fdEhe59Dvh09OEV4LgoHwFMk5uWXAVsAHwoNwOZgAfHrRN9\neh//8VC67q2ILn8BzzOeJEmSdDA5YLcd6wB7m9lDAJJ+GdHXPXHZ1IbAU8BFwNbAP6idk7wS3wfG\nmtlJ0e4KxZNqMi2pFkm+TEnrJ/e+LpmBzJV0NjDVzEbGSsDleNQ4FKLLW9nfJEmSpI3IAbvt+Gdp\nsA6+E5KnpfAMZQPwgLDnzOxpAElXEpKoOpkEXBID800V9qZbiiQv/4FQNAPZHNgZwMzuljuLrRjn\nyqPL56M0/0iSJOkQMnFK2zE/klrSmri15VfMbEPgLzSP/m6J+ZHjknoAywCY2Tg8GOwlYLSkvcqu\nK0WSD46/Dcxs20p9rPK5GjXNP8JqtKFv39RhJ0mStBc5YLcPK+KD3CxJnwK+EeVPAP0lrRWfq+0H\nP4/vhQPsgMvCkJt/vGpmFwEXE9anBRbFtKRo/jECeD3MRJIkSZIuwGI5YEtaP1J/Ti0Mjgvb1mBJ\n36yjai9JtwGY2XQ8kcoTwJ/woC5CG/02vqf9CC7jqrQtcRGwZSklKU0z3BHAdElTgd3wrGbzacG0\npPhMx7OgkcixwNCILj8Z2LuOZ06SJEk6iMXS/CPSfy5lZieWlQt/5nmtaGsfPHr7oBbqjaAOUw9J\n90a9yfX2obvQ0NBgkycvdo+VJEnSrqgzzT8k9Zf0RMiTnpI0RtI2oV9+WtLwqNdL0iWSJsZseMfC\n9eMlPRJ/m0b5CEn3Srou2h+jQg7OqPNN4DDgQEn3RFtPSrocmAmsLuk8SZPlhhzHFa4dJukBSdOj\nT33w9KK7xYx9N0nDJT0Y/X1A0notvIvlJV0t6XFJN+J5xUvnnpe0ahu8r30k3SDp9qh/apT3jDZL\nhh6HR/louaYbSV+Jthqj7WULfTsu3n+jCnrsJEmSpBMwszb/A/rjgVMb4D8KpgCX4EFRO+IRzgC/\nAfaI45Vw2VMvXK60XJSvA0yO4xF4kpHPRrsP4nKj8vsfi89iS32Zh6frLJ1fOf7tCdwLbIgHdj0L\nDItzK+LL1fsAfyhcuyI+ewfP2319oW+3VejLT4BL4njDeC8N8fl53GhjUd/XPtH3Pnhw2z+B1fF9\n8DsLfVkp/h2NL50vB/wLz5gGLuU6rNC3g+P4R8DFLX3vQ4cOtSRJkqR1lMa4lv7acw/7OTNrNF9+\nfhS4KzrWi
A9Q4KYZR0uahg+cywFr4EFWF0lqBK7FJVElJprZi9HutEJbtagkuXoE32f+YrS/HvCK\nmU0CMLN3rHJWrz7AtfJMZ2fE9bX4MnBltDkDmFGl3qK8L6L+LPN98sfwBCrPAp+XdLY8YUt5ENl6\ncd+n4vNl0d8SN8S/U6jyniXtH6sVk1977bXqbyFJkiRZJNpTh/1R4Xhe4fO8wn0F7GxmTxYvlHQs\nntVrED7jLBpZFNudS33PUElyNczM3pInEGmN5OoE4B7zjGT98YGzLViU97UxFd5LPN8g4Gt4etLv\nsKAdZz19qvqereDW1dDQsPgFRCRJknQROjtKfCxwcGkfWtJGUd4Hn+3OA/bEl67bimqSqyeBfpKG\nRV8+Ic+xXW720QfXQYMvRbfEODxDGZIG4sviC0u191URSasCPczseuBXVJaB9Ze0dnzeE/fcTpIk\nSboYnT1gn4Avf8+Q59w+IcrPBfaWS5PWpw3tHa265OpjXCp1dtz3TnzmfQ/uLz1NnurzVOC3cmlV\nPbP784Dekh7HA9imLEL3q72vaqwG3BtL6FcCPy+c6wt8G9gXX+JvxGfz5y9C/5IkSZJ2YrGUdSUt\noxoyNC2kK9ey/daxfnuf2Rbdq8nzaeGZJMlihDpT1tUVaQPp1EJLzaLeIZIekzQjZF494r5943wP\nSf+Q1Df6eJ6khyQ9G/e4JKRhowttzpZ0mlye9ne55OzeuGaHqNMz6kyKex8Ql58MbBErB4eHNOwW\nSXfjiV0ulzSycK8xpXeRJEmSdDxLzIAdrA2cji+zr4/vLW+OB6H9Iur8ErjbzIYDWwGnSeoF/Af4\nqpkNwZfOzyq0uxGu/R4AfB7YrMK9jwY2Ms8tPir2568k0oHiErHpZlYKtf4fPMvZ4cAtNEWkbyCp\n5KLVK/r6RXyv/UTgq8BO+PI7wA+BWWY2DBgG7BeBd0cD481zjp8RdYcAu5jZlsAfiT16uR59Uzwn\nejOKUeJz359V4bGTJEmStmBJG7A7U2o2AxgjaQ9ccw2utS4ZePwAuLRQ/9ZC314t63ep/Y+B2+O4\nEbjPzOZUeJ694nkeBlbBte2VuNPM3gQws/uAdWIF4Hu43nyBZXIrmH/0XKFPlWaTJEmSRWVJs9fs\nTKnZdrjG+VvALyVtYGb/kvSq3H96OE2z7WKbxX6W93WONQUhzK9nZvMiwr30PAeb2diy5xlRoY/l\nwX2XA3sA38WD05IkSZJOYkkbsOuhJJ062MxM0kZmNhWXc70Yg+HetEJqJrfIXN3M7pF0Pz4A9saN\nQC7Gl8avsCZv6rZkLJ6m9Tv4dsB/cVlauVwNYE1JA8zssfg8GpgI/LtQVpUNVuvD5AwIS5IkaRdy\nwF6QE4AzcelUD+A5YHtcana93IP6dlonNesJXBl7wQLOMrO349wt+FL4pdUuXkQuxpfHvwX8GXgN\nGIkv0c8NCdto4C18/30AnikNM3s15Gg31XOjxpdm0f/oBba5k6TdSeVAsiTQJfawOyKCGx9kp5ci\nuM1sHzO7LrqwFPDvGLwm4FrrDYG/4olFGoEhETB2KLAxcLukJ4D98MEQedKVIcDh0cdPyLOh3Y3n\nR58D7G9mJ0ek+Hb4Evt0fN98F0k98UH1KLnV5dfMbGDpXZnZPsDkuPfNETl+HXCqmf1OYeYBPCfp\nEmBpM/sF8Aawj5lthc+wj8X3sz/A9ehPAZ/Cg+ymSVpL0k+BrYGDJF29aN9ykiRJsih0iQE76MwI\n7jHAOWY2CI+GfgVPKjIYH1C3iXuVPKQXaFPSMsA1wKHRzjb4YFitb9fgkdzXA/8P+AoehV0tqruc\n9YBzzewLeI7wH0laDp8t72ZmG+A/RA6scG0v4KHo5zhgPzN7AJ/tH2lmg4E1gVOAn8cPhlEV2kmS\nJEk6iK40YHdKBLekTwCrmdmNAGb2oZm9j/9YuMrM5prZq3jKzmE12qxmHlK
tb3/Ds42ti++PjzOz\nD6g/qvtfZjYhjq+M/rZk5lHiY+C2OK5o7GFmfwf+DmxSFtnejJR1JUmSdAxdaQ+7K5mFtKavLbV5\neKW+mdmHku7FjTl2A0pLzhWjuitQnqKuNSnritHltfpfKbK92cBdNP9Ytt86mTYvSZKknehKA3Y9\ntHkEt5m9K+lFSSPN7CZJy8b144EDJF0GrIwPXEfiy/WVmG8eYmaTYub+QQt9uwb4X6CBJiORUlT3\n3WY2R9K6wEtmVh7ktoakTczsQXz74H4KZh5m9g9ab+YxP3K8hcj2imSUeJIkSfvRlZbE66G9zEL2\nBA6JIK8HgE8DN+KR1NPxoLGfmdm/qzVQwzykVt/uALYE/h7Xg0d1PwY8IvfcvoDKP6yeBH4cUdz/\nA5wXXth1m3lIaiDsNkOXPRM4MoLW1sEj2xtxs5RiZHuSJEnSwaT5RzckIs9vK0aPt0GbxwKzzex3\nC9tGmn8kSZK0HqX5R2U6QkKm2iYga8uNOqbH9WvJOU3STEmNchvPltpcXtID0c58CVmVvpUkZKU+\njA4J2QhJt8UPgFG4HG2apC0kPSdp6ai/YvFzkiRJ0vF0tz3stmJtYFd8OXgSTRKyHXAJ2UiaJGQ/\nkLQSMFHS32mSaX0oaR3gKnwPGlzu9UXgZVzPvRm+t1xkDHCymd0YMqweNJeQrQpMkjSuRpsT8eC0\nQ2O/fEWaS8jK+3YN8B3gLyE/+wou99oYwMyel3Q+hRl2BMRthydN+S5wQ+Qpb4ak/YH9AXqu2LeO\nV58kSZIsDEvcDDtYEiVkW0VA3TdokpDV4mKa8ofvS5VMbGn+kSRJ0jEsqTPslJC1gJlNiCX2EUBP\nM7kEDAIAABRJSURBVJu5UL1PkiRJ2oQldcCuh+4mIdsV+AzwZCskZEXeBVYsK7scT1t6woLVFyRl\nXUmSJO3HkrokXg/dTUK2L7703RoJWZFbgZ1KQWdRNgaXjF3VymdMkiRJ2phuNcOOaObbgYfwnN+T\n8L3V44BPArub2UR5fvGzgYH4oHusmd0c118BfCzpEeAgM9unFI0NvA4sJWkMsIeZHVB2/7WB8/Cs\nYnPxDF9HSzoN3xs2SbuZ2TXAdcBxknYCNsCdshpx85DlgZFm9oyk0fjSdQOwDPATMytFbp8A9Cr0\n9aDox1G4T/U8fJCejOcQPxkPPtsEeBw4Pa5dGtjVzFYuRb8X30083tLRj2WAcyTtjHt0vwHcJzcl\nOSGeLUmSJOlozKzb/OEBV//FB8AeeB7sS/D95h2Bm6Leb/ABF2Al3ImqF+6YtVyUrwNMjuMRwCzg\ns9Hug8DmFe7/MLBTHC8X7e2Mz3B74m5XLwD9os2343hZ3CHruLj2UODMOB6N/wjpEX16sdB2pb5+\nA5+ZrxCfV45/7wUaCn19Hk9xCvAj4OIW3s3Z+A8e8EH7XNwE5ZpCm31qfT9Dhw61JEmSpHWU/v/e\n0l93XBJ/zrpPhPckM3vFzD4CnsGXpSnrK8CfzWyemT0NPIsvZ1fr6zbApXFvzOzNGu/qhvi3aPBR\n7d08CPwiZu+fM7Mf4UvoG0s6RdIWZraAu4cK5h+vvfZaja4kSZIki0K3WhIPumuEd7W+QmUjj4oR\n3wt5/+LzVHw3wOOSHsa113+VdICZ3S1pCPBN4ERJd5nZ8c06WjD/aGhoyLR5SZIk7UR3nGHXQynC\nWwCSNoryPrh+eR4e/NWqCG/gRUkjo81lJa2AR3jvJqmnpL54hPfEVvZ3V0k9JK2F+2s/WaOvdwL7\nxr2RtHKUzzfuaIGK70bS54Fnzews4GZgQ0mfAd43syuB04AhrXyuJEmSpI1YXAfsLhvhXYUX8EH+\nb8AocxOPBfoq6TBgHHALMFnSXOCIaGM0cH5EeS9f417V3s13gJmxVD4Ql3RtgGd4mwb8Gjixlc+V\nJEmStBFp/tHJRJT4bWZ2XR11n8cDy16
Pz7PNrHf79rB+GhoabPLkyZ3djSRJkm6FlhTzD3W+mcch\nkh6TNEPS1VF2rKTLot1/Svq2pFPlxh63q8lU4yvAt3D51SWRSAVJX4k+NpbKJR2CJ0a5R9I9hfuf\nJDcAeUjSp6JstKSz5OYgz0rapVD//7d371F2VuUdx78/kiZcAgmXYGO4BEMgIpckhECozeJWFgqF\nwIJlWyxELiourEBB7OrSCpWKxipCBEpBg0IFApRi1HBrQKFAE8idcKvhmlSsmHBTbnn6x94n82Yy\nZ+bMuc6Z+X3WOmvOvOfd7977zMzZ877vfvZzgaQFub0XFd6bn+bjLFdH8pFLC32rOouXmZnVru0H\n7Gx34J9Jl47H05HM43xSMg/oSOYxBTgUmKkUr11KmDGJtBjJ5YXjTgTOIc3Q/hAp8UZnXwImRsS+\npIxXJWOBw0gJRW4A5kfEPqQ46aOVEn/MBqZGxK6kSWFnFbZ/Iu8/GDgr31teDRwaEYfmOrYCHomI\n/UiXys8s1D8qvwfHkOKzkXQkKURsCinZyP6SpgFHAasjYr9IKTvnSdoeOB74SO6bL4ebmbVQfxmw\nWxLqlS0FbpT0SVKMeMnPI2W3WkaaMDYvby+1ac/c7qfz9utJE9bKbe/KO8Dc/LwYugUpJn19RDxB\nig8vvQdHAouAx0n/3IzLbfqzTuFb60gz06+TdALwVlcNcFiXmVlz9JcBuzehXhPyY5eIWMnG4VOl\n1ca6Om65UK+jge+RZlAvkFTa522APNi/Gx2TBTqHdNWieNzO7Su2XYWvXy+8B7tHxHX5n4NJpIH7\na5K+Ein71xTSim3H0PEPx0aikK1r5Ein1zQza5T+MmBXou6hXpI2A3aOiPnAhflYlU4CewoYo7Tc\nKbnuB7rZDpWHbpVzF3CapGG5/aMl7dhV+FbeZ3hE/Iz0T81+NdRrZmY1aseFU6r1j8BlpHCmzYBV\npDPHK4HbJJ1COovsTajXIOAGScNJZ6+XR8TaLuambSJSystPAXPyWfkC4OqIeLur7bnYNaT7y6sL\n97ErFhF3S/ow8HBu4xukNcl3J93TXw+8C5xF+sfgP/I9dQHn9bY+MzOrH4d1DWCSBudL33XhsC4z\ns94bMGFdfVUfCDc7M4dvLZF0mzpWRpst6WqlZUi/2dv6zcysNTxgN1Yrw81uj4gDcsjXSuD0wms7\nAQdHxHlV1r+BZ4mbmTXHQLqH3QqrImIZgNIyoPdFROQQsjF5nyOBYyWVlhgthZutBmZJmkCaAb5H\n4bj/HREv5eOWws0e7FT33pK+RkqhOYw04axkTkS8X0P9Gzj5h5lZc3jAbqxWZhabDUyPiCWSZpDy\nc5cUJ9ZVU7+ZmTWZL4m3XsXhZjn86qIKj7s1sEZpGdSTO78oabqkvXpTf1W9MzOzuvCA3XoVZxaL\niNWkrFmV+DLwKPAQ8GQXr08n3QOvuP7CojBmZtZkA2rAbubMbeD5vC43ABExAzhb0neBO3K5KRHx\ne1KM8wLSperRko6LiGeAbwPPAwcAj0kaA8yKiGMkzZB0B2kp069KOlvSebm9jwA3R8RuwF+Slh/d\nR9IvSeuKryatcT4TeBj4JmkAfxEYlfcblNcQX0Q6216R9zMzsxYYiGdMuwMnAaeRBsnSzO1jSTO3\np9Mxc/o0SSNIOaHvpWPm9B8kjQN+TFrOFNLM7Y+QBsOHSDO3O08EA9gyIiYoJd34Pin3dLn6IC0Z\num9EvJoH7KK9c72bA88CF0bEREnfAU4hLRRzDSnH9jOSDgSujIjDJN1JIa2npPs670dKXgIds8rf\nx8zMWmIgDtitnLkNaZAnIn4haZs8QJerD+CeiHi1TF/mR8TrwOuS1gE/yduXAfsqLS96MGnVtFKZ\noZ0PUsF+c8oN1pI+DXwaYJdddulqFzMzq4OBOGC3cuY2QOfQp+imvgPpfqnUnvqyGbA2IiZ0cwwq\n2K9
sGxzWZWbWHAPqHnYvNHLm9CfyMT8KrMupLMvVV5OIeA1YJemkfFxJKiXx2JBIpIf9zMysD/CA\n3bWKZ05Xcew/SFpESuhRWn2sXH0A43M4V7XuBc7MbV4BHJe33wRckCepjSWFfp3exX5mZtYHOPlH\nE0m6Hzg/IirOkFFNmU7lnwMmR8T/9aJMVUlBho4aF6NOvay3xawfe+7So1vdBLM+T07+UR/1DAUD\n9gf2zdt7TOIh6UTSLPQbJS2WtIWk/SU9IOkxSXdJGiVpsFKij0Nyua9LukTS3wAfBOZLmp9fe6N4\nfEmz8/OKkoKYmVlreMCuTL2SeEwCPlc4brdJPHLI1ULg5Dwh7D3gCuDEiNifFBZ2ST4bngFcJekI\n4Cjgooi4nDSz/dAK82dXkhTEzMxaYCDOEq9Gq0PBSvYkxV7fk0/GBwFrACJihaQfAXOBqRHxThX9\nrCQpyMpigWJY16BtRlZRpZmZVcIDdmVaHQq24XDAioiYWub1fYC1wI7dHKM4aWHzTq/1mBRkk4MV\nwrqGjhrnCRFmZg3iAbt+SqFZn89n3xMjorSs50sRsV7SqfQ+FGxD+BXwFDBS0tSIeFgpscce+ez6\nBGA7YBowV2nZ07WF8qVJZ7+W9OF8rOPz673pT1n7jB7OQk8yMjNrCN/Drp9GhYLNBq7Ol8wHAScC\n38jHWwwcLGkH0hrhZ0TE08As4Lu5/DXAPEnz84zxS0iXzdeRL6f3sj9mZtYCDutqc70JwaomxKs3\nJk+eHAsXVhV9ZmY2YDmsq4HqGeqlHrJ+dQ71yvvdL+kySQuBL0j6c0mP5jrulfSBvN/2ku6WtELS\ntaT70qVjvFGoc25h+yxJM/LzSyU9IWmppG817A01M7Me+R529Vqd9WtI6T8ySdsCB+V7zWcAXwT+\nlpQ7+8GIuFjS0XSsrNYjSduT7nGPz8cdUWY/J/8wM2sCD9jVa3Wo182F5zsBN0saBQwBVuXt04AT\nACLip5J+14v+rSPNaL8un4HP7WonJ/8wM2sOXxKvXm9CvSbkxy4RsRI4l45Qr8mkQbar43YX6lWc\nvHYFMCsi9gE+w6bhWt15j41/DzYHyPfFpwC3AscA83pxTDMzqzMP2I3VyKxfRcOBl/PzUwvbf0G6\nVI+kjwHbdlH2eWAvSUPzZe/D8/7DgOER8TPSPxjO3mVm1kIesBurkVm/ir4KzJH0GB3x1gAXAdNy\n3ScAL3QuGBEvArcAy/PXUqz11qR47qWkS/Ln1dhGMzOrgcO6rG4c1mVm1nsO6zIzM+tHPGCbmZm1\nAQ/YZmZmbcADtpmZWRvwgG1mZtYGPGCbmZm1AQ/YZmZmbcBx2FY3kl4Hnmp1O+psBzZejKY/cJ/a\nQ3/sE/TPftXap10jYmRPOzn5h9XTU5UE/7cTSQvdp77PfWof/bFfzeqTL4mbmZm1AQ/YZmZmbcAD\nttXTNa1uQAO4T+3BfWof/bFfTemTJ52ZmZm1AZ9hm5mZtQEP2NYjSUdJekrSs5K+1MXrknR5fn2p\npEmVlm2VavskaWdJ8yU9IWmFpC80v/Xl1fKzyq8PkrRI0tzmtbp7Nf7+jZB0q6QnJa2UNLW5re9a\njX06N//uLZf0Y0mbN7f1XaugT+MlPSzpbUnn96Zsq1Tbp4Z9TkSEH36UfQCDgP8BPgQMAZYAe3Xa\n5+PAzwEBBwGPVlq2Dfs0CpiUn28NPN0X+lRrvwqvnwf8GzC31f2pR5+A64Ez8vMhwIh27hMwGlgF\nbJG/vwWY0SZ92hE4ALgEOL83ZduwTw35nPAZtvVkCvBsRPwqIt4BbgKO67TPccAPI3kEGCFpVIVl\nW6HqPkXEmoh4HCAiXgdWkj5E+4JaflZI2gk4Gri2mY3uQdV9kjQcmAZcBxAR70TE2mY2voyafk6k\n9TO2kDQY2BJY3ayGd6PHPkXEKxGxAHi3t2VbpOo+NepzwgO29WQ08
GLh+5fY9Bev3D6VlG2FWvq0\ngaQxwETg0bq3sDq19usy4IvA+kY1sAq19Gk34DfAD/Jl/mslbdXIxlao6j5FxMvAt4AXgDXAuoi4\nu4FtrVQtf+vt/DnRo3p+TnjANquCpGHAbcA5EfFaq9tTK0nHAK9ExGOtbksdDQYmAVdFxETgTaDP\n3B+thqRtSWd5uwEfBLaS9MnWtsrKqffnhAds68nLwM6F73fK2yrZp5KyrVBLn5D0R6Q/whsj4vYG\ntrO3aunXnwDHSnqOdOnvMEk3NK6pFaulTy8BL0VE6czmVtIA3mq19OkIYFVE/CYi3gVuBw5uYFsr\nVcvfejt/TpTViM8JD9jWkwXAOEm7SRoC/AVwZ6d97gROyTNbDyJdpltTYdlWqLpPkkS6J7oyIr7d\n3Gb3qOp+RcTfRcROETEml/vPiOgLZ2619Ol/gRcl7Zn3Oxx4omktL6+Wv6kXgIMkbZl/Fw8n3R9t\ntVr+1tv5c6JLDfucaOasOz/a80Gasfo0acbk3+dtnwU+m58L+F5+fRkwubuyfeFRbZ+AjwIBLAUW\n58fHW92fevysCsc4hD4yS7wOv38TgIX553UHsG2r+1OHPl0EPAksB34EDG11fyrs0x+Trnq8BqzN\nz7cpV7YvPKrtU6M+J7zSmZmZWRvwJXEzM7M24AHbzMysDXjANjMzawMesM3MzNqAB2wzM7M24AHb\nzHok6X1JiwuPMVUcY4Skz9W/dRuOf2yzMz1Jmi5pr2bWaQOXw7rMrEeS3oiIYTUeYwwpvnvvXpYb\nFBHv11J3I+TkG9eS+nRrq9tj/Z/PsM2sKkq5s2dKWqCUs/kzefswSfdJelzSMkmlDEeXAmPzGfpM\nSYeokHdb0ixJM/Lz5yR9Q9LjwEmSxkqaJ+kxSb+UNL6L9syQNCs/ny3pKkmPSPpVruv7SjmxZxfK\nvCHpO0o5i++TNDJvn5DLLpX073kNbyTdL+kySQuBC4FjgZm5T2MlnZnfjyWSbpO0ZaE9l0v6r9ye\nEwttuDC/T0skXZq39dhfG3gGt7oBZtYWtpC0OD9fFRHHA6eTlsw8QNJQ4CFJd5MyHB0fEa9J2gF4\nRNKdpMQbe0fEBABJh/RQ528jYlLe9z7S6lLPSDoQuBI4rIfy2wJTSYPqnaT10s8AFkiaEBGLga2A\nhRFxrqSvAP8AnA38EPh8RDwg6eK8/Zx83CERMTm3axyFM2xJayPiX/Pzr+X36IpcbhRpBazxuT23\nSvoYKZnHgRHxlqTt8r7XVNFf6+c8YJtZJX5fGmgLjgT2LZwtDgfGkZZn/CdJ00ipOkcDH6iizpth\nQ8ajg4E5aYlmAIZWUP4nERGSlgG/johl+XgrgDGk5SLXl+oBbgBuV8qjPSIiHsjbrwfmdG5XGXvn\ngXoEMAy4q/DaHRGxHnhCUun9OAL4QUS8BRARr9bQX+vnPGCbWbVEOgu9a6ON6bL2SGD/iHhXKQPY\n5l2Uf4+Nb8t13ufN/HUzYG0X/zD05O38dX3heen7cp99lUzqebOb12YD0yNiSX4fDumiPZDeu3Kq\n7a/1c76HbWbVugs4SymNIJL2kLQV6Uz7lTxYHwrsmvd/Hdi6UP55YC9JQyWNIGWe2kSkPMKrJJ2U\n65Gk/erUh82A0hWCvwIejIh1wO8k/Wne/tfAA10VZtM+bQ2sye/JyRXUfw/wqcK97u0a3F9rYx6w\nzaxa15LSVT4uaTnwL6Qz1xuByflS9CmkzFJExG9J97mXS5oZES8Ct5CyTt0CLOqmrpOB0yUtAVaQ\n7vvWw5vAlNz+w4CL8/ZTSZPJlpIyfl1cpvxNwAWSFkkaC3wZeBR4iNzv7kTEPNL97IV5jsD5+aVG\n9dfamMO6zGzAUh3C1cyaxWfYZmZmbcBn2GZmZm3AZ9hmZmZtwAO2mZlZG/CAbWZm1gY8YJuZmbUB\nD9hmZmZtwAO2mZlZG/h/zJ9IT
33ZtPkAAAAASUVORK5CYII=\n", 146 | "text/plain": [ 147 | "" 148 | ] 149 | }, 150 | "metadata": {}, 151 | "output_type": "display_data" 152 | } 153 | ], 154 | "source": [ 155 | "import matplotlib.pyplot as plt\n", 156 | "\n", 157 | "gbrt = GradientBoostingClassifier(random_state=0, max_depth=1)\n", 158 | "gbrt.fit(X_train, y_train)\n", 159 | "\n", 160 | "plot_feature_importances_cancer(gbrt)\n", 161 | "plt.show()" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "勾配ブースティングではいくつかの特徴量が完全に無視されている。勾配ブースティングとランダムフォレストは同じ様なデータを得意とするので、一般的には頑健なランダムフォレストを先に試す。ランダムフォレストの精度をあげたいときに勾配ブースティングを試す。もっと大きな問題に適用したい場合はxgboostのほうが高速で多くのチューニングが可能。" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "### 勾配ブースティング回帰木の長所, 欠点, パラメータ\n", 176 | "\n", 177 | "- パラメータ\n", 178 | "\n", 179 | "```\n", 180 | "n_estimators: 決定木の数を指定\n", 181 | "learning_rate: 誤りの補正度合いを制御する -> 小さいと、複雑なモデルを作るにはより沢山の決定木が必要になる\n", 182 | "```\n", 183 | "\n", 184 | "- 利点\n", 185 | " - 強い(教師あり学習の中でもトップクラス) \n", 186 | "- 欠点\n", 187 | " - パラメータのチューニングが大切であること\n", 188 | " - 時間がかかる\n", 189 | " " 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "高次元で疎なデータにはうまく機能しない。" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [] 205 | } 206 | ], 207 | "metadata": { 208 | "kernelspec": { 209 | "display_name": "Python 3", 210 | "language": "python", 211 | "name": "python3" 212 | }, 213 | "language_info": { 214 | "codemirror_mode": { 215 | "name": "ipython", 216 | "version": 3 217 | }, 218 | "file_extension": ".py", 219 | "mimetype": "text/x-python", 220 | "name": "python", 221 | "nbconvert_exporter": "python", 222 | "pygments_lexer": "ipython3", 223 | "version": "3.5.4" 224 | } 225 | }, 226 | "nbformat": 4, 227 | "nbformat_minor": 2 228 | } 229 | 
-------------------------------------------------------------------------------- /ch02/mytree.dot: -------------------------------------------------------------------------------- 1 | digraph Tree { 2 | node [shape=box, style="filled", color="black"] ; 3 | 0 [label="X[1] <= -5.8141\nsamples = 100\nvalue = [50, 50]", fillcolor="#e5813900"] ; 4 | 1 [label="samples = 25\nvalue = [25, 0]", fillcolor="#e58139ff"] ; 5 | 0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ; 6 | 2 [label="X[1] <= 5.3475\nsamples = 75\nvalue = [25, 50]", fillcolor="#399de57f"] ; 7 | 0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"] ; 8 | 3 [label="samples = 50\nvalue = [0, 50]", fillcolor="#399de5ff"] ; 9 | 2 -> 3 ; 10 | 4 [label="samples = 25\nvalue = [25, 0]", fillcolor="#e58139ff"] ; 11 | 2 -> 4 ; 12 | } -------------------------------------------------------------------------------- /ch02/naive_bayes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# ナイーブベイズクラス分類器" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "ナイーブベイズクラス分類器は線形モデルによく似た分類器。 \n", 15 | "線形モデルより高速だが汎化性能が劣る。" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "- 特徴\n", 23 | "\n", 24 | "```\n", 25 | "線形モデルに似たクラス分類器.\n", 26 | "訓練が線形モデルよりも高速.\n", 27 | "速度の代償として,汎化性能ではLogisticRegression, LinearSVCよりも僅かに劣る.\n", 28 | "\n", 29 | "高速に学習できるのは,クラスに対する統計値を個々の特徴量ごとに集めてパラメータを学習するからである.\n", 30 | "```\n", 31 | "\n", 32 | "- 種類\n", 33 | " - GaussianNB: 任意の連続値\n", 34 | " - BernoulliNB: 二値データ\n", 35 | " - MultinomialNB: カウントデータ" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "import numpy as np\n", 45 | "\n", 46 | "X = np.array([[0, 1, 0, 1],\n", 47 | " [1, 0, 1, 1],\n", 48 | " [0, 0, 0, 1],\n", 49 | " [1, 
0, 1, 0]])\n", 50 | "\n", 51 | "y = np.array([0, 1, 0, 1])" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "クラスごとに各特徴量の非ゼロ要素数を数えるコード例" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 3, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "name": "stdout", 68 | "output_type": "stream", 69 | "text": [ 70 | "label 0\n", 71 | "label 1\n", 72 | "Feature counts:\n", 73 | "{0: array([0, 1, 0, 2]), 1: array([2, 0, 2, 1])}\n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "counts = {}\n", 79 | "for label in np.unique(y):\n", 80 | " print(\"label\", label)\n", 81 | " counts[label] = X[y == label].sum(axis=0)\n", 82 | "print(\"Feature counts:\\n{}\".format(counts))" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "# GaussianNB" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "アヤメ(iris)データの分類" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 2, 102 | "metadata": { 103 | "collapsed": true 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "import mglearn\n", 108 | "import matplotlib.pyplot as plt\n", 109 | "from sklearn.linear_model import LinearRegression\n", 110 | "from sklearn.model_selection import train_test_split\n", 111 | "%matplotlib inline" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 3, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/plain": [ 122 | "1.0" 123 | ] 124 | }, 125 | "execution_count": 3, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "from sklearn import datasets\n", 132 | "iris = datasets.load_iris()\n", 133 | "X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=42)\n", 134 | "from sklearn.naive_bayes import GaussianNB\n", 135 | "gnb = GaussianNB().fit(X_train, y_train)\n", 136 | "gnb.score(X_test, y_test)" 137 | ] 138 | }, 139 | 
{ 140 | "cell_type": "code", 141 | "execution_count": 79, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "# from sklearn.naive_bayes import MultinomialNB\n", 146 | "# from sklearn import metrics\n", 147 | "# from sklearn.datasets import fetch_20newsgroups\n", 148 | "# from sklearn.feature_extraction.text import CountVectorizer\n", 149 | "\n", 150 | "# categories = ['alt.atheism', 'talk.religion.misc','comp.graphics', 'sci.space']\n", 151 | "# newsgroups_test = fetch_20newsgroups(subset='test', categories=categories)\n", 152 | "# vectorizer = CountVectorizer()\n", 153 | "# vectors_test = vectorizer.transform(newsgroups_test.data)\n", 154 | "# clf = MultinomialNB(alpha=.01)\n", 155 | "# clf.fit(vectors, newsgroups_train.target)\n", 156 | "# pred = clf.predict(vectors_test)\n", 157 | "# metrics.f1_score(newsgroups_test.target, pred, average='macro')" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 80, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "data": { 167 | "text/plain": [ 168 | "0.83519815555028587" 169 | ] 170 | }, 171 | "execution_count": 80, 172 | "metadata": {}, 173 | "output_type": "execute_result" 174 | } 175 | ], 176 | "source": [ 177 | "from sklearn.naive_bayes import MultinomialNB\n", 178 | "from sklearn import metrics\n", 179 | "from sklearn.datasets import fetch_20newsgroups\n", 180 | "from sklearn.feature_extraction.text import CountVectorizer\n", 181 | "newsgroups_train = fetch_20newsgroups(subset='train',\n", 182 | " remove=('headers', 'footers', 'quotes'),\n", 183 | " categories=categories)\n", 184 | "vectors = vectorizer.fit_transform(newsgroups_train.data)\n", 185 | "clf = MultinomialNB(alpha=.01)\n", 186 | "clf.fit(vectors, newsgroups_train.target)\n", 187 | "vectors_test = vectorizer.transform(newsgroups_test.data)\n", 188 | "pred = clf.predict(vectors_test)\n", 189 | "metrics.f1_score(newsgroups_test.target, pred, average='macro')" 190 | ] 191 | }, 192 | { 193 | "cell_type": 
"markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "### ナイーブベイズの利点, 欠点, パラメータ\n", 197 | "\n", 198 | "- パラメータ\n", 199 | "\n", 200 | "```\n", 201 | "alpha: モデルの複雑さを制御する.\n", 202 | "alphaが大きいとスムーズになり,モデルの複雑さが減少する.\n", 203 | "\n", 204 | "GaussianNBは高次元データ,他の二つはテキストのようなカウントデータに対して用いられる.\n", 205 | "一般にMultinomialNBの方がBernoulliNBより若干性能がよく,特に非ゼロ特徴量が比較的多い場合(大きなドキュメント)に有効.\n", 206 | "```\n", 207 | "\n", 208 | " \n", 209 | "ナイーブベイズモデルの利点と欠点の多くは線形モデルと共通する。 \n", 210 | "訓練も予測も非常に高速で、訓練の過程も理解しやすい。 \n", 211 | "高次元の疎なデータに対してうまく機能するし、パラメータの設定に対しても頑健である。 \n", 212 | "線形モデルですら時間のかかるような大規模データに対するベースラインモデルとして非常に有用。" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "
" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 95, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "name": "stdout", 229 | "output_type": "stream", 230 | "text": [ 231 | "(2034, 26879)\n", 232 | " (0, 21225)\t1\n", 233 | " (0, 22504)\t1\n", 234 | " (0, 10874)\t1\n", 235 | " (0, 4030)\t1\n", 236 | " (0, 5604)\t1\n", 237 | " (0, 10651)\t1\n", 238 | " (0, 10701)\t2\n", 239 | " (0, 11962)\t1\n", 240 | " (0, 21173)\t1\n", 241 | " (0, 20041)\t1\n", 242 | " (0, 2408)\t1\n", 243 | " (0, 4326)\t1\n", 244 | " (0, 14847)\t1\n", 245 | " (0, 21177)\t1\n", 246 | " (0, 24057)\t2\n", 247 | " (0, 21409)\t1\n", 248 | " (0, 2427)\t1\n", 249 | " (0, 15444)\t1\n", 250 | " (0, 21284)\t1\n", 251 | " (0, 9935)\t1\n", 252 | " (0, 17134)\t1\n", 253 | " (0, 24082)\t7\n", 254 | " (0, 12920)\t3\n", 255 | " (0, 23103)\t1\n", 256 | " (0, 17121)\t1\n", 257 | " :\t:\n", 258 | " (1, 23758)\t1\n", 259 | " (1, 16783)\t1\n", 260 | " (1, 24212)\t1\n", 261 | " (1, 26270)\t1\n", 262 | " (1, 10165)\t1\n", 263 | " (1, 7972)\t1\n", 264 | " (1, 3341)\t1\n", 265 | " (1, 22163)\t1\n", 266 | " (1, 26066)\t1\n", 267 | " (1, 14330)\t2\n", 268 | " (1, 6887)\t1\n", 269 | " (1, 9744)\t1\n", 270 | " (1, 4252)\t1\n", 271 | " (1, 21696)\t1\n", 272 | " (1, 10651)\t1\n", 273 | " (1, 11962)\t1\n", 274 | " (1, 4326)\t1\n", 275 | " (1, 14847)\t1\n", 276 | " (1, 24082)\t1\n", 277 | " (1, 3254)\t1\n", 278 | " (1, 13712)\t1\n", 279 | " (1, 24380)\t4\n", 280 | " (1, 3042)\t1\n", 281 | " (1, 26405)\t1\n", 282 | " (1, 24080)\t1\n" 283 | ] 284 | } 285 | ], 286 | "source": [ 287 | "from sklearn.naive_bayes import MultinomialNB\n", 288 | "from sklearn import metrics\n", 289 | "from sklearn.datasets import fetch_20newsgroups\n", 290 | "from sklearn.feature_extraction.text import CountVectorizer\n", 291 | "newsgroups_train = fetch_20newsgroups(subset='train',\n", 292 | " remove=('headers', 'footers', 'quotes'),\n", 293 | " categories=categories)\n", 294 | "vectors = 
vectorizer.fit_transform(newsgroups_train.data)\n", 295 | "print(vectors.shape)\n", 296 | "print(vectors[:2,:])\n", 297 | "\n", 298 | "# vectors_test = vectorizer.transform(newsgroups_test.data)\n", 299 | "\n", 300 | "# print(vectors_test[:5])\n", 301 | "# iris = datasets.load_iris()\n", 302 | "# X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=42)\n", 303 | "\n", 304 | "#http://www.kamishima.net/mlmpyja/nbayes1/fit1.html\n", 305 | "#https://github.com/krzjoa/Bayes/blob/master/bayes/classifiers/cnb.py" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 19, 311 | "metadata": {}, 312 | "outputs": [ 313 | { 314 | "name": "stdout", 315 | "output_type": "stream", 316 | "text": [ 317 | "5\n" 318 | ] 319 | } 320 | ], 321 | "source": [ 322 | "# # alpha = 1\n", 323 | "# # weight_normalized=False\n", 324 | "# # complement_features_ = None\n", 325 | "# # alpha_sum_ = None\n", 326 | "# import numpy as np\n", 327 | "\n", 328 | "\n", 329 | "# X = np.array([[1,1,1,1,1],[0,0,0,0,0], [0,0,0,0,0], [1,1,1,1,1]])\n", 330 | "# y = [1,0,0,1]\n", 331 | "\n", 332 | "# pY_ = None\n", 333 | "# pXgY_ = None\n", 334 | "# n_samples = X.shape[0]\n", 335 | "# n_features = X.shape[1]\n", 336 | "# n_classes = 2\n", 337 | "# n_fvalues = 2\n", 338 | "\n", 339 | "# print(n_features)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 24, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [ 348 | "# def fit(X, y):\n", 349 | "# nY = np.zeros(n_classes, dtype=int)\n", 350 | "# for i in range(n_samples):\n", 351 | "# nY[y[i]] += 1\n", 352 | "\n", 353 | "# pY_ = np.empty(n_classes, dtype=float)\n", 354 | "# print(pY_)\n", 355 | "# for i in range(n_classes):\n", 356 | "# pY_[i] = nY[i] / n_samples\n", 357 | "\n", 358 | "# nXY = np.zeros((n_features, n_fvalues, n_classes), dtype=int)\n", 359 | "# for i in range(n_samples):\n", 360 | "# for j in range(n_features):\n", 361 | "# nXY[j, X[i, j], y[i]] += 1\n", 
362 | "\n", 363 | "# pXgY_ = np.empty((n_features, n_fvalues, n_classes), dtype=float)\n", 364 | "# print(pXgY_)\n", 365 | "# for j in range(n_features):\n", 366 | "# for xi in range(n_fvalues):\n", 367 | "# for yi in range(n_classes):\n", 368 | "# pXgY_[j, xi, yi] = nXY[j, xi, yi] / float(nY[yi])\n", 369 | " \n", 370 | "# def predict(X):\n", 371 | "# print(pY_)\n", 372 | "# logpXY = np.log(pY_)\n", 373 | "# for j in range(n_features):\n", 374 | "# logpXY = logpXY + np.log(pXgY_[j, xi[j], :])\n", 375 | "# y[i] = np.argmax(logpXY)\n" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": 26, 381 | "metadata": {}, 382 | "outputs": [ 383 | { 384 | "name": "stdout", 385 | "output_type": "stream", 386 | "text": [ 387 | "[ 9.88131292e-324 9.88131292e-324]\n", 388 | "[[[ 9.88131292e-324 0.00000000e+000]\n", 389 | " [ 0.00000000e+000 9.88131292e-324]]\n", 390 | "\n", 391 | " [[ 9.88131292e-324 0.00000000e+000]\n", 392 | " [ 0.00000000e+000 9.88131292e-324]]\n", 393 | "\n", 394 | " [[ 9.88131292e-324 0.00000000e+000]\n", 395 | " [ 0.00000000e+000 9.88131292e-324]]\n", 396 | "\n", 397 | " [[ 9.88131292e-324 0.00000000e+000]\n", 398 | " [ 0.00000000e+000 9.88131292e-324]]\n", 399 | "\n", 400 | " [[ 9.88131292e-324 0.00000000e+000]\n", 401 | " [ 0.00000000e+000 9.88131292e-324]]]\n" 402 | ] 403 | } 404 | ], 405 | "source": [ 406 | "# fit(X, y)" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 27, 412 | "metadata": {}, 413 | "outputs": [ 414 | { 415 | "name": "stdout", 416 | "output_type": "stream", 417 | "text": [ 418 | "None\n" 419 | ] 420 | }, 421 | { 422 | "ename": "AttributeError", 423 | "evalue": "'NoneType' object has no attribute 'log'", 424 | "output_type": "error", 425 | "traceback": [ 426 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 427 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", 428 | "\u001b[0;32m\u001b[0m in 
\u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 429 | "\u001b[0;32m\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(X)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpY_\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m \u001b[0mlogpXY\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpY_\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 26\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_features\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0mlogpXY\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlogpXY\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpXgY_\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxi\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 430 | "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'log'" 431 | ] 432 | } 433 | ], 434 | "source": [ 435 | "predict(X)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 70, 441 | "metadata": { 442 | "collapsed": true 443 | }, 444 | "outputs": [], 445 | "source": [ 446 | "# cnb = ComplementNB()\n", 447 | "# cnb.fit(X_train, 
y_train).score(X_test, y_test)\n", 448 | "\n", 449 | "\n", 450 | "# alpha = 1\n", 451 | "# weight_normalized=False\n", 452 | "# complement_features_ = None\n", 453 | "# alpha_sum_ = None\n", 454 | "\n", 455 | "# pY_ = None\n", 456 | "# pXgY_ = None\n", 457 | "# n_samples = X.shape[0]\n", 458 | "# n_features = X.shape[1]\n", 459 | "# n_classes = 2\n", 460 | "# n_fvalues = 2\n", 461 | "\n", 462 | "# if n_samples != len(y):\n", 463 | "# raise ValueError('Mismatched number of samples.')\n", 464 | "\n", 465 | "import numpy as np\n", 466 | "\n", 467 | "\n", 468 | "# X = np.array([[1,1,1,1,1],[0,0,0,0,0], [0,0,0,0,0], [1,1,1,1,1]])\n", 469 | "# y = [1,0,0,1]\n", 470 | "\n", 471 | "class ComplementNB(object):\n", 472 | " def __init__(self):\n", 473 | " self.pY_ = None\n", 474 | " self.pXgY_ = None\n", 475 | "\n", 476 | " def fit(self, X, y):\n", 477 | " n_samples = X.shape[0]\n", 478 | " n_features = X.shape[1]\n", 479 | " if n_samples != len(y):\n", 480 | " raise ValueError('Mismatched number of samples.')\n", 481 | "\n", 482 | " \n", 483 | " nY = np.zeros(n_classes, dtype=int)\n", 484 | " print(\"nY.shape \", nY.shape)\n", 485 | " print(\"len(y) \", len(y))\n", 486 | " for i in range(n_samples):\n", 487 | " nY[y[i]] += 1\n", 488 | "\n", 489 | " self.pY_ = np.empty(n_classes, dtype=float)\n", 490 | " for i in range(n_classes):\n", 491 | " self.pY_[i] = nY[i] / n_samples\n", 492 | "\n", 493 | " nXY = np.zeros((n_features, n_fvalues, n_classes), dtype=int)\n", 494 | " for i in range(n_samples):\n", 495 | " for j in range(n_features):\n", 496 | " nXY[j, X[i, j], y[i]] += 1\n", 497 | "\n", 498 | " self.pXgY_ = np.empty((n_features, n_fvalues, n_classes),\n", 499 | " dtype=float)\n", 500 | " for j in range(n_features):\n", 501 | " for xi in range(n_fvalues):\n", 502 | " for yi in range(n_classes):\n", 503 | " self.pXgY_[j, xi, yi] = nXY[j, xi, yi] / float(nY[yi])\n", 504 | "\n", 505 | " def predict(self, X):\n", 506 | " logpXY = np.log(self.pY_)\n", 507 | "# for j in 
range(n_features):\n", 508 | "# logpXY = logpXY + np.log(self.pXgY_[j, xi[j], :])\n", 509 | " print(np.argmax(logpXY))\n", 510 | "\n" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": 71, 516 | "metadata": {}, 517 | "outputs": [ 518 | { 519 | "name": "stdout", 520 | "output_type": "stream", 521 | "text": [ 522 | "nY.shape (2,)\n", 523 | "len(y) 4\n" 524 | ] 525 | } 526 | ], 527 | "source": [ 528 | "cnb = ComplementNB()\n", 529 | "cnb.fit(X, y)" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": 61, 535 | "metadata": {}, 536 | "outputs": [ 537 | { 538 | "name": "stdout", 539 | "output_type": "stream", 540 | "text": [ 541 | "0\n" 542 | ] 543 | } 544 | ], 545 | "source": [ 546 | "# np.array([1,1,1,1,1])\n", 547 | "t0 = np.array([0,0,0,0,0])\n", 548 | "t1 = np.array([1,1,1,1,1])\n", 549 | "\n", 550 | "cnb.predict(t0)" 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "execution_count": 43, 556 | "metadata": {}, 557 | "outputs": [ 558 | { 559 | "name": "stdout", 560 | "output_type": "stream", 561 | "text": [ 562 | "0\n" 563 | ] 564 | } 565 | ], 566 | "source": [ 567 | "cnb.predict(t1)" 568 | ] 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": 72, 573 | "metadata": {}, 574 | "outputs": [ 575 | { 576 | "name": "stdout", 577 | "output_type": "stream", 578 | "text": [ 579 | "nY.shape (2,)\n", 580 | "len(y) 112\n" 581 | ] 582 | }, 583 | { 584 | "ename": "IndexError", 585 | "evalue": "index 2 is out of bounds for axis 0 with size 2", 586 | "output_type": "error", 587 | "traceback": [ 588 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 589 | "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", 590 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m# y_train = list(y_train)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mcnb\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mComplementNB\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mcnb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 591 | "\u001b[0;32m\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"len(y) \"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_samples\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 42\u001b[0;31m \u001b[0mnY\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 43\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpY_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_classes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 592 | "\u001b[0;31mIndexError\u001b[0m: index 2 is out of bounds for axis 0 with size 2" 593 | ] 594 | } 595 | ], 596 | "source": [ 597 | "iris = datasets.load_iris()\n", 598 | "X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=42)\n", 599 | "# y_train = list(y_train)\n", 600 | "cnb = 
ComplementNB()\n", 601 | "cnb.fit(X_train, y_train)\n" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": 45, 607 | "metadata": {}, 608 | "outputs": [ 609 | { 610 | "data": { 611 | "text/plain": [ 612 | "(112, 4)" 613 | ] 614 | }, 615 | "execution_count": 45, 616 | "metadata": {}, 617 | "output_type": "execute_result" 618 | } 619 | ], 620 | "source": [ 621 | "X_train.shape" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": 46, 627 | "metadata": {}, 628 | "outputs": [ 629 | { 630 | "data": { 631 | "text/plain": [ 632 | "(112,)" 633 | ] 634 | }, 635 | "execution_count": 46, 636 | "metadata": {}, 637 | "output_type": "execute_result" 638 | } 639 | ], 640 | "source": [ 641 | "y_train.shape" 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": 51, 647 | "metadata": {}, 648 | "outputs": [], 649 | "source": [ 650 | "y_train = list(y_train)\n" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "execution_count": 47, 656 | "metadata": {}, 657 | "outputs": [ 658 | { 659 | "data": { 660 | "text/plain": [ 661 | "(4, 5)" 662 | ] 663 | }, 664 | "execution_count": 47, 665 | "metadata": {}, 666 | "output_type": "execute_result" 667 | } 668 | ], 669 | "source": [ 670 | "X = np.array([[1,1,1,1,1],[0,0,0,0,0], [0,0,0,0,0], [1,1,1,1,1]])\n", 671 | "X.shape" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": 48, 677 | "metadata": {}, 678 | "outputs": [ 679 | { 680 | "ename": "AttributeError", 681 | "evalue": "'list' object has no attribute 'shape'", 682 | "output_type": "error", 683 | "traceback": [ 684 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 685 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", 686 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 687 | "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'shape'" 688 | ] 689 | } 690 | ], 691 | "source": [ 692 | "y = [1,0,0,1]\n", 693 | "y.shape" 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": null, 699 | "metadata": { 700 | "collapsed": true 701 | }, 702 | "outputs": [], 703 | "source": [] 704 | } 705 | ], 706 | "metadata": { 707 | "kernelspec": { 708 | "display_name": "Python 3", 709 | "language": "python", 710 | "name": "python3" 711 | }, 712 | "language_info": { 713 | "codemirror_mode": { 714 | "name": "ipython", 715 | "version": 3 716 | }, 717 | "file_extension": ".py", 718 | "mimetype": "text/x-python", 719 | "name": "python", 720 | "nbconvert_exporter": "python", 721 | "pygments_lexer": "ipython3", 722 | "version": "3.5.4" 723 | } 724 | }, 725 | "nbformat": 4, 726 | "nbformat_minor": 2 727 | } 728 | -------------------------------------------------------------------------------- /ch02/tmp: -------------------------------------------------------------------------------- 1 | digraph { 2 | node [shape=box] 3 | edge [labeldistance=10.5] 4 | 0 [label="Has feathers?"] 5 | 1 [label="Can fly?"] 6 | 2 [label="Has fins?"] 7 | 3 [label=Hawk] 8 | 4 [label=Penguin] 9 | 5 [label=Dolphin] 10 | 6 [label=Bear] 11 | 0 -> 1 [label=True] 12 | 0 -> 2 [label=False] 13 | 1 -> 3 [label=True] 14 | 1 -> 4 [label=False] 15 | 2 -> 5 [label=True] 16 | 2 -> 6 [label=False] 17 | } 18 | -------------------------------------------------------------------------------- /ch02/tmp.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/miyamotok0105/IntroductionToMachineLearningWithPython_sample/ed16e1e3b51132e11b9699e10ab7b0ccacc605a0/ch02/tmp.png -------------------------------------------------------------------------------- /ch02/tree.dot: -------------------------------------------------------------------------------- 1 | digraph Tree { 2 | node [shape=box, style="filled", color="black"] ; 3 | 0 [label="worst radius <= 16.795\nsamples = 426\nvalue = [159, 267]\nclass = benign", fillcolor="#399de567"] ; 4 | 1 [label="worst concave points <= 0.1359\nsamples = 284\nvalue = [25, 259]\nclass = benign", fillcolor="#399de5e6"] ; 5 | 0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ; 6 | 2 [label="radius error <= 1.0475\nsamples = 252\nvalue = [4, 248]\nclass = benign", fillcolor="#399de5fb"] ; 7 | 1 -> 2 ; 8 | 3 [label="smoothness error <= 0.0033\nsamples = 251\nvalue = [3, 248]\nclass = benign", fillcolor="#399de5fc"] ; 9 | 2 -> 3 ; 10 | 4 [label="samples = 4\nvalue = [1, 3]\nclass = benign", fillcolor="#399de5aa"] ; 11 | 3 -> 4 ; 12 | 5 [label="samples = 247\nvalue = [2, 245]\nclass = benign", fillcolor="#399de5fd"] ; 13 | 3 -> 5 ; 14 | 6 [label="samples = 1\nvalue = [1, 0]\nclass = maligant", fillcolor="#e58139ff"] ; 15 | 2 -> 6 ; 16 | 7 [label="worst texture <= 25.62\nsamples = 32\nvalue = [21, 11]\nclass = maligant", fillcolor="#e5813979"] ; 17 | 1 -> 7 ; 18 | 8 [label="worst smoothness <= 0.1786\nsamples = 12\nvalue = [3, 9]\nclass = benign", fillcolor="#399de5aa"] ; 19 | 7 -> 8 ; 20 | 9 [label="samples = 10\nvalue = [1, 9]\nclass = benign", fillcolor="#399de5e3"] ; 21 | 8 -> 9 ; 22 | 10 [label="samples = 2\nvalue = [2, 0]\nclass = maligant", fillcolor="#e58139ff"] ; 23 | 8 -> 10 ; 24 | 11 [label="worst symmetry <= 0.2682\nsamples = 20\nvalue = [18, 2]\nclass = maligant", fillcolor="#e58139e3"] ; 25 | 7 -> 11 ; 26 | 12 [label="samples = 3\nvalue = [1, 2]\nclass = benign", fillcolor="#399de57f"] ; 27 | 11 -> 12 ; 28 | 13 [label="samples = 17\nvalue 
= [17, 0]\nclass = maligant", fillcolor="#e58139ff"] ; 29 | 11 -> 13 ; 30 | 14 [label="texture error <= 0.4732\nsamples = 142\nvalue = [134, 8]\nclass = maligant", fillcolor="#e58139f0"] ; 31 | 0 -> 14 [labeldistance=2.5, labelangle=-45, headlabel="False"] ; 32 | 15 [label="samples = 5\nvalue = [0, 5]\nclass = benign", fillcolor="#399de5ff"] ; 33 | 14 -> 15 ; 34 | 16 [label="worst concavity <= 0.1907\nsamples = 137\nvalue = [134, 3]\nclass = maligant", fillcolor="#e58139f9"] ; 35 | 14 -> 16 ; 36 | 17 [label="worst texture <= 30.975\nsamples = 5\nvalue = [2, 3]\nclass = benign", fillcolor="#399de555"] ; 37 | 16 -> 17 ; 38 | 18 [label="samples = 3\nvalue = [0, 3]\nclass = benign", fillcolor="#399de5ff"] ; 39 | 17 -> 18 ; 40 | 19 [label="samples = 2\nvalue = [2, 0]\nclass = maligant", fillcolor="#e58139ff"] ; 41 | 17 -> 19 ; 42 | 20 [label="samples = 132\nvalue = [132, 0]\nclass = maligant", fillcolor="#e58139ff"] ; 43 | 16 -> 20 ; 44 | } -------------------------------------------------------------------------------- /ch03/README.md: -------------------------------------------------------------------------------- 1 | # 教師なし学習 2 | 3 | ### [前処理とスケール変換](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch03/applying_data_transformations.ipynb) 4 | 5 | - StandardScaler 6 | - 特徴: 個々の特徴量を平均0分散が1になるように変換する 7 | - 問題点: Min, Maxが一定の範囲に入ることを保証するわけではない 8 | - RobustScaler 9 | - 特徴: 分散の代わりに中央値と四分位数を用いる -> 外れ値を無視 10 | - MinMaxScaler 11 | - 特徴: データが全て0~1の範囲に定まるように変換する 12 | - Normalizer 13 | - 特徴: 特徴量ベクトルがユークリッド長1になるようにする -> ベクトルの長さではなく角度が問題になる場合に用いられる 14 | 15 | 16 | ### [次元削減](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch03/dimensionality_reduction.ipynb) 17 | 18 | 教師無し学習を用いたデータ変換には,可視化,データの圧縮,表現の発見が挙げられる. 
19 | それらを目的として最も用いられるアルゴリズム 20 | 21 | - 主成分分析(Principal Component Analysis: PCA) 22 | - PCAはデータセットの特徴量を相互に統計的に関連しないように回転させる手法 23 | - 非負値行列因子分解(Non-Negative Matrix Factorization: NMF) 24 | - 組み合わされたデータを作りあげている元の成分を特定することが出来る 25 | - PCAよりわかりやすい成分に分解出来る 26 | - t-SNE 27 | - 二次元散布図を用いたデータの可視化によく用いられるアルゴリズム 28 | - 殆どの場合3以上の新しい特徴量を生成するように利用することはない 29 | 30 | ### [クラスタリング](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch03/clustering.ipynb) 31 | 32 | #### アルゴリズム 33 | 34 | - k-means 35 | - クラスタセンタを用いてクラスタの特徴を表すことが出来る 36 | - 個々のデータポイントがクラスタセンタによって表現された成分分解手法 37 | - DBSCAN 38 | - ノイズを検出可能 39 | - 自動的にクラスタの数を決められる 40 | - 複雑な形状のクラスタを発見出来る 41 | - 凝集型クラスタリング 42 | - データの階層的な分割の候補を提示出来る 43 | 44 | #### クラスタリングアルゴリズムの評価 45 | 46 | - 正解データを用いたクラスタリングの評価 47 | - 調整ランド指数(ARI) 48 | - 正規化相互情報量(NMI) 49 | - 正解データを用いないクラスタリングの評価 50 | - シルエット係数 51 | -------------------------------------------------------------------------------- /ch04/README.md: -------------------------------------------------------------------------------- 1 | # データの表現と特徴量エンジニアリング 2 | 3 | - 連続値特徴量 4 | - 浮動小数点配列の配列のような形を取る 5 | - 離散値特徴量 6 | - カテゴリ変数 7 | 8 | 特定のアプリケーションに対して,最良のデータ表現を模索することを`特徴量エンジニアリング`という 9 | 10 | ### [カテゴリ変数](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch04/categorical_variables.ipynb) 11 | 12 | - ワンホットエンコーディング(ダミー変数) 13 | - ダミー変数とは,カテゴリ変数を一つ以上の0と1の値を持つ新しい特徴量で置き換えるもの. 値0と1を使えば,線形2クラス分類の式が意味を持つので,sklearnの殆どのモデルを利用出来る. 
14 | 15 | ### [ビニング,離散化,線形モデル,決定木](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch04/binning.ipynb) 16 | 17 | - ビニング, 離散化 18 |    - 連続値を,等間隔な区分に分割する 19 | 20 | ### [交互作用と多項式](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch04/interactions_and_polynomials.ipynb) 21 | 22 | - 交互作用特徴量 23 |    - ある特徴量x_1, x_2に対して, x_1 * x_2を考える 24 | - 多項式特徴量 25 | - ある特徴量xに対して, x^2, x^3と考える 26 | 27 | ### [単変量非線形変換](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch04/univariate_non-linear_transformations.ipynb) 28 | 29 | 殆どのモデルは,個々の特徴量がおおよそガウス分布に従っているときに最もうまく機能する. 30 | 整数のカウントデータに対してlog, expを用いると正規分布になったりする.(この種の変換は決定木ベースのモデルには関係が無い) 31 | 32 | ### [自動特徴量選択](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch04/automatic_feature_selection.ipynb) 33 | 34 | よい特徴量を調べるためには 35 | 36 | - 単変量統計 37 | - 変量統計では,個々の特徴量とターゲットの間に統計的に顕著な関係があるかどうかを計算する 38 | - モデルベース選択 39 | - 全ての特徴量を同時に考慮するので変数間の相互作用を捉える 40 | - 反復選択 41 | - 異なる特徴量を用いた一連のモデルを作る 42 | の三つがある.これらは教師あり手法であり,特徴量選択時には訓練セットだけを用いて行う必要がある. 43 | 44 | ### [専門家知識の利用](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch04/utilizing_expert_knowledge.ipynb) 45 | 46 | ### まとめ 47 | 48 | - 線形モデル 49 | - ビニングや多項式特徴量,交互作用特徴量の追加の恩恵を受けやすい 50 | - 非線形モデル(ランダムフォレストやSVM) 51 | - 明示的に特徴空間を拡張することなく,複雑なタスクを学習出来る 52 | -------------------------------------------------------------------------------- /ch04/automatic_feature_selection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 自動特徴量選択\n", 8 | "\n", 9 | "高次元データセットの場合,最も有用な特徴量だけを残して残りを捨て特徴量を減らすと,モデルが単純になり汎化性能が向上する.\n", 10 | "\n", 11 | "よい特徴量を調べるためには\n", 12 | "\n", 13 | "- 単変量統計\n", 14 | "- モデルベース選択\n", 15 | "- 反復選択\n", 16 | "\n", 17 | "の三つがある.これらは教師あり手法であり,特徴量選択時には訓練セットだけを用いて行う必要がある." 
18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## 単変量統計\n", 25 | "\n", 26 | "単変量統計では,個々の特徴量とターゲットの間に統計的に顕著な関係があるかどうかを計算する.そして最も高い確信度で関連している特徴量が選択される.クラス分類の場合は`分散分析`(ANOVA)と知られている.この方法の特性は`単変量`であることであり,他の特徴量と組み合わさって意味を成すような特徴量は捨てられてしまう." 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "X_train.shape: (284, 80)\n", 39 | "X_train_selected.shape: (284, 40)\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "import mglearn\n", 45 | "import numpy as np\n", 46 | "import matplotlib.pyplot as plt\n", 47 | "from sklearn.datasets import load_breast_cancer\n", 48 | "from sklearn.feature_selection import SelectPercentile\n", 49 | "from sklearn.model_selection import train_test_split\n", 50 | "\n", 51 | "cancer = load_breast_cancer()\n", 52 | "\n", 53 | "rng = np.random.RandomState(42)\n", 54 | "noise = rng.normal(size=(len(cancer.data), 50))\n", 55 | "\n", 56 | "# ノイズを加える\n", 57 | "X_w_noise = np.hstack([cancer.data, noise])\n", 58 | "\n", 59 | "X_train, X_test, y_train, y_test = train_test_split(\n", 60 | " X_w_noise, cancer.target, random_state=0, test_size=.5)\n", 61 | "\n", 62 | "select = SelectPercentile(percentile=50)\n", 63 | "select.fit(X_train, y_train)\n", 64 | "\n", 65 | "X_train_selected = select.transform(X_train)\n", 66 | "\n", 67 | "print(\"X_train.shape: {}\".format(X_train.shape))\n", 68 | "print(\"X_train_selected.shape: {}\".format(X_train_selected.shape))" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 4, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "[ True True True True True True True True True False True False\n", 81 | " True True True True True True False False True True True True\n", 82 | " True True True True True True False False False True False True\n", 83 | " 
False False True False False False False True False False True False\n", 84 | " False True False True False False False False False False True False\n", 85 | " True False False False False True False True False False False False\n", 86 | " True True False True False False False False]\n" 87 | ] 88 | }, 89 | { 90 | "data": { 91 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAA5IAAAA4CAYAAACPHscHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAACdVJREFUeJzt3WuMHWUdx/HvjyIqoOFWDbFAJSKCRgoFhGAQgRBQAiZi\nlEACxoQYeaFRYrwQESMveIMaryBeSFRQEYVgYkqQihpFWEALVAFJEQLSNop4SYqUvy/OU7uuq93Z\nPafnzPL9JM2ZeWZ25un5nZmz/84z01QVkiRJkiTN1Q7j7oAkSZIkqV8sJCVJkiRJnVhISpIkSZI6\nsZCUJEmSJHViISlJkiRJ6sRCUpIkSZLUyYIKySQnJ/ldkgeTfGhYndJoJPlqkvVJ7pnWtkeSm5I8\n0F53H2cfNbsk+yS5JcnaJPcmeW9rN78eSPKCJL9K8uuW38Wt/eVJbmv5fTvJTuPuq2aXZEmSu5Lc\n2ObNrieSrEuyJsndSe5obZ47eyDJbkmuTfLb9v13tNn1Q5ID2zG35c9TSd5nfovLvAvJJEuAzwOn\nAAcDZyY5eFgd00h8HTh5RtuHgJur6gDg5javyfMM8IGqOgg4Cji/HW/m1w+bgOOr6hBgBXBykqOA\nS4FPtfz+DLxrjH3U//deYO20ebPrlzdW1YqqOrzNe+7sh88AP6qqVwGHMDgGza4Hqup37ZhbAawE\n/gF8H/NbVBZyRfJI4MGqeqiqngauAU4fTrc0ClV1K/CnGc2nA1e16auAt2zXTmlOqurxqrqzTf+V\nwZfpyzC/XqiBv7XZ57U/BRwPXNvazW9CJVkGvBm4ss0Hs+s7z50TLsmLgWOBrwBU1dNV9SRm10cn\nAL+vqocxv0VlIYXky4BHps0/2trULy+tqsdhUKwALxlzf7QNSZYDhwK3YX690YZG3g2sB24Cfg88\nWVXPtFU8h06uTwMfBJ5t83tidn1SwKokU0nOa22eOyff/sAG4GttWPmVSXbB7ProHcDVbdr8FpGF\nFJKZpa0WsD1J25BkV+B7wPuq6qlx90dzV1Wb2xCfZQxGdBw022rbt1faliSnAuuramp68yyrmt3k\nOqaqDmNwK875SY4dd4c0JzsChwFfrKpDgb/jMMjeafePnwZ8d9x90fAtpJB8FNhn2vwy4LGFdUdj\n8ESSvQHa6/ox90f/Q5LnMSgiv1lV17Vm8+uZNjRrNYN7XXdLsmNb5Dl0Mh0DnJZkHYNbOI5ncIXS\n7Hqiqh5rr+sZ3KN1JJ47++BR4NGquq3NX8ugsDS7fjkFuLOqnmjz5reILKSQvB04oD25bicGl61v\nGE63tB3dAJzTps8Brh9jX/Q/tHuyvgKsrarLpi0yvx5IsjTJbm36hcCJDO5zvQU4o61mfhOoqj5c\nVcuqajmD77kfV9VZmF0vJNklyYu2TAMnAffguXPiVdUfgUeSHNiaTgDuw+z65ky2DmsF81tUUjX/\n0ThJ3sTgX2aXAF+tqkuG1TENX5KrgeOAvYAngIuAHwDfAfYF/gC8rapmPpBHY5bk9cBPgTVsvU/r\nIwzukzS/CZfktQweKrCEwT/gfaeqPpFkfwZXufYA7gLOrqpN4+up/p8kxwEXVNWpZtcPLafvt9kd\ngW9V1SVJ9sRz58RLs
oLBQ652Ah4C3kk7h2J2Ey/Jzgyep7J/Vf2ltXnsLSILKiQlSZIkSc89Cxna\nKkmSJEl6DrKQlCRJkiR1YiEpSZIkSerEQlKSJEmS1ImFpCRJkiSpkwUXkknOG0ZHNB7m119m12/m\n12/m119m12/m119mt/gM44qkH4p+M7/+Mrt+M79+M7/+Mrt+M7/+MrtFxqGtkiRJkqROUlVzXzmZ\n+8oauZUrV3Zaf2pqaiTb7rLdxa5rJnPle9xvs30uNmzYwNKlS7dbH7p+hkZ1DpiEY2QYfZjk/Cbh\nPZ6UfnQ59ibhPe6jxX5uGaVJ+L1se75vwzpvjup3osXwHg/L1NTUxqraZlgWkj3WJTuAJCPZdpft\nLnZdM5kr3+N+G9Xnoouun6FRnQMm4RiZhDy6moS/3yg/Q6PqR98+x3202M8to+RneX5G9TuR7/FW\nSaaq6vBtrefQVkmSJElSJxaSkiRJkqROLCQlSZIkSZ1YSEqSJEmSOrGQlCRJkiR1YiEpSZIkSerE\nQlKSJEmS1ImFpCRJkiSpEwtJSZIkSVInqaq5r5xsAB6e0bwXsHGYndJ2ZX79ZXb9Zn79Zn79ZXb9\nZn79ZXb9sV9VLd3WSp0KyVk3kNxRVYcvaCMaG/PrL7PrN/PrN/PrL7PrN/PrL7NbfBzaKkmSJEnq\nxEJSkiRJktTJMArJK4awDY2P+fWX2fWb+W1Dko8muTfJb5LcneR1I97f6iRzHXZ1RZJPJDmx4z7W\nJdlrHt3T8Hjs9Zv59ZfZLTILvkdSkqRhS3I0cBlwXFVtasXXTlX12Aj3uRq4oKruGOE+1gGHV5UP\nnJAk9ZpDWyVJk2hvYGNVbQKoqo1bisgkH0tye5J7klyRJK19dZJPJbk1ydokRyS5LskDST7Z1lme\n5LdJrmpXOq9NsvPMnSc5KckvktyZ5LtJdp1lna8nOaNNr0tycVt/TZJXtfY9k6xKcleSy4FM+/mz\nk/yqXW29PMmSJPu1/u6VZIckP01y0vDfXkmSFsZCUpI0iVYB+yS5P8kXkrxh2rLPVdURVfUa4IXA\nqdOWPV1VxwJfAq4HzgdeA5ybZM+2zoHAFVX1WuAp4D3Td9yufl4InFhVhwF3AO+fQ583tvW/CFzQ\n2i4CflZVhwI3APu2fRwEvB04pqpWAJuBs6rqYeDS1v8PAPdV1ao57FuSpO3KQlKSNHGq6m/ASuA8\nYAPw7STntsVvTHJbkjXA8cCrp/3oDe11DXBvVT3ermo+BOzTlj1SVT9v098AXj9j90cBBwM/T3I3\ncA6w3xy6fV17nQKWt+lj2z6oqh8Cf27tJ7S/3+1tHycA+7f1rgReBLybrQWpJEkTZcdxd0CSpNlU\n1WZgNbC6FY3nJLkG+AKD+wwfSfJx4AXTfmxTe3122vSW+S3feTMfDjBzPsBNVXVmxy5v2d9m/vP7\ndbaHEQS4qqo+/F8LBkNtl7XZXYG/duyHJEkj5xVJSdLESXJgkgOmNa0AHmZr0bix3bd4xjw2v297\nmA/AmcDPZiz/JXBMkle0vuyc5JXz2A/ArcBZbTunALu39puBM5K8pC3bI8mWq56XAt8EPgZ8eZ77\nlSRppLwiKUmaRLsCn02yG/AM8CBwXlU9meTLDIaurgNun8e21zK4unk58ACDexr/rao2tGG0Vyd5\nfmu+ELh/Hvu6uG3nTuAnwB/aPu5LciGwKskOwD+B85MsB45gcO/k5iRvTfLOqvraPPYtSdLI+N9/\nSJKeM1qhdmN7UI8kSZonh7ZKkiRJkjrxiqQkSZIkqROvSEqSJEmSOrGQlCRJkiR1YiEpSZIkSerE\nQlKSJEmS1ImFpCRJkiSpEwtJSZIkSVIn/wKb9oSkrO6qzgAAAABJRU5ErkJggg==\n", 92 | "text/plain": [ 93 | "" 94 | ] 95 | }, 96 | "metadata": {}, 97 | 
"output_type": "display_data" 98 | } 99 | ], 100 | "source": [ 101 | "mask = select.get_support()\n", 102 | "print(mask)\n", 103 | "\n", 104 | "plt.matshow(mask.reshape(1, -1), cmap='gray_r')\n", 105 | "plt.xlabel(\"Sample index\")\n", 106 | "plt.yticks(())\n", 107 | "plt.show()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 6, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "Score with all features: 0.930\n", 120 | "Score with only selected features: 0.940\n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "from sklearn.linear_model import LogisticRegression\n", 126 | "\n", 127 | "X_test_selected = select.transform(X_test)\n", 128 | "\n", 129 | "lr = LogisticRegression()\n", 130 | "lr.fit(X_train, y_train)\n", 131 | "print(\"Score with all features: {:.3f}\".format(lr.score(X_test, y_test)))\n", 132 | "lr.fit(X_train_selected, y_train)\n", 133 | "print(\"Score with only selected features: {:.3f}\".format(\n", 134 | " lr.score(X_test_selected, y_test)))" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "## モデルベース特徴量\n", 142 | "\n", 143 | "モデルベース特徴量洗濯は,教師あり学習モデルを用いて個々の特徴量の重要性を判断し,重要なものだけを残す手法である.\n", 144 | "単変量選択の場合とは対して,モデルベースの選択は,全ての特徴量を同時に考慮するので変数間の相互作用を捉えることが出来る" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 8, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "from sklearn.feature_selection import SelectFromModel\n", 154 | "from sklearn.ensemble import RandomForestClassifier\n", 155 | "select = SelectFromModel(\n", 156 | " RandomForestClassifier(n_estimators=100, random_state=42),\n", 157 | " threshold=\"median\")" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "SelectFromModelクラスは教師あり学習モデルで得られた特徴量の重要性が,指定したスレッショルドよりも大きい特徴量だけを選択する." 
165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 9, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "X_train.shape: (284, 80)\n", 177 | "X_train_l1.shape: (284, 40)\n" 178 | ] 179 | } 180 | ], 181 | "source": [ 182 | "select.fit(X_train, y_train)\n", 183 | "X_train_l1 = select.transform(X_train)\n", 184 | "print(\"X_train.shape: {}\".format(X_train.shape))\n", 185 | "print(\"X_train_l1.shape: {}\".format(X_train_l1.shape))" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 12, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAA5IAAAA4CAYAAACPHscHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAACb9JREFUeJzt3WusHGUdx/HvjyIqouFWjLFAJSKCBAsFxWAQgRBQIiZi\nlGACxIQYeQFRYlCJitEXvPESryBeSFS8ICrRxJSgFTCKcAAtUBUkRQhI2yjiJSlS/77Yp3I8HuyZ\nc3a7O8v3k5zszDNzZp7ub3Y2/zPPTFNVSJIkSZK0UDuNuwOSJEmSpH6xkJQkSZIkdWIhKUmSJEnq\nxEJSkiRJktSJhaQkSZIkqRMLSUmSJElSJ0sqJJOcnOS3Se5NctGwOqXRSPKlJBuT3Dmrbc8k1yW5\np73uMc4+an5J9k3ykyTrk9yV5PzWbn49kORZSX6Z5Fctv0ta+4uS3Nzy+2aSXcbdV80vybIktyf5\nQZs3u55IsiHJuiR3JLm1tXnu7IEkuye5Oslv2vffq8yuH5Ic1D5z234eS3KB+U2XRReSSZYBnwFO\nAQ4BzkhyyLA6ppH4CnDynLaLgOur6kDg+javyfME8O6qOhg4Gjivfd7Mrx+2AMdX1cuBVcDJSY4G\nLgU+3vL7M/D2MfZR/9/5wPpZ82bXL6+tqlVVdWSb99zZD58EflRVLwVezuAzaHY9UFW/bZ+5VcBq\n4B/AdzG/qbKUK5KvAO6tqvuq6nHgG8Bpw+mWRqGqbgD+NKf5NODKNn0l8MYd2iktSFU9XFW3tem/\nMvgyfSHm1ws18Lc2+4z2U8DxwNWt3fwmVJIVwOuBK9p8MLu+89w54ZI8DzgW+CJAVT1eVY9idn10\nAvD7qrof85sqSykkXwg8MGv+wdamfnl+VT0Mg2IF2GfM/dF2JFkJHA7cjPn1RhsaeQewEbgO+D3w\naFU90VbxHDq5PgG8B/hXm98Ls+uTAtYkmUlybmvz3Dn5DgA2AV9uw8qvSPIczK6P3gpc1abNb4os\npZDMPG21hO1J2o4kuwHfAS6oqsfG3R8tXFVtbUN8VjAY0XHwfKvt2F5pe5KcCmysqpnZzfOsanaT\n65iqOoLBrTjnJTl23B3SguwMHAF8rqoOB/6OwyB7p90//gbg2+Pui4ZvKYXkg8C+s+ZXAA8trTsa\ng0eSvACgvW4cc3/0FJI8g0ER+bWquqY1m1/PtKFZaxnc67p7kp3bIs+hk+kY4A1JNjC4heN4Blco\nza4nquqh9rqRwT1ar8BzZx88CDxYVTe3+asZFJZm1y+nALdV1SNt
3vymyFIKyVuAA9uT63ZhcNn6\n2uF0SzvQtcBZbfos4Ptj7IueQrsn64vA+qr62KxF5tcDSZYn2b1NPxs4kcF9rj8BTm+rmd8Eqqr3\nVtWKqlrJ4Hvux1V1JmbXC0mek+S526aBk4A78dw58arqj8ADSQ5qTScAd2N2fXMGTw5rBfObKqla\n/GicJK9j8JfZZcCXquqjw+qYhi/JVcBxwN7AI8AHge8B3wL2A/4AvLmq5j6QR2OW5NXAjcA6nrxP\n630M7pM0vwmX5DAGDxVYxuAPeN+qqg8nOYDBVa49gduBt1XVlvH1VP9PkuOAC6vqVLPrh5bTd9vs\nzsDXq+qjSfbCc+fES7KKwUOudgHuA86hnUMxu4mXZFcGz1M5oKr+0tr87E2RJRWSkiRJkqSnn6UM\nbZUkSZIkPQ1ZSEqSJEmSOrGQlCRJkiR1YiEpSZIkSerEQlKSJEmS1MmSC8kk5w6jIxoP8+svs+s3\n8+s38+svs+s38+svs5s+w7gi6UHRb+bXX2bXb+bXb+bXX2bXb+bXX2Y3ZRzaKkmSJEnqJFW18JWT\nha+sXlu9evWC152ZmZnaPkjDMN+xvGnTJpYvX/4/7V2O5S6fka4mpR/j1vXcMqr3YlR5jPLcOQnH\nxaQcx5PwPTktJv3cOc2W+h4/VXaj5HGxODMzM5urarthWUhqXh2Pi6ntgzQMozqWu2y3q0npx7h1\nPbeM6r0YVR6jPHdOwnExKcfxJHxPTrtJyXqa9fE97mOfJ0GSmao6cnvrObRVkiRJktSJhaQkSZIk\nqRMLSUmSJElSJxaSkiRJkqROLCQlSZIkSZ1YSEqSJEmSOrGQlCRJkiR1YiEpSZIkSerEQlKSJEmS\n1EmqauErJ5uA++c07w1sHmantEOZX3+ZXb+ZX7+ZX3+ZXb+ZX3+ZXX/sX1XLt7dSp0Jy3g0kt1bV\nkUvaiMbG/PrL7PrN/PrN/PrL7PrN/PrL7KaPQ1slSZIkSZ1YSEqSJEmSOhlGIXn5ELah8TG//jK7\nfjO/7Ujy/iR3Jfl1kjuSvHLE+1ubZKHDri5P8uEkJ3bcx4Ykey+iexoeP3v9Zn79ZXZTZsn3SEqS\nNGxJXgV8DDiuqra04muXqnpohPtcC1xYVbeOcB8bgCOrygdOSJJ6zaGtkqRJ9AJgc1VtAaiqzduK\nyCQfSHJLkjuTXJ4krX1tko8nuSHJ+iRHJbkmyT1JPtLWWZnkN0mubFc6r06y69ydJzkpyc+T3Jbk\n20l2m2edryQ5vU1vSHJJW39dkpe29r2SrElye5LLgMz6/bcl+WW72npZkmVJ9m/93TvJTkluTHLS\n8N9eSZKWxkJSkjSJ1gD7Jvldks8mec2sZZ+uqqOq6lDg2cCps5Y9XlXHAp8Hvg+cBxwKnJ1kr7bO\nQcDlVXUY8Bjwztk7blc/LwZOrKojgFuBdy2gz5vb+p8DLmxtHwRuqqrDgWuB/do+DgbeAhxTVauA\nrcCZVXU/cGnr/7uBu6tqzQL2LUnSDmUhKUmaOFX1N2A1cC6wCfhmkrPb4tcmuTnJOuB44GWzfvXa\n9roOuKuqHm5XNe8D9m3LHqiqn7XprwKvnrP7o4FDgJ8luQM4C9h/Ad2+pr3OACvb9LFtH1TVD4E/\nt/YT2r/vlraPE4AD2npXAM8F3sGTBakkSRNl53F3QJKk+VTVVmAtsLYVjWcl+QbwWQb3GT6Q5EPA\ns2b92pb2+q9Z09vmt33nzX04wNz5ANdV1Rkdu7xtf1v57+/X+R5GEODKqnrv/ywYDLVd0WZ3A/7a\nsR+SJI2cVyQlSRMnyUFJDpzVtAq4nyeLxs3tvsXTF7H5/drDfADOAG6as/wXwDFJXtz6smuSlyxi\nPwA3AGe27ZwC7NHarwdOT7JPW7Znkm1XPS8FvgZ8APjCIvcrSdJIeUVSkjSJdgM+lWR34AngXuDc\nqno0yRcYDF3dANyyiG2vZ3B1
8zLgHgb3NP5HVW1qw2ivSvLM1nwx8LtF7OuStp3bgJ8Cf2j7uDvJ\nxcCaJDsB/wTOS7ISOIrBvZNbk7wpyTlV9eVF7FuSpJHxv/+QJD1ttELtB+1BPZIkaZEc2ipJkiRJ\n6sQrkpIkSZKkTrwiKUmSJEnqxEJSkiRJktSJhaQkSZIkqRMLSUmSJElSJxaSkiRJkqROLCQlSZIk\nSZ38G2ehaaTJL0krAAAAAElFTkSuQmCC\n", 196 | "text/plain": [ 197 | "" 198 | ] 199 | }, 200 | "metadata": {}, 201 | "output_type": "display_data" 202 | } 203 | ], 204 | "source": [ 205 | "mask = select.get_support()\n", 206 | "# 黒が真, 白が偽\n", 207 | "plt.matshow(mask.reshape(1, -1), cmap='gray_r')\n", 208 | "plt.xlabel(\"Sample index\")\n", 209 | "plt.yticks(())\n", 210 | "plt.show()" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 13, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "name": "stdout", 220 | "output_type": "stream", 221 | "text": [ 222 | "Test score: 0.951\n" 223 | ] 224 | } 225 | ], 226 | "source": [ 227 | "X_test_l1 = select.transform(X_test)\n", 228 | "score = LogisticRegression().fit(X_train_l1, y_train).score(X_test_l1, y_test)\n", 229 | "print(\"Test score: {:.3f}\".format(score))" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "### 反復特徴量選択\n", 237 | "\n", 238 | "- 単変量統計\n", 239 | " - モデルを全く使わない\n", 240 | "- モデルベース選択\n", 241 | " - モデルを一つ使う\n", 242 | "- 反復特徴量選択\n", 243 | " - 異なる特徴量を用いた一連のモデルを作る\n", 244 | " \n", 245 | "反復特徴量では全く特徴量を使わないところから,ある基準が満たされるところまで特徴量を一つずつ加えていく方法と,全ての特徴量を使う状態から一つずつ取り除いていく方法の2つの方法がある.この方法の一つが`再帰的特徴量削減`(PFE)である.この方法は全ての特徴量から開始してモデルを作り,そのモデルで最も重要度が低い特徴量を削除する." 
246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 14, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAA5IAAAA4CAYAAACPHscHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAACbxJREFUeJzt3WuMHWUdx/HvjyIKgkGgGkOBSkQEDRQKCsEgtxBQgiZi\nlEACxoQYeQFRYrwQFKIveAMaFeSmkqigIgrRxJSgFTWKUEALVLmlXALSNoqIJkXq3xfnqV2Xhe7s\n7uk5s/1+kubMPGd25tnzm5mT/84z01QVkiRJkiRN1zaj7oAkSZIkqV8sJCVJkiRJnVhISpIkSZI6\nsZCUJEmSJHViISlJkiRJ6sRCUpIkSZLUyawKySQnJPlzkoeSfGquOqXhSPKNJGuS3DuhbZcktyR5\nsL2+dpR91NSS7JHkF0lWJbkvyTmt3fx6IMmrkvw+yR9afhe29jcmub3l970k2426r5pakgVJ7k7y\nkzZvdj2RZHWSlUnuSXJna/Pc2QNJdk5yQ5I/te+/w82uH5Ls2465jf+eTXKu+c0vMy4kkywAvgac\nCOwPnJpk/7nqmIbiW8AJk9o+BdxaVfsAt7Z5jZ8XgE9U1X7AYcDZ7Xgzv35YDxxTVQcCS4ATkhwG\nXAxc2vL7G/CREfZRL+8cYNWEebPrl6OraklVHdLmPXf2w5eBn1XVW4ADGRyDZtcDVfXndswtAZYC\n/wJ+hPnNK7O5Ivl24KGqeqSqngeuB947N93SMFTVbcBfJzW/F7i2TV8LvG+LdkrTUlVPVdVdbfof\nDL5Md8f8eqEGnmuzr2j/CjgGuKG1m9+YSrIIeA9wdZsPZtd3njvHXJLXAEcC1wBU1fNV9Qxm10fH\nAg9X1aOY37wym0Jyd+DxCfNPtDb1y+ur6ikYFCvA60bcH21GksXAQcDtmF9vtKGR9wBrgFuAh4Fn\nquqFtojn0PH1JeCTwH/a/K6YXZ8UsCzJiiRntTbPneNvb2At8M02rPzqJK/G7ProQ8B1bdr85pHZ\nFJKZoq1msT5Jm5FkR+CHwLlV9eyo+6Ppq6oNbYjPIgYjOvabarEt2yttTpKTgDVVtWJi8xSLmt34\nOqKqDmZwK87ZSY4cdYc0LdsCBwOXV9VBwD9xGGTvtPvHTwZ+MOq+aO7NppB8Athjwvwi4MnZdUcj\n8HSSNwC01zUj7o9eQpJXMCgiv1NVN7Zm8+uZNjRrOYN7XXdOsm17y3PoeDoCODnJaga3cBzD4Aql\n2fVEVT3ZXtcwuEfr7Xju7IMngCeq6vY2fwODwtLs+uVE4K6qerrNm988MptC8g5gn/bkuu0YXLa+\neW66pS3oZuCMNn0GcNMI+6KX0O7JugZYVVWXTHjL/HogycIkO7fp7YHjGNzn+gvglLaY+Y2hqvp0\nVS2qqsUMvud+XlWnYXa9kOTVSXbaOA0cD9yL586xV1V/AR5Psm9rOha4H7Prm1PZNKwVzG9eSdXM\nR+MkeTeDv8wuAL5RVV+cq45p7iW5DjgK2A14Gvgc8GPg+8CewGPAB6pq8gN5NGJJ3gn8CljJpvu0\nPsPgPknzG3NJDmDwUIEFDP6A9/2quijJ3gyucu0C3A2cXlXrR9dTvZwkRwHnVdVJZtcPLacftdlt\nge9W1ReT7IrnzrGXZAmDh1xtBzwCfJh2DsXsxl6SHRg8T2Xvqvp7a/PYm0dmVUhKkiRJkrY+sxna\nKkmSJEnaCllISpIkSZI6sZCUJEmSJHViISlJkiRJ6sRCUpIkSZLUyawLySRnzUVHNBrm119m12/m\n12/m119m12/m119mN//MxRVJd4p+M7/+Mrt+M79+
M7/+Mrt+M7/+Mrt5xqGtkiRJkqROUlXTXziZ\n/sLSGFu6dOm0l12xYsUQe6KtwVT729q1a1m4cOGL2rvsb132Y82tl8pPmwzr3Dms8/cwj6dx+CyG\npevvNts+z6djb1j757js95ONIrtx/SxeypY+nl6mH+uqarNhWUhqq9Rxvx9iT7Q1GNb+1mW90pY2\nrHNnH4+ncfgshqXr7zYOfR4Xw9o/x2W/Hwd9+yzG5XhKsqKqDtnccg5tlSRJkiR1YiEpSZIkSerE\nQlKSJEmS1ImFpCRJkiSpEwtJSZIkSVInFpKSJEmSpE4sJCVJkiRJnVhISpIkSZI6sZCUJEmSJHWS\nqpr+wsla4NFJzbsB6+ayU9qizK+/zK7fzK/fzK+/zK7fzK+/zK4/9qqqhZtbqFMhOeUKkjur6pBZ\nrUQjY379ZXb9Zn79Zn79ZXb9Zn79ZXbzj0NbJUmSJEmdWEhKkiRJkjqZi0LyyjlYh0bH/PrL7PrN\n/DYjyWeT3Jfkj0nuSfKOIW9veZLpDru6MslFSY7ruI3VSXabQfc0dzz2+s38+svs5plZ3yMpSdJc\nS3I4cAlwVFWtb8XXdlX15BC3uRw4r6ruHOI2VgOHVJUPnJAk9ZpDWyVJ4+gNwLqqWg9QVes2FpFJ\nLkhyR5J7k1yZJK19eZJLk9yWZFWSQ5PcmOTBJF9oyyxO8qck17YrnTck2WHyxpMcn+S3Se5K8oMk\nO06xzLeSnNKmVye5sC2/MslbWvuuSZYluTvJFUAm/PzpSX7frrZekWRBkr1af3dLsk2SXyU5fu4/\nXkmSZsdCUpI0jpYBeyR5IMllSd414b2vVtWhVfU2YHvgpAnvPV9VRwJfB24CzgbeBpyZZNe2zL7A\nlVV1APAs8LGJG25XP88Hjquqg4E7gY9Po8/r2vKXA+e1ts8Bv66qg4CbgT3bNvYDPggcUVVLgA3A\naVX1KHBx6/8ngPuratk0ti1J0hZlISlJGjtV9RywFDgLWAt8L8mZ7e2jk9yeZCVwDPDWCT96c3td\nCdxXVU+1q5qPAHu09x6vqt+06W8D75y0+cOA/YHfJLkHOAPYaxrdvrG9rgAWt+kj2zaoqp8Cf2vt\nx7bf7462jWOBvdtyVwM7AR9lU0EqSdJY2XbUHZAkaSpVtQFYDixvReMZSa4HLmNwn+HjST4PvGrC\nj61vr/+ZML1xfuN33uSHA0yeD3BLVZ3ascsbt7eB//9+nephBAGurapPv+iNwVDbRW12R+AfHfsh\nSdLQeUVSkjR2kuybZJ8JTUuAR9lUNK5r9y2eMoPV79ke5gNwKvDrSe//DjgiyZtaX3ZI8uYZbAfg\nNuC0tp4Tgde29luBU5K8rr23S5KNVz0vBr4DXABcNcPtSpI0VF6RlCSNox2BryTZGXgBeAg4q6qe\nSXIVg6Grq4E7ZrDuVQyubl4BPMjgnsb/qaq1bRjtdUle2ZrPBx6YwbYubOu5C/gl8Fjbxv1JzgeW\nJdkG+DdwdpLFwKEM7p3ckOT9ST5cVd+cwbYlSRoa//sPSdJWoxVqP2kP6pEkSTPk0FZJkiRJUide\nkZQkSZIkdeIVSUmSJElSJxaSkiRJkqROLCQlSZIkSZ1YSEqSJEmSOrGQlCRJkiR1YiEpSZIkSerk\nv9SjYKQXyZtMAAAAAElFTkSuQmCC\n", 256 | "text/plain": [ 257 | "" 258 | ] 259 | }, 260 | "metadata": {}, 261 | "output_type": "display_data" 262 | } 263 | ], 264 | "source": [ 265 | "from sklearn.feature_selection import RFE\n", 266 | "select = RFE(RandomForestClassifier(n_estimators=100, random_state=42),\n", 267 | " 
n_features_to_select=40)\n", 268 | "\n", 269 | "select.fit(X_train, y_train)\n", 270 | "\n", 271 | "mask = select.get_support()\n", 272 | "plt.matshow(mask.reshape(1, -1), cmap='gray_r')\n", 273 | "plt.xlabel(\"Sample index\")\n", 274 | "plt.yticks(())\n", 275 | "plt.show()" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 15, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "name": "stdout", 285 | "output_type": "stream", 286 | "text": [ 287 | "Test score: 0.951\n" 288 | ] 289 | } 290 | ], 291 | "source": [ 292 | "X_train_rfe = select.transform(X_train)\n", 293 | "X_test_rfe = select.transform(X_test)\n", 294 | "\n", 295 | "score = LogisticRegression().fit(X_train_rfe, y_train).score(X_test_rfe, y_test)\n", 296 | "print(\"Test score: {:.3f}\".format(score))" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 16, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "name": "stdout", 306 | "output_type": "stream", 307 | "text": [ 308 | "Test score: 0.951\n" 309 | ] 310 | } 311 | ], 312 | "source": [ 313 | "print(\"Test score: {:.3f}\".format(select.score(X_test, y_test)))" 314 | ] 315 | } 316 | ], 317 | "metadata": { 318 | "kernelspec": { 319 | "display_name": "Python 3", 320 | "language": "python", 321 | "name": "python3" 322 | }, 323 | "language_info": { 324 | "codemirror_mode": { 325 | "name": "ipython", 326 | "version": 3 327 | }, 328 | "file_extension": ".py", 329 | "mimetype": "text/x-python", 330 | "name": "python", 331 | "nbconvert_exporter": "python", 332 | "pygments_lexer": "ipython3", 333 | "version": "3.5.4" 334 | } 335 | }, 336 | "nbformat": 4, 337 | "nbformat_minor": 2 338 | } 339 | -------------------------------------------------------------------------------- /ch04/categorical_variables.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 特徴エンジニアリング" 8 | ] 
9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "連続地特徴量と離散地特徴量がある。カテゴリの特徴量などが離散地特徴量。 \n", 15 | "また特徴量の相互作用(積)や一般的な多項式を追加して特徴量の強化をすることもある。 \n", 16 | "特定のアプリケーションに対して最良のデータ表現を模索することのことを特徴エンジニアリングという。 \n", 17 | "データを正しく表現することはパラメータを正しく選択するより影響があることもある。" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "source": [ 26 | "## カテゴリ変数" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "age、hours-per-weekは連続値。workclass、education、sex、occupationはカテゴリ特徴量。 \n", 34 | "定量的でなく定性的な特性を示している。" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/html": [ 45 | "
\n", 46 | "\n", 59 | "\n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | "
ageworkclasseducationgenderhours-per-weekoccupationincome
039State-govBachelorsMale40Adm-clerical<=50K
150Self-emp-not-incBachelorsMale13Exec-managerial<=50K
238PrivateHS-gradMale40Handlers-cleaners<=50K
353Private11thMale40Handlers-cleaners<=50K
428PrivateBachelorsFemale40Prof-specialty<=50K
\n", 125 | "
" 126 | ], 127 | "text/plain": [ 128 | " age workclass education gender hours-per-week \\\n", 129 | "0 39 State-gov Bachelors Male 40 \n", 130 | "1 50 Self-emp-not-inc Bachelors Male 13 \n", 131 | "2 38 Private HS-grad Male 40 \n", 132 | "3 53 Private 11th Male 40 \n", 133 | "4 28 Private Bachelors Female 40 \n", 134 | "\n", 135 | " occupation income \n", 136 | "0 Adm-clerical <=50K \n", 137 | "1 Exec-managerial <=50K \n", 138 | "2 Handlers-cleaners <=50K \n", 139 | "3 Handlers-cleaners <=50K \n", 140 | "4 Prof-specialty <=50K " 141 | ] 142 | }, 143 | "execution_count": 1, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "import pandas as pd\n", 150 | "import mglearn\n", 151 | "import os\n", 152 | "\n", 153 | "adult_path = os.path.join(mglearn.datasets.DATA_PATH, \"adult.data\")\n", 154 | "data = pd.read_csv(\n", 155 | " adult_path, header=None, index_col=False,\n", 156 | " names=['age', 'workclass', 'fnlwgt', 'education', 'education-num',\n", 157 | " 'marital-status', 'occupation', 'relationship', 'race', 'gender',\n", 158 | " 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',\n", 159 | " 'income'])\n", 160 | "\n", 161 | "data = data[['age', 'workclass', 'education', 'gender', 'hours-per-week',\n", 162 | " 'occupation', 'income']]\n", 163 | "\n", 164 | "data.head()" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "### ワンホットエンコーディング(ダミー変数)\n", 172 | "\n", 173 | "ダミー変数とは,カテゴリ変数を一つ以上の0と1の値を持つ新しい特徴量で置き換えるもの.\n", 174 | "値0と1を使えば,線形2クラス分類の式が意味を持つので,sklearnの殆どのモデルを利用出来る.\n", 175 | " \n", 176 | "※ワンホットエンコーディングは統計で用いられるダミーデータエンコーディングに似ているが同じではない。いずれもここのカテゴリを複数の2値特徴量で表現する。統計ではk個の異なる値をとるカテゴリ特徴量をk-1個の特徴量で表現する。これは解析を容易にするためだ。(技術的にはデータ行列のランク不足を避けるため)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 2, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "data": { 186 | "text/html": [ 187 | "
\n", 188 | "\n", 201 | "\n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | "
ageworkclasseducationgenderhours-per-weekoccupationincome
039State-govBachelorsMale40Adm-clerical<=50K
150Self-emp-not-incBachelorsMale13Exec-managerial<=50K
238PrivateHS-gradMale40Handlers-cleaners<=50K
353Private11thMale40Handlers-cleaners<=50K
428PrivateBachelorsFemale40Prof-specialty<=50K
\n", 267 | "
" 268 | ], 269 | "text/plain": [ 270 | " age workclass education gender hours-per-week \\\n", 271 | "0 39 State-gov Bachelors Male 40 \n", 272 | "1 50 Self-emp-not-inc Bachelors Male 13 \n", 273 | "2 38 Private HS-grad Male 40 \n", 274 | "3 53 Private 11th Male 40 \n", 275 | "4 28 Private Bachelors Female 40 \n", 276 | "\n", 277 | " occupation income \n", 278 | "0 Adm-clerical <=50K \n", 279 | "1 Exec-managerial <=50K \n", 280 | "2 Handlers-cleaners <=50K \n", 281 | "3 Handlers-cleaners <=50K \n", 282 | "4 Prof-specialty <=50K " 283 | ] 284 | }, 285 | "execution_count": 2, 286 | "metadata": {}, 287 | "output_type": "execute_result" 288 | } 289 | ], 290 | "source": [ 291 | "import pandas as pd\n", 292 | "import mglearn\n", 293 | "import os\n", 294 | "\n", 295 | "adult_path = os.path.join(mglearn.datasets.DATA_PATH, \"adult.data\")\n", 296 | "data = pd.read_csv(\n", 297 | " adult_path, header=None, index_col=False,\n", 298 | " names=['age', 'workclass', 'fnlwgt', 'education', 'education-num',\n", 299 | " 'marital-status', 'occupation', 'relationship', 'race', 'gender',\n", 300 | " 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',\n", 301 | " 'income'])\n", 302 | "\n", 303 | "data = data[['age', 'workclass', 'education', 'gender', 'hours-per-week',\n", 304 | " 'occupation', 'income']]\n", 305 | "\n", 306 | "data.head()" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 3, 312 | "metadata": {}, 313 | "outputs": [ 314 | { 315 | "name": "stdout", 316 | "output_type": "stream", 317 | "text": [ 318 | " Male 21790\n", 319 | " Female 10771\n", 320 | "Name: gender, dtype: int64\n" 321 | ] 322 | } 323 | ], 324 | "source": [ 325 | "print(data.gender.value_counts())" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "get_dummies関数を使うと簡単にデータをワンホットエンコーディングできる。 \n", 333 | "※get_dummiesは学習データとテストデータに対して同じように適用しないと、当然テスト時におかしなことになる。" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 
| "execution_count": 4, 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "name": "stdout", 343 | "output_type": "stream", 344 | "text": [ 345 | "Original features:\n", 346 | " ['age', 'workclass', 'education', 'gender', 'hours-per-week', 'occupation', 'income'] \n", 347 | "\n", 348 | "Features after get_dummies:\n", 349 | " ['age', 'hours-per-week', 'workclass_ ?', 'workclass_ Federal-gov', 'workclass_ Local-gov', 'workclass_ Never-worked', 'workclass_ Private', 'workclass_ Self-emp-inc', 'workclass_ Self-emp-not-inc', 'workclass_ State-gov', 'workclass_ Without-pay', 'education_ 10th', 'education_ 11th', 'education_ 12th', 'education_ 1st-4th', 'education_ 5th-6th', 'education_ 7th-8th', 'education_ 9th', 'education_ Assoc-acdm', 'education_ Assoc-voc', 'education_ Bachelors', 'education_ Doctorate', 'education_ HS-grad', 'education_ Masters', 'education_ Preschool', 'education_ Prof-school', 'education_ Some-college', 'gender_ Female', 'gender_ Male', 'occupation_ ?', 'occupation_ Adm-clerical', 'occupation_ Armed-Forces', 'occupation_ Craft-repair', 'occupation_ Exec-managerial', 'occupation_ Farming-fishing', 'occupation_ Handlers-cleaners', 'occupation_ Machine-op-inspct', 'occupation_ Other-service', 'occupation_ Priv-house-serv', 'occupation_ Prof-specialty', 'occupation_ Protective-serv', 'occupation_ Sales', 'occupation_ Tech-support', 'occupation_ Transport-moving', 'income_ <=50K', 'income_ >50K']\n" 350 | ] 351 | } 352 | ], 353 | "source": [ 354 | "print(\"Original features:\\n\", list(data.columns), \"\\n\")\n", 355 | "data_dummies = pd.get_dummies(data)\n", 356 | "print(\"Features after get_dummies:\\n\", list(data_dummies.columns))" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 5, 362 | "metadata": {}, 363 | "outputs": [ 364 | { 365 | "data": { 366 | "text/html": [ 367 | "
\n", 368 | "\n", 381 | "\n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | "
agehours-per-weekworkclass_ ?workclass_ Federal-govworkclass_ Local-govworkclass_ Never-workedworkclass_ Privateworkclass_ Self-emp-incworkclass_ Self-emp-not-incworkclass_ State-gov...occupation_ Machine-op-inspctoccupation_ Other-serviceoccupation_ Priv-house-servoccupation_ Prof-specialtyoccupation_ Protective-servoccupation_ Salesoccupation_ Tech-supportoccupation_ Transport-movingincome_ <=50Kincome_ >50K
0394000000001...0000000010
1501300000010...0000000010
2384000001000...0000000010
3534000001000...0000000010
\n", 507 | "

4 rows × 46 columns

\n", 508 | "
" 509 | ], 510 | "text/plain": [ 511 | " age hours-per-week workclass_ ? workclass_ Federal-gov \\\n", 512 | "0 39 40 0 0 \n", 513 | "1 50 13 0 0 \n", 514 | "2 38 40 0 0 \n", 515 | "3 53 40 0 0 \n", 516 | "\n", 517 | " workclass_ Local-gov workclass_ Never-worked workclass_ Private \\\n", 518 | "0 0 0 0 \n", 519 | "1 0 0 0 \n", 520 | "2 0 0 1 \n", 521 | "3 0 0 1 \n", 522 | "\n", 523 | " workclass_ Self-emp-inc workclass_ Self-emp-not-inc workclass_ State-gov \\\n", 524 | "0 0 0 1 \n", 525 | "1 0 1 0 \n", 526 | "2 0 0 0 \n", 527 | "3 0 0 0 \n", 528 | "\n", 529 | " ... occupation_ Machine-op-inspct occupation_ Other-service \\\n", 530 | "0 ... 0 0 \n", 531 | "1 ... 0 0 \n", 532 | "2 ... 0 0 \n", 533 | "3 ... 0 0 \n", 534 | "\n", 535 | " occupation_ Priv-house-serv occupation_ Prof-specialty \\\n", 536 | "0 0 0 \n", 537 | "1 0 0 \n", 538 | "2 0 0 \n", 539 | "3 0 0 \n", 540 | "\n", 541 | " occupation_ Protective-serv occupation_ Sales occupation_ Tech-support \\\n", 542 | "0 0 0 0 \n", 543 | "1 0 0 0 \n", 544 | "2 0 0 0 \n", 545 | "3 0 0 0 \n", 546 | "\n", 547 | " occupation_ Transport-moving income_ <=50K income_ >50K \n", 548 | "0 0 1 0 \n", 549 | "1 0 1 0 \n", 550 | "2 0 1 0 \n", 551 | "3 0 1 0 \n", 552 | "\n", 553 | "[4 rows x 46 columns]" 554 | ] 555 | }, 556 | "execution_count": 5, 557 | "metadata": {}, 558 | "output_type": "execute_result" 559 | } 560 | ], 561 | "source": [ 562 | "data_dummies.head(n=4)" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": 6, 568 | "metadata": {}, 569 | "outputs": [ 570 | { 571 | "name": "stdout", 572 | "output_type": "stream", 573 | "text": [ 574 | "X.shape: (32561, 44) y.shape: (32561,)\n" 575 | ] 576 | } 577 | ], 578 | "source": [ 579 | "features = data_dummies.loc[:, 'age':'occupation_ Transport-moving']\n", 580 | "\n", 581 | "X = features.values\n", 582 | "y = data_dummies['income_ >50K'].values\n", 583 | "print(\"X.shape: {} y.shape: {}\".format(X.shape, y.shape))" 584 | ] 585 | }, 586 | { 587 | 
"cell_type": "code", 588 | "execution_count": 7, 589 | "metadata": {}, 590 | "outputs": [ 591 | { 592 | "name": "stdout", 593 | "output_type": "stream", 594 | "text": [ 595 | "Test score: 0.81\n" 596 | ] 597 | } 598 | ], 599 | "source": [ 600 | "from sklearn.linear_model import LogisticRegression\n", 601 | "from sklearn.model_selection import train_test_split\n", 602 | "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n", 603 | "logreg = LogisticRegression()\n", 604 | "logreg.fit(X_train, y_train)\n", 605 | "print(\"Test score: {:.2f}\".format(logreg.score(X_test, y_test)))" 606 | ] 607 | }, 608 | { 609 | "cell_type": "markdown", 610 | "metadata": {}, 611 | "source": [ 612 | "## 数値でエンコードされているカテゴリ\n", 613 | "\n", 614 | "{0: \"男\", 1: \"女\", 2: \"その他\"}のような表現をされているものに対しては,get_dummies関数は使えないが,連続値ではない." 615 | ] 616 | }, 617 | { 618 | "cell_type": "markdown", 619 | "metadata": {}, 620 | "source": [ 621 | "### pandasの列を数値から文字列に変える" 622 | ] 623 | }, 624 | { 625 | "cell_type": "markdown", 626 | "metadata": {}, 627 | "source": [ 628 | "数値で表されているカテゴリ特徴量を連続値として扱ってはいけない。意味的に順番のない場合には離散値として扱うべき。エンコード方法はタスクやアルゴリズムに依存する。" 629 | ] 630 | }, 631 | { 632 | "cell_type": "code", 633 | "execution_count": 8, 634 | "metadata": { 635 | "scrolled": true 636 | }, 637 | "outputs": [ 638 | { 639 | "data": { 640 | "text/html": [ 641 | "
\n", 642 | "\n", 655 | "\n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | "
Categorical FeatureInteger Feature
0socks0
1fox1
2socks2
3box1
\n", 686 | "
" 687 | ], 688 | "text/plain": [ 689 | " Categorical Feature Integer Feature\n", 690 | "0 socks 0\n", 691 | "1 fox 1\n", 692 | "2 socks 2\n", 693 | "3 box 1" 694 | ] 695 | }, 696 | "execution_count": 8, 697 | "metadata": {}, 698 | "output_type": "execute_result" 699 | } 700 | ], 701 | "source": [ 702 | "demo_df = pd.DataFrame({'Integer Feature': [0, 1, 2, 1],\n", 703 | " 'Categorical Feature': ['socks', 'fox', 'socks', 'box']})\n", 704 | "demo_df" 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "execution_count": 17, 710 | "metadata": {}, 711 | "outputs": [ 712 | { 713 | "data": { 714 | "text/html": [ 715 | "
\n", 716 | "\n", 729 | "\n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | "
Integer FeatureCategorical Feature_boxCategorical Feature_foxCategorical Feature_socks
00001
11010
22001
31100
\n", 770 | "
" 771 | ], 772 | "text/plain": [ 773 | " Integer Feature Categorical Feature_box Categorical Feature_fox \\\n", 774 | "0 0 0 0 \n", 775 | "1 1 0 1 \n", 776 | "2 2 0 0 \n", 777 | "3 1 1 0 \n", 778 | "\n", 779 | " Categorical Feature_socks \n", 780 | "0 1 \n", 781 | "1 0 \n", 782 | "2 1 \n", 783 | "3 0 " 784 | ] 785 | }, 786 | "metadata": {}, 787 | "output_type": "display_data" 788 | } 789 | ], 790 | "source": [ 791 | "# Integer Featureに変化が無い => 使えない\n", 792 | "pd.get_dummies(demo_df)" 793 | ] 794 | }, 795 | { 796 | "cell_type": "code", 797 | "execution_count": 21, 798 | "metadata": {}, 799 | "outputs": [ 800 | { 801 | "data": { 802 | "text/html": [ 803 | "
\n", 804 | "\n", 817 | "\n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | "
Integer Feature_0Integer Feature_1Integer Feature_2Categorical Feature_boxCategorical Feature_foxCategorical Feature_socks
0100001
1010010
2001001
3010100
\n", 868 | "
" 869 | ], 870 | "text/plain": [ 871 | " Integer Feature_0 Integer Feature_1 Integer Feature_2 \\\n", 872 | "0 1 0 0 \n", 873 | "1 0 1 0 \n", 874 | "2 0 0 1 \n", 875 | "3 0 1 0 \n", 876 | "\n", 877 | " Categorical Feature_box Categorical Feature_fox Categorical Feature_socks \n", 878 | "0 0 0 1 \n", 879 | "1 0 1 0 \n", 880 | "2 0 0 1 \n", 881 | "3 1 0 0 " 882 | ] 883 | }, 884 | "execution_count": 21, 885 | "metadata": {}, 886 | "output_type": "execute_result" 887 | } 888 | ], 889 | "source": [ 890 | "demo_df['Integer Feature'] = demo_df['Integer Feature'].astype(str) # intからstringに変換している\n", 891 | "pd.get_dummies(demo_df, columns=['Integer Feature', 'Categorical Feature'])" 892 | ] 893 | }, 894 | { 895 | "cell_type": "code", 896 | "execution_count": 20, 897 | "metadata": {}, 898 | "outputs": [ 899 | { 900 | "data": { 901 | "text/html": [ 902 | "
\n", 903 | "\n", 916 | "\n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | "
Categorical FeatureInteger Feature
0socks0
1fox1
2socks2
3box1
\n", 947 | "
" 948 | ], 949 | "text/plain": [ 950 | " Categorical Feature Integer Feature\n", 951 | "0 socks 0\n", 952 | "1 fox 1\n", 953 | "2 socks 2\n", 954 | "3 box 1" 955 | ] 956 | }, 957 | "execution_count": 20, 958 | "metadata": {}, 959 | "output_type": "execute_result" 960 | } 961 | ], 962 | "source": [ 963 | "demo_df['Integer Feature'] = demo_df['Integer Feature'].astype(str)\n", 964 | "demo_df" 965 | ] 966 | } 967 | ], 968 | "metadata": { 969 | "kernelspec": { 970 | "display_name": "Python 3", 971 | "language": "python", 972 | "name": "python3" 973 | }, 974 | "language_info": { 975 | "codemirror_mode": { 976 | "name": "ipython", 977 | "version": 3 978 | }, 979 | "file_extension": ".py", 980 | "mimetype": "text/x-python", 981 | "name": "python", 982 | "nbconvert_exporter": "python", 983 | "pygments_lexer": "ipython3", 984 | "version": "3.5.4" 985 | } 986 | }, 987 | "nbformat": 4, 988 | "nbformat_minor": 2 989 | } 990 | -------------------------------------------------------------------------------- /ch04/univariate_non-linear_transformations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 単変量非線形変換\n", 8 | "\n", 9 | "特徴量を二乗,三乗すること以外にも, log, exp, sinなどの数学関数を用いることでモデルに有用な特徴量を作成できることがある.\n", 10 | "log, expなどの関数はデータの相対的なスケールを修正してくれるので,線形モデルやニューラルネットワークモデルでモデリングしやすくなる.sin, cos関数は周期的なパターンをもつ関数を扱うときに有用である.\n", 11 | "\n", 12 | "殆どのモデルは,個々の特徴量がおおよそガウス分布に従っているときに最もうまく機能する.整数のカウントデータに対してlog, expを用いると特に効果的である." 
13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import mglearn\n", 24 | "import numpy as np\n", 25 | "import matplotlib.pyplot as plt\n", 26 | "\n", 27 | "rnd = np.random.RandomState(0)\n", 28 | "X_org = rnd.normal(size=(1000, 3))\n", 29 | "w = rnd.normal(size=3)\n", 30 | "\n", 31 | "X = rnd.poisson(10 * np.exp(X_org))\n", 32 | "y = np.dot(X_org, w)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "Number of feature appearances:\n", 45 | "[28 38 68 48 61 59 45 56 37 40 35 34 36 26 23 26 27 21 23 23 18 21 10 9 17\n", 46 | " 9 7 14 12 7 3 8 4 5 5 3 4 2 4 1 1 3 2 5 3 8 2 5 2 1\n", 47 | " 2 3 3 2 2 3 3 0 1 2 1 0 0 3 1 0 0 0 1 3 0 1 0 2 0\n", 48 | " 1 1 0 0 0 0 1 0 0 2 2 0 1 1 0 0 0 0 1 1 0 0 0 0 0\n", 49 | " 0 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0\n", 50 | " 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]\n" 51 | ] 52 | } 53 | ], 54 | "source": [ 55 | "print(\"Number of feature appearances:\\n{}\".format(np.bincount(X[:, 0])))" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGORJREFUeJzt3X2UJXV95/H3B0YeVUakISM4zpDMITEanvoYCBwiICuI\nmUEjBg+JE2Uz5qxRNGYjrIbZluyKJ/ExKmZW0NEQHiTojIgPZAKYzW6QGRCQJwcRZGRkRgRBiMLA\nZ/+oX69N0w/V0111b3d9Xufcc2/VvVX328V0f6hf/er3k20iIqK7duh1ARER0VsJgoiIjksQRER0\nXIIgIqLjEgQRER2XIIiI6LgEQURExyUIIiI6LkEQEdFx83pdQB177bWXFy1a1OsyIiJmlQ0bNvzY\n9sBkn2ssCCQdAFw8YtX+wFnA58r6RcDdwOttPzjRvhYtWsT69eubKTQiYo6SdE+dzzXWNGT7DtsH\n2T4IOBR4DPgicAawzvYSYF1ZjoiIHmnrGsGxwPds3wMsA1aX9auBk1qqISIixtBWEJwCXFhe72N7\nM0B53rulGiIiYgyNB4GknYClwBemuN0KSeslrd+6dWszxUVERCtnBCcA19u+vyzfL2kBQHneMtZG\ntlfZHrQ9ODAw6UXviIjYTm0EwRv4ZbMQwFpgeXm9HFjTQg0RETGORoNA0m7AccBlI1afAxwnaWN5\n75wma4iIiIk1ekOZ7ceA549a9wBVL6KIiOgDGWIiIqLjOhcEQ0NDDA0N9bqMiIi+0bkgiIiIp0sQ\nRER0XIIgIqLjEgQRER2XIIiI6LgEQURExyUIIiI6LkEQEdFxCYKIiI5LEEREdFyCICKi4xIEEREd\nlyCIiOi4BEFERMclCCIiOi5BEBHRcQmCiIiOSxBERHRcgiAiouMaDQJJ8yVdKul2SbdJOlzSnpKu\nlLSxPD+vyRoiImJiTZ8RfBT4mu1fBw4EbgPOANbZXgKsK8s9lcnsI6LLGgsCSc8FjgLOA7D9uO2H\ngGXA6vKx1cBJTdUQERGTa/KMYH9gK/AZSTdI+rSk3YF9bG8GKM97N1hDRERMoskgmAccApxr+2Dg\nUabQDCRphaT1ktZv3bq1qRojIjqvySDYBGyyfW1ZvpQqGO6XtACgPG8Za2Pbq2wP2h4cGBhosMyI\niG5rLAhs/wi4V9IBZdWxwK3AWmB5WbccWNNUDRERMbl5De//bcAFknYC7gLeRBU+l0g6DfgBcHLD\nNdQ23Hto5cqVPa4kIqI9jQaB7W8Dg2O8dWyT3xsREfXlzuKIiI5LEEREdFyCICKi4xIEEREdlyCI\niOi4BEFERMclCCIiOi5BMIGhoaEMUR0Rc16CICKi4xIEEREdlyCIiOi4SYNA0smSnlNev1fSZZIO\nab60iIhoQ50zgr+y/YikI4FXUk0veW6zZUVERFvqBMGT5flEqtnG1gA7NVdSRES0qU4Q/FDS3wOv\nB66QtHPN7SIiYhao8wf99cDXgeNtPwTsCfzXRquKiIjWTBoEth+jmlf4yLJqG7CxyaIiIqI9dXoN\nrQTeDZxZVj0L+Icmi4qIiPbUaRp6DbAUeBTA9n3Ac5osKiIi2lMnCB63bcAAknZvtqSIiGhTnSC4\npPQami/pT4B/Bv5Xs2VFRERb5k32Adt/K+k44GHgAOAs21c2XllERLRi0iCQtBj41+E//pJ2lbTI\n9t01tr0beITqprRttgcl7QlcDCwC7gZeb/vB7f0BIiJieuo0DX0BeGrE8pNlXV1H2z7I9mBZPgNY\nZ3sJsK4sR0REj9QJgnm2Hx9eKK+nM8TEMqrxiijPJ01jXxERMU11gmCrpKXDC5KWAT+uuX8D35C0\nQdKKsm4f25sByvPeUyk4IiJm1qTXCIA/BS6Q9HFAwL3AG2vu/wjb90naG7hS0u11CyvBsQJg4cKF\ndTeLiIgpqtNr6HvAYZKeDcj2I3V3Xm4+w/YWSV8EXgbcL2mB7
c2SFlANXzHWtquAVQCDg4Ou+50R\nETE1dXoN7Qz8PlUvn3mSALD9vkm22x3YocxlsDvwn4D3AWuB5cA55XnNNOqPiIhpqtM0tAb4KbAB\n+MUU9r0P8MUSHPOAf7T9NUnXUd2kdhrwA+DkqZUcEREzqU4Q7Gf7+Knu2PZdwIFjrH8AOHaq+4uI\niGbU6TX0fyS9tPFKIiKiJ+oEwZHABkl3SLpJ0s2Sbmq6sDYMDQ31uoSIiJ6r0zR0QuNVREREz9Tp\nPnoPQLkXYJfGK4qIiFbVmaFsqaSNwPeBa6gGivtqw3VFRERL6lwjOBs4DPiu7cVUPX7+rdGqIiKi\nNXWC4InS5XMHSTvYvgo4qOG6IiKiJXUuFj9Uhpf4JtWYQ1uAbc2WFRERbalzRrAMeAx4J/A14HvA\n7zVZVEREtGfCMwJJOwJrbL+CanKa1RN9PiIiZp8Jg8D2k5Iek7SH7Z+2VVSv5UaziOiSOtcIfg7c\nLOlK4NHhlbbf3lhVERHRmjpB8JXyiIiIOajOncW5LhARMYfVmZhmCfB+4MWMGGLC9v4N1hURES2p\n0330M8C5VPcOHA18Dvh8k0VFRER76gTBrrbXUc1XfI/t/w4c02xZERHRllq9hiTtAGyU9GfAD4G9\nmy0rIiLaUueM4B3AbsDbgUOBP6SadD4iIuaAOr2GrgOQZNtvar6kiIhoU535CA6XdCtwW1k+UNIn\nG68sIiJaUadp6CPAK4EHAGzfCBxV9wsk7SjpBkmXl+XFkq6VtFHSxZJ22p7CIyJiZtQJAmzfO2rV\nk1P4jtMpZxPFB4AP214CPAicNoV9RUTEDKsTBPdK+h3AknaS9Bc8/Q/7uCTtB5wIfLosi6rr6aXl\nI6uBk6ZcdUREzJg6QfCnwFuBfam6jh5Uluv4CPCXVENYAzwfeMj28MQ2m8p+IyKiR+r0GvoxcOpU\ndyzp1cAW2xskvXx49VhfMc72K4AVAAsXLpzq1zdieHjqlStX9riSiIiZU6fX0P6Svixpq6QtktZI\nqjPO0BHAUkl3AxdRNQl9BJgvaTiA9gPuG2tj26tsD9oeHBgYqPXDRETE1NVpGvpH4BJgAfAC4AvA\nhZNtZPtM2/vZXgScAvyL7VOBq4DXlY8tB9ZsR90RETFD6gSBbH/e9rby+AfGac6p6d3An0u6k+qa\nwXnT2FdERExTnbGGrpJ0BlXzjoE/AL4iaU8A2z+ZbAe2rwauLq/vAl62nfVGRMQMqxMEf1Ce3zJq\n/ZupgiHzEkREzGJ1eg0tbqOQpmVC+oiIsdWZoWwX4L8AR1KdAfwr8CnbP2+4toiIaEGdpqHPAY8A\nf1eW30A1Q9nJTRUVERHtqRMEB9g+cMTyVZJubKqgfpRmpYiYy+p0H71B0mHDC5J+G/i35kqKiIg2\n1Tkj+G3gjZJ+UJYXArdJuhmw7d9qrLqIiGhcnSA4vvEq5oCMQxQRs1Wd7qP3AEjaG9hlxPofjLtR\nRETMGnUGnVsqaSPwfeAa4G7gqw3XFRERLalzsfhs4DDgu+XmsmPJxeL0JIqIOaNOEDxh+wFgB0k7\n2L6KanKaiIiYA+pcLH5I0rOBbwIXSNoCbJtkm4iImCXqBMEy4D+Ad1LNVLYH8L4mi5pN0kQUEbNd\nnV5Dj5aXT1FNNh8REXNInWsEERExhyUIIiI6btwgkLSuPH+gvXIiIqJtE10jWCDpd4Glki4CNPJN\n29c3WllERLRioiA4CzgD2A/40Kj3DBzTVFEREdGecYPA9qXApZL+yvbZLdYUEREtqtN99GxJS4Gj\nyqqrbV/ebFkREdGWOoPOvR84Hbi1PE4v6ybbbhdJ35J0o6RbJA2V9YslXStpo6SLJe003R8iIiK2\nX53uoycCx9k+3/b5VPMTnFhju18Ax5RpLg8Cji8znX0A+LDtJcCDwGnbV3pERMyEuvcRzB/xeo86\nG7jys7L4rPIYvsh8aVm/G
jipZg0REdGAOmMNvZ9q3uKrqLqQHgWcWWfnknYENgC/BnwC+B7wkO3h\nQes2AfuOs+0KYAXAwoUL63xdRERsh0nPCGxfSDUfwWXlcbjti+rs3PaTtg+i6oL6MuA3xvrYONuu\nsj1oe3BgYKDO10VExHaoc0aA7c3A2u39EtsPSbqaKlDmS5pXzgr2A+7b3v1GRMT0NTbWkKQBSfPL\n612BVwC3AVcBrysfWw6saaqGiIiYXK0zgu20AFhdrhPsAFxi+3JJtwIXSfpr4AbgvAZriIiISUwY\nBJJ2AG6y/ZKp7tj2TcDBY6y/i+p6QURE9IEJm4ZsPwXcKCnddiIi5qg6TUMLgFskfQsYnq0M20sb\nq2oGZSrJiIiJ1QmC/CWNiJjD6gw6d42kFwFLbP+zpN2AHZsvLSIi2lBn0Lk/oRoS4u/Lqn2BLzVZ\n1FyVZqqI6Ed17iN4K3AE8DCA7Y3A3k0WFRER7akTBL+w/fjwgqR5jDMsREREzD51guAaSf8N2FXS\nccAXgC83W9bsNTQ09LQmoNHLERH9pk4QnAFsBW4G3gJcAby3yaIiIqI9dXoNPSVpNXAtVZPQHbbT\nNBQRMUdMGgSSTgQ+RTWXgIDFkt5i+6tNFxcREc2rc0PZB4Gjbd8JIOlXga8ACYKIiDmgzjWCLcMh\nUNwFbGmonoiIaNm4ZwSSXlte3iLpCuASqmsEJwPXtVBbRES0YKKmod8b8fp+4HfL663A8xqrKCIi\nWjVuENh+U5uFREREb9TpNbQYeBuwaOTnZ8sw1L1S5yay0Z9ZuXJlU+VERIyrTq+hL1FNJ/ll4Klm\ny4mIiLbVCYKf2/5Y45VERERP1AmCj0paCXwD+MXwStvXN1ZVRES0pk4QvBT4I+AYftk05LIcERGz\nXJ0geA2w/8ihqOuQ9ELgc8CvUAXIKtsflbQncDHVxee7gdfbfnAq+46IiJlTJwhuBOYz9buJtwHv\nsn29pOcAGyRdCfwxsM72OZLOoBrd9N1T3PesNlmPouH304soItpQJwj2AW6XdB1Pv0YwYfdR25uB\nzeX1I5Juo5rmchnw8vKx1cDVdCwIIiL6SZ0gmPb/lkpaBBxMNZT1PiUksL1ZUqa9jIjooTrzEVwz\nnS+Q9Gzgn4B32H5YUt3tVgArABYuXDidEiIiYgKTjj4q6RFJD5fHzyU9KenhOjuX9CyqELjA9mVl\n9f2SFpT3FzDOtQfbq2wP2h4cGBio99NERMSUTRoEtp9j+7nlsQvw+8DHJ9tO1f/6nwfcZvtDI95a\nCywvr5cDa6ZedkREzJQ68xE8je0vUe8egiMo9x9I+nZ5vAo4BzhO0kbguLIcERE9UmfQudeOWNwB\nGKS6oWxCtv831dSWYzm2VnUREdG4Or2GRs5LsI3qJrBljVQTERGtq9NrKPMStKTO0NURETNtoqkq\nz5pgO9s+u4F6IiKiZROdETw6xrrdgdOA5wMJgoiIOWCiqSo/OPy6jBV0OvAm4CLgg+NtF83KOEQR\nMdMmvEZQRgr9c+BUqnGBDslIoRERc8tE1wj+BngtsAp4qe2ftVZVRES0ZqIbyt4FvAB4L3DfiGEm\nHqk7xERERPS/ia4RTPmu44iImH3yxz4iouMSBBERHZcgiIjouARBRETHJQjmiKGhoYxVFBHbJUEQ\nEdFxCYKIiI5LEEREdFyCICKi4xIEEREdlyCYg9J7KCKmIkEQEdFxCYKIiI5rLAgknS9pi6TvjFi3\np6QrJW0sz89r6vsjIqKeJs8IPgscP2rdGcA620uAdWU5IiJ6qLEgsP1N4CejVi+jmvKS8nxSU98f\nERH1tH2NYB/bmwHK897jfVDSCknrJa3funVrawX2m/QAioim9e3FYturbA/aHhwYGOh1ORERc1bb\nQXC/pAUA5XlLy98fERGjtB0Ea4Hl5fVyYE3L3z9rjW4iyrDTETFTmuw+eiHwf4EDJG2SdBp
wDnCc\npI3AcWU5IiJ6aF5TO7b9hnHeOrap74yIiKnr24vFMfN61ZyUZqyI/pYgiIjouARBRETHJQhmuYma\nXdIcExF1JAgiIjouQRAR0XEJgoiIjksQRER0XIIgIqLjEgQRER2XIOiwkV1PZ7KraZ07idO1NaJ/\nJAgiIjouQRAR0XGNjT4a/atOs8zwZ1auXDnmckTMHTkjiIjouARBRETHpWmoA6bSQ2doaGjKzT/j\nbTOdnkGjt62z/+1pttqeJq80k8VckzOCiIiOSxBERHRcmoZiQlNtVqrz/sgmlfF6J423/cqVKxtv\n6poJTTcfjT4WaaaK6cgZQUREx/UkCCQdL+kOSXdKOqMXNURERKX1piFJOwKfAI4DNgHXSVpr+9a2\na4mpG9kUMZPNLjM91hE8swlqolrrNLHUabaqu22d4zadHk3b8z1T2Way/W1vz7PJeqBNp7ap7KMf\nmtzabNbsxRnBy4A7bd9l+3HgImBZD+qIiAh6EwT7AveOWN5U1kVERA/IdrtfKJ0MvNL2fy7LfwS8\nzPbbRn1uBbCiLB4A3DGNr90L+PE0tm9Tam3ObKo3tTZjNtUK06/3RbYHJvtQL7qPbgJeOGJ5P+C+\n0R+yvQpYNRNfKGm97cGZ2FfTUmtzZlO9qbUZs6lWaK/eXjQNXQcskbRY0k7AKcDaHtQRERH04IzA\n9jZJfwZ8HdgRON/2LW3XERERlZ7cWWz7CuCKFr9yRpqYWpJamzOb6k2tzZhNtUJL9bZ+sTgiIvpL\nhpiIiOi4OR0E/T6UhaQXSrpK0m2SbpF0elm/p6QrJW0sz8/rda3DJO0o6QZJl5flxZKuLbVeXDoA\n9Jyk+ZIulXR7Ob6H9+txlfTO8t//O5IulLRLPx1XSedL2iLpOyPWjXksVflY+Z27SdIhfVDr35R/\nBzdJ+qKk+SPeO7PUeoekV/a61hHv/YUkS9qrLDd6XOdsEIwYyuIE4MXAGyS9uLdVPcM24F22fwM4\nDHhrqfEMYJ3tJcC6stwvTgduG7H8AeDDpdYHgdN6UtUzfRT4mu1fBw6kqrnvjqukfYG3A4O2X0LV\ngeIU+uu4fhY4ftS68Y7lCcCS8lgBnNtSjcM+yzNrvRJ4ie3fAr4LnAlQftdOAX6zbPPJ8nejLZ/l\nmbUi6YVUQ/D8YMTqRo/rnA0CZsFQFrY3276+vH6E6o/VvlR1ri4fWw2c1JsKn07SfsCJwKfLsoBj\ngEvLR/qiVknPBY4CzgOw/bjth+jT40rVaWNXSfOA3YDN9NFxtf1N4CejVo93LJcBn3Pl34H5kha0\nU+nYtdr+hu1tZfHfqe5dGq71Itu/sP194E6qvxs9q7X4MPCXwMgLuI0e17kcBLNqKAtJi4CDgWuB\nfWxvhiosgL17V9nTfITqH+hTZfn5wEMjfsn65RjvD2wFPlOasT4taXf68Lja/iHwt1T/97cZ+Cmw\ngf48riONdyz7/ffuzcBXy+u+q1XSUuCHtm8c9Vajtc7lINAY6/qyi5SkZwP/BLzD9sO9rmcskl4N\nbLG9YeTqMT7aD8d4HnAIcK7tg4FH6YNmoLGUtvVlwGLgBcDuVM0Ao/XDca2jX/9NIOk9VM2xFwyv\nGuNjPatV0m7Ae4Czxnp7jHUzVutcDoJaQ1n0mqRnUYXABbYvK6vvHz7tK89belXfCEcASyXdTdXM\ndgzVGcL80qQB/XOMNwGbbF9bli+lCoZ+PK6vAL5ve6vtJ4DLgN+hP4/rSOMdy778vZO0HHg1cKp/\n2We+32r9Var/Ibix/J7tB1wv6VdouNa5HAR9P5RFaWM/D7jN9odGvLUWWF5eLwfWtF3baLbPtL2f\n7UVUx/JfbJ8KXAW8rnysX2r9EXCvpAPKqmOBW+nD40rVJHSYpN3Kv4fhWvvuuI4y3rFcC7yx9HI5\nDPjpcBNSr0g6Hng3sNT2YyPeWgucImlnSYupLsR+qxc
1Ati+2fbetheV37NNwCHl33Ozx9X2nH0A\nr6LqJfA94D29rmeM+o6kOr27Cfh2ebyKqu19HbCxPO/Z61pH1f1y4PLyen+qX547gS8AO/e6vlLX\nQcD6cmy/BDyvX48rMATcDnwH+Dywcz8dV+BCqusXT1D9cTptvGNJ1YTxifI7dzNVb6he13onVfv6\n8O/Yp0Z8/j2l1juAE3pd66j37wb2auO45s7iiIiOm8tNQxERUUOCICKi4xIEEREdlyCIiOi4BEFE\nRMclCCIASVePHn1S0jskfXKCbX7WfGURzUsQRFQupLpRbqRTyvqIOS1BEFG5FHi1pJ3h/w8C+ALg\n25LWSbpe0s2SnjGCraSXq8zPUJY/LumPy+tDJV0jaYOkr7c5EmdEXQmCCMD2A1R38g6PD38KcDHw\nH8BrbB8CHA18sAwFMakyjtTfAa+zfShwPvA/Zrr2iOnqyeT1EX1quHloTXl+M9Wt/f9T0lFUw2/v\nC+wD/KjG/g4AXgJcWbJjR6ohBSL6SoIg4pe+BHyoTAO4q+3rSxPPAHCo7SfKqJC7jNpuG08/ux5+\nX8Attg9vtuyI6UnTUERh+2fA1VRNOMMXifegmofhCUlHAy8aY9N7gBeXUSz3oBpBFKqBzAYkHQ5V\nU5Gk32zyZ4jYHjkjiHi6C6nmBBjuQXQB8GVJ66lGrrx99Aa275V0CdVIpxuBG8r6xyW9DvhYCYh5\nVHM43NL4TxExBRl9NCKi49I0FBHRcQmCiIiOSxBERHRcgiAiouMSBBERHZcgiIjouARBRETHJQgi\nIjru/wH5DUKVviNpFAAAAABJRU5ErkJggg==\n", 66 | "text/plain": [ 67 | "" 68 | ] 69 | }, 70 | "metadata": {}, 71 | "output_type": "display_data" 72 | } 73 | ], 74 | "source": [ 75 | "bins = np.bincount(X[:, 0])\n", 76 | "plt.bar(range(len(bins)), bins, color='grey')\n", 77 | "plt.ylabel(\"Number of appearances\")\n", 78 | "plt.xlabel(\"Value\")\n", 79 | "plt.show()" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "ポアソン分布になっている\n" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 5, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "Test score: 0.622\n" 99 | ] 100 | } 101 | ], 102 | "source": [ 103 | "from sklearn.linear_model import Ridge\n", 104 | "from sklearn.model_selection import train_test_split\n", 105 | "\n", 106 | "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n", 107 | "score = Ridge().fit(X_train, y_train).score(X_test, y_test)\n", 108 | "print(\"Test score: {:.3f}\".format(score))" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "metadata": { 115 | 
"collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "# log0は定義されていない為+1\n", 120 | "X_train_log = np.log(X_train + 1)\n", 121 | "X_test_log = np.log(X_test + 1)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 9, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFyFJREFUeJzt3X+UJWV95/H3BxBBRBFt2BHEwbMcdl2NiL0ExaMCEnE1\ngFkxsIkZDZtJzhpFk92AOYkj0V31bPwZE5NZNTtGAyKigz+i4gQw7rrIDIIIoxlEfsnItD9YUaM4\n8N0/qjq2szPd1T1d9053vV/n3HNv1b1V9b1nztxP1/NUPU+qCknScO017gIkSeNlEEjSwBkEkjRw\nBoEkDZxBIEkDZxBI0sAZBJI0cAaBJA2cQSBJA7fPuAvo4lGPelStXLly3GVI0pKyadOmb1fVxFyf\nWxJBsHLlSjZu3DjuMiRpSUlyW5fP2TQkSQNnEEjSwBkEkjRwBoEkDZxBIEkDZxBI0sAZBJI0cAaB\nJA2cQSBJA7ck7iyWxu2CCy6Y1+fXrFnTUyXS4uv1jCDJq5LcmOQrSS5Msl+SI5NcnWRLkg8m2bfP\nGiRJs+stCJIcBrwCmKyqJwB7A2cBbwLeWlVHAd8DzumrBknS3PruI9gH2D/JPsBDgK3AScAl7fvr\ngDN6rkGSNIvegqCqvgn8KXA7TQD8X2ATcE9VbW8/didwWF81SJLm1mfT0COA04EjgUcDBwDP3clH\naxfbr06yMcnGqampvsqUpMHrs2no2cA3qmqqqn4KXAo8DTiobSoCOBy4a2cbV9XaqpqsqsmJiTnn\nVZAkLVCfQXA7cHyShyQJcDJwE3AF8ML2M6uA9T3WIEmaQ599BFfTdApfC9zQHmstcB7we0luBh4J\nvKevGiRJc+v1hrKqWgPseGfNLcBxfR5XktSdQ0xI0sAZBJI0cAaBJA2cQSBJA2cQSNLAGQSSNHAG\ngSQNnEEgSQNnEEjSwBkEkjRwBoEkDZxBIEkDZxBI0sAZBJI0cAaBJA2cQSBJA9fn5PVHJ7luxuP7\nSV6Z5OAklyfZ0j4/oq8aJElz63Oqyq9V1TFVdQzwFOBHwEeA84ENVXUUsKFdliSNyaiahk4Gvl5V\ntwGnA+va9euAM0ZUgyRpJ0YVBGcBF7avD62qrQDt8yEjqkGStBO9B0GSfYHTgA/Nc7vVSTYm2Tg1\nNdVPcZKkkZwRPBe4tqrubpfvTrICoH3etrONqmptVU1W1eTExMQIypSkYRpFEJzNz5qFAC4DVrWv\nVwHrR1CDJGkXeg2CJA8BTgEunbH6jcApSba0772xzxokSbPbp8+dV9WPgEfusO47NFcRSZL2AN5Z\nLEkDZxBI0sAZBJI0cL32EUijcsEFF8zr82vWrOmpEmnp8YxAkgZuziBIcmaSA9vXf5Tk0iTH9l+a\nJGkUupwR/HFV3Zvk6cBzaAaKe1e/ZUmSRqVLENzfPj8PeFdVrQf27a8kSdIodeks/maSvwKeDbwp\nyYOxb0EaOzvItVi6/KC/CPg0cGpV3QMcDPyXXquSJI3MnEHQDhOxDXh6u2o7sKXPoiRJo9PlqqE1\nwHnAq9tVDwLe32dRkqTR6dI09AKaiWV+CFBVdwEH9lmUJGl0ugTBfVVVQAEkOaDfkiRJo9QlCC5u\nrxo6KMlvAZ8F/ke/
ZUmSRmXOy0er6k+TnAJ8HzgaeE1VXd57ZZKkkZgzCJIcCfzD9I9/kv2TrKyq\nW/suTpLUvy5NQx8CHpixfH+7bk5JDkpySZKvJtmc5KlJDk5yeZIt7fMjFlK4JGlxdAmCfarqvumF\n9nXXISbeDnyqqv4V8CRgM3A+sKGqjgI2tMuSpDHpEgRTSU6bXkhyOvDtuTZK8jDgGcB7oAmQ9s7k\n02kGrqN9PmO+RUuSFk+XsYZ+B/hAkncCAe4AfqPDdo8DpoC/TvIkYBNwLnBoVW0FqKqtSQ7Z2cZJ\nVgOrAY444ogOh5OWrvmOGyQtpi5DTHy9qo4HHg88vqqeVlU3d9j3PsCxNCOWPpnmhrTOzUBVtbaq\nJqtqcmJioutmkqR56nLV0IOBfw+sBPZJAkBV/ckcm94J3FlVV7fLl9AEwd1JVrRnAytoxjGSJI1J\nlz6C9TTt+ttp/qqffsyqqr4F3JHk6HbVycBNwGXAqnbdqnb/kqQx6dJHcHhVnbrA/b+cpn9hX+AW\n4KU04XNxknOA24EzF7hvSdIi6BIE/zvJE6vqhvnuvKquAyZ38tbJ892XJKkfXYLg6cBLknwD+AnN\nlUNVVb/Qa2WSpJHoEgTP7b0KSdLYdBl07jaA9nr//XqvSJI0Ul1mKDstyRbgG8BVwK3A3/VclyRp\nRLo0Db0OOB74bFU9OcmJwNn9lqXdMd+7VNesWdNTJZKWgi73Efy0qr4D7JVkr6q6Ajim57okSSPS\n5YzgniQPBT5Hc0/ANpqbyyRJy0CXM4LTgR8BrwI+BXwd+OU+i5Ikjc6sZwRJ9gbWV9WzaSanWTfb\n5yVJS8+sZwRVdT/woyQPH1E9kqQR69JH8GPghiSXM2Owuap6RW9VSZJGpksQfKJ9SJKWoS53Ftsv\nIEnLWJeJaY4C3kAzQ9k/DzFRVY/rsS5J0oh0uXz0r4F30dw7cCLwPuBv+ixKkjQ6XYJg/6raAKSq\nbquq1wIn9VuWJGlUOl01lGQvYEuS3wW+CRzSZedJbgXuBe4HtlfVZJKDgQ/SzIF8K/Ciqvre/EuX\nJC2GLmcErwQeArwCeArw6/xszuEuTqyqY6pqeqay84ENVXUUsKFdliSNSZerhq4BSFJV9dJFOObp\nwLPa1+uAK4HzFmG/kqQF6DIfwVOT3ARsbpeflOQvOu6/gM8k2ZRkdbvu0KraCtA+d2pmkiT1o0sf\nwduA5wCXAVTV9Ume0XH/J1TVXe3sZpcn+WrXwtrgWA1wxBFHdN1M2iPMd04IaZy69BFQVXfssOr+\njtvd1T5vAz4CHAfcnWQFQPu8bRfbrq2qyaqanJiY6HI4SdICdAmCO5I8Dagk+yb5z7TNRLNJckCS\nA6dfA78EfIXmzGK6s3kVsH5BlUuSFkWXpqHfAd4OHEZz6eingZd12O5Q4CNJpo/zt1X1qSTXABcn\nOQe4HThzIYVLkhZHl6uGvg382nx3XFW3AE/ayfrvACfPd3+SpH50GWvocTRnBMfTXAX0BeBV7Q+9\npCVivh3Ya9as6akS7Wm69BH8LXAxsAJ4NPAh4MI+i5IkjU6XIEhV/U1VbW8f76c5M5AkLQNdOouv\nSHI+cBFNAPwq8Il2zCCq6rs91idJ6lmXIPjV9vm3d1j/mzTB4LwEkrSEdblq6MhRFCJJGo8uVw3t\nB/wn4Ok0ZwD/APxlVf2459okSSPQpWnofTRzCvxZu3w2zQxl3ggmSctAlyA4uqpm3hh2RZLr+ypI\nkjRaXS4f/VKS46cXkvwi8L/6K0mSNEpdzgh+EfiNJLe3y0cAm5PcAFRV/UJv1WmwHMZZGp0uQXBq\n71VIksamy+WjtwG0k8vsN2P97bvcSJK0ZHSZqvK0JFuAbwBXAbcCf9dzXZKkEenSWfw6mpFH/7G9\nuexk7CyWpGWjSxD8tJ1DYK8ke1XVFcAxPdclSRqRLp3F9yR5KPA54ANJtgHbux4gyd
7ARuCbVfX8\nJEfSDGB3MHAt8OKqum/+pUuSFkOXIDgd+CfgVTQzlT0c+JN5HONcmjmOH9Yuvwl4a1VdlOQvgXOA\nd81jf1pivBRU2rPN2TRUVT+sqgfauQjWVdU72qaiOSU5HHge8O52OcBJwCXtR9YBZyysdEnSYujS\nR7A73gb8AfBAu/xI4J6qmm5auhM4rOcaJEmz6C0Ikjwf2FZVm2au3slHdzrbWZLVSTYm2Tg1NdVL\njZKkWYIgyYb2+U0L3PcJwGlJbqXpHD6J5gzhoCTTfROHA3ftbOOqWltVk1U1OTExscASJElzme2M\nYEWSZ9L8mD85ybEzH3PtuKpeXVWHV9VK4Czg76vq14ArgBe2H1sFrN/N7yBJ2g2zXTX0GuB8mr/a\n37LDe0XzF/5CnAdclOT1wJeA9yxwP5KkRbDLIKiqS4BLkvxxVb1udw5SVVcCV7avbwGO2539SZIW\nT5dB516X5DTgGe2qK6vq4/2WJUkalS6Dzr2B5qawm9rHue06SdIy0OXO4ucBx1TVAwBJ1tG07b+6\nz8K05/JOYWl56XofwUEzXj+8j0IkSePR5YzgDTTzFl9Bc0PYM/BsQJKWjS6dxRcmuRL4tzRBcF5V\nfavvwiRJo9HljICq2gpc1nMtkqQx6HvQOUnSHs4gkKSBmzUIkuyV5CujKkaSNHqzBkF778D1SY4Y\nUT2SpBHr0lm8ArgxyReBH06vrKrTeqtKkjQyXYLA20glaRnrch/BVUkeCxxVVZ9N8hBg7/5LkySN\nQpdB536LZrL5v2pXHQZ8tM+iJEmj06Vp6GU08wdcDVBVW5Ic0mtVksZuvoMLrlmzpqdK1Lcu9xH8\npKrum15o5xve6YTzMyXZL8kXk1yf5MYkF7Trj0xydZItST6YZN+Fly9J2l1dguCqJH8I7J/kFOBD\nwMc6bPcT4KSqehJwDHBqkuOBNwFvraqjgO8B5yysdEnSYujSNHQ+zY/1DcBvA58E3j3XRlVVwA/a\nxQe1j+m5jv9Du34d8FrgXfMpeqnzlFvSnqTLVUMPtJPRXE3zQ/619kd+Tkn2BjYB/xL4c+DrwD1V\ntb39yJ00nc+SpDHpctXQ82h+wN8BvBO4Oclzu+y8qu6vqmOAw2k6nP/1zj62i+OuTrIxycapqaku\nh5MkLUCXPoI3AydW1bOq6pnAicBb53OQqroHuBI4Hjio7XCGJiDu2sU2a6tqsqomJyYm5nM4SdI8\ndAmCbVV184zlW4Btc22UZCLJQe3r/YFnA5uBK4AXth9bBayfV8WSpEW1yz6CJL/SvrwxySeBi2ma\ncc4Erumw7xXAurafYC/g4qr6eJKbgIuSvB74EvCe3fkC0kLMt8NeWs5m6yz+5Rmv7wae2b6eAh4x\n146r6svAk3ey/haa/gJJ0h5gl0FQVS8dZSGSpPGY8/LRJEcCLwdWzvy8w1BL0vLQ5Yayj9K0438M\neKDfciRJo9YlCH5cVe/ovRJJ0lh0CYK3J1kDfIZm/CAAqura3qqSJI1MlyB4IvBimjGCppuGpscM\nkiQtcV2C4AXA42YORS1JWj663Fl8PXBQ34VIksajyxnBocBXk1zDz/cRePmoJC0DXYLAwfAlaRnr\nMh/BVaMoRJI0Hl3uLL6Xn80ZsC/NTGM/rKqH9VmYJGk0upwRHDhzOckZOGicJC0bXfoIfk5VfTTJ\n+X0UI2npci7upatL09CvzFjcC5hkF9NLSpKWni5nBDPnJdgO3Aqc3ks1kqSR69JH4LwEkrSMzTZV\n5Wtm2a6q6nWz7TjJY4D3Af+CZoyitVX19iQHAx+kmd/gVuBFVfW9edYtSVoks50R/HAn6w4AzgEe\nCcwaBDTNSL9fVdcmORDYlORy4CXAhqp6Y9vpfD5w3rwrl7SkLWTeaDuY+zHbVJVvnn7d/pCfC7wU\nuAh48662m7H9VmBr+/reJJuBw2j6F57VfmwdcC
UGgSSNzayDziU5OMnrgS/ThMaxVXVeVW2bz0GS\nrKSZyP5q4NA2JKbD4pBdbLM6ycYkG6empuZzOEnSPOwyCJL8d+Aa4F7giVX12oW05Sd5KPBh4JVV\n9f2u21XV2qqarKrJiYmJ+R5WktTRbGcEvw88Gvgj4K4k328f9ybp9IOe5EE0IfCBqrq0XX13khXt\n+yuAeZ1dSJIW12x9BF3mKtilJKGZ9H5zVb1lxluXAauAN7bP63fnOHPxbkdp+fD/cz/mPcTEPJxA\nM8XlDUmua9f9IU0AXJzkHOB24Mwea5AkzaG3IKiqzwPZxdsn93VcSdL87FbzjyRp6TMIJGngDAJJ\nGjiDQJIGziCQpIEzCCRp4AwCSRo4g0CSBs4gkKSBMwgkaeAMAkkaOINAkgbOIJCkgTMIJGngDAJJ\nGjiDQJIGrrcgSPLeJNuSfGXGuoOTXJ5kS/v8iL6OL0nqps8zgv8JnLrDuvOBDVV1FLChXZYkjVFv\nQVBVnwO+u8Pq04F17et1wBl9HV+S1M2o+wgOraqtAO3zISM+viRpB3tsZ3GS1Uk2Jtk4NTU17nIk\nadkadRDcnWQFQPu8bVcfrKq1VTVZVZMTExMjK1CShmbUQXAZsKp9vQpYP+LjS5J20OfloxcCXwCO\nTnJnknOANwKnJNkCnNIuS5LGaJ++dlxVZ+/irZP7OqYkaf56CwJJWu4uuOCCeX1+zZo1PVWye/bY\nq4YkSaPhGYGkZWu+f7EPlWcEkjRwBoEkDZxBIEkDZxBI0sAZBJI0cAaBJA2cQSBJA+d9BJI0Invq\nncieEUjSwBkEkjRwBoEkDZxBIEkDZxBI0sCNJQiSnJrka0luTnL+OGqQJDVGHgRJ9gb+HHgu8Hjg\n7CSPH3UdkqTGOM4IjgNurqpbquo+4CLg9DHUIUliPEFwGHDHjOU723WSpDFIVY32gMmZwHOq6j+2\nyy8Gjquql+/wudXA6nbxaOBrCzzko4BvL3Db5WDI39/vPlxD/v4zv/tjq2pirg3GMcTEncBjZiwf\nDty144eqai2wdncPlmRjVU3u7n6WqiF/f7/7ML87DPv7L+S7j6Np6BrgqCRHJtkXOAu4bAx1SJIY\nwxlBVW1P8rvAp4G9gfdW1Y2jrkOS1BjL6KNV9UngkyM63G43Ly1xQ/7+fvfhGvL3n/d3H3lnsSRp\nz+IQE5I0cMs6CIY6lEWS9ybZluQr465lHJI8JskVSTYnuTHJueOuaVSS7Jfki0mub7/7/GZCWQaS\n7J3kS0k+Pu5aRi3JrUluSHJdko2dt1uuTUPtUBb/CJxCc8nqNcDZVXXTWAsbgSTPAH4AvK+qnjDu\nekYtyQpgRVVdm+RAYBNwxkD+7QMcUFU/SPIg4PPAuVX1f8Zc2sgk+T1gEnhYVT1/3PWMUpJbgcmq\nmtc9FMv5jGCwQ1lU1eeA7467jnGpqq1VdW37+l5gMwO5e70aP2gXH9Q+ludfezuR5HDgecC7x13L\nUrKcg8ChLESSlcCTgavHW8notE0j1wHbgMurajDfHXgb8AfAA+MuZEwK+EySTe3oDJ0s5yDITtYN\n5i8jQZKHAh8GXllV3x93PaNSVfdX1TE0d+0fl2QQzYNJng9sq6pN465ljE6oqmNpRnd+WdtMPKfl\nHASdhrLQ8tS2j38Y+EBVXTruesahqu4BrgROHXMpo3ICcFrbTn4RcFKS94+3pNGqqrva523AR2ia\nyOe0nIPAoSwGqu0wfQ+wuareMu56RinJRJKD2tf7A88Gvjreqkajql5dVYdX1Uqa/+9/X1W/Puay\nRibJAe3FESQ5APgloNOVg8s2CKpqOzA9lMVm4OKhDGWR5ELgC8DRSe5Mcs64axqxE4AX0/xFeF37\n+HfjLmpEVgBXJPkyzR9Dl1fV4C6jHKhDgc8nuR74IvCJqvpUlw2X7eWjkqRulu0ZgSSpG4NAkgbO\nIJCkgTMIJG
ngDAJJGjiDQAKSXJnkOTuse2WSv5hlmx/s6j1pKTEIpMaFNDchzXRWu15a1gwCqXEJ\n8PwkD4Z/Hqzu0cB1STYkubYd5/3/G8E2ybNmjn2f5J1JXtK+fkqSq9pBwD7dDpEt7VEMAgmoqu/Q\n3I05PS7PWcAHgX8CXtAO5HUi8OZ2CIs5teMd/Rnwwqp6CvBe4L8udu3S7hrL5PXSHmq6eWh9+/yb\nNKPY/rd2FMcHaIYyPxT4Vof9HQ08Abi8zY69ga2LX7a0ewwC6Wc+CrwlybHA/u0MZy8BJoCnVNVP\n25Et99thu+38/Nn19PsBbqyqp/ZbtrR7bBqSWu3MXlfSNOFMdxI/nGaM+58mORF47E42vQ14fJIH\nJ3k4cHK7/mvARJKnQtNUlOTf9PkdpIXwjED6eRcCl/KzK4g+AHysnQj8OnYypHNV3ZHkYuDLwBbg\nS+36+5K8EHhHGxD70MygNYhRcLV0OPqoJA2cTUOSNHAGgSQNnEEgSQNnEEjSwBkEkjRwBoEkDZxB\nIEkDZxBI0sD9P6IniVbBhFG8AAAAAElFTkSuQmCC\n", 132 | "text/plain": [ 133 | "" 134 | ] 135 | }, 136 | "metadata": {}, 137 | "output_type": "display_data" 138 | } 139 | ], 140 | "source": [ 141 | "plt.hist(X_train_log[:, 0], bins=25, color='gray')\n", 142 | "plt.ylabel(\"Number of appearances\")\n", 143 | "plt.xlabel(\"Value\")\n", 144 | "plt.show()" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "非対称性が少なく,非常に大きい外れ値はなくなっている(ガウス分布(正規分布)に近くなっている)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 10, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | "Test score: 0.875\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "score = Ridge().fit(X_train_log, y_train).score(X_test_log, y_test)\n", 169 | "print(\"Test score: {:.3f}\".format(score))" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "この種の変換は決定木ベースのモデルには関係が無い,線形モデルには強く効く.また回帰モデルの場合はターゲット変数yも変換した方が良い場合がある.\n", 177 | "\n", 178 | "ビニング,多項式,交互作用はあるデータセットに対するモデルの性能に大きな影響を与える." 
179 | ] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "Python 3", 185 | "language": "python", 186 | "name": "python3" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 3 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython3", 198 | "version": "3.5.4" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 2 203 | } 204 | -------------------------------------------------------------------------------- /ch05/README.md: -------------------------------------------------------------------------------- 1 | # モデルの評価と改良 2 | 3 | ### [交差検証](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch05/cross-validation.ipynb) 4 | 5 | 交差検証手法 6 | 7 | - k分割交差検証 8 | - 層化k分割交差検証 9 | - シャッフル分割交差検証 10 | - グループ付き交差検証 11 | 12 | ### [グリッドサーチ](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch05/grid-search.ipynb) 13 | 14 | パラメータのチューニング方法としてグリッドサーチが存在する.またその際パラメータの過剰適合を防ぐ為,データを以下のように分ける 15 | 16 | - 訓練セット(train): モデルを構築する 17 | - 検証セット(valid): モデルのパラメータを選択する 18 | - テストセット(test): 選択したパラメータの性能を評価する 19 | 20 | ### [評価基準とスコア](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch05/evaluation-metrics-and-scoring.ipynb) 21 | 22 | #### 2クラス分類 23 | 24 | 2つのクラスを陽性,陰性と呼び,探している物を陽性と呼ぶ. 25 | 26 | - 偽陽性(FP): 陰性のものを陽性と間違えること 27 | - 偽陰性(FN): 陽性のものを陰性と間違えること 28 | - 真陽性(TP): 陽性クラスで正しく分類されたサンプル 29 | - 真陰性(TN): 陰性クラスで正しく分類されたサンプル 30 | 31 | #### 評価方法 32 | 33 | - 精度(Accuracy) 34 | - 適合率(Precision) 35 | - 再現率(Recall) 36 | - F値 37 | -------------------------------------------------------------------------------- /ch06/README.md: -------------------------------------------------------------------------------- 1 | # アルゴリズムチェーンとパイプライン 2 | 3 | Pipelineクラスを用いることでデータ変換とモデル実行のチェーンの構築を簡単に行える. 
4 | 5 | ### [前処理を行う際のパラメータ選択](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch06/parameter_selection_with_preprocessing%20.ipynb) 6 | 7 | モデルの構築過程において,スケール変換や特徴量抽出を行う際に交差検証の為のデータの分割を怠ると,検証(テスト)用に分割されるデータの情報がモデルの構築過程にリークしてしまう. 8 | 9 | ### [パイプラインの構築](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch06/building_pipelines.ipynb) 10 | 11 | パイプラインを用いると「前処理+クラス分類」といった一連のプロセスに必要なコードを減らすことが出来る. 12 | 13 | ### [パイプラインを用いたグリッドサーチ](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch06/using_pipelines_in_grid-searches.ipynb) 14 | 15 | グリッドサーチもパイプラインで扱うことが出来る. 16 | 17 | ### [汎用パイプラインインターフェース](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch06/the-general-pipeline-interface.ipynb) 18 | 19 | Pipelineクラスは任意個数のEstimatorを連結することができる.最後以外のステップでは次のステップで使うデータの新しい表現を生成するためにtransformメソッドが定義されていなければならないという制約がある. 20 | 21 | ### [前処理ステップとモデルパラメータに対するグリッドサーチ](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch06/grid-searching-preprocessing-steps-and-model-parameters.ipynb) 22 | 23 | パイプラインを使うと,機械学習ワークフローの全てのステップを一つのscikit-learn Estimatorにカプセル化することが出来る.そのメリットの一つは,回帰やクラス分類などの教師ありタスクの結果を使った前処理のパラメータの調整を行えるということである. 24 | 25 | ### [グリッドサーチによるモデルの選択](https://github.com/kajyuuen/IntroductionToMachineLearningWithPython/blob/master/ch06/selection-of-model-by-grid-searching.ipynb) 26 | 27 | GridSearchCVとPipelineの組み合わせでは,実際に行われるステップに対してもサーチすることができる 28 | . 
29 | -------------------------------------------------------------------------------- /ch06/algorithm_chains_and_pipelines.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# アルゴリズムチェーンとパイプライン" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "多くの機械学習アルゴリズムではデータ表現は非常に重要。データ表現は、データのスケール変換から、手で特徴量を組み合わせたり、教師なし学習で特徴量を生成など多岐にわたる。その処理をパイプラインなら繋げれる。 \n", 15 | "下記の例はスケール変換後のsvmの復習。" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 4, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "from sklearn.svm import SVC\n", 25 | "from sklearn.datasets import load_breast_cancer\n", 26 | "from sklearn.model_selection import train_test_split\n", 27 | "from sklearn.preprocessing import MinMaxScaler" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 5, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "cancer = load_breast_cancer()\n", 37 | "X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, random_state=0)\n", 38 | "\n", 39 | "scaler = MinMaxScaler().fit(X_train)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 6, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "Test score: 0.95\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "X_train_scaled = scaler.transform(X_train)\n", 57 | "\n", 58 | "svm = SVC()\n", 59 | "svm.fit(X_train_scaled, y_train)\n", 60 | "\n", 61 | "X_test_scaled = scaler.transform(X_test)\n", 62 | "print(\"Test score: {:.2f}\".format(svm.score(X_test_scaled, y_test)))" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [] 71 | } 72 | ], 73 | "metadata": { 74 | "kernelspec": { 75 | "display_name": "Python 3", 76 | 
"language": "python", 77 | "name": "python3" 78 | }, 79 | "language_info": { 80 | "codemirror_mode": { 81 | "name": "ipython", 82 | "version": 3 83 | }, 84 | "file_extension": ".py", 85 | "mimetype": "text/x-python", 86 | "name": "python", 87 | "nbconvert_exporter": "python", 88 | "pygments_lexer": "ipython3", 89 | "version": "3.5.4" 90 | } 91 | }, 92 | "nbformat": 4, 93 | "nbformat_minor": 2 94 | } 95 | -------------------------------------------------------------------------------- /ch06/building_pipelines.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# パイプライン構築" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Pipelineクラスを用いてMinMaxScalerによるスケール変換を行ってからSVMを訓練するワークフローを構築する." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "from sklearn.svm import SVC\n", 24 | "from sklearn.datasets import load_breast_cancer\n", 25 | "from sklearn.model_selection import train_test_split\n", 26 | "from sklearn.preprocessing import MinMaxScaler\n", 27 | "import mglearn\n", 28 | "import matplotlib.pyplot as plt" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 5, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "cancer = load_breast_cancer()\n", 38 | "X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, random_state=0)\n", 39 | "\n", 40 | "scaler = MinMaxScaler().fit(X_train)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "2つのステップを作る。1つ目はscalerという名前でMinMaxScaler、2つ目はsvmという名前でSVCインスタンス。 \n" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 6, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "from sklearn.pipeline import Pipeline\n", 57 | "pipe = Pipeline([(\"scaler\", 
MinMaxScaler()), (\"svm\", SVC())])" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 7, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "Pipeline(memory=None,\n", 69 | " steps=[('scaler', MinMaxScaler(copy=True, feature_range=(0, 1))), ('svm', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", 70 | " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", 71 | " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", 72 | " tol=0.001, verbose=False))])" 73 | ] 74 | }, 75 | "execution_count": 7, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "pipe.fit(X_train, y_train)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 8, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "name": "stdout", 91 | "output_type": "stream", 92 | "text": [ 93 | "Test score: 0.95\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "print(\"Test score: {:.2f}\".format(pipe.score(X_test, y_test)))" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "パイプラインのscoreメソッドを用いると,まずテストデータをscalerで変換し,SVMのscoreメソッドを変換されたデータで呼び出す." 
106 | ] 107 | } 108 | ], 109 | "metadata": { 110 | "kernelspec": { 111 | "display_name": "Python 3", 112 | "language": "python", 113 | "name": "python3" 114 | }, 115 | "language_info": { 116 | "codemirror_mode": { 117 | "name": "ipython", 118 | "version": 3 119 | }, 120 | "file_extension": ".py", 121 | "mimetype": "text/x-python", 122 | "name": "python", 123 | "nbconvert_exporter": "python", 124 | "pygments_lexer": "ipython3", 125 | "version": "3.5.4" 126 | } 127 | }, 128 | "nbformat": 4, 129 | "nbformat_minor": 2 130 | } 131 | -------------------------------------------------------------------------------- /ch06/grid-searching-preprocessing-steps-and-model-parameters.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 前処理ステップとモデルパラメータに対するグリッドサーチ" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "パイプラインを使うと,機械学習ワークフローの全てのステップを一つのscikit-learn Estimatorにカプセル化することが出来る.このことのメリットの一つが,回帰やクラス分類などの教師ありタスクの結果を使った`前処理のパラメータの調整`を行うことができることである." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 8, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "from sklearn.svm import SVC\n", 26 | "from sklearn.datasets import load_breast_cancer\n", 27 | "from sklearn.model_selection import train_test_split\n", 28 | "from sklearn.preprocessing import MinMaxScaler\n", 29 | "import mglearn\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "from sklearn.pipeline import Pipeline\n", 32 | "from sklearn.model_selection import GridSearchCV\n", 33 | "from sklearn.pipeline import make_pipeline\n", 34 | "from sklearn.preprocessing import StandardScaler\n", 35 | "from sklearn.linear_model import Ridge" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 9, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "from sklearn.datasets import load_boston\n", 45 | "from sklearn.preprocessing import PolynomialFeatures\n", 46 | "\n", 47 | "boston = load_boston()\n", 48 | "X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=0)\n", 49 | "\n", 50 | "pipe = make_pipeline(\n", 51 | " StandardScaler(),\n", 52 | " PolynomialFeatures(),\n", 53 | " Ridge())" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 14, 59 | "metadata": { 60 | "collapsed": true 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "# 多項式の次数を決める\n", 65 | "param_grid = {\n", 66 | " 'polynomialfeatures__degree': [1, 2, 3],\n", 67 | " 'ridge__alpha': [0.001, 0.01, 0.1, 1, 10, 100]\n", 68 | "}" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 11, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "GridSearchCV(cv=5, error_score='raise',\n", 80 | " estimator=Pipeline(memory=None,\n", 81 | " steps=[('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('polynomialfeatures', PolynomialFeatures(degree=2, include_bias=True, interaction_only=False)), 
('ridge', Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,\n", 82 | " normalize=False, random_state=None, solver='auto', tol=0.001))]),\n", 83 | " fit_params=None, iid=True, n_jobs=-1,\n", 84 | " param_grid={'polynomialfeatures__degree': [1, 2, 3], 'ridge__alpha': [0.001, 0.01, 0.1, 1, 10, 100]},\n", 85 | " pre_dispatch='2*n_jobs', refit=True, return_train_score=True,\n", 86 | " scoring=None, verbose=0)" 87 | ] 88 | }, 89 | "execution_count": 11, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "grid = GridSearchCV(pipe, param_grid=param_grid, cv=5, n_jobs=-1)\n", 96 | "grid.fit(X_train, y_train)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 15, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAADZCAYAAAAjQYsjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xd4VGX2wPHvSSehJpTQOyhKCx0FQVQEQewdGxbUtcvP\nunZQsKDuCuq69rq2pejaCzakFwXpJUASSCAkpGfm/P6YIYUkZC7OZFLO53nyZO697505JzM58857\n77xXVBVjjDG1X0iwAzDGGFM1rOAbY0wdYQXfGGPqCCv4xhhTR1jBN8aYOsIKvjHG1BFW8I0xpo6w\ngm+MMXWEFXxjjKkjwoIdQEkREqlRxAQ7DGPKKGxWu1+X9ZtmBzuEgMp3hwY7hIDav25Pqqo2q6xd\ntSr4UcQwSEYFOwxjykg9Z2iwQwioIVctDXYIAZWY1STYIQTU/OH/3OZLOxvSMcaYOsIKvjHG1BFW\n8I0xpo6wgm+MMXWEFXxjjKkjrOAbY0wdYQXfGGPqCCv4xhhTR1jBN8aYOsIKvjHG1BFW8I0xpo6w\ngm+MMXWEFXxjjKkjHBV8EaknIt0DFYwxxpjA8bngi8h4YAXwuXe5j4jMDVRgxhhj/MtJD/9BYCCQ\nDqCqK4AO/g/JGGNMIDgp+IWquj9gkRhjjAkoJ1e8+l1ELgJCRaQrcBPwS2DCMsYY429Oevg3AscA\necA7wH7glkAEZYwxxv987uGrajZwr4hMU9WsAMZkjDEmAJycpTNURNYAa73LvUVkVsAiM8YY41dO\nhnRmAqOBNABVXQkMD0RQxhhj/M/JQVtUNVFESq5y+Tcc59p2b8Udr9xAl4SOvHrfu3z41LyibWfd\nchpjJo1CVdm6ejtPXDmLgrwCJtxwKmfefBqtu8RzdrMryUjLLPe+m7Vtyu3/mkyztnGowr2nTSNl\n2x6f9w+Ubv0789yv05h6wUx+/GghAFc9fjEDxyYA8PajH/HDf8oeTz/71nGMmTQKV6GL/XsyeHLS\nLHZvT6Vz7w7cNOtqohvWw+1y8860j8vdvyoNOb0/lz98AepWXIUuZt36Gn/8/GeZdl0TOjHl1RuI\nqBfBov8tY9bNrwLQuXcHbp59NRFREbgKXTx3w8usW7yxqtMo19Ae7Zly7ghCJIT//vI7r365uNT2\n
S05M4MzjjqXQ7WZfZg4PvfUlSXs9r7GbzxzGsGM6IiHw29rtzPjg+yBkcHjbf9nFT08uQV3K0Wd0\nIeGKY8q02fjlNpa8tApEiOvamJOnHU9m0gE+v+NH1K24C930PL8bx5zTLQgZHF5Ck6O5utPZhEgI\nXyX/yoc7viq1fVTzQVzRaQJpeZ6TGj/dtYAvU34F4LIOpzMg1vP3eG/7F/yUuqxKY3dS8BNFZCig\nIhKB5yydtYfbQUSigAVApPexPlTVB4402PJk7j3A8ze/wnFnDCy1Pq5VLGfcOJarjrmV/Nx87nvv\nVkZecBxfvv49v//8JwvnL+XJ7x487H3f+frfeGfaxyz7ehVRMVGo2w3g8/6BEBISwlWPX8LSL1YU\nrRs4NoEufTsxue8UIiLDeer7h1j8v+VkZ+aU2nfj8i3cMOBO8nLyGTf5FK6ePpGpF84kNzuPGZf9\ng50bk4lr2YTnl0xnyRcryNqfXdXpFVn+ze/8OvcOADr2bMd979/GpB5lzxG4adbVzLz2RdYuXM/U\nT+9hwKl9WPz5Cq6efglvPvwBiz9fwcAxfbl6+iXcceKDVZxFWSEi3HX+iVz33MekpGfy9p0X8cOq\nTWxO3lvU5s8du7n48XfILSjk3GG9uPnMYdz178/o3aklfTq14rypbwLw6u3n0a9rG5Zu2BGsdMpw\nu9z8+Phixs86kZgW0Xw08XM6nNCG2E6Nitqkb89g+Wt/cOYrpxDZMJLsvbkARDetx1mvnkJoRCgF\n2QW8f96ndDihDTHNooOVThkhCJM7n8vff3+etLx0nu4zhd/2riYxO7lUux/3LOfFTR+UWte/yTF0\nrt+Wm5ZNJzwkjMd63czSfWvIceVWYfy+mwzcALQGdgB9vMuHkwecqKq9ve1PFZHBRxJoRdL3ZLB+\nySYKCwrLbAsNCyGyXgQhoSFERkeStsvzT7VpxVZStu057P22O7oNoWGhLPt6FQC5Wbnk5eT7vH+g\nTLjxVH76eCHpuzOK1rXv0YZVC/7A7XKTm53HplXb6H9qnzL7rvz+j6Ic1i5cT7M2sQDs3JDEzo2e\nF2xa0j7Sd++ncbOGVZBNxXKziv8JomKiQLVMm9j4xkQ3rMfahesB+PrNHxjqfeNXVaIbegpFTKNo\n0nbtq4KoK3dsh3gS96SzM20/hS43Xyxdx4jenUu1WbJ+B7ne1/OqLUm0aNwA8PwJIsJDCQ8LISIs\nlLDQUPZmBu9NuTy7/0ijUdsGNGzTgNDwULqc0p6t3yeWarP2k40cc243IhtGAhAdGwVAaHgooRGh\nALjy3ai77HMebF0btCcpN5WU3DQK1cWCPUsZFNvTp33bRsfz+/6NuHGT585nS9ZO+jU5OsARl+ZT\nD19EQoGJqnqxkztXVQUOeBfDvT9V8iym7drLh0/N4+1ts8nLyWfplytZ+tUqn/dv060lB9KzeODD\nO4jv2Jxl36zm33e9jdvbyw+GuFaxHH/GIKaMeojuL3cpWr955VYm3n8uHz09n8joSPqMOIbtaxIP\nc08wZtIoFn2+vMz67gO6EB4Rxq5NKX6P36njzhjIldMuonHzRtw37rEy25u2jiV1R1rR8p4daTRt\n5XkTm33razz2+X1c88REQkJCuPm4e6ss7sNp3rg+KfuKhwBT9h3g2A7xFbY/Y+ix/PzHFsBT/Jes\nT+Srx64BEd7/YQVbSnwyqA6yducQ06K4Rx7TIprdv6eVapO+zZP/J1d+gdulDLi2F+2GtgLgQHIW\nn978PRmJmQy5pW+16t0DxEU2JjWvuPOQlp9OtwYdyrQb2rQ3xzTqzK6c3by86WNS89PZmrWTC9qN\nYc7Ob4kMiaBXo65lPhkEmk89fFV1AROO5AFEJFREVgC7ga9U9bdDtl8jIktEZEkBeUfyEOWq3ziG\nIacPYGKnG7ig9TVExUQy6uJhPu8fGhZKz2FH8+KUN7hh4F207N
icUy4f4bf4jsT1My/n5bveKvOm\ns/SrVSz633Ke/Xkq97xzC2t+XY+rsOI3plEXD6Nbv0588ETpqZBi4xtz5xs38uSVs9ByetRV7ef/\nLmJSj1t48MwZXP7w+WUblD6eBFAU97jrTmH2ba9xcfvrmH3ba9z+8nWBDvcvKP9vPXbgUfRo34LX\nv14KQNtmjegYH8voe19m9D3/YmC3tiR0aV2VgVauvNfNIU+Tutzs357J6S+ezMnTjuf7RxaSl+n5\n5Fk/Pobz3z+Ni+aczrr5W8hOyyl7f0FU9hUHesjzt2jvaiYtepCblj3Oin3ruKX7RACWp//J0n1/\nMKP3bdxx1OX8mbkFl1btYVAnQzo/i8g/RWSYiCQc/KlsJ1V1qWofoA0wUESOPWT7S6raX1X7hxPp\nUyCnXz+aF5Y9wQvLniCuZZNy2ySc1JPkrbvZn5qBq9DFT5/8Ro+hvk/0mbojjY3Lt5C8ZTdul5tf\n5iyma0JHn/f3l5K5du3fmXvevYU3Nz/PsHMGc+PzVzF0wgAA3pn2MZMTpnDX6EcQEXZuSCr3/vqO\n6slF95zF/ROmU5BfPAwW3aAej86/m9f+/i5rf9tQJbkdqqLndfWPa2nZOZ6GcQ1KtU/dkUbTNnFF\ny83axJGW5Ol9nXLpCH762NO3WPDBr3Qf2IXqYHf6AVo0Kc6jRZP67Nlf9mstg7q3Y9KpA7ll9hwK\nCj1FYWTvLqzekkxOXgE5eQX8/MdWenZsWWWx+yKmRTRZKcXDTFkp2cQ0rVemTYcT2hAaHkLD1vVp\n3L4h+7eXPvEhplk0TTo1Iml5cIZOK5Kal07TyOLXZlxEY/bmlZ5xJrMwm0L1/G99mfwLXeq3Ldr2\nn8QvuXn5dO7//XkEYVdO1ebnpOAPxfNN24eBp7w/T/q6s6qmA98Dpzp4zHLNnfUFkxOmMDlhStE/\n+KF2b0/l6EFdiawXAUDfE3uyfa3vB7fWLd5E/SYxNGrqGcvuM/JYtq2p+oNjJXO9tPMNTOzk+fnx\nw4X844aX+WXOYkJCQmgQWx/wHODs2KsdS75cWea+OvfpwC0vXMP9E6aTvqf4GEBYeBgPfjyFr978\ngQUfLqyy3A5VMtfI6OI3/y59OxIeEVbmbKi9yenkZOZw9KCuAJw08QR+neM54yVt1156ndADgL4n\nHsvODVX70bkif2xLpl3zJrSKa0hYaAij+3Xn+1WbS7Xp3qYZ9140iltnz2XfgeIebvK+TPp1bUNo\niBAWEkJC1zbVbkineY840hMzydh5AFeBi41fbqPDCW1Ktek4oi07l3iGDHP25ZK+PZOGretzICWb\nwlxPoczLyCN55R4at29Q5jGCaUPmdlpFNaNFZBxhEsrwZv1YtHd1qTZNwouPfw2M61k0bBOC0CDM\nM0TVIboVHWJasXxf2TPPAsnJN21HOr1zEWkGFKhquojUA04Cpju9n8Np0qIxzy9+nOiG9VC3ctbN\np3HVMbfy56KN/PjRQmYtnYGr0MWm5Vv57KWvATjjxjGcN2UCsfGNeWnlkyz633KevvoFuvXrxLjJ\np/D01S/gdrt5acqbzPj6fkSEDUs389m/vjns/sESGh7KzAWPAJCdkc30if/A7fIM6Vz20PmsX7KJ\nX+ct4ZoZE6lXP4q//+d2wPOmeP8Z0znhvCH0HH40DeMaMPoyz9P8xBXPs2nl1qDkAzDs7EGcNPEE\nXAUu8nLyefSCmUXbXlj2BJMTpgDw3PX/4o5XbyCyXgSLP1/Bov95jks8fc2LXP/MFYSGhZCfW8Az\n174YlDwO5XIr09//lll/O4uQEGHOr3+wOSmN68YNYc22FH5YvZlbzxpOdGQ4M646DfAU+ltemMvX\nyzYwoFtb/nPfRFD4Zc1WFqzeXMkjVq2QsBCG/V9/5v/tW9SlHDWhM7GdG7No9kqa9Yij4wltaDuk\nJYkLk3jvnHlIiDDk5r5ENY
4kcWESv8xchohnZKjPxKOJ61r+J/hgcePmhU0f8NCx1xMiwtcpC9me\nnczF7ceyIXM7i/b+zvjWJzAoticudZNZmMWz698GIFRCeby350yz7MJcnlr3Bm6q9pig+DpWKyK3\nlbN6P7DUO1Vyefv0Al4HQvF8mviPqj5c0WM0lFgdJKN8iseYqpQ6eWiwQwioIVctDXYIAZWYVb3e\nOPxt/vB/LlXV/pW1c3Iefn/vz8FvNp0GLAYmi8gHqjrj0B1UdRXQ18FjGGOMCRAnBT8OSFDVAwAi\n8gDwIZ7pFZYCZQq+McaY6sPJQdt2QH6J5QKgvarmgB/PpzTGGBMQTnr47wALRWSOd3k88K6IxABr\n/B6ZMcYYv3Jyls4jIvIZcDye7x9MVtUl3s2OvoFrjDGm6jkZ0gGoB2So6jPANhGp+m8iGWOMOSJO\nLoDyAHAncLd3VTjwViCCMsYY439OevhnAqcDWQCquguoXl+DM8YYUyEnBT/fO/ulAngP1hpjjKkh\nnBT8/4jIi0BjEbka+Br4V2DCMsYY429OztJ5UkROBjKA7sD9qvpVJbsZY4ypJpxe0/YrwIq8McbU\nQJUWfBHJ5DBXqVLV4F4LzxhjjE8qLfiq2gBARB4GkoE38Xzx6mLsLB1jjKkxnBy0Ha2qs1Q1U1Uz\nVHU2cHagAjPGGONfTgq+S0Qu9l6jNkRELgaq9oKMxhhjjpiTgn8RcB6Q4v0517vOGGNMDeDktMyt\nwISKtovI3ar6mD+CMsYY439OJ087nHP9eF/GGGP8zJ8FX/x4X8YYY/zMnwXft6uhG2OMCQrr4Rtj\nTB3hz4L/Hz/elzHGGD+zg7bGGFNHOJo8rRJ/eUinZc9s7p67yh+xVEsu9ef7a/UTJQXBDiFgWof9\nFOwQAqpDWO2eJSVH84IdQkDV97GdHbQ1xpg6wg7aGmNMHeHPgv+BH+/LGGOMn/lc8EVkhog0FJFw\nEflGRFJF5JKD21V1WmBCNMYY4w9OevinqGoGMA7YAXQDpgQkKmOMMX7npOCHe3+PBd5V1b0BiMcY\nY0yAODktc56I/AnkANeLSDMgNzBhGWOM8Tefe/iqehcwBOivqgVANoeZLtkYY0z14uSgbTRwAzDb\nu6oV0D8QQRljjPE/J2P4rwL5wFDv8g7gUb9HZIwxJiCcFPzOqjoDKABQ1Rzsy1bGGFNjOCn4+SJS\nD+8UCiLSGajdE1QYY0wt4uQsnQeAz4G2IvI2cBxweSCCMsYY438+FXwREeBP4CxgMJ6hnJtVNTWA\nsRljjPEjnwq+qqqI/FdV+wGfBjgmY4wxAeBkDH+hiAwIWCTGGGMCyskY/kjgWhHZBmThGdZRVe0V\nkMiMMcb4lZOCPyZgURhjjAk4JwXfrmhljDE1mJOC/ymeoi9AFNARWAccE4C4jDHG+JnPBV9Ve5Zc\nFpEE4Fq/R2SMMSYgjvgSh6q6DKi2Z+0s+iGLy07cwsQRW3h3dtmp+2c9sptrxm7jmrHbuHTkFk7v\ntbHU9qxMF+cN3sRz96dUVciOLP7hAFeO2szlIzfx3uy0MttnP5LC5NO2MPm0LVxx4ibO7L2+1Pas\nTBcXDtnIPx9IrqqQHfnth2wuPnE7F47Yxluz95XZ/o9HUrlybCJXjk3kopHbGdtrS6ntWZluzhq8\nlZn376mqkH32w3d5nHTCHkYev4cXnj9QZvujD2YwbnQq40anMmr4HvocU/wafHxqJqeOSuWUkXt4\n6P4MVKvfSOvn32Zx9PFb6TZkK9P/UfZ/b/uOAkadvYN+J2+nz4nb+OybLAC++iGLAadsp/fIbQw4\nZTvf/pRd1aH75Kvvcuh7/C56Dd3FU//YX2Z74o5CxpyTwtCTkxg0KokvvskBIG2vizHnpNCiSyK3\n3ROcy4n43MMXkdtKLIYACcBh/5tEpC3wBhAPuIGXVPXZI4jTEZdLee7+3cx4szXN4sO5fsI2
hpwU\nQ4eukUVtrv9786Lbn7y2j41rSs8S8erTafQeFB3oUI+Iy6X884EUHn+jLU3jw7nxjK0MOak+7Uvk\nd93fWxTd/u/re9n0R+n8Xp+ZSq+B1Te/mffv4ek3W9EsPoxrJuzg+JNi6NA1oqjNjX9vWnT7o9f2\ns+GQ5+/lp/fSZ1C9KovZVy6X8uB9Gbz+ThPiW4Zy5rg0Rp0cRdduxf+K9z3YsOj2669mseb3QgCW\nLsln6ZJ8Pv0yDoDzz9rLbwvzGTwkkurC5VJuvGcPX7zfmjYtwxg0ZjvjT4mhR/fiGKc+s5dzTq/P\ndZc1Zs26PMZdsovNizvSNDaUOW+0olV8GL//mceYC3eSuLxTELMpy+VSbrtnH3Pfa07rlqEMH5vM\n2NHRHN0tvKjN9Gf3c9b4aK6+rAFr1xdw9iW7WbOoNVFRwt+nNGLNugLW/FkQlPid9PAblPiJxDOm\nX9l8+IXA7ap6NJ5v6N4gIj2OJFAn/lyZS+v24bRqF0F4hDByfEN++Sqrwvbfzstk5PgGRcvrV+ey\nL9VFv2HVsyCuW5lLq/YRtPTmd8K4hvzyVdme4kHfz8tkxPjiIuLJr7Da5rd2ZZ73+QsnPEIYNb4+\nPx3m+ft6XiajxtcvWl63Oo99qS4GDKt+BX/ligLadwilXfswIiKEcadH8fWXFV9HaN6cXMZPiAJA\nBPLylIJ8yM+HggKladPQqgrdJ4uW59K5Qzid2ocTESGcP6EBc78o/dyJQGamG4D9mW5axXve7Pr2\njCq6fUz3CHLzlLw8d9UmUIkly/Pp1CGMjt7n75wJ0Xz6RelPIiJCZqbnk1dGhpuWLTzPUUx0CEMH\nRREVGbw5J50U/DWq+pD3Z6qqvg2MP9wOqprkHfpBVTOBtUDrIw/XN6nJhTRrWdxjahYfRmpy+e+o\nKTsKSE4soO9QT/Fzu5UXpu7h2rubltu+OkhNLiidX8sw0lIqyG9nAcmJ+fQpkd9L01K4+q7m5bav\nDlKTC2l+yPO3J7mw3LbJOwpISiwkYainuLvdyvNTU7nu7rgqidWplGQ3LVsVF+n4lqGkJJdf1Hbu\ncLEj0cWQ4zyfbBL6RTB4SASD++9mcL/dDDshki5dnZx3EXg7kwtp27o4ptYtw9h5yHP3wB1xvP1R\nJu0StjDukl08+2izMvfz0acH6HtsJJGRRzzqHBC7kl20KfH8tW4Zxq4kV6k2997eiPc+zqJbv52c\nPXE3T06NreowK+Tkr3m3j+vKJSIdgL7Abw4e88iUM6zpmQ6orG/nZzJ8TANCQz3b576ZzsARMTRv\nFV5u++qqgvT4fl4Gw0rkN++tdAaOqF+t8ytvWLqi/L6Zf4ARY2KK8vvkzQwGj4imRavqVQgPKnfI\nvYLc5s/N4dSxUUW5bd1SyKaNLn5e1IxfFjdj4S/5LFqYH7hgj4Avz917n2Ry2fkN2b6sI/PfasVl\nN6bgdhfv+Me6PO5+NI3ZM6pfp8SX/D74bxaXnBfD+qWt+ejN5lx1Y2qp/IKp0v8KERmD58LlrUXk\nuRKbGuIZsqmUiNQHPgJuUdWMQ7ZdA1wD0NxP/6RNW4axJ6k4tD3JhcS1KP++v5+XyU0PF7+w1izP\nZfXiHOa+lU5OtpvCAqgXE8LVd5bthQRL0/jw0vklFRLbvPwC/v38DP72UPF4/pplOfy+OJt5b+0j\nJ1spLFDqRYcw6c7q88/VrGUYuw95/ppW8Px9O+8Atzxc/Nz8sTyXVYtz+e9bGeRkuykoUOrFhDD5\nzurR449vGULSruIeYXKSixYtyu93zZ+by4OPFg/FfflFHn36hhMT42l/wshIli/PZ+DgiHL3D4Y2\nLcNI3Fn83O1MKqTVIc/dK+9m8Nk7rQAY0r8euXluUve6aN40jB27Cjj7yiRee64FnTtUn7wOat0y\nlB0lnr+dSYW0jC89rPb6u1n8923Pa3JQ/0jy8pTUvW6a
V4PhN18q7C5gCXA6sLTE+kzg1sp2FpFw\nPMX+bVX9+NDtqvoS8BJA915RfnkbPKpXFDu3FpCUWEDTFmF8Ny+De59tWaZd4qZ8Mve76JEQVbTu\nnmeK233+4X7Wr8qtVsUeoHuvKHZuzScpMZ+mLcL5YX4Gdz3Tqky7xM15HNjvokdC8Vj23SXafflh\nOutX51arYg9wVK9IdmwtYFdiAc1ahPHNvAPc/2yLMu22b8onc7+bYxOKDwje/0xxu/99mMGfq/Kq\nTbEH6NU7nK1bXSRuL6RFfCjz5+Yy8x+NyrTbvKmQ/fvdJPQrfiNv1SqE99/NobBQUYXfFuZzxaTq\ndRxmQJ8oNm7JZ8v2AlrHh/H+nEzemhVfqk3b1mF881MOl58fztr1+eTmKc3iQknf72L8xF1MvTuO\n4wZWv+MvAP36RLBpSwFbtxfSKj6UD+dk88rzpV9fbVuH8v1PuVxyfn3+3FBAbh40i6seQ1OVFnxV\nXQmsFJF3vBcv95l3WuV/A2tV9ekjjNGx0DDhxoeaceelO3C7Ycy5DenQLZJXn06le88ohp7sOcD3\n7bwMRo5vUOFwT3UVGib87cEW3HNZIm43jD63ER26RfL6zD106xnFkJM8B6C/m5vBiHENa1x+YWHC\nLQ815Y5Lk3C7lbHnNqRjtwj+/fReuveM5PiTYwD4et4BThxfv0blFxYmPPBIQy6/ZB9uF5xzfj26\ndQ9n5pOZ9OwVzkmneDof8+bkMO70eqVyG3NaFL/+ks/Yk1MRgeEnRDLq5KiKHioowsKE56Y1Z8yF\nO3G54IoLGnJM90gemJFGv96RnD66Pk8+0JRrp+zm2Zf2IQKvPNMCEeH5V/azcUsBU5/Zy9RnPKct\nfv5ea5o3rT7Dc2FhwlNTYznjot24XDDxghh6dI/gkRnpJPSO4LTR0Ux7oAk33pHGP/+ViQAvzowt\neh57DNxJ5gElP1+Z/0UOc95tXuoMn0ATX8/jFZGuwGNADzzftAVAVSs8b0pEjgd+BFbjOS0T4B5V\n/ay89t17Rensue19i7wGcmn1eJcPlCgJzqlmVaF1WMVnCdUGHcIaVN6oBsvR2n1xvvqtti9V1f6V\ntXPy1vkqnqtezcQzc+YVVHJNW1X9qbI2xhhjqoaTLmc9Vf0Gz6eCbar6IHBiYMIyxhjjb056+Lki\nEgJsEJG/ATuB6nW0zxhjTIWc9PBvAaKBm4B+wCXAZYEIyhhjjP85mS1zMYCIqKpeEbiQjDHGBILP\nPXwRGSIia/BMj4CI9BaRWQGLzBhjjF85GdJ5BhgNpEHR+fnDAxGUMcYY/3N0YriqJh6yylVuQ2OM\nMdWOk7N0EkVkKKAiEoHn4O3awIRljDHG35z08CcDN+CZ3ngH0Me7bIwxpgbwZbbM6ap6JzBSVS+u\ngpiMMcYEgC89/LHeGS99nvveGGNM9ePLGP7nQCoQIyIZeObG0YO/VbXh4XY2xhhTPVTaw1fVKara\nCPhUVRuqaoOSv6sgRmOMMX7g80FbVa3sguXGGGOqMV8O2mZSfJXYg1Md25COMcbUML5c8ap2XxnB\nGGPqCMfXDhOR5pS+4tV2v0ZkjDEmIJxMnna6iGwAtgA/AFuB/wUoLmOMMX7m5Ju2jwCDgfWq2hEY\nBfwckKiMMcb4nZOCX6CqaUCIiISo6nd4plcwxhhTAzgZw08XkfrAAuBtEdkNFAYmLGOMMf7mpIc/\nAcgBbsXz7dtNwPhABGWMMcb/nFziMKvE4usBiMUYY0wAVdrDF5GfvL8zRSTj0N+BD9EYY4w/+PLF\nq+O9vwP+BSyXhpDprhfohzEBkq2RwQ4hYPbm1w92CAG1Oj/YEQRWhis22CEEmG9fh3L0xSsRaQK0\nLbmfqi5zFJcxxpig8Lngi8gjwOXAZsDtXa3Aif4PyxhjjL856eGfB3RW1Vr+4c8YY2onJ6dl/g40\nDlQgxhhjAstJD/8x
YLmI/A7kHVypqqf7PSpjjDF+56Tgvw5MB1ZTPIZvjDGmhnBS8FNV9bmARWKM\nMSagnBT8pSLyGDCX0kM6dlqmMcbUAE4Kfl/v78El1tlpmcYYU0M4mUtnZCADMcYYE1hOrnjVSESe\nFpEl3p/lKQiHAAAMUklEQVSnRKRRIIMzxhjjP07Ow38FyMTzBazzgAzg1UAEZYwxxv+cjOF3VtWz\nSyw/JCIr/B2QMcaYwHDSw88RkeMPLojIcXguiGKMMaYGcNLDnwy84R23F2AvnsnUjDHG1ABOztJZ\nCfQWkYbeZbv4iTHG1CBOpkeOBM4GOgBhIgKAqj4ckMiMMcb4lZMhnTnAfmApJb5pa4wxpmZwUvDb\nqOqpAYvEGGNMQDk5S+cXEekZsEiMMcYElJMe/vHA5SKyBc+QjgCqqr0CEpkxxhi/clLwxwQsigBY\n9kMGLz+yC7dLOfn8WM6e3KLU9n8/upPVCw8AkJ/jJj2tkHdW9GTzmhxevH8H2QdchIQI517fnOPH\nNQlGCodVm/OrzbmB5VfT81u1IJ23pm7D7VJOOLc5469tVWr729O2sXah5yTGvFw3mWkFvLC0PwBP\nTPqTTSsO0LVfA25/qXuVx+6k4N8IvKKqa3zdQUReAcYBu1X1WKfBHSmXS3nxwZ089Hon4uLDmXLm\nBgaOakTbrlFFbSbd17ro9vzX97Bljec7ZJH1Qrj5iXa06hjJ3pQCbp+wnj7DG1K/YWhVhV+p2pxf\nbc4NLD+o2fm5XcobD23l/149itj4CB44+w8SRjWmdZfoojYX39O+6PaXbySzbW120fLYSS3Jz3Xz\n7Xu7qzTug5yM4f8J/EtEfhORyT5OnPYaUOUHejeszKZl+wji20USHhHC8eMa89vX+yts/+O8dIaN\n9/QkWneMpFXHSABiW4TTKC6MjLTCKonbV7U5v9qcG1h+h6pp+W1adYDm7aNo3i6KsIgQBp8Wy7Kv\n91XYfuGnaQwZF1e0fMzQRkTFBO8NzOeCr6ovq+pxwKV4zsVfJSLviEiF0yar6gI838itUntTCmja\nMqJoOS4+nL0pBeW23b0zn9078uk5pH6ZbetXZlNYoMS3jyhnz+CpzfnV5tzA8iupJua3LyWfuPji\nmGLjI9hXQX6pO/PYsyOPHoMbVlV4lXLSw0dEQoGjvD+pwErgNhF5LwCxHTFV39v+ND+dIac2IjRU\nSq3fu7uAZ27fzo3T2xISIhXsHRy1Ob/anBtYfiXVxPwoL78KQlz4aRoDRscSElp9cnAyH/7TwDpg\nLDBNVfup6nRVHU/x1bAcE5FrDs6xn7HXPx/f4uLDSU3KL1pOSy4gtkV4uW1/nL+P4eNLHxjKznTx\n6FVbuPi2eLr3jfFLTP5Um/OrzbmB5VdSTcyvSXwEacnF+e1NzqdJ8/LzO3Q4pzpw0sP/Heilqteq\n6qJDtg080gBU9SVV7a+q/RvGOjmGXLGuvaJJ2ppPSmIeBflufpqfzsBRZQ857Nycy4H9LronFB9w\nKch389h1WxlxZhOOG9vYL/H4W23OrzbnBpbfQTU1v04965OyNZc9ibkU5rtZ+Ole+o4qeyZR0uYc\nsjMK6dK37HBVMFVaYUUkwXtzBXDUwTl0DlLVZapa8VGZIAgNE65+oDUPXb4ZlxtOOieWdt2ieGdm\nMl161mPgSZ4X4IJ56Qwb15iSOf382X7WLD5AZnoh337kOfxw04x2dOpRLyi5lKc251ebcwPLrzbk\nd+n9HZgxaR3qUoaf04w2XaP56NkddDw2hgRv8f91fhqDxsZxaL189MI1JG3OITfbxc3DljFpWid6\nDau6NzfRSgbdROS7w2xWVa3wIuYi8i4wAmgKpAAPqOq/K2rfpWe0PjWn22HjMcYYpzJcUZU3qsEu\n7fbbUlXtX1m7Snv4f+Xi5ap64ZHua4wxxr+cTI8cDlwHDPeu+h54UVXLPyfJGGNMte
LkKOlsIByY\n5V2e6F13lb+DMsYY439OCv4AVe1dYvlbEVnp74CMMcYEhpPTMl0i0vnggoh0Alz+D8kYY0wgOOnh\nTwG+E5HN3uUOwBV+j8gYY0xAOOnh/wy8CLi9Py8CvwYiKGOMMf7npIf/BpABPOJdvhB4EzjX30EZ\nY4zxPycFv/shB22/s4O2xhhTczgZ0lkuIoMPLojIIDzDPMYYY2oAJz38QcClIrLdu9wOWCsiq7Fr\n2xpjTLXnpOBX+ZWrjDHG+I/PBV9VtwUyEGOMMYHl6IpXxhhjai4r+MYYU0dYwTfGmDrCCr4xxtQR\nVvCNMaaOsIJvjDF1hBV8Y4ypI6zgG2NMHWEF3xhj6ggr+MYYU0dYwTfGmDpCVDXYMRQRkT1AVc7Z\n0xRIrcLHq2qWX81m+dVcVZ1be1VtVlmjalXwq5qILFHV/sGOI1Asv5rN8qu5qmtuNqRjjDF1hBV8\nY4ypI+p6wX8p2AEEmOVXs1l+NVe1zK1Oj+EbY0xdUtd7+MYYU2fUqoIvIqeKyDoR2Sgid5WzPVJE\n3vdu/01EOpTYdrd3/ToRGV1i/SsisltEfq+aLHxzpLmKSJyIfCciB0Tkn1Ud95HwIdfhIrJMRApF\n5JxgxOhP1fU1d6TKy0dEYkXkKxHZ4P3dJJgxOuUkJ/F4zvv6XSUiCcGKu9YUfBEJBZ4HxgA9gAtF\npMchzSYB+1S1CzATmO7dtwdwAXAMnou1z/LeH8BrVLMLuP+VXIFc4O/AHVUU7l/iY67bgcuBd6o2\nuoB5jWr2mvuLXqNsPncB36hqV+Ab73JN8hq+5zQG6Or9uQaYXUUxllFrCj4wENioqptVNR94D5hw\nSJsJwOve2x8Co0REvOvfU9U8Vd0CbPTeH6q6ANhbFQk4cMS5qmqWqv6Ep/DXBJXmqqpbVXUV4A5G\ngP5WTV9zR6yCfEq+Pl8HzqjSoP4ihzlNAN5Qj4VAYxFpWTWRllabCn5rILHE8g7vunLbqGohsB+I\n83Hf6uSv5FrT1LTnxvimhaomAXh/Nw9yPP5QUU7V5jVcmwq+lLPu0FOQKmrjy77VyV/JtaapLXmY\nuqvavIZrU8HfAbQtsdwG2FVRGxEJAxrh+Vjmy77VyV/Jtaapac+N8U3KwWEN7+/dQY7HHyrKqdq8\nhmtTwV8MdBWRjiISgecg7NxD2swFLvPePgf4Vj1fRJgLXOA9s6UjnoMri6oo7iPxV3KtaXzJ1dQ8\nJV+flwFzghiLv1SU01zgUu/ZOoOB/QeHfqqcqtaaH2AssB7YBNzrXfcwcLr3dhTwAZ6DsouATiX2\nvde73zpgTIn17wJJQAGed+pJwc7TD7luxdPbP+DNqUew8/mLuQ7w5pEFpAF/BDvmv5hvtXzN+TMf\nPMeTvgE2eH/HBjvOQOWEZ0jnee/rdzXQP1hx2zdtjTGmjqhNQzrGGGMOwwq+McbUEVbwjTGmjrCC\nb4wxdYQVfGOMqSOs4BtjTB1hBd/UaCLymYg0Lmf9gyIStBlBReSAP9oY409hwQ7AmCPlnel0nKrW\nilkyjQk06+GbGkVEOojIWhGZBSwDXCLS1LvtXu+FUr4GupfYZ4D3whO/isgTBy9aISKh3uXF3u3X\nOoylvoh84734ymoROXSKakRkhIgsEJFPRGSNiLwgIiEltk8VkZUislBEWnjXjfdetGa5iHx9cL0x\nf5UVfFMTdcczv3hfYBuAiPTDM89OX+AsPNMtHPQqMFlVhwCuEusn4ZnXZIC3/dXeuZR8lQucqaoJ\nwEjgKe+njkMNBG4HegKdvfEBxAALVbU3sAC42rv+J2CwN7/3gP9zEJMxFbIhHVMTbVPPhSRKGgZ8\noqrZACIy1/u7MdBAVX/xtnsHGOe9fQrQq8RlERvhmThvi49xCDBNRIbjufhKa6AFkHxIu0Wqutkb\nz7vA8XguSpMPzPe2WQqc7L3dBnjfO+NihIN4jD
ksK/imJsqqYH15E0OV1+Muue1GVf3iCOO4GGgG\n9FPVAhHZimfSusriOrhcoMWTWbko/n/8B/C0qs4VkRHAg0cYnzGl2JCOqS0WAGeKSD0RaQCMB1DV\nfUCmd1pa8Az7HPQFcJ2IhAOISDcRiXHwmI2A3d5iPxJoX0G7gd7pnUOA8/EM2VR2vzu9ty87XENj\nnLAevqkVVHWZiLwPrMAzrv9jic2TgH+JSBbwPZ7LPQK8DHQAlnnH3vfg7NqqbwPzRGSJ93H/rKDd\nr8DjeMbwFwCfVHK/DwIfiMhOYCHg5LiCMRWy6ZFNrSci9VX1gPf2XUBLVb25ih57BHCHqo6rrK0x\ngWY9fFMXnCYid+N5vW8DLg9uOMYEh/XwjSlBRHoCbx6yui2QeMi6PFUdVDVRGeMfVvCNMaaOsLN0\njDGmjrCCb4wxdYQVfGOMqSOs4BtjTB1hBd8YY+qI/weCj+imwMGuegAAAABJRU5ErkJggg==\n", 107 | "text/plain": [ 108 | "" 109 | ] 110 | }, 111 | "metadata": {}, 112 | "output_type": "display_data" 113 | } 114 | ], 115 | "source": [ 116 | "mglearn.tools.heatmap(grid.cv_results_['mean_test_score'].reshape(3, -1),\n", 117 | " xlabel=\"ridge__alpha\", ylabel=\"polynomialfeatures__degree\",\n", 118 | " xticklabels=param_grid['ridge__alpha'],\n", 119 | " yticklabels=param_grid['polynomialfeatures__degree'], vmin=0)\n", 120 | "plt.show()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 16, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "name": "stdout", 130 | "output_type": "stream", 131 | "text": [ 132 | "Best parameters: {'polynomialfeatures__degree': 2, 'ridge__alpha': 10}\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "print(\"Best parameters: {}\".format(grid.best_params_))" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 17, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "Test-set score: 0.77\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "print(\"Test-set score: {:.2f}\".format(grid.score(X_test, y_test)))" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 18, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "Score without poly features: 0.63\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 171 | 
"param_grid = {'ridge__alpha': [0.001, 0.01, 0.1, 1, 10, 100]}\n", 172 | "pipe = make_pipeline(StandardScaler(), Ridge())\n", 173 | "grid = GridSearchCV(pipe, param_grid, cv=5)\n", 174 | "grid.fit(X_train, y_train)\n", 175 | "print(\"Score without poly features: {:.2f}\".format(grid.score(X_test, y_test)))" 176 | ] 177 | } 178 | ], 179 | "metadata": { 180 | "kernelspec": { 181 | "display_name": "Python 3", 182 | "language": "python", 183 | "name": "python3" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": { 187 | "name": "ipython", 188 | "version": 3 189 | }, 190 | "file_extension": ".py", 191 | "mimetype": "text/x-python", 192 | "name": "python", 193 | "nbconvert_exporter": "python", 194 | "pygments_lexer": "ipython3", 195 | "version": "3.6.2" 196 | } 197 | }, 198 | "nbformat": 4, 199 | "nbformat_minor": 2 200 | } 201 | -------------------------------------------------------------------------------- /ch06/selection-of-model-by-grid-searching.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# グリッドサーチによるモデルの選択" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "GridSearchCVとPipelineの組み合わせでは,実際に行われるステップに対してもサーチすることが可能." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "from sklearn.svm import SVC\n", 26 | "from sklearn.datasets import load_breast_cancer\n", 27 | "from sklearn.model_selection import train_test_split\n", 28 | "from sklearn.preprocessing import MinMaxScaler\n", 29 | "import mglearn\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "from sklearn.pipeline import Pipeline\n", 32 | "from sklearn.model_selection import GridSearchCV\n", 33 | "from sklearn.pipeline import make_pipeline\n", 34 | "from sklearn.preprocessing import StandardScaler\n", 35 | "from sklearn.linear_model import Ridge" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "from sklearn.datasets import load_boston\n", 45 | "from sklearn.preprocessing import PolynomialFeatures\n", 46 | "\n", 47 | "pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', SVC())])" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": { 54 | "collapsed": true 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "from sklearn.ensemble import RandomForestClassifier\n", 59 | "\n", 60 | "param_grid = [\n", 61 | " {\n", 62 | " 'classifier': [SVC()], 'preprocessing': [StandardScaler(), None],\n", 63 | " 'classifier__gamma': [0.001, 0.01, 0.1, 1, 10, 100],\n", 64 | " 'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100]\n", 65 | " },\n", 66 | " {\n", 67 | " 'classifier': [RandomForestClassifier(n_estimators=100)],\n", 68 | " 'preprocessing': [None], 'classifier__max_features': [1, 2, 3]\n", 69 | " }\n", 70 | "]" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 6, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "Best params:\n", 83 | "{'classifier': SVC(C=10, cache_size=200, class_weight=None, 
coef0=0.0,\n", 84 | " decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',\n", 85 | " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", 86 | " tol=0.001, verbose=False), 'classifier__C': 10, 'classifier__gamma': 0.01, 'preprocessing': StandardScaler(copy=True, with_mean=True, with_std=True)}\n", 87 | "\n", 88 | "Best cross-validation score: 0.99\n", 89 | "Test-set score: 0.98\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "cancer = load_breast_cancer()\n", 95 | "\n", 96 | "X_train, X_test, y_train, y_test = train_test_split(\n", 97 | " cancer.data, cancer.target, random_state=0)\n", 98 | "\n", 99 | "grid = GridSearchCV(pipe, param_grid, cv=5)\n", 100 | "grid.fit(X_train, y_train)\n", 101 | "\n", 102 | "print(\"Best params:\\n{}\\n\".format(grid.best_params_))\n", 103 | "print(\"Best cross-validation score: {:.2f}\".format(grid.best_score_))\n", 104 | "print(\"Test-set score: {:.2f}\".format(grid.score(X_test, y_test)))" 105 | ] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "Python 3", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.6.2" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 2 129 | } 130 | -------------------------------------------------------------------------------- /ch06/the-general-pipeline-interface.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 汎用パイプラインインターフェース" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 8, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from sklearn.svm import SVC\n", 17 | 
"from sklearn.datasets import load_breast_cancer\n", 18 | "from sklearn.model_selection import train_test_split\n", 19 | "from sklearn.preprocessing import MinMaxScaler\n", 20 | "import mglearn\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "from sklearn.pipeline import Pipeline\n", 23 | "from sklearn.model_selection import GridSearchCV\n", 24 | "from sklearn.pipeline import make_pipeline" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 19, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "cancer = load_breast_cancer()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 13, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "def fit(self, X, y):\n", 47 | " X_transformed = X\n", 48 | " for name, estimator in self.steps[:-1]:\n", 49 | " X_transformed = estimator.fit_transform(X_transformed, y)\n", 50 | " self.steps[-1][1].fit(X_transformed, y)\n", 51 | " return self" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 14, 57 | "metadata": { 58 | "collapsed": true 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "def predict(self, X):\n", 63 | " X_transformed = X\n", 64 | " for step in self.steps[:-1]:\n", 65 | " X_transformed = step[1].transform(X_transformed)\n", 66 | " return self.steps[-1][1].predict(X_transformed)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "## make_pipelineによる簡便なパイプライン生成" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 9, 79 | "metadata": { 80 | "collapsed": true 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "pipe_long = Pipeline([(\"scaler\", MinMaxScaler()), (\"svm\", SVC(C=100))])\n", 85 | "pipe_short = make_pipeline(MinMaxScaler(), SVC(C=100))" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 10, 91 | "metadata": {}, 92 | "outputs": [ 93 | { 94 | "name": "stdout", 95 | "output_type": 
"stream", 96 | "text": [ 97 | "Pipeline steps:\n", 98 | "[('minmaxscaler', MinMaxScaler(copy=True, feature_range=(0, 1))), ('svc', SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,\n", 99 | " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", 100 | " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", 101 | " tol=0.001, verbose=False))]\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "print(\"Pipeline steps:\\n{}\".format(pipe_short.steps))" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 16, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "Pipeline steps:\n", 119 | "[('standardscaler-1', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,\n", 120 | " svd_solver='auto', tol=0.0, whiten=False)), ('standardscaler-2', StandardScaler(copy=True, with_mean=True, with_std=True))]\n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "from sklearn.preprocessing import StandardScaler\n", 126 | "from sklearn.decomposition import PCA\n", 127 | "\n", 128 | "pipe = make_pipeline(StandardScaler(), PCA(n_components=2), StandardScaler())\n", 129 | "print(\"Pipeline steps:\\n{}\".format(pipe.steps))" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "## ステップ属性へのアクセス" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 20, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "name": "stdout", 146 | "output_type": "stream", 147 | "text": [ 148 | "components.shape: (2, 30)\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "pipe.fit(cancer.data)\n", 154 | "\n", 155 | "components = pipe.named_steps[\"pca\"].components_\n", 156 | "print(\"components.shape: {}\".format(components.shape))" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 
| "source": [ 163 | "## GridSearchCV内のパイプライン属性へのアクセス" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 22, 169 | "metadata": { 170 | "collapsed": true 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "from sklearn.linear_model import LogisticRegression\n", 175 | "\n", 176 | "pipe = make_pipeline(StandardScaler(), LogisticRegression())" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 23, 182 | "metadata": { 183 | "collapsed": true 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "param_grid = {'logisticregression__C': [0.01, 0.1, 1, 10, 100]}" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 24, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "data": { 197 | "text/plain": [ 198 | "GridSearchCV(cv=5, error_score='raise',\n", 199 | " estimator=Pipeline(memory=None,\n", 200 | " steps=[('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('logisticregression', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", 201 | " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", 202 | " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", 203 | " verbose=0, warm_start=False))]),\n", 204 | " fit_params=None, iid=True, n_jobs=1,\n", 205 | " param_grid={'logisticregression__C': [0.01, 0.1, 1, 10, 100]},\n", 206 | " pre_dispatch='2*n_jobs', refit=True, return_train_score=True,\n", 207 | " scoring=None, verbose=0)" 208 | ] 209 | }, 210 | "execution_count": 24, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "X_train, X_test, y_train, y_test = train_test_split(\n", 217 | " cancer.data, cancer.target, random_state=4)\n", 218 | "grid = GridSearchCV(pipe, param_grid, cv=5)\n", 219 | "grid.fit(X_train, y_train)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 25, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | 
"name": "stdout", 229 | "output_type": "stream", 230 | "text": [ 231 | "Best estimator:\n", 232 | "Pipeline(memory=None,\n", 233 | " steps=[('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('logisticregression', LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,\n", 234 | " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", 235 | " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", 236 | " verbose=0, warm_start=False))])\n" 237 | ] 238 | } 239 | ], 240 | "source": [ 241 | "print(\"Best estimator:\\n{}\".format(grid.best_estimator_))" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 26, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "name": "stdout", 251 | "output_type": "stream", 252 | "text": [ 253 | "Logistic regression step:\n", 254 | "LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,\n", 255 | " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", 256 | " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", 257 | " verbose=0, warm_start=False)\n" 258 | ] 259 | } 260 | ], 261 | "source": [ 262 | "print(\"Logistic regression step:\\n{}\".format(grid.best_estimator_.named_steps[\"logisticregression\"]))" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 27, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "name": "stdout", 272 | "output_type": "stream", 273 | "text": [ 274 | "Logistic regression coefficients:\n", 275 | "[[-0.38856355 -0.37529972 -0.37624793 -0.39649439 -0.11519359 0.01709608\n", 276 | " -0.3550729 -0.38995414 -0.05780518 0.20879795 -0.49487753 -0.0036321\n", 277 | " -0.37122718 -0.38337777 -0.04488715 0.19752816 0.00424822 -0.04857196\n", 278 | " 0.21023226 0.22444999 -0.54669761 -0.52542026 -0.49881157 -0.51451071\n", 279 | " -0.39256847 -0.12293451 -0.38827425 -0.4169485 -0.32533663 -0.13926972]]\n" 280 | ] 281 | } 282 | ], 283 | 
"source": [ 284 | "print(\"Logistic regression coefficients:\\n{}\".format(grid.best_estimator_.named_steps[\"logisticregression\"].coef_))" 285 | ] 286 | } 287 | ], 288 | "metadata": { 289 | "kernelspec": { 290 | "display_name": "Python 3", 291 | "language": "python", 292 | "name": "python3" 293 | }, 294 | "language_info": { 295 | "codemirror_mode": { 296 | "name": "ipython", 297 | "version": 3 298 | }, 299 | "file_extension": ".py", 300 | "mimetype": "text/x-python", 301 | "name": "python", 302 | "nbconvert_exporter": "python", 303 | "pygments_lexer": "ipython3", 304 | "version": "3.6.2" 305 | } 306 | }, 307 | "nbformat": 4, 308 | "nbformat_minor": 2 309 | } 310 | -------------------------------------------------------------------------------- /ch06/using_pipelines_in_grid-searches.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# パイプラインを用いたグリッドサーチ" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "パイプラインをグリッドサーチで用いる方法は他のEstimatorを用いる場合と全く同じである.サーチするパラメータグリッドを定義し,パイプラインとパラメータグリッドを用いてGridSearchCVをつくる.\n", 15 | "ただし,パラメータグリッドを定義する際には,パイプラインのどのステップに属するかを指定する必要がある.命名方法は「(ステップ名)__(パラメータ名)」で指定を行う."
16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "from sklearn.svm import SVC\n", 25 | "from sklearn.datasets import load_breast_cancer\n", 26 | "from sklearn.model_selection import train_test_split\n", 27 | "from sklearn.preprocessing import MinMaxScaler\n", 28 | "import mglearn\n", 29 | "import matplotlib.pyplot as plt\n", 30 | "from sklearn.pipeline import Pipeline\n", 31 | "from sklearn.model_selection import GridSearchCV" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 5, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "cancer = load_breast_cancer()\n", 41 | "X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, random_state=0)\n", 42 | "\n", 43 | "pipe = Pipeline([(\"scaler\", MinMaxScaler()), (\"svm\", SVC())])" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "ここのパラメータにはパイプラインのどのステップに属するか指定する必要がある。SVCのCパラメータに対してサーチするには\"svm__C\"をディクショナリのキーにする。" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 6, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "param_grid = {\n", 60 | " 'svm__C': [0.001, 0.01, 0.1, 1, 10, 100],\n", 61 | " 'svm__gamma': [0.001, 0.01, 0.1, 1, 10, 100]\n", 62 | "}" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 7, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "Best cross-validation accuracy: 0.98\n", 75 | "Test set score: 0.97\n", 76 | "Best parameters: {'svm__gamma': 1, 'svm__C': 1}\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "grid = GridSearchCV(pipe, param_grid=param_grid, cv=5)\n", 82 | "grid.fit(X_train, y_train)\n", 83 | "print(\"Best cross-validation accuracy: {:.2f}\".format(grid.best_score_))\n", 84 | "print(\"Test set score: {:.2f}\".format(grid.score(X_test, y_test)))\n", 85 | "print(\"Best 
parameters: {}\".format(grid.best_params_))" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "以前の行ったグリッドサーチ(parameter_selection_with_preprocessing)とは違い、交差検証の個々の分割に対して、学習用のパートのみを対象にMinMaxScalerをfitし直すので、テスト用のパートから情報がパラメータサーチに漏れることはない。 " 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 14, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "data": { 102 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1cAAAHcCAYAAAA6KlDCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xl4VeW5/vH7IQNhDoMMkRpwrj0KDsWJQlTUygyiOFSh\nFFrtsVUrB6W1J8FfBxQHVOqxBQt1wCpFERFBLaCoYLEIHitWCwQUmcooEdCE5/fHWsnZSXYGwkr2\nhnw/1/VeyV7rXu96d3ayVp69hm3uLgAAAADAoWmQ6AEAAAAAwJGA4goAAAAAIkBxBQAAAAARoLgC\nAAAAgAhQXAEAAABABCiuAAAAACACFFcAgMOSmbmZHR9+/6iZ/bI62Rqs51oze6Wm4wQA1B/G51wB\nQP1hZtdI+pmkkyV9IWmFpF+7+5sJHVgNmJlLOsHd/xVV1sw6SVorKc3dC6MYJwCg/uDIFQDUE2b2\nM0kTJf1GUjtJx0h6RNKACvKpdTc6AAAOfxRXAFAPmFkLSXdJ+k93f87dC9z9a3d/0d3/K8zkmdlf\nzOxJM9stabiZNTSziWb2edgmmlnDMN/GzOaY2U4z225mi82sQTjvdjPbYGZfmNk/zeyiOGM6x8w2\nmVlKzLRBZvZ++H03M1sS9r/RzCaZWXoFz2+amf0q5vF/hct8bmYjymT7mNl7ZrbbzD41s7yY2W+E\nX3ea2R4zO9fMhpvZmzHLn2dmy8xsV/j1vJh5i8zs/5nZW+Fzf8XM2lTvVQIAHO4orgCgfjhXUoak\n56vIDZD0F0mZkp6S9AtJ50jqKqmLpG6S7gyzt0n6TNJRCo6E/VySm9lJkm6S9G13bybpUkn5ZVfk\n7kslFUi6MGbyNZKmh98XSbpVUptw/BdJ+nFVT9TMvitptKSLJZ0gqVeZSIGk68Pn2EfSjWY2MJzX\nI/ya6e5N3X1Jmb5bSXpJ0kOSWku6X9JLZta6zHP4vqS2ktLDsQAA6gGKKwCoH1pL+nc1riNa4u6z\n3P2Au++VdK2ku9x9i7tvlTRO0nVh9mtJHSRlh0fBFntwIW+RpIaSTjGzNHfPd/fVFazvaUlXS5KZ\nNZPUO5wmd/+7uy9190J3z5f0e0k9q/Fcr5Q01d0/cPcCSXmxM919kbv/b/gc3w/XV51+paAY+8Td\nnwjH9bSkjyT1i8lMdfePw5/fswoKUwBAPUBxBQD1wzZJbapxHdWnZR5nSVoX83hdOE2SJkj6l6RX\nzGyNmd0hSeFNI25RUNRsMbM/m1mW4psuaXB4quFgScvdfZ0kmdmJ4WmHm8LTFH+j4ChWVbLKPI/Y\n8cvMzjazhWa21cx2Sbqhmv0W972uzLR1ko6Oebwp5vsvJTWtZt8AgMMcxRUA1A9LJO2TNLCKXNlb\nyH4uKTvm8THhNLn7F+5+m7sfq+DIzc+Kr61y9+nu3j1c1iXdHXdl7h8qKE4uU+lTAiXpfxQcFTrB\n3ZsrOO3Qqhi/JG2U9I0yY441XdJsSd9w9xaSHo3pt6pb6Jb9eRT3v6Ea4wIAHOEorgCgHnD3XZL+
\nW9LvzGygmTU2szQzu8zM7qlk0acl3WlmR4U3ZvhvSU9Kkpn1NbPjzcwk7VZwOmCRmZ1kZheGR6P2\nSdobzqvIdEk/VXC904yY6c3CfveY2cmSbqzm031Wwc04TjGzxpJyy8xvJmm7u+8zs24KirpiWyUd\nkHRsBX3PlXSimV1jZqlmNlTSKZLmVHNsAIAjGMUVANQT7n6/gs+4ulNBEfGpghtPzKpksV9JelfS\n+5L+V9LycJoU3CziNUl7FBwZe8TdFym43mq8pH8rOEWurYKjThV5WlKOpAXu/u+Y6aMVFD5fSJos\n6ZlqPs+XFdxyfoGC0xYXlIn8WNJdZvaFgmLx2Zhlv5T0a0lvhXcpPKdM39sk9VVwM49tksZI6ltm\n3ACAeooPEQYAAACACHDkCgAAAAAiQHEFAAAAABGguAIAAACACFBcAQAAAEAEKK4AAAAAIAIUVwAA\nAAAQAYorAAAAAIgAxRUAAAAARIDiCgAAAAAiQHEFAAAAABFITfQAgPqoUaNGm/bt29cu0eM4XGVk\nZGzeu3dv+0SPA0hmbGfAthKoe+buiR4DUO+YmfO3V3NmJne3RI8DSGZsZ8C2Eqh7nBYIAAAAABGg\nuAIAAACACFBcAQAAAEAEKK4AAAAAIAIUVwAitXv3bv30pz9Vp06dlJqaKjPTihUrtGjRIpmZ8vLy\nEj1EAKhVw4cPl5kpPz+/ZFp+fr7MTMOHD0/YuADUPoorIAkVFRVp8uTJ6tmzp1q1aqW0tDS1bdtW\np512mkaOHKnZs2dLkg4cOKBjjjlGZqYPP/yw0j737t2rzMxMpaena8uWLaXmFRQUaOLEibrwwgvV\ntm1bpaenKzMzU926ddMvfvELrVmzptpjHzNmjB5++GGdeuqpGjt2rHJzc9W+fcV3As7JyZEZN7MC\n6trhvJ2pr+IVbQCSC59zBSSZoqIi9e3bV/PmzVNmZqb69Omjjh07avv27Vq9erWmT5+ujz76SP37\n91eDBg00YsQIjRs3TlOmTNH9999fYb/PPvusdu3apSFDhqht27Yl05cuXaohQ4Zow4YN6tixo3r3\n7q2srCwVFBTovffe0913360JEyZo6dKlOuOMM6oc/5w5c3TiiSfqxRdfLDW9efPmWrVqldq0aVPz\nHw6ASBzu25nD0dFHH61Vq1apRYsWiR4KgNrk7jQarY5b8KcX3xNPPOGSvEuXLr5z585y8wsKCnzB\nggUlj9evX+8pKSnepk0b379/f4X9du/e3SX5K6+8UjJt1apV3rx5c2/QoIGPHz/ev/7663LLrVmz\nxq+44gpfuHBhhX3HMjPv2bNntbLu7j179vTKfh7xhPmEv440WjK3I3k7k+yGDRvmknzt2rUJ7Zdt\nJY1W9y3hA6DR6mOr7J+eG2+80SX5Aw88UGGmrD59+rgk//Of/xx3/qpVq1ySd+7c2Q8cOFAyvVev\nXi7Jx44dW+U69u3bV+n84iKpbCsutBYuXOiSPDc3193d165dGzcfu0xF+IeBRqu6HYnbGXf3qVOn\nuiSfOnWqz5kzx88991xv3LixZ2Zm+uWXX+4ff/xxuWWKi5LVq1f7Qw895KeeeqpnZGSU29bMmzfP\nL7vsMm/durWnp6f7scce66NHj/YdO3bEHcurr77q3bt398aNG3vLli19wIABvmrVqrhFUPE2b9iw\nYeX6KSgo8PHjx/uZZ57pTZs29SZNmvjJJ5/sP/nJT3zTpk3u7hVuL7Ozsyv8WbGtpNHqvnFaIJBk\nWrduLUn6+OOPq73MqFGj9NJLL2nKlCkaOnRouflTpkyRJI0cObLk+qa1a9fqtddeU0ZGhsaMGVPl\nOho2bFjp/OHDhysnJ0fjxo1TdnZ2yUXbnTp1ipvPzMxUbm6upk2bpnXr1ik3N7dkXkXLAIjG4bqd\nifXcc8/p5Zdf1qBBg5STk6MVK1Zo5syZWrhwod5++22ddNJJ
5Za5+eabtXjxYvXp00e9e/dWSkpK\nyby77rpLubm5atWqlfr27au2bdvq/fff17333qu5c+dqyZIlat68eUn+L3/5i4YOHar09HQNHTpU\nHTp00Jtvvqlzzz1Xp512WrWfx44dO3TBBRdo5cqVOumkkzRixAilp6dr9erV+uMf/6jBgwerXbt2\nys3N1axZs7Ry5UrdfPPNyszMlKSSrwCSRKKrOxqtPjZV8o7y8uXLPS0tzc3Mv/e97/nMmTM9Pz+/\nwry7e2FhoWdlZbmZ+Zo1a0rN279/vx911FGemprqGzduLJn++OOPuyQ///zzK+37YKmCI09lj1wV\n47RAGq122pG6nSk+ciXJX3zxxVLzJk6c6JL8wgsvLDW9+EhSVlZWubG7uy9YsMAl+bnnnlvuKFXx\n+m655ZaSaV988YW3atXKU1NTfdmyZaXyt9xyS8n4qnPk6uqrr3ZJfsMNN3hRUVGpebt37y512ian\nBdJoyd+4WyCQZE4//XQ9+eSTateunZ588kldfvnl6tSpk1q3bq1BgwaVu1GEJKWkpGjEiBFydz32\n2GOl5r3wwgvaunWr+vXrV+qufRs3bpQkdezYsXafEICkcyRsZy688EL17du31LSbbrpJxx13nBYs\nWKB169aVW2bMmDHq3LlzuekPPfSQJGny5MnljgQNHz5cXbt21VNPPVUy7YUXXtD27dt1zTXX6Kyz\nziqVz8vLq/ZNK7Zs2aJnnnlGHTp00L333qsGDUr/W9asWTNugAEcZiiugCR05ZVXav369Zo/f75+\n+ctfqm/fvjpw4IBmzZql/v37a9iwYXL3UsuMHDlSDRo00NSpU1VUVFQyffLkyZKkH/7wh6Xyxctz\nG3SgfjrctzM9e/YsNy0lJUXdu3eXJL333nvl5nfr1i1uX0uWLFFaWppmzJihvLy8cu2rr77S1q1b\ntW3bNknS8uXLKxxDixYt1LVr12o9h2XLlunAgQPq0aOHmjRpUq1lACQ3rrkCklRaWpouueQSXXLJ\nJZKCWyfPnDlTI0aM0OOPP65BgwZp4MCBJfns7GxdfPHFmj9/vubOnat+/fopPz9fr732mrKzs0v6\nKZaVlSVJ+uyzz+ruSQFIKofzdqZdu3ZxpxcfOdu1a1eF88ratm2bCgsLNW7cuErXuWfPHrVu3bqk\n76rGUJWdO3dKCm7TDuDIwJEr4DCRkpKiK6+8UrfeeqskacGCBeUyxe8aF7+L/Nhjj8nd9YMf/KDc\n6SbF7+6+++67cf8JAVD/HE7bmc2bN8edvmnTJkmKezpdRUfQWrRooZYtW1Z5LUV2dnapvqsaQ1WK\nT0HcsGFDtfIAkh/FFXCYadasmSSVO11Hkvr376/27dtr7ty5+vTTTzVt2rSS6yTK6ty5s3r16qV9\n+/ZpwoQJVa53//79hz74OIrv1hV7ihGAxDoctjOvv/56uWlFRUV68803JQXXlVXXOeecox07dugf\n//hHtfLFH3Qcbwy7du3SihUrqtVPt27d1KBBA73xxhsqKCioMs/2Ekh+FFdAknn66af16quv6sCB\nA+Xmbdq0qeTd4h49epSbn5qaquHDh6uoqEjXXnutPvvsM/Xu3bvCU04efvhhNW/eXL/97W913333\nqbCwsFxm/fr1uuqqq7RkyZJDfGbxFd8Sev369bXSP4DyjoTtzIIFCzRnzpxS0yZNmqTVq1frggsu\nKDnKVB3FR+pGjRqlzz//vNz8goICLV26tOTxgAED1LJlS02fPl3vvvtuqWxeXl61j9IdddRRuuqq\nq7Rx40aNHj263OuxZ8+eUn2xvQSSH9dcAUnmnXfe0YMPPqj27dure/fuJXe2Wrt2rV566SXt3btX\nAwYM0JAhQ+IuP2rUKN19991avHixpPIXmMc6+eSTNX/+fF1++eUaPXq0HnzwQV100UXKyspSQUGB\nVq5cqbfeektmpttvvz36
Jyvpoosu0owZMzR48GD17t1bjRo1UnZ2tq677rpaWR+AI2M7069fPw0a\nNEiDBg3S8ccfr5UrV2ru3Llq1aqVHnnkkYP4aQTbofHjx2vs2LE64YQT1Lt3b3Xu3Fl79uzRunXr\n9Prrr6t79+6aN2+eJKlp06b6wx/+oKFDh+o73/lOqc+5+uCDD9SjRw+98cYb1Vr3pEmT9MEHH+jR\nRx/VokWLdOmllyo9PV1r167V/PnzNXv2bOXk5JSMc8KECRo1apSGDBmipk2bKjMzUzfddNNBPV8A\ntSjR94Kn0epjUyWfP7N+/XqfNGmSDxw40E888URv1qyZp6Wlefv27f2yyy7zJ554otxnoZTVq1cv\nl+QdO3b0wsLCSrPuwWe23H///Z6Tk1PyWTXNmzf3M844w++44464nwtTER3k51wVFhb62LFjvXPn\nzp6amlrh8mXX4UnwOtJoydyO1O1M8edOTZ061V988UU/55xzvHHjxt6iRQsfPHiw//Of/yy3THU/\nH2rx4sV+xRVXeIcOHTwtLc3btGnjXbp08VtvvbXc51m5u7/yyit+/vnne6NGjTwzM9P79+/vq1at\niru+ij7nyt19z549/qtf/cpPPfVUb9SokTdt2tS/+c1v+s033+ybN28ulb3vvvv85JNP9vT0dJfk\n2dnZFT4ftpU0Wt03cy9/PjWA2mVmzt9ezZmZ3J17yAOVOFK3M9OmTdP3v/99TZ06VcOHD0/0cJIa\n20qg7nHNFQAAAABEgOIKAAAAACJAcQUAAAAAEeCaKyABjtRrIeoK1xEAVWM7A7aVQN3jyBUAAAAA\nRIDiCgAAAAAiQHEFAAAAABGguAIAAACACFBcAQAAAEAEUhM9AKA+ysjI2Gxm7RI9jsNVRkbG5kSP\nAUh2bGfAthKoe9yKHUApZjZO0k53fyDRYwGAmjCzoyVNkHSvuy9P9HgA1B8UVwBKmFkjSV9K+sLd\nmyd6PABQE2Z2g6T/kfSwu/800eMBUH9wzRWAWMXbhJSEjgIADk1Kma8AUCcorgAAAAAgAhRXAAAA\nABABiisAAAAAiADFFQAAAABEgOIKAAAAACJAcQUAAAAAEaC4AgAAAIAIUFwBAAAAQAQorgAAAAAg\nAhRXAAAAABABiisAAAAAiADFFQAAAABEgOIKAAAAACJAcQUAAAAAEaC4AgAAAIAIUFwBAAAAQAQo\nrgAAAAAgAhRXAAAAABCB1Nro1MxukPSd2ugbQK1qEn5tbGZPJXQkAGrqJXefXpMFzay1pHGSWkY7\npDp3Tfj1x2aWmdCRHLq9ku52908SPRAAVTN3j7ZDswxJBeKoGAAAibDT3WtUHJnZKEl/iHg8OHQT\n3f3WRA8CQNVq48hVioLC6itJI2qhfwC16xRJ+yWtTvRAAByUDElTJKUfQh/Fy/5V0tRDHlHipErq\nLulvkr5M8FgOxcWShunQXlMAdahWTgsMFbo7pxUBAFAHzKyJguIqCv88Avbhf0r0AA5VeErjsESP\nA0D1ceoeAAAAAESgNo5cfSVpt6TttdA3AACIL4r979bw65ZDHw4iwOsBHGYiv6GFJJnZKZL2uzvX\nbAAAUEcOdf9rZiXXKrn74Xyt0hGB1wM4/NRKcQUAAAAA9Q3XXAEAAABABCiuAAAAACACFFcAAAAA\nEAGKKwAAAACIAMUVAAAAAESA4goAAAAAIkBxBQAAAAARoLgCAAAAgAhQXAEAAABABCiuAAAAACAC\nFFcAAAAAEAGKKwAAAACIAMUVAAAAAESA4goAAAAAIkBxBQAAAAARoLgCAAAAgAhQXAEAAABABCiu\nAAAAACACFFcAAAAAEAGKKwAAAACIAMUVAAAAAEQgNdEDAI40jRo12rRv3752iR5HfZeRkbF57969\n7RM9DgAAUH+Yuyd6DMARxcycv6vEMzO5uyV6HAAAoP7gtEAAAAAAiADFFQAAAABEgOIKAA
AAACJA\ncQUAAAAAEaC4AuqhadOmycw0bdq0hI7j3Xff1cUXX6w2bdrIzNS1a1dJ0vDhw2Vmys/PT+j4AAAA\nDga3YgeQELt371afPn20b98+XXfddWrTpo3at6/4zun5+fnq3Lmzhg0blvCiEAAAIB6KKwAJ8be/\n/U1btmzRr3/9a/385z8vNe+3v/2t7rjjDh199NEJGh0AAMDBo7gCkBCff/65JCkrK6vcvA4dOqhD\nhw51PSQAAIBDwjVXQB2bPXu2LrroInXo0EENGzZUVlaWevbsqUceeaRcdvv27frFL36h//iP/1Dj\nxo3VokULdenSRXfccYcKCgpKcn//+9918803q0uXLmrVqpUyMjJ0wgkn6LbbbtOOHTsOanyfffaZ\nbrrpJh177LFq2LChWrdurf79+2vZsmXlsnl5eTIzLVq0SNOnT9fZZ5+tpk2bqlOnThX2n5+fLzPT\nsGHDJEnf//73ZWalrgEre81VXl6eOnfuLEn605/+VJJPhuvGAAAAinHkCqhDf/jDH/SjH/1I7du3\nV79+/dSmTRtt2bJF77//vqZOnaof//jHJdm1a9fqggsu0Lp163TmmWfqxhtv1IEDB/Txxx/rgQce\n0A033KAmTZpIkiZPnqznn39ePXv2VK9evVRUVKTly5fr/vvv18svv6x33nlHzZo1q3J8y5cv1yWX\nXKLt27fr0ksv1eDBg/Xvf/9bs2bNUvfu3fX888+rd+/e5Za777779Oqrr6pfv3664IILtGvXrgrX\nkZmZqdzcXK1YsUIvvPCCBgwYUHIji+KvZeXk5Gjnzp168MEH1aVLFw0cOLBkXkXLAAAA1Dl3p9Fo\nEbbgzyq+M844w9PT033z5s3l5m3durXU4/POO88l+W9+85u42b1795Y8zs/P98LCwnK5KVOmuCQf\nP358qelTp051ST516tSSaV9//bUfd9xx3rBhQ1+0aFGp/IYNGzwrK8vbt2/v+/btK5mem5vrkrxx\n48a+fPnyCp93PPHGUGzYsGEuydeuXVsybe3atS7Jhw0bVq3+w9ch4b8PNBqNRqPR6k/jtECgjqWm\npiotLa3c9DZt2pR8//e//11vv/22unbtqttvvz1uNiMjo+Rxdna2UlJSyuVGjBih5s2ba/78+VWO\n66WXXtLq1av1k5/8RD179iw1LysrS2PGjNGmTZv017/+tdyyP/zhD3X66adXuQ4AAIAjGacFAnXo\n2muv1W233aZvfetbGjp0qHr27Knzzz9fRx11VKnc0qVLJUmXXnqpGjSo+j2Qr7/+Wr///e/15z//\nWR9++KF27dqlAwcOlMzfsGFDlX0sWbJEkrRu3Trl5eWVm//JJ59IklatWlXu1MBu3bpV2T8AAMCR\njuIKqEM/+9nP1KZNGz3yyCN66KGHNHHiRJmZevbsqQkTJuiss86SJO3cuVOSqn0r8qFDh+r555/X\nscceqwEDBqh9+/Zq2LChJGnixInav39/lX1s27ZNkjRjxoxKc3v27Ck3rbLPpwIAAKgvKK6AOnb9\n9dfr+uuv186dO/X222/r+eef1x//+EddeumlWrVqldq2bavMzExJ1Tvi9O677+r5559Xr169NHfu\n3FKnHB44cED33HNPtcbVokULSdILL7yg/v37H9RzMrODygMAAByJuOYKSJDMzEz17t1bkydP1vDh\nw7V9+3YtXrxYknTOOedIkubPn1/q9L54/vWvf0mS+vfvX+5arr/97W/au3dvtcZTvM7iMSSb4mvK\nioqKEjwSAACA+CiugDo0b948FRYWlpu+ZcsWSVLjxo0lSWeeeabOO+88rVixQnfffXe5/LZt27Rv\n3z5JKvlMqUWLFpXr8z//8z+rPbYBAwbouOOO0+9+9zvNnTs3bmbJkiX68ssvq91nlFq2bCkz0/r1\n6xOyfgAAgKpwWiBQh6666iplZGSoe/fu6tSpk9xdix
cv1rJly3TmmWeqV69eJdknn3xSOTk5+vnP\nf66ZM2cqJydH7q5PPvlEr7zyij766CN16tRJ3/72t3X++efrueee03nnnafu3btr8+bNevnll3XS\nSScpKyurWmNLS0vTc889p0svvVR9+vTReeedp65du6px48b69NNPtWzZMq1Zs0YbN24sKQLrUtOm\nTXX22Wdr8eLFuvbaa3XiiScqJSVF/fv312mnnVbn4wEAACiL4gqoQ+PHj9f8+fO1fPlyzZ07VxkZ\nGcrOztbdd9+tG2+8sdRpfZ07d9by5ct1zz33aNasWZo0aZIyMjLUqVMn3XbbbWrbtq2k4HS52bNn\n684779TcuXP10EMP6eijj9bIkSN155136pRTTqn2+E477TStXLlS999/v+bMmaOpU6eqQYMG6tCh\ng04//XSNGzeu1C3j69oTTzyhW2+9VfPmzdPTTz8td1fHjh0prgAAQFIwd0/0GIAjipk5f1eJZ2Zy\nd+60AQAA6gzXXAEAAABABCiuAAAAACACFFcAAAAAEAGKKwAAAACIAMUVAAAAAESA4goAAAAAIkBx\nBQAAAAARoLgCAAAAgAhQXAEAAABABFITPQDgSJORkbHZzNolehz1XUZGxuZEjwEAANQv5u6JHgOA\nJGJmgyXtcfdXEj0WAACAwwnFFYASZtZQ0heSdrp720SPBwAA4HDCNVcAYqVKSpPUJNEDAQAAONxQ\nXAEAAABABCiuAAAAACACFFcAAAAAEAGKKwAAAACIAMUVAAAAAESA4goAAAAAIpBaG52a2W8k9ayN\nvgHUquJbsDc2s7cSOhIANTXL3SckehAAUB9F/iHCZtZYUkGknQIAgOra4+7NEj0IAKiPauPIlYVf\n90u6qBb6B1C7Wkv6WtLuRA8EwEFpJOlVcco/ACRMrZwWGCpyd04rAgCgDphZk6pTAIDaxLtbAAAA\nABCB2jhytV/SZknbaqFvAAAQH/tfAEiwyG9oIUlm1lrS1+7ONRsAANQR9r8AkFi1UlwBAAAAQH3D\nNVcAAAAAEAGKKwAAAACIAMUVAAAAAESA4goAAAAAIkBxBQAAAAARoLgCAAAAgAhQXAEAAABABCiu\nAAAAACACFFcAAAAAEAGKKwAAAACIAMUVAAAAAESA4goAAAAAIkBxBQAAAAARoLgCAAAAgAhQXAEA\nAABABCiuAAAAACACqTVZqFGjRpv27dvXLurBAACAaGVkZGzeu3dv+9hpaWlpmwoLC9mPH6FSU1M3\nf/311yWvOa83UD1l/3Zqwtz94Bcy84ULF+qKK67QjBkzlJOTU+UyixYtIk+ePHny5MnXcd7M5O5W\nZprn5eXFza9du1YzZszQFVdcoc6dO1fZP/nky+fl5ZV6zSt7vQH8n7J/OzVR49MCk2nHQZ48efLk\nyZOPnz8Yh0PhQL7meQC1r8bFVTLtOMiTJ0+ePHny8fPVlWyFAPlo8wDqRo2Lq2TacZAnT548efLk\n4+erI9kKAfLR5gHUnRoXV1VJ5h0NefLkyZMnTz6QbIUA+WjzAOpWrRRXybbjIE+ePHny5MmXl2yF\nAPlo8wDqXuTFVbLtOMiTJ0+ePHny5SVbIUA+2jyAxIi0uEq2HQd58uTJkydPPr5kKgTIR5sHkDiR\nFVfJtuMgT548efLkyVcsWQoB8tHnASROJMVVsu04yJMnT548efKVS5ZCgHz0eQCJc8jFVbLtOMiT\nJ0+ePHlbANEXAAAgAElEQVTyhy5ZCwfyVecBJM4hFVfJtuMgT548efLkyR+6ZC4cyB96HkDtqXFx\nlWw7DvLkyZMnT578oUu2QoB8tHkAtavGxVUy7TjIkydPnjx58vHzByPZCgHy0eYB1L4aF1fJtOMg\nT548efLkycfPV1eyFQLko80DqBs1Lq6SacdBnjx58uTJk4+fr45kKwTIR5sHUHdqXFxVJZl3NOTJ\nkydPnjz5QLIVAu
SjzQOoW7VSXCXbjoM8efLkyZMnX16yFQLko80DqHuRF1fJtuMgT548efLkyZeX\nbIUA+WjzABIj0uIq2XYc5MmTJ0+ePPn4kqkQIB9tHkDiRFZcJduOgzx58uTJkydfsWQpBMhHnweQ\nOJEUV8m24yBPnjx58uTJVy5ZCgHy0ecBJM4hF1fJtuMgT548efLkyR+6ZC0cyFedB5A4h1RcJduO\ngzx58uTJkyd/6JK5cCB/6HkAtafGxVWy7TjIkydPnjx58ocu2QoB8tHmAdSuGhdXybTjIE+ePHny\n5MnHzx+MZCsEyEebB1D7alxcJdOOgzx58uTJkycfP19dyVYIkI82D6Bu1Li4SqYdB3ny5MmTJ08+\nfr46kq0QIB9tHkDdqXFxVZVk3tGQJ0+ePHny5APJVgiQjzYPoG7VSnGVbDsO8uTJkydPnnx5yVYI\nkI82D6DuRV5cJduOgzx58uTJkydfXrIVAuSjzQNIjEiLq2TbcZAnT548efLk40umQoB8tHkAiWPu\nftALNWrUaNO+ffva1cJ4AABAhDIyMjbv3bu3fey01NTUbUVFRa0SNSbUrpSUlO2FhYWtix+npaVt\nKiws5P82oAqpqambv/766/ZVJytWo+IKQOKY2SJJH7j7TQexTL6kSe5+b22NK1xPA0n/I2mIpFaS\nLnD3RVUs00nSWknfdvd3K8icJWmZpM7unh/diAHUpbLbr+psz8zsA0l/cfe8KNddW8ysvaTHJZ0v\nqbG7WzWWGa5gG920ksxoSTe5e6eIhgqgFqQmegDAka4WduiDJX19kMt8W1JBROuvTG9J35eUI2mN\npO11sE4Ah6+abM8qVUmhEvm6KjBaUpakrpK+qIP1oZbURkFuZjmSFko6yt3/HVW/iVoPyqO4ApKE\nmaW5e5U7fnc/6ILF3bfWbFQH7XhJG9397TpaH4DDWE22Z4fBuo6X9Hd3/6SO1gcgiUR+t0AA/8fM\npknqKek/zczD1snMcsLve5vZ38zsK0mXmtlxZvaCmW0yswIzW25mfcv0ucjMJsU8zjezO83s92a2\n28w+M7P/KrNMfnhKSfFjN7MfmtmMcD1rzOx7ZZY5O1z/PjN7Lxyrh++GVfRcH5B0TJjLD6c3NLOJ\nZrY57GupmXWv4uf2XTP7KMwvlnRimfktzOwJM9sSZtaY2S2V9Qmg5szsR+HfcGqZ6dPN7IXw+yq3\nX3H6Lbs9axv2sdfM1pnZiDjL/MzM3g/XscHMpphZZjgvR9JUSU1itrl5FayrpZn9ycx2hOt7zcy+\nFTN/uJntMbOLzOyDcH0LzazCO0qE270Bkq4P1z0tnH6MmT1vZl+E7Tkz61jFz2ZM+LPcY2aPS6rw\nlEFEr6L9dzjvFDN7KXwtt5jZ0xacDlq87Klm9tdwn/yFma00swvC5ReGsa2xvyNx1p9mZg+Z2edm\ntt/MPjWz8THz083s7nCfX2Bmy8zs0nBetdeD6FFcAbXrZklLFOzsO4Tt05j5d0u6U9LJkt5RsPN8\nWdLFkrpIminpOTM7uYr13CrpfyWdEfZ5j5mdW8Uy/y3phXA9z0j6o5llS5KZNZU0R9JHks6UNEbS\nhGo817skfRY+z2+H0++RNFTSCEmnh+OcZ2Yd4nViZt+QNEvSqwpOq3k47CPWrySdKqmvgp/dCEkb\nqhgfgJp7VlKmpF7FE8ysiYJC4slwUk23X7GmKTjy00vSQEnXS+pUJnNA0i2SviXpGkndFGwnJOnt\ncN6X+r9tbkXXmk6TdHb4HLqFy8wzs0YxmYaSxirYxpyr4GfwaCXj/7ak1xT8vDpIutnMTME2rZ2k\nCyVdoOC0wVnhvHLM7EoF27lcBdv1f0r6WSXrRfTi7r/Dfdcbkj5Q8HvTS8Hv/mwLrjuWpOmSNobz\nT5eUJ2mfgv3/5WHmW2GfN1ew/p9KGiTpKkknKNiP/jNm/lQFxd81CvaHf5L0opl1
Ocj1IGruTqPR\narFJWqTg/P/YaTmSXNLl1Vh+qaQ7K+pPUr6kp8ss80mZZfIljY557JJ+G/M4VcE/Ft8LH/9IwfVS\njWIy14TL5VQy1tGS8mMeN5H0laTrY6alSFot6Vfh405hv2eFj38j6WOFN9wJp90ZZjqFj2dLmpro\n15ZGq09N0vOSnoh5/D1JuyRlVLJMVduvkscKjlC7pPNj5mdLKpKUV8k6vitpv6QG4ePhkvbEycWu\n64RwXT1i5rcIn8/ImH5c0kkxmWvDbVqDSsYzR9K0mMcXh8+hU8y0YxUUib3ijVlBkTi5TL+vxW5f\naXXyO1/q9zWcdpekv5aZ1jL8XekWPt4taVgFfeaE2TZVrPshSX+N3RfGzDsu/P05psz0WZIeOZj1\n0KJvHLkCEqvU3fHMrImZ3WNmH4anquyRdJakY6ro5/0yjz+X1La6y7h7oaStMcucrOAi3r0x+Xeq\n6C+e4ySlSXorZl1FCt4NPKWCZb4paamHe4fQkjKZ/5F0ZXiqxb1m1rMGYwNwcJ6UNNDMGoePr1Vw\nF7990iFtv4p9U8E/jH8rnuDu6xRsz0qY2YVm9mp4OtQXkp6TlC7pYG6fXLyukm2Lu+9ScGQ9dtu0\n391jjxZ8rmCblnmQ6/rcY+506u5rwr4q2w6W3e6VfYzEOFNSj/B0zT3h73nxGSnHhV/vlzTFzBaY\n2S8O8uhtsWkKzt742Mx+Z2Z9Yo6MnSHJJH1YZhx9YsaABKG4AhKr7B387pV0haRfKjjc31XBPxrp\nVfRT9kYYrqr/vitbxsLHh6r4lJd4fVXUf5W3LXb3lxW8o32vpDaSXjKzqTUaIYDqmiOpUNIAM2ur\n4HSoJ2Pm13T7Vaw6tyzPlvSSpFXhus5UcMqeDmI9Va0rdttUWMG8g/n/qbLtKZ+Hc/hpoOB3sGuZ\ndoKCvxF58LEBpyg4knSepPfjXT9YGXdfruDMjp+H6/yTpFfDAquBgt+db5cZwzf1f38PSBCKK6D2\nfaXgVLjq6C7pcXef6e7vK7h+KRHvQq2SdGqZaw+61aCffyl4/iU3sDCzFAXXLnxYwTIfSjq7zLUI\n55QNufu/3f0Jdx8u6QeShplZwxqMEUA1uPt+SX9RcMRqqKRNkl6PiRzq9muVgv9Liq/XlJkdo+D6\npGJnKSiibnX3Je7+cZn5UvW2uR+G6yq5NtXMmiu4dqWibVNNfSjp6OKbIYTrOlbBuCta1yqV3+6V\n2w6i1sX7XVqu4Dqmde7+rzKt5Nb77v6Juz/k7n0kPSZpZEyfitNvOe7+hbvPcPcbFRyVulDBNYnv\nKSja28cZQ/H1x9VeD6JFcQXUvnxJ3Sy4S2CbmMP68XwsaZCZnWFmpyp4VzijLgZZxlMKrhGYHN4V\nqZeCd8+kg3in1d0LFJzCN96Cuw1+M3zcTtIjFSz2qIJ36yaa2UlmNkTSDbEBM7vLzAaa2Qlhn4Ml\nrQn/+QNQe56UdKmCv8np7n4gZt4hbb/C0+/mSfq9mZ1rZl0VnBoVe3ryJwr+d7nFzDqb2dUKbmAR\nK19ShpldHG5zG5eZLw9uk/5CuK7vxIx3t4KbEUTpNUkrJT1lZmda8KHoTyn4J31BBcs8qOANo1Hh\ndm6sgptvoG7lq/z++3cKrs97xoK76h5rZr3M7A9m1szMGoWn8eWEy52t4I2H4kJ6nYL9aB8zOyq8\ngVQ5FtwV82oz+6aZHa/guufdkj4L31R4StI0MxsSjuEsMxttZoMPZj2IHsUVUPvuVfAO0ocKrmuq\n7PqDn0naImmxgrtuLQ2/r1PuvkdSPwXvzr2n4E6BeeHsfQfZ3e0K7pw1VdIKSadJ+q67b6xg3esV\nFEvfVfAPya2S7igT2y/p1+H8tyQ1C8cLoHa9oeDOnKeo9CmBUjTbr+GS1iooOl5UUOjkF88Mj4jd\nHK7rQwVHA0bHduDB5+w9KulpBdvcMRWs6/sK
TlucHX5trGDbtLeCfI2E148ODMeySMEtsjdJGljm\n2tLYZZ5RsM39tYJt8KkKruNB3Sq3/3b3zyWdr+CavXmS/qGg4NoftiIFN7j4k4K7+z2v4Hq5n0lS\neGQpV8Fru1nSJMX3haT/UvC7uVzBaX+XufuX4fzvK9iv3qPgzr5zJPVQUFQdzHoQMavg7xoASjGz\nAQp2Em2dT3sHAAAoJ7XqCID6yMyGSVqj4C5I/yFpoqQXKawAAADio7gCUJF2ksYp+PDBTQrujnR7\nQkcEAACQxDgtEAAAAAAiwA0tAAAAACACFFcAAAAAEAGKKwAAAACIAMUVAAAAAESA4goAAAAAIkBx\nBQAAAAARoLgCAAAAgAhQXAEAAABABCiuAAAAACACFFcAAAAAEAGKKwAAAACIAMUVAAAAAESA4goA\nAAAAIkBxBQAAAAARoLgCAAAAgAhQXAEAAABABCiuAAAAACACFFcAAAAAEAGKKwAAAACIAMUVAAAA\nAESA4goAAAAAIkBxBQAAAAARoLgCAAAAgAhQXAEAAABABCiuAAAAACACFFcAAAAAEAGKKwAAAACI\nAMUVAAAAAESA4goAAAAAIkBxBQCol8xsvJlNCb8/0cx21rCfcWY2KdrRAQAORxRXAHAEMLM9Me2A\nme2NeXztIfS71My+F+VYw35vMLPXou63ptz9Y3fPrCpnZt81s3+VWTbX3W+qvdEBAA4XqYkeAADg\n0Ll70+LvzSxf0kh3T5ripbaZWaq7FyZ6HACA+o0jVwBQD5hZipn90szWmNm/zewpM8sM5zUxsz+b\n2XYz22lm75hZSzO7T9K3JU0Jj4DdF6ffuMuG81qZ2eNmtsnMPjWzXDNrYGanS5ooKSfsd1MFY15q\nZv/PzP5uZrvMbKaZtQjnnWxmhWY2ysw+lTQ3nP6dcAw7zWy5mZ0f09/xZvaWmX1hZi9Lahkz72Qz\nK4x53CZm7DvM7Bkzay3peUnHxhwVbB17emG47OVm9mE4htfM7ISYeZvM7FYz+yB8Tk+ZWXpNXlMA\nQPKhuAKA+uG/JF0iqbukjpK+lvRAOG+kgjMZjpbURtJNkr5y99skLVNwFKxp+LisuMuG856StEvS\nsZK6SRoo6Tp3f0/SLZIWhf22r2Tc10u6Nuw/XVJsgZci6WxJJ0kaYGadJM2S9AtJrSTdKWlWcbEn\n6VlJb0hqLeleSddVst5nJJmkkyW1k/Q7d98maZCkNeG4m4bTSpjZf0iaJunHktpKel3SbDOLPVNk\niKSLJB0fjv+aSsYBADiMUFwBQP3wI0l3uPvn7r5P0jhJQ83MFBRaR0k6zt0L3X2ZuxdUs9+4y5pZ\ntqQekn7m7l+6+0ZJD0m66iDHPdXdP3L3PZJyJV1dZv5/h/3vlTRM0nPu/pq7H3D3uZI+lHSJmZ0o\n6RRJ49z9K3f/q6R58VZoZp0lfUfSj919Z5h/o5rjvVrS8+6+yN2/kvQbBUXnWTGZB9x9s7tvVXDE\nrWs1+wYAJDmuuQKAI1xYQH1D0lwz85hZDRQcxXlMUntJfzGzppIel/RLdy+qRvdxl5WULSlD0tZg\n9SXr+1e8Tirxacz36yQ1Lj41UNIBd/88Zn62pKvN7IqYaWmSsiRtlrQ1LCxj+2sWZ53fkLTF3b84\nyLEqXNe64gfuXmRmGxQceSsWexrklwqKLwDAEYDiCgCOcO7u4T/4g9397xXE/lvSf5vZsZLmS/qH\ngtP6vIJ8cd/7K1j2bUl7JLV093h9VNpvjG/EfH+MpC/dfZeZdYjTx6eSprj7T8p2YmYnSWpjZhkx\nBdYxknbEWeenktqaWdPwiNnBjPtzBUVe8XpTFBRWG6pYDgBwBOC0QACoHx6VNN7MviFJZtbWzPqF\n3/cys1PMrIGk3ZIKJRUftdqs4JqpuCpa1t3XSloq6R4zaxbeyOIEM+se0+83zCytinEPt+AzqJpK\nylNwLVRF
/iTpCjO7KLyBR6Pw+/aSPpb0kaRfmlm6mV0g6bvxOgnH/oakSWbWIsz3iBl323A88Twj\naZCZ9Qif2x2Stkl6t4rnCQA4AlBcAUD9cI+k1yQtMLMvFBxZOiOcd7SkFyR9IekDBdcBPRvOe0DS\n9eEd8+6J029ly14tKVNBUbNdQeHRLpw3T1K+pC1m9lkl435C0tMKjvwckBTvphqSJHdfI+lyBdeT\n/VvB6Xk3S2oQHj0bKumCcCxjJD1ZyXqvVnBK4ScKTuO7MZy+UtJsSevCuwG2KjOG9yX9QNLvJW1V\ncOOKAdwmHgDqB4t/tgYAAIllZkslTXL3yoogAACSBkeuAAAAACACFFcAAAAAEAFOCwQAAACACHDk\nCgAAAAAiQHEFAAAAABGguAIAAACACFBcAQAAAEAEKK4AAAAAIAIUVwAAAAAQAYorAAAAAIhAaqIH\nAKD+aNSo0aZ9+/a1S/Q4DlcZGRmb9+7d2z7R4wCAqLBfwJG2b+NDhAHUGTNztjk1Z2Zyd0v0OAAg\nKuwXcKTt2zgtEAAAAAAiQHEFAAAAABGguAIAAACACFBcAQAAAEAEKK4AIAK7d+/WT3/6U3Xq1Emp\nqakyM61YsUKLFi2SmSkvLy/RQwQAoMTw4cNlZsrPzy+Zlp+fLzPT8OHDEzauwx3FFYCkUVRUpMmT\nJ6tnz55q1aqV0tLS1LZtW5122mkaOXKkZs+eLUk6cOCAjjnmGJmZPvzww0r73Lt3rzIzM5Wenq4t\nW7aUmldQUKCJEyfqwgsvVNu2bZWenq7MzEx169ZNv/jFL7RmzZpqj33MmDF6+OGHdeqpp2rs2LHK\nzc1V+/YV31k2JydHZkfMzZEAoFYczvuF+ipe0Vaf8DlXAJJCUVGR+vbtq3nz5ikzM1N9+vRRx44d\ntX37dq1evVrTp0/XRx99pP79+6tBgwYaMWKExo0bpylTpuj++++vsN9nn31Wu3bt0pAhQ9S2bduS\n6UuXLtWQIUO0YcMGdezYUb1791ZWVpYKCgr03nvv6e6779aECRO0dOlSnXHGGVWOf86cOTrxxBP1\n4osvlprevHlzrVq1Sm3atKn5DwcA6qHDfb9wODr66KO1atUqtWjRItFDOXy5O41Go9VJCzY58T3x\nxBMuybt06eI7d+4sN7+goMAXLFhQ8nj9+vWekpLibdq08f3791fYb/fu3V2Sv/LKKyXTVq1a5c2b\nN/cGDRr4+PHj/euvvy633Jo1a/yKK67whQsXVth3LDPznj17Vivr7t6zZ0+v7OcRT5hP+OtIo9Fo\nUbUjeb+Q7IYNG+aSfO3atQnt90jbtyV8ADQarf60ynaiN954o0vyBx54oMJMWX369HFJ/uc//znu\n/FWrVrkk79y5sx84cKBkeq9evVySjx07tsp17Nu3r9L5xUVS2VZcaC1cuNAleW5urru7r127Nm4+\ndpmKHGk7IBqNRjsS9wvu7lOnTnVJPnXqVJ8zZ46fe+653rhxY8/MzPTLL7/cP/7443LLFBclq1ev\n9oceeshPPfVUz8jIKLdvmDdvnl922WXeunVrT09P92OPPdZHjx7tO3bsiDuWV1991bt37+6NGzf2\nli1b+oABA3zVqlVxi6DifdSwYcPK9VNQUODjx4/3M88805s2bepNmjTxk08+2X/yk5/4pk2b3N0r\n3L9lZ2dX+LM60vZtnBYIICm0bt1akvTxxx9Xe5lRo0bppZde0pQpUzR06NBy86dMmSJJGjlyZMn1\nTWvXrtVrr72mjIwMjRkzpsp1NGzYsNL5w4cPV05OjsaNG6fs7OySi4A7deoUN5+Zmanc3FxNmzZN\n69atU25ubsm8ipYBgProcN0vxHruuef08ssva9CgQcrJydGKFSs0c+ZMLVy4UG+//bZOOumkcsvc\nfPPNWrx4sfr06aPevXsrJSWlZN5dd92l3NxctWrVSn379lXbtm31/vvv69
5779XcuXO1ZMkSNW/e\nvCT/l7/8RUOHDlV6erqGDh2qDh066M0339S5556r0047rdrPY8eOHbrgggu0cuVKnXTSSRoxYoTS\n09O1evVq/fGPf9TgwYPVrl075ebmatasWVq5cqVuvvlmZWZmSlLJ13oh0dUdjUarP02VvEO5fPly\nT0tLczPz733vez5z5kzPz8+vMO/uXlhY6FlZWW5mvmbNmlLz9u/f70cddZSnpqb6xo0bS6Y//vjj\nLsnPP//8Svs+WKrgyFPZI1fFOC2QRqPRjtz9QvGRK0n+4osvlpo3ceJEl+QXXnhhqenFR5KysrLK\njd3dfcGCBS7Jzz333HJHqYrXd8stt5RM++KLL7xVq1aemprqy5YtK5W/5ZZbSsZXnSNXV199tUvy\nG264wYuKikrN2717d6nTNuv7aYHcLRBAUjj99NP15JNPql27dnryySd1+eWXq1OnTmrdurUGDRpU\n7kYRkpSSkqIRI0bI3fXYY4+VmvfCCy9o69at6tevX6m79m3cuFGS1LFjx9p9QgCAQ3Ik7BcuvPBC\n9e3bt9S0m266Sccdd5wWLFigdevWlVtmzJgx6ty5c7npDz30kCRp8uTJ5Y4EDR8+XF27dtVTTz1V\nMu2FF17Q9u3bdc011+iss84qlc/Ly6v2TSu2bNmiZ555Rh06dNC9996rBg1Klw/NmjXjBhgxKK4A\nJI0rr7xS69ev1/z58/XLX/5Sffv21YEDBzRr1iz1799fw4YNK36ns8TIkSPVoEEDTZ06VUVFRSXT\nJ0+eLEn64Q9/WCpfvDy3QQeA5He47xd69uxZblpKSoq6d+8uSXrvvffKze/WrVvcvpYsWaK0tDTN\nmDFDeXl55dpXX32lrVu3atu2bZKk5cuXVziGFi1aqGvXrtV6DsuWLdOBAwfUo0cPNWnSpFrL1Gdc\ncwUgqaSlpemSSy7RJZdcIim4Fe/MmTM1YsQIPf744xo0aJAGDhxYks/OztbFF1+s+fPna+7cuerX\nr5/y8/P12muvKTs7u6SfYllZWZKkzz77rO6eFACgxg7n/UK7du3iTi8+crZr164K55W1bds2FRYW\naty4cZWuc8+ePWrdunVJ31WNoSo7d+6UFNymHVXjyBWApJaSkqIrr7xSt956qyRpwYIF5TLF70IW\nvyv52GOPyd31gx/8oNzpC8XvFr777rtxd2oAgOR2OO0XNm/eHHf6pk2bJCnu6XQVHUFr0aKFWrZs\nWeU1P9nZ2aX6rmoMVSk+BXHDhg3Vytd3FFcADgvNmjWTpHKnf0hS//791b59e82dO1effvqppk2b\nVnLefVmdO3dWr169tG/fPk2YMKHK9e7fv//QBx9H8d2fYk9ZAQBU3+GwX3j99dfLTSsqKtKbb74p\nKbiurLrOOecc7dixQ//4xz+qlS/+oON4Y9i1a5dWrFhRrX66deumBg0a6I033lBBQUGV+fq+f6O4\nApAUnn76ab366qs6cOBAuXmbNm0qefexR48e5eanpqZq+PDhKioq0rXXXqvPPvtMvXv3rvAUhocf\nfljNmzfXb3/7W913330qLCwsl1m/fr2uuuoqLVmy5BCfWXzFtxhev359rfQPAIe7I2G/sGDBAs2Z\nM6fUtEmTJmn16tW64IILSo4yVUfxkbpRo0bp888/Lze/oKBAS5cuLXk8YMAAtWzZUtOnT9e7775b\nKpuXl1fto3RHHXWUrrrqKm3cuFGjR48u93rs2bOnVF/1ff/GNVcAksI777yjBx98UO3bt1f37t1L\n7pS0du1avfTSS9q7d68GDBigIUOGxF1+1KhRuvvuu7V48WJJ5S9YjnXyySdr/vz5uvzyyzV69Gg9\n+OCDuuiii5SVlaWCggKtXLlSb731lsxMt99+e/RPVtJFF12kGTNmaPDgwerdu7caNWqk7OxsXXfd\ndbWyPgA43BwJ+4V+/fpp0KBBGjRokI
4//nitXLlSc+fOVatWrfTII48cxE8j2G+MHz9eY8eO1Qkn\nnKDevXurc+fO2rNnj9atW6fXX39d3bt317x58yRJTZs21R/+8AcNHTpU3/nOd0p9ztUHH3ygHj16\n6I033qjWuidNmqQPPvhAjz76qBYtWqRLL71U6enpWrt2rebPn6/Zs2crJyenZJwTJkzQqFGjNGTI\nEDVt2lSZmZm66aabDur5HrYSfS94Go1Wf5oq+TyT9evX+6RJk3zgwIF+4oknerNmzTwtLc3bt2/v\nl112mT/xxBPlPlujrF69erkk79ixoxcWFlaadQ8+A+T+++/3nJycks8+ad68uZ9xxhl+xx13xP2c\nkYroID/nqrCw0MeOHeudO3f21NTUCpcvuw5PgteRRqPRompH6n6h+HOnpk6d6i+++KKfc8453rhx\nY2/RooUPHjzY//nPf5ZbprqfD7V48WK/4oorvEOHDp6WluZt2rTxLl26+K233lru86z+f3v3Hh5V\nda9x/P1BgAlijW3EEK0EqPcqICgtcJpQsfRIBWtrbUslYK2XXg5VrFLLOaZ9tAr1Qqly6uU0WK22\nh0qAKoKXlpanoqCpelqwohAqKEGlqCChXNb5Y+/EyWQmk0zWzGyS7+d59pPM7HfvtZJM9prfzNp7\nnHPusccec6NGjXKFhYWuqKjITZgwwa1bty5pe6k+58o553bu3Omuv/56d8opp7jCwkLXp08fd+KJ\nJ7pp06a5+vr6ZtlbbrnFnXDCCa5nz55Okuvfv3/Kn6ezjW0W/EwAkH1m5jjmZM7M5JzjGvIAOo3O\nOi7Mnz9fU6dOVXV1taZMmZLv7kRaZxvbOOcKAAAAADyguAIAAAAADyiuAAAAAMADzrkCkDOddW59\nrnS2eekAwLiAzja28c4VAAAAAHhAcQUAAAAAHlBcAQAAAIAHFFcAAAAA4AHFFQAAAAB4UJDvDgDo\nOiJUtywAABpTSURBVGKxWL2ZHZnvfhysYrFYfb77AAA+MS6gs41tXIodACSZ2W2Shko60zm3P9/9\nAQB0bmZ2pqR5ki50zq3Od3/gB9MCASBwvqRySf3y3REAQJdwlqTjJH063x2BPxRXAAAAAOABxRUA\nAAAAeEBxBQAAAAAeUFwBAAAAgAcUVwAAAADgAcUVAAAAAHhAcQUAAAAAHlBcAQAAAIAHFFcAAAAA\n4AHFFQAAAAB4QHEFAAAAAB5QXAEAAACABxRXAAAAAOABxRUAAAAAeEBxBQAAAAAeUFwBAAAAgAcU\nVwAAAADgAcUVAAAAAHhAcQUAAAAAHphzLns7N5shaUrWGgAAf44Pv+6StDmfHQGANqiVNMl14Imc\nmZ0u6eeSDvHWK7TH8XHf/z1vveja3pU01Tn3N187zHZx9Zqko7PWAAAAQNd1tHNuS6Ybm9mNkmZ4\n7A9wMJrhnJvla2cFvnaUgoVfx0jamuW2AKAjCiTFJO3Md0cAII0/SuqrD55nZapx+9sk3dXBfSEz\nRZJ25LsTXdR0SRer4/9HzWS7uGr0inOOaTYAAAAdZGZ7Pe9ym3PuJc/7BCLNzN7Oxn65oAUAAAAA\neJDtd67WSuopKSuVIQAAQBfk6/nV2vDrug7uBzgYZeXxn+0LWhRIijnnOIcBAADAA5/Pr8ysyDnH\nOT/okrLx+M9qcQUAAAAAXQXnXAEAAACABxRXAAAAAOABxRUAAAAAeEBxBQAAAAAeUFwBAAAAgAcU\nVwAAAADgAcUVAAAAAHhAcQUAAAAAHlBcAQAAAIAHFFcAAAAA4AHFFQAAAAB4QHEFAAAAAB5QXAEA\nAACABxRXAAAAAOABxRUAAAAAeEBxBQAAAAAeUFwBAAAAgAcUVwAAAADgAcUVAAAAAHhAcQUAAAAA\nHlBcAQAAAIAHBfnuAIDOobCwcGtDQ8OR+e4HpFgsVr979+6SfPcDAICuxpxz+e4DgE7AzBzHk2gw\nMz
nnLN/9AACgq2FaIAAAAAB4QHEFAAAAAB5QXAEAAACABxRXAAAAAOABxRWALmP+/PkyM82fPz+v\n/Xj22Wd11llnqbi4WGamIUOGSJKmTJkiM1NdXV1e+wcAADLDpdgBIIfeffddjR8/Xg0NDbrwwgtV\nXFyskpLUV02vq6vTgAEDVFlZmfeiEAAAtI7iCgByaPXq1dq2bZtuuOEGXXvttc3W3XjjjZoxY4aO\nOuqoPPUOAAB0BMUVAOTQ66+/LkkqLS1tsa5fv37q169frrsEAAA84ZwrADmxZMkSnXnmmerXr596\n9eql0tJSlZeXa968eS2y27dv1w9+8AN9/OMfV+/evXXYYYdp8ODBmjFjhnbt2tWUe+655zRt2jQN\nHjxYH/7whxWLxXTsscdq+vTp+uc//9mu/m3evFnf/va3NXDgQPXq1Usf+chHNGHCBK1Zs6ZFtqqq\nSmamFStW6IEHHtCIESPUp08flZWVpdx/XV2dzEyVlZWSpKlTp8rMmp0DlnjOVVVVlQYMGCBJuvfe\ne5vyUThvDAAAtMQ7VwCy7q677tKll16qkpISnXPOOSouLta2bdv04osvqrq6Wt/85jebshs3btSY\nMWO0adMmDRs2TJdffrkOHDigl19+Wbfddpsuu+wyHXLIIZKku+++WzU1NSovL9fYsWO1f/9+1dbW\n6tZbb9Wjjz6qZ555Roceemja/tXW1uozn/mMtm/frnHjxum8887TW2+9pUWLFmn06NGqqanR2Wef\n3WK7W265RY8//rjOOeccjRkzRu+8807KNoqKinTdddfp+eef1+LFizVx4sSmC1k0fk1UUVGhHTt2\n6Kc//akGDx6sc889t2ldqm0AAEAeOedYWFhYOrwEh5PkTjvtNNezZ09XX1/fYt2bb77Z7PbIkSOd\nJPfjH/84aXb37t1Nt+vq6ty+ffta5O655x4nyd10003N7q+urnaSXHV1ddN9e/fudYMGDXK9evVy\nK1asaJbfsmWLKy0tdSUlJa6hoaHp/uuuu85Jcr1793a1tbUpf+5kkvWhUWVlpZPkNm7c2HTfxo0b\nnSRXWVnZ5jbCv0XeHxMsLCwsLCxdbWFaIICcKCgoUI8ePVrcX1xc3PT9c889p6eeekpDhgzRNddc\nkzQbi8Wabvfv31/du3dvkbvooov0oQ99SMuXL0/br0ceeUSvvvqqvvOd76i8vLzZutLSUl199dXa\nunWrnnzyyRbbXnLJJRo6dGjaNgAAQNfAtEAAWTdp0iRNnz5dJ598si644AKVl5dr1KhROuKII5rl\nnn76aUnSuHHj1K1b+td+9u7dqzvvvFO//vWvtXbtWr3zzjs6cOBA0/otW7ak3ceqVaskSZs2bVJV\nVVWL9evXr5ckrVu3rsXUwDPOOCPt/gEAQNdBcQUg66688koVFxdr3rx5mjt3rubMmSMzU3l5uX7y\nk59o+PDhkqQdO3ZIUpsvRX7BBReopqZGAwcO1MSJE1VSUqJevXpJkubMmaM9e/ak3cfbb78tSVqw\nYEGruZ07d7a4r7XPpwIAAF0PxRWAnJg8ebImT56sHTt26KmnnlJNTY1+8YtfaNy4cVq3bp369u2r\noqIiSW17x+nZZ59VTU2Nxo4dq6VLlzabcnjgwAHNnj27Tf067LDDJEmLFy/WhAkT2vUzmVm78gAA\noHPjnCsAOVVUVKSzzz5bd999t6ZMmaLt27dr5cqVkqRPfOITkqTly5c3m96XzCuvvCJJmjBhQotz\nuVavXq3du3e3qT+NbTb2IWoazynbv39/nnsCAADSobgCkHXLli3Tvn37Wty/bds2SVLv3r0lScOG\nDdPIkSP1/PPPa9asWS3yb7/9thoaGiSp6TOlVqxY0WKf3/rWt9rct4kTJ2rQoEG64447tHTp0qSZ\nVatW6f3332/zPn06/PDDZWb6xz/+kZf2AQBA2zEtEEDWffnLX1Ys
FtPo0aNVVlYm55xWrlypNWvW\naNiwYRo7dmxT9v7771dFRYWuvfZaPfTQQ6qoqJBzTuvXr9djjz2ml156SWVlZTr99NM1atQoLVy4\nUCNHjtTo0aNVX1+vRx99VMcff7xKS0vb1LcePXpo4cKFGjdunMaPH6+RI0dqyJAh6t27t1577TWt\nWbNGGzZs0BtvvNFUBOZSnz59NGLECK1cuVKTJk3Scccdp+7du2vChAk69dRTc94fAACQGsUVgKy7\n6aabtHz5ctXW1mrp0qWKxWLq37+/Zs2apcsvv7zZtL4BAwaotrZWs2fP1qJFi3T77bcrFouprKxM\n06dPV9++fSUF0+WWLFmimTNnaunSpZo7d66OOuooXXzxxZo5c6ZOOumkNvfv1FNP1QsvvKBbb71V\nDz/8sKqrq9WtWzf169dPQ4cO1Q9/+MNml4zPtfvuu09XXHGFli1bpgcffFDOOR199NEUVwAARIw5\n5/LdBwCdgJk5jifRYGZyznG1DQAAcoxzrgAAAADAA4orAAAAAPCA4goAAAAAPKC4AgAAAAAPKK4A\nAAAAwAOKKwAAAADwgOIKAAAAADyguAIAAAAADyiuAAAAAMCDgnx3AEDnEIvF6s3syHz3A8HfIt99\nAACgKzLnXL77AAB5Z2Zfl3SCc+57+e4LAAA4OFFcAYAkM9sk6RhJpc65N/LdHwAAcPDhnCsACHRP\n+AoAANAuFFcAAAAA4AHFFQAAAAB4QHEFAAAAAB5QXAEAAACABxRXAAAAAOABxRUAAAAAeJDVz7ky\ns59J+kbWGgAAf3rFfb8nb70AgLaplfRvzrn9+e4IgA9ku7jaLOmorDUAAADQdX3UObc5350A8IGC\nHLVzrCT++QFEnUnK3itOAODHq5JK890JAC3lqrhqcM415KgtAACATsvMeBEIiCguaAEAAAAAHmS7\nuHpS0t8k1We5HQAAgK6C51dARGX1ghaSZGbmst0IAABAF8LzKyCasl5cAQAAAEBXwDlXAAAAAOAB\nxRUAAAAAeEBxBQAAAAAeUFwBAAAAgAcUVwAAAADgAcUVAAAAAHhAcQUAAAAAHlBcAQAAAIAHFFcA\nAAAA4AHFFQAAAAB4QHEFAAAAAB5QXAEAAACABxRXAAAAAOABxRUAAAAAeEBxBQAAAAAeUFwBAAAA\ngAcFmWxUWFi4taGh4UjfnQEAAH7FYrH63bt3l8Tf16NHj6379u1jHO+kCgoK6vfu3dv0N+fvDbRN\n4v9OJsw51/6NzFyq7VasWKHzzz9fCxYsUEVFRdp9kSdPnjx58uSzlzczOecsPmtmrrKyUgsWLND5\n55+vAQMGpN3/xo0byR8k+aqqqmZ/czNzVVVVadsAurrE/51MeJ0WeLAMNOTJkydPnnxXzkuKTCFA\n3n8eQP54K66iNnCQJ0+ePHny5FOLSiFA3n8eQP54Ka6iNnCQJ0+ePHny5FsXlUKAvP88gPzpcHEV\ntYGDPHny5MmTJ99xUS0cyKfPA8ifDhVXURs4yJMnT548efIdF+XCgXzH8wCyJ+PiKmoDB3ny5MmT\nJ0++46JWCJD3mweQXRkXV1EaOMiTJ0+ePHnyyfPtEbVCgLzfPIDsy7i4itLAQZ48efLkyZNPnm+r\nqBUC5P3mAeRGxsVVlAYO8uTJkydPnnzyfFtErRAg7zcPIHcyLq7SifJAQ548efLkyZMPRK0QIO83\nDyC3slJcRW3gIE+ePHny5Mm3FLVCgLzfPIDc815cRW3gIE+ePHny5Mm3FLVCgLzfPID88FpcRW3g\nIE+ePHny5MknF6VCgLzfPID88VZcRW3gIE+ePHny5MmnFpVCgLz/PID88VJcRW3gIE+ePHny5Mm3\nLiqFAHn/eQD50+HiKmoDB3ny5MmTJ0++46JaOJBPnweQPx0qrqI2cJAnT548efLkOy7KhQP5jucB\nZE/GxVXUBg7y5MmTJ0+efMdF
rRAg7zcPILsyLq6iNHCQJ0+ePHny5JPn2yNqhQB5v3kA2ZdxcRWl\ngYM8efLkyZMnnzzfVlErBMj7zQPIjYyLqygNHOTJkydPnjz55Pm2iFohQN5vHkDuZFxcpRPlgYY8\nefLkyZMnH4haIUDebx5AbmWluIrawEGePHny5MmTbylqhQB5v3kAuee9uIrawEGePHny5MmTbylq\nhQB5v3kA+eG1uIrawEGePHny5MmTTy5KhQB5v3kA+eOtuIrawEGePHny5MmTTy0qhQB5/3kA+eOl\nuIrawEGePHny5MmTb11UCgHy/vMA8qfDxVXUBg7y5MmTJ0+efMdFtXAgnz4PIH86VFxFbeAgT548\nefLkyXdclAsH8h3PA8iejIurqA0c5MmTJ0+ePPmOi1ohQN5vHkB2mXOu3RsVFhbWNzQ09M1CfwAA\ngEc9e/bcvmfPno/E39ejR4+t+/btOzJffUJ2FRQU1O/du7ek8TZ/b6BtEv93MpFRcQUAAAAAaK7D\nF7QAkFtmtsLMbm/nNnVmdlW2+hTXTjczu9PM3jYzZ2YVbdimLMwObyUzPMyUeewugBxLPH615Xhm\nZn81syrfbWeLmZWY2WNmtsvM2vQKtplNMbOdaTJXmVmdl04CyJqCfHcA6OzMbIWkvzrnvu1pl+dJ\n2tvObU6XtMtT+605W9JUSRWSNkjanoM2ARy8MjmetcrMpki63TnXJ9ttpXCVpFJJQyS9l4P2kCVZ\nGL8Vvuj4B0lHOOfe8rXffLWDliiugIgwsx7OubQDv3Ou3QWLc+7NzHrVbh+T9IZz7qkctQfgIJbJ\n8ewgaOtjkp5zzq3PUXsAIoRpgUAWmdl8SeWSvhVOa3PhNLiK8PuzzWy1mf1L0jgzG2Rmi81sazil\npNbMPpewz8RpNXVmNjOcjveumW02s+8lbNNsWmDY9iVmtiBsZ4OZfS1hmxFh+w1m9pewrymn+oU/\n622SjglzdeH9vcxsjpnVh/t62sxGp/m9fdbMXgrzKyUdl7D+MDO7z8y2hZkNZvbd1vYJIHNmdmn4\nP1yQcP8DZrY4/D7t8SvJfhOPZ33Dfew2s01mdlGSba40sxfDNraY2T1mVhSuq5BULemQuGNuVYq2\nDjeze83sn2F7T5jZyXHrp5jZTjM7M5yauMvM/mBmKS/JFx73JkqaHLY9P7z/GDOrMbP3wmWhmR2d\n5ndzdfi73Glmv5SU+E4csijV+B2uO8nMHgn/ltvM7EEzK4nb9hQzezIck98zsxfMbEy4/R/C2Jvx\nj5Ek7fcws7lm9rqZ7TGz18zsprj1Pc1sVjjm7zKzNWY2LlzX5nbgH8UVkF3TJK1SMNj3C5fX4tbP\nkjRT0gmSnlEweD4q6SxJgyU9JGmhmZ2Qpp0rJP2fpNPCfc42s0+m2ea/JC0O2/mNpF+YWX9JMrM+\nkh6W9JKkYZKulvSTNvysP5K0Ofw5Tw/vny3pAkkXSRoa9nOZmfVLthMz+6ikRZIeVzCt5mfhPuJd\nL+kUSZ9T8Lu7SNKWNP0DkLn/lVQkaWzjHWZ2iIJC4v7wrkyPX/HmK3jnZ6ykcyVNllSWkDkg6buS\nTpb0VUlnKDhOSNJT4br39cEx9+ZW2hoR/gxnhNssM7PCuEwvSd9XcIz5pILfwc9b6f/pkp5Q8Pvq\nJ2mamZmCY9qRkj4taYyCaYOLwnUtmNmXFBznrlNwXP+7pCtbaRf+JR2/w7HrT5L+quBxM1bBY3+J\nmTU+r35A0hvh+qGSqiQ1KBj/vxBmTg73OS1F+/8h6fOSvizpWAXj6N/j1lcrKP6+qmA8vFfS78xs\ncDvbgW/OORYWliwuklYomP8ff1+FJCfpC23Y/mlJM1PtT1KdpAcTtlmfsE2dpKvibjtJN8bdLlDw\nxOJr4e1LFZwvVRiX+Wq4XUUrfb1KUl3c7UMk/UvS5Lj7ukt6VdL14e2ycL/Dw9s/lvSywquZhv
fN\nDDNl4e0lkqrz/bdlYelKi6QaSffF3f6apHckxVrZJt3xq+m2gneonaRRcev7S9ovqaqVNj4raY+k\nbuHtKZJ2JsnFt3Vs2Nan4tYfFv48F8ftx0k6Pi4zKTymdWulPw9Lmh93+6zwZyiLu2+ggiJxbLI+\nKygS707Y7xPxx1eWnDzmmz1ew/t+JOnJhPsODx8rZ4S335VUmWKfFWG2OE3bcyU9GT8Wxq0bFD5+\njkm4f5Gkee1ph8X/wjtXQH49G3/DzA4xs9lmtjacqrJT0nBJx6TZz4sJt1+XlO6z6Jq2cc7tk/Rm\n3DYnKDiJd3dc/pk0+0tmkKQekv4c19Z+Ba8GnpRimxMlPe3C0SG0KiHz35K+FE61uNnMyjPoG4D2\nuV/SuWbWO7w9SdJvnXMNUoeOX41OVPCEcXXjHc65TQqOZ03M7NNm9ng4Heo9SQsl9ZTUns+maWyr\n6djinHtHwTvr8cemPc65+HcLXldwTCtqZ1uvO+fq4traEO6rteNg4nEv8TbyY5ikT4XTNXeGj/PG\nGSmDwq+3SrrHzH5vZj9o57u3jeYrmL3xspndYWbj494ZO02SSVqb0I/xcX1AnlBcAfmVeAW/myWd\nL+k/FbzdP0TBE42eafaTeCEMp/T/361tY+Htjmqc8pJsX6n2n3SaTLMNnXtUwSvaN0sqlvSImVVn\n1EMAbfWwpH2SJppZXwXToe6PW5/p8atR2v/9cOryI5LWhW0NUzBlT+1oJ11b8cemfSnWtef5U2vH\nUz5s9ODTTcFjcEjCcqyC/xE556oUFM6LJI2U9GKy8wdb45yrVTCz49qwzXslPR4WWN0UPHZOT+jD\nifrg/wF5QnEFZN+/FEyFa4vRkn7pnHvIOfeigvOX8vEq1DpJpySce3BGBvt5RcHP33QBCzPrruDc\nhbUptlkraUTCuQifSAw5595yzt3nnJsi6euSKs2sVwZ9BNAGzrk9kn6r4B2rCyRtlfTHuEhHj1/r\nFDwvaTxfU2Z2jILzkxoNV1BEXeGcW+WcezlhvdS2Y+7asK2mc1PN7EMKzl1JdWzK1FpJR1nc5/SZ\n2UAF/U7V1jq1PO61OA4i65I9lmoVnMe0yTn3SsLSdOl959x659xc59x4Sf8j6eK4fSrJfltwzr3n\nnFvgnLtcwbtSn1ZwTuJfFBTtJUn60Hj+cZvbgV8UV0D21Uk6w4KrBBbHva2fzMuSPm9mp5nZKQpe\nFY7lopMJfqXgHIG7w6sijVXw6pnUjldanXO7FEzhu8mCqw2eGN4+UtK8FJv9XMGrdXPM7Hgz+6Kk\ny+IDZvYjMzvXzI4N93mepA3hkz8A2XO/pHEK/icfcM4diFvXoeNXOP1umaQ7zeyTZjZEwdSo+OnJ\n6xU8d/mumQ0ws68ouIBFvDpJMTM7Kzzm9k5YLxdcJn1x2Na/xfX3XQUXI/DpCUkvSPqVmQ2z4APT\nf6XgSfrvU2zzUwUvGH0jPM59X8HFN5BbdWo5ft+h4Py831hwVd2BZjbWzO4ys0PNrDCcxlcRbjdC\nwQsPjYX0JgXj6HgzOyK8gFQLFlwV8ytmdqKZfUzBec/vStocvqjwK0nzzeyLYR+GW/BB0+e1px34\nR3EFZN/NCl5BWqvgvKbWzj+4UtI2SSsVXHXr6fD7nHLO7ZR0joJX5/6i4EqBVeHqhnbu7hoFV86q\nlvS8pFMlfdY590aKtv+hoFj6rIInJFdImpEQ2yPphnD9nyUdGvYXQHb9ScGVOU9S8ymBkp/j1xRJ\nGxUUHb9TUOjUNa4M3xGbFra1VsG7AVfF78AFn7P3c0kPKjjmXp2irakKpi0uCb/2VnBs2p0in5Hw\n/NFzw76sUHCJ7K2Szk04tzR+m98oOObeoOAYfIqC83iQWy3Gb+fc65JGKThnb5mkvykouPaEy34F\nF7i4V8HV/WoUnC93pSSF7yxdp+BvWy/pdiX3nqTvKXhs1i
qY9vfvzrn3w/VTFYyrsxVc2fdhSZ9S\nUFS1px14Zin+rwGgGTObqGCQ6Ov4tHcAAIAWCtJHAHRFZlYpaYOCqyB9XNIcSb+jsAIAAEiO4gpA\nKkdK+qGCDx/cquDqSNfktUcAAAARxrRAAAAAAPCAC1oAAAAAgAcUVwAAAADgAcUVAAAAAHhAcQUA\nAAAAHlBcAQAAAIAHFFcAAAAA4MH/AzRZ/EYUQ5hZAAAAAElFTkSuQmCC\n", 103 | "text/plain": [ 104 | "" 105 | ] 106 | }, 107 | "metadata": {}, 108 | "output_type": "display_data" 109 | } 110 | ], 111 | "source": [ 112 | "mglearn.plots.plot_proper_processing()\n", 113 | "plt.show()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "交差検定で情報がリークするのは前処理ステップの性質に依存する.スケールはテストデータを用いて推定してもあまり影響は出ないが.特徴量抽出や特徴量選択をテストデータで行うと結果に大きく影響が出る." 121 | ] 122 | } 123 | ], 124 | "metadata": { 125 | "kernelspec": { 126 | "display_name": "Python 3", 127 | "language": "python", 128 | "name": "python3" 129 | }, 130 | "language_info": { 131 | "codemirror_mode": { 132 | "name": "ipython", 133 | "version": 3 134 | }, 135 | "file_extension": ".py", 136 | "mimetype": "text/x-python", 137 | "name": "python", 138 | "nbconvert_exporter": "python", 139 | "pygments_lexer": "ipython3", 140 | "version": "3.5.4" 141 | } 142 | }, 143 | "nbformat": 4, 144 | "nbformat_minor": 2 145 | } 146 | -------------------------------------------------------------------------------- /ch07/README.md: -------------------------------------------------------------------------------- 1 | # テキストデータの処理 2 | 3 | ### 文字列として表現されているデータのタイプ 4 | 5 | 文字列データには次の4つの種類がある. 6 | 7 | - カテゴリデータ 8 | - 意味的にはカテゴリに分類出来る自由に書かれた文字列 9 | - 構造化された文字列 10 | - テキストデータ --------------------------------------------------------------------------------