├── README.md ├── .gitignore ├── .DS_Store ├── pdfs └── 事前準備ガイド.pdf └── notebooks ├── .DS_Store ├── ch00-14-matplotlib2.ipynb ├── ch00-13-numpy2.ipynb ├── ch00-12-python2.ipynb ├── ch07-regression-xtech.ipynb ├── ch09-multi-classify-xtech.ipynb ├── ch08-bi-classify-xtech.ipynb ├── ch00-11-python-entry.ipynb └── .ipynb_checkpoints ├── ch00-11-python-entry-checkpoint.ipynb └── 11-python-entry-checkpoint.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # xtech-course-contents 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | local 3 | .ipynb_checkpoints 4 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/makaishi2/xtech-course-contents/master/.DS_Store -------------------------------------------------------------------------------- /pdfs/事前準備ガイド.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/makaishi2/xtech-course-contents/master/pdfs/事前準備ガイド.pdf -------------------------------------------------------------------------------- /notebooks/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/makaishi2/xtech-course-contents/master/notebooks/.DS_Store -------------------------------------------------------------------------------- /notebooks/ch00-14-matplotlib2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Matplotlib2\n", 8 | "matplotlibを使ったグラフの補足例です。" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | 
"metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "# ライブラリのロード\n", 18 | "%matplotlib inline\n", 19 | "import numpy as np\n", 20 | "import matplotlib.pyplot as plt" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "# PDF出力用\n", 30 | "from IPython.display import set_matplotlib_formats\n", 31 | "set_matplotlib_formats('png', 'pdf')" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# 交差エントロピー関数 (正解値 = 1の場合)\n", 41 | "def cross_entropy_1(x):\n", 42 | " return -np.log(x)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# 交差エントロピー関数 (正解値 = 0の場合)\n", 52 | "def cross_entropy_0(x):\n", 53 | " return -np.log(1-x)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "# xの配列作成\n", 63 | "# linspaceは与えられた区間の間に等間隔の点を取る関数です。\n", 64 | "\n", 65 | "x1 = np.linspace(0.5, 1, 101)\n", 66 | "x0 = np.linspace(0, 0.5, 101)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "# yの配列作成\n", 76 | "y1 = cross_entropy_1(x1)\n", 77 | "y0 = cross_entropy_0(x0)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "# グラフ表示 (正解値=1)\n", 87 | "plt.figure(figsize=(6,6))\n", 88 | "\n", 89 | "plt.plot(x1, y1, label='cross_entropy_1')\n", 90 | "\n", 91 | "# グラフ上にグリッド表示を追加\n", 92 | "plt.grid()\n", 93 | "\n", 94 | "# グラフ上に凡例表示を追加\n", 95 | "plt.legend(fontsize=14)\n", 96 | "\n", 97 | "\n", 98 | "# 画面描画を明示的に指示\n", 99 | "plt.show()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | 
"source": [ 108 | "# グラフ表示 (正解値=0)\n", 109 | "plt.figure(figsize=(6,6))\n", 110 | "\n", 111 | "plt.plot(x0, y0, label='cross_entropy_0')\n", 112 | "\n", 113 | "# グラフ上にグリッド表示を追加\n", 114 | "plt.grid()\n", 115 | "\n", 116 | "# グラフ上に凡例表示を追加\n", 117 | "plt.legend(fontsize=14)\n", 118 | "\n", 119 | "# 画面描画を明示的に指示\n", 120 | "plt.show()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [] 129 | } 130 | ], 131 | "metadata": { 132 | "kernelspec": { 133 | "display_name": "Python 3", 134 | "language": "python", 135 | "name": "python3" 136 | }, 137 | "language_info": { 138 | "codemirror_mode": { 139 | "name": "ipython", 140 | "version": 3 141 | }, 142 | "file_extension": ".py", 143 | "mimetype": "text/x-python", 144 | "name": "python", 145 | "nbconvert_exporter": "python", 146 | "pygments_lexer": "ipython3", 147 | "version": "3.7.3" 148 | } 149 | }, 150 | "nbformat": 4, 151 | "nbformat_minor": 2 152 | } 153 | -------------------------------------------------------------------------------- /notebooks/ch00-13-numpy2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Numpy入門2\n", 8 | "7章の単回帰モデル実習での、学習データ準備の過程を詳しく説明しました。 \n", 9 | "(この解説は書籍には含まれていません)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# 必要ライブラリimport\n", 19 | "import numpy as np\n", 20 | "from sklearn.datasets import load_boston" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "# 学習用データの読み込み\n", 30 | "# Boston Dataセットというよく使われるデータなので、関数を呼び出すと自動的にダウンロードされます\n", 31 | "\n", 32 | "boston = load_boston()\n", 33 | "x_org, yt = boston.data, boston.target\n", 34 | "feature_names = boston.feature_names" 35 | ] 36 | }, 
37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# 正解データ ytの表示(一部)\n", 44 | "\n", 45 | "print(yt[:5])" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# x_orgの内容を調べる\n", 55 | "\n", 56 | "print('クラス名:', type(x_org))\n", 57 | "print('サイズ:', x_org.shape)\n", 58 | "print('内容表示(一部):')\n", 59 | "print( x_org[:5,:])\n", 60 | "print('項目名: ', feature_names)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "# x_org は506 x 13 の行列\n", 70 | "# この行列から'RM' の列だけを抽出する\n", 71 | "\n", 72 | "x_data = x_org[:,feature_names == 'RM']" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "# 結果確認\n", 82 | "\n", 83 | "print('絞り込み後のサイズ', x_data.shape)\n", 84 | "print('絞り込み後の内容(一部)')\n", 85 | "print( x_data[:5])" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "# ダミー変数を追加\n", 95 | "\n", 96 | "x = np.insert(x_data, 0, 1.0, axis=1)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "# 結果確認\n", 106 | "\n", 107 | "print('ダミー変数追加後のサイズ', x.shape)\n", 108 | "print('ダミー変数追加後の内容(一部):')\n", 109 | "print( x[:5,:])" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "## 重回帰用データ追加" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# 列(LSTAT: 低所得者率)の取得\n", 126 | "x_add = x_org[:,feature_names == 'LSTAT']" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": 
[], 134 | "source": [ 135 | "# 結果確認\n", 136 | "print('絞り込み後のサイズ', x_add.shape)\n", 137 | "print('絞り込み後の内容(一部)')\n", 138 | "print( x_add[:5])" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "# xに列を追加\n", 148 | "x2 = np.hstack((x, x_add))" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "# 結果確認\n", 158 | "print('絞り込み後のサイズ', x2.shape)\n", 159 | "print('絞り込み後の内容(一部)')\n", 160 | "print( x2[:5])" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [] 169 | } 170 | ], 171 | "metadata": { 172 | "kernelspec": { 173 | "display_name": "Python 3", 174 | "language": "python", 175 | "name": "python3" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 3 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython3", 187 | "version": "3.7.3" 188 | } 189 | }, 190 | "nbformat": 4, 191 | "nbformat_minor": 2 192 | } 193 | -------------------------------------------------------------------------------- /notebooks/ch00-12-python2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Python入門2\n", 8 | "文字列から関数定義まで" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "### 文字列\n", 16 | "\n", 17 | "文字列はstrという型を持っていますが、文字の配列としての性質も持っています。" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "str1 = 'Hello World!'" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 
null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# クラス名が表示される (文字列クラスとしての振る舞い)\n", 36 | "\n", 37 | "print(type(str1))" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# 文字数が表示される (配列としての振る舞い)\n", 47 | "\n", 48 | "print(len(str1))" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "# 最初の文字が表示される (配列としての振る舞い)\n", 58 | "\n", 59 | "print(str1[0])" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# 0以上5未満の要素が表示 (配列としての振る舞い)\n", 69 | "\n", 70 | "print(str1[0:5])" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "#### print関数の利用\n", 78 | "\n", 79 | " print関数で、文字列と数値をつないで表示したい場合、複数のパラメータを列挙する形にします。" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "# 整数変数\n", 89 | "\n", 90 | "x1 = 1\n", 91 | "\n", 92 | "# 浮動小数点変数\n", 93 | "\n", 94 | "x2 = 2.0\n", 95 | "\n", 96 | "# 整数の表示\n", 97 | "print('x1 = ', x1, ' x2 = ', x2)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "# print文実行の別方法\n", 107 | "# (書式つき文字列)\n", 108 | "\n", 109 | "print('x1 = %d x2 = %3.1f' % (x1, x2))" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "### range関数\n", 117 | "\n", 118 | "``range``関数を使うと、数値を要素としてもつリストを一気に作ることができます。 \n", 119 | "Python3では、そのままprint関数にかけると``range``オブジェクトとして表示されてしまうので、 \n", 120 | "``print``関数でその内容を見たい場合は、その結果を``list``関数にかけます。" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "# 1引数の例 (0から10未満まで)\n", 130 | 
"\n", 131 | "print(range(10))\n", 132 | "print(list(range(10)))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "# 2引数の例 (2から10未満まで)\n", 142 | "\n", 143 | "print(list(range(2, 10)))" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# 3引数の例 (1から11未満まで2刻み)\n", 153 | "\n", 154 | "print(list(range(1, 11, 2)))" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "### 辞書(key-value)\n", 162 | "\n", 163 | "辞書型のデータも扱うことができます。 \n", 164 | "辞書は``dict['key1']``のような形で参照します。 \n", 165 | "他のプログラム言語でハッシュ型とか連想配列と呼ばれる型になります。" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "# 辞書型の宣言\n", 175 | "\n", 176 | "dict = {\n", 177 | " 'key1': 1,\n", 178 | " 'key2': 2.0,\n", 179 | " 'key3': 'abc'\n", 180 | "}" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "# キーから値の取得\n", 190 | "\n", 191 | "print(dict['key1'])\n", 192 | "print(dict['key2'])\n", 193 | "print(dict['key3'])" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "# キーの一覧表示\n", 203 | "\n", 204 | "keys = dict.keys()\n", 205 | "print(keys)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "# ソートしたい場合\n", 215 | "print(sorted(keys))" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "# 値の一覧表示\n", 225 | "\n", 226 | "values = dict.values()\n", 227 | "print(values)" 228 | ] 229 | }, 230 | { 231 | 
"cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "#### items関数\n", 235 | "\n", 236 | "辞書型データに対しては、``keys()``や``values()``という関数以外に``items()``という関数があり、 \n", 237 | "``(key, value)``のペアをリストにして取得できます。 \n", 238 | "この関数は、後で説明するループ処理の時に便利な関数です。" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "print(dict)\n", 248 | "print(dict.items())" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "### 分岐\n", 256 | "\n", 257 | "Pythonの他の言語との大きな違いの一つとしては分岐、ループなどの制御構造の書き方があります。 \n", 258 | "制御構造の始まりの行は:で終わります。 \n", 259 | "また、制御構造の内部はインデント(字下げ)で表現します。\n", 260 | "(インデントの時はタブキーを入力) \n", 261 | "字下げが文法的に意味があるということです。 \n", 262 | "こうすることで、制御構造の戻りの記述が不要になり、プログラムが見やすくなっています。" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "list4 = [0, 1, 2, 3]\n", 272 | "if list4[0] == 0:\n", 273 | " print('true branch')\n", 274 | "else:\n", 275 | " print('false branch')\n", 276 | "\n", 277 | "list4[0] = 1\n", 278 | "if list4[0] == 0:\n", 279 | " print('true branch')\n", 280 | "else:\n", 281 | " print('false branch')" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "### ループ\n", 289 | "\n", 290 | "ループも分岐同様、始まりが:でループ内部をインデントで表現します。 \n", 291 | "forループの場合、制御変数を使うことはあまりなく、 \n", 292 | "\n", 293 | "for (要素) in (List): \n", 294 | " (要素を使った処理) \n", 295 | "\n", 296 | "で組むことが多いです。\n" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "list5 = ['one', 'two', 'three']\n", 306 | "for item in list5:\n", 307 | " print(item)" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "# range関数とループを組み合わせた例\n", 317 | 
"for index in range(5):\n", 318 | " print(index)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "# 辞書型データを対象にしたループ処理\n", 328 | "dict = {\n", 329 | " 'key1': 1,\n", 330 | " 'key2': 2.0,\n", 331 | " 'key3': 'abc'\n", 332 | "}\n", 333 | "\n", 334 | "# 辞書型変数dictに対してitems()関数で(key, value)をセットで取得します。\n", 335 | "\n", 336 | "for key, value in dict.items():\n", 337 | " print(key, ' = ', value)" 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": {}, 343 | "source": [ 344 | "#### リスト内包表記\n", 345 | "\n", 346 | "リスト内包表記はPython固有の表記法の一つです。 \n", 347 | "ループ処理を簡潔で直感的にわかりやすい表記で記述することができます。 \n", 348 | "機械学習のコーディングでよく出てくるので、是非使えるようにして下さい。" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": {}, 354 | "source": [ 355 | "![](https://github.com/makaishi2/kcgi-course-contents/raw/master/images/python-fig1.png)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "list_str = ['abc', 'def', 'ghi']" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "# 「リスト内包表記」の実装サンプル\n", 374 | "# 各要素の後ろに':'を追加\n", 375 | "\n", 376 | "ret3 = [item + ':' for item in list_str]\n", 377 | "print(ret3)" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "![](https://github.com/makaishi2/kcgi-course-contents/raw/master/images/python-fig2.png)" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "# 「リスト内包表記」をifと組みあわせた実装サンプル\n", 394 | "# 前の計算結果のうち、要素が'def'のものを除く\n", 395 | "\n", 396 | "ret4 = [item + ':' for item in list_str if item != 'def']\n", 397 | "print(ret4)" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | 
"metadata": {}, 403 | "source": [ 404 | "### enumerate関数\n", 405 | "\n", 406 | "``enumerate``関数を使うと、リストから``(index, element)``のペアを生成することができます。 \n", 407 | "この関数はループ処理で便利です。" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": null, 413 | "metadata": {}, 414 | "outputs": [], 415 | "source": [ 416 | "list5 = ['one', 'two', 'three']\n", 417 | "\n", 418 | "for index, item in enumerate(list5):\n", 419 | " print(index, ': ', item)" 420 | ] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": {}, 425 | "source": [ 426 | "### 関数\n", 427 | "\n", 428 | "関数も分岐、ループ同様インデントで関数内部を表現します。\n" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "# 関数定義の例\n", 438 | "\n", 439 | "def square(x) :\n", 440 | " return x * x" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": null, 446 | "metadata": {}, 447 | "outputs": [], 448 | "source": [ 449 | "# 関数呼出しの例\n", 450 | "\n", 451 | "print(square(3))" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": null, 457 | "metadata": {}, 458 | "outputs": [], 459 | "source": [ 460 | "# 無名関数(lambda式)を使った関数定義の例\n", 461 | "\n", 462 | "square2 = lambda x: x**2\n", 463 | "print(square2(3))" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": null, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [ 472 | "# 複数の値を返す関数の例\n", 473 | "def powers(x):\n", 474 | " return x ** 2, x ** 3, x ** 4" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": null, 480 | "metadata": {}, 481 | "outputs": [], 482 | "source": [ 483 | "x2, x3, x4 = powers(5)\n", 484 | "print('x2 = ', x2)\n", 485 | "print('x3 = ', x3)\n", 486 | "print('x4 = ', x4)\n" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": {}, 493 | "outputs": [], 494 | "source": [] 495 | } 496 | ], 497 | "metadata": { 498 | 
"kernelspec": { 499 | "display_name": "Python 3", 500 | "language": "python", 501 | "name": "python3" 502 | }, 503 | "language_info": { 504 | "codemirror_mode": { 505 | "name": "ipython", 506 | "version": 3 507 | }, 508 | "file_extension": ".py", 509 | "mimetype": "text/x-python", 510 | "name": "python", 511 | "nbconvert_exporter": "python", 512 | "pygments_lexer": "ipython3", 513 | "version": "3.7.3" 514 | } 515 | }, 516 | "nbformat": 4, 517 | "nbformat_minor": 1 518 | } 519 | -------------------------------------------------------------------------------- /notebooks/ch07-regression-xtech.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 7章 線形回帰" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# 必要ライブラリの宣言\n", 17 | "%matplotlib inline\n", 18 | "import numpy as np\n", 19 | "import matplotlib.pyplot as plt" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# PDF出力用\n", 29 | "from IPython.display import set_matplotlib_formats\n", 30 | "set_matplotlib_formats('png', 'pdf')" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# 必要ライブラリimport\n", 40 | "from sklearn.datasets import load_boston" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "# 学習用データの読み込み\n", 50 | "# Boston Dataセットというよく使われるデータなので、関数を呼び出すと自動的にダウンロードされます\n", 51 | "\n", 52 | "boston = load_boston()\n", 53 | "x_org, yt = boston.data, boston.target\n", 54 | "feature_names = boston.feature_names" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 
62 | "source": [ 63 | "# 正解データ ytの表示(一部)\n", 64 | "\n", 65 | "print(yt[:5])" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "# x_orgの内容を調べる\n", 75 | "\n", 76 | "print('クラス名:', type(x_org))\n", 77 | "print('サイズ:', x_org.shape)\n", 78 | "print('内容表示(一部):')\n", 79 | "print( x_org[:5,:])\n", 80 | "print('項目名: ', feature_names)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "# x_org は506 x 13 の行列\n", 90 | "# この行列から'RM' の列だけを抽出する\n", 91 | "\n", 92 | "x_data = x_org[:,feature_names == 'RM']" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "# 結果確認\n", 102 | "\n", 103 | "print('絞り込み後のサイズ', x_data.shape)\n", 104 | "print('絞り込み後の内容(一部)')\n", 105 | "print( x_data[:5])" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "# ダミー変数を追加\n", 115 | "\n", 116 | "x = np.insert(x_data, 0, 1.0, axis=1)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# 結果確認\n", 126 | "\n", 127 | "print('ダミー変数追加後のサイズ', x.shape)\n", 128 | "print('ダミー変数追加後の内容(一部):')\n", 129 | "print( x[:5,:])" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "# 散布図の表示\n", 139 | "plt.scatter(x[:,1], yt, s=10, c='b')\n", 140 | "plt.xlabel('ROOM', fontsize=14)\n", 141 | "plt.ylabel('PRICE', fontsize=14)\n", 142 | "plt.grid()\n", 143 | "plt.show()" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# 予測関数 (1, x)の値から予測値ypを計算する\n", 153 | "def pred(x, 
w):\n", 154 | " return(x @ w)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "# 初期化処理\n", 164 | "\n", 165 | "# データ系列総数\n", 166 | "M = x.shape[0]\n", 167 | "\n", 168 | "# 入力データ次元数(ダミー変数を含む)\n", 169 | "D = x.shape[1]\n", 170 | "\n", 171 | "# 繰り返し回数\n", 172 | "iters = 50000\n", 173 | "\n", 174 | "# 学習率\n", 175 | "alpha = 0.01\n", 176 | "\n", 177 | "# 重みベクトルの初期値 (すべての値を1にする)\n", 178 | "w = np.ones(D)\n", 179 | "\n", 180 | "# 評価結果記録用 (損失関数値のみ記録)\n", 181 | "history = np.zeros((0,2))" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "# 繰り返しループ\n", 191 | "for k in range(iters):\n", 192 | " \n", 193 | " # 予測値の計算 (7.8.1)\n", 194 | " yp = pred(x, w)\n", 195 | " \n", 196 | " # 誤差の計算 (7.8.2)\n", 197 | " yd = yp - yt\n", 198 | " \n", 199 | " # 勾配降下法の実装 (7.8.4)\n", 200 | " w = w - alpha * (x.T @ yd) / M\n", 201 | " \n", 202 | " # 学習曲線描画用データの計算、保存\n", 203 | " if ( k % 100 == 0):\n", 204 | " # 損失関数値の計算 (7.6.1)\n", 205 | " loss = np.mean(yd ** 2) / 2\n", 206 | " # 計算結果の記録\n", 207 | " history = np.vstack((history, np.array([k, loss])))\n", 208 | " # 画面表示\n", 209 | " print( \"iter = %d loss = %f\" % (k, loss)) " 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "# 最終的な損失関数初期値、最終値\n", 219 | "print('損失関数初期値: %f' % history[0,1])\n", 220 | "print('損失関数最終値: %f' % history[-1,1])" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "# 下記直線描画用の座標値計算\n", 230 | "xall = x[:,1]\n", 231 | "xl = np.array([[1, xall.min()],[1, xall.max()]])\n", 232 | "yl = pred(xl, w)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | 
"source": [ 241 | "# 散布図と回帰直線の描画\n", 242 | "plt.figure(figsize=(6,6))\n", 243 | "plt.scatter(x[:,1], yt, s=10, c='b')\n", 244 | "plt.xlabel('ROOM', fontsize=14)\n", 245 | "plt.ylabel('PRICE', fontsize=14)\n", 246 | "plt.plot(xl[:,1], yl, c='k')\n", 247 | "plt.grid()\n", 248 | "plt.show()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "# 学習曲線の表示 (最初の1個分を除く)\n", 258 | "plt.plot(history[1:,0], history[1:,1])\n", 259 | "plt.grid()\n", 260 | "plt.show()" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "## 単回帰モデルをライブラリを使って実装する" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "# ライブラリ利用時はダミー変数が不要なので除去します\n", 277 | "x_lib = x[:,1:2]\n", 278 | "\n", 279 | "# 結果確認\n", 280 | "print(x_lib[:5])" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "# scikit-learn ライブラリから線形回帰モデルをロードします\n", 290 | "from sklearn import linear_model" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [ 299 | "# 線形回帰モデルのインスタンスを生成します\n", 300 | "model1 = linear_model.LinearRegression()" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "# fit関数で学習します (第一引数 入力データ、第二引数 教師データ)\n", 310 | "model1.fit(x_lib, yt)" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "# 2つの方式の結果を比較します\n", 320 | "\n", 321 | "# 7章の方式での切片と傾き\n", 322 | "print(\"7章の方式: %6.2f %6.2f\" % (w[0], w[1]))\n", 323 | "\n", 324 | "# ライブラリ利用時の切片と傾き\n", 325 | "# ライブラリの場合、切片はintercept_、傾きはcoef_で取得できます\n", 326 | 
"print(\"ライブラリ: %6.2f %6.2f\" % (model1.intercept_, model1.coef_))" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "## 7.10 重回帰モデルへの拡張" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [ 342 | "# 列(LSTAT: 低所得者率)の取得\n", 343 | "x_add = x_org[:,feature_names == 'LSTAT']" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "# 結果確認\n", 353 | "print('絞り込み後のサイズ', x_add.shape)\n", 354 | "print('絞り込み後の内容(一部)')\n", 355 | "print( x_add[:5])" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "# xに列を追加\n", 365 | "x2 = np.hstack((x, x_add))" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": null, 371 | "metadata": {}, 372 | "outputs": [], 373 | "source": [ 374 | "# 結果確認\n", 375 | "print('絞り込み後のサイズ', x2.shape)\n", 376 | "print('絞り込み後の内容(一部)')\n", 377 | "print( x2[:5])" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": null, 383 | "metadata": {}, 384 | "outputs": [], 385 | "source": [ 386 | "# 初期化処理\n", 387 | "\n", 388 | "# データ系列総数\n", 389 | "M = x2.shape[0]\n", 390 | "\n", 391 | "# 入力データ次元数(ダミー変数を含む)\n", 392 | "D = x2.shape[1]\n", 393 | "\n", 394 | "# 繰り返し回数\n", 395 | "iters = 50000\n", 396 | "\n", 397 | "# 学習率\n", 398 | "alpha = 0.01\n", 399 | "\n", 400 | "# 重みベクトルの初期値 (すべての値を1にする)\n", 401 | "w = np.ones(D)\n", 402 | "\n", 403 | "# 評価結果記録用 (損失関数値のみ記録)\n", 404 | "history = np.zeros((0,2))" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [ 413 | "# 繰り返しループ\n", 414 | "for k in range(iters):\n", 415 | " \n", 416 | " # 予測値の計算 (7.8.1)\n", 417 | " yp = pred(x2, w)\n", 418 | " \n", 419 | " # 誤差の計算 
(7.8.2)\n", 420 | " yd = yp - yt\n", 421 | " \n", 422 | " # 勾配降下法の実装 (7.8.4)\n", 423 | " w = w - alpha * (x2.T @ yd) / M\n", 424 | " \n", 425 | " # 学習曲線描画用データの計算、保存\n", 426 | " if ( k % 100 == 0):\n", 427 | " # 損失関数値の計算 (7.6.1)\n", 428 | " loss = np.mean(yd ** 2) / 2\n", 429 | " # 計算結果の記録\n", 430 | " history = np.vstack((history, np.array([k, loss])))\n", 431 | " # 画面表示\n", 432 | " print( \"iter = %d loss = %f\" % (k, loss)) " 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "# 初期化処理 (パラメータを適切な値に変更)\n", 442 | "\n", 443 | "# データ系列総数\n", 444 | "M = x2.shape[0]\n", 445 | "\n", 446 | "# 入力データ次元数(ダミー変数を含む)\n", 447 | "D = x2.shape[1]\n", 448 | "\n", 449 | "# 繰り返し回数\n", 450 | "#iters = 50000\n", 451 | "iters = 2000\n", 452 | "\n", 453 | "# 学習率\n", 454 | "#alpha = 0.01\n", 455 | "alpha = 0.001\n", 456 | "\n", 457 | "# 重みベクトルの初期値 (すべての値を1にする)\n", 458 | "w = np.ones(D)\n", 459 | "\n", 460 | "# 評価結果記録用 (損失関数値のみ記録)\n", 461 | "history = np.zeros((0,2))" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": null, 467 | "metadata": {}, 468 | "outputs": [], 469 | "source": [ 470 | "# 繰り返しループ\n", 471 | "for k in range(iters):\n", 472 | " \n", 473 | " # 予測値の計算 (7.8.1)\n", 474 | " yp = pred(x2, w)\n", 475 | " \n", 476 | " # 誤差の計算 (7.8.2)\n", 477 | " yd = yp - yt\n", 478 | " \n", 479 | " # 勾配降下法の実装 (7.8.4)\n", 480 | " w = w - alpha * (x2.T @ yd) / M\n", 481 | " \n", 482 | " # 学習曲線描画用データの計算、保存\n", 483 | " if ( k % 100 == 0):\n", 484 | " # 損失関数値の計算 (7.6.1)\n", 485 | " loss = np.mean(yd ** 2) / 2\n", 486 | " # 計算結果の記録\n", 487 | " history = np.vstack((history, np.array([k, loss])))\n", 488 | " # 画面表示\n", 489 | " print( \"iter = %d loss = %f\" % (k, loss)) " 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": null, 495 | "metadata": { 496 | "scrolled": true 497 | }, 498 | "outputs": [], 499 | "source": [ 500 | "# 最終的な損失関数初期値、最終値\n", 501 | 
"print('損失関数初期値: %f' % history[0,1])\n", 502 | "print('損失関数最終値: %f' % history[-1,1])" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": null, 508 | "metadata": {}, 509 | "outputs": [], 510 | "source": [ 511 | "# 学習曲線の表示 (全データ)\n", 512 | "plt.plot(history[:,0], history[:,1])\n", 513 | "plt.show()" 514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": null, 519 | "metadata": {}, 520 | "outputs": [], 521 | "source": [] 522 | } 523 | ], 524 | "metadata": { 525 | "kernelspec": { 526 | "display_name": "Python 3", 527 | "language": "python", 528 | "name": "python3" 529 | }, 530 | "language_info": { 531 | "codemirror_mode": { 532 | "name": "ipython", 533 | "version": 3 534 | }, 535 | "file_extension": ".py", 536 | "mimetype": "text/x-python", 537 | "name": "python", 538 | "nbconvert_exporter": "python", 539 | "pygments_lexer": "ipython3", 540 | "version": "3.7.3" 541 | } 542 | }, 543 | "nbformat": 4, 544 | "nbformat_minor": 1 545 | } 546 | -------------------------------------------------------------------------------- /notebooks/ch09-multi-classify-xtech.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# 9章 多値分類" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# 必要ライブラリの宣言\n", 19 | "%matplotlib inline\n", 20 | "import numpy as np\n", 21 | "import matplotlib.pyplot as plt" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# PDF出力用\n", 31 | "from IPython.display import set_matplotlib_formats\n", 32 | "set_matplotlib_formats('png', 'pdf')" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "### データ読み込み" 40 | ] 41 | }, 42 | { 43 | 
"cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "# 学習用データ準備\n", 49 | "from sklearn.datasets import load_iris\n", 50 | "iris = load_iris()\n", 51 | "x_org, y_org = iris.data, iris.target\n", 52 | "\n", 53 | "# 入力データに関しては、sepal length(0)とpetal length(2)のみ抽出\n", 54 | "x_select = x_org[:,[0,2]]\n", 55 | "print('元データ', x_select.shape, y_org.shape)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### 学習データの散布図表示" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# 散布図の表示\n", 72 | "x_t0 = x_select[y_org == 0]\n", 73 | "x_t1 = x_select[y_org == 1]\n", 74 | "x_t2 = x_select[y_org == 2]\n", 75 | "plt.figure(figsize=(6,6))\n", 76 | "plt.scatter(x_t0[:,0], x_t0[:,1], marker='x', c='k', s=50, label='0 (setosa)')\n", 77 | "plt.scatter(x_t1[:,0], x_t1[:,1], marker='o', c='b', s=50, label='1 (versicolour)')\n", 78 | "plt.scatter(x_t2[:,0], x_t2[:,1], marker='+', c='k', s=50, label='2 (virginica)')\n", 79 | "plt.xlabel('sepal_length', fontsize=14)\n", 80 | "plt.ylabel('petal_length', fontsize=14)\n", 81 | "plt.xticks(size=14)\n", 82 | "plt.yticks(size=14)\n", 83 | "plt.legend(fontsize=14)\n", 84 | "plt.show()" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "### データ前処理" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "# ダミー変数を追加\n", 101 | "x_all = np.insert(x_select, 0, 1.0, axis=1)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "# yをOne-hot-Vectorに\n", 111 | "from sklearn.preprocessing import OneHotEncoder\n", 112 | "ohe = OneHotEncoder(sparse=False,categories='auto')\n", 113 | "y_work = np.c_[y_org]\n", 114 | "y_all_one = 
ohe.fit_transform(y_work)\n", 115 | "print('オリジナル', y_org.shape)\n", 116 | "print('2次元化', y_work.shape)\n", 117 | "print('One Hot Vector化後', y_all_one.shape)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "# 学習データ、検証データに分割\n", 127 | "from sklearn.model_selection import train_test_split\n", 128 | "\n", 129 | "x_train, x_test, y_train, y_test, y_train_one, y_test_one = train_test_split(\n", 130 | " x_all, y_org, y_all_one, train_size=75, test_size=75, random_state=123)\n", 131 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, \n", 132 | " y_train_one.shape, y_test_one.shape)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "scrolled": true 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "print('入力データ(x)')\n", 144 | "print(x_train[:5,:])" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "print('正解データ(y)')\n", 154 | "print(y_train[:5])" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "print('正解データ (One Hot Vector化後)')\n", 164 | "print(y_train_one[:5,:])" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "### 学習用変数の設定" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "# 学習対象の選択\n", 181 | "x, yt = x_train, y_train_one" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "### 予測関数" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "# softmax関数 (9.7.3)\n", 198 | "def softmax(x):\n", 199 | " x 
= x.T\n", 200 | " x_max = x.max(axis=0)\n", 201 | " x = x - x_max\n", 202 | " w = np.exp(x)\n", 203 | " return (w / w.sum(axis=0)).T" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "# 予測値の計算 (9.7.1, 9.7.2)\n", 213 | "def pred(x, W):\n", 214 | " return softmax(x @ W)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "### 評価" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "# 交差エントロピー関数 (9.5.1)\n", 231 | "def cross_entropy(yt, yp):\n", 232 | " return -np.mean(np.sum(yt * np.log(yp), axis=1))" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "# モデルの評価を行う関数\n", 242 | "from sklearn.metrics import accuracy_score\n", 243 | "\n", 244 | "def evaluate(x_test, y_test, y_test_one, W):\n", 245 | " \n", 246 | " # 予測値の計算(確率値)\n", 247 | " yp_test_one = pred(x_test, W)\n", 248 | " \n", 249 | " # 確率値から予測クラス(0, 1, 2)を導出\n", 250 | " yp_test = np.argmax(yp_test_one, axis=1)\n", 251 | " \n", 252 | " # 損失関数値の計算\n", 253 | " loss = cross_entropy(y_test_one, yp_test_one)\n", 254 | " \n", 255 | " # 精度の算出\n", 256 | " score = accuracy_score(y_test, yp_test)\n", 257 | " return loss, score " 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "### 初期化処理" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "# 初期化処理\n", 274 | "\n", 275 | "# 標本数\n", 276 | "M = x.shape[0]\n", 277 | "# 入力次元数(ダミー変数を含む)\n", 278 | "D = x.shape[1]\n", 279 | "# 分類先クラス数\n", 280 | "N = yt.shape[1]\n", 281 | "\n", 282 | "# 繰り返し回数\n", 283 | "iters = 10000\n", 284 | "\n", 285 | "# 学習率\n", 286 | "alpha = 0.01\n", 287 | "\n", 288 |
"# 重み行列の初期設定(すべて1)\n", 289 | "W = np.ones((D, N)) \n", 290 | "\n", 291 | "# 評価結果記録用\n", 292 | "history = np.zeros((0, 3))" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "### メイン処理" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": { 306 | "scrolled": true 307 | }, 308 | "outputs": [], 309 | "source": [ 310 | "# メイン処理\n", 311 | "for k in range(iters):\n", 312 | " \n", 313 | " # 予測値の計算 (9.7.1) (9.7.2)\n", 314 | " yp = pred(x, W)\n", 315 | " \n", 316 | " # 誤差の計算 (9.7.4)\n", 317 | " yd = yp - yt\n", 318 | "\n", 319 | " # 重みの更新 (9.7.5)\n", 320 | " W = W - alpha * (x.T @ yd) / M\n", 321 | "\n", 322 | " if (k % 10 == 0):\n", 323 | " loss, score = evaluate(x_test, y_test, y_test_one, W)\n", 324 | " history = np.vstack((history,\n", 325 | " np.array([k, loss, score])))\n", 326 | " print(\"epoch = %d loss = %f score = %f\" \n", 327 | " % (k, loss, score))" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": [ 334 | "### 結果確認" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "#損失関数値と精度の確認\n", 344 | "print('初期状態: 損失関数:%f 精度:%f' \n", 345 | " % (history[0,1], history[0,2]))\n", 346 | "print('最終状態: 損失関数:%f 精度:%f' \n", 347 | " % (history[-1,1], history[-1,2]))" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "# 学習曲線の表示 (損失関数)\n", 357 | "plt.plot(history[:,0], history[:,1])\n", 358 | "plt.grid()\n", 359 | "plt.ylim(0,1.2)\n", 360 | "plt.xlabel('iter', fontsize=14)\n", 361 | "plt.ylabel('loss', fontsize=14)\n", 362 | "plt.title('iter vs loss', fontsize=14)\n", 363 | "plt.show()" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [ 372 | "# 学習曲線の表示 
(精度)\n", 373 | "plt.plot(history[:,0], history[:,2])\n", 374 | "plt.ylim(0,1)\n", 375 | "plt.grid()\n", 376 | "plt.xlabel('iter', fontsize=14)\n", 377 | "plt.ylabel('accuracy', fontsize=14)\n", 378 | "plt.title('iter vs accuracy', fontsize=14)\n", 379 | "plt.show()" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": null, 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [ 388 | "# 3次元表示\n", 389 | "from mpl_toolkits.mplot3d import Axes3D\n", 390 | "x1 = np.linspace(4, 8.5, 100)\n", 391 | "x2 = np.linspace(0.5, 7.5, 100)\n", 392 | "xx1, xx2 = np.meshgrid(x1, x2)\n", 393 | "xxx = np.array([np.ones(xx1.ravel().shape), \n", 394 | " xx1.ravel(), xx2.ravel()]).T\n", 395 | "pp = pred(xxx, W)\n", 396 | "c0 = pp[:,0].reshape(xx1.shape)\n", 397 | "c1 = pp[:,1].reshape(xx1.shape)\n", 398 | "c2 = pp[:,2].reshape(xx1.shape)\n", 399 | "plt.figure(figsize=(8,8))\n", 400 | "ax = plt.subplot(1, 1, 1, projection='3d')\n", 401 | "ax.plot_surface(xx1, xx2, c0, color='lightblue', \n", 402 | " edgecolor='black', rstride=10, cstride=10, alpha=0.7)\n", 403 | "ax.plot_surface(xx1, xx2, c1, color='blue', \n", 404 | " edgecolor='black', rstride=10, cstride=10, alpha=0.7)\n", 405 | "ax.plot_surface(xx1, xx2, c2, color='lightgrey', \n", 406 | " edgecolor='black', rstride=10, cstride=10, alpha=0.7)\n", 407 | "ax.scatter(x_t0[:,0], x_t0[:,1], 1, s=50, alpha=1, marker='+', c='k')\n", 408 | "ax.scatter(x_t1[:,0], x_t1[:,1], 1, s=30, alpha=1, marker='o', c='k')\n", 409 | "ax.scatter(x_t2[:,0], x_t2[:,1], 1, s=50, alpha=1, marker='x', c='k')\n", 410 | "ax.set_xlim(4,8.5)\n", 411 | "ax.set_ylim(0.5,7.5)\n", 412 | "ax.view_init(elev=40, azim=70)" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": null, 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "# 評価\n", 422 | "from sklearn.metrics import accuracy_score\n", 423 | "from sklearn.metrics import confusion_matrix\n", 424 | "from sklearn.metrics import 
classification_report\n", 425 | "\n", 426 | "# テストデータで予測値の計算\n", 427 | "yp_test_one = pred(x_test, W)\n", 428 | "yp_test = np.argmax(yp_test_one, axis=1)\n", 429 | "\n", 430 | "# 精度の計算\n", 431 | "from sklearn.metrics import accuracy_score\n", 432 | "score = accuracy_score(y_test, yp_test)\n", 433 | "print('accuracy: %f' % score)\n", 434 | "\n", 435 | "# 混同行列の表示\n", 436 | "from sklearn.metrics import confusion_matrix\n", 437 | "print(confusion_matrix(y_test, yp_test))\n", 438 | "print(classification_report(y_test, yp_test))" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "# 入力変数を4次元に変更" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": {}, 452 | "outputs": [], 453 | "source": [ 454 | "# ダミー変数を追加\n", 455 | "x_all2 = np.insert(x_org, 0, 1.0, axis=1)" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": null, 461 | "metadata": {}, 462 | "outputs": [], 463 | "source": [ 464 | "# 学習データ、検証データに分割\n", 465 | "from sklearn.model_selection import train_test_split\n", 466 | "\n", 467 | "x_train2, x_test2, y_train, y_test,\\\n", 468 | "y_train_one, y_test_one = train_test_split(\n", 469 | " x_all2, y_org, y_all_one, train_size=75, \n", 470 | " test_size=75, random_state=123)\n", 471 | "print(x_train2.shape, x_test2.shape, \n", 472 | " y_train.shape, y_test.shape, \n", 473 | " y_train_one.shape, y_test_one.shape)" 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": null, 479 | "metadata": {}, 480 | "outputs": [], 481 | "source": [ 482 | "print('入力データ(x)')\n", 483 | "print(x_train2[:5,:])" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": null, 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [ 492 | "# 学習対象の選択\n", 493 | "x, yt, x_test = x_train2, y_train_one, x_test2" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "metadata": {}, 500 | "outputs": [], 
501 | "source": [ 502 | "# 初期化処理\n", 503 | "\n", 504 | "# 標本数\n", 505 | "M = x.shape[0]\n", 506 | "# 入力次元数(ダミー変数を含む)\n", 507 | "D = x.shape[1]\n", 508 | "# 分類先クラス数\n", 509 | "N = yt.shape[1]\n", 510 | "\n", 511 | "# 繰り返し回数\n", 512 | "iters = 10000\n", 513 | "\n", 514 | "# 学習率\n", 515 | "alpha = 0.01\n", 516 | "\n", 517 | "# 重み行列の初期設定(すべて1)\n", 518 | "W = np.ones((D, N)) \n", 519 | "\n", 520 | "# 評価結果記録用\n", 521 | "history = np.zeros((0, 3))" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": null, 527 | "metadata": { 528 | "scrolled": true 529 | }, 530 | "outputs": [], 531 | "source": [ 532 | "# メイン処理(4次元版)\n", 533 | "for k in range(iters):\n", 534 | " \n", 535 | " # 予測値の計算 (9.7.1) (9.7.2)\n", 536 | " yp = pred(x, W)\n", 537 | " \n", 538 | " # 誤差の計算 (9.7.4)\n", 539 | " yd = yp - yt\n", 540 | "\n", 541 | " # 重みの更新 (9.7.5)\n", 542 | " W = W - alpha * (x.T @ yd) / M\n", 543 | "\n", 544 | " if (k % 10 == 0):\n", 545 | " loss, score = evaluate(x_test, y_test, y_test_one, W)\n", 546 | " history = np.vstack((history, np.array([k, loss, score])))\n", 547 | " print(\"epoch = %d loss = %f score = %f\" % (k, loss, score))" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": null, 553 | "metadata": {}, 554 | "outputs": [], 555 | "source": [ 556 | "print(history.shape)" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": null, 562 | "metadata": {}, 563 | "outputs": [], 564 | "source": [ 565 | "#損失関数値と精度の確認\n", 566 | "print('初期状態: 損失関数:%f 精度:%f' \n", 567 | " % (history[0,1], history[0,2]))\n", 568 | "print('最終状態: 損失関数:%f 精度:%f' \n", 569 | " % (history[-1,1], history[-1,2]))" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": null, 575 | "metadata": {}, 576 | "outputs": [], 577 | "source": [ 578 | "# 学習曲線の表示 (損失関数)\n", 579 | "plt.plot(history[:,0], history[:,1])\n", 580 | "plt.ylim(0,1.2)\n", 581 | "plt.grid()\n", 582 | "plt.xlabel('iter', fontsize=14)\n", 583 |
"plt.ylabel('loss', fontsize=14)\n", 584 | "plt.title('iter vs loss', fontsize=14)\n", 585 | "plt.show()" 586 | ] 587 | }, 588 | { 589 | "cell_type": "code", 590 | "execution_count": null, 591 | "metadata": {}, 592 | "outputs": [], 593 | "source": [ 594 | "# 学習曲線の表示 (精度)\n", 595 | "plt.plot(history[:,0], history[:,2])\n", 596 | "plt.ylim(0,1)\n", 597 | "plt.grid()\n", 598 | "plt.xlabel('iter', fontsize=14)\n", 599 | "plt.ylabel('accuracy', fontsize=14)\n", 600 | "plt.title('iter vs accuracy', fontsize=14)\n", 601 | "plt.show()" 602 | ] 603 | } 604 | ], 605 | "metadata": { 606 | "kernelspec": { 607 | "display_name": "Python 3", 608 | "language": "python", 609 | "name": "python3" 610 | }, 611 | "language_info": { 612 | "codemirror_mode": { 613 | "name": "ipython", 614 | "version": 3 615 | }, 616 | "file_extension": ".py", 617 | "mimetype": "text/x-python", 618 | "name": "python", 619 | "nbconvert_exporter": "python", 620 | "pygments_lexer": "ipython3", 621 | "version": "3.6.7" 622 | } 623 | }, 624 | "nbformat": 4, 625 | "nbformat_minor": 1 626 | } 627 | -------------------------------------------------------------------------------- /notebooks/ch08-bi-classify-xtech.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# 8章 二値分類" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# 必要ライブラリの宣言\n", 19 | "%matplotlib inline\n", 20 | "import numpy as np\n", 21 | "import matplotlib.pyplot as plt" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# PDF出力用\n", 31 | "from IPython.display import set_matplotlib_formats\n", 32 | "set_matplotlib_formats('png', 'pdf')" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | 
"source": [ 39 | "### シグモイド関数のグラフ" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "xx = np.linspace(-6, 6, 500)\n", 49 | "yy = 1 / (np.exp(-xx) + 1)\n", 50 | "\n", 51 | "plt.figure(figsize=(6,6))\n", 52 | "plt.ylim(-3, 3)\n", 53 | "plt.xlim(-3, 3)\n", 54 | "plt.xticks(np.linspace(-3,3,13))\n", 55 | "plt.yticks(np.linspace(-3,3,13))\n", 56 | "plt.xlabel('x', fontsize=14)\n", 57 | "plt.ylabel('y', fontsize=14)\n", 58 | "plt.grid()\n", 59 | "plt.plot(xx, yy, c='b', label=r'$\\dfrac{1}{1+\\exp{(-x)}}$', lw=1)\n", 60 | "plt.plot(xx, xx, c='k', label=r'$y = x$', lw=1)\n", 61 | "plt.plot([-3,3], [0,0], c='k')\n", 62 | "plt.plot([0,0], [-3,3],c='k')\n", 63 | "plt.plot([-3,3],[1,1],linestyle='-.',c='k')\n", 64 | "plt.legend(fontsize=14)\n", 65 | "plt.show()" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "### 対数関数のグラフ" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "# fig05-05\n", 82 | "# 対数のグラフ\n", 83 | "x = np.linspace(0, 4, 200)\n", 84 | "xx = np.linspace(-2, 2, 200)\n", 85 | "x0 = np.delete(x, 0)\n", 86 | "y0 = np.log(x0)\n", 87 | "x1 = np.linspace(0, 4, 9)\n", 88 | "x2 = np.delete(x1, 0)\n", 89 | "y2 = np.log(x2)\n", 90 | "plt.figure(figsize=(6,6))\n", 91 | "plt.plot(x0, y0, c='b', label='$y=\\log{x}$',lw=2)\n", 92 | "plt.plot(xx, np.exp(xx), c='k', label='$y=e^x$',lw=2)\n", 93 | "plt.plot([-2,4],[-2,4], linestyle='-.', label='$y=x$', lw=2)\n", 94 | "plt.plot([-2,4],[0,0],lw=2,c='k')\n", 95 | "plt.plot([0,0],[-2,4],lw=2,c='k')\n", 96 | "plt.xticks(size=20)\n", 97 | "plt.yticks(size=20)\n", 98 | "plt.ylim(-2,4)\n", 99 | "plt.grid(which='major',linestyle='-',lw=2)\n", 100 | "plt.xlabel(\"$x$\", fontsize=18) \n", 101 | "plt.ylabel(\"$y$\", fontsize=18)\n", 102 | "plt.legend(fontsize=18)\n", 103 | "plt.show()" 104 | ] 105 | }, 106 | { 107 | 
"cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "### データ準備" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "# 学習用データ準備\n", 120 | "from sklearn.datasets import load_iris\n", 121 | "iris = load_iris()\n", 122 | "x_org, y_org = iris.data, iris.target\n", 123 | "print('元データ', x_org.shape, y_org.shape)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "# データ絞り込み\n", 133 | "# クラス0, 1のみ\n", 134 | "# 項目sepal_lengthとsepal_widthのみ\n", 135 | "x_data, y_data = iris.data[:100,:2], iris.target[:100]\n", 136 | "print('対象データ', x_data.shape, y_data.shape)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "# ダミー変数を追加\n", 146 | "x_data = np.insert(x_data, 0, 1.0, axis=1)\n", 147 | "print('ダミー変数追加後', x_data.shape)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "#  元データのサイズ\n", 157 | "print(x_data.shape, y_data.shape)\n", 158 | "# 学習データ、検証データに分割 (シャフルも同時に実施)\n", 159 | "from sklearn.model_selection import train_test_split\n", 160 | "x_train, x_test, y_train, y_test = train_test_split(\n", 161 | " x_data, y_data, train_size=70, test_size=30, \n", 162 | " random_state=123)\n", 163 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "### 学習データの散布図表示" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "# 散布図の表示\n", 180 | "x_t0 = x_train[y_train == 0]\n", 181 | "x_t1 = x_train[y_train == 1]\n", 182 | "plt.figure(figsize=(6,6))\n", 183 | 
"plt.scatter(x_t0[:,1], x_t0[:,2], marker='x', c='b', label='0 (setosa)')\n", 184 | "plt.scatter(x_t1[:,1], x_t1[:,2], marker='o', c='k', label='1 (versicolor)')\n", 185 | "plt.xlabel('sepal_length', fontsize=14)\n", 186 | "plt.ylabel('sepal_width', fontsize=14)\n", 187 | "plt.xticks(size=16)\n", 188 | "plt.yticks(size=16)\n", 189 | "plt.legend(fontsize=16)\n", 190 | "plt.show()" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "# 散布図の表示\n", 200 | "x_t0 = x_train[y_train == 0]\n", 201 | "x_t1 = x_train[y_train == 1]\n", 202 | "plt.figure(figsize=(6,6))\n", 203 | "plt.scatter(x_t0[:,1], x_t0[:,2], marker='x', s=50, c='b', label='yt = 0')\n", 204 | "plt.scatter(x_t1[:,1], x_t1[:,2], marker='o', s=50, c='k', label='yt = 1')\n", 205 | "plt.xlabel(r'$x_1$', fontsize=16)\n", 206 | "plt.ylabel(r'$x_2$', fontsize=16)\n", 207 | "plt.xticks(size=16)\n", 208 | "plt.yticks(size=16)\n", 209 | "plt.legend(fontsize=16)\n", 210 | "plt.show()" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "### 学習用変数の設定" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "# 学習用変数の設定\n", 227 | "x = x_train\n", 228 | "yt = y_train" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "# 入力データ x の表示 (ダミーデータを含む)\n", 238 | "print(x[:5])" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "# 正解値 yt の表示\n", 248 | "print(yt[:5])" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "### 予測関数" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 
263 | "source": [ 264 | "# シグモイド関数\n", 265 | "def sigmoid(x):\n", 266 | " return 1/(1+ np.exp(-x))" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "# 予測値の計算\n", 276 | "def pred(x, w):\n", 277 | " return sigmoid(x @ w)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "### 評価" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "# 損失関数(交差エントロピー関数)\n", 294 | "def cross_entropy(yt, yp):\n", 295 | " # 交差エントロピーの計算(この段階ではベクトル)\n", 296 | " ce1 = -(yt * np.log(yp) + (1 - yt) * np.log(1 - yp))\n", 297 | " # 交差エントロピーベクトルの平均値を計算\n", 298 | " return(np.mean(ce1)) " 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "# 予測結果の確率値から 0 or 1 を判断する関数\n", 308 | "def classify(y):\n", 309 | " return np.where(y < 0.5, 0, 1)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "# モデルの評価を行う関数\n", 319 | "from sklearn.metrics import accuracy_score\n", 320 | "def evaluate(xt, yt, w):\n", 321 | " \n", 322 | " # 予測値の計算\n", 323 | " yp = pred(xt, w)\n", 324 | " \n", 325 | " # 損失関数値の計算\n", 326 | " loss = cross_entropy(yt, yp)\n", 327 | " \n", 328 | " # 予測値(確率値)を0または1に変換\n", 329 | " yp_b = classify(yp)\n", 330 | " \n", 331 | " #精度の算出\n", 332 | " score = accuracy_score(yt, yp_b)\n", 333 | " return loss, score" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "### 初期化処理" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": { 347 | "scrolled": true 348 | }, 349 | "outputs": [], 350 | "source": [ 351 | "# 初期化処理\n", 352 | "\n", 353 | "# 標本数\n", 354 | "M = 
x.shape[0]\n", 355 | "# 入力次元数(ダミー変数を含む)\n", 356 | "D = x.shape[1]\n", 357 | "\n", 358 | "# 繰り返し回数\n", 359 | "iters = 10000\n", 360 | "\n", 361 | "# 学習率\n", 362 | "alpha = 0.01\n", 363 | "\n", 364 | "# 初期値\n", 365 | "w = np.ones(D)\n", 366 | "\n", 367 | "# 評価結果記録用 (損失関数と精度)\n", 368 | "history = np.zeros((0,3))" 369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": {}, 374 | "source": [ 375 | "### メイン処理" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [ 384 | "# 繰り返しループ\n", 385 | "\n", 386 | "for k in range(iters):\n", 387 | " \n", 388 | " # 予測値の計算 (8.6.1) (8.6.2)\n", 389 | " yp = pred(x, w)\n", 390 | " \n", 391 | " # 誤差の計算 (8.6.4)\n", 392 | " yd = yp - yt\n", 393 | " \n", 394 | " # 勾配降下法の実施 (8.6.6)\n", 395 | " w = w - alpha * (x.T @ yd) / M\n", 396 | " \n", 397 | " # ログ記録用\n", 398 | " if ( k % 10 == 0):\n", 399 | " loss, score = evaluate(x_test, y_test, w)\n", 400 | " history = np.vstack((history, \n", 401 | " np.array([k, loss, score])))\n", 402 | " print( \"iter = %d loss = %f score = %f\" \n", 403 | " % (k, loss, score))" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "metadata": {}, 409 | "source": [ 410 | "### 結果確認" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "#損失関数値と精度の確認\n", 420 | "print('初期状態: 損失関数:%f 精度:%f' \n", 421 | " % (history[0,1], history[0,2]))\n", 422 | "print('最終状態: 損失関数:%f 精度:%f' \n", 423 | " % (history[-1,1], history[-1,2]))" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": {}, 430 | "outputs": [], 431 | "source": [ 432 | "# 検証データを散布図用に準備\n", 433 | "x_t0 = x_test[y_test==0]\n", 434 | "x_t1 = x_test[y_test==1]\n", 435 | "\n", 436 | "# 決定境界描画用 x1の値から x2の値を計算する\n", 437 | "def b(x, w):\n", 438 | " return(-(w[0] + w[1] * x)/ w[2])\n", 439 | "# 散布図のx1の最小値と最大値\n", 440 | 
"xl = np.asarray([x[:,1].min(), x[:,1].max()])\n", 441 | "yl = b(xl, w)" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": null, 447 | "metadata": {}, 448 | "outputs": [], 449 | "source": [ 450 | "plt.figure(figsize=(6,6))\n", 451 | "# 散布図の表示\n", 452 | "plt.scatter(x_t0[:,1], x_t0[:,2], marker='x', \n", 453 | " c='b', s=50, label='class 0')\n", 454 | "plt.scatter(x_t1[:,1], x_t1[:,2], marker='o', \n", 455 | " c='k', s=50, label='class 1')\n", 456 | "# 散布図に決定境界の直線も追記\n", 457 | "plt.plot(xl, yl, c='b')\n", 458 | "plt.xlabel('sepal_length', fontsize=14)\n", 459 | "plt.ylabel('sepal_width', fontsize=14)\n", 460 | "plt.xticks(size=16)\n", 461 | "plt.yticks(size=16)\n", 462 | "plt.legend(fontsize=16)\n", 463 | "plt.show()" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": null, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [ 472 | "# 学習曲線の表示をします (損失関数)\n", 473 | "plt.figure(figsize=(6,4))\n", 474 | "plt.plot(history[:,0], history[:,1], 'b')\n", 475 | "plt.xlabel('iter', fontsize=14)\n", 476 | "plt.ylabel('cost', fontsize=14)\n", 477 | "plt.title('iter vs cost', fontsize=14)\n", 478 | "plt.show()" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "metadata": { 485 | "scrolled": true 486 | }, 487 | "outputs": [], 488 | "source": [ 489 | "# 学習曲線の表示をします (精度)\n", 490 | "plt.figure(figsize=(6,4))\n", 491 | "plt.plot(history[:,0], history[:,2], 'b')\n", 492 | "plt.xlabel('iter', fontsize=14)\n", 493 | "plt.ylabel('accuracy', fontsize=14)\n", 494 | "plt.title('iter vs accuracy', fontsize=14)\n", 495 | "plt.show()" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": null, 501 | "metadata": {}, 502 | "outputs": [], 503 | "source": [ 504 | "from mpl_toolkits.mplot3d import Axes3D\n", 505 | "x1 = np.linspace(4, 7.5, 100)\n", 506 | "x2 = np.linspace(2, 4.5, 100)\n", 507 | "xx1, xx2 = np.meshgrid(x1, x2)\n", 508 | "xxx = 
np.asarray([np.ones(xx1.ravel().shape), \n", 509 | " xx1.ravel(), xx2.ravel()]).T\n", 510 | "c = pred(xxx, w).reshape(xx1.shape)\n", 511 | "plt.figure(figsize=(8,8))\n", 512 | "ax = plt.subplot(1, 1, 1, projection='3d')\n", 513 | "ax.plot_surface(xx1, xx2, c, color='blue', \n", 514 | " edgecolor='black', rstride=10, cstride=10, alpha=0.1)\n", 515 | "ax.scatter(x_t1[:,1], x_t1[:,2], 1, s=20, alpha=0.9, marker='o', c='b')\n", 516 | "ax.scatter(x_t0[:,1], x_t0[:,2], 0, s=20, alpha=0.9, marker='s', c='b')\n", 517 | "ax.set_xlim(4,7.5)\n", 518 | "ax.set_ylim(2,4.5)\n", 519 | "ax.view_init(elev=20, azim=60)" 520 | ] 521 | }, 522 | { 523 | "cell_type": "markdown", 524 | "metadata": {}, 525 | "source": [ 526 | "## scikit-learnライブラリの利用" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": null, 532 | "metadata": {}, 533 | "outputs": [], 534 | "source": [ 535 | "# 必要ライブラリのロード\n", 536 | "from sklearn.linear_model import LogisticRegression\n", 537 | "\n", 538 | "# モデル生成\n", 539 | "model_lr = LogisticRegression(solver='liblinear')\n", 540 | "\n", 541 | "# 機械学習実施\n", 542 | "model_lr.fit(x, yt)" 543 | ] 544 | }, 545 | { 546 | "cell_type": "code", 547 | "execution_count": null, 548 | "metadata": {}, 549 | "outputs": [], 550 | "source": [ 551 | "# 線形回帰\n", 552 | "# Bias term: x already contains the constant dummy column, so its weight coef_[0,0] must be added to intercept_\n", 553 | "lr_w0 = model_lr.intercept_[0] + model_lr.coef_[0,0]\n", 554 | "# x1(sepal_length)の係数\n", 555 | "lr_w1 = model_lr.coef_[0,1]\n", 556 | "# x2(sepal_width)の係数\n", 557 | "lr_w2 = model_lr.coef_[0,2]" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": null, 563 | "metadata": {}, 564 | "outputs": [], 565 | "source": [ 566 | "# 限界直線描画用 x1の値から x2の値を計算する\n", 567 | "def rl(x):\n", 568 | " wk = lr_w0 + lr_w1 * x\n", 569 | " wk2 = -wk / lr_w2\n", 570 | " return(wk2)" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": null, 576 | "metadata": {}, 577 | "outputs": [], 578 | "source": [ 579 | "y_rl = rl(xl)\n", 580 | "# 結果確認\n", 581 | "print(xl, yl, y_rl)" 582 | ]
583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": null, 587 | "metadata": {}, 588 | "outputs": [], 589 | "source": [ 590 | "# 散布図に限界直線も追記する\n", 591 | "fig = plt.figure(figsize=(6,6))\n", 592 | "ax = fig.add_subplot(1,1,1)\n", 593 | "# 散布図の表示\n", 594 | "plt.scatter(x_t0[:,1], x_t0[:,2], marker='x', c='b')\n", 595 | "plt.scatter(x_t1[:,1], x_t1[:,2], marker='o', c='k')\n", 596 | "# 限界直線の表示\n", 597 | "ax.plot(xl, yl, linewidth=2, c='k', label='Hands On')\n", 598 | "# lr model\n", 599 | "ax.plot(xl, y_rl, linewidth=2, c='k', linestyle=\"--\", label='scikit LR')\n", 600 | "\n", 601 | "ax.legend()\n", 602 | "ax.set_xlabel('$x_1$', fontsize=16)\n", 603 | "ax.set_ylabel('$x_2$', fontsize=16)\n", 604 | "plt.show()" 605 | ] 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": null, 610 | "metadata": {}, 611 | "outputs": [], 612 | "source": [] 613 | } 614 | ], 615 | "metadata": { 616 | "kernelspec": { 617 | "display_name": "Python 3", 618 | "language": "python", 619 | "name": "python3" 620 | }, 621 | "language_info": { 622 | "codemirror_mode": { 623 | "name": "ipython", 624 | "version": 3 625 | }, 626 | "file_extension": ".py", 627 | "mimetype": "text/x-python", 628 | "name": "python", 629 | "nbconvert_exporter": "python", 630 | "pygments_lexer": "ipython3", 631 | "version": "3.7.3" 632 | } 633 | }, 634 | "nbformat": 4, 635 | "nbformat_minor": 1 636 | } 637 | -------------------------------------------------------------------------------- /notebooks/ch00-11-python-entry.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python入門" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## 第一部 Python文法1\n", 15 | "変数からタプルまで" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### 変数の型\n", 23 | 
"Pythonの変数には**整数型**、**浮動小数点型**、**文字列型**、**ブーリアン型**などがあります。 \n", 24 | "C言語のような型の宣言は不要で、代入された値から自動的に型の設定がされます。 \n", 25 | "変数がどの型を持っているかは、``type``関数で調べることができます。 \n", 26 | "また、変数の値そのものは ``print``関数で表示できます。" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# 整数型の例\n", 36 | "\n", 37 | "x1 = 2\n", 38 | "print(x1)\n", 39 | "print(type(x1))" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "# 浮動小数点型の例\n", 49 | "\n", 50 | "x2 = 3.0\n", 51 | "print(x2)\n", 52 | "print(type(x2))" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "# 文字列型の例\n", 62 | "\n", 63 | "x3 = 'abc'\n", 64 | "print(x3)\n", 65 | "print(type(x3))" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "# ブーリアン型(True/False)の例\n", 75 | "\n", 76 | "x4 = True\n", 77 | "print(x4)\n", 78 | "print(type(x4))" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### 演算\n", 86 | "\n", 87 | "数値間の演算は他のプログラム言語同様 ``+, -, *, /`` 等を使います。 \n", 88 | "文字列同士の連結にも ``+`` を使います。" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "# 整数同士の和\n", 98 | "\n", 99 | "x1 = 2\n", 100 | "y1 = 3\n", 101 | "z1 = x1 + y1\n", 102 | "print(z1, type(z1))" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# 浮動小数点同士の和\n", 112 | "\n", 113 | "x2 = 3.0\n", 114 | "y2 = 2.0\n", 115 | "z2 = x2 + y2\n", 116 | "print(z2, type(z2)) " 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": 
[ 125 | "# 文字列の連結\n", 126 | "\n", 127 | "x3 = 'abc'\n", 128 | "y3 = 'XYZ'\n", 129 | "z3 = x3 + y3\n", 130 | "print(z3, type(z3))" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "# 整数と浮動小数点間の演算\n", 140 | "# 自動的に型が浮動小数点に合わせられる\n", 141 | "\n", 142 | "y4 = x1 + y2\n", 143 | "print(y4, type(y4))" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "#### 比較演算子\n", 151 | "比較演算子は ``==`` です。" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "x1 = 2\n", 161 | "y1 = 3\n", 162 | "w1 = 2\n", 163 | "\n", 164 | "print(w1 == x1)\n", 165 | "print(w1 == y1)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "#### 論理演算子\n", 173 | "\n", 174 | "論理演算子は ``and``, ``or``, ``not``です。" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "t1 = True\n", 184 | "t2 = True\n", 185 | "f1 = False\n", 186 | "f2 = False" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "# AND演算\n", 196 | "\n", 197 | "p1 = t1 and t2\n", 198 | "p2 = t1 and f2\n", 199 | "print(p1, p2)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "# OR演算\n", 209 | "\n", 210 | "q1 = t1 or f1\n", 211 | "q2 = f1 or f2\n", 212 | "print(q1, q2)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "# NOT演算\n", 222 | "\n", 223 | "r1 = not t1\n", 224 | "r2 = not f1\n", 225 | "print(r1, r2)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230
| "metadata": {}, 231 | "source": [ 232 | "### リスト\n", 233 | "\n", 234 | "リストは``[x, y, ...]``のような形式で表現します。 \n", 235 | "第一要素は``list[0]``, 第二要素は``list[1]``で参照します。 \n", 236 | "リストの長さを知りたいときは``len``関数を使います。" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "# リストの定義\n", 246 | "\n", 247 | "list1 = [2, 4, 6, 8, 10, 12, 14]" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "# 型は\"list\"\n", 257 | "\n", 258 | "print(type(list1))" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "# print関数にかけると、全要素が表示される\n", 268 | "\n", 269 | "print(list1)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "# 第一要素の表示\n", 279 | "\n", 280 | "print(list1[0])" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "# 第二要素の表示\n", 290 | "\n", 291 | "print(list1[1])" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "# リストの長さはlen関数で取得\n", 301 | "\n", 302 | "print(len(list1))" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "### 部分リスト\n", 310 | "\n", 311 | "``list[0:3]``のような形式の参照も可能で、 \n", 312 | "この式は第1要素から第3要素までを抜き出した部分リストとなります。 \n", 313 | "**「0以上で3より前」**という読み方をするとわかりやすいです。" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "# list[0]から list[3]より前\n", 323 | "\n", 324 | "print(list1[0:3])" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | 
"metadata": {}, 330 | "source": [ 331 | "#### 部分リストの参照パターン\n", 332 | "\n", 333 | "``list``を元に部分リストを作る方法には、以下のようないろいろなバリエーションがあります。 \n", 334 | "機械学習のコーディングでよく使われるテクニックなので、是非マスターして下さい。" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "# 最初からlist[2]より前\n", 344 | "print(list1[0:2])\n", 345 | "\n", 346 | "# 第一引数を省略すると\"0\"であるとみなされる\n", 347 | "print(list1[:2])" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "# list[2]から最後まで\n", 357 | "\n", 358 | "len1 = len(list1)\n", 359 | "print(list1[2:len1])\n", 360 | "\n", 361 | "# 第二引数を省略すると最終要素(len(list)))であるとみなされる\n", 362 | "print(list1[2:])" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "# ':'' だけの場合は、元のリスト全体を表す\n", 372 | "\n", 373 | "print(list1[:])" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "# 後ろから2つ\n", 383 | "\n", 384 | "print(list1[-2:])" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "# 後ろの2つを取り除いたもの\n", 394 | "\n", 395 | "print(list1[:-2])" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [ 404 | "# 普段は使わないのですが、listには3つめの引数もあり、これを使うとこんなことも可能です\n", 405 | "\n", 406 | "# 一つおき\n", 407 | "\n", 408 | "print(list1[::2])" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": null, 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [ 417 | "# 第三引数に-1を指定すると「逆順」を意味します\n", 418 | "\n", 419 | "print(list1[::-1])" 420 | ] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": {}, 425 | 
"source": [ 426 | "#### 異なる要素の混在\n", 427 | "\n", 428 | "リストの各要素は異なる型の混在もできます。" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "list2 = [1, 2.0, 'abc', False]\n", 438 | "print(list2)" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "#### リストに要素の追加\n", 446 | "\n", 447 | "リストに要素を追加したい場合は、``append``関数を使います。" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "# 空リストの定義\n", 457 | "list3 = []\n", 458 | "\n", 459 | "# 要素の追加\n", 460 | "list3.append(1)\n", 461 | "list3.append(2.0)\n", 462 | "list3.append('abc')\n", 463 | "print(list3)" 464 | ] 465 | }, 466 | { 467 | "cell_type": "markdown", 468 | "metadata": {}, 469 | "source": [ 470 | "#### リスト間の連結\n", 471 | "\n", 472 | "リストとリストを連結したい場合は、``+``演算子を使います。" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "list4 = list2 + list3\n", 482 | "print(list4)" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "### タプル\n", 490 | "\n", 491 | "リストと似たデータ型として「タプル」があります。 \n", 492 | "タプルは、値の変更不可能なリストであると考えて下さい。" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": null, 498 | "metadata": {}, 499 | "outputs": [], 500 | "source": [ 501 | "# タプルの場合は[]でなく()で要素を囲みます\n", 502 | "\n", 503 | "tap1 = (1,3,5,7)\n", 504 | "print(tap1)\n", 505 | "print(type(tap1))" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": null, 511 | "metadata": {}, 512 | "outputs": [], 513 | "source": [ 514 | "# タプルの要素への参照はリスト同様[]で行います\n", 515 | "print(tap1[0], tap1[1])" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": {}, 522 | "outputs": [], 523 | "source": [ 524 | "# 
値を変更しようとすると 。。。\n", 525 | "tap1[1] = 10" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": null, 531 | "metadata": {}, 532 | "outputs": [], 533 | "source": [ 534 | "# リストの場合、上のこと(要素を後で変更する)は可能です\n", 535 | "\n", 536 | "list7 = [2,4,6,8,10]\n", 537 | "list7[2] = 9\n", 538 | "print(list7)" 539 | ] 540 | }, 541 | { 542 | "cell_type": "markdown", 543 | "metadata": {}, 544 | "source": [ 545 | "## 第二部 Numpy入門1" 546 | ] 547 | }, 548 | { 549 | "cell_type": "markdown", 550 | "metadata": {}, 551 | "source": [ 552 | "### Numpyの特徴\n", 553 | "\n", 554 | "numpy配列を使って、配列計算を行えます。 \n", 555 | "NativeなPythonの機能と比較してNumpyを使うことにより次のメリットがあります。\n", 556 | "\n", 557 | "* 処理速度が速くなる\n", 558 | "* 配列の扱い方が柔軟\n", 559 | "* コードがシンプルになる\n", 560 | "\n", 561 | "より詳しい解説は例えば下記のリンクを参照して下さい。 \n", 562 | "[numpyの何がすごいのか?](https://to-kei.net/python/data-analysis/what-is-numpy/)\n" 563 | ] 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "metadata": {}, 568 | "source": [ 569 | "### 一次元配列" 570 | ] 571 | }, 572 | { 573 | "cell_type": "markdown", 574 | "metadata": {}, 575 | "source": [ 576 | "#### 宣言" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": null, 582 | "metadata": {}, 583 | "outputs": [], 584 | "source": [ 585 | "# ライブラリのロード\n", 586 | "import numpy as np\n", 587 | "\n", 588 | "# 一次元配列の作成\n", 589 | "# リスト配列を引数に、array関数でnumpy一次元配列を作ります。\n", 590 | "\n", 591 | "list1 = list(range(2,12,2))\n", 592 | "array1 = np.array(list1)" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "metadata": {}, 599 | "outputs": [], 600 | "source": [ 601 | "# print文による表示結果の比較\n", 602 | "# 画面上はカンマのあるなしで区別します。\n", 603 | "\n", 604 | "print('list配列: ', list1)\n", 605 | "print('numpy配列: ', array1)\n", 606 | "array1" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": null, 612 | "metadata": {}, 613 | "outputs": [], 614 | "source": [ 615 | "# データ型(dtype)つき変数作成\n", 616 | "\n", 617 | "# 個々の要素をデータ型付きで定義することも可能です。\n",
618 | "# 型には、以下のようなものがあります。\n", 619 | "#\n", 620 | "# 符号付き整数: int8, int16, int32, int64\n", 621 | "# 符号なし整数: uint8, uint16, uint32, uint64\n", 622 | "# 浮動小数点: float16, float32, float64, float128\n", 623 | "\n", 624 | "array2 = np.array([2, 4, 6, 8,10], dtype=np.int32)\n", 625 | "print(array2)\n", 626 | "\n", 627 | "array2" 628 | ] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": null, 633 | "metadata": {}, 634 | "outputs": [], 635 | "source": [ 636 | "# 型は'numpy.ndarray' となります\n", 637 | "\n", 638 | "print(type(array1))" 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": null, 644 | "metadata": {}, 645 | "outputs": [], 646 | "source": [ 647 | "# 配列の要素数は'shape'という属性で取得可能です\n", 648 | "# 結果はtupleで返されます\n", 649 | "\n", 650 | "print(array1.shape)" 651 | ] 652 | }, 653 | { 654 | "cell_type": "markdown", 655 | "metadata": {}, 656 | "source": [ 657 | "#### 参照" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [ 666 | "# 要素の参照例\n", 667 | "\n", 668 | "# 先頭\n", 669 | "print(array1[0])\n", 670 | "\n", 671 | "# 一番後ろの要素は-1で参照できます\n", 672 | "print(array1[-1])" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": null, 678 | "metadata": {}, 679 | "outputs": [], 680 | "source": [ 681 | "# 範囲付き参照\n", 682 | "# このパターンはlist変数と同じです。\n", 683 | "\n", 684 | "# 0以上2未満\n", 685 | "print(array1[:2])\n", 686 | "\n", 687 | "# 2以上\n", 688 | "print(array1[2:])\n", 689 | "\n", 690 | "# 全部\n", 691 | "print(array1[:])" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": null, 697 | "metadata": {}, 698 | "outputs": [], 699 | "source": [ 700 | "# こういうアクセス方法も可能です\n", 701 | "# (これはlistではできない)\n", 702 | "\n", 703 | "# array1の0番目、2番目、4番目\n", 704 | "print(array1[[0,2,4]])\n", 705 | "\n", 706 | "# array1の3番目、1番目\n", 707 | "print(array1[[3,1]])" 708 | ] 709 | }, 710 | { 711 | "cell_type": "markdown", 712 | "metadata": {},
713 | "source": [ 714 | "#### 計算" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": null, 720 | "metadata": {}, 721 | "outputs": [], 722 | "source": [ 723 | "# 計算の例\n", 724 | "# numpy配列を対象にすると計算を一気に行うことができます\n", 725 | "\n", 726 | "array1 = np.array(list(range(2,12,2)))\n", 727 | "array3 = np.array(list(range(5)))\n", 728 | "print(array1)\n", 729 | "print(array3)" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": null, 735 | "metadata": {}, 736 | "outputs": [], 737 | "source": [ 738 | "# 足し算\n", 739 | "# list変数だとループを回す必要があります\n", 740 | "\n", 741 | "array4 = array1 + array3\n", 742 | "print(array4)" 743 | ] 744 | }, 745 | { 746 | "cell_type": "markdown", 747 | "metadata": {}, 748 | "source": [ 749 | "#### ブロードキャスト機能" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": null, 755 | "metadata": {}, 756 | "outputs": [], 757 | "source": [ 758 | "# ブロードキャスト機能\n", 759 | "# サイズの異なる変数同士の演算では、サイズを自動的に合わせて計算します\n", 760 | "\n", 761 | "array5 = array1 + 3\n", 762 | "print(array5)" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": null, 768 | "metadata": {}, 769 | "outputs": [], 770 | "source": [ 771 | "# 関数呼び出し\n", 772 | "# numpy関数と組み合わせると、関数呼び出しも全要素分まとめて行えます\n", 773 | "\n", 774 | "# 対数関数の呼出し\n", 775 | "array6 = np.log(array1)\n", 776 | "print(array6)" 777 | ] 778 | }, 779 | { 780 | "cell_type": "markdown", 781 | "metadata": {}, 782 | "source": [ 783 | "#### 特定の条件を満たす要素の抽出" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": null, 789 | "metadata": {}, 790 | "outputs": [], 791 | "source": [ 792 | "# ブロードキャスト機能とindex機能を組み合わせてこんなことも可能です\n", 793 | "# array3 から偶数の要素だけを抜き出す\n", 794 | "\n", 795 | "array3 = np.array(list(range(5)))\n", 796 | "w = (array3 % 2) == 0\n", 797 | "print(w)" 798 | ] 799 | }, 800 | { 801 | "cell_type": "code", 802 | "execution_count": null, 803 | "metadata": {}, 804 | "outputs": [], 805 | "source": [ 806 | "array7 = array3[w]\n", 807
| "print(array7)" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": null, 813 | "metadata": {}, 814 | "outputs": [], 815 | "source": [ 816 | "# まとめて書くとこうなります\n", 817 | "\n", 818 | "array8 = array3[(array3 % 2) == 0]\n", 819 | "print(array8)" 820 | ] 821 | }, 822 | { 823 | "cell_type": "markdown", 824 | "metadata": {}, 825 | "source": [ 826 | "#### 内積" 827 | ] 828 | }, 829 | { 830 | "cell_type": "code", 831 | "execution_count": null, 832 | "metadata": {}, 833 | "outputs": [], 834 | "source": [ 835 | "# 内積\n", 836 | "# 記号 '@' を使って内積計算が可能です。\n", 837 | "\n", 838 | "array1 = np.array(list(range(2,12,2)))\n", 839 | "array3 = np.array(list(range(5)))\n", 840 | "print(array1)\n", 841 | "print(array3)\n", 842 | "\n", 843 | "p = array1 @ array3\n", 844 | "print(p)" 845 | ] 846 | }, 847 | { 848 | "cell_type": "markdown", 849 | "metadata": {}, 850 | "source": [ 851 | "### 二次元配列\n", 852 | "\n", 853 | "numpyでは行列のような二次元配列も扱えます。 " 854 | ] 855 | }, 856 | { 857 | "cell_type": "markdown", 858 | "metadata": {}, 859 | "source": [ 860 | "#### 宣言" 861 | ] 862 | }, 863 | { 864 | "cell_type": "code", 865 | "execution_count": null, 866 | "metadata": {}, 867 | "outputs": [], 868 | "source": [ 869 | "# 二次元配列の宣言\n", 870 | "# この場合、引数はlistのlistとなります。\n", 871 | "\n", 872 | "array8 = np.array([[1,2,3,4,5], [6,7,8,9,10],[11,12,13,14,15]])" 873 | ] 874 | }, 875 | { 876 | "cell_type": "code", 877 | "execution_count": null, 878 | "metadata": {}, 879 | "outputs": [], 880 | "source": [ 881 | "# 二次元配列をprint関数にかけるとこのような表示になります\n", 882 | "\n", 883 | "print(array8)" 884 | ] 885 | }, 886 | { 887 | "cell_type": "code", 888 | "execution_count": null, 889 | "metadata": {}, 890 | "outputs": [], 891 | "source": [ 892 | "# 要素数の取得\n", 893 | "\n", 894 | "print(array8.shape)" 895 | ] 896 | }, 897 | { 898 | "cell_type": "markdown", 899 | "metadata": {}, 900 | "source": [ 901 | "#### 参照" 902 | ] 903 | }, 904 | { 905 | "cell_type": "code", 906 | "execution_count": null, 907 | "metadata": {}, 
908 | "outputs": [], 909 | "source": [ 910 | "# 要素の参照は「(第一次元引数),(第二次元引数)」の形式\n", 911 | "\n", 912 | "print(array8[1,2])" 913 | ] 914 | }, 915 | { 916 | "cell_type": "code", 917 | "execution_count": null, 918 | "metadata": {}, 919 | "outputs": [], 920 | "source": [ 921 | "# それぞれの次元に対して範囲指定で参照することも可能\n", 922 | "\n", 923 | "print(array8[:2,2:])" 924 | ] 925 | }, 926 | { 927 | "cell_type": "markdown", 928 | "metadata": {}, 929 | "source": [ 930 | "#### 計算" 931 | ] 932 | }, 933 | { 934 | "cell_type": "code", 935 | "execution_count": null, 936 | "metadata": {}, 937 | "outputs": [], 938 | "source": [ 939 | "# スカラー積\n", 940 | "\n", 941 | "a = np.array([[1,2,3],[4,5,6]])\n", 942 | "b = a * 3\n", 943 | "print(b)" 944 | ] 945 | }, 946 | { 947 | "cell_type": "code", 948 | "execution_count": null, 949 | "metadata": {}, 950 | "outputs": [], 951 | "source": [ 952 | "# スカラー和\n", 953 | "\n", 954 | "a = np.array([[1,2,3],[4,5,6]])\n", 955 | "c = a + 3\n", 956 | "print(c)" 957 | ] 958 | }, 959 | { 960 | "cell_type": "code", 961 | "execution_count": null, 962 | "metadata": {}, 963 | "outputs": [], 964 | "source": [ 965 | "# 行列同士の和\n", 966 | "\n", 967 | "d = a + b\n", 968 | "print(d)" 969 | ] 970 | }, 971 | { 972 | "cell_type": "code", 973 | "execution_count": null, 974 | "metadata": {}, 975 | "outputs": [], 976 | "source": [ 977 | "# 行列と1次元配列の内積\n", 978 | "\n", 979 | "a = np.array([[1,2,3],[4,5,6]])\n", 980 | "x = np.array([3,2,1])\n", 981 | "print(a)\n", 982 | "print(x)" 983 | ] 984 | }, 985 | { 986 | "cell_type": "code", 987 | "execution_count": null, 988 | "metadata": {}, 989 | "outputs": [], 990 | "source": [ 991 | "# '@'による内積\n", 992 | "# 行列とベクトルの積になる\n", 993 | "\n", 994 | "y = a @ x\n", 995 | "print(y)" 996 | ] 997 | }, 998 | { 999 | "cell_type": "code", 1000 | "execution_count": null, 1001 | "metadata": {}, 1002 | "outputs": [], 1003 | "source": [ 1004 | "# * による積\n", 1005 | "# ブロードキャスト機能により要素間の積になる\n", 1006 | "\n", 1007 | "z = a * x\n", 1008 | "print(z)" 1009 | ] 1010 | }, 1011 
| { 1012 | "cell_type": "markdown", 1013 | "metadata": {}, 1014 | "source": [ 1015 | "#### 特別な配列の生成" 1016 | ] 1017 | }, 1018 | { 1019 | "cell_type": "code", 1020 | "execution_count": null, 1021 | "metadata": {}, 1022 | "outputs": [], 1023 | "source": [ 1024 | "# 要素数(2,3) すべての要素が0の配列\n", 1025 | "\n", 1026 | "z23 = np.zeros((2,3))\n", 1027 | "print(z23)\n", 1028 | "print(z23.shape)" 1029 | ] 1030 | }, 1031 | { 1032 | "cell_type": "code", 1033 | "execution_count": null, 1034 | "metadata": {}, 1035 | "outputs": [], 1036 | "source": [ 1037 | "# 要素数(2,3) すべての要素が1の配列\n", 1038 | "\n", 1039 | "o23 = np.ones((2,3))\n", 1040 | "print(o23)\n", 1041 | "print(o23.shape)" 1042 | ] 1043 | }, 1044 | { 1045 | "cell_type": "code", 1046 | "execution_count": null, 1047 | "metadata": {}, 1048 | "outputs": [], 1049 | "source": [ 1050 | "# 要素数(2,3) すべての要素が[0,1]間の一様乱数の配列\n", 1051 | "\n", 1052 | "u23 = np.random.rand(2,3)\n", 1053 | "print(u23)\n", 1054 | "print(u23.shape)" 1055 | ] 1056 | }, 1057 | { 1058 | "cell_type": "code", 1059 | "execution_count": null, 1060 | "metadata": {}, 1061 | "outputs": [], 1062 | "source": [ 1063 | "# 要素数(2,3) すべての要素が平均0分散1の正規分布乱数の配列\n", 1064 | "\n", 1065 | "s23 = np.random.randn(2,3)\n", 1066 | "print(s23)\n", 1067 | "print(s23.shape)" 1068 | ] 1069 | }, 1070 | { 1071 | "cell_type": "markdown", 1072 | "metadata": {}, 1073 | "source": [ 1074 | "## 第3部 Matplotlib入門" 1075 | ] 1076 | }, 1077 | { 1078 | "cell_type": "markdown", 1079 | "metadata": {}, 1080 | "source": [ 1081 | "### Matplotlibとは\n", 1082 | "\n", 1083 | "PythonのJupyter Notebook環境でグラフ表示を行うためのライブラリです。 \n", 1084 | "NumpyやPandasとの連携を前提に設計されており、簡潔なコードで様々なグラフ表示を行うことができます。 \n", 1085 | "以下は7章の線形回帰の実習で出てくるMatplotlib呼出しコードの解説です。 \n", 1086 | "(書籍内では説明が省かれています)" 1087 | ] 1088 | }, 1089 | { 1090 | "cell_type": "markdown", 1091 | "metadata": {}, 1092 | "source": [ 1093 | "#### データ準備\n", 1094 | "(解説はch00-13-numpy2.ipynbにあります)" 1095 | ] 1096 | }, 1097 | { 1098 | "cell_type": "code", 1099 | "execution_count": null, 
1100 | "metadata": {}, 1101 | "outputs": [], 1102 | "source": [ 1103 | "# 必要ライブラリimport\n", 1104 | "import numpy as np\n", 1105 | "from sklearn.datasets import load_boston\n", 1106 | "\n", 1107 | "boston = load_boston()\n", 1108 | "x_org, yt = boston.data, boston.target\n", 1109 | "feature_names = boston.feature_names\n", 1110 | "\n", 1111 | "x_data = x_org[:,feature_names == 'RM']\n", 1112 | "x = np.insert(x_data, 0, 1.0, axis=1)" 1113 | ] 1114 | }, 1115 | { 1116 | "cell_type": "markdown", 1117 | "metadata": {}, 1118 | "source": [ 1119 | "#### 散布図の表示" 1120 | ] 1121 | }, 1122 | { 1123 | "cell_type": "code", 1124 | "execution_count": null, 1125 | "metadata": {}, 1126 | "outputs": [], 1127 | "source": [ 1128 | "# 必要ライブラリimport\n", 1129 | "%matplotlib inline\n", 1130 | "import matplotlib.pyplot as plt" 1131 | ] 1132 | }, 1133 | { 1134 | "cell_type": "code", 1135 | "execution_count": null, 1136 | "metadata": {}, 1137 | "outputs": [], 1138 | "source": [ 1139 | "# 散布図の表示\n", 1140 | "\n", 1141 | "# 散布図は scatter関数で表示します。\n", 1142 | "# 第一引数 xの値の配列 (例では x[:,1])\n", 1143 | "# 第二引数 yの値の配列 (例ではyt)\n", 1144 | "# s: 点の大きさ (オプション)\n", 1145 | "# c: 色指定 (オプション)\n", 1146 | "plt.scatter(x[:,1], yt, s=10, c='b')\n", 1147 | "\n", 1148 | "# xlabel: x軸のラベル表示\n", 1149 | "plt.xlabel('ROOM', fontsize=14)\n", 1150 | "\n", 1151 | "# ylabel: y軸のラベル表示\n", 1152 | "plt.ylabel('PRICE', fontsize=14)\n", 1153 | "\n", 1154 | "# 画面描画を明示的に指示\n", 1155 | "plt.show()" 1156 | ] 1157 | }, 1158 | { 1159 | "cell_type": "markdown", 1160 | "metadata": {}, 1161 | "source": [ 1162 | "#### グラフ表示\n", 1163 | "7章では学習曲線の表示にplot関数が使われています。 \n", 1164 | "学習曲線の計算は大変なので、代わりに5章で出てくるシグモイド関数のグラフ表示コードを示します。" 1165 | ] 1166 | }, 1167 | { 1168 | "cell_type": "code", 1169 | "execution_count": null, 1170 | "metadata": {}, 1171 | "outputs": [], 1172 | "source": [ 1173 | "# シグモイド関数の定義\n", 1174 | "def sigmoid(x):\n", 1175 | " return 1/(1 + np.exp(-x))" 1176 | ] 1177 | }, 1178 | { 1179 | "cell_type": "code", 1180 | "execution_count": 
null, 1181 | "metadata": {}, 1182 | "outputs": [], 1183 | "source": [ 1184 | "# xの配列作成\n", 1185 | "# linspaceは与えられた区間の間に等間隔の点を取る関数です。\n", 1186 | "# 下記の例では 区間[-5, 5]の間に101個の点を取ります。\n", 1187 | "\n", 1188 | "x = np.linspace(-5, 5, 101)" 1189 | ] 1190 | }, 1191 | { 1192 | "cell_type": "code", 1193 | "execution_count": null, 1194 | "metadata": {}, 1195 | "outputs": [], 1196 | "source": [ 1197 | "# xの内容表示(一部)\n", 1198 | "print(x[:5])" 1199 | ] 1200 | }, 1201 | { 1202 | "cell_type": "code", 1203 | "execution_count": null, 1204 | "metadata": {}, 1205 | "outputs": [], 1206 | "source": [ 1207 | "# yの配列作成\n", 1208 | "y = sigmoid(x)" 1209 | ] 1210 | }, 1211 | { 1212 | "cell_type": "code", 1213 | "execution_count": null, 1214 | "metadata": {}, 1215 | "outputs": [], 1216 | "source": [ 1217 | "# yの内容表示(一部)\n", 1218 | "print(y[:5])" 1219 | ] 1220 | }, 1221 | { 1222 | "cell_type": "code", 1223 | "execution_count": null, 1224 | "metadata": {}, 1225 | "outputs": [], 1226 | "source": [ 1227 | "# グラフ表示\n", 1228 | "\n", 1229 | "# グラフ表示はplot関数で描画する\n", 1230 | "# 第一引数: xの配列\n", 1231 | "# 第二引数: yの配列\n", 1232 | "# label: グラフのラベル指定(オプション)\n", 1233 | "plt.plot(x, y, label='sigmoid')\n", 1234 | "\n", 1235 | "# グラフ上にグリッド表示を追加\n", 1236 | "plt.grid()\n", 1237 | "\n", 1238 | "# グラフ上に凡例表示を追加\n", 1239 | "plt.legend()\n", 1240 | "\n", 1241 | "# 画面描画を明示的に指示\n", 1242 | "plt.show()" 1243 | ] 1244 | }, 1245 | { 1246 | "cell_type": "code", 1247 | "execution_count": null, 1248 | "metadata": {}, 1249 | "outputs": [], 1250 | "source": [] 1251 | } 1252 | ], 1253 | "metadata": { 1254 | "kernelspec": { 1255 | "display_name": "Python 3", 1256 | "language": "python", 1257 | "name": "python3" 1258 | }, 1259 | "language_info": { 1260 | "codemirror_mode": { 1261 | "name": "ipython", 1262 | "version": 3 1263 | }, 1264 | "file_extension": ".py", 1265 | "mimetype": "text/x-python", 1266 | "name": "python", 1267 | "nbconvert_exporter": "python", 1268 | "pygments_lexer": "ipython3", 1269 | "version": "3.7.3" 1270 | }
1271 | }, 1272 | "nbformat": 4, 1273 | "nbformat_minor": 1 1274 | } 1275 | -------------------------------------------------------------------------------- /notebooks/.ipynb_checkpoints/ch00-11-python-entry-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python入門" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## 第一部 Python文法1\n", 15 | "変数からタプルまで" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### 変数の型\n", 23 | "Pythonの変数には**整数型**、**浮動小数点型**、**文字列型**、**ブーリアン型**などがあります。 \n", 24 | "C言語のような型の宣言は不要で、代入された値から自動的に型の設定がされます。 \n", 25 | "変数がどの型を持っているかは、``type``関数で調べることができます。 \n", 26 | "また、変数の値そのものは ``print``関数で表示できます。" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# 整数型の例\n", 36 | "\n", 37 | "x1 = 2\n", 38 | "print(x1)\n", 39 | "print(type(x1))" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "# 浮動小数点型の例\n", 49 | "\n", 50 | "x2 = 3.0\n", 51 | "print(x2)\n", 52 | "print(type(x2))" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "# 文字列型の例\n", 62 | "\n", 63 | "x3 = 'abc'\n", 64 | "print(x3)\n", 65 | "print(type(x3))" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "# ブーリアン型(True/False)の例\n", 75 | "\n", 76 | "x4 = True\n", 77 | "print(x4)\n", 78 | "print(type(x4))" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### 演算\n", 86 | "\n", 87 | "数値間の演算は他のプログラム言語同様 ``+, -, *, /`` 等を使います。 \n", 88 | "文字列同士の連結にも ``+`` を使います。" 89 | 
] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "# 整数同士の和\n", 98 | "\n", 99 | "x1 = 2\n", 100 | "y1 = 3\n", 101 | "z1 = x1 + y1\n", 102 | "print(z1, type(z1))" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# 浮動小数点同士の和\n", 112 | "\n", 113 | "x2 = 3.0\n", 114 | "y2 = 2.0\n", 115 | "z2 = x2 + y2\n", 116 | "print(z2, type(z2)) " 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# 文字列の連結\n", 126 | "\n", 127 | "x3 = 'abc'\n", 128 | "y3 = 'XYZ'\n", 129 | "z3 = x3 + y3\n", 130 | "print(z3, type(z3))" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "# 整数と浮動小数点間の演算\n", 140 | "# 自動的に方が浮動小数点に合わせられる\n", 141 | "\n", 142 | "y4 = x1 + y2\n", 143 | "print(y4, type(y4))" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "#### 比較演算子\n", 151 | "比較演算子は ``==`` です。" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "x1 = 2\n", 161 | "y1 = 3\n", 162 | "w1 = 2\n", 163 | "\n", 164 | "print(w1 == x1)\n", 165 | "print(w1 == y1)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "#### 論理演算子\n", 173 | "\n", 174 | "論理演算子は ``and``, ``or``, ``not``です。" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "t1 = True\n", 184 | "t2 = True\n", 185 | "f1 = False\n", 186 | "f2 = False" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | 
"source": [ 195 | "# AND演算\n", 196 | "\n", 197 | "p1 = t1 and t2\n", 198 | "p2 = t1 and f2\n", 199 | "print(p1, p2)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "# OR演算\n", 209 | "\n", 210 | "q1 = t1 or f1\n", 211 | "q2 = f1 or f2\n", 212 | "print(q1, q2)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "# NOT演算\n", 222 | "\n", 223 | "r1 = not t1\n", 224 | "r2 = not f1\n", 225 | "print(r1, r2)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "### リスト\n", 233 | "\n", 234 | "リストは``[x, y, ...]``のような形式で表現します。 \n", 235 | "第一要素は``list[0]``, 第二要素は``list[1]``で参照します。 \n", 236 | "リストの長さを知りたいときは``len``関数を使います。" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "# リストの定義\n", 246 | "\n", 247 | "list1 = [2, 4, 6, 8, 10, 12, 14]" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "# 型は\"list\"\n", 257 | "\n", 258 | "print(type(list1))" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "# print関数にかけると、全要素が表示される\n", 268 | "\n", 269 | "print(list1)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "# 第一要素の表示\n", 279 | "\n", 280 | "print(list1[0])" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "# 第二要素の表示\n", 290 | "\n", 291 | "print(list1[1])" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | 
"metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "# リストの長さはlen関数で取得\n", 301 | "\n", 302 | "print(len(list1))" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "### 部分リスト\n", 310 | "\n", 311 | "``list[0:3]``のような形式の参照も可能で、 \n", 312 | "この式は第1要素から第3要素までを抜き出した部分リストとなります。 \n", 313 | "**「0以上で3より前」**という読み方をするとわかりやすいです。" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "# list[0]から list[3]より前\n", 323 | "\n", 324 | "print(list1[0:3])" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "#### 部分リストの参照パターン\n", 332 | "\n", 333 | "``list``を元に部分リストを作る方法には、以下のようないろいろなバリエーションがあります。 \n", 334 | "機械学習のコーディングでよく使われるテクニックなので、是非マスターして下さい。" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "# 最初からlist[2]より前\n", 344 | "print(list1[0:2])\n", 345 | "\n", 346 | "# 第一引数を省略すると\"0\"であるとみなされる\n", 347 | "print(list1[:2])" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "# list[2]から最後まで\n", 357 | "\n", 358 | "len1 = len(list1)\n", 359 | "print(list1[2:len1])\n", 360 | "\n", 361 | "# 第二引数を省略すると最終要素(len(list)))であるとみなされる\n", 362 | "print(list1[2:])" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "# ':'' だけの場合は、元のリスト全体を表す\n", 372 | "\n", 373 | "print(list1[:])" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "# 後ろから2つ\n", 383 | "\n", 384 | "print(list1[-2:])" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | 
"source": [ 393 | "# 後ろの2つを取り除いたもの\n", 394 | "\n", 395 | "print(list1[:-2])" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [ 404 | "# 普段は使わないのですが、listには3つめの引数もあり、これを使うとこんなことも可能です\n", 405 | "\n", 406 | "# 一つおき\n", 407 | "\n", 408 | "print(list1[::2])" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": null, 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [ 417 | "# 第三引数に-1を指定すると「逆順」を意味します\n", 418 | "\n", 419 | "print(list1[::-1])" 420 | ] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": {}, 425 | "source": [ 426 | "#### 異なる要素の混在\n", 427 | "\n", 428 | "リストの各要素は異なる型の混在もできます。" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "list2 = [1, 2.0, 'abc', False]\n", 438 | "print(list2)" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "#### リストに要素の追加\n", 446 | "\n", 447 | "リストに要素を追加したい場合は、``append``関数を使います。" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "# 空リストの定義\n", 457 | "list3 = []\n", 458 | "\n", 459 | "# 要素の追加\n", 460 | "list3.append(1)\n", 461 | "list3.append(2.0)\n", 462 | "list3.append('abc')\n", 463 | "print(list3)" 464 | ] 465 | }, 466 | { 467 | "cell_type": "markdown", 468 | "metadata": {}, 469 | "source": [ 470 | "#### リスト間の連結\n", 471 | "\n", 472 | "リストとリストを連結したい場合は、``+``演算子を使います。" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "list4 = list2 + list3\n", 482 | "print(list4)" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "### タプル\n", 490 | "\n", 491 | "リストと似たデータ型として「タプル」があります。 \n", 492 | 
"タプルは、値の変更不可能なリストであると考えて下さい。" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": null, 498 | "metadata": {}, 499 | "outputs": [], 500 | "source": [ 501 | "# タプルの場合は[]でなく()で要素を囲みます\n", 502 | "\n", 503 | "tap1 = (1,3,5,7)\n", 504 | "print(tap1)\n", 505 | "print(type(tap1))" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": null, 511 | "metadata": {}, 512 | "outputs": [], 513 | "source": [ 514 | "# タプルの要素への参照はリスト同様[]で行います\n", 515 | "print(tap1[0], tap1[1])" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": {}, 522 | "outputs": [], 523 | "source": [ 524 | "# 値を変更しようとすると 。。。\n", 525 | "tap1[1] = 10" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": null, 531 | "metadata": {}, 532 | "outputs": [], 533 | "source": [ 534 | "# リストの場合、上のこと(要素を後で変更する)は可能です\n", 535 | "\n", 536 | "list7 = [2,4,6,8,10]\n", 537 | "list7[2] = 9\n", 538 | "print(list7)" 539 | ] 540 | }, 541 | { 542 | "cell_type": "markdown", 543 | "metadata": {}, 544 | "source": [ 545 | "## 第二部 Numpy入門1" 546 | ] 547 | }, 548 | { 549 | "cell_type": "markdown", 550 | "metadata": {}, 551 | "source": [ 552 | "### Numpyの特徴\n", 553 | "\n", 554 | "numpy配列を使って、配列計算を行えます。 \n", 555 | "NativeなPythonの機能と比較してNumpyを使うことにより次のメリットがあります。\n", 556 | "\n", 557 | "* 処理速度が早くなる\n", 558 | "* 配列の扱い方が柔軟\n", 559 | "* コードがシンプルになる\n", 560 | "\n", 561 | "より詳しい解説は例えば下記のリンク参照して下さい。 \n", 562 | "[numpyの何がすごいのか?](https://to-kei.net/python/data-analysis/what-is-numpy/)\n" 563 | ] 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "metadata": {}, 568 | "source": [ 569 | "### 一次元配列" 570 | ] 571 | }, 572 | { 573 | "cell_type": "markdown", 574 | "metadata": {}, 575 | "source": [ 576 | "#### 宣言" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": null, 582 | "metadata": {}, 583 | "outputs": [], 584 | "source": [ 585 | "# ライブラリのロード\n", 586 | "import numpy as np\n", 587 | "\n", 588 | "# 一次元配列の作成\n", 
589 | "# リスト配列を引数に、array関数でnumpy一次元配列を作ります。\n", 590 | "\n", 591 | "list1 = list(range(2,12,2))\n", 592 | "array1 = np.array(list1)" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "metadata": {}, 599 | "outputs": [], 600 | "source": [ 601 | "# print関数による表示結果の比較\n", 602 | "# 画面上はカンマのあるなしで区別します。\n", 603 | "\n", 604 | "print('list配列: ', list1)\n", 605 | "print('numpy配列: ', array1)\n", 606 | "array1" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": null, 612 | "metadata": {}, 613 | "outputs": [], 614 | "source": [ 615 | "# データ型(dtype)つき変数作成\n", 616 | "\n", 617 | "# 個々の要素をデータ型付きで定義することも可能です。\n", 618 | "# 型には、以下のようなものがあります。\n", 619 | "#\n", 620 | "# 符号付き整数: int8, int16, int32, int64\n", 621 | "# 符号なし整数: uint8, uint16, uint32, uint64\n", 622 | "# 浮動小数点: float16, float32, float64, float128\n", 623 | "\n", 624 | "array2 = np.array([2, 4, 6, 8,10], dtype=np.int32)\n", 625 | "print(array2)\n", 626 | "\n", 627 | "array2" 628 | ] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": null, 633 | "metadata": {}, 634 | "outputs": [], 635 | "source": [ 636 | "# 型は'numpy.ndarray' となります\n", 637 | "\n", 638 | "print(type(array1))" 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": null, 644 | "metadata": {}, 645 | "outputs": [], 646 | "source": [ 647 | "# 配列の要素数は'shape'という属性で取得可能です\n", 648 | "# 結果はtuple(タプル)で返されます\n", 649 | "\n", 650 | "print(array1.shape)" 651 | ] 652 | }, 653 | { 654 | "cell_type": "markdown", 655 | "metadata": {}, 656 | "source": [ 657 | "#### 参照" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [ 666 | "# 要素の参照例\n", 667 | "\n", 668 | "# 先頭\n", 669 | "print(array1[0])\n", 670 | "\n", 671 | "# 一番後ろの要素は-1で参照できます\n", 672 | "print(array1[-1])" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": null, 678 | "metadata": {}, 679 | "outputs": [], 680 |
"source": [ 681 | "# 範囲付き参照\n", 682 | "# このパターンはlist変数と同じです。\n", 683 | "\n", 684 | "# 0以上2未満\n", 685 | "print(array1[:2])\n", 686 | "\n", 687 | "# 2以上\n", 688 | "print(array1[2:])\n", 689 | "\n", 690 | "# 全部\n", 691 | "print(array1[:])" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": null, 697 | "metadata": {}, 698 | "outputs": [], 699 | "source": [ 700 | "# こういうアクセス方法も可能です\n", 701 | "# (これはlistではできない)\n", 702 | "\n", 703 | "# array1の0番目、2番目、4番目\n", 704 | "print(array1[[0,2,4]])\n", 705 | "\n", 706 | "# array1の3番目、1番目\n", 707 | "print(array1[[3,1]])" 708 | ] 709 | }, 710 | { 711 | "cell_type": "markdown", 712 | "metadata": {}, 713 | "source": [ 714 | "#### 計算" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": null, 720 | "metadata": {}, 721 | "outputs": [], 722 | "source": [ 723 | "# 計算の例\n", 724 | "# numpy配列を対象にすると計算を一気に行うことができます\n", 725 | "\n", 726 | "array1 = np.array(list(range(2,12,2)))\n", 727 | "array3 = np.array(list(range(5)))\n", 728 | "print(array1)\n", 729 | "print(array3)" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": null, 735 | "metadata": {}, 736 | "outputs": [], 737 | "source": [ 738 | "# 足し算\n", 739 | "# list変数だとループを回す必要があります\n", 740 | "\n", 741 | "array4 = array1 + array3\n", 742 | "print(array4)" 743 | ] 744 | }, 745 | { 746 | "cell_type": "markdown", 747 | "metadata": {}, 748 | "source": [ 749 | "#### ブロードキャスト機能" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": null, 755 | "metadata": {}, 756 | "outputs": [], 757 | "source": [ 758 | "# ブロードキャスト機能\n", 759 | "# サイズの異なる変数同士の演算では、サイズを自動的に合わせて計算します\n", 760 | "\n", 761 | "array5 = array1 + 3\n", 762 | "print(array5)" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": null, 768 | "metadata": {}, 769 | "outputs": [], 770 | "source": [ 771 | "# 関数呼び出し\n", 772 | "# numpy関数と組み合わせると、関数呼び出しも全要素分まとめて行えます\n", 773 | "\n", 774 | "# 対数関数の呼出し\n", 775 | "array6 = 
np.log(array1)\n", 776 | "print(array6)" 777 | ] 778 | }, 779 | { 780 | "cell_type": "markdown", 781 | "metadata": {}, 782 | "source": [ 783 | "#### 特定の条件を満たす要素の抽出" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": null, 789 | "metadata": {}, 790 | "outputs": [], 791 | "source": [ 792 | "# ブロードキャスト機能とindex機能を組み合わせてこんなことも可能です\n", 793 | "# array3 から偶数の要素だけを抜き出す\n", 794 | "\n", 795 | "array3 = np.array(list(range(5)))\n", 796 | "w = (array3 % 2) == 0\n", 797 | "print(w)" 798 | ] 799 | }, 800 | { 801 | "cell_type": "code", 802 | "execution_count": null, 803 | "metadata": {}, 804 | "outputs": [], 805 | "source": [ 806 | "array7 = array3[w]\n", 807 | "print(array7)" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": null, 813 | "metadata": {}, 814 | "outputs": [], 815 | "source": [ 816 | "# まとめて書くとこうなります\n", 817 | "\n", 818 | "array8 = array3[(array3 % 2) == 0]\n", 819 | "print(array8)" 820 | ] 821 | }, 822 | { 823 | "cell_type": "markdown", 824 | "metadata": {}, 825 | "source": [ 826 | "#### 内積" 827 | ] 828 | }, 829 | { 830 | "cell_type": "code", 831 | "execution_count": null, 832 | "metadata": {}, 833 | "outputs": [], 834 | "source": [ 835 | "# 内積\n", 836 | "# 記号 '@' を使って内積計算が可能です。\n", 837 | "\n", 838 | "array1 = np.array(list(range(2,12,2)))\n", 839 | "array3 = np.array(list(range(5)))\n", 840 | "print(array1)\n", 841 | "print(array3)\n", 842 | "\n", 843 | "p = array1 @ array3\n", 844 | "print(p)" 845 | ] 846 | }, 847 | { 848 | "cell_type": "markdown", 849 | "metadata": {}, 850 | "source": [ 851 | "### 二次元配列\n", 852 | "\n", 853 | "numpyでは行列のような二次元配列も扱えます。 " 854 | ] 855 | }, 856 | { 857 | "cell_type": "markdown", 858 | "metadata": {}, 859 | "source": [ 860 | "#### 宣言" 861 | ] 862 | }, 863 | { 864 | "cell_type": "code", 865 | "execution_count": null, 866 | "metadata": {}, 867 | "outputs": [], 868 | "source": [ 869 | "# 二次元配列の宣言\n", 870 | "# この場合、引数はlistのlistとなります。\n", 871 | "\n", 872 | "array8 = np.array([[1,2,3,4,5],
[6,7,8,9,10],[11,12,13,14,15]])" 873 | ] 874 | }, 875 | { 876 | "cell_type": "code", 877 | "execution_count": null, 878 | "metadata": {}, 879 | "outputs": [], 880 | "source": [ 881 | "# 二次元配列をprint関数にかけるとこのような表示になります\n", 882 | "\n", 883 | "print(array8)" 884 | ] 885 | }, 886 | { 887 | "cell_type": "code", 888 | "execution_count": null, 889 | "metadata": {}, 890 | "outputs": [], 891 | "source": [ 892 | "# 要素数の取得\n", 893 | "\n", 894 | "print(array8.shape)" 895 | ] 896 | }, 897 | { 898 | "cell_type": "markdown", 899 | "metadata": {}, 900 | "source": [ 901 | "#### 参照" 902 | ] 903 | }, 904 | { 905 | "cell_type": "code", 906 | "execution_count": null, 907 | "metadata": {}, 908 | "outputs": [], 909 | "source": [ 910 | "# 要素の参照は「(第一次元引数),(第二次元引数)」の形式\n", 911 | "\n", 912 | "print(array8[1,2])" 913 | ] 914 | }, 915 | { 916 | "cell_type": "code", 917 | "execution_count": null, 918 | "metadata": {}, 919 | "outputs": [], 920 | "source": [ 921 | "# それぞれの次元に対して範囲指定で参照することも可能\n", 922 | "\n", 923 | "print(array8[:2,2:])" 924 | ] 925 | }, 926 | { 927 | "cell_type": "markdown", 928 | "metadata": {}, 929 | "source": [ 930 | "#### 計算" 931 | ] 932 | }, 933 | { 934 | "cell_type": "code", 935 | "execution_count": null, 936 | "metadata": {}, 937 | "outputs": [], 938 | "source": [ 939 | "# スカラー積\n", 940 | "\n", 941 | "a = np.array([[1,2,3],[4,5,6]])\n", 942 | "b = a * 3\n", 943 | "print(b)" 944 | ] 945 | }, 946 | { 947 | "cell_type": "code", 948 | "execution_count": null, 949 | "metadata": {}, 950 | "outputs": [], 951 | "source": [ 952 | "# スカラー和\n", 953 | "\n", 954 | "a = np.array([[1,2,3],[4,5,6]])\n", 955 | "c = a + 3\n", 956 | "print(c)" 957 | ] 958 | }, 959 | { 960 | "cell_type": "code", 961 | "execution_count": null, 962 | "metadata": {}, 963 | "outputs": [], 964 | "source": [ 965 | "# 行列同士の和\n", 966 | "\n", 967 | "d = a + b\n", 968 | "print(d)" 969 | ] 970 | }, 971 | { 972 | "cell_type": "code", 973 | "execution_count": null, 974 | "metadata": {}, 975 | "outputs": [], 976 | "source": 
[ 977 | "# 行列と1次元配列の内積\n", 978 | "\n", 979 | "a = np.array([[1,2,3],[4,5,6]])\n", 980 | "x = np.array([3,2,1])\n", 981 | "print(a)\n", 982 | "print(x)" 983 | ] 984 | }, 985 | { 986 | "cell_type": "code", 987 | "execution_count": null, 988 | "metadata": {}, 989 | "outputs": [], 990 | "source": [ 991 | "# '@'による内積\n", 992 | "# 行列とベクトルの積になる\n", 993 | "\n", 994 | "y = a @ x\n", 995 | "print(y)" 996 | ] 997 | }, 998 | { 999 | "cell_type": "code", 1000 | "execution_count": null, 1001 | "metadata": {}, 1002 | "outputs": [], 1003 | "source": [ 1004 | "# * による積\n", 1005 | "# ブロードキャスト機能により要素間の積になる\n", 1006 | "\n", 1007 | "z = a * x\n", 1008 | "print(z)" 1009 | ] 1010 | }, 1011 | { 1012 | "cell_type": "markdown", 1013 | "metadata": {}, 1014 | "source": [ 1015 | "#### 特別な配列の生成" 1016 | ] 1017 | }, 1018 | { 1019 | "cell_type": "code", 1020 | "execution_count": null, 1021 | "metadata": {}, 1022 | "outputs": [], 1023 | "source": [ 1024 | "# 要素数(2,3) すべての要素が0の配列\n", 1025 | "\n", 1026 | "z23 = np.zeros((2,3))\n", 1027 | "print(z23)\n", 1028 | "print(z23.shape)" 1029 | ] 1030 | }, 1031 | { 1032 | "cell_type": "code", 1033 | "execution_count": null, 1034 | "metadata": {}, 1035 | "outputs": [], 1036 | "source": [ 1037 | "# 要素数(2,3) すべての要素が1の配列\n", 1038 | "\n", 1039 | "o23 = np.ones((2,3))\n", 1040 | "print(o23)\n", 1041 | "print(o23.shape)" 1042 | ] 1043 | }, 1044 | { 1045 | "cell_type": "code", 1046 | "execution_count": null, 1047 | "metadata": {}, 1048 | "outputs": [], 1049 | "source": [ 1050 | "# 要素数(2,3) すべての要素が[0,1]間の一様乱数の配列\n", 1051 | "\n", 1052 | "u23 = np.random.rand(2,3)\n", 1053 | "print(u23)\n", 1054 | "print(u23.shape)" 1055 | ] 1056 | }, 1057 | { 1058 | "cell_type": "code", 1059 | "execution_count": null, 1060 | "metadata": {}, 1061 | "outputs": [], 1062 | "source": [ 1063 | "# 要素数(2,3) すべての要素が平均0分散1の正規分布乱数の配列\n", 1064 | "\n", 1065 | "s23 = np.random.randn(2,3)\n", 1066 | "print(s23)\n", 1067 | "print(s23.shape)" 1068 | ] 1069 | }, 1070 | { 1071 | "cell_type": "markdown", 
1072 | "metadata": {}, 1073 | "source": [ 1074 | "## 第三部 Matplotlib入門" 1075 | ] 1076 | }, 1077 | { 1078 | "cell_type": "markdown", 1079 | "metadata": {}, 1080 | "source": [ 1081 | "### Matplotlibとは\n", 1082 | "\n", 1083 | "PythonのJupyter Notebook環境でグラフ表示を行うためのライブラリです。 \n", 1084 | "NumpyやPandasとの連携を前提に設計されており、簡潔なコードで様々なグラフ表示を行うことができます。 \n", 1085 | "以下は7章の線形回帰の実習で出てくるMatplotlib呼出しコードの解説です。 \n", 1086 | "(書籍内では説明が省かれています)" 1087 | ] 1088 | }, 1089 | { 1090 | "cell_type": "markdown", 1091 | "metadata": {}, 1092 | "source": [ 1093 | "#### データ準備\n", 1094 | "(解説はch00-13-numpy2.ipynbにあります)" 1095 | ] 1096 | }, 1097 | { 1098 | "cell_type": "code", 1099 | "execution_count": null, 1100 | "metadata": {}, 1101 | "outputs": [], 1102 | "source": [ 1103 | "# 必要ライブラリimport\n", 1104 | "import numpy as np\n", 1105 | "from sklearn.datasets import load_boston\n", 1106 | "\n", 1107 | "boston = load_boston()\n", 1108 | "x_org, yt = boston.data, boston.target\n", 1109 | "feature_names = boston.feature_names\n", 1110 | "\n", 1111 | "x_data = x_org[:,feature_names == 'RM']\n", 1112 | "x = np.insert(x_data, 0, 1.0, axis=1)" 1113 | ] 1114 | }, 1115 | { 1116 | "cell_type": "markdown", 1117 | "metadata": {}, 1118 | "source": [ 1119 | "#### 散布図の表示" 1120 | ] 1121 | }, 1122 | { 1123 | "cell_type": "code", 1124 | "execution_count": null, 1125 | "metadata": {}, 1126 | "outputs": [], 1127 | "source": [ 1128 | "# 必要ライブラリimport\n", 1129 | "%matplotlib inline\n", 1130 | "import matplotlib.pyplot as plt" 1131 | ] 1132 | }, 1133 | { 1134 | "cell_type": "code", 1135 | "execution_count": null, 1136 | "metadata": {}, 1137 | "outputs": [], 1138 | "source": [ 1139 | "# 散布図の表示\n", 1140 | "\n", 1141 | "# 散布図は scatter関数で表示します。\n", 1142 | "# 第一引数 xの値の配列 (例では x[:,1])\n", 1143 | "# 第二引数 yの値の配列 (例ではyt)\n", 1144 | "# s: 点の大きさ (オプション)\n", 1145 | "# c: 色指定 (オプション)\n", 1146 | "plt.scatter(x[:,1], yt, s=10, c='b')\n", 1147 | "\n", 1148 | "# xlabel: x軸のラベル表示\n", 1149 | "plt.xlabel('ROOM', fontsize=14)\n", 1150 | "\n", 1151 |
"# ylabel: y軸のラベル表示\n", 1152 | "plt.ylabel('PRICE', fontsize=14)\n", 1153 | "\n", 1154 | "# 画面描画を明示的に指示\n", 1155 | "plt.show()" 1156 | ] 1157 | }, 1158 | { 1159 | "cell_type": "markdown", 1160 | "metadata": {}, 1161 | "source": [ 1162 | "#### グラフ表示\n", 1163 | "7章では学習曲線の表示にplot関数が使われています。 \n", 1164 | "学習曲線の計算は大変なので、代わりに5章で出てくるシグモイド関数のグラフ表示コードを示します。" 1165 | ] 1166 | }, 1167 | { 1168 | "cell_type": "code", 1169 | "execution_count": null, 1170 | "metadata": {}, 1171 | "outputs": [], 1172 | "source": [ 1173 | "# シグモイド関数の定義\n", 1174 | "def sigmoid(x):\n", 1175 | " return 1/(1 + np.exp(-x))" 1176 | ] 1177 | }, 1178 | { 1179 | "cell_type": "code", 1180 | "execution_count": null, 1181 | "metadata": {}, 1182 | "outputs": [], 1183 | "source": [ 1184 | "# xの配列作成\n", 1185 | "# linspaceは与えられた区間の間に等間隔の点を取る関数です。\n", 1186 | "# 下記の例では 区間[-5, 5]の間に101個の点を取ります。\n", 1187 | "\n", 1188 | "x = np.linspace(-5, 5, 101)" 1189 | ] 1190 | }, 1191 | { 1192 | "cell_type": "code", 1193 | "execution_count": null, 1194 | "metadata": {}, 1195 | "outputs": [], 1196 | "source": [ 1197 | "# xの内容表示(一部)\n", 1198 | "print(x[:5])" 1199 | ] 1200 | }, 1201 | { 1202 | "cell_type": "code", 1203 | "execution_count": null, 1204 | "metadata": {}, 1205 | "outputs": [], 1206 | "source": [ 1207 | "# yの配列作成\n", 1208 | "y = sigmoid(x)" 1209 | ] 1210 | }, 1211 | { 1212 | "cell_type": "code", 1213 | "execution_count": null, 1214 | "metadata": {}, 1215 | "outputs": [], 1216 | "source": [ 1217 | "# yの内容表示(一部)\n", 1218 | "print(y[:5])" 1219 | ] 1220 | }, 1221 | { 1222 | "cell_type": "code", 1223 | "execution_count": null, 1224 | "metadata": {}, 1225 | "outputs": [], 1226 | "source": [ 1227 | "# グラフ表示\n", 1228 | "\n", 1229 | "# グラフ表示はplot関数で描画する\n", 1230 | "# 第一引数: xの配列\n", 1231 | "# 第二引数: yの配列\n", 1232 | "# label: グラフのラベル指定(オプション)\n", 1233 | "plt.plot(x, y, label='sigmoid')\n", 1234 | "\n", 1235 | "# グラフ上にグリッド表示を追加\n", 1236 | "plt.grid()\n", 1237 | "\n", 1238 | "# グラフ上に凡例表示を追加\n", 1239 | "plt.legend()\n", 1240 | "\n",
1241 | "# 画面描画を明示的に指示\n", 1242 | "plt.show()" 1243 | ] 1244 | }, 1245 | { 1246 | "cell_type": "code", 1247 | "execution_count": null, 1248 | "metadata": {}, 1249 | "outputs": [], 1250 | "source": [] 1251 | } 1252 | ], 1253 | "metadata": { 1254 | "kernelspec": { 1255 | "display_name": "Python 3", 1256 | "language": "python", 1257 | "name": "python3" 1258 | }, 1259 | "language_info": { 1260 | "codemirror_mode": { 1261 | "name": "ipython", 1262 | "version": 3 1263 | }, 1264 | "file_extension": ".py", 1265 | "mimetype": "text/x-python", 1266 | "name": "python", 1267 | "nbconvert_exporter": "python", 1268 | "pygments_lexer": "ipython3", 1269 | "version": "3.7.3" 1270 | } 1271 | }, 1272 | "nbformat": 4, 1273 | "nbformat_minor": 1 1274 | } 1275 | -------------------------------------------------------------------------------- /notebooks/.ipynb_checkpoints/11-python-entry-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python入門" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## 第一部 Python文法1\n", 15 | "変数からタプルまで" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### 変数の型\n", 23 | "Pythonの変数には**整数型**、**浮動小数点型**、**文字列型**、**ブーリアン型**などがあります。 \n", 24 | "C言語のような型の宣言は不要で、代入された値から自動的に型の設定がされます。 \n", 25 | "変数がどの型を持っているかは、``type``関数で調べることができます。 \n", 26 | "また、変数の値そのものは ``print``関数で表示できます。" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# 整数型の例\n", 36 | "\n", 37 | "x1 = 2\n", 38 | "print(x1)\n", 39 | "print(type(x1))" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "# 浮動小数点型の例\n", 49 | "\n", 50 | "x2 = 3.0\n", 51 | "print(x2)\n", 52 | "print(type(x2))" 53 | ] 54 
| }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "# 文字列型の例\n", 62 | "\n", 63 | "x3 = 'abc'\n", 64 | "print(x3)\n", 65 | "print(type(x3))" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "# ブーリアン型(True/False)の例\n", 75 | "\n", 76 | "x4 = True\n", 77 | "print(x4)\n", 78 | "print(type(x4))" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### 演算\n", 86 | "\n", 87 | "数値間の演算は他のプログラム言語同様 ``+, -, *, /`` 等を使います。 \n", 88 | "文字列同士の連結にも ``+`` を使います。" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "# 整数同士の和\n", 98 | "\n", 99 | "x1 = 2\n", 100 | "y1 = 3\n", 101 | "z1 = x1 + y1\n", 102 | "print(z1, type(z1))" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# 浮動小数点同士の和\n", 112 | "\n", 113 | "x2 = 3.0\n", 114 | "y2 = 2.0\n", 115 | "z2 = x2 + y2\n", 116 | "print(z2, type(z2)) " 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# 文字列の連結\n", 126 | "\n", 127 | "x3 = 'abc'\n", 128 | "y3 = 'XYZ'\n", 129 | "z3 = x3 + y3\n", 130 | "print(z3, type(z3))" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "# 整数と浮動小数点間の演算\n", 140 | "# 自動的に型が浮動小数点に合わせられる\n", 141 | "\n", 142 | "y4 = x1 + y2\n", 143 | "print(y4, type(y4))" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "#### 比較演算子\n", 151 | "2つの値が等しいかどうかを比較する演算子は ``==`` です。" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [],
159 | "source": [ 160 | "x1 = 2\n", 161 | "y1 = 3\n", 162 | "w1 = 2\n", 163 | "\n", 164 | "print(w1 == x1)\n", 165 | "print(w1 == y1)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "#### 論理演算子\n", 173 | "\n", 174 | "論理演算子は ``and``, ``or``, ``not``です。" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "t1 = True\n", 184 | "t2 = True\n", 185 | "f1 = False\n", 186 | "f2 = False" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "# AND演算\n", 196 | "\n", 197 | "p1 = t1 and t2\n", 198 | "p2 = t1 and f2\n", 199 | "print(p1, p2)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "# OR演算\n", 209 | "\n", 210 | "q1 = t1 or f1\n", 211 | "q2 = f1 or f2\n", 212 | "print(q1, q2)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "# NOT演算\n", 222 | "\n", 223 | "r1 = not t1\n", 224 | "r2 = not f1\n", 225 | "print(r1, r2)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "### リスト\n", 233 | "\n", 234 | "リストは``[x, y, ...]``のような形式で表現します。 \n", 235 | "第一要素は``list[0]``, 第二要素は``list[1]``で参照します。 \n", 236 | "リストの長さを知りたいときは``len``関数を使います。" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "# リストの定義\n", 246 | "\n", 247 | "list1 = [2, 4, 6, 8, 10, 12, 14]" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "# 型は\"list\"\n", 257 | "\n", 258 | "print(type(list1))" 259 | ] 260 | }, 261 | { 262 | 
"cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "# print関数にかけると、全要素が表示される\n", 268 | "\n", 269 | "print(list1)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "# 第一要素の表示\n", 279 | "\n", 280 | "print(list1[0])" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "# 第二要素の表示\n", 290 | "\n", 291 | "print(list1[1])" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "# リストの長さはlen関数で取得\n", 301 | "\n", 302 | "print(len(list1))" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "### 部分リスト\n", 310 | "\n", 311 | "``list[0:3]``のような形式の参照も可能で、 \n", 312 | "この式は第1要素から第3要素までを抜き出した部分リストとなります。 \n", 313 | "**「0以上で3より前」**という読み方をするとわかりやすいです。" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "# list[0]から list[3]より前\n", 323 | "\n", 324 | "print(list1[0:3])" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "#### 部分リストの参照パターン\n", 332 | "\n", 333 | "``list``を元に部分リストを作る方法には、以下のようないろいろなバリエーションがあります。 \n", 334 | "機械学習のコーディングでよく使われるテクニックなので、是非マスターして下さい。" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "# 最初からlist[2]より前\n", 344 | "print(list1[0:2])\n", 345 | "\n", 346 | "# 第一引数を省略すると\"0\"であるとみなされる\n", 347 | "print(list1[:2])" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "# list[2]から最後まで\n", 357 | "\n", 358 | "len1 = 
len(list1)\n", 359 | "print(list1[2:len1])\n", 360 | "\n", 361 | "# 第二引数を省略すると最終要素(len(list)))であるとみなされる\n", 362 | "print(list1[2:])" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "# ':'' だけの場合は、元のリスト全体を表す\n", 372 | "\n", 373 | "print(list1[:])" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "# 後ろから2つ\n", 383 | "\n", 384 | "print(list1[-2:])" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "# 後ろの2つを取り除いたもの\n", 394 | "\n", 395 | "print(list1[:-2])" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [ 404 | "# 普段は使わないのですが、listには3つめの引数もあり、これを使うとこんなことも可能です\n", 405 | "\n", 406 | "# 一つおき\n", 407 | "\n", 408 | "print(list1[::2])" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": null, 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [ 417 | "# 第三引数に-1を指定すると「逆順」を意味します\n", 418 | "\n", 419 | "print(list1[::-1])" 420 | ] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": {}, 425 | "source": [ 426 | "#### 異なる要素の混在\n", 427 | "\n", 428 | "リストの各要素は異なる型の混在もできます。" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "list2 = [1, 2.0, 'abc', False]\n", 438 | "print(list2)" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "#### リストに要素の追加\n", 446 | "\n", 447 | "リストに要素を追加したい場合は、``append``関数を使います。" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "# 空リストの定義\n", 457 | "list3 = []\n", 458 | "\n", 459 
| "# 要素の追加\n", 460 | "list3.append(1)\n", 461 | "list3.append(2.0)\n", 462 | "list3.append('abc')\n", 463 | "print(list3)" 464 | ] 465 | }, 466 | { 467 | "cell_type": "markdown", 468 | "metadata": {}, 469 | "source": [ 470 | "#### リスト間の連結\n", 471 | "\n", 472 | "リストとリストを連結したい場合は、``+``演算子を使います。" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "list4 = list2 + list3\n", 482 | "print(list4)" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "### タプル\n", 490 | "\n", 491 | "リストと似たデータ型として「タプル」があります。 \n", 492 | "タプルは、値の変更不可能なリストであると考えて下さい。" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": null, 498 | "metadata": {}, 499 | "outputs": [], 500 | "source": [ 501 | "# タプルの場合は[]でなく()で要素を囲みます\n", 502 | "\n", 503 | "tap1 = (1,3,5,7)\n", 504 | "print(tap1)\n", 505 | "print(type(tap1))" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": null, 511 | "metadata": {}, 512 | "outputs": [], 513 | "source": [ 514 | "# タプルの要素への参照はリスト同様[]で行います\n", 515 | "print(tap1[0], tap1[1])" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": {}, 522 | "outputs": [], 523 | "source": [ 524 | "# 値を変更しようとすると 。。。\n", 525 | "tap1[1] = 10" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": null, 531 | "metadata": {}, 532 | "outputs": [], 533 | "source": [ 534 | "# リストの場合、上のこと(要素を後で変更する)は可能です\n", 535 | "\n", 536 | "list7 = [2,4,6,8,10]\n", 537 | "list7[2] = 9\n", 538 | "print(list7)" 539 | ] 540 | }, 541 | { 542 | "cell_type": "markdown", 543 | "metadata": {}, 544 | "source": [ 545 | "## 第二部 Numpy入門1" 546 | ] 547 | }, 548 | { 549 | "cell_type": "markdown", 550 | "metadata": {}, 551 | "source": [ 552 | "### Numpyの特徴\n", 553 | "\n", 554 | "numpy配列を使って、配列計算を行えます。 \n", 555 | "NativeなPythonの機能と比較してNumpyを使うことにより次のメリットがあります。\n", 556 | "\n", 
557 | "* 処理速度が早くなる\n", 558 | "* 配列の扱い方が柔軟\n", 559 | "* コードがシンプルになる\n", 560 | "\n", 561 | "より詳しい解説は例えば下記のリンク参照して下さい。 \n", 562 | "[numpyの何がすごいのか?](https://to-kei.net/python/data-analysis/what-is-numpy/)\n" 563 | ] 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "metadata": {}, 568 | "source": [ 569 | "### 一次元配列" 570 | ] 571 | }, 572 | { 573 | "cell_type": "markdown", 574 | "metadata": {}, 575 | "source": [ 576 | "#### 宣言" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": null, 582 | "metadata": {}, 583 | "outputs": [], 584 | "source": [ 585 | "# ライブラリのロード\n", 586 | "import numpy as np\n", 587 | "\n", 588 | "# 一次元配列の作成\n", 589 | "# リスト配列を引数に、array関数でnumpy一次元配列を作ります。\n", 590 | "\n", 591 | "list1 = list(range(2,12,2))\n", 592 | "array1 = np.array(list1)" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "metadata": {}, 599 | "outputs": [], 600 | "source": [ 601 | "# print文による表示結果の比較\n", 602 | "# 画面上はカンマのあるなしで区別します。\n", 603 | "\n", 604 | "print('list配列: ', list1)\n", 605 | "print('numpy配列: ', array1)\n", 606 | "array1" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": null, 612 | "metadata": {}, 613 | "outputs": [], 614 | "source": [ 615 | "# データ型(dtype)つき変数作成\n", 616 | "\n", 617 | "# 個々の要素をデータ型付きで定義することも可能です。\n", 618 | "# 型には、以下のようなものがあります。\n", 619 | "#\n", 620 | "# 符号付き整数: int8, int16, int32, int64\n", 621 | "# 符号なし整数: unit8, uint16, uint32, uint64\n", 622 | "# 浮動小数点: float16, float32, float64, float128\n", 623 | "\n", 624 | "array2 = np.array([2, 4, 6, 8,10], dtype=np.int32)\n", 625 | "print(array2)\n", 626 | "\n", 627 | "array2" 628 | ] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": null, 633 | "metadata": {}, 634 | "outputs": [], 635 | "source": [ 636 | "# 型は' numpy.ndarray' となります\n", 637 | "\n", 638 | "print(type(array1))" 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": null, 644 | "metadata": {}, 645 | "outputs": [], 
646 | "source": [ 647 | "# 配列の要素数は'shape'という属性で取得可能です\n", 648 | "# 結果はtuppleで返されます\n", 649 | "\n", 650 | "print(array1.shape)" 651 | ] 652 | }, 653 | { 654 | "cell_type": "markdown", 655 | "metadata": {}, 656 | "source": [ 657 | "#### 参照" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [ 666 | "# 要素の参照例\n", 667 | "\n", 668 | "# 先頭\n", 669 | "print(array1[0])\n", 670 | "\n", 671 | "# 一番後ろの要素は-1で参照できます\n", 672 | "print(array1[-1])" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": null, 678 | "metadata": {}, 679 | "outputs": [], 680 | "source": [ 681 | "# 範囲付き参照\n", 682 | "# このパターンはlist変数と同じです。\n", 683 | "\n", 684 | "# 0以上2未満\n", 685 | "print(array1[:2])\n", 686 | "\n", 687 | "# 2以上\n", 688 | "print(array1[2:])\n", 689 | "\n", 690 | "# 全部\n", 691 | "print(array1[:])" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": null, 697 | "metadata": {}, 698 | "outputs": [], 699 | "source": [ 700 | "# こういうアクセス方法も可能です\n", 701 | "# (これはlistではできない)\n", 702 | "\n", 703 | "# array1の0番目、2番目、4番目\n", 704 | "print(array1[[0,2,4]])\n", 705 | "\n", 706 | "# array1の3番目、1番目\n", 707 | "print(array1[[3,1]])" 708 | ] 709 | }, 710 | { 711 | "cell_type": "markdown", 712 | "metadata": {}, 713 | "source": [ 714 | "#### 計算" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": null, 720 | "metadata": {}, 721 | "outputs": [], 722 | "source": [ 723 | "# 計算の例\n", 724 | "# numpy配列を対象にすると計算を一気に行うことができます\n", 725 | "\n", 726 | "array1 = np.array(list(range(2,12,2)))\n", 727 | "array3 = np.array(list(range(5)))\n", 728 | "print(array1)\n", 729 | "print(array3)" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": null, 735 | "metadata": {}, 736 | "outputs": [], 737 | "source": [ 738 | "# 足し算\n", 739 | "# list変数だとループを回す必要があります\n", 740 | "\n", 741 | "array4 = array1 + array3\n", 742 | "print(array4)" 743 | ] 744 | }, 
745 | { 746 | "cell_type": "markdown", 747 | "metadata": {}, 748 | "source": [ 749 | "#### ブロードキャスト機能" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": null, 755 | "metadata": {}, 756 | "outputs": [], 757 | "source": [ 758 | "# ブロードキャスト機能\n", 759 | "# サイズの異なる変数同士の演算では、サイズを自動的に合わせて計算します\n", 760 | "\n", 761 | "array5 = array1 + 3\n", 762 | "print(array5)" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": null, 768 | "metadata": {}, 769 | "outputs": [], 770 | "source": [ 771 | "# 関数呼び出し\n", 772 | "# numpy関数と組み合わせると、関数呼び出しも全要素分まとめて行えます\n", 773 | "\n", 774 | "# 対数関数の呼出し\n", 775 | "array6 = np.log(array1)\n", 776 | "print(array6)" 777 | ] 778 | }, 779 | { 780 | "cell_type": "markdown", 781 | "metadata": {}, 782 | "source": [ 783 | "#### 特定の条件を満たす要素の抽出" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": null, 789 | "metadata": {}, 790 | "outputs": [], 791 | "source": [ 792 | "# ブロードキャスト機能とindex機能を組み合わせてこんなことも可能です\n", 793 | "# arr2 から偶数の要素だけを抜き出す\n", 794 | "\n", 795 | "array3 = np.array(list(range(5)))\n", 796 | "w = (array3 % 2) == 0\n", 797 | "print(w)" 798 | ] 799 | }, 800 | { 801 | "cell_type": "code", 802 | "execution_count": null, 803 | "metadata": {}, 804 | "outputs": [], 805 | "source": [ 806 | "array7 = array3[w]\n", 807 | "print(array7)" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": null, 813 | "metadata": {}, 814 | "outputs": [], 815 | "source": [ 816 | "# まとめて書くとこうなります\n", 817 | "\n", 818 | "array8 = array3[(array3 % 2) == 0]\n", 819 | "print(array8)" 820 | ] 821 | }, 822 | { 823 | "cell_type": "markdown", 824 | "metadata": {}, 825 | "source": [ 826 | "#### 内積" 827 | ] 828 | }, 829 | { 830 | "cell_type": "code", 831 | "execution_count": null, 832 | "metadata": {}, 833 | "outputs": [], 834 | "source": [ 835 | "# 内積\n", 836 | "# 記号 '@' を使って内積計算が可能です。\n", 837 | "\n", 838 | "array1 = np.array(list(range(2,12,2)))\n", 839 | "array3 = 
np.array(list(range(5)))\n", 840 | "print(array1)\n", 841 | "print(array3)\n", 842 | "\n", 843 | "p = array1 @ array3\n", 844 | "print(p)" 845 | ] 846 | }, 847 | { 848 | "cell_type": "markdown", 849 | "metadata": {}, 850 | "source": [ 851 | "### 二次元配列\n", 852 | "\n", 853 | "numpyでは行列のような二次元配列も扱えます。 " 854 | ] 855 | }, 856 | { 857 | "cell_type": "markdown", 858 | "metadata": {}, 859 | "source": [ 860 | "#### 宣言" 861 | ] 862 | }, 863 | { 864 | "cell_type": "code", 865 | "execution_count": null, 866 | "metadata": {}, 867 | "outputs": [], 868 | "source": [ 869 | "# 二次元配列の宣言\n", 870 | "# この場合、引数はlistのlistとなります。\n", 871 | "\n", 872 | "array8 = np.array([[1,2,3,4,5], [6,7,8,9,10],[11,12,13,14,15]])" 873 | ] 874 | }, 875 | { 876 | "cell_type": "code", 877 | "execution_count": null, 878 | "metadata": {}, 879 | "outputs": [], 880 | "source": [ 881 | "# 二次元配列をprint関数にかけるとこのような表示になります\n", 882 | "\n", 883 | "print(array8)" 884 | ] 885 | }, 886 | { 887 | "cell_type": "code", 888 | "execution_count": null, 889 | "metadata": {}, 890 | "outputs": [], 891 | "source": [ 892 | "# 要素数の取得\n", 893 | "\n", 894 | "print(array8.shape)" 895 | ] 896 | }, 897 | { 898 | "cell_type": "markdown", 899 | "metadata": {}, 900 | "source": [ 901 | "#### 参照" 902 | ] 903 | }, 904 | { 905 | "cell_type": "code", 906 | "execution_count": null, 907 | "metadata": {}, 908 | "outputs": [], 909 | "source": [ 910 | "# 要素の参照例\n", 911 | "\n", 912 | "# 先頭\n", 913 | "print(array1[0])\n", 914 | "\n", 915 | "# 一番後ろの要素は-1で参照できます\n", 916 | "print(array1[-1])" 917 | ] 918 | }, 919 | { 920 | "cell_type": "code", 921 | "execution_count": null, 922 | "metadata": {}, 923 | "outputs": [], 924 | "source": [ 925 | "# 範囲付き参照\n", 926 | "# このパターンはlist変数と同じです。\n", 927 | "\n", 928 | "# 0以上2未満\n", 929 | "print(array1[:2])\n", 930 | "\n", 931 | "# 2以上\n", 932 | "print(array1[2:])\n", 933 | "\n", 934 | "# 全部\n", 935 | "print(array1[:])" 936 | ] 937 | }, 938 | { 939 | "cell_type": "code", 940 | "execution_count": null, 941 | "metadata": 
{}, 942 | "outputs": [], 943 | "source": [ 944 | "# こういうアクセス方法も可能です\n", 945 | "# (これはlistではできない)\n", 946 | "\n", 947 | "# array1の0番目、2番目、4番目\n", 948 | "print(array1[[0,2,4]])\n", 949 | "\n", 950 | "# array1の3番目、1番目\n", 951 | "print(array1[[3,1]])" 952 | ] 953 | }, 954 | { 955 | "cell_type": "markdown", 956 | "metadata": {}, 957 | "source": [ 958 | "#### 計算" 959 | ] 960 | }, 961 | { 962 | "cell_type": "code", 963 | "execution_count": null, 964 | "metadata": {}, 965 | "outputs": [], 966 | "source": [ 967 | "# 計算の例\n", 968 | "# numpy配列を対象にすると計算を一気に行うことができます\n", 969 | "\n", 970 | "array1 = np.array(list(range(2,12,2)))\n", 971 | "array3 = np.array(list(range(5)))\n", 972 | "print(array1)\n", 973 | "print(array3)" 974 | ] 975 | }, 976 | { 977 | "cell_type": "code", 978 | "execution_count": null, 979 | "metadata": {}, 980 | "outputs": [], 981 | "source": [ 982 | "# 足し算\n", 983 | "# list変数だとループを回す必要があります\n", 984 | "\n", 985 | "array4 = array1 + array3\n", 986 | "print(array4)" 987 | ] 988 | }, 989 | { 990 | "cell_type": "markdown", 991 | "metadata": {}, 992 | "source": [ 993 | "#### ブロードキャスト機能" 994 | ] 995 | }, 996 | { 997 | "cell_type": "code", 998 | "execution_count": null, 999 | "metadata": {}, 1000 | "outputs": [], 1001 | "source": [ 1002 | "# ブロードキャスト機能\n", 1003 | "# サイズの異なる変数同士の演算では、サイズを自動的に合わせて計算します\n", 1004 | "\n", 1005 | "array5 = array1 + 3\n", 1006 | "print(array5)" 1007 | ] 1008 | }, 1009 | { 1010 | "cell_type": "code", 1011 | "execution_count": null, 1012 | "metadata": {}, 1013 | "outputs": [], 1014 | "source": [ 1015 | "# 関数呼び出し\n", 1016 | "# numpy関数と組み合わせると、関数呼び出しも全要素分まとめて行えます\n", 1017 | "\n", 1018 | "# 対数関数の呼出し\n", 1019 | "array6 = np.log(array1)\n", 1020 | "print(array6)" 1021 | ] 1022 | }, 1023 | { 1024 | "cell_type": "markdown", 1025 | "metadata": {}, 1026 | "source": [ 1027 | "#### 特定の条件を満たす要素の抽出" 1028 | ] 1029 | }, 1030 | { 1031 | "cell_type": "code", 1032 | "execution_count": null, 1033 | "metadata": {}, 1034 | "outputs": [], 1035 | "source": [ 1036 
| "# ブロードキャスト機能とindex機能を組み合わせてこんなことも可能です\n", 1037 | "# array3 から偶数の要素だけを抜き出す\n", 1038 | "\n", 1039 | "array3 = np.array(list(range(5)))\n", 1040 | "w = (array3 % 2) == 0\n", 1041 | "print(w)" 1042 | ] 1043 | }, 1044 | { 1045 | "cell_type": "code", 1046 | "execution_count": null, 1047 | "metadata": {}, 1048 | "outputs": [], 1049 | "source": [ 1050 | "array7 = array3[w]\n", 1051 | "print(array7)" 1052 | ] 1053 | }, 1054 | { 1055 | "cell_type": "code", 1056 | "execution_count": null, 1057 | "metadata": {}, 1058 | "outputs": [], 1059 | "source": [ 1060 | "# まとめて書くとこうなります\n", 1061 | "\n", 1062 | "array8 = array3[(array3 % 2) == 0]\n", 1063 | "print(array8)" 1064 | ] 1065 | }, 1066 | { 1067 | "cell_type": "markdown", 1068 | "metadata": {}, 1069 | "source": [ 1070 | "#### 内積" 1071 | ] 1072 | }, 1073 | { 1074 | "cell_type": "code", 1075 | "execution_count": null, 1076 | "metadata": {}, 1077 | "outputs": [], 1078 | "source": [ 1079 | "# 内積\n", 1080 | "# 記号 '@' を使って内積計算が可能です。\n", 1081 | "\n", 1082 | "array1 = np.array(list(range(2,12,2)))\n", 1083 | "array3 = np.array(list(range(5)))\n", 1084 | "print(array1)\n", 1085 | "print(array3)\n", 1086 | "\n", 1087 | "p = array1 @ array3\n", 1088 | "print(p)" 1089 | ] 1090 | }, 1091 | { 1092 | "cell_type": "markdown", 1093 | "metadata": {}, 1094 | "source": [ 1095 | "### 二次元配列\n", 1096 | "\n", 1097 | "numpyでは行列のような二次元配列も扱えます。 " 1098 | ] 1099 | }, 1100 | { 1101 | "cell_type": "markdown", 1102 | "metadata": {}, 1103 | "source": [ 1104 | "#### 宣言" 1105 | ] 1106 | }, 1107 | { 1108 | "cell_type": "code", 1109 | "execution_count": null, 1110 | "metadata": {}, 1111 | "outputs": [], 1112 | "source": [ 1113 | "# 二次元配列の宣言\n", 1114 | "# この場合、引数はlistのlistとなります。\n", 1115 | "\n", 1116 | "array8 = np.array([[1,2,3,4,5], [6,7,8,9,10],[11,12,13,14,15]])" 1117 | ] 1118 | }, 1119 | { 1120 | "cell_type": "code", 1121 | "execution_count": null, 1122 | "metadata": {}, 1123 | "outputs": [], 1124 | "source": [ 1125 | "# 二次元配列をprint関数にかけるとこのような表示になります\n", 1126 
| "\n", 1127 | "print(array8)" 1128 | ] 1129 | }, 1130 | { 1131 | "cell_type": "code", 1132 | "execution_count": null, 1133 | "metadata": {}, 1134 | "outputs": [], 1135 | "source": [ 1136 | "# 要素数の取得\n", 1137 | "\n", 1138 | "print(array8.shape)" 1139 | ] 1140 | }, 1141 | { 1142 | "cell_type": "markdown", 1143 | "metadata": {}, 1144 | "source": [ 1145 | "#### 参照" 1146 | ] 1147 | }, 1148 | { 1149 | "cell_type": "code", 1150 | "execution_count": null, 1151 | "metadata": {}, 1152 | "outputs": [], 1153 | "source": [ 1154 | "# 要素の参照は「(第一次元引数),(第二次元引数)」の形式\n", 1155 | "\n", 1156 | "print(array8[1,2])" 1157 | ] 1158 | }, 1159 | { 1160 | "cell_type": "code", 1161 | "execution_count": null, 1162 | "metadata": {}, 1163 | "outputs": [], 1164 | "source": [ 1165 | "# それぞれの次元に対して範囲指定で参照することも可能\n", 1166 | "\n", 1167 | "print(array8[:2,2:])" 1168 | ] 1169 | }, 1170 | { 1171 | "cell_type": "markdown", 1172 | "metadata": {}, 1173 | "source": [ 1174 | "#### 計算" 1175 | ] 1176 | }, 1177 | { 1178 | "cell_type": "code", 1179 | "execution_count": null, 1180 | "metadata": {}, 1181 | "outputs": [], 1182 | "source": [ 1183 | "# スカラー積\n", 1184 | "\n", 1185 | "a = np.array([[1,2,3],[4,5,6]])\n", 1186 | "b = a * 3\n", 1187 | "print(b)" 1188 | ] 1189 | }, 1190 | { 1191 | "cell_type": "code", 1192 | "execution_count": null, 1193 | "metadata": {}, 1194 | "outputs": [], 1195 | "source": [ 1196 | "# スカラー和\n", 1197 | "\n", 1198 | "a = np.array([[1,2,3],[4,5,6]])\n", 1199 | "c = a + 3\n", 1200 | "print(c)" 1201 | ] 1202 | }, 1203 | { 1204 | "cell_type": "code", 1205 | "execution_count": null, 1206 | "metadata": {}, 1207 | "outputs": [], 1208 | "source": [ 1209 | "# 行列同士の和\n", 1210 | "\n", 1211 | "d = a + b\n", 1212 | "print(d)" 1213 | ] 1214 | }, 1215 | { 1216 | "cell_type": "code", 1217 | "execution_count": null, 1218 | "metadata": {}, 1219 | "outputs": [], 1220 | "source": [ 1221 | "# 行列と1次元配列の内積\n", 1222 | "\n", 1223 | "a = np.array([[1,2,3],[4,5,6]])\n", 1224 | "x = np.array([3,2,1])\n", 1225 | 
"print(a)\n", 1226 | "print(x)" 1227 | ] 1228 | }, 1229 | { 1230 | "cell_type": "code", 1231 | "execution_count": null, 1232 | "metadata": {}, 1233 | "outputs": [], 1234 | "source": [ 1235 | "# '@'による内積\n", 1236 | "# 行列とベクトルの積になる\n", 1237 | "\n", 1238 | "y = a @ x\n", 1239 | "print(y)" 1240 | ] 1241 | }, 1242 | { 1243 | "cell_type": "code", 1244 | "execution_count": null, 1245 | "metadata": {}, 1246 | "outputs": [], 1247 | "source": [ 1248 | "# * による積\n", 1249 | "# ブロードキャスト機能により要素間の積になる\n", 1250 | "\n", 1251 | "z = a * x\n", 1252 | "print(z)" 1253 | ] 1254 | }, 1255 | { 1256 | "cell_type": "markdown", 1257 | "metadata": {}, 1258 | "source": [ 1259 | "#### 特別な配列の生成" 1260 | ] 1261 | }, 1262 | { 1263 | "cell_type": "code", 1264 | "execution_count": null, 1265 | "metadata": {}, 1266 | "outputs": [], 1267 | "source": [ 1268 | "# 要素数(2,3) すべての要素が0の配列\n", 1269 | "\n", 1270 | "z23 = np.zeros((2,3))\n", 1271 | "print(z23)\n", 1272 | "print(z23.shape)" 1273 | ] 1274 | }, 1275 | { 1276 | "cell_type": "code", 1277 | "execution_count": null, 1278 | "metadata": {}, 1279 | "outputs": [], 1280 | "source": [ 1281 | "# 要素数(2,3) すべての要素が1の配列\n", 1282 | "\n", 1283 | "o23 = np.ones((2,3))\n", 1284 | "print(o23)\n", 1285 | "print(o23.shape)" 1286 | ] 1287 | }, 1288 | { 1289 | "cell_type": "code", 1290 | "execution_count": null, 1291 | "metadata": {}, 1292 | "outputs": [], 1293 | "source": [ 1294 | "# 要素数(2,3) すべての要素が[0,1]間の一様乱数の配列\n", 1295 | "\n", 1296 | "u23 = np.random.rand(2,3)\n", 1297 | "print(u23)\n", 1298 | "print(u23.shape)" 1299 | ] 1300 | }, 1301 | { 1302 | "cell_type": "code", 1303 | "execution_count": null, 1304 | "metadata": {}, 1305 | "outputs": [], 1306 | "source": [ 1307 | "# 要素数(2,3) すべての要素が平均0分散1の正規分布乱数の配列\n", 1308 | "\n", 1309 | "s23 = np.random.randn(2,3)\n", 1310 | "print(s23)\n", 1311 | "print(s23.shape)" 1312 | ] 1313 | }, 1314 | { 1315 | "cell_type": "markdown", 1316 | "metadata": {}, 1317 | "source": [ 1318 | "## 第3部 Matplotlib入門" 1319 | ] 1320 | }, 1321 | { 1322 
| "cell_type": "markdown", 1323 | "metadata": {}, 1324 | "source": [ 1325 | "### Matplotlibとは\n", 1326 | "\n", 1327 | "PythonのJupyter Notebook環境でグラフ表示を行うためのライブラリです。 \n", 1328 | "NumpyやPandasとの連携を前提に設計されており、簡潔なコードで様々なグラフ表示を行うことができます。 \n", 1329 | "以下は7章の線形回帰の実習で出てくるMatplotlib呼出しコードの解説です。 \n", 1330 | "(書籍内では説明が省かれています)" 1331 | ] 1332 | }, 1333 | { 1334 | "cell_type": "markdown", 1335 | "metadata": {}, 1336 | "source": [ 1337 | "#### データ準備\n", 1338 | "(解説はch00-13-numpy2.ipynbにあります)" 1339 | ] 1340 | }, 1341 | { 1342 | "cell_type": "code", 1343 | "execution_count": null, 1344 | "metadata": {}, 1345 | "outputs": [], 1346 | "source": [ 1347 | "# 必要ライブラリimport\n", 1348 | "import numpy as np\n", 1349 | "from sklearn.datasets import load_boston\n", 1350 | "\n", 1351 | "boston = load_boston()\n", 1352 | "x_org, yt = boston.data, boston.target\n", 1353 | "feature_names = boston.feature_names\n", 1354 | "\n", 1355 | "x_data = x_org[:,feature_names == 'RM']\n", 1356 | "x = np.insert(x_data, 0, 1.0, axis=1)" 1357 | ] 1358 | }, 1359 | { 1360 | "cell_type": "markdown", 1361 | "metadata": {}, 1362 | "source": [ 1363 | "#### 散布図の表示" 1364 | ] 1365 | }, 1366 | { 1367 | "cell_type": "code", 1368 | "execution_count": null, 1369 | "metadata": {}, 1370 | "outputs": [], 1371 | "source": [ 1372 | "# 必要ライブラリimport\n", 1373 | "%matplotlib inline\n", 1374 | "import matplotlib.pyplot as plt" 1375 | ] 1376 | }, 1377 | { 1378 | "cell_type": "code", 1379 | "execution_count": null, 1380 | "metadata": {}, 1381 | "outputs": [], 1382 | "source": [ 1383 | "# 散布図の表示\n", 1384 | "\n", 1385 | "# 散布図は scatter関数で表示します。\n", 1386 | "# 第一引数 xの値の配列 (例では x[:,1])\n", 1387 | "# 第二引数 yの値の配列 (例ではyt)\n", 1388 | "# s: 点の大きさ (オプション)\n", 1389 | "# c: 色指定 (オプション)\n", 1390 | "plt.scatter(x[:,1], yt, s=10, c='b')\n", 1391 | "\n", 1392 | "# xlabel: x軸のラベル表示\n", 1393 | "plt.xlabel('ROOM', fontsize=14)\n", 1394 | "\n", 1395 | "# ylabel: y軸のラベル表示\n", 1396 | "plt.ylabel('PRICE', fontsize=14)\n", 1397 | "\n", 1398 | "# 画面描画を明示的に指示\n", 
1399 | "plt.show()" 1400 | ] 1401 | }, 1402 | { 1403 | "cell_type": "markdown", 1404 | "metadata": {}, 1405 | "source": [ 1406 | "#### グラフ表示\n", 1407 | "7章では学習曲線の表示にplot関数が使われています。 \n", 1408 | "学習曲線の計算は大変なので、代わりに5章で出てくるシグモイド関数のグラフ表示コードを示します。" 1409 | ] 1410 | }, 1411 | { 1412 | "cell_type": "code", 1413 | "execution_count": null, 1414 | "metadata": {}, 1415 | "outputs": [], 1416 | "source": [ 1417 | "# シグモイド関数の定義\n", 1418 | "def sigmoid(x):\n", 1419 | " return 1/(1 + np.exp(-x))" 1420 | ] 1421 | }, 1422 | { 1423 | "cell_type": "code", 1424 | "execution_count": null, 1425 | "metadata": {}, 1426 | "outputs": [], 1427 | "source": [ 1428 | "# xの配列作成\n", 1429 | "# linspaceは与えられた区間の間に等間隔の点を取る関数です。\n", 1430 | "# 下記の例では 区間[-5, 5]の間に101個の点を取ります。\n", 1431 | "\n", 1432 | "x = np.linspace(-5, 5, 101)" 1433 | ] 1434 | }, 1435 | { 1436 | "cell_type": "code", 1437 | "execution_count": null, 1438 | "metadata": {}, 1439 | "outputs": [], 1440 | "source": [ 1441 | "# xの内容表示(一部)\n", 1442 | "print(x[:5])" 1443 | ] 1444 | }, 1445 | { 1446 | "cell_type": "code", 1447 | "execution_count": null, 1448 | "metadata": {}, 1449 | "outputs": [], 1450 | "source": [ 1451 | "# yの配列作成\n", 1452 | "y = sigmoid(x)" 1453 | ] 1454 | }, 1455 | { 1456 | "cell_type": "code", 1457 | "execution_count": null, 1458 | "metadata": {}, 1459 | "outputs": [], 1460 | "source": [ 1461 | "# yの内容表示(一部)\n", 1462 | "print(y[:5])" 1463 | ] 1464 | }, 1465 | { 1466 | "cell_type": "code", 1467 | "execution_count": null, 1468 | "metadata": {}, 1469 | "outputs": [], 1470 | "source": [ 1471 | "# グラフ表示\n", 1472 | "\n", 1473 | "# グラフ表示はplot関数で描画する\n", 1474 | "# 第一引数: xの配列\n", 1475 | "# 第二引数: yの配列\n", 1476 | "# label: グラフのラベル指定(オプション)\n", 1477 | "plt.plot(x, y, label='sigmoid')\n", 1478 | "\n", 1479 | "# グラフ上にグリッド表示を追加\n", 1480 | "plt.grid()\n", 1481 | "\n", 1482 | "# グラフ上に凡例表示を追加\n", 1483 | "plt.legend()\n", 1484 | "\n", 1485 | "# 画面描画を明示的に指示\n", 1486 | "plt.show()" 1487 | ] 1488 | }, 1489 | { 1490 | "cell_type": "code", 1491 | 
"execution_count": null, 1492 | "metadata": {}, 1493 | "outputs": [], 1494 | "source": [] 1495 | } 1496 | ], 1497 | "metadata": { 1498 | "kernelspec": { 1499 | "display_name": "Python 3", 1500 | "language": "python", 1501 | "name": "python3" 1502 | }, 1503 | "language_info": { 1504 | "codemirror_mode": { 1505 | "name": "ipython", 1506 | "version": 3 1507 | }, 1508 | "file_extension": ".py", 1509 | "mimetype": "text/x-python", 1510 | "name": "python", 1511 | "nbconvert_exporter": "python", 1512 | "pygments_lexer": "ipython3", 1513 | "version": "3.7.3" 1514 | } 1515 | }, 1516 | "nbformat": 4, 1517 | "nbformat_minor": 1 1518 | } 1519 | --------------------------------------------------------------------------------