├── stock_price_prediction
│   └── README.md
├── LICENSE
├── Small_training_large_test.ipynb
├── Best_number_of_neurons.ipynb
├── Your_own_AutoML.ipynb
├── Grid_search_and_random_search.ipynb
├── Feature_selection_via_grid_search_in_supervised_models.ipynb
├── Candlesticks.ipynb
├── Calculation_of_daily_pivot_levels.ipynb
├── Heikin_Ashi.ipynb
└── Threshold.ipynb
/stock_price_prediction/README.md:
--------------------------------------------------------------------------------
1 | # Readme
2 |
3 | These notebooks have been used for the article **Machine learning for stock prediction. A quantitative approach** (https://towardsdatascience.com/machine-learning-for-stock-prediction-a-quantitative-approach-4ca98c0bfb8c).
4 |
5 | ## File list
6 |
7 | * **MSFT.csv** contains the original Microsoft stock price data, as downloaded from Yahoo Finance.
8 | * **stock prediction random split.ipynb** has been used to make the predictions with a random training/test split.
9 | * **stock prediction sequential split.ipynb** has been used for the second part of the article, where a sequential split is discussed.
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Gianluca Malato
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Small_training_large_test.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Small training large test.ipynb",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyNo9EJ1aG207dryxeiQbV6R"
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | }
14 | },
15 | "cells": [
16 | {
17 | "cell_type": "code",
18 | "metadata": {
19 | "id": "hD750Qyr8Zoh",
20 | "colab_type": "code",
21 | "colab": {}
22 | },
23 | "source": [
24 | "import numpy as np\n",
25 | "from sklearn.model_selection import train_test_split\n",
26 | "from sklearn.linear_model import LinearRegression\n",
27 | "from sklearn.metrics import mean_squared_error"
28 | ],
29 | "execution_count": 0,
30 | "outputs": []
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {
35 | "id": "jpuXIj1UB8qO",
36 | "colab_type": "text"
37 | },
38 | "source": [
39 | "# Simulate a dataset for a linear regression\n",
40 | "\n",
41 | "$$ y = x_1 + x_2 + x_3 + x_4 + \\epsilon $$\n",
42 | "\n",
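43 | "The experimental error $\\epsilon$ is drawn from a standard normal distribution (mean 0, standard deviation 1), so the best achievable RMSE is approximately 1."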
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "metadata": {
49 | "id": "9uqAnLwNCPRj",
50 | "colab_type": "code",
51 | "colab": {}
52 | },
53 | "source": [
54 | "np.random.seed(0)"
55 | ],
56 | "execution_count": 0,
57 | "outputs": []
58 | },
59 | {
60 | "cell_type": "code",
61 | "metadata": {
62 | "id": "xR-EfcTOAllW",
63 | "colab_type": "code",
64 | "colab": {}
65 | },
66 | "source": [
67 | "X = np.random.normal(size=4000000).reshape(1000000,4)"
68 | ],
69 | "execution_count": 0,
70 | "outputs": []
71 | },
72 | {
73 | "cell_type": "code",
74 | "metadata": {
75 | "id": "boTPEKeqA0ZI",
76 | "colab_type": "code",
77 | "colab": {}
78 | },
79 | "source": [
80 | "y = []\n",
81 | "\n",
82 | "for record in X:\n",
83 | " y.append(np.sum(record) + np.random.normal())\n",
84 | "\n",
85 | "y = np.array(y)"
86 | ],
87 | "execution_count": 0,
88 | "outputs": []
89 | },
90 | {
91 | "cell_type": "markdown",
92 | "metadata": {
93 | "id": "5z2B8V1fCYLw",
94 | "colab_type": "text"
95 | },
96 | "source": [
97 | "## Small test set (20%), large training set (80%)"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "metadata": {
103 | "id": "HfmW4GJv8ms6",
104 | "colab_type": "code",
105 | "colab": {}
106 | },
107 | "source": [
108 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
109 | ],
110 | "execution_count": 0,
111 | "outputs": []
112 | },
113 | {
114 | "cell_type": "code",
115 | "metadata": {
116 | "id": "EnpH8TJw87cs",
117 | "colab_type": "code",
118 | "outputId": "0cf02b57-16a9-4351-bb31-b01dbdb915c3",
119 | "colab": {
120 | "base_uri": "https://localhost:8080/",
121 | "height": 34
122 | }
123 | },
124 | "source": [
125 | "model = LinearRegression()\n",
126 | "model.fit(X_train,y_train)"
127 | ],
128 | "execution_count": 0,
129 | "outputs": [
130 | {
131 | "output_type": "execute_result",
132 | "data": {
133 | "text/plain": [
134 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
135 | ]
136 | },
137 | "metadata": {
138 | "tags": []
139 | },
140 | "execution_count": 181
141 | }
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "metadata": {
147 | "id": "CsJIQ6tg9L3f",
148 | "colab_type": "code",
149 | "colab": {}
150 | },
151 | "source": [
152 | "def estimate_error(X_test,y_test):\n",
153 | " n_iter = 100\n",
154 | " np.random.seed(0)\n",
155 | " errors = []\n",
156 | "\n",
157 | " indices = list(range(X_test.shape[0]))\n",
158 | " for i in range(n_iter):\n",
159 | " new_indices = np.random.choice(indices,len(indices),replace=True)\n",
160 | "\n",
161 | " new_X_test = X_test[new_indices]\n",
162 | " new_y_test = y_test[new_indices]\n",
163 | "\n",
164 | " new_y_pred = model.predict(new_X_test)\n",
165 | "\n",
166 | " new_error = np.sqrt(mean_squared_error(new_y_test,new_y_pred))\n",
167 | "\n",
168 | " errors.append(new_error)\n",
169 | "\n",
170 | " return np.mean(errors),np.std(errors)"
171 | ],
172 | "execution_count": 0,
173 | "outputs": []
174 | },
175 | {
176 | "cell_type": "code",
177 | "metadata": {
178 | "id": "ok9eanO4-XmV",
179 | "colab_type": "code",
180 | "outputId": "c01b1886-9077-433b-bcf4-143b28e93252",
181 | "colab": {
182 | "base_uri": "https://localhost:8080/",
183 | "height": 34
184 | }
185 | },
186 | "source": [
187 | "estimate_error(X_test,y_test)"
188 | ],
189 | "execution_count": 0,
190 | "outputs": [
191 | {
192 | "output_type": "execute_result",
193 | "data": {
194 | "text/plain": [
195 | "(1.0028372852013618, 0.0015058423972133183)"
196 | ]
197 | },
198 | "metadata": {
199 | "tags": []
200 | },
201 | "execution_count": 184
202 | }
203 | ]
204 | },
205 | {
206 | "cell_type": "markdown",
207 | "metadata": {
208 | "id": "Rp8tEtTrCcYW",
209 | "colab_type": "text"
210 | },
211 | "source": [
212 | "## Large test set (80%), small training set (20%)"
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "metadata": {
218 | "id": "XU_e4_HE-dr1",
219 | "colab_type": "code",
220 | "colab": {}
221 | },
222 | "source": [
223 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)"
224 | ],
225 | "execution_count": 0,
226 | "outputs": []
227 | },
228 | {
229 | "cell_type": "code",
230 | "metadata": {
231 | "id": "FQmjqsyE-fZ9",
232 | "colab_type": "code",
233 | "outputId": "9f89ed55-c383-423b-d863-7ec3123fcbb4",
234 | "colab": {
235 | "base_uri": "https://localhost:8080/",
236 | "height": 34
237 | }
238 | },
239 | "source": [
240 | "model = LinearRegression()\n",
241 | "model.fit(X_train,y_train)"
242 | ],
243 | "execution_count": 0,
244 | "outputs": [
245 | {
246 | "output_type": "execute_result",
247 | "data": {
248 | "text/plain": [
249 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
250 | ]
251 | },
252 | "metadata": {
253 | "tags": []
254 | },
255 | "execution_count": 186
256 | }
257 | ]
258 | },
259 | {
260 | "cell_type": "code",
261 | "metadata": {
262 | "id": "6OcToXeO-gzU",
263 | "colab_type": "code",
264 | "outputId": "fce2074b-f39f-4816-ac3b-8be8048e2c66",
265 | "colab": {
266 | "base_uri": "https://localhost:8080/",
267 | "height": 34
268 | }
269 | },
270 | "source": [
271 | "estimate_error(X_test,y_test)"
272 | ],
273 | "execution_count": 0,
274 | "outputs": [
275 | {
276 | "output_type": "execute_result",
277 | "data": {
278 | "text/plain": [
279 | "(1.0007244861649207, 0.000753571738923046)"
280 | ]
281 | },
282 | "metadata": {
283 | "tags": []
284 | },
285 | "execution_count": 187
286 | }
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "metadata": {
292 | "id": "6OirFqEJDUfD",
293 | "colab_type": "code",
294 | "colab": {}
295 | },
296 | "source": [
297 | ""
298 | ],
299 | "execution_count": 0,
300 | "outputs": []
301 | }
302 | ]
303 | }
--------------------------------------------------------------------------------
/Best_number_of_neurons.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Best number of neurons.ipynb",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyNOql4Q2htcn/g+xk29cGc0"
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "language_info": {
15 | "name": "python"
16 | }
17 | },
18 | "cells": [
19 | {
20 | "cell_type": "code",
21 | "metadata": {
22 | "id": "HBvcmeB5exNU"
23 | },
24 | "source": [
25 | "import numpy as np"
26 | ],
27 | "execution_count": 1,
28 | "outputs": []
29 | },
30 | {
31 | "cell_type": "code",
32 | "metadata": {
33 | "id": "8VrpdnQBe1ml"
34 | },
35 | "source": [
36 | "from sklearn.preprocessing import StandardScaler\n",
37 | "from sklearn.neural_network import MLPClassifier\n",
38 | "from sklearn.datasets import load_breast_cancer\n",
39 | "from sklearn.model_selection import train_test_split, GridSearchCV\n",
40 | "from sklearn.pipeline import Pipeline\n",
41 | "from sklearn.metrics import roc_auc_score"
42 | ],
43 | "execution_count": 10,
44 | "outputs": []
45 | },
46 | {
47 | "cell_type": "code",
48 | "metadata": {
49 | "id": "VOamkPGjfDSr"
50 | },
51 | "source": [
52 | "X,y = load_breast_cancer(return_X_y=True)"
53 | ],
54 | "execution_count": 3,
55 | "outputs": []
56 | },
57 | {
58 | "cell_type": "code",
59 | "metadata": {
60 | "id": "bsyA-VL8fE-z"
61 | },
62 | "source": [
63 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)"
64 | ],
65 | "execution_count": 5,
66 | "outputs": []
67 | },
68 | {
69 | "cell_type": "code",
70 | "metadata": {
71 | "id": "uUJ_Z62-fLJw"
72 | },
73 | "source": [
74 | "model = Pipeline([\n",
75 | " ('scaler',StandardScaler()),\n",
76 | " ('model',MLPClassifier(random_state=0))\n",
77 | "])"
78 | ],
79 | "execution_count": 22,
80 | "outputs": []
81 | },
82 | {
83 | "cell_type": "code",
84 | "metadata": {
85 | "id": "7kRl97p-fOj_"
86 | },
87 | "source": [
88 | "search = GridSearchCV(model,\n",
89 | " {'model__hidden_layer_sizes':[(x,) for x in np.arange(5,100,2)]},\n",
90 | " cv = 5, scoring = \"roc_auc\", verbose=3, n_jobs = -1\n",
91 | " \n",
92 | " )"
93 | ],
94 | "execution_count": 23,
95 | "outputs": []
96 | },
97 | {
98 | "cell_type": "code",
99 | "metadata": {
100 | "colab": {
101 | "base_uri": "https://localhost:8080/"
102 | },
103 | "id": "tu1qJj6zfcTn",
104 | "outputId": "b1c5f43c-bc95-4fef-ae7a-c8973ae6cc64"
105 | },
106 | "source": [
107 | "search.fit(X_train, y_train)"
108 | ],
109 | "execution_count": 24,
110 | "outputs": [
111 | {
112 | "output_type": "stream",
113 | "text": [
114 | "Fitting 5 folds for each of 48 candidates, totalling 240 fits\n"
115 | ],
116 | "name": "stdout"
117 | },
118 | {
119 | "output_type": "stream",
120 | "text": [
121 | "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n",
122 | "[Parallel(n_jobs=-1)]: Done 28 tasks | elapsed: 5.2s\n",
123 | "[Parallel(n_jobs=-1)]: Done 124 tasks | elapsed: 23.3s\n",
124 | "[Parallel(n_jobs=-1)]: Done 240 out of 240 | elapsed: 51.4s finished\n",
125 | "/usr/local/lib/python3.7/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:571: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.\n",
126 | " % self.max_iter, ConvergenceWarning)\n"
127 | ],
128 | "name": "stderr"
129 | },
130 | {
131 | "output_type": "execute_result",
132 | "data": {
133 | "text/plain": [
134 | "GridSearchCV(cv=5, error_score=nan,\n",
135 | " estimator=Pipeline(memory=None,\n",
136 | " steps=[('scaler',\n",
137 | " StandardScaler(copy=True,\n",
138 | " with_mean=True,\n",
139 | " with_std=True)),\n",
140 | " ('model',\n",
141 | " MLPClassifier(activation='relu',\n",
142 | " alpha=0.0001,\n",
143 | " batch_size='auto',\n",
144 | " beta_1=0.9, beta_2=0.999,\n",
145 | " early_stopping=False,\n",
146 | " epsilon=1e-08,\n",
147 | " hidden_layer_sizes=(100,),\n",
148 | " learning_rate='constant',\n",
149 | " learning_rate_init=0.001,\n",
150 | " max_f...\n",
151 | " verbose=False),\n",
152 | " iid='deprecated', n_jobs=-1,\n",
153 | " param_grid={'model__hidden_layer_sizes': [(5,), (7,), (9,), (11,),\n",
154 | " (13,), (15,), (17,),\n",
155 | " (19,), (21,), (23,),\n",
156 | " (25,), (27,), (29,),\n",
157 | " (31,), (33,), (35,),\n",
158 | " (37,), (39,), (41,),\n",
159 | " (43,), (45,), (47,),\n",
160 | " (49,), (51,), (53,),\n",
161 | " (55,), (57,), (59,),\n",
162 | " (61,), (63,), ...]},\n",
163 | " pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
164 | " scoring='roc_auc', verbose=3)"
165 | ]
166 | },
167 | "metadata": {
168 | "tags": []
169 | },
170 | "execution_count": 24
171 | }
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "metadata": {
177 | "colab": {
178 | "base_uri": "https://localhost:8080/"
179 | },
180 | "id": "-2f7omgmfdzk",
181 | "outputId": "e83191c7-03bb-4582-f8c3-54b02872071f"
182 | },
183 | "source": [
184 | "search.best_score_"
185 | ],
186 | "execution_count": 25,
187 | "outputs": [
188 | {
189 | "output_type": "execute_result",
190 | "data": {
191 | "text/plain": [
192 | "0.9947175348495965"
193 | ]
194 | },
195 | "metadata": {
196 | "tags": []
197 | },
198 | "execution_count": 25
199 | }
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "metadata": {
205 | "colab": {
206 | "base_uri": "https://localhost:8080/"
207 | },
208 | "id": "3MHVzATmfgpG",
209 | "outputId": "db2a38d8-5919-4fd9-bb89-f5389d61df92"
210 | },
211 | "source": [
212 | "search.best_params_"
213 | ],
214 | "execution_count": 26,
215 | "outputs": [
216 | {
217 | "output_type": "execute_result",
218 | "data": {
219 | "text/plain": [
220 | "{'model__hidden_layer_sizes': (75,)}"
221 | ]
222 | },
223 | "metadata": {
224 | "tags": []
225 | },
226 | "execution_count": 26
227 | }
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "metadata": {
233 | "colab": {
234 | "base_uri": "https://localhost:8080/"
235 | },
236 | "id": "jkl5w4osfumy",
237 | "outputId": "cfc7a270-3614-4ed9-a5c3-09ac67362dab"
238 | },
239 | "source": [
240 | "roc_auc_score(y_test,search.predict_proba(X_test)[:,1])"
241 | ],
242 | "execution_count": 27,
243 | "outputs": [
244 | {
245 | "output_type": "execute_result",
246 | "data": {
247 | "text/plain": [
248 | "0.9982730973233008"
249 | ]
250 | },
251 | "metadata": {
252 | "tags": []
253 | },
254 | "execution_count": 27
255 | }
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "metadata": {
261 | "id": "wyS06oJff5cV"
262 | },
263 | "source": [
264 | ""
265 | ],
266 | "execution_count": null,
267 | "outputs": []
268 | }
269 | ]
270 | }
--------------------------------------------------------------------------------
/Your_own_AutoML.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Your own AutoML.ipynb",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyPPrHBEcAK/MW/yuy8vwq/s"
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | }
14 | },
15 | "cells": [
16 | {
17 | "cell_type": "code",
18 | "metadata": {
19 | "id": "6g-tZZaY0mmq"
20 | },
21 | "source": [
22 | "import numpy as np\n",
23 | "import pandas as pd\n",
24 | "from sklearn.model_selection import train_test_split\n",
25 | "from sklearn.impute import SimpleImputer\n",
26 | "from sklearn.compose import ColumnTransformer, make_column_selector\n",
27 | "\n",
28 | "from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, OneHotEncoder\n",
29 | "from sklearn.feature_selection import SelectKBest, f_classif\n",
30 | "from sklearn.model_selection import RandomizedSearchCV\n",
31 | "from sklearn.pipeline import Pipeline\n",
32 | "from sklearn.linear_model import LogisticRegression\n",
33 | "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
34 | "from sklearn.neighbors import KNeighborsClassifier\n",
35 | "from sklearn.tree import DecisionTreeClassifier\n",
36 | "from sklearn.svm import LinearSVC,SVC\n",
37 | "from sklearn.datasets import load_breast_cancer\n",
38 | "from sklearn.metrics import balanced_accuracy_score"
39 | ],
40 | "execution_count": 31,
41 | "outputs": []
42 | },
43 | {
44 | "cell_type": "code",
45 | "metadata": {
46 | "id": "-qcc4K-M8nTI"
47 | },
48 | "source": [
49 | "class MyAutoMLClassifier:\n",
50 | " def __init__(self, scoring_function = 'balanced_accuracy', n_iter = 50):\n",
51 | " self.scoring_function = scoring_function\n",
52 | " self.n_iter = n_iter\n",
53 | " \n",
54 | " def fit(self,X,y):\n",
55 | " X_train = X\n",
56 | " y_train = y\n",
57 | "\n",
58 | " categorical_values = []\n",
59 | "\n",
60 | " cat_subset = X_train.select_dtypes(include = ['object','category','bool'])\n",
61 | "\n",
62 | " for i in range(cat_subset.shape[1]):\n",
63 | " categorical_values.append(list(cat_subset.iloc[:,i].dropna().unique()))\n",
64 | "\n",
65 | " num_pipeline = Pipeline([\n",
66 | " ('cleaner',SimpleImputer()),\n",
67 | " ('scaler',StandardScaler())\n",
68 | " ])\n",
69 | "\n",
70 | " cat_pipeline = Pipeline([\n",
71 | " ('cleaner',SimpleImputer(strategy = 'most_frequent')),\n",
72 | " ('encoder',OneHotEncoder(sparse = False, categories=categorical_values))\n",
73 | " ])\n",
74 | "\n",
75 | "\n",
76 | " preprocessor = ColumnTransformer([\n",
77 | " ('numerical', num_pipeline, make_column_selector(dtype_exclude=['object','category','bool'])),\n",
78 | " ('categorical', cat_pipeline, make_column_selector(dtype_include=['object','category','bool']))\n",
79 | " ])\n",
80 | "\n",
81 | " model_pipeline_steps = []\n",
82 | " model_pipeline_steps.append(('preprocessor',preprocessor))\n",
83 | " model_pipeline_steps.append(('feature_selector',SelectKBest(f_classif,k='all')))\n",
84 | " model_pipeline_steps.append(('estimator',LogisticRegression()))\n",
85 | " model_pipeline = Pipeline(model_pipeline_steps)\n",
86 | "\n",
87 | " total_features = preprocessor.fit_transform(X_train).shape[1]\n",
88 | "\n",
89 | " optimization_grid = []\n",
90 | "\n",
91 | " # Logistic regression\n",
92 | " optimization_grid.append({\n",
93 | " 'preprocessor__numerical__scaler':[RobustScaler(),StandardScaler(),MinMaxScaler()],\n",
94 | " 'preprocessor__numerical__cleaner__strategy':['mean','median'],\n",
95 | " 'feature_selector__k': list(np.arange(1,total_features,5)) + ['all'],\n",
96 | " 'estimator':[LogisticRegression()]\n",
97 | " })\n",
98 | "\n",
99 | " # K-nearest neighbors\n",
100 | " optimization_grid.append({\n",
101 | " 'preprocessor__numerical__scaler':[RobustScaler(),StandardScaler(),MinMaxScaler()],\n",
102 | " 'preprocessor__numerical__cleaner__strategy':['mean','median'],\n",
103 | " 'feature_selector__k': list(np.arange(1,total_features,5)) + ['all'],\n",
104 | " 'estimator':[KNeighborsClassifier()],\n",
105 | " 'estimator__weights':['uniform','distance'],\n",
106 | " 'estimator__n_neighbors':np.arange(1,20,1)\n",
107 | " })\n",
108 | "\n",
109 | " # Random Forest\n",
110 | " optimization_grid.append({\n",
111 | " 'preprocessor__numerical__scaler':[None],\n",
112 | " 'preprocessor__numerical__cleaner__strategy':['mean','median'],\n",
113 | " 'feature_selector__k': list(np.arange(1,total_features,5)) + ['all'],\n",
114 | " 'estimator':[RandomForestClassifier(random_state=0)],\n",
115 | " 'estimator__n_estimators':np.arange(5,500,10),\n",
116 | " 'estimator__criterion':['gini','entropy']\n",
117 | " })\n",
118 | "\n",
119 | "\n",
120 | " # Gradient boosting\n",
121 | " optimization_grid.append({\n",
122 | " 'preprocessor__numerical__scaler':[None],\n",
123 | " 'preprocessor__numerical__cleaner__strategy':['mean','median'],\n",
124 | " 'feature_selector__k': list(np.arange(1,total_features,5)) + ['all'],\n",
125 | " 'estimator':[GradientBoostingClassifier(random_state=0)],\n",
126 | " 'estimator__n_estimators':np.arange(5,500,10),\n",
127 | " 'estimator__learning_rate':np.linspace(0.1,0.9,20),\n",
128 | " })\n",
129 | "\n",
130 | "\n",
131 | "\n",
132 | " # Decision tree\n",
133 | " optimization_grid.append({\n",
134 | " 'preprocessor__numerical__scaler':[None],\n",
135 | " 'preprocessor__numerical__cleaner__strategy':['mean','median'],\n",
136 | " 'feature_selector__k': list(np.arange(1,total_features,5)) + ['all'],\n",
137 | " 'estimator':[DecisionTreeClassifier(random_state=0)],\n",
138 | " 'estimator__criterion':['gini','entropy']\n",
139 | " })\n",
140 | "\n",
141 | " # Linear SVM\n",
142 | " optimization_grid.append({\n",
143 | " 'preprocessor__numerical__scaler':[RobustScaler(),StandardScaler(),MinMaxScaler()],\n",
144 | " 'preprocessor__numerical__cleaner__strategy':['mean','median'],\n",
145 | " 'feature_selector__k': list(np.arange(1,total_features,5)) + ['all'],\n",
146 | " 'estimator':[LinearSVC(random_state = 0)],\n",
147 | " 'estimator__C': np.arange(0.1,1,0.1),\n",
148 | " \n",
149 | " })\n",
150 | "\n",
151 | " search = RandomizedSearchCV(\n",
152 | " model_pipeline,\n",
153 | " optimization_grid,\n",
154 | " n_iter=self.n_iter,\n",
155 | " scoring = self.scoring_function, \n",
156 | " n_jobs = -1, \n",
157 | " random_state = 0, \n",
158 | " verbose = 3,\n",
159 | " cv = 5\n",
160 | " )\n",
161 | "\n",
162 | " search.fit(X_train, y_train)\n",
163 | " self.best_estimator_ = search.best_estimator_\n",
164 | " self.best_pipeline = search.best_params_\n",
165 | " \n",
166 | "\n",
167 | " \n",
168 | " def predict(self,X,y = None):\n",
169 | " return self.best_estimator_.predict(X)\n",
170 | "\n",
171 | " def predict_proba(self,X,y = None):\n",
172 | " return self.best_estimator_.predict_proba(X)"
173 | ],
174 | "execution_count": 32,
175 | "outputs": []
176 | },
177 | {
178 | "cell_type": "code",
179 | "metadata": {
180 | "id": "yIJERMw9FPtf"
181 | },
182 | "source": [
183 | "d = load_breast_cancer()\n",
184 | "y = d['target']\n",
185 | "X = pd.DataFrame(d['data'],columns = d['feature_names'])\n",
186 | "\n",
187 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)\n",
188 | "\n",
189 | "model = MyAutoMLClassifier()\n",
190 | "model.fit(X_train,y_train)"
191 | ],
192 | "execution_count": 33,
193 | "outputs": []
194 | },
195 | {
196 | "cell_type": "code",
197 | "metadata": {
198 | "id": "_fR1Xa8H-TzW",
199 | "colab": {
200 | "base_uri": "https://localhost:8080/"
201 | },
202 | "outputId": "0379f05e-5607-4061-f30b-a939254a845f"
203 | },
204 | "source": [
205 | "balanced_accuracy_score(y_test, model.predict(X_test))"
206 | ],
207 | "execution_count": 37,
208 | "outputs": [
209 | {
210 | "output_type": "execute_result",
211 | "data": {
212 | "text/plain": [
213 | "0.9428271863821389"
214 | ]
215 | },
216 | "metadata": {
217 | "tags": []
218 | },
219 | "execution_count": 37
220 | }
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "metadata": {
226 | "id": "UQjT-9OE_c-L",
227 | "colab": {
228 | "base_uri": "https://localhost:8080/"
229 | },
230 | "outputId": "ca94334f-d9eb-4447-9dd5-653bfa0adff3"
231 | },
232 | "source": [
233 | "model.best_pipeline"
234 | ],
235 | "execution_count": 38,
236 | "outputs": [
237 | {
238 | "output_type": "execute_result",
239 | "data": {
240 | "text/plain": [
241 | "{'estimator': GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,\n",
242 | " learning_rate=0.1, loss='deviance', max_depth=3,\n",
243 | " max_features=None, max_leaf_nodes=None,\n",
244 | " min_impurity_decrease=0.0, min_impurity_split=None,\n",
245 | " min_samples_leaf=1, min_samples_split=2,\n",
246 | " min_weight_fraction_leaf=0.0, n_estimators=125,\n",
247 | " n_iter_no_change=None, presort='deprecated',\n",
248 | " random_state=0, subsample=1.0, tol=0.0001,\n",
249 | " validation_fraction=0.1, verbose=0,\n",
250 | " warm_start=False),\n",
251 | " 'estimator__learning_rate': 0.1,\n",
252 | " 'estimator__n_estimators': 125,\n",
253 | " 'feature_selector__k': 'all',\n",
254 | " 'preprocessor__numerical__cleaner__strategy': 'median',\n",
255 | " 'preprocessor__numerical__scaler': None}"
256 | ]
257 | },
258 | "metadata": {
259 | "tags": []
260 | },
261 | "execution_count": 38
262 | }
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "metadata": {
268 | "id": "9iFNVZjeACCv"
269 | },
270 | "source": [
271 | ""
272 | ],
273 | "execution_count": null,
274 | "outputs": []
275 | }
276 | ]
277 | }
--------------------------------------------------------------------------------
/Grid_search_and_random_search.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Grid search and random search.ipynb",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyOQO6uVD2Sp+a9ND0Th2m6s",
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | },
15 | "language_info": {
16 | "name": "python"
17 | }
18 | },
19 | "cells": [
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {
23 | "id": "view-in-github",
24 | "colab_type": "text"
25 | },
26 | "source": [
27 | "\n"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "metadata": {
33 | "id": "STKneFuLKwr9"
34 | },
35 | "source": [
36 | "from sklearn.datasets import load_diabetes\n",
37 | "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split\n",
38 | "from sklearn.ensemble import RandomForestRegressor\n",
39 | "import numpy as np"
40 | ],
41 | "execution_count": 4,
42 | "outputs": []
43 | },
44 | {
45 | "cell_type": "code",
46 | "metadata": {
47 | "id": "IZtjhe61K_su"
48 | },
49 | "source": [
50 | "X,y = load_diabetes(return_X_y=True)"
51 | ],
52 | "execution_count": 2,
53 | "outputs": []
54 | },
55 | {
56 | "cell_type": "code",
57 | "metadata": {
58 | "id": "21FNqq7oLBf2"
59 | },
60 | "source": [
61 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)"
62 | ],
63 | "execution_count": 3,
64 | "outputs": []
65 | },
66 | {
67 | "cell_type": "code",
68 | "metadata": {
69 | "colab": {
70 | "base_uri": "https://localhost:8080/"
71 | },
72 | "id": "TQHJqViqLEn3",
73 | "outputId": "83e6de7c-84b1-4683-aafd-b8e49610c889"
74 | },
75 | "source": [
76 | "grid_search = GridSearchCV(RandomForestRegressor(random_state=0),\n",
77 | " {\n",
78 | " 'n_estimators':np.arange(5,100,5),\n",
79 | " 'max_features':np.arange(0.1,1.0,0.05),\n",
80 | " \n",
81 | " },cv=5, scoring=\"r2\",verbose=1,n_jobs=-1\n",
82 | " )\n",
83 | "grid_search.fit(X_train,y_train)"
84 | ],
85 | "execution_count": 6,
86 | "outputs": [
87 | {
88 | "output_type": "stream",
89 | "text": [
90 | "Fitting 5 folds for each of 342 candidates, totalling 1710 fits\n"
91 | ],
92 | "name": "stdout"
93 | },
94 | {
95 | "output_type": "stream",
96 | "text": [
97 | "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n",
98 | "[Parallel(n_jobs=-1)]: Done 128 tasks | elapsed: 7.6s\n",
99 | "[Parallel(n_jobs=-1)]: Done 728 tasks | elapsed: 46.5s\n",
100 | "[Parallel(n_jobs=-1)]: Done 1710 out of 1710 | elapsed: 2.0min finished\n"
101 | ],
102 | "name": "stderr"
103 | },
104 | {
105 | "output_type": "execute_result",
106 | "data": {
107 | "text/plain": [
108 | "GridSearchCV(cv=5, error_score=nan,\n",
109 | " estimator=RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,\n",
110 | " criterion='mse', max_depth=None,\n",
111 | " max_features='auto',\n",
112 | " max_leaf_nodes=None,\n",
113 | " max_samples=None,\n",
114 | " min_impurity_decrease=0.0,\n",
115 | " min_impurity_split=None,\n",
116 | " min_samples_leaf=1,\n",
117 | " min_samples_split=2,\n",
118 | " min_weight_fraction_leaf=0.0,\n",
119 | " n_estimators=100, n_jobs=None,\n",
120 | " oob_score=False, random_state=0,\n",
121 | " verbose=0, warm_start=False),\n",
122 | " iid='deprecated', n_jobs=-1,\n",
123 | " param_grid={'max_features': array([0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 , 0.55, 0.6 ,\n",
124 | " 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95]),\n",
125 | " 'n_estimators': array([ 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85,\n",
126 | " 90, 95])},\n",
127 | " pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
128 | " scoring='r2', verbose=1)"
129 | ]
130 | },
131 | "metadata": {
132 | "tags": []
133 | },
134 | "execution_count": 6
135 | }
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "metadata": {
141 | "colab": {
142 | "base_uri": "https://localhost:8080/"
143 | },
144 | "id": "_Mf5r9tRL6DT",
145 | "outputId": "e6f81ce7-cb95-4489-99e8-160643eb5d61"
146 | },
147 | "source": [
148 | "grid_search.best_params_"
149 | ],
150 | "execution_count": 7,
151 | "outputs": [
152 | {
153 | "output_type": "execute_result",
154 | "data": {
155 | "text/plain": [
156 | "{'max_features': 0.5000000000000001, 'n_estimators': 90}"
157 | ]
158 | },
159 | "metadata": {
160 | "tags": []
161 | },
162 | "execution_count": 7
163 | }
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "metadata": {
169 | "colab": {
170 | "base_uri": "https://localhost:8080/"
171 | },
172 | "id": "fTVYMK4PL8RS",
173 | "outputId": "ec4da40f-a6e6-4733-f5fb-4a794a3c9340"
174 | },
175 | "source": [
176 | "grid_search.best_score_"
177 | ],
178 | "execution_count": 8,
179 | "outputs": [
180 | {
181 | "output_type": "execute_result",
182 | "data": {
183 | "text/plain": [
184 | "0.4153440140065655"
185 | ]
186 | },
187 | "metadata": {
188 | "tags": []
189 | },
190 | "execution_count": 8
191 | }
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "metadata": {
197 | "colab": {
198 | "base_uri": "https://localhost:8080/"
199 | },
200 | "id": "hegQ2NlZLlp8",
201 | "outputId": "b60f0b43-8440-46f1-d0c7-0216105b9566"
202 | },
203 | "source": [
204 | "random_search = RandomizedSearchCV(RandomForestRegressor(random_state=0),\n",
205 | " {\n",
206 | " 'n_estimators':np.arange(5,100,5),\n",
207 | " 'max_features':np.arange(0.1,1.0,0.05),\n",
208 | " },cv=5, scoring=\"r2\",verbose=1,n_jobs=-1, n_iter=50, random_state = 0\n",
209 | " )\n",
210 | "random_search.fit(X_train,y_train)"
211 | ],
212 | "execution_count": 9,
213 | "outputs": [
214 | {
215 | "output_type": "stream",
216 | "text": [
217 | "Fitting 5 folds for each of 50 candidates, totalling 250 fits\n"
218 | ],
219 | "name": "stdout"
220 | },
221 | {
222 | "output_type": "stream",
223 | "text": [
224 | "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n",
225 | "[Parallel(n_jobs=-1)]: Done 54 tasks | elapsed: 4.8s\n",
226 | "[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed: 20.3s finished\n"
227 | ],
228 | "name": "stderr"
229 | },
230 | {
231 | "output_type": "execute_result",
232 | "data": {
233 | "text/plain": [
234 | "RandomizedSearchCV(cv=5, error_score=nan,\n",
235 | " estimator=RandomForestRegressor(bootstrap=True,\n",
236 | " ccp_alpha=0.0,\n",
237 | " criterion='mse',\n",
238 | " max_depth=None,\n",
239 | " max_features='auto',\n",
240 | " max_leaf_nodes=None,\n",
241 | " max_samples=None,\n",
242 | " min_impurity_decrease=0.0,\n",
243 | " min_impurity_split=None,\n",
244 | " min_samples_leaf=1,\n",
245 | " min_samples_split=2,\n",
246 | " min_weight_fraction_leaf=0.0,\n",
247 | " n_estimators=100,\n",
248 | " n_jobs=None, oob_score=Fals...\n",
249 | " warm_start=False),\n",
250 | " iid='deprecated', n_iter=50, n_jobs=-1,\n",
251 | " param_distributions={'max_features': array([0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 , 0.55, 0.6 ,\n",
252 | " 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95]),\n",
253 | " 'n_estimators': array([ 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85,\n",
254 | " 90, 95])},\n",
255 | " pre_dispatch='2*n_jobs', random_state=0, refit=True,\n",
256 | " return_train_score=False, scoring='r2', verbose=1)"
257 | ]
258 | },
259 | "metadata": {
260 | "tags": []
261 | },
262 | "execution_count": 9
263 | }
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "metadata": {
269 | "colab": {
270 | "base_uri": "https://localhost:8080/"
271 | },
272 | "id": "UYP8IsUbL_se",
273 | "outputId": "7cb253d5-cf6e-4319-e62b-48eaa74851f8"
274 | },
275 | "source": [
276 | "random_search.best_params_"
277 | ],
278 | "execution_count": 10,
279 | "outputs": [
280 | {
281 | "output_type": "execute_result",
282 | "data": {
283 | "text/plain": [
284 | "{'max_features': 0.5500000000000002, 'n_estimators': 95}"
285 | ]
286 | },
287 | "metadata": {
288 | "tags": []
289 | },
290 | "execution_count": 10
291 | }
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "metadata": {
297 | "colab": {
298 | "base_uri": "https://localhost:8080/"
299 | },
300 | "id": "anPLW1r6MCaG",
301 | "outputId": "d0920356-253c-49d2-b350-84ebc4134d03"
302 | },
303 | "source": [
304 | "random_search.best_score_"
305 | ],
306 | "execution_count": 11,
307 | "outputs": [
308 | {
309 | "output_type": "execute_result",
310 | "data": {
311 | "text/plain": [
312 | "0.41479921526675173"
313 | ]
314 | },
315 | "metadata": {
316 | "tags": []
317 | },
318 | "execution_count": 11
319 | }
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "metadata": {
325 | "id": "M0CNcevAMDxm"
326 | },
327 | "source": [
328 | ""
329 | ],
330 | "execution_count": null,
331 | "outputs": []
332 | }
333 | ]
334 | }
--------------------------------------------------------------------------------
/Feature_selection_via_grid_search_in_supervised_models.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Feature selection via grid search in supervised models",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyOTWlfg2XQqvTfhXoKmUvmU"
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | }
14 | },
15 | "cells": [
16 | {
17 | "cell_type": "code",
18 | "metadata": {
19 | "id": "fpMJg9DIqyOX",
20 | "colab_type": "code",
21 | "colab": {}
22 | },
23 | "source": [
24 | "import numpy as np\n",
25 | "from sklearn.datasets import load_boston\n",
26 | "from sklearn.feature_selection import SelectKBest,f_regression\n",
27 | "from sklearn.pipeline import Pipeline\n",
28 | "from sklearn.linear_model import LinearRegression\n",
29 | "from sklearn.neighbors import KNeighborsRegressor\n",
30 | "from sklearn.preprocessing import StandardScaler\n",
31 | "from sklearn.ensemble import RandomForestRegressor\n",
32 | "from sklearn.model_selection import GridSearchCV\n",
33 | "from sklearn.metrics import mean_squared_error"
34 | ],
35 | "execution_count": 0,
36 | "outputs": []
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {
41 | "id": "LGkBrLPGsbUX",
42 | "colab_type": "text"
43 | },
44 | "source": [
45 | "# Data loading"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "metadata": {
51 | "id": "LP6xdkXWsGw6",
52 | "colab_type": "code",
53 | "colab": {}
54 | },
55 | "source": [
56 | "data,target = load_boston(return_X_y=True)"
57 | ],
58 | "execution_count": 0,
59 | "outputs": []
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {
64 | "id": "bvYfKpbduElz",
65 | "colab_type": "text"
66 | },
67 | "source": [
68 | "# Pipeline definition"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "metadata": {
74 | "id": "WuLRz6g6sfCa",
75 | "colab_type": "code",
76 | "colab": {}
77 | },
78 | "source": [
79 | "# Two-stage estimator: SelectKBest ranks features with f_regression and\n",
80 | "# passes the retained columns on to a linear regression.\n",
81 | "pipeline = Pipeline(steps=[\n",
82 | "    ('selector', SelectKBest(score_func=f_regression)),\n",
83 | "    ('model', LinearRegression()),\n",
84 | "])"
85 | ],
86 | "execution_count": 0,
87 | "outputs": []
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {
92 | "id": "aPxDM9Rcub7W",
93 | "colab_type": "text"
94 | },
95 | "source": [
96 | "# Grid Search with cross-validation for linear regression"
97 | ]
98 | },
99 | {
100 | "cell_type": "markdown",
101 | "metadata": {
102 | "id": "QRnjUdsWwgYJ",
103 | "colab_type": "text"
104 | },
105 | "source": [
106 | "Defining grid search parameters."
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "metadata": {
112 | "id": "h7nyD1Biua-8",
113 | "colab_type": "code",
114 | "colab": {}
115 | },
116 | "source": [
117 | "# Tune how many features the selector keeps (k = 3..10), 5-fold CV.\n",
118 | "search = GridSearchCV(\n",
119 | "    estimator=pipeline,\n",
120 | "    param_grid={'selector__k': list(range(3, 11))},\n",
121 | "    scoring='neg_mean_squared_error',\n",
122 | "    cv=5,\n",
123 | "    n_jobs=-1,\n",
124 | "    verbose=3,\n",
125 | ")"
126 | ],
127 | "execution_count": 0,
128 | "outputs": []
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {
133 | "id": "ZcOxTYwfwk6W",
134 | "colab_type": "text"
135 | },
136 | "source": [
137 | "Performing grid search over the specified parameters."
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "metadata": {
143 | "id": "NTGJf0suv6Km",
144 | "colab_type": "code",
145 | "outputId": "48bdc9a2-a2f5-498a-8fd1-090319bc83c8",
146 | "colab": {
147 | "base_uri": "https://localhost:8080/",
148 | "height": 323
149 | }
150 | },
151 | "source": [
152 | "search.fit(data,target)"
153 | ],
154 | "execution_count": 0,
155 | "outputs": [
156 | {
157 | "output_type": "stream",
158 | "text": [
159 | "Fitting 5 folds for each of 8 candidates, totalling 40 fits\n"
160 | ],
161 | "name": "stdout"
162 | },
163 | {
164 | "output_type": "stream",
165 | "text": [
166 | "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n",
167 | "[Parallel(n_jobs=-1)]: Done 40 out of 40 | elapsed: 1.2s finished\n"
168 | ],
169 | "name": "stderr"
170 | },
171 | {
172 | "output_type": "execute_result",
173 | "data": {
174 | "text/plain": [
175 | "GridSearchCV(cv=5, error_score=nan,\n",
176 | " estimator=Pipeline(memory=None,\n",
177 | " steps=[('selector',\n",
178 | " SelectKBest(k=10,\n",
179 | " score_func=)),\n",
180 | " ('model',\n",
181 | " LinearRegression(copy_X=True,\n",
182 | " fit_intercept=True,\n",
183 | " n_jobs=None,\n",
184 | " normalize=False))],\n",
185 | " verbose=False),\n",
186 | " iid='deprecated', n_jobs=-1,\n",
187 | " param_grid={'selector__k': [3, 4, 5, 6, 7, 8, 9, 10]},\n",
188 | " pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
189 | " scoring='neg_mean_squared_error', verbose=3)"
190 | ]
191 | },
192 | "metadata": {
193 | "tags": []
194 | },
195 | "execution_count": 5
196 | }
197 | ]
198 | },
199 | {
200 | "cell_type": "markdown",
201 | "metadata": {
202 | "id": "0iZWFL7SxCzm",
203 | "colab_type": "text"
204 | },
205 | "source": [
206 | "The best value for *k* is:"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "metadata": {
212 | "id": "MGRB8u85wDXK",
213 | "colab_type": "code",
214 | "outputId": "41811e04-026d-4229-bed6-85c523d29b72",
215 | "colab": {
216 | "base_uri": "https://localhost:8080/",
217 | "height": 34
218 | }
219 | },
220 | "source": [
221 | "search.best_params_"
222 | ],
223 | "execution_count": 0,
224 | "outputs": [
225 | {
226 | "output_type": "execute_result",
227 | "data": {
228 | "text/plain": [
229 | "{'selector__k': 3}"
230 | ]
231 | },
232 | "metadata": {
233 | "tags": []
234 | },
235 | "execution_count": 6
236 | }
237 | ]
238 | },
239 | {
240 | "cell_type": "markdown",
241 | "metadata": {
242 | "id": "S8NxRkDnxGHv",
243 | "colab_type": "text"
244 | },
245 | "source": [
246 | "The best score achieved is:"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "metadata": {
252 | "id": "_EtvKFTQwHf_",
253 | "colab_type": "code",
254 | "outputId": "01fd177c-e115-4306-9c63-07c90641737d",
255 | "colab": {
256 | "base_uri": "https://localhost:8080/",
257 | "height": 34
258 | }
259 | },
260 | "source": [
261 | "search.best_score_"
262 | ],
263 | "execution_count": 0,
264 | "outputs": [
265 | {
266 | "output_type": "execute_result",
267 | "data": {
268 | "text/plain": [
269 | "-36.4236890153343"
270 | ]
271 | },
272 | "metadata": {
273 | "tags": []
274 | },
275 | "execution_count": 7
276 | }
277 | ]
278 | },
279 | {
280 | "cell_type": "markdown",
281 | "metadata": {
282 | "id": "vvQlLGXV0i8F",
283 | "colab_type": "text"
284 | },
285 | "source": [
286 | "# Grid search with cross-validation for Random Forest Regressor"
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "metadata": {
292 | "id": "q_okg7w_0xdv",
293 | "colab_type": "code",
294 | "colab": {}
295 | },
296 | "source": [
297 | "# Two-stage estimator: SelectKBest ranks features with f_regression and\n",
298 | "# passes the retained columns on to a random forest seeded with 0.\n",
299 | "pipeline = Pipeline(steps=[\n",
300 | "    ('selector', SelectKBest(score_func=f_regression)),\n",
301 | "    ('model', RandomForestRegressor(random_state=0)),\n",
302 | "])"
303 | ],
304 | "execution_count": 0,
305 | "outputs": []
306 | },
307 | {
308 | "cell_type": "code",
309 | "metadata": {
310 | "id": "jXxlQDwUweD0",
311 | "colab_type": "code",
312 | "colab": {}
313 | },
314 | "source": [
315 | "# Jointly tune the number of kept features and the forest size, 5-fold CV.\n",
316 | "search = GridSearchCV(\n",
317 | "    estimator=pipeline,\n",
318 | "    param_grid={\n",
319 | "        'selector__k': list(range(3, 11)),\n",
320 | "        'model__n_estimators': np.arange(10, 200, 10),\n",
321 | "    },\n",
322 | "    scoring='neg_mean_squared_error', cv=5, n_jobs=-1, verbose=3,\n",
323 | ")"
324 | ],
325 | "execution_count": 0,
326 | "outputs": []
327 | },
328 | {
329 | "cell_type": "code",
330 | "metadata": {
331 | "id": "XSD6zprr0vFu",
332 | "colab_type": "code",
333 | "outputId": "9bc8c84a-72cb-41c2-82d1-75257f8ecc75",
334 | "colab": {
335 | "base_uri": "https://localhost:8080/",
336 | "height": 578
337 | }
338 | },
339 | "source": [
340 | "search.fit(data,target)"
341 | ],
342 | "execution_count": 0,
343 | "outputs": [
344 | {
345 | "output_type": "stream",
346 | "text": [
347 | "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n"
348 | ],
349 | "name": "stderr"
350 | },
351 | {
352 | "output_type": "stream",
353 | "text": [
354 | "Fitting 5 folds for each of 152 candidates, totalling 760 fits\n"
355 | ],
356 | "name": "stdout"
357 | },
358 | {
359 | "output_type": "stream",
360 | "text": [
361 | "[Parallel(n_jobs=-1)]: Done 92 tasks | elapsed: 3.2s\n",
362 | "[Parallel(n_jobs=-1)]: Done 338 tasks | elapsed: 30.2s\n",
363 | "[Parallel(n_jobs=-1)]: Done 498 tasks | elapsed: 1.0min\n",
364 | "[Parallel(n_jobs=-1)]: Done 722 tasks | elapsed: 2.1min\n",
365 | "[Parallel(n_jobs=-1)]: Done 760 out of 760 | elapsed: 2.4min finished\n"
366 | ],
367 | "name": "stderr"
368 | },
369 | {
370 | "output_type": "execute_result",
371 | "data": {
372 | "text/plain": [
373 | "GridSearchCV(cv=5, error_score=nan,\n",
374 | " estimator=Pipeline(memory=None,\n",
375 | " steps=[('selector',\n",
376 | " SelectKBest(k=10,\n",
377 | " score_func=)),\n",
378 | " ('model',\n",
379 | " RandomForestRegressor(bootstrap=True,\n",
380 | " ccp_alpha=0.0,\n",
381 | " criterion='mse',\n",
382 | " max_depth=None,\n",
383 | " max_features='auto',\n",
384 | " max_leaf_nodes=None,\n",
385 | " max_samples=None,\n",
386 | " min_impurity_decrease=0.0,\n",
387 | " min_impurity_split=None...\n",
388 | " oob_score=False,\n",
389 | " random_state=0,\n",
390 | " verbose=0,\n",
391 | " warm_start=False))],\n",
392 | " verbose=False),\n",
393 | " iid='deprecated', n_jobs=-1,\n",
394 | " param_grid={'model__n_estimators': array([ 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130,\n",
395 | " 140, 150, 160, 170, 180, 190]),\n",
396 | " 'selector__k': [3, 4, 5, 6, 7, 8, 9, 10]},\n",
397 | " pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
398 | " scoring='neg_mean_squared_error', verbose=3)"
399 | ]
400 | },
401 | "metadata": {
402 | "tags": []
403 | },
404 | "execution_count": 10
405 | }
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "metadata": {
411 | "id": "jMZCRTjR0-fi",
412 | "colab_type": "code",
413 | "outputId": "b319785d-77cd-425e-9d57-086f2bc10cd9",
414 | "colab": {
415 | "base_uri": "https://localhost:8080/",
416 | "height": 34
417 | }
418 | },
419 | "source": [
420 | "search.best_params_"
421 | ],
422 | "execution_count": 0,
423 | "outputs": [
424 | {
425 | "output_type": "execute_result",
426 | "data": {
427 | "text/plain": [
428 | "{'model__n_estimators': 110, 'selector__k': 6}"
429 | ]
430 | },
431 | "metadata": {
432 | "tags": []
433 | },
434 | "execution_count": 11
435 | }
436 | ]
437 | },
438 | {
439 | "cell_type": "code",
440 | "metadata": {
441 | "id": "LsGilVty1EuG",
442 | "colab_type": "code",
443 | "outputId": "f1158541-c3c6-4bef-9be0-4410357cbf72",
444 | "colab": {
445 | "base_uri": "https://localhost:8080/",
446 | "height": 34
447 | }
448 | },
449 | "source": [
450 | "search.best_score_"
451 | ],
452 | "execution_count": 0,
453 | "outputs": [
454 | {
455 | "output_type": "execute_result",
456 | "data": {
457 | "text/plain": [
458 | "-22.170138432624004"
459 | ]
460 | },
461 | "metadata": {
462 | "tags": []
463 | },
464 | "execution_count": 12
465 | }
466 | ]
467 | }
468 | ]
469 | }
--------------------------------------------------------------------------------
/Candlesticks.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Candlesticks.ipynb",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyOLZTfFRZzu++sSs7IF1QaW"
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | }
14 | },
15 | "cells": [
16 | {
17 | "cell_type": "code",
18 | "metadata": {
19 | "id": "fqWPj5KM95ZH",
20 | "outputId": "d64775b0-87c6-4ce9-de90-8f894b1477cc",
21 | "colab": {
22 | "base_uri": "https://localhost:8080/"
23 | }
24 | },
25 | "source": [
26 | "%pip install yfinance"
27 | ],
28 | "execution_count": 49,
29 | "outputs": [
30 | {
31 | "output_type": "stream",
32 | "text": [
33 | "Requirement already satisfied: yfinance in /usr/local/lib/python3.6/dist-packages (0.1.55)\n",
34 | "Requirement already satisfied: lxml>=4.5.1 in /usr/local/lib/python3.6/dist-packages (from yfinance) (4.6.1)\n",
35 | "Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.6/dist-packages (from yfinance) (2.23.0)\n",
36 | "Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.6/dist-packages (from yfinance) (1.18.5)\n",
37 | "Requirement already satisfied: multitasking>=0.0.7 in /usr/local/lib/python3.6/dist-packages (from yfinance) (0.0.9)\n",
38 | "Requirement already satisfied: pandas>=0.24 in /usr/local/lib/python3.6/dist-packages (from yfinance) (1.1.3)\n",
39 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance) (3.0.4)\n",
40 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance) (2020.6.20)\n",
41 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance) (2.10)\n",
42 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance) (1.24.3)\n",
43 | "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.24->yfinance) (2.8.1)\n",
44 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.24->yfinance) (2018.9)\n",
45 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.7.3->pandas>=0.24->yfinance) (1.15.0)\n"
46 | ],
47 | "name": "stdout"
48 | }
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "metadata": {
54 | "id": "YypFoEHY98BW"
55 | },
56 | "source": [
57 | "import pandas as pd\n",
58 | "import yfinance"
59 | ],
60 | "execution_count": 50,
61 | "outputs": []
62 | },
63 | {
64 | "cell_type": "code",
65 | "metadata": {
66 | "id": "UWKF9JsQ995x"
67 | },
68 | "source": [
69 | "ticker = yfinance.Ticker(\"SPY\")\n",
70 | "df = ticker.history(period = '5mo')"
71 | ],
72 | "execution_count": 51,
73 | "outputs": []
74 | },
75 | {
76 | "cell_type": "code",
77 | "metadata": {
78 | "id": "ylebwMCT-KBG"
79 | },
80 | "source": [
81 | "# Vectorised candlestick-pattern detection: every pattern compares a bar with\n",
82 | "# the previous bar (shift(1)) and, for swings, the bar before that (shift(2)).\n",
83 | "prev = df[['High', 'Low']].shift(1)\n",
84 | "prev_2 = df[['High', 'Low']].shift(2)\n",
85 | "\n",
86 | "# Per-bar geometry\n",
87 | "realbody = (df['Open'] - df['Close']).abs()\n",
88 | "candle_range = df['High'] - df['Low']\n",
89 | "midpoint = (df['High'] + df['Low']) / 2\n",
90 | "body_low = df[['Open', 'Close']].min(axis=1)\n",
91 | "body_high = df[['Open', 'Close']].max(axis=1)\n",
92 | "\n",
93 | "# Conditions shared by several patterns\n",
94 | "higher_high = df['High'] > prev['High']\n",
95 | "lower_low = df['Low'] < prev['Low']\n",
96 | "small_body = realbody <= candle_range / 3\n",
97 | "large_body = realbody >= 0.8 * candle_range\n",
98 | "engulfing = higher_high & lower_low & large_body\n",
99 | "\n",
100 | "patterns = {\n",
101 | "    'Bullish swing': (df['Low'] > prev['Low']) & (prev['Low'] < prev_2['Low']),\n",
102 | "    'Bearish swing': (df['High'] < prev['High']) & (prev['High'] > prev_2['High']),\n",
103 | "    'Bullish pinbar': small_body & (body_low > midpoint) & lower_low,\n",
104 | "    'Bearish pinbar': small_body & (body_high < midpoint) & higher_high,\n",
105 | "    'Inside bar': (df['High'] < prev['High']) & (df['Low'] > prev['Low']),\n",
106 | "    'Outside bar': higher_high & lower_low,\n",
107 | "    'Bullish engulfing': engulfing & (df['Close'] > df['Open']),\n",
108 | "    'Bearish engulfing': engulfing & (df['Close'] < df['Open']),\n",
109 | "}\n",
110 | "\n",
111 | "# Store plain bool columns. Rows 0 and 1 lack the two preceding bars, so they\n",
112 | "# never flag a pattern (the row-wise loop this replaces started at row 2).\n",
113 | "# Comparisons against NaN are already False, so no frame-wide fillna(False)\n",
114 | "# is needed and the price columns are left untouched. Each value in patterns\n",
115 | "# is a freshly built Series, so editing it in place does not alias another.\n",
116 | "for name, flags in patterns.items():\n",
117 | "    flags.iloc[:2] = False\n",
118 | "    df[name] = flags"
119 | ],
120 | "execution_count": 52,
121 | "outputs": []
122 | },
123 | {
124 | "cell_type": "code",
125 | "metadata": {
126 | "id": "B7Pv42Dl-b0n",
127 | "outputId": "2ff55586-aefd-468e-9866-da87ea9d289c",
128 | "colab": {
129 | "base_uri": "https://localhost:8080/",
130 | "height": 637
131 | }
132 | },
133 | "source": [
134 | "df"
135 | ],
136 | "execution_count": 53,
137 | "outputs": [
138 | {
139 | "output_type": "execute_result",
140 | "data": {
141 | "text/html": [
142 | "\n",
143 | "\n",
156 | "
\n",
157 | " \n",
158 | " \n",
159 | " | \n",
160 | " Open | \n",
161 | " High | \n",
162 | " Low | \n",
163 | " Close | \n",
164 | " Volume | \n",
165 | " Dividends | \n",
166 | " Stock Splits | \n",
167 | " Bullish swing | \n",
168 | " Bearish swing | \n",
169 | " Bullish pinbar | \n",
170 | " Bearish pinbar | \n",
171 | " Inside bar | \n",
172 | " Outside bar | \n",
173 | " Bullish engulfing | \n",
174 | " Bearish engulfing | \n",
175 | "
\n",
176 | " \n",
177 | " | Date | \n",
178 | " | \n",
179 | " | \n",
180 | " | \n",
181 | " | \n",
182 | " | \n",
183 | " | \n",
184 | " | \n",
185 | " | \n",
186 | " | \n",
187 | " | \n",
188 | " | \n",
189 | " | \n",
190 | " | \n",
191 | " | \n",
192 | " | \n",
193 | "
\n",
194 | " \n",
195 | " \n",
196 | " \n",
197 | " | 2020-06-03 | \n",
198 | " 307.649227 | \n",
199 | " 310.604352 | \n",
200 | " 307.351744 | \n",
201 | " 309.573029 | \n",
202 | " 92567600 | \n",
203 | " 0.0 | \n",
204 | " 0 | \n",
205 | " False | \n",
206 | " False | \n",
207 | " False | \n",
208 | " False | \n",
209 | " False | \n",
210 | " False | \n",
211 | " False | \n",
212 | " False | \n",
213 | "
\n",
214 | " \n",
215 | " | 2020-06-04 | \n",
216 | " 308.511945 | \n",
217 | " 310.386176 | \n",
218 | " 306.498898 | \n",
219 | " 308.759857 | \n",
220 | " 75794400 | \n",
221 | " 0.0 | \n",
222 | " 0 | \n",
223 | " False | \n",
224 | " False | \n",
225 | " False | \n",
226 | " False | \n",
227 | " False | \n",
228 | " False | \n",
229 | " False | \n",
230 | " False | \n",
231 | "
\n",
232 | " \n",
233 | " | 2020-06-05 | \n",
234 | " 314.580853 | \n",
235 | " 318.587093 | \n",
236 | " 314.511430 | \n",
237 | " 316.673218 | \n",
238 | " 150524700 | \n",
239 | " 0.0 | \n",
240 | " 0 | \n",
241 | " True | \n",
242 | " False | \n",
243 | " False | \n",
244 | " False | \n",
245 | " False | \n",
246 | " False | \n",
247 | " False | \n",
248 | " False | \n",
249 | "
\n",
250 | " \n",
251 | " | 2020-06-08 | \n",
252 | " 317.545882 | \n",
253 | " 320.709245 | \n",
254 | " 316.960812 | \n",
255 | " 320.501007 | \n",
256 | " 73641200 | \n",
257 | " 0.0 | \n",
258 | " 0 | \n",
259 | " False | \n",
260 | " False | \n",
261 | " False | \n",
262 | " False | \n",
263 | " False | \n",
264 | " False | \n",
265 | " False | \n",
266 | " False | \n",
267 | "
\n",
268 | " \n",
269 | " | 2020-06-09 | \n",
270 | " 317.625216 | \n",
271 | " 320.580342 | \n",
272 | " 316.693064 | \n",
273 | " 318.111145 | \n",
274 | " 77479200 | \n",
275 | " 0.0 | \n",
276 | " 0 | \n",
277 | " False | \n",
278 | " True | \n",
279 | " False | \n",
280 | " False | \n",
281 | " False | \n",
282 | " False | \n",
283 | " False | \n",
284 | " False | \n",
285 | "
\n",
286 | " \n",
287 | " | ... | \n",
288 | " ... | \n",
289 | " ... | \n",
290 | " ... | \n",
291 | " ... | \n",
292 | " ... | \n",
293 | " ... | \n",
294 | " ... | \n",
295 | " ... | \n",
296 | " ... | \n",
297 | " ... | \n",
298 | " ... | \n",
299 | " ... | \n",
300 | " ... | \n",
301 | " ... | \n",
302 | " ... | \n",
303 | "
\n",
304 | " \n",
305 | " | 2020-10-27 | \n",
306 | " 339.760010 | \n",
307 | " 340.119995 | \n",
308 | " 337.989990 | \n",
309 | " 338.220001 | \n",
310 | " 65994100 | \n",
311 | " 0.0 | \n",
312 | " 0 | \n",
313 | " True | \n",
314 | " False | \n",
315 | " False | \n",
316 | " False | \n",
317 | " True | \n",
318 | " False | \n",
319 | " False | \n",
320 | " False | \n",
321 | "
\n",
322 | " \n",
323 | " | 2020-10-28 | \n",
324 | " 332.100006 | \n",
325 | " 338.250000 | \n",
326 | " 326.130005 | \n",
327 | " 326.660004 | \n",
328 | " 127094300 | \n",
329 | " 0.0 | \n",
330 | " 0 | \n",
331 | " False | \n",
332 | " False | \n",
333 | " False | \n",
334 | " False | \n",
335 | " False | \n",
336 | " False | \n",
337 | " False | \n",
338 | " False | \n",
339 | "
\n",
340 | " \n",
341 | " | 2020-10-29 | \n",
342 | " 326.910004 | \n",
343 | " 333.399994 | \n",
344 | " 325.089996 | \n",
345 | " 329.980011 | \n",
346 | " 90597700 | \n",
347 | " 0.0 | \n",
348 | " 0 | \n",
349 | " False | \n",
350 | " False | \n",
351 | " False | \n",
352 | " False | \n",
353 | " False | \n",
354 | " False | \n",
355 | " False | \n",
356 | " False | \n",
357 | "
\n",
358 | " \n",
359 | " | 2020-10-30 | \n",
360 | " 328.279999 | \n",
361 | " 329.690002 | \n",
362 | " 322.600006 | \n",
363 | " 326.540009 | \n",
364 | " 120287300 | \n",
365 | " 0.0 | \n",
366 | " 0 | \n",
367 | " False | \n",
368 | " False | \n",
369 | " True | \n",
370 | " False | \n",
371 | " False | \n",
372 | " False | \n",
373 | " False | \n",
374 | " False | \n",
375 | "
\n",
376 | " \n",
377 | " | 2020-11-02 | \n",
378 | " 330.200012 | \n",
379 | " 332.359985 | \n",
380 | " 327.239990 | \n",
381 | " 330.200012 | \n",
382 | " 85702800 | \n",
383 | " 0.0 | \n",
384 | " 0 | \n",
385 | " True | \n",
386 | " False | \n",
387 | " False | \n",
388 | " False | \n",
389 | " False | \n",
390 | " False | \n",
391 | " False | \n",
392 | " False | \n",
393 | "
\n",
394 | " \n",
395 | "
\n",
396 | "
107 rows × 15 columns
\n",
397 | "
"
398 | ],
399 | "text/plain": [
400 | " Open High ... Bullish engulfing Bearish engulfing\n",
401 | "Date ... \n",
402 | "2020-06-03 307.649227 310.604352 ... False False\n",
403 | "2020-06-04 308.511945 310.386176 ... False False\n",
404 | "2020-06-05 314.580853 318.587093 ... False False\n",
405 | "2020-06-08 317.545882 320.709245 ... False False\n",
406 | "2020-06-09 317.625216 320.580342 ... False False\n",
407 | "... ... ... ... ... ...\n",
408 | "2020-10-27 339.760010 340.119995 ... False False\n",
409 | "2020-10-28 332.100006 338.250000 ... False False\n",
410 | "2020-10-29 326.910004 333.399994 ... False False\n",
411 | "2020-10-30 328.279999 329.690002 ... False False\n",
412 | "2020-11-02 330.200012 332.359985 ... False False\n",
413 | "\n",
414 | "[107 rows x 15 columns]"
415 | ]
416 | },
417 | "metadata": {
418 | "tags": []
419 | },
420 | "execution_count": 53
421 | }
422 | ]
423 | },
424 | {
425 | "cell_type": "code",
426 | "metadata": {
427 | "id": "aQp9ZDMKDPvI"
428 | },
429 | "source": [
430 | ""
431 | ],
432 | "execution_count": 53,
433 | "outputs": []
434 | }
435 | ]
436 | }
--------------------------------------------------------------------------------
/Calculation_of_daily_pivot_levels.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Calculation of daily pivot levels.ipynb",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyPFmH9nyVVd7WK+RoGMlwC7"
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | }
14 | },
15 | "cells": [
16 | {
17 | "cell_type": "code",
18 | "metadata": {
19 | "id": "7g3Xf0cMv-H7",
20 | "outputId": "1bda63ec-3870-4e7b-99b1-31022ac7e13a",
21 | "colab": {
22 | "base_uri": "https://localhost:8080/",
23 | "height": 230
24 | }
25 | },
26 | "source": [
27 | "%pip install yfinance"
28 | ],
29 | "execution_count": 14,
30 | "outputs": [
31 | {
32 | "output_type": "stream",
33 | "text": [
34 | "Requirement already satisfied: yfinance in /usr/local/lib/python3.6/dist-packages (0.1.54)\n",
35 | "Requirement already satisfied: pandas>=0.24 in /usr/local/lib/python3.6/dist-packages (from yfinance) (1.0.5)\n",
36 | "Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.6/dist-packages (from yfinance) (1.18.5)\n",
37 | "Requirement already satisfied: multitasking>=0.0.7 in /usr/local/lib/python3.6/dist-packages (from yfinance) (0.0.9)\n",
38 | "Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.6/dist-packages (from yfinance) (2.23.0)\n",
39 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.24->yfinance) (2018.9)\n",
40 | "Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.24->yfinance) (2.8.1)\n",
41 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance) (2020.6.20)\n",
42 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance) (1.24.3)\n",
43 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance) (2.10)\n",
44 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance) (3.0.4)\n",
45 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.6.1->pandas>=0.24->yfinance) (1.15.0)\n"
46 | ],
47 | "name": "stdout"
48 | }
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "metadata": {
54 | "id": "cONv05MkwBVB"
55 | },
56 | "source": [
57 | "import numpy as np\n",
58 | "import pandas as pd\n",
59 | "import yfinance\n"
60 | ],
61 | "execution_count": 15,
62 | "outputs": []
63 | },
64 | {
65 | "cell_type": "code",
66 | "metadata": {
67 | "id": "qL393Ux9wE3x"
68 | },
69 | "source": [
70 | "ticker = yfinance.Ticker(\"FB\")\n",
71 | "df = ticker.history(interval=\"1d\")\n"
72 | ],
73 | "execution_count": 16,
74 | "outputs": []
75 | },
76 | {
77 | "cell_type": "code",
78 | "metadata": {
79 | "id": "-VXqcqOBxEDc",
80 | "outputId": "9e7f1a6a-3a96-4073-af03-a4ef655b5094",
81 | "colab": {
82 | "base_uri": "https://localhost:8080/",
83 | "height": 686
84 | }
85 | },
86 | "source": [
87 | "df"
88 | ],
89 | "execution_count": 20,
90 | "outputs": [
91 | {
92 | "output_type": "execute_result",
93 | "data": {
94 | "text/html": [
95 | "\n",
96 | "\n",
109 | "
\n",
110 | " \n",
111 | " \n",
112 | " | \n",
113 | " Open | \n",
114 | " High | \n",
115 | " Low | \n",
116 | " Close | \n",
117 | " Volume | \n",
118 | " Dividends | \n",
119 | " Stock Splits | \n",
120 | "
\n",
121 | " \n",
122 | " | Date | \n",
123 | " | \n",
124 | " | \n",
125 | " | \n",
126 | " | \n",
127 | " | \n",
128 | " | \n",
129 | " | \n",
130 | "
\n",
131 | " \n",
132 | " \n",
133 | " \n",
134 | " | 2020-08-31 | \n",
135 | " 293.95 | \n",
136 | " 296.88 | \n",
137 | " 291.55 | \n",
138 | " 293.20 | \n",
139 | " 17345100 | \n",
140 | " 0 | \n",
141 | " 0 | \n",
142 | "
\n",
143 | " \n",
144 | " | 2020-09-01 | \n",
145 | " 294.71 | \n",
146 | " 301.49 | \n",
147 | " 292.71 | \n",
148 | " 295.44 | \n",
149 | " 17320900 | \n",
150 | " 0 | \n",
151 | " 0 | \n",
152 | "
\n",
153 | " \n",
154 | " | 2020-09-02 | \n",
155 | " 298.88 | \n",
156 | " 303.60 | \n",
157 | " 293.05 | \n",
158 | " 302.50 | \n",
159 | " 24341400 | \n",
160 | " 0 | \n",
161 | " 0 | \n",
162 | "
\n",
163 | " \n",
164 | " | 2020-09-03 | \n",
165 | " 295.99 | \n",
166 | " 297.60 | \n",
167 | " 283.63 | \n",
168 | " 291.12 | \n",
169 | " 32294100 | \n",
170 | " 0 | \n",
171 | " 0 | \n",
172 | "
\n",
173 | " \n",
174 | " | 2020-09-04 | \n",
175 | " 287.25 | \n",
176 | " 289.00 | \n",
177 | " 271.14 | \n",
178 | " 282.73 | \n",
179 | " 30333700 | \n",
180 | " 0 | \n",
181 | " 0 | \n",
182 | "
\n",
183 | " \n",
184 | " | 2020-09-08 | \n",
185 | " 271.28 | \n",
186 | " 279.30 | \n",
187 | " 269.42 | \n",
188 | " 271.16 | \n",
189 | " 24864000 | \n",
190 | " 0 | \n",
191 | " 0 | \n",
192 | "
\n",
193 | " \n",
194 | " | 2020-09-09 | \n",
195 | " 275.77 | \n",
196 | " 278.48 | \n",
197 | " 271.35 | \n",
198 | " 273.72 | \n",
199 | " 22918800 | \n",
200 | " 0 | \n",
201 | " 0 | \n",
202 | "
\n",
203 | " \n",
204 | " | 2020-09-10 | \n",
205 | " 275.51 | \n",
206 | " 279.16 | \n",
207 | " 267.03 | \n",
208 | " 268.09 | \n",
209 | " 24814700 | \n",
210 | " 0 | \n",
211 | " 0 | \n",
212 | "
\n",
213 | " \n",
214 | " | 2020-09-11 | \n",
215 | " 270.06 | \n",
216 | " 271.39 | \n",
217 | " 262.64 | \n",
218 | " 266.61 | \n",
219 | " 18913900 | \n",
220 | " 0 | \n",
221 | " 0 | \n",
222 | "
\n",
223 | " \n",
224 | " | 2020-09-14 | \n",
225 | " 270.95 | \n",
226 | " 276.64 | \n",
227 | " 265.70 | \n",
228 | " 266.15 | \n",
229 | " 24093800 | \n",
230 | " 0 | \n",
231 | " 0 | \n",
232 | "
\n",
233 | " \n",
234 | " | 2020-09-15 | \n",
235 | " 270.67 | \n",
236 | " 274.52 | \n",
237 | " 269.30 | \n",
238 | " 272.42 | \n",
239 | " 18478500 | \n",
240 | " 0 | \n",
241 | " 0 | \n",
242 | "
\n",
243 | " \n",
244 | " | 2020-09-16 | \n",
245 | " 267.29 | \n",
246 | " 272.44 | \n",
247 | " 261.79 | \n",
248 | " 263.52 | \n",
249 | " 29183400 | \n",
250 | " 0 | \n",
251 | " 0 | \n",
252 | "
\n",
253 | " \n",
254 | " | 2020-09-17 | \n",
255 | " 258.28 | \n",
256 | " 261.50 | \n",
257 | " 250.19 | \n",
258 | " 254.82 | \n",
259 | " 31281400 | \n",
260 | " 0 | \n",
261 | " 0 | \n",
262 | "
\n",
263 | " \n",
264 | " | 2020-09-18 | \n",
265 | " 258.40 | \n",
266 | " 259.20 | \n",
267 | " 250.05 | \n",
268 | " 252.53 | \n",
269 | " 28130800 | \n",
270 | " 0 | \n",
271 | " 0 | \n",
272 | "
\n",
273 | " \n",
274 | " | 2020-09-21 | \n",
275 | " 247.54 | \n",
276 | " 249.95 | \n",
277 | " 244.13 | \n",
278 | " 248.15 | \n",
279 | " 24709400 | \n",
280 | " 0 | \n",
281 | " 0 | \n",
282 | "
\n",
283 | " \n",
284 | " | 2020-09-22 | \n",
285 | " 253.31 | \n",
286 | " 255.32 | \n",
287 | " 248.22 | \n",
288 | " 254.75 | \n",
289 | " 30293100 | \n",
290 | " 0 | \n",
291 | " 0 | \n",
292 | "
\n",
293 | " \n",
294 | " | 2020-09-23 | \n",
295 | " 255.26 | \n",
296 | " 257.99 | \n",
297 | " 248.15 | \n",
298 | " 249.02 | \n",
299 | " 19641300 | \n",
300 | " 0 | \n",
301 | " 0 | \n",
302 | "
\n",
303 | " \n",
304 | " | 2020-09-24 | \n",
305 | " 246.50 | \n",
306 | " 252.24 | \n",
307 | " 245.62 | \n",
308 | " 249.53 | \n",
309 | " 20006800 | \n",
310 | " 0 | \n",
311 | " 0 | \n",
312 | "
\n",
313 | " \n",
314 | " | 2020-09-25 | \n",
315 | " 249.40 | \n",
316 | " 255.75 | \n",
317 | " 246.61 | \n",
318 | " 254.82 | \n",
319 | " 18351300 | \n",
320 | " 0 | \n",
321 | " 0 | \n",
322 | "
\n",
323 | " \n",
324 | " | 2020-09-28 | \n",
325 | " 259.40 | \n",
326 | " 259.60 | \n",
327 | " 254.82 | \n",
328 | " 256.82 | \n",
329 | " 18807800 | \n",
330 | " 0 | \n",
331 | " 0 | \n",
332 | "
\n",
333 | " \n",
334 | "
\n",
335 | "
"
336 | ],
337 | "text/plain": [
338 | " Open High Low Close Volume Dividends Stock Splits\n",
339 | "Date \n",
340 | "2020-08-31 293.95 296.88 291.55 293.20 17345100 0 0\n",
341 | "2020-09-01 294.71 301.49 292.71 295.44 17320900 0 0\n",
342 | "2020-09-02 298.88 303.60 293.05 302.50 24341400 0 0\n",
343 | "2020-09-03 295.99 297.60 283.63 291.12 32294100 0 0\n",
344 | "2020-09-04 287.25 289.00 271.14 282.73 30333700 0 0\n",
345 | "2020-09-08 271.28 279.30 269.42 271.16 24864000 0 0\n",
346 | "2020-09-09 275.77 278.48 271.35 273.72 22918800 0 0\n",
347 | "2020-09-10 275.51 279.16 267.03 268.09 24814700 0 0\n",
348 | "2020-09-11 270.06 271.39 262.64 266.61 18913900 0 0\n",
349 | "2020-09-14 270.95 276.64 265.70 266.15 24093800 0 0\n",
350 | "2020-09-15 270.67 274.52 269.30 272.42 18478500 0 0\n",
351 | "2020-09-16 267.29 272.44 261.79 263.52 29183400 0 0\n",
352 | "2020-09-17 258.28 261.50 250.19 254.82 31281400 0 0\n",
353 | "2020-09-18 258.40 259.20 250.05 252.53 28130800 0 0\n",
354 | "2020-09-21 247.54 249.95 244.13 248.15 24709400 0 0\n",
355 | "2020-09-22 253.31 255.32 248.22 254.75 30293100 0 0\n",
356 | "2020-09-23 255.26 257.99 248.15 249.02 19641300 0 0\n",
357 | "2020-09-24 246.50 252.24 245.62 249.53 20006800 0 0\n",
358 | "2020-09-25 249.40 255.75 246.61 254.82 18351300 0 0\n",
359 | "2020-09-28 259.40 259.60 254.82 256.82 18807800 0 0"
360 | ]
361 | },
362 | "metadata": {
363 | "tags": []
364 | },
365 | "execution_count": 20
366 | }
367 | ]
368 | },
369 | {
370 | "cell_type": "code",
371 | "metadata": {
372 | "id": "x5G7JJfUsA0i"
373 | },
374 | "source": [
375 | "last_day = df.tail(1).copy()"
376 | ],
377 | "execution_count": 17,
378 | "outputs": []
379 | },
380 | {
381 | "cell_type": "markdown",
382 | "metadata": {
383 | "id": "WGfvJQKK5Hh3"
384 | },
385 | "source": [
386 | "Reference for the pivot point formulas used in the next cell: https://www.tradingview.com/support/solutions/43000521824-pivot-points-standard/\n"
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "metadata": {
392 | "id": "XyRdpLqq5B9z"
393 | },
394 | "source": [
395 | "# Pivot point plus three resistance (R) and support (S) levels from the row's High, Low and Close.\n",
396 | "hi, lo, cl = last_day['High'], last_day['Low'], last_day['Close']\n",
397 | "pp, span = (hi + lo + cl) / 3, hi - lo  # pivot value and High-Low range\n",
398 | "levels = {'Pivot': pp, 'R1': 2*pp - lo, 'S1': 2*pp - hi, 'R2': pp + span,\n",
399 | "          'S2': pp - span, 'R3': pp + 2*span, 'S3': pp - 2*span}\n",
400 | "for col, val in levels.items():  # insertion order keeps the original column order\n",
401 | "    last_day[col] = val\n"
402 | ],
403 | "execution_count": 21,
404 | "outputs": []
405 | },
406 | {
407 | "cell_type": "code",
408 | "metadata": {
409 | "id": "MO4FoBnrs3tG",
410 | "outputId": "122f2ba4-7583-448c-8851-1f4b2bef8958",
411 | "colab": {
412 | "base_uri": "https://localhost:8080/",
413 | "height": 109
414 | }
415 | },
416 | "source": [
417 | "last_day"
418 | ],
419 | "execution_count": 22,
420 | "outputs": [
421 | {
422 | "output_type": "execute_result",
423 | "data": {
424 | "text/html": [
425 | "\n",
426 | "\n",
439 | "
\n",
440 | " \n",
441 | " \n",
442 | " | \n",
443 | " Open | \n",
444 | " High | \n",
445 | " Low | \n",
446 | " Close | \n",
447 | " Volume | \n",
448 | " Dividends | \n",
449 | " Stock Splits | \n",
450 | " Pivot | \n",
451 | " R1 | \n",
452 | " S1 | \n",
453 | " R2 | \n",
454 | " S2 | \n",
455 | " R3 | \n",
456 | " S3 | \n",
457 | "
\n",
458 | " \n",
459 | " | Date | \n",
460 | " | \n",
461 | " | \n",
462 | " | \n",
463 | " | \n",
464 | " | \n",
465 | " | \n",
466 | " | \n",
467 | " | \n",
468 | " | \n",
469 | " | \n",
470 | " | \n",
471 | " | \n",
472 | " | \n",
473 | " | \n",
474 | "
\n",
475 | " \n",
476 | " \n",
477 | " \n",
478 | " | 2020-09-28 | \n",
479 | " 259.4 | \n",
480 | " 259.6 | \n",
481 | " 254.82 | \n",
482 | " 256.82 | \n",
483 | " 18807800 | \n",
484 | " 0 | \n",
485 | " 0 | \n",
486 | " 257.08 | \n",
487 | " 259.34 | \n",
488 | " 254.56 | \n",
489 | " 261.86 | \n",
490 | " 252.3 | \n",
491 | " 266.64 | \n",
492 | " 247.52 | \n",
493 | "
\n",
494 | " \n",
495 | "
\n",
496 | "
"
497 | ],
498 | "text/plain": [
499 | " Open High Low Close ... R2 S2 R3 S3\n",
500 | "Date ... \n",
501 | "2020-09-28 259.4 259.6 254.82 256.82 ... 261.86 252.3 266.64 247.52\n",
502 | "\n",
503 | "[1 rows x 14 columns]"
504 | ]
505 | },
506 | "metadata": {
507 | "tags": []
508 | },
509 | "execution_count": 22
510 | }
511 | ]
512 | }
513 | ]
514 | }
--------------------------------------------------------------------------------
/Heikin_Ashi.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Heikin Ashi.ipynb",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyM0DD/2yAin1oGhrWEEoJKb"
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "language_info": {
15 | "name": "python"
16 | }
17 | },
18 | "cells": [
19 | {
20 | "cell_type": "code",
21 | "metadata": {
22 | "colab": {
23 | "base_uri": "https://localhost:8080/"
24 | },
25 | "id": "-Y_YRzuvppCa",
26 | "outputId": "5ed4c5e0-bdde-4ba5-d70f-2eb78de21b85"
27 | },
28 | "source": [
29 | "!pip install yfinance"
30 | ],
31 | "execution_count": 79,
32 | "outputs": [
33 | {
34 | "output_type": "stream",
35 | "text": [
36 | "Requirement already satisfied: yfinance in /usr/local/lib/python3.7/dist-packages (0.1.59)\n",
37 | "Requirement already satisfied: multitasking>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from yfinance) (0.0.9)\n",
38 | "Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.7/dist-packages (from yfinance) (1.19.5)\n",
39 | "Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.7/dist-packages (from yfinance) (2.23.0)\n",
40 | "Requirement already satisfied: pandas>=0.24 in /usr/local/lib/python3.7/dist-packages (from yfinance) (1.1.5)\n",
41 | "Requirement already satisfied: lxml>=4.5.1 in /usr/local/lib/python3.7/dist-packages (from yfinance) (4.6.3)\n",
42 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20->yfinance) (2020.12.5)\n",
43 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20->yfinance) (1.24.3)\n",
44 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20->yfinance) (2.10)\n",
45 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20->yfinance) (3.0.4)\n",
46 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24->yfinance) (2018.9)\n",
47 | "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24->yfinance) (2.8.1)\n",
48 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=0.24->yfinance) (1.15.0)\n"
49 | ],
50 | "name": "stdout"
51 | }
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "metadata": {
57 | "colab": {
58 | "base_uri": "https://localhost:8080/"
59 | },
60 | "id": "LPHWYHWNqdkS",
61 | "outputId": "d1d7f7ae-dba4-4605-b206-e67b1e9756c9"
62 | },
63 | "source": [
64 | "!pip install mpl_finance"
65 | ],
66 | "execution_count": 80,
67 | "outputs": [
68 | {
69 | "output_type": "stream",
70 | "text": [
71 | "Requirement already satisfied: mpl_finance in /usr/local/lib/python3.7/dist-packages (0.10.1)\n",
72 | "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from mpl_finance) (3.2.2)\n",
73 | "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mpl_finance) (2.8.1)\n",
74 | "Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mpl_finance) (1.19.5)\n",
75 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mpl_finance) (0.10.0)\n",
76 | "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mpl_finance) (2.4.7)\n",
77 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mpl_finance) (1.3.1)\n",
78 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->mpl_finance) (1.15.0)\n"
79 | ],
80 | "name": "stdout"
81 | }
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "metadata": {
87 | "id": "og1Ipiicqeum"
88 | },
89 | "source": [
90 | "import pandas as pd\n",
91 | "import yfinance\n",
92 | "from mpl_finance import candlestick_ohlc\n",
93 | "import matplotlib.pyplot as plt\n",
94 | "\n",
95 | "plt.rcParams['figure.figsize'] = [12, 7]\n",
96 | "\n",
97 | "plt.rc('font', size=14) "
98 | ],
99 | "execution_count": 81,
100 | "outputs": []
101 | },
102 | {
103 | "cell_type": "code",
104 | "metadata": {
105 | "id": "Xmu5Pr5HqhiG"
106 | },
107 | "source": [
108 | "name = 'SPY'\n",
109 | "ticker = yfinance.Ticker(name)\n",
110 | "df = ticker.history(interval=\"1d\",start=\"2020-12-15\",end=\"2021-04-15\")\n",
111 | "\n"
112 | ],
113 | "execution_count": 82,
114 | "outputs": []
115 | },
116 | {
117 | "cell_type": "code",
118 | "metadata": {
119 | "id": "HfHSNs9TqiuM"
120 | },
121 | "source": [
122 | "# plot_chart feeds df.values to candlestick_ohlc: keep a running integer x-position, then O, H, L, C.\n",
123 | "ohlc_columns = ['Date', 'Open', 'High', 'Low', 'Close']\n",
124 | "df = df.assign(Date=range(len(df)))[ohlc_columns]"
125 | ],
126 | "execution_count": 83,
127 | "outputs": []
128 | },
129 | {
130 | "cell_type": "code",
131 | "metadata": {
132 | "id": "xNcokXHkqlKI"
133 | },
134 | "source": [
135 | "def plot_chart(df):\n",
136 | "    \"\"\"Draw `df.values` as candlesticks (green up, red down) on a new figure.\n",
137 | "\n",
138 | "    Callers in this notebook pass the columns Date, Open, High, Low, Close in that order.\n",
139 | "    \"\"\"\n",
140 | "    figure, axes = plt.subplots()\n",
141 | "    candlestick_ohlc(axes, df.values, width=0.6, colorup='green', colordown='red', alpha=0.8)\n",
142 | "    figure.tight_layout()\n",
143 | "    figure.show()"
144 | ],
145 | "execution_count": 84,
146 | "outputs": []
147 | },
148 | {
149 | "cell_type": "code",
150 | "metadata": {
151 | "colab": {
152 | "base_uri": "https://localhost:8080/",
153 | "height": 505
154 | },
155 | "id": "3V25LX-sqroQ",
156 | "outputId": "abce8dd1-769f-4da2-f1fc-17d93c7fcdf6"
157 | },
158 | "source": [
159 | "plot_chart(df)"
160 | ],
161 | "execution_count": 85,
162 | "outputs": [
163 | {
164 | "output_type": "display_data",
165 | "data": {
166 | "image/png": "\n",
167 | "text/plain": [
168 | ""
169 | ]
170 | },
171 | "metadata": {
172 | "tags": [],
173 | "needs_background": "light"
174 | }
175 | }
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "metadata": {
181 | "id": "efeyvyrvqva6"
182 | },
183 | "source": [
184 | "df_ha = df.copy()"
185 | ],
186 | "execution_count": 86,
187 | "outputs": []
188 | },
189 | {
190 | "cell_type": "code",
191 | "metadata": {
192 | "id": "91N0HeBfqzuc"
193 | },
194 | "source": [
195 | "# Simplified Heikin-Ashi candles, rebuilt from the raw `df` so the cell can be re-run safely:\n",
196 | "#   Open  = midpoint of the previous raw candle's Open and Close\n",
197 | "#   Close = mean of the current raw candle's Open, Close, Low and High\n",
198 | "# NOTE(review): High/Low stay raw and Open is not chained from the previous HA candle - confirm this variant is intended.\n",
199 | "df_ha = df.assign(\n",
200 | "    Open=(df['Open'].shift(1) + df['Close'].shift(1)) / 2,\n",
201 | "    Close=(df['Open'] + df['Close'] + df['Low'] + df['High']) / 4,\n",
202 | ")\n",
203 | "df_ha = df_ha.iloc[1:,:]  # first row has no previous candle, so it is dropped"
204 | ],
205 | "execution_count": 87,
206 | "outputs": []
207 | },
208 | {
209 | "cell_type": "code",
210 | "metadata": {
211 | "colab": {
212 | "base_uri": "https://localhost:8080/",
213 | "height": 505
214 | },
215 | "id": "YW7fNhGstcR1",
216 | "outputId": "21454df8-1dbf-4490-85ba-ff546ead6388"
217 | },
218 | "source": [
219 | "plot_chart(df_ha)"
220 | ],
221 | "execution_count": 88,
222 | "outputs": [
223 | {
224 | "output_type": "display_data",
225 | "data": {
226 | "image/png": "\n",
227 | "text/plain": [
228 | ""
229 | ]
230 | },
231 | "metadata": {
232 | "tags": [],
233 | "needs_background": "light"
234 | }
235 | }
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "metadata": {
241 | "id": "gQm8nD4ktzk7"
242 | },
243 | "source": [
244 | " "
245 | ],
246 | "execution_count": 88,
247 | "outputs": []
248 | }
249 | ]
250 | }
--------------------------------------------------------------------------------
/Threshold.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Threshold.ipynb",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyOFYlTl5r9npPtfRIa4ypnY"
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "language_info": {
15 | "name": "python"
16 | }
17 | },
18 | "cells": [
19 | {
20 | "cell_type": "code",
21 | "metadata": {
22 | "id": "QMQ3e-0Tm0aR"
23 | },
24 | "source": [
25 | "import numpy as np\n",
26 | "import matplotlib.pyplot as plt\n"
27 | ],
28 | "execution_count": null,
29 | "outputs": []
30 | },
31 | {
32 | "cell_type": "code",
33 | "metadata": {
34 | "id": "R9VmqWlEkipF"
35 | },
36 | "source": [
37 | "from sklearn.datasets import load_boston"
38 | ],
39 | "execution_count": null,
40 | "outputs": []
41 | },
42 | {
43 | "cell_type": "code",
44 | "metadata": {
45 | "id": "WA4l8eSJmE4s"
46 | },
47 | "source": [
48 | "from sklearn.linear_model import LogisticRegression\n",
49 | "from sklearn.preprocessing import StandardScaler\n",
50 | "from sklearn.pipeline import make_pipeline"
51 | ],
52 | "execution_count": null,
53 | "outputs": []
54 | },
55 | {
56 | "cell_type": "code",
57 | "metadata": {
58 | "id": "Zl9G_yLSmJDd"
59 | },
60 | "source": [
61 | "from sklearn.model_selection import train_test_split,cross_val_score\n",
62 | "from sklearn.metrics import roc_curve,plot_roc_curve, balanced_accuracy_score"
63 | ],
64 | "execution_count": null,
65 | "outputs": []
66 | },
67 | {
68 | "cell_type": "code",
69 | "metadata": {
70 | "id": "q--Hc4VumQQe"
71 | },
72 | "source": [
73 | "# NOTE(review): load_boston was removed in scikit-learn 1.2 - this cell needs an older release.\n",
74 | "X, target = load_boston(return_X_y=True)\n",
75 | "# Binary label: 1 where the regression target exceeds its mean, 0 otherwise.\n",
76 | "y = (target > target.mean()).astype(int)"
77 | ],
78 | "execution_count": null,
79 | "outputs": []
80 | },
81 | {
82 | "cell_type": "code",
83 | "metadata": {
84 | "id": "ZMmrxRlImLSQ"
85 | },
86 | "source": [
87 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)"
88 | ],
89 | "execution_count": null,
90 | "outputs": []
91 | },
92 | {
93 | "cell_type": "code",
94 | "metadata": {
95 | "id": "z43ZuZvtmPI6"
96 | },
97 | "source": [
98 | "model = make_pipeline(StandardScaler(),LogisticRegression())"
99 | ],
100 | "execution_count": null,
101 | "outputs": []
102 | },
103 | {
104 | "cell_type": "code",
105 | "metadata": {
106 | "colab": {
107 | "base_uri": "https://localhost:8080/"
108 | },
109 | "id": "zzf0l6HzmXX3",
110 | "outputId": "f7800eb5-bd64-482a-8d7b-708b65221fef"
111 | },
112 | "source": [
113 | "model.fit(X_train,y_train)"
114 | ],
115 | "execution_count": null,
116 | "outputs": [
117 | {
118 | "output_type": "execute_result",
119 | "data": {
120 | "text/plain": [
121 | "Pipeline(memory=None,\n",
122 | " steps=[('standardscaler',\n",
123 | " StandardScaler(copy=True, with_mean=True, with_std=True)),\n",
124 | " ('logisticregression',\n",
125 | " LogisticRegression(C=1.0, class_weight=None, dual=False,\n",
126 | " fit_intercept=True, intercept_scaling=1,\n",
127 | " l1_ratio=None, max_iter=100,\n",
128 | " multi_class='auto', n_jobs=None,\n",
129 | " penalty='l2', random_state=None,\n",
130 | " solver='lbfgs', tol=0.0001, verbose=0,\n",
131 | " warm_start=False))],\n",
132 | " verbose=False)"
133 | ]
134 | },
135 | "metadata": {
136 | "tags": []
137 | },
138 | "execution_count": 8
139 | }
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "metadata": {
145 | "colab": {
146 | "base_uri": "https://localhost:8080/",
147 | "height": 296
148 | },
149 | "id": "epLsoevIxk_k",
150 | "outputId": "16bc8ad9-47b2-4aab-8b2c-5852aebd82ea"
151 | },
152 | "source": [
153 | "plot_roc_curve(model,X_train,y_train)"
154 | ],
155 | "execution_count": null,
156 | "outputs": [
157 | {
158 | "output_type": "execute_result",
159 | "data": {
160 | "text/plain": [
161 | ""
162 | ]
163 | },
164 | "metadata": {
165 | "tags": []
166 | },
167 | "execution_count": 9
168 | },
169 | {
170 | "output_type": "display_data",
171 | "data": {
172 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deZhV1Znv8e8vDEJENAreKyANiWiDQFBLgTYaCUYJsSFeEafEtvVGO5EM7dBX2kQJnc4E2h072oYYQ0wQwSlWjCidjrSJCghKGMqoqAjFoIBGJQQRfe8fexc51HiKqn0OVfv3eZ7z1B7W2fvdVHHes9baey1FBGZmll8fKHcAZmZWXk4EZmY550RgZpZzTgRmZjnnRGBmlnMdyx1Ac/Xo0SP69etX7jDMzNqUpUuXbomInvXta3OJoF+/fixZsqTcYZiZtSmSXmlon5uGzMxyzonAzCznnAjMzHLOicDMLOecCMzMci6zRCDpdkmvSVrZwH5JuknSaknLJR2bVSxmZtawLGsEM4Exjez/FDAgfV0K/GeGsZiZWQMye44gIh6T1K+RIuOBOyIZB3uhpIMkHRYRG7OKyczy585Fa3lg2fpyh9EqBvXqzvV/e3SrH7ecfQS9gXUF69XptjokXSppiaQlmzdvLklwZtY+PLBsPVUb3yp3GPu0NvFkcUTMAGYAVFRUeCYdszaulN/Sqza+xaDDujPnspElOV9bVM5EsB44vGC9T7rNLPfaU3NGfRa9/DoAw/sfnPm5Bh3WnfHD6m1ssFQ5E0ElMEnSXcBw4E33D5glapozBh3WvdyhZGJ4/4MZP6w35w/vW+5QjAwTgaTZwClAD0nVwPVAJ4CIuBV4CBgLrAa2A3+fVSxmbZGbM6xUsrxr6Lwm9gdweVbnN2uOfa0ppj3XBmzf0yY6iy2fSvnhXMo262K4XdtKyYnAMtPSD/JSfji7zdryzInAMtPSDk9/OJuVhhOBtarCWoDv3zZrG5wIbA+t2Zzjdm6ztsGJwPbg5hyz/HEisDrcnGOWL04EOdRY84/vXzfLH89QljN3LlrLP9+/Yndbfm1u1zfLH9cIcqamJvCtM4e4Hd/MANcIcml4/4OdBMxsN9cI2rna/QHuAzCz2lwjaMfq6w9wH4CZ1eYaQRuwtw951SQA9weYWWOcCNqAvX3Iyw93mVkxnAj2YTU1AY/ZY2ZZch/BPqwwCbhd38yy4hrBPsg1ATMrJdcI9kGuCZhZKblGsI+5c9FaFr38OsP7H+yagJmVhBNBGdV3W2jNLZ+uCZhZqTgRlFF9t4X6lk8zKzUngjJzZ7CZlZs7i83Mcs41ghKrb3J3M7NyciJoZU2NC+TJ3c1sX+NE0MqaGhfIncFmtq9xIsiAO4DNrC1xImimppp+3O5vZm2N7xpqppqmn4a43d/M2hrXCPaCm37MrD3JtEYgaYyk5yStlnRNPfv7SnpU0jOSlksam2U8ZmZWV2aJQFIH4GbgU8Ag4DxJg2oV+xowNyKOAc4FbskqHjMzq1+WNYITgNUR8VJE7ATuAsbXKhNATc/qgcCGDOMxM7N6ZJkIegPrCtar022FpgCflVQNPAR8qb4DSbpU0hJJSzZv3pxFrGZmuVXuu4bOA2ZGRB9gLPAzSXViiogZEVERERU9e/YseZBmZu1ZlncNrQcOL1jvk24rdAkwBiAinpTUBegBvJZhXHul9vSRZmbtRZY1gqeAAZL6S+pM0hlcWavMWmA0gKSBQBdgn2z78fSRZtZeZVYjiIhdkiYBjwAdgNsjYpWkqcCSiKgErgR+JOkfSTqOL4qIyCqmlvLzA2bWHmX6QFlEPETSCVy47bqC5SrgxCxjMDOzxpW7s7hNqJlQ3sysPXIiKELNIHPuGzCz9siJoEjD+x/sOQTMrF1yIjAzyzmPPtoIPztgZnngRFCPmgRQOL+w+wfMrL1yIqhHTS3A8wub
WR44ETTAD4+ZWV64s7gWPzNgZnnjGkGqdr+A+wTMLC+KTgSSPhgR27MMppzcL2BmedVkIpD0N8BtQDegr6SPApdFxBezDq7U3C9gZnlUTB/BvwGnA1sBIuL3wMlZBmVmZqVTVGdxRKyrtem9DGIxM7MyKKaPYF3aPBSSOgFfAZ7NNiwzMyuVYmoE/wBcTjLx/HpgGNDu+gfMzPKqmBrBURFxQeEGSScCj2cTkpmZlVIxNYL/KHKbmZm1QQ3WCCSNBP4G6CnpioJd3UnmIDYzs3agsaahziTPDnQEDijY/hYwIcugzMysdBpMBBHxP8D/SJoZEa+UMKaSqxlfaHj/g8sdiplZyRXTWbxd0jTgaKBLzcaI+ERmUZWY5yQ2szwrprN4FvAHoD/wDWAN8FSGMZWF5yQ2s7wqJhEcEhE/Bt6NiP+JiIuBdlMbMDPLu2Kaht5Nf26U9GlgA+DGdDOzdqKYRPBNSQcCV5I8P9Ad+GqmUZmZWck0mQgi4sF08U1gFOx+stjMzNqBxh4o6wBMJBlj6OGIWCnpDOCfga7AMaUJ0czMstRYjeDHwOHAYuAmSRuACuCaiPhFKYLLWs30lFUb32LQYd3LHY6ZWVk0lggqgKER8b6kLsAm4CMRsbU0oWWvMAn4GQIzy6vGbh/dGRHvA0TEDuCl5iYBSWMkPSdptaRrGigzUVKVpFWS7mzO8VtDzfSUfobAzPKqsRrBX0tani4L+Ei6LiAiYmhjB077GG4GPglUA09JqoyIqoIyA4DJwIkR8YakQ1twLWZmthcaSwQDW3jsE4DVEfESgKS7gPFAVUGZzwM3R8QbABHxWgvPaWZmzdTYoHMtHWiuN1A413E1MLxWmSMBJD1OMrT1lIh4uPaBJF0KXArQt6+bcMzMWlNRk9dnqCMwADgFOA/4kaSDaheKiBkRURERFT179ixxiGZm7VuWiWA9ye2nNfqk2wpVA5UR8W5EvAw8T5IYMnXnorWc88Mnqdr4VtanMjPb5xWVCCR1lXRUM4/9FDBAUn9JnYFzgcpaZX5BUhtAUg+SpqKXmnmeZvNto2Zmf9FkIpD0t8Ay4OF0fZik2h/odUTELmAS8AjwLDA3IlZJmippXFrsEWCrpCrgUeDqUj2n4NtGzcwSxQw6N4XkDqAFABGxTFL/Yg4eEQ8BD9Xadl3BcgBXpC8zMyuDYpqG3o2IN2ttiyyCMTOz0iumRrBK0vlAh/QBsC8DT2QblpmZlUoxNYIvkcxX/A5wJ8lw1J6PwMysnSimRvDXEXEtcG3WwZiZWekVUyO4QdKzkv5F0uDMIzIzs5JqMhFExCiSmck2Az+UtELS1zKPzMzMSqKoB8oiYlNE3AT8A8kzBdc18RYzM2sjinmgbKCkKZJWkExe/wTJcBFmZtYOFNNZfDswBzg9IjZkHI+ZmZVYk4kgIkaWIhAzMyuPBhOBpLkRMTFtEip8krioGcrMzKxtaKxG8JX05xmlCMTMzMqjwc7iiNiYLn4xIl4pfAFfLE14ZmaWtWJuH/1kPds+1dqBmJlZeTSYCCR9Ie0fOErS8oLXy8Dy0oXYuu5ctJZFL79e7jDMzPYZjfUR3AnMA74NXFOw/e2IaLOfpA8sS2bL9MxkZmaJxhJBRMQaSZfX3iHp4LacDIb3P9gzk5mZpZqqEZwBLCW5fVQF+wL4cIZxmZlZiTSYCCLijPRnUdNSmplZ21TMWEMnSto/Xf6spBsluV3FzKydKOb20f8Etkv6KHAl8CLws0yjMjOzkikmEeyKiADGAz+IiJuBA7INy8zMSqWY0UffljQZ+BxwkqQPAJ2yDcvMzEqlmBrBOSQT118cEZtI5iKYlmlUZmZWMsVMVbkJmAUcKOkMYEdE3JF5ZGZmVhLF3DU0EVgMnA1MBBZJmpB1YGZmVhrF9BFcCxwfEa8BSOoJ/Bq4J8vAzMysNIrpI/hATRJIbS3yfWZm1gYUUyN4WNIjwOx0/RzgoexCMjOzUipmzuKrJf0f
4GPpphkRcX+2YZmZWak0NmfxAGA68BFgBXBVRKwvVWBmZlYajbX13w48CJxFMgLpfzT34JLGSHpO0mpJ1zRS7ixJIamiuecwM7OWaaxp6ICI+FG6/Jykp5tzYEkdgJtJprqsBp6SVBkRVbXKHQB8BVjUnOObmVnraCwRdJF0DH+Zh6Br4XpENJUYTgBWR8RLAJLuIhmvqKpWuX8Bvgtc3czYzcysFTSWCDYCNxasbypYD+ATTRy7N7CuYL0aGF5YQNKxwOER8StJDSYCSZcClwL07esRsM3MWlNjE9OMyvLE6eB1NwIXNVU2ImYAMwAqKioiy7jMzPImywfD1gOHF6z3SbfVOAAYDCyQtAYYAVS6w9jMrLSyTARPAQMk9ZfUGTgXqKzZGRFvRkSPiOgXEf2AhcC4iFiSYUxmZlZLZokgInYBk4BHgGeBuRGxStJUSeOyOq+ZmTVPk08WSxJwAfDhiJiazlf8vyNicVPvjYiHqDUcRURc10DZU4qK2MzMWlUxNYJbgJHAeen62yTPB5iZWTtQzKBzwyPiWEnPAETEG2mbv5mZtQPF1AjeTZ8SDtg9H8H7mUZlZmYlU0wiuAm4HzhU0r8CvwO+lWlUZmZWMsUMQz1L0lJgNMnwEp+JiGczj8zMzEqimLuG+gLbgV8WbouItVkGZmZmpVFMZ/GvSPoHBHQB+gPPAUdnGJeZmZVIMU1DQwrX04HivphZRGZmVlLNfrI4HX56eJMFzcysTSimj+CKgtUPAMcCGzKLyMzMSqqYPoIDCpZ3kfQZ3JtNOGZmVmqNJoL0QbIDIuKqEsVjZmYl1mAfgaSOEfEecGIJ4zEzsxJrrEawmKQ/YJmkSuBu4E81OyPivoxjMzOzEiimj6ALsJVkjuKa5wkCcCIwM2sHGksEh6Z3DK3kLwmghucNNjNrJxpLBB2AbuyZAGo4EZiZtRONJYKNETG1ZJGYmVlZNPZkcX01ATMza2caSwSjSxaFmZmVTYOJICJeL2UgZmZWHs0edM7MzNoXJwIzs5xzIjAzyzknAjOznHMiMDPLOScCM7OccyIwM8s5JwIzs5xzIjAzy7lME4GkMZKek7Ra0jX17L9CUpWk5ZL+W9JfZRmPmZnVlVkiSOc7vhn4FDAIOE/SoFrFngEqImIocA/wvaziMTOz+mVZIzgBWB0RL0XETuAuYHxhgYh4NCK2p6sLgT4ZxmNmZvXIMhH0BtYVrFen2xpyCTCvvh2SLpW0RNKSzZs3t2KIZma2T3QWS/osUAFMq29/RMyIiIqIqOjZs2dpgzMza+eKmbx+b60HDi9Y75Nu24OkU4FrgY9HxDsZxmNmZvXIskbwFDBAUn9JnYFzgcrCApKOAX4IjIuI1zKMxczMGpBZIoiIXcAk4BHgWWBuRKySNFXSuLTYNKAbcLekZZIqGzicmZllJMumISLiIeChWtuuK1g+Ncvzm5lZ0/aJzmIzMyufTGsE+5I7F63lgWXrqdr4FoMO617ucMzM9hm5qREUJoHxwxp7nMHMLF9yUyMAGHRYd+ZcNrLcYZiZ7VNyUyMwM7P6ORGYmeWcE4GZWc45EZiZ5ZwTgZlZzjkRmJnlnBOBmVnOORGYmeWcE4GZWc45EZiZ5ZwTgZlZzjkRmJnlnBOBmVnOORGYmeWcE4GZWc45EZiZ5ZwTgZlZzuVqhjKztuLdd9+lurqaHTt2lDsUa2O6dOlCnz596NSpU9HvcSIw2wdVV1dzwAEH0K9fPySVOxxrIyKCrVu3Ul1dTf/+/Yt+n5uGzPZBO3bs4JBDDnESsGaRxCGHHNLsmqQTgdk+yknA9sbe/N04EZiZ5ZwTgZnVq0OHDgwbNozBgwdz9tlns337dpYsWcKXv/zlvT5mt27dANiwYQMTJkxorVD56le/ymOPPbZ7fcuWLXTq1Ilbb7213vPXmDlzJpMmTdq9fscddzB48GCGDBnCMcccw/Tp01sc28MPP8xRRx3FEUccwXe+8516y7zyyiuM
Hj2aoUOHcsopp1BdXb3H/rfeeos+ffrsEeupp57KG2+80eL4wInAzBrQtWtXli1bxsqVK+ncuTO33norFRUV3HTTTS0+dq9evbjnnntaIUrYunUrCxcu5OSTT9697e6772bEiBHMnj276OPMmzePf//3f2f+/PmsWLGChQsXcuCBB7Yotvfee4/LL7+cefPmUVVVxezZs6mqqqpT7qqrruLCCy9k+fLlXHfddUyePHmP/V//+tf3uD6Az33uc9xyyy0tiq+G7xoy28d945erqNrwVqsec1Cv7lz/t0cXXf6kk05i+fLlLFiwgOnTp/Pggw8yZcoUXnzxRVavXs2WLVv4p3/6Jz7/+c8DMG3aNObOncs777zDmWeeyTe+8Y09jrdmzRrOOOMMVq5cycyZM6msrGT79u28+OKLnHnmmXzve98DYP78+Vx//fW88847fOQjH+EnP/lJnW/19957L2PGjNlj2+zZs7nhhhs4//zzqa6upk+fPk1e47e//W2mT59Or169ANhvv/12X8/eWrx4MUcccQQf/vCHATj33HN54IEHGDRo0B7lqqqquPHGGwEYNWoUn/nMZ3bvW7p0Ka+++ipjxoxhyZIlu7ePGzeOk046iWuvvbZFMYJrBGbWhF27djFv3jyGDBlSZ9/y5cv5zW9+w5NPPsnUqVPZsGED8+fP54UXXmDx4sUsW7aMpUuX7tFsU59ly5YxZ84cVqxYwZw5c1i3bh1btmzhm9/8Jr/+9a95+umnqaio2P1hWejxxx/nuOOO272+bt06Nm7cyAknnMDEiROZM2dOUde5cuXKPY7TkFmzZjFs2LA6r/qautavX8/hhx++e71Pnz6sX7++TrmPfvSj3HfffQDcf//9vP3222zdupX333+fK6+8st4mqg996EO88847bN26tajra4xrBGb7uOZ8c29Nf/7znxk2bBiQ1AguueQSnnjiiT3KjB8/nq5du9K1a1dGjRrF4sWL+d3vfsf8+fM55phjANi2bRsvvPBCnaaNQqNHj97dDDNo0CBeeeUV/vjHP1JVVcWJJ54IwM6dOxk5cmSd927cuJGePXvuXp8zZw4TJ04Ekm/gF198MVdeeWWD527uXTYXXHABF1xwQbPe05Tp06czadIkZs6cycknn0zv3r3p0KEDt9xyC2PHjm2wRnPooYeyYcMGDjnkkBadP9NEIGkM8H2gA3BbRHyn1v79gDuA44CtwDkRsSbLmMysODV9BI2p/SEqiYhg8uTJXHbZZUWfa7/99tu93KFDB3bt2kVE8MlPfrLJdv6uXbvucd/87Nmz2bRpE7NmzQKSjukXXniBAQMG0LVrV3bu3Ennzp0BeP311+nRowcARx99NEuXLuUTn/hEo+ebNWsW06ZNq7P9iCOOqNPv0bt3b9atW7d7vbq6mt69e9d5b69evXbXCLZt28a9997LQQcdxJNPPslvf/tbbrnlFrZt28bOnTvp1q3b7k7nHTt20LVr10bjLUZmTUOSOgA3A58CBgHnSRpUq9glwBsRcQTwb8B3s4rHzFrfAw88wI4dO9i6dSsLFizg+OOP5/TTT+f2229n27ZtQNI88tprrzX72CNGjODxxx9n9erVAPzpT3/i+eefr1Nu4MCBu8s8//zzbNu2jfXr17NmzRrWrFnD5MmTdyeTj3/84/z85z8HkhrP3LlzGTVqFACTJ0/m6quvZtOmTUBSA7ntttvqnO+CCy5g2bJldV71dX4ff/zxvPDCC7z88svs3LmTu+66i3HjxtUpt2XLFt5//30g6au4+OKLgSTprF27ljVr1jB9+nQuvPDC3UkgIti0aRP9+vUr/h+1AVn2EZwArI6IlyJiJ3AXML5WmfHAT9Ple4DR8lM0Zm3G0KFDGTVqFCNGjODrX/86vXr14rTTTuP8889n5MiRDBkyhAkTJvD22283+9g9e/Zk5syZnHfeeQwdOpSRI0fyhz/8oU65T3/60yxYsABIagNnnnnmHvvP
Ouus3Yng+9//Pvfddx/Dhg1jxIgRnH322bubrMaOHcukSZM49dRTOfroozn22GN5662WddJ37NiRH/zgB5x++ukMHDiQiRMncvTRSVPfddddR2VlJQALFizgqKOO4sgjj+TVV18tqgN46dKljBgxgo4dW96wo4ho8UHqPbA0ARgTEf83Xf8cMDwiJhWUWZmWqU7XX0zLbKl1rEuBSwH69u173CuvvNLseL7xy1VA+dpbzZrj2WefZeDAgeUOo1FTpkyhW7duXHXVVeUOhY997GM8+OCDHHTQQeUOpWS+8pWvMG7cOEaPHl1nX31/P5KWRkRFfcdqE53FETEDmAFQUVGxV5nLCcCs/brhhhtYu3ZtrhLB4MGD600CeyPLRLAeOLxgvU+6rb4y1ZI6AgeSdBqb2T5uypQp5Q5ht+HDh5c7hJJr6TMOhbLsI3gKGCCpv6TOwLlAZa0ylcDfpcsTgN9EVm1VZm2M/yvY3tibv5vMEkFE7AImAY8AzwJzI2KVpKmSarrNfwwcImk1cAVwTVbxmLUlXbp0YevWrU4G1iw18xF06dKlWe/LrLM4KxUVFVH4mLVZe+QZymxvNTRDWZvvLDbLm06dOjVrhimzlvBYQ2ZmOedEYGaWc04EZmY51+Y6iyVtBpr/aHGiB7ClyVLti685H3zN+dCSa/6riOhZ3442lwhaQtKShnrN2ytfcz74mvMhq2t205CZWc45EZiZ5VzeEsGMcgdQBr7mfPA150Mm15yrPgIzM6srbzUCMzOrxYnAzCzn2mUikDRG0nOSVkuqM6KppP0kzUn3L5LUr/RRtq4irvkKSVWSlkv6b0l/VY44W1NT11xQ7ixJIanN32pYzDVLmpj+rldJurPUMba2Iv62+0p6VNIz6d/32HLE2Vok3S7ptXQGx/r2S9JN6b/HcknHtvikEdGuXkAH4EXgw0Bn4PfAoFplvgjcmi6fC8wpd9wluOZRwAfT5S/k4ZrTcgcAjwELgYpyx12C3/MA4BngQ+n6oeWOuwTXPAP4Qro8CFhT7rhbeM0nA8cCKxvYPxaYBwgYASxq6TnbY43gBGB1RLwUETuBu4DxtcqMB36aLt8DjJakEsbY2pq85oh4NCK2p6sLSWaMa8uK+T0D/AvwXaA9jOdczDV/Hrg5It4AiIjXShxjayvmmgPoni4fCGwoYXytLiIeA15vpMh44I5ILAQOknRYS87ZHhNBb2BdwXp1uq3eMpFMoPMmcEhJostGMddc6BKSbxRtWZPXnFaZD4+IX5UysAwV83s+EjhS0uOSFkoaU7LoslHMNU8BPiupGngI+FJpQiub5v5/b5LnI8gZSZ8FKoCPlzuWLEn6AHAjcFGZQym1jiTNQ6eQ1PoekzQkIv5Y1qiydR4wMyJukDQS+JmkwRHxfrkDayvaY41gPXB4wXqfdFu9ZSR1JKlObi1JdNko5pqRdCpwLTAuIt4pUWxZaeqaDwAGAwskrSFpS61s4x3Gxfyeq4HKiHg3Il4GnidJDG1VMdd8CTAXICKeBLqQDM7WXhX1/7052mMieAoYIKm/pM4kncGVtcpUAn+XLk8AfhNpL0wb1eQ1SzoG+CFJEmjr7cbQxDVHxJsR0SMi+kVEP5J+kXER0ZbnOS3mb/sXJLUBJPUgaSp6qZRBtrJirnktMBpA0kCSRLC5pFGWViVwYXr30AjgzYjY2JIDtrumoYjYJWkS8AjJHQe3R8QqSVOBJRFRCfyYpPq4mqRT5tzyRdxyRV7zNKAbcHfaL742IsaVLegWKvKa25Uir/kR4DRJVcB7wNUR0WZru0Ve85XAjyT9I0nH8UVt+YudpNkkybxH2u9xPdAJICJuJekHGQusBrYDf9/ic7bhfy8zM2sF7bFpyMzMmsGJwMws55wIzMxyzonAzCznnAjMzHLOicD2SZLek7Ss4NWvkbLbWuF8MyW9nJ7r6fQJ1eYe4zZJg9Llf66174mWxpgep+bfZaWkX0o6qInyw9r6aJyW
Pd8+avskSdsioltrl23kGDOBByPiHkmnAdMjYmgLjtfimJo6rqSfAs9HxL82Uv4iklFXJ7V2LNZ+uEZgbYKkbuk8Ck9LWiGpzkijkg6T9FjBN+aT0u2nSXoyfe/dkpr6gH4MOCJ97xXpsVZK+mq6bX9Jv5L0+3T7Oen2BZIqJH0H6JrGMSvdty39eZekTxfEPFPSBEkdJE2T9FQ6xvxlRfyzPEk62JikE9JrfEbSE5KOSp/EnQqck8ZyThr77ZIWp2XrG7HV8qbcY2/75Vd9L5KnYpelr/tJnoLvnu7rQfJUZU2Ndlv680rg2nS5A8l4Qz1IPtj3T7f/P+C6es43E5iQLp8NLAKOA1YA+5M8lb0KOAY4C/hRwXsPTH8uIJ3zoCamgjI1MZ4J/DRd7kwyimRX4FLga+n2/YAlQP964txWcH13A2PS9e5Ax3T5VODedPki4AcF7/8W8Nl0+SCSsYj2L/fv26/yvtrdEBPWbvw5IobVrEjqBHxL0snA+yTfhP8XsKngPU8Bt6dlfxERyyR9nGSyksfToTU6k3yTrs80SV8jGafmEpLxa+6PiD+lMdwHnAQ8DNwg6bskzUm/bcZ1zQO+L2k/YAzwWET8OW2OGippQlruQJLB4l6u9f6ukpal1/8s8F8F5X8qaQDJMAudGjj/acA4SVel612AvumxLKecCKytuADoCRwXEe8qGVG0S2GBiHgsTRSfBmZKuhF4A/iviDiviHNcHRH31KxIGl1foYh4XslcB2OBb0r674iYWsxFRMQOSQuA04FzSCZagWS2qS9FxCNNHOLPETFM0gdJxt+5HLiJZAKeRyPizLRjfUED7xdwVkQ8V0y8lg/uI7C24kDgtTQJjALqzLmsZB7mVyPiR8BtJNP9LQROlFTT5r+/pCOLPOdvgc9I+qCk/UmadX4rqRewPSJ+TjKYX31zxr6b1kzqM4dkoLCa2gUkH+pfqHmPpCPTc9Yrktnmvgxcqb8MpV4zFPFFBUXfJmkiq/EI8CWl1SMlo9JazjkRWFsxC6iQtAK4EPhDPWVOAX4v6RmSb9vfj4jNJB+MsyUtJ2kW+utiThgRT5P0HSwm6TO4LSKeAYYAi9MmmuuBb9bz9hnA8prO4lrmk0wM9OtIpln4nEoAAABiSURBVF+EJHFVAU8rmbT8hzRRY09jWU4yMcv3gG+n1174vkeBQTWdxSQ1h05pbKvSdcs53z5qZpZzrhGYmeWcE4GZWc45EZiZ5ZwTgZlZzjkRmJnlnBOBmVnOORGYmeXc/wfeaHFTq7yydwAAAABJRU5ErkJggg==\n",
173 | "text/plain": [
174 | ""
175 | ]
176 | },
177 | "metadata": {
178 | "tags": [],
179 | "needs_background": "light"
180 | }
181 | }
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "metadata": {
187 | "id": "dbvF6TW3mYqc"
188 | },
189 | "source": [
190 | "# Score every distinct value in column 1 of predict_proba as a candidate decision threshold.\n",
191 | "train_proba = model.predict_proba(X_train)[:,1]  # computed once, not once per candidate\n",
192 | "threshold = list(np.unique(train_proba))\n",
193 | "accuracy = [\n",
194 | "    balanced_accuracy_score(y_train, (train_proba >= p).astype(int))\n",
195 | "    for p in threshold\n",
196 | "]"
197 | ],
198 | "execution_count": null,
199 | "outputs": []
200 | },
201 | {
202 | "cell_type": "code",
203 | "metadata": {
204 | "colab": {
205 | "base_uri": "https://localhost:8080/",
206 | "height": 282
207 | },
208 | "id": "5OsZe4iTnHA-",
209 | "outputId": "aa0e241f-e636-427e-c71d-0276219546ed"
210 | },
211 | "source": [
212 | "fig, ax = plt.subplots()  # explicit Axes instead of the implicit pyplot state\n",
213 | "ax.scatter(threshold, accuracy)\n",
214 | "ax.set(xlabel=\"Threshold\", ylabel=\"Balanced accuracy\")\n",
215 | "plt.show()"
216 | ],
217 | "execution_count": null,
218 | "outputs": [
219 | {
220 | "output_type": "display_data",
221 | "data": {
222 | "image/png": "\n",
223 | "text/plain": [
224 | ""
225 | ]
226 | },
227 | "metadata": {
228 | "tags": [],
229 | "needs_background": "light"
230 | }
231 | }
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "metadata": {
237 | "colab": {
238 | "base_uri": "https://localhost:8080/"
239 | },
240 | "id": "jZGGUpLJzGDn",
241 | "outputId": "ac1265d2-4d28-455d-9087-a2d66c740cc4"
242 | },
243 | "source": [
244 | "threshold[np.argmax(accuracy)]"
245 | ],
246 | "execution_count": null,
247 | "outputs": [
248 | {
249 | "output_type": "execute_result",
250 | "data": {
251 | "text/plain": [
252 | "0.5602892029098923"
253 | ]
254 | },
255 | "metadata": {
256 | "tags": []
257 | },
258 | "execution_count": 12
259 | }
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "metadata": {
265 | "id": "srGZ0zfSnOfF"
266 | },
267 | "source": [
268 | "fpr, tpr, thresholds = roc_curve(y_train,model.predict_proba(X_train)[:,1],drop_intermediate=False)"
269 | ],
270 | "execution_count": null,
271 | "outputs": []
272 | },
273 | {
274 | "cell_type": "code",
275 | "metadata": {
276 | "colab": {
277 | "base_uri": "https://localhost:8080/"
278 | },
279 | "id": "jd1zYR6KnzYf",
280 | "outputId": "fd562b47-f8a3-4fbd-e506-667f71e8142d"
281 | },
282 | "source": [
283 | "np.argmin(np.abs(fpr+tpr-1))"
284 | ],
285 | "execution_count": null,
286 | "outputs": [
287 | {
288 | "output_type": "execute_result",
289 | "data": {
290 | "text/plain": [
291 | "154"
292 | ]
293 | },
294 | "metadata": {
295 | "tags": []
296 | },
297 | "execution_count": 14
298 | }
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "metadata": {
304 | "colab": {
305 | "base_uri": "https://localhost:8080/",
306 | "height": 279
307 | },
308 | "id": "A8_BOOpDyPQu",
309 | "outputId": "c9505de5-9ea0-4a95-fcab-636a3db811f2"
310 | },
311 | "source": [
312 | "fig, ax = plt.subplots()  # explicit Axes instead of the implicit pyplot state\n",
313 | "ax.scatter(thresholds, np.abs(fpr+tpr-1))\n",
314 | "ax.set(xlabel=\"Threshold\", ylabel=\"|FPR + TPR - 1|\")\n",
315 | "plt.show()"
316 | ],
317 | "execution_count": null,
318 | "outputs": [
319 | {
320 | "output_type": "display_data",
321 | "data": {
322 | "image/png": "\n",
323 | "text/plain": [
324 | ""
325 | ]
326 | },
327 | "metadata": {
328 | "tags": [],
329 | "needs_background": "light"
330 | }
331 | }
332 | ]
333 | },
334 | {
335 | "cell_type": "code",
336 | "metadata": {
337 | "colab": {
338 | "base_uri": "https://localhost:8080/"
339 | },
340 | "id": "Av9NUfKsy5HQ",
341 | "outputId": "0a0b4fdb-4fdc-483a-ce1b-d1f82d8efcbe"
342 | },
343 | "source": [
344 | "thresholds[np.argmin(np.abs(fpr+tpr-1))]"
345 | ],
346 | "execution_count": null,
347 | "outputs": [
348 | {
349 | "output_type": "execute_result",
350 | "data": {
351 | "text/plain": [
352 | "0.44625685602433796"
353 | ]
354 | },
355 | "metadata": {
356 | "tags": []
357 | },
358 | "execution_count": 16
359 | }
360 | ]
361 | }
362 | ]
363 | }
--------------------------------------------------------------------------------