├── 02.1_neuralprophet_bike_share_trend_seasonality.ipynb ├── 02.2_neuralprophet_bike_share_AR_lagged.ipynb ├── 02.HBOS.ipynb ├── 03. ECOD.ipynb ├── 04.iForest.ipynb ├── 05. PCA.ipynb ├── 05_1_ARIMA_avocado.ipynb ├── 06. OCSVM.ipynb ├── 06.1_tree_based_time_series_forecasting.ipynb ├── 06.2_multi_step_forecasts.ipynb ├── 07. GMM.ipynb ├── 08. KNN.ipynb ├── 09. LOF.ipynb ├── 10. CBLOF.ipynb ├── 10.0_Linear Regression for Multi-period Time Series Forecasts with Uncertainty.ipynb ├── 11. XGBOD.ipynb ├── 12. Autoencoders.ipynb ├── 12.0_deepAR_gluonTS.ipynb ├── 12_1_gluonTS_stocks.ipynb ├── 14.0_Tree-based Time Series Probabilistic forecasting.ipynb ├── 16.0_TFT_Ecuador_sales..ipynb ├── 17_0_lag_llama_walmart.ipynb ├── 20220804_Transfer_learning_for_Image_Classification.ipynb ├── A data scientist toolkit.ipynb ├── A wide variety of models for multi-class classification.ipynb ├── Algo_trading_01_TA.ipynb ├── Algo_trading_02_backtesting.ipynb ├── AntPlot.py ├── Change point detection.ipynb ├── Deploy your machine learning model using streamlit.ipynb ├── Dickens_A_Tale_of_Two_Cities.txt ├── Explain Your Model with Microsoft's InterpretML-Github.ipynb ├── Explain any models with the SHAP values - the KernelExplainer for article.ipynb ├── Explain your model with LIME for article.ipynb ├── Explain your model with the SHAP values for article.ipynb ├── From Quantile Regression to Quantile Random Forests.ipynb ├── From logistic to deep learning.ipynb ├── From regression to RNN.ipynb ├── HNSW.ipynb ├── Histogram-based Outlier Score.ipynb ├── Kalman Filter.ipynb ├── Keras MNIST.ipynb ├── LSH.ipynb ├── Luminaire.ipynb ├── NotCoolYet.py ├── Plot with Plotly for article.ipynb ├── PyOD Tutorial - autoencoder.ipynb ├── RLHF_for_algorithmic_trading.ipynb ├── ROUGE.ipynb ├── Revisiting the CM the ROC and the PR.ipynb ├── S3_sample.ipynb ├── TA in R for article.md ├── Taylor_rule.py ├── The SHAP Values with H2o Models.ipynb ├── The SHAP values with More Charts for article.ipynb ├── The 
Taylor rule.ipynb ├── The_VGG_16_With_Tensorflow_in_Google_Colab.ipynb ├── The_VGG_16_with_Google_Co_lab.ipynb ├── Time Series Anomaly Detection with Prophet for article.ipynb ├── Undersampling and oversampling.ipynb ├── Use seaborn to do beautiful plots easy-git.ipynb ├── VGG-16.ipynb ├── VeryCool.py ├── Waterfall plot for the SHAP of any models class.ipynb ├── cookie_OOP.ipynb ├── deploy_model.py ├── imagenet_classes.txt ├── my_function.py ├── pic └── tesla.png ├── sample.mp3 ├── stock.py ├── stock2.py ├── streamlit_model_performance.py ├── time_series_08_monte_carlo_simulation.ipynb ├── use_NotCoolYet.py ├── use_VeryCool.py ├── voice-change-Ivy.mp3 ├── voice-change-Joanna.mp3 └── wavenet_github.py /11. XGBOD.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "f12ec631-0d57-4a6d-a0ec-7940abac6968", 6 | "metadata": {}, 7 | "source": [ 8 | "## 11. XGBoost Outlier Detection (XGBOD)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "9021294f-5f48-4f53-8b36-773852531a1f", 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "image/png": "\n", 20 | "text/plain": [ 21 | "
" 22 | ] 23 | }, 24 | "metadata": { 25 | "needs_background": "light" 26 | }, 27 | "output_type": "display_data" 28 | } 29 | ], 30 | "source": [ 31 | "import numpy as np\n", 32 | "import pandas as pd\n", 33 | "import matplotlib.pyplot as plt\n", 34 | "from pyod.utils.data import generate_data\n", 35 | "contamination = 0.05 # percentage of outliers\n", 36 | "n_train = 500 # number of training points\n", 37 | "n_test = 500 # number of testing points\n", 38 | "n_features = 6 # number of features\n", 39 | "X_train, X_test, y_train, y_test = generate_data(\n", 40 | " n_train=n_train, \n", 41 | " n_test=n_test, \n", 42 | " n_features= n_features, \n", 43 | " contamination=contamination, \n", 44 | " random_state=123)\n", 45 | "\n", 46 | "# Make the 2d numpy array a pandas dataframe for each manipulation \n", 47 | "X_train_pd = pd.DataFrame(X_train)\n", 48 | " \n", 49 | "# Plot\n", 50 | "plt.scatter(X_train_pd[0], X_train_pd[1], c=y_train, alpha=0.8)\n", 51 | "plt.title('Scatter plot')\n", 52 | "plt.xlabel('x0')\n", 53 | "plt.ylabel('x1')\n", 54 | "plt.show()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 7, 60 | "id": "4da737b7-44ee-4c90-a8f6-d954595ffe2b", 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stderr", 65 | "output_type": "stream", 66 | "text": [ 67 | "/Users/chriskuo/opt/anaconda3/envs/AD/lib/python3.9/site-packages/pyod/models/base.py:410: UserWarning: y should not be presented in unsupervised learning.\n", 68 | " warnings.warn(\n", 69 | "/Users/chriskuo/opt/anaconda3/envs/AD/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 
0, 1, 2, ..., [num_class - 1].\n", 70 | " warnings.warn(label_encoder_deprecation_msg, UserWarning)\n" 71 | ] 72 | }, 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "[17:36:58] WARNING: /Users/runner/miniforge3/conda-bld/xgboost-split_1645117948562/work/src/learner.cc:576: \n", 78 | "Parameters: { \"n_components\", \"silent\" } might not be used.\n", 79 | "\n", 80 | " This could be a false alarm, with some parameters getting used by language bindings but\n", 81 | " then being mistakenly passed down to XGBoost core, or some parameter actually being used\n", 82 | " but getting flagged wrongly here. Please open an issue if you find any such cases.\n", 83 | "\n", 84 | "\n", 85 | "[17:36:58] WARNING: /Users/runner/miniforge3/conda-bld/xgboost-split_1645117948562/work/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n", 86 | "The training data: {0.0: 475, 1.0: 25}\n", 87 | "The training data: {0.0: 475, 1.0: 25}\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "from pyod.models.xgbod import XGBOD\n", 93 | "xgbod = XGBOD(n_components=4,random_state=100) \n", 94 | "xgbod.fit(X_train,y_train)\n", 95 | "\n", 96 | "# get the prediction labels and outlier scores of the training data\n", 97 | "y_train_pred = xgbod.labels_ # binary labels (0: inliers, 1: outliers)\n", 98 | "y_train_scores = xgbod.decision_scores_ # raw outlier scores\n", 99 | "y_train_scores = xgbod.decision_function(X_train)\n", 100 | "# get the prediction on the test data\n", 101 | "y_test_pred = xgbod.predict(X_test) # outlier labels (0 or 1)\n", 102 | "y_test_scores = xgbod.decision_function(X_test) # outlier scores\n", 103 | "\n", 104 | "def count_stat(vector):\n", 105 | " # Because it is '0' and '1', we can run a count statistic. 
\n", 106 | " unique, counts = np.unique(vector, return_counts=True)\n", 107 | " return dict(zip(unique, counts))\n", 108 | "\n", 109 | "print(\"The training data:\", count_stat(y_train_pred))\n", 110 | "print(\"The training data:\", count_stat(y_test_pred))" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 8, 116 | "id": "20c204ec-2e3d-4497-be6f-a45cedfeb490", 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/html": [ 122 | "
\n", 123 | "\n", 136 | "\n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | "
Pred0.01.0
Actual
0.04750
1.0025
\n", 162 | "
" 163 | ], 164 | "text/plain": [ 165 | "Pred 0.0 1.0\n", 166 | "Actual \n", 167 | "0.0 475 0\n", 168 | "1.0 0 25" 169 | ] 170 | }, 171 | "execution_count": 8, 172 | "metadata": {}, 173 | "output_type": "execute_result" 174 | } 175 | ], 176 | "source": [ 177 | "Actual_pred = pd.DataFrame({'Actual': y_test, 'Pred': y_test_pred})\n", 178 | "pd.crosstab(Actual_pred['Actual'],Actual_pred['Pred'])" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 9, 184 | "id": "ba21f02c-d927-4d4d-8351-d2ebea70b6fb", 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/plain": [ 190 | "{'base_score': 0.5,\n", 191 | " 'booster': 'gbtree',\n", 192 | " 'colsample_bylevel': 1,\n", 193 | " 'colsample_bytree': 1,\n", 194 | " 'estimator_list': [KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',\n", 195 | " metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=1, p=2,\n", 196 | " radius=1.0),\n", 197 | " LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',\n", 198 | " metric_params=None, n_jobs=1, n_neighbors=1, novelty=True, p=2),\n", 199 | " KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',\n", 200 | " metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=3, p=2,\n", 201 | " radius=1.0),\n", 202 | " LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',\n", 203 | " metric_params=None, n_jobs=1, n_neighbors=3, novelty=True, p=2),\n", 204 | " KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',\n", 205 | " metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,\n", 206 | " radius=1.0),\n", 207 | " LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',\n", 208 | " metric_params=None, n_jobs=1, n_neighbors=5, novelty=True, p=2),\n", 209 | " KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',\n", 210 | " metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=10, 
p=2,\n", 211 | " radius=1.0),\n", 212 | " LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',\n", 213 | " metric_params=None, n_jobs=1, n_neighbors=10, novelty=True, p=2),\n", 214 | " KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',\n", 215 | " metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=20, p=2,\n", 216 | " radius=1.0),\n", 217 | " LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',\n", 218 | " metric_params=None, n_jobs=1, n_neighbors=20, novelty=True, p=2),\n", 219 | " KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',\n", 220 | " metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=30, p=2,\n", 221 | " radius=1.0),\n", 222 | " LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',\n", 223 | " metric_params=None, n_jobs=1, n_neighbors=30, novelty=True, p=2),\n", 224 | " KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',\n", 225 | " metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=40, p=2,\n", 226 | " radius=1.0),\n", 227 | " LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',\n", 228 | " metric_params=None, n_jobs=1, n_neighbors=40, novelty=True, p=2),\n", 229 | " KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',\n", 230 | " metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=50, p=2,\n", 231 | " radius=1.0),\n", 232 | " LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',\n", 233 | " metric_params=None, n_jobs=1, n_neighbors=50, novelty=True, p=2),\n", 234 | " HBOS(alpha=0.1, contamination=0.1, n_bins=5, tol=0.5),\n", 235 | " HBOS(alpha=0.1, contamination=0.1, n_bins=10, tol=0.5),\n", 236 | " HBOS(alpha=0.1, contamination=0.1, n_bins=15, tol=0.5),\n", 237 | " HBOS(alpha=0.1, contamination=0.1, n_bins=20, tol=0.5),\n", 238 | " HBOS(alpha=0.1, contamination=0.1, n_bins=25, tol=0.5),\n", 239 | " HBOS(alpha=0.1, 
contamination=0.1, n_bins=30, tol=0.5),\n", 240 | " HBOS(alpha=0.1, contamination=0.1, n_bins=50, tol=0.5),\n", 241 | " OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',\n", 242 | " kernel='rbf', max_iter=-1, nu=0.01, shrinking=True, tol=0.001,\n", 243 | " verbose=False),\n", 244 | " OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',\n", 245 | " kernel='rbf', max_iter=-1, nu=0.1, shrinking=True, tol=0.001,\n", 246 | " verbose=False),\n", 247 | " OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',\n", 248 | " kernel='rbf', max_iter=-1, nu=0.2, shrinking=True, tol=0.001,\n", 249 | " verbose=False),\n", 250 | " OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',\n", 251 | " kernel='rbf', max_iter=-1, nu=0.3, shrinking=True, tol=0.001,\n", 252 | " verbose=False),\n", 253 | " OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',\n", 254 | " kernel='rbf', max_iter=-1, nu=0.4, shrinking=True, tol=0.001,\n", 255 | " verbose=False),\n", 256 | " OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',\n", 257 | " kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,\n", 258 | " verbose=False),\n", 259 | " OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',\n", 260 | " kernel='rbf', max_iter=-1, nu=0.6, shrinking=True, tol=0.001,\n", 261 | " verbose=False),\n", 262 | " OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',\n", 263 | " kernel='rbf', max_iter=-1, nu=0.7, shrinking=True, tol=0.001,\n", 264 | " verbose=False),\n", 265 | " OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',\n", 266 | " kernel='rbf', max_iter=-1, nu=0.8, shrinking=True, tol=0.001,\n", 267 | " verbose=False),\n", 268 | " OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',\n", 269 | " kernel='rbf', max_iter=-1, nu=0.9, shrinking=True, tol=0.001,\n", 270 | " 
verbose=False),\n", 271 | " OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',\n", 272 | " kernel='rbf', max_iter=-1, nu=0.99, shrinking=True, tol=0.001,\n", 273 | " verbose=False),\n", 274 | " IForest(behaviour='old', bootstrap=False, contamination=0.1, max_features=1.0,\n", 275 | " max_samples='auto', n_estimators=10, n_jobs=1, random_state=100,\n", 276 | " verbose=0),\n", 277 | " IForest(behaviour='old', bootstrap=False, contamination=0.1, max_features=1.0,\n", 278 | " max_samples='auto', n_estimators=20, n_jobs=1, random_state=100,\n", 279 | " verbose=0),\n", 280 | " IForest(behaviour='old', bootstrap=False, contamination=0.1, max_features=1.0,\n", 281 | " max_samples='auto', n_estimators=50, n_jobs=1, random_state=100,\n", 282 | " verbose=0),\n", 283 | " IForest(behaviour='old', bootstrap=False, contamination=0.1, max_features=1.0,\n", 284 | " max_samples='auto', n_estimators=70, n_jobs=1, random_state=100,\n", 285 | " verbose=0),\n", 286 | " IForest(behaviour='old', bootstrap=False, contamination=0.1, max_features=1.0,\n", 287 | " max_samples='auto', n_estimators=100, n_jobs=1, random_state=100,\n", 288 | " verbose=0),\n", 289 | " IForest(behaviour='old', bootstrap=False, contamination=0.1, max_features=1.0,\n", 290 | " max_samples='auto', n_estimators=150, n_jobs=1, random_state=100,\n", 291 | " verbose=0),\n", 292 | " IForest(behaviour='old', bootstrap=False, contamination=0.1, max_features=1.0,\n", 293 | " max_samples='auto', n_estimators=200, n_jobs=1, random_state=100,\n", 294 | " verbose=0)],\n", 295 | " 'gamma': 0,\n", 296 | " 'learning_rate': 0.1,\n", 297 | " 'max_delta_step': 0,\n", 298 | " 'max_depth': 3,\n", 299 | " 'min_child_weight': 1,\n", 300 | " 'n_estimators': 100,\n", 301 | " 'n_jobs': 1,\n", 302 | " 'nthread': None,\n", 303 | " 'objective': 'binary:logistic',\n", 304 | " 'random_state': 100,\n", 305 | " 'reg_alpha': 0,\n", 306 | " 'reg_lambda': 1,\n", 307 | " 'scale_pos_weight': 1,\n", 308 | " 'silent': True,\n", 
309 | " 'standardization_flag_list': [True,\n", 310 | " True,\n", 311 | " True,\n", 312 | " True,\n", 313 | " True,\n", 314 | " True,\n", 315 | " True,\n", 316 | " True,\n", 317 | " True,\n", 318 | " True,\n", 319 | " True,\n", 320 | " True,\n", 321 | " True,\n", 322 | " True,\n", 323 | " True,\n", 324 | " True,\n", 325 | " False,\n", 326 | " False,\n", 327 | " False,\n", 328 | " False,\n", 329 | " False,\n", 330 | " False,\n", 331 | " False,\n", 332 | " True,\n", 333 | " True,\n", 334 | " True,\n", 335 | " True,\n", 336 | " True,\n", 337 | " True,\n", 338 | " True,\n", 339 | " True,\n", 340 | " True,\n", 341 | " True,\n", 342 | " True,\n", 343 | " False,\n", 344 | " False,\n", 345 | " False,\n", 346 | " False,\n", 347 | " False,\n", 348 | " False,\n", 349 | " False],\n", 350 | " 'subsample': 1}" 351 | ] 352 | }, 353 | "execution_count": 9, 354 | "metadata": {}, 355 | "output_type": "execute_result" 356 | } 357 | ], 358 | "source": [ 359 | "xgbod.get_params()" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 10, 365 | "id": "bf53d481-1bac-4dac-b4ff-29c6486bbf7d", 366 | "metadata": {}, 367 | "outputs": [ 368 | { 369 | "data": { 370 | "text/html": [ 371 | "
\n", 372 | "\n", 385 | "\n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | "
GroupCountCount %012345pred
0Normal47595.02.0030672.0113022.0063991.9914092.0083671.9848870.0
1Outlier255.00.453548-0.209294-0.474818-0.231157-0.034642-0.0559471.0
\n", 430 | "
" 431 | ], 432 | "text/plain": [ 433 | " Group Count Count % 0 1 2 3 4 \\\n", 434 | "0 Normal 475 95.0 2.003067 2.011302 2.006399 1.991409 2.008367 \n", 435 | "1 Outlier 25 5.0 0.453548 -0.209294 -0.474818 -0.231157 -0.034642 \n", 436 | "\n", 437 | " 5 pred \n", 438 | "0 1.984887 0.0 \n", 439 | "1 -0.055947 1.0 " 440 | ] 441 | }, 442 | "execution_count": 10, 443 | "metadata": {}, 444 | "output_type": "execute_result" 445 | } 446 | ], 447 | "source": [ 448 | "# Let's see how many '0's and '1's.\n", 449 | "df_train = pd.DataFrame(X_train)\n", 450 | "df_columns = df_train.columns\n", 451 | "df_train['pred'] = y_train_pred\n", 452 | "df_train['Group'] = np.where(df_train['pred']==1, 'Outlier','Normal')\n", 453 | "\n", 454 | "# Now let's show the summary statistics:\n", 455 | "cnt = df_train.groupby('Group')['pred'].count().reset_index().rename(columns={'pred':'Count'})\n", 456 | "cnt['Count %'] = (cnt['Count'] / cnt['Count'].sum()) * 100 # The count and count %\n", 457 | "stat = df_train.groupby('Group').mean().reset_index() # The avg.\n", 458 | "cnt.merge(stat, left_on='Group',right_on='Group') # Put the count and the avg. 
"""Ant-foraging simulation on a grid, with a small matplotlib demo."""

import math
import random
import time


class Ant:
    """A single Ant."""

    def __init__(self, model):
        """Create an Ant at (0, 0) that carries no food.

        model -- the Model this Ant queries for pheromone, food and capacity.
        """
        self.model = model
        self.x = 0
        self.y = 0
        # Kept as a bool everywhere; move() assigns True/False to it.
        self.has_food = False

    def next_left(self):
        """The (x, y) position of the Location the Ant
        would move to if it moved forward left.

        Without food the Ant steps +1 in y; with food it steps -1 in y.
        """
        if not self.has_food:
            return (self.x, self.y + 1)
        return (self.x, self.y - 1)

    def next_right(self):
        """The (x, y) position of the Location the Ant
        would move to if it moved forward right.

        Without food the Ant steps +1 in x; with food it steps -1 in x.
        """
        if not self.has_food:
            return (self.x + 1, self.y)
        return (self.x - 1, self.y)

    def left_pheromone(self):
        """The amount of pheromone in the Location that
        the Ant would move into if it moved forward left.
        """
        return self.model.get_pheromone(self.next_left())

    def right_pheromone(self):
        """The amount of pheromone in the Location that
        the Ant would move into if it moved forward right.
        """
        return self.model.get_pheromone(self.next_right())

    def will_move(self):
        """Whether or not this Ant will move this turn.

        Returns False when both candidate Locations are at capacity.
        Otherwise the decision is random, with a probability that grows
        with the total pheromone on the two candidate Locations.
        """
        if self.model.at_capacity(self.next_left()) and \
                self.model.at_capacity(self.next_right()):
            return False
        p_l = self.left_pheromone()
        p_r = self.right_pheromone()
        prob_move = 0.5 + 0.5 * math.tanh((p_l + p_r) / 100.0 - 1)
        return random.random() < prob_move

    def will_go_right(self):
        """Whether or not this Ant will move forward right
        this turn.

        A full right Location forces left and a full left Location forces
        right; otherwise the choice is random, weighted by pheromone.
        """
        p_l = self.left_pheromone()
        p_r = self.right_pheromone()

        if self.model.at_capacity(self.next_right()):
            return False

        if self.model.at_capacity(self.next_left()):
            return True

        left_weight = (5 + p_l) ** 2
        right_weight = (5 + p_r) ** 2
        prob_right = 1 - left_weight / float(left_weight + right_weight)

        return random.random() < prob_right

    def move(self):
        """Moves this Ant.

        The Ant may stay put (see will_move). After stepping it lays
        pheromone, drops its food when it is at (0, 0), and otherwise picks
        up one unit of food if the new Location has some and the Ant is
        not already carrying any.
        """
        if not self.will_move():
            return
        if self.will_go_right():
            (self.x, self.y) = self.next_right()
        else:
            (self.x, self.y) = self.next_left()
        self.lay_pheromone()
        pos = (self.x, self.y)
        if pos == (0, 0):
            self.has_food = False
        elif self.model.has_food(pos) and not self.has_food:
            self.model.remove_food(pos)
            self.has_food = True

    def lay_pheromone(self):
        """This Ant lays pheromone in its current Location.

        Without food: +1 per call, capped at 1000.
        With food: +10 per call, capped at 300.
        """
        pos = (self.x, self.y)
        current = self.model.get_pheromone(pos)
        if not self.has_food:
            limit = 1000
            amount = 1
        else:
            limit = 300
            amount = 10
        if current >= limit:
            return
        new_amount = min(current + amount, limit)
        self.model.set_pheromone(pos, new_amount)


class Location:
    """The grid recording the food and pheromone."""

    def __init__(self):
        self.food = 0
        self.pheromone = 0

    def place_food(self, p):
        """Place food with probability p into this Location."""
        if random.random() < p:
            self.food = 1

    def has_food(self):
        """Returns True if this Location has at least 1 food in it,
        False otherwise.
        """
        return self.food > 0

    def remove_food(self):
        """Remove one food from this Location.

        Raises ValueError if there is no food in this Location. The
        previous check asserted the bound method itself, which is always
        truthy, so it never fired and the count could go negative.
        """
        if not self.has_food():
            raise ValueError("no food to remove from this Location")
        self.food -= 1

    def add_pheromone(self, amount=1):
        """Add pheromone to this Location."""
        self.pheromone += amount

    def set_pheromone(self, amount):
        """Set the pheromone in this Location to amount."""
        self.pheromone = amount

    def get_pheromone(self):
        """Returns the amount of pheromone in this Location."""
        return self.pheromone

    def evaporate_pheromone(self):
        """Evaporates 1/30 of the pheromone in this Location."""
        self.pheromone -= self.pheromone * (1.0 / 30)


class Model:
    """Class that represents the room the robot ants live in."""

    MAX_ANTS = 200

    def __init__(self):
        # (x, y) -> list of Ants currently at that position.
        self.ants = {}
        # (x, y) -> Location, created lazily by get_location().
        self.locations = {}
        # Probability used to seed food into newly created Locations.
        self.p_food = 0

    def place_food(self, p):
        """Place food in all Locations with probability p.

        p is also remembered so that Locations created later by
        get_location() are seeded with the same probability.
        """
        self.p_food = p
        # Iterate the Location objects, not the (x, y) keys.
        for loc in self.locations.values():
            loc.place_food(p)

    def remove_food(self, pos):
        """Remove one unit of food from the Location at pos.

        Raises KeyError if no Location exists at pos yet.
        """
        self.locations[pos].remove_food()

    def has_food(self, pos):
        """Returns true if the Location at pos has at least one unit
        of food, false otherwise.
        """
        return self.get_location(pos).has_food()

    def add_ants(self, n):
        """Add n ants to the nest. Each ant starts at (0,0)"""
        for _ in range(n):
            ant = Ant(self)
            self.ants.setdefault((ant.x, ant.y), []).append(ant)

    def __repr__(self):
        """Return a string representation of this room."""
        return str(self.ants)

    def move_ants(self):
        """Iterate through and move all the Ants in the room.

        After every Ant has had its turn, pheromone evaporates once and
        the position -> Ants mapping is rebuilt from the new coordinates.
        """
        moved = []
        for antlist in self.ants.values():
            for ant in antlist:
                ant.move()
                moved.append(ant)
        self.evaporate_pheromone()
        regrouped = {}
        for ant in moved:
            regrouped.setdefault((ant.x, ant.y), []).append(ant)
        self.ants = regrouped

    def get_location(self, pos):
        """Returns the Location at pos, creating it if it doesn't
        already exist.

        A newly created Location is seeded with food using the
        probability last passed to place_food(), when that is positive.
        """
        if pos not in self.locations:
            loc = Location()
            self.locations[pos] = loc
            if self.p_food > 0:
                loc.place_food(self.p_food)
        else:
            loc = self.locations[pos]
        return loc

    def add_pheromone(self, pos, amount=1):
        """Adds amount pheromone to the Location at pos."""
        self.get_location(pos).add_pheromone(amount)

    def get_pheromone(self, pos):
        """Returns the amount of pheromone in the Location at pos."""
        return self.get_location(pos).get_pheromone()

    def set_pheromone(self, pos, amount):
        """Sets the amount of pheromone in the Location at pos to
        amount.
        """
        self.get_location(pos).set_pheromone(amount)

    def evaporate_pheromone(self):
        """Evaporates pheromone from all existing Locations."""
        for loc in self.locations.values():
            loc.evaporate_pheromone()

    def num_ants(self, pos):
        """Returns the number of Ants at pos."""
        return len(self.ants.get(pos, ()))

    def at_capacity(self, pos):
        """Returns True if the Location at pos is full with Ants,
        False otherwise.
        """
        return self.num_ants(pos) >= Model.MAX_ANTS


def main(timesteps=600, ants_per_step=4, p_food=0.5):
    """Run the simulation and animate the occupied positions.

    timesteps     -- number of simulation steps to run.
    ants_per_step -- Ants added at (0, 0) before each step.
    p_food        -- probability that a Location is seeded with food.
    """
    # Imported here so the simulation classes can be used without matplotlib.
    import matplotlib.pyplot as plt

    model = Model()
    model.place_food(p_food)

    # Build the axes and the marker line once, then update the data per step.
    axes = plt.gca()
    axes.set_xlim(0, 30)
    axes.set_ylim(0, 30)
    line, = axes.plot([], [], 'gx')  # A green mark for an ant

    for _ in range(timesteps):
        model.add_ants(ants_per_step)
        model.move_ants()
        occupied = list(model.ants)
        line.set_xdata([x for x, _ in occupied])
        line.set_ydata([y for _, y in occupied])
        plt.draw()
        plt.pause(1e-17)
        time.sleep(0.005)
    # Keep the final frame on screen until the window is closed.
    plt.show()


if __name__ == "__main__":
    main()
23 | "execution_count": 29, 24 | "id": "e5532721-263e-4fd1-aa4b-c87740d4bf71", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import numpy as np\n", 29 | "import pandas as pd\n", 30 | "from sklearn.model_selection import train_test_split\n", 31 | "path = ''\n", 32 | "df = pd.read_csv(path + '/application_train.csv')\n", 33 | "df = df.sample(frac=0.1) # Take some records just to build a toy model\n", 34 | "X_train, X_test, Y_train, Y_test = train_test_split(df[variables],df['TARGET'],random_state=0)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "id": "81fe2b64-2558-42bd-ab37-273668fa45e4", 40 | "metadata": {}, 41 | "source": [ 42 | "### Data pre-processing: fillna for numeric variables" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 30, 48 | "id": "827fd584-c958-4f8a-a452-307354b459f4", 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# Impute with the mean of the training data\n", 53 | "# Keep the same mean to impute the test data or any future data\n", 54 | "APARTMENTS_AVG_MEAN = X_train['APARTMENTS_AVG'].mean()\n", 55 | "APARTMENTS_AVG_MAX = X_train['APARTMENTS_AVG'].mean()\n", 56 | "APARTMENTS_AVG_MIN = X_train['APARTMENTS_AVG'].min()\n", 57 | "LIVINGAPARTMENTS_AVG_MEAN = X_train['LIVINGAPARTMENTS_AVG'].mean()\n", 58 | "LIVINGAPARTMENTS_AVG_MAX = X_train['LIVINGAPARTMENTS_AVG'].max()\n", 59 | "LIVINGAPARTMENTS_AVG_MIN = X_train['LIVINGAPARTMENTS_AVG'].min()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 31, 65 | "id": "f8545ccc-155b-4da9-b29d-b6a41ffc8424", 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "(0.0, 1.0)" 72 | ] 73 | }, 74 | "execution_count": 31, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "LIVINGAPARTMENTS_AVG_MIN, LIVINGAPARTMENTS_AVG_MAX" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 32, 86 | "id": "fae750e8-38e9-457c-9ed3-29d6226f574b", 
87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/plain": [ 92 | "(0.0, 0.11768214539007016)" 93 | ] 94 | }, 95 | "execution_count": 32, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "APARTMENTS_AVG_MIN, APARTMENTS_AVG_MAX" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 33, 107 | "id": "0e8e6621-85e9-4d28-a849-4af9aaa8377e", 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "mean_values = {'APARTMENTS_AVG': APARTMENTS_AVG_MEAN, 'LIVINGAPARTMENTS_AVG': LIVINGAPARTMENTS_AVG_MEAN}\n", 112 | "X_train = X_train.fillna(value=mean_values)\n", 113 | "X_test = X_test.fillna(value=mean_values)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "id": "ff73a035-b14f-4d31-8a2f-493561e67911", 119 | "metadata": {}, 120 | "source": [ 121 | "### Data pre-processing: categorical variables" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 34, 127 | "id": "3715ebe4-be65-49dd-8889-7c21cc5e7cd6", 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "Laborers 4117\n", 134 | "Sales staff 2315\n", 135 | "Core staff 2107\n", 136 | "Managers 1577\n", 137 | "Drivers 1420\n", 138 | "High skill tech staff 860\n", 139 | "Accountants 742\n", 140 | "Medicine staff 633\n", 141 | "Security staff 530\n", 142 | "Cooking staff 456\n", 143 | "Cleaning staff 350\n", 144 | "Private service staff 185\n", 145 | "Low-skill Laborers 160\n", 146 | "Secretaries 111\n", 147 | "Waiters/barmen staff 91\n", 148 | "Realty agents 59\n", 149 | "HR staff 41\n", 150 | "IT staff 40\n", 151 | "Name: OCCUPATION_TYPE, dtype: int64" 152 | ] 153 | }, 154 | "execution_count": 34, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "X_train['OCCUPATION_TYPE'].value_counts()" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "id": 
"ce3551cb-318e-406a-83c0-96cb97e289f0", 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 35, 174 | "id": "7f33af60-d4ac-430f-b1d8-ae41736f9700", 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "OCCUPATION_list = ['Laborers','Sales staff','Core staff','Managers','Drivers','High skill tech staff',\n", 179 | "'Accountants','Medicine staff','Security staff','Cooking staff','Cleaning staff']" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 36, 185 | "id": "ed91d9d3-fb38-4d55-bbf2-1c1b008da7a5", 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "X_train['OCCUPATION_GRP'] = np.where(X_train['OCCUPATION_TYPE'].isin(OCCUPATION_list), X_train['OCCUPATION_TYPE'], 'OTHER')\n", 190 | "X_test['OCCUPATION_GRP'] = np.where(X_test['OCCUPATION_TYPE'].isin(OCCUPATION_list), X_test['OCCUPATION_TYPE'], 'OTHER')\n", 191 | "X_train = X_train.drop('OCCUPATION_TYPE',axis=1)\n", 192 | "X_test = X_test.drop('OCCUPATION_TYPE',axis=1)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 37, 198 | "id": "b4b4e4c7-50b0-43fa-9ccb-5b907cae3e15", 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "#X_train['OCCUPATION_GRP'] = X_train['OCCUPATION_GRP'].str.replace(' ','_')\n", 203 | "#X_train['OCCUPATION_GRP'] = X_train['OCCUPATION_GRP'].str.replace('/','_')\n" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 38, 209 | "id": "42dcbda5-cb91-4718-9f9d-eb706574f881", 210 | "metadata": {}, 211 | "outputs": [ 212 | { 213 | "data": { 214 | "text/plain": [ 215 | "OTHER 7956\n", 216 | "Laborers 4117\n", 217 | "Sales staff 2315\n", 218 | "Core staff 2107\n", 219 | "Managers 1577\n", 220 | "Drivers 1420\n", 221 | "High skill tech staff 860\n", 222 | "Accountants 742\n", 223 | "Medicine staff 633\n", 224 | "Security staff 530\n", 225 | "Cooking staff 456\n", 226 | "Cleaning staff 350\n", 227 | 
"Name: OCCUPATION_GRP, dtype: int64" 228 | ] 229 | }, 230 | "execution_count": 38, 231 | "metadata": {}, 232 | "output_type": "execute_result" 233 | } 234 | ], 235 | "source": [ 236 | "X_train['OCCUPATION_GRP'].value_counts(dropna=False)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 39, 242 | "id": "1fd5876b-e3e1-4a0b-ae1a-9ce02bb200bf", 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "# Get_Dummy for One-Hot #\n", 247 | "def getDummy(df,var):\n", 248 | " df[var] = df[var].str.replace(' ','_')\n", 249 | " df[var] = df[var].str.replace('/','_')\n", 250 | " dummies = pd.get_dummies(df[var])\n", 251 | " df2 = pd.concat([df, dummies], axis=1)\n", 252 | " df2 = df2.drop([var], axis=1)\n", 253 | " return(df2)\n", 254 | " \n", 255 | "X_train = getDummy(X_train,'OCCUPATION_GRP')\n", 256 | "X_test = getDummy(X_test,'OCCUPATION_GRP')" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 40, 262 | "id": "0f1d9231-80f5-479f-869c-e4bca54e9162", 263 | "metadata": {}, 264 | "outputs": [ 265 | { 266 | "data": { 267 | "text/plain": [ 268 | "array(['APARTMENTS_AVG', 'Accountants', 'Cleaning_staff', 'Cooking_staff',\n", 269 | " 'Core_staff', 'Drivers', 'High_skill_tech_staff',\n", 270 | " 'LIVINGAPARTMENTS_AVG', 'Laborers', 'Managers', 'Medicine_staff',\n", 271 | " 'OTHER', 'Sales_staff', 'Security_staff'], dtype=object)" 272 | ] 273 | }, 274 | "execution_count": 40, 275 | "metadata": {}, 276 | "output_type": "execute_result" 277 | } 278 | ], 279 | "source": [ 280 | "np.sort(X_train.columns)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 41, 286 | "id": "6444f0f2-faaa-429d-85dd-c63b3e975f57", 287 | "metadata": {}, 288 | "outputs": [ 289 | { 290 | "data": { 291 | "text/plain": [ 292 | "DecisionTreeClassifier(max_depth=6, min_samples_leaf=5)" 293 | ] 294 | }, 295 | "execution_count": 41, 296 | "metadata": {}, 297 | "output_type": "execute_result" 298 | } 299 | ], 300 | "source": [ 
301 | "from sklearn.tree import DecisionTreeClassifier # for classification\n", 302 | "\n", 303 | "# First, specify the model. \n", 304 | "dtree = DecisionTreeClassifier(min_samples_leaf = 5, max_depth = 6)\n", 305 | "# Then, train the model.\n", 306 | "dtree.fit(X_train,Y_train)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 42, 312 | "id": "a241854b-3584-4065-9da1-6d8130658992", 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/plain": [ 318 | "" 319 | ] 320 | }, 321 | "execution_count": 42, 322 | "metadata": {}, 323 | "output_type": "execute_result" 324 | }, 325 | { 326 | "data": { 327 | "image/png": "\n", 328 | "text/plain": [ 329 | "
" 330 | ] 331 | }, 332 | "metadata": { 333 | "needs_background": "light" 334 | }, 335 | "output_type": "display_data" 336 | } 337 | ], 338 | "source": [ 339 | "importances = pd.DataFrame({'feature': X_train.columns, 'importance': np.round(dtree.feature_importances_,3)})\n", 340 | "importances = importances.sort_values('importance',ascending=False)\n", 341 | "importances.plot.bar(x='feature', figsize=(4,3),fontsize=6)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 43, 347 | "id": "52ad2a29-3f2f-4e7b-a0e4-d3458568214a", 348 | "metadata": {}, 349 | "outputs": [ 350 | { 351 | "data": { 352 | "text/plain": [ 353 | "array([0.06263063, 0.06263063, 0.06263063, 0.10874704, 0.06263063])" 354 | ] 355 | }, 356 | "execution_count": 43, 357 | "metadata": {}, 358 | "output_type": "execute_result" 359 | } 360 | ], 361 | "source": [ 362 | "# Predict a few records\n", 363 | "predictions = dtree.predict_proba(X_test[0:5])[:,1]\n", 364 | "predictions" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "id": "8bc6318f-bca8-4d37-9a44-3f422b387450", 370 | "metadata": {}, 371 | "source": [ 372 | "## Save the model" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 52, 378 | "id": "7d7ef156-3083-45b7-bdb3-17de6f206f34", 379 | "metadata": {}, 380 | "outputs": [ 381 | { 382 | "data": { 383 | "text/plain": [ 384 | "array([0.06263063])" 385 | ] 386 | }, 387 | "execution_count": 52, 388 | "metadata": {}, 389 | "output_type": "execute_result" 390 | } 391 | ], 392 | "source": [ 393 | "import pickle \n", 394 | "# save the model to disk\n", 395 | "modelname = path + '/toymodel.pkl'\n", 396 | "pickle.dump(dtree, open(modelname, 'wb'))\n", 397 | " \n", 398 | "# load the model from disk\n", 399 | "loaded_model = pickle.load(open(modelname, 'rb'))\n", 400 | "predictions = loaded_model.predict_proba(X_test[0:1])[:,1]\n", 401 | "predictions" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 53, 407 | "id": 
"5997a6e5-1fd1-4bc4-96ca-00a4944db0c0", 408 | "metadata": {}, 409 | "outputs": [ 410 | { 411 | "data": { 412 | "text/html": [ 413 | "
\n", 414 | "\n", 427 | "\n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | "
APARTMENTS_AVGLIVINGAPARTMENTS_AVGAccountantsCleaning_staffCooking_staffCore_staffDriversHigh_skill_tech_staffLaborersManagersMedicine_staffOTHERSales_staffSecurity_staff
1543760.01860.101631000000000100
\n", 467 | "
" 468 | ], 469 | "text/plain": [ 470 | " APARTMENTS_AVG LIVINGAPARTMENTS_AVG Accountants Cleaning_staff \\\n", 471 | "154376 0.0186 0.101631 0 0 \n", 472 | "\n", 473 | " Cooking_staff Core_staff Drivers High_skill_tech_staff Laborers \\\n", 474 | "154376 0 0 0 0 0 \n", 475 | "\n", 476 | " Managers Medicine_staff OTHER Sales_staff Security_staff \n", 477 | "154376 0 0 1 0 0 " 478 | ] 479 | }, 480 | "execution_count": 53, 481 | "metadata": {}, 482 | "output_type": "execute_result" 483 | } 484 | ], 485 | "source": [ 486 | "X_test[0:1]" 487 | ] 488 | } 489 | ], 490 | "metadata": { 491 | "kernelspec": { 492 | "display_name": "Python 3", 493 | "language": "python", 494 | "name": "python3" 495 | }, 496 | "language_info": { 497 | "codemirror_mode": { 498 | "name": "ipython", 499 | "version": 3 500 | }, 501 | "file_extension": ".py", 502 | "mimetype": "text/x-python", 503 | "name": "python", 504 | "nbconvert_exporter": "python", 505 | "pygments_lexer": "ipython3", 506 | "version": "3.8.2" 507 | } 508 | }, 509 | "nbformat": 4, 510 | "nbformat_minor": 5 511 | } 512 | -------------------------------------------------------------------------------- /Dickens_A_Tale_of_Two_Cities.txt: -------------------------------------------------------------------------------- 1 | It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, we were all going direct to Heaven, we were all going direct the other way--in short, the period was so far like the present period that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree of comparison only. 
-------------------------------------------------------------------------------- /Explain Your Model with Microsoft's InterpretML-Github.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Explain Your Model with Microsoft's InterpretML\n", 8 | "* pip install -U interpret" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 3, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "from interpret.glassbox import ExplainableBoostingClassifier\n", 18 | "from sklearn.model_selection import train_test_split" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 4, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import pandas as pd\n", 28 | "import numpy as np\n", 29 | "np.random.seed(0)\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "df = pd.read_csv('winequality-red.csv') \n", 32 | "df['quality'] = df['quality'].astype(int)\n", 33 | "\n", 34 | "Y = df['quality']\n", 35 | "X = df[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',\n", 36 | " 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',\n", 37 | " 'pH', 'sulphates', 'alcohol']]\n", 38 | "X_featurenames = X.columns\n", 39 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "## (A) Explore the data" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 22, 52 | "metadata": { 53 | "scrolled": true 54 | }, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/html": [ 59 | "\n", 60 | "" 61 | ] 62 | }, 63 | "metadata": {}, 64 | "output_type": "display_data" 65 | } 66 | ], 67 | "source": [ 68 | "from interpret import show\n", 69 | "from interpret.data import Marginal\n", 70 | "\n", 71 | "marginal = Marginal().explain_data(X_train, Y_train, name = 
'Train Data')\n", 72 | "show(marginal)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "## (B) Train the Explainable Boosting Machine (EBM)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 23, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "ExplainableBoostingRegressor(binning_strategy='uniform', data_n_episodes=2000,\n", 91 | " early_stopping_run_length=50,\n", 92 | " early_stopping_tolerance=1e-05,\n", 93 | " feature_names=['fixed acidity', 'volatile acidity',\n", 94 | " 'citric acid', 'residual sugar',\n", 95 | " 'chlorides', 'free sulfur dioxide',\n", 96 | " 'total sulfur dioxide', 'density',\n", 97 | " 'pH', 'sulphates', 'alcohol'],\n", 98 | " feature_step_n_inner_bags=0,\n", 99 | " feature_types=['c...ntinuous',\n", 100 | " 'continuous', 'continuous',\n", 101 | " 'continuous', 'continuous',\n", 102 | " 'continuous', 'continuous',\n", 103 | " 'continuous', 'continuous',\n", 104 | " 'continuous'],\n", 105 | " holdout_size=0.15, holdout_split=0.15,\n", 106 | " interactions=0, learning_rate=0.01,\n", 107 | " main_attr='all', max_tree_splits=2,\n", 108 | " min_cases_for_splits=2, n_estimators=16, n_jobs=-2,\n", 109 | " random_state=1234, schema=None, scoring=None,\n", 110 | " training_step_episodes=1)" 111 | ] 112 | }, 113 | "execution_count": 23, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "from interpret.glassbox import ExplainableBoostingRegressor, LinearRegression, RegressionTree\n", 120 | "\n", 121 | "lr = LinearRegression(random_state=seed)\n", 122 | "lr.fit(X_train, Y_train)\n", 123 | "\n", 124 | "rt = RegressionTree(random_state=seed)\n", 125 | "rt.fit(X_train, Y_train)\n", 126 | "\n", 127 | "ebm = ExplainableBoostingRegressor(random_state=seed)\n", 128 | "ebm.fit(X_train, Y_train) \n", 129 | "# For Classifier, use ebm = ExplainableBoostingClassifier()" 130 | ] 131 | }, 132 | { 133 | 
"cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "## (C) How Does the EBM Model Perform?" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 35, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "data": { 146 | "text/html": [ 147 | "\n", 148 | "" 149 | ] 150 | }, 151 | "metadata": {}, 152 | "output_type": "display_data" 153 | }, 154 | { 155 | "data": { 156 | "text/html": [ 157 | "\n", 158 | "" 159 | ] 160 | }, 161 | "metadata": {}, 162 | "output_type": "display_data" 163 | }, 164 | { 165 | "data": { 166 | "text/html": [ 167 | "\n", 168 | "" 169 | ] 170 | }, 171 | "metadata": {}, 172 | "output_type": "display_data" 173 | } 174 | ], 175 | "source": [ 176 | "from interpret import show\n", 177 | "from interpret.perf import RegressionPerf\n", 178 | "\n", 179 | "ebm_perf = RegressionPerf(ebm.predict).explain_perf(X_test, Y_test, name='EBM')\n", 180 | "lr_perf = RegressionPerf(lr.predict).explain_perf(X_test, Y_test, name='Linear Regression')\n", 181 | "rt_perf = RegressionPerf(rt.predict).explain_perf(X_test, Y_test, name='Regression Tree')\n", 182 | "show(ebm_perf)\n", 183 | "show(lr_perf)\n", 184 | "show(rt_perf)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "## (D) Global Interpretability - What the Model Says for All Data" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 24, 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "data": { 201 | "text/html": [ 202 | "\n", 203 | "" 204 | ] 205 | }, 206 | "metadata": {}, 207 | "output_type": "display_data" 208 | } 209 | ], 210 | "source": [ 211 | "ebm_global = ebm.explain_global(name='EBM')\n", 212 | "show(ebm_global)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "## (E) Local Interpretability - What the Model Says for Individual Data" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 25, 225 | 
"metadata": {}, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/html": [ 230 | "\n", 231 | "" 232 | ] 233 | }, 234 | "metadata": {}, 235 | "output_type": "display_data" 236 | } 237 | ], 238 | "source": [ 239 | "ebm_local = ebm.explain_local(X_test[:5], Y_test[:5], name='EBM')\n", 240 | "show(ebm_local)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "## (F) Put All in a Dashboard - This is the Best" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 36, 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/html": [ 258 | "\n", 259 | "Open in new window" 260 | ] 261 | }, 262 | "metadata": {}, 263 | "output_type": "display_data" 264 | } 265 | ], 266 | "source": [ 267 | "show([marginal, lr_global, lr_perf, rt_global, rt_perf, ebm_perf, ebm_global, ebm_local])" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [] 276 | } 277 | ], 278 | "metadata": { 279 | "kernelspec": { 280 | "display_name": "Python 3", 281 | "language": "python", 282 | "name": "python3" 283 | }, 284 | "language_info": { 285 | "codemirror_mode": { 286 | "name": "ipython", 287 | "version": 3 288 | }, 289 | "file_extension": ".py", 290 | "mimetype": "text/x-python", 291 | "name": "python", 292 | "nbconvert_exporter": "python", 293 | "pygments_lexer": "ipython3", 294 | "version": "3.8.2" 295 | } 296 | }, 297 | "nbformat": 4, 298 | "nbformat_minor": 4 299 | } 300 | -------------------------------------------------------------------------------- /Histogram-based Outlier Score.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "f12ec631-0d57-4a6d-a0ec-7940abac6968", 6 | "metadata": {}, 7 | "source": [ 8 | "## Histogram-based Outlier Score (HBOS)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | 
"execution_count": null, 14 | "id": "ef787f94-8ea5-40a0-b8ea-51d906fc4d24", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 113, 22 | "id": "9021294f-5f48-4f53-8b36-773852531a1f", 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "image/png": "\n", 28 | "text/plain": [ 29 | "
" 30 | ] 31 | }, 32 | "metadata": { 33 | "needs_background": "light" 34 | }, 35 | "output_type": "display_data" 36 | } 37 | ], 38 | "source": [ 39 | "import numpy as np\n", 40 | "import pandas as pd\n", 41 | "import matplotlib.pyplot as plt\n", 42 | "from pyod.utils.data import generate_data\n", 43 | "contamination = 0.1 # percentage of outliers\n", 44 | "n_train = 500 # number of training points\n", 45 | "n_test = 500 # number of testing points\n", 46 | "n_features = 2 # number of features\n", 47 | "X_train, X_test, y_train, y_test = generate_data(\n", 48 | " n_train=n_train, \n", 49 | " n_test=n_test, \n", 50 | " n_features= n_features, \n", 51 | " contamination=contamination, \n", 52 | " random_state=123)\n", 53 | "\n", 54 | "# Make the 2d numpy array a pandas dataframe for easy manipulation \n", 55 | "X_train_pd = pd.DataFrame(X_train)\n", 56 | " \n", 57 | "# Plot\n", 58 | "plt.scatter(X_train_pd[0], X_train_pd[1], c=y_train, alpha=0.8)\n", 59 | "plt.title('Scatter plot')\n", 60 | "plt.xlabel('x0')\n", 61 | "plt.ylabel('x1')\n", 62 | "plt.show()" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 90, 68 | "id": "ea5b38f5-3d40-43b2-a465-1810e5a0b363", 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "from pyod.models.hbos import HBOS\n", 73 | "n_bins = 50\n", 74 | "hbos = HBOS(n_bins=n_bins)\n", 75 | "hbos.fit(X_train)\n", 76 | "HBOS(alpha=0.1, contamination=0.1, n_bins=n_bins, tol=0.5)\n", 77 | "y_train_scores = hbos.decision_function(X_train)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 91, 83 | "id": "6def6357-3ff3-47c2-b4bb-1b46fb26a2b0", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "y_test_pred = hbos.predict(X_test) # outlier labels (0 or 1)\n", 88 | "# Because it is '0' and '1', we can run a count statistic. 
\n", 89 | "unique, counts = np.unique(y_test_pred, return_counts=True)\n", 90 | "dict(zip(unique, counts))\n", 91 | "# And you can generate the anomaly score using clf.decision_function:\n", 92 | "y_test_scores = hbos.decision_function(X_test)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 114, 98 | "id": "df278797-c345-45f8-9dce-99b895ae5dcf", 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEWCAYAAACdaNcBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAUXElEQVR4nO3df5TddZ3f8ecLAiK/oQTKTwd34++uojkC0lXbuKe4uIaeytlQ0bDLNmfXn8uxxw3sdqHbsyy2Hqpdq20WtWmlCmVZSWV1TaPgoVVqEKpiEFh+BgIJughBRQPv/nG/kZthZjIzd2buzCfPxzlz7v3+fn+/ybzmcz/33s83VYUkqS17DbsASdLMM9wlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuOsXktyW5I3DrmM+SHJhkssnWH5ukhvnsqaZtrtzSPLFJCvnsibNHMN9D5Hk3iRvGjVvl1/uqnp5VV2/m/2MJKkki2ap1Hmhqi6pqt+BmTnn7vqPDFpXkouTfGaK6188nWNV1Zurau10ttXwGe6aV1r/oyHNFcNdv9Dfuk/y2iQbkzye5JEkl3Wrfa17fCzJ9iSnJtkryR8luS/J1iT/Nckhfft9Z7fsB0n+1ajjXJzk6iSfSfI4cG537K8neSzJliQfS7Jv3/4qybuS3JnkiST/Jskvdds8nuSq/vVHneN9SV7TPT+n29fLuunfSfL5vrp2tpCfc859+/twkr9Lck+SN0/yOp+R5Jau1gf6W9ZJ3phk81j/LklOBy4EfrOr4/91y49Jsi7JD5PcleRfTKaOZ3efP0/yoyS3J1nWt+D6JDtfvZyb5Mbxzrdbfnf373FPkrdPoQbNAsNd4/ko8NGqOhj4JeCqbv7ru8dDq+rAqvo6cG7384+AFwIHAh8D6ILz48DbgaOBQ4BjRx1rOXA1cChwBfA0cD5wBHAqsAx416htTgdeA5wCfBBY0x3jeOAVwNnjnNcNwBv7zuVu4A190zeMsc1Y5wxwMvD9rs5/C3wyScY6aFWNVNW93eSTwDu78z0D+L0kZ45Tb/8+vgRcAlzZ1fHKbtFngc3AMcDbgEt2hnRVXVxVF0+w25PpXYMjgIuAa5IcPsG6zznfJAcA/wF4c1UdBLwOuHV356PZZbjvWT7ftYYfS/IYvdAdz8+BX05yRFVtr6pvTLDu24HLquruqtoOXACs6LpY3gb8z6q6sap+BvwxMHpAo69X1eer6pmq+klV3VxV36iqHV0g/meeDeCdPlRVj1fVbcB3gS93x/8R8EXgpHFqvaFvX78K/Fnf9BsYO9zHc19V/UVVPQ2spffH66jdbVRV11fVd7rz/Ta9cB59fpOS5HjgHwJ/UFU/rapbgcuBd0xyF1uBj1TVz6vqSnrhfcY46050vs8Ar0jy/Kra0v27aIgM9z3LmVV16M4fntsa7nce8CLg9iTfTPKWCdY9Brivb/o+YBG9X/xjgAd2LqiqHwM/GLX9A/0TSV6U5AtJHu66ai6h11rs90jf85+MMX3gOLXeAPxqkr8P7A1cCZzWvdl
5CFNrcT6880l3Xkxw3F9IcnKSrybZluRHwO/y3PObrGOAH1bVE33z7uO5r47G82DtOnrgfd0+xzLm+VbVk8Bv0juPLUmuS/KSSR5fs8Rw15iq6s6qOhs4EvgQcHX38nusYUQfAl7QN30CsINe4G4Bjtu5IMnzgb83+nCjpj8B3A4s6bqFLgTG7O6Yqqq6C/gx8D7ga10oPgysAm6sqmfG2mwmjt3nvwPrgOOr6hDgP/Hs+T0J7L9zxSR7A4snqOUh4PAkB/XNOwF4cJK1HDuqK+mEbp9TUlV/U1W/Rq81fzvwF1Pdh2aW4a4xdW82Lu7C7rFu9tPANnovwV/Yt/pngfOTnJjkQJ7tF95Bry/9N5K8rnuT81+z+6A+CHgc2N61AH9vps6rcwPwHp7tgrl+1PRoY53zIA6i19r+aZLXAv+8b9kdwH7dm677AH8EPK9v+SPASJK9AKrqAeD/AH+WZL8kv0LvVdcVk6zlSOB9SfZJchbwUuCvp3IySY5K8tbuj/9TwHZ6/1c0RIa7xnM6cFuS7fTeXF3R9en+GPhT4H93ffenAJ8C/hu9T5XcA/wUeC9A1/f6XuBz9FrxT9Dr531qgmP/S3qB9wS9FuCVM3xuN9AL2K+NM72Lcc55EO8C/iTJE/Teg9j5ZjXdewbvotdv/iC9lnz/p2f+R/f4gyTf6p6fDYzQa3H/FXBRVa2fZC03AUuAR+md49uqanS32e7sBXygO/4P6b1/MFGXn+ZAvFmH5lLXsn+MXpfLPUMuR2qWLXfNuiS/kWT/7mX7h4HvAPcOtyqpbYa75sJyei/ZH6LXBbCifMkozSq7ZSSpQbbcJalB82KQpiOOOKJGRkaGXYYkLSg333zzo1W1eKxl8yLcR0ZG2Lhx47DLkKQFJcl94y2zW0aSGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1aLfhnuRT6d0X87t98w5Psj69e1iuT3JY37ILuvs4fj/JP5mtwiVJ45tMy/2/0Bv+td9qYENVLQE2dNM775e5Anh5t83Hu5sNSJLm0G7Dvaq+Rm+M5n7L6d1Dke7xzL75n6uqp7rhXO8CXjszpUqSJmu631A9qqq2AFTVliRHdvOPBfpvpLyZce7lmGQVvVubccIJJ0yzjOEZWX3dtLa799Lx7j0sSTNnpt9QHev2aWMOO1lVa6pqaVUtXbx4zKERJEnTNN1wfyTJ0QDd49Zu/mbg+L71jmMaN9uVJA1muuG+DljZPV8JXNs3f0WS5yU5kd6NGf7vYCVKkqZqt33uST4LvBE4Islm4CLgUuCqJOcB9wNnQe9myEmuAr4H7ADeXVXeBV2S5thuw72qzh5n0bJx1v9TendRlyQNid9QlaQGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDVooHBPcn6S25J8N8lnk+yX5PAk65Pc2T0eNlPFSpImZ9rhnuRY4H3A0qp6BbA3sAJYDWyoqiXAhm5akjSHBu2WWQQ8P8kiYH/gIWA5sLZbvhY4c8BjSJKmaNrhXlUPAh8G7ge2AD+qqi8DR1XVlm6dLcCRY22fZFWSjUk2btu2bbplSJLGMEi3zGH0WuknAscAByQ5Z7LbV9WaqlpaVUsXL1483TIkSWMYpFvmTcA9VbWtqn4OXAO8DngkydEA3ePWwcuUJE3FIOF+P3BKkv2TBFgGbALWASu7dVYC1w5WoiRpqhZNd8OquinJ1cC3gB3ALcAa4EDgqiTn0fsDcNZMFCpJmrxphztAVV0EXDRq9lP0WvGSpCEZKNwXupHV1w27BEmaFQ4/IEkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWq
Q4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwYK9ySHJrk6ye1JNiU5NcnhSdYnubN7PGymipUkTc6gLfePAl+qqpcArwQ2AauBDVW1BNjQTUuS5tC0wz3JwcDrgU8CVNXPquoxYDmwtlttLXDmYCVKkqZqkJb7C4FtwKeT3JLk8iQHAEdV1RaA7vHIGahTkjQFg4T7IuDVwCeq6iTgSabQBZNkVZKNSTZu27ZtgDIkSaMNEu6bgc1VdVM3fTW9sH8kydEA3ePWsTauqjVVtbSqli5evHiAMiRJo0073KvqYeCBJC/uZi0DvgesA1Z281YC1w5UoSRpyhYNuP17gSuS7AvcDfwWvT8YVyU5D7gfOGvAY0iSpmigcK+qW4GlYyxaNsh+JUmD8RuqktQgw12SGmS4S1KDBn1DVQvAyOrrpr3tvZeeMYOVSJorttwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQn3OfY37mXNJcsOUuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkN8huqC8gg326VtGex5S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDVo4HBPsneSW5J8oZs+PMn6JHd2j4cNXqYkaSpmouX+fmBT3/RqYENVLQE2dNOSpDk0ULgnOQ44A7i8b/ZyYG33fC1w5iDHkCRN3aAt948AHwSe6Zt3VFVtAegejxxrwySrkmxMsnHbtm0DliFJ6jftcE/yFmBrVd08ne2rak1VLa2qpYsXL55uGZKkMQxys47TgLcm+XVgP+DgJJ8BHklydFVtSXI0sHUmCpUkTd60W+5VdUFVHVdVI8AK4CtVdQ6wDljZrbYSuHbgKiVJUzIbt9m7FLgqyXnA/cBZs3CMXXj7OUna1YyEe1VdD1zfPf8BsGwm9itJmh6/oSpJDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1KBFwy5A6jey+rppb3vvpWfMYCXSwmbLXZIaZLhLUoOmHe5Jjk/y1SSbktyW5P3d/MOTrE9yZ/d42MyVK0majEFa7juAD1TVS4FTgHcneRmwGthQVUuADd20JGkOTTvcq2pLVX2re/4EsAk4FlgOrO1WWwucOWCNkqQpmpFPyyQZAU4CbgKOqqot0PsDkOTIcbZZBawCOOGEE2aiDM2C6X56xU+uSMM18BuqSQ4E/hL4/ap6fLLbVdWaqlpaVUsXL148aBmSpD4DhXuSfegF+xVVdU03+5EkR3fLjwa2DlaiJGmqBvm0TIBPApuq6rK+ReuAld3zlcC10y9PkjQdg/S5nwa8A/hOklu7eRcClwJXJTkPuB84a6AKJUlTNu1wr6obgYyzeNl09ytJGpzfUJWkBjlwmGbFIAOASRqcLXdJapDhLkkNMtwlqUH2uUvSgObjTWZsuUtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoP8hqqa4c28BfPz26LDYMtdkhpkuEtSgwx3SWqQ4S5JDTLcJalBflpGGgI/0aHZZstdkhpky13SvDTIq5uFdMzZYstdkhpky10aQEstPbXFlrskNchwl6QG2S2jPZ5dK2qRLXdJapAtd0mzxldFw2PLXZIaZMtd0m7ZAl94Zq3lnuT0JN9PcleS1bN1HEnSc81Kyz3J3sB/BH4N2Ax8M8m6qvrebBxP2pPYitZkzFbL/bX
AXVV1d1X9DPgcsHyWjiVJGmW2+tyPBR7om94MnNy/QpJVwKpucnuS7+9mn0cAj85Yhe3x+kzM6zMxr8/EZu365EMDbf6C8RbMVrhnjHm1y0TVGmDNpHeYbKyqpYMW1iqvz8S8PhPz+kxsIV6f2eqW2Qwc3zd9HPDQLB1LkjTKbIX7N4ElSU5Msi+wAlg3S8eSJI0yK90yVbUjyXuAvwH2Bj5VVbcNuNtJd+Hsobw+E/P6TMzrM7EFd31SVbtfS5K0oDj8gCQ1yHCXpAYtqHBP8u+S3J7k20n+Ksmhw65pPnCoh/ElOT7JV5NsSnJbkvcPu6b5JsneSW5J8oVh1zLfJDk0ydVd7mxKcuqwa5qsBRXuwHrgFVX1K8AdwAVDrmfo+oZ6eDPwMuDsJC8bblXzyg7gA1X1UuAU4N1en+d4P7Bp2EXMUx8FvlRVLwFeyQK6Tgsq3Kvqy1W1o5v8Br3Pz+/pHOphAlW1paq+1T1/gt4v57HDrWr+SHIccAZw+bBrmW+SHAy8HvgkQFX9rKoeG2pRU7Cgwn2U3wa+OOwi5oGxhnowvMaQZAQ4CbhpyKXMJx8BPgg8M+Q65qMXAtuAT3fdVpcnOWDYRU3WvAv3JP8ryXfH+Fnet84f0nu5fcXwKp03djvUgyDJgcBfAr9fVY8Pu575IMlbgK1VdfOwa5mnFgGvBj5RVScBTwIL5j2teXezjqp600TLk6wE3gIsKz+kDw71sFtJ9qEX7FdU1TXDrmceOQ14a5JfB/YDDk7ymao6Z8h1zRebgc1VtfOV3tUsoHCfdy33iSQ5HfgD4K1V9eNh1zNPONTDBJKEXp/ppqq6bNj1zCdVdUFVHVdVI/T+33zFYH9WVT0MPJDkxd2sZcCCuSfFvGu578bHgOcB63u/s3yjqn53uCUN1ywN9dCS04B3AN9Jcms378Kq+uvhlaQF5L3AFV3D6W7gt4Zcz6Q5/IAkNWhBdctIkibHcJekBhnuktQgw12SGmS4S1KDDHftUZJsHzV9bpKPdc8vTvJgklu7UQA/kWSvbtm+ST6S5G+T3Jnk2m5clp37+cNu1Mlvd9ufPLdnJu3KcJd29e+r6lX0Rtj8B8AbuvmXAAcBL6qqJcDngWvScyq9b02/uhux9E3sOt6PNOcW2peYpLmyL72v5P9dkv3pfXnlxKp6GqCqPp3kt4F/DBwCPFpVT3XLHh1SzdIv2HLXnub5XbfJrd03Vv9k1PLzu/lbgDuq6lbgl4H7xxhwbCPwcuDLwPFJ7kjy8SRvQBoyw117mp9U1at2/gB/PGr5zm6ZI4EDkqygN/LmWF/lDlBVtR14DbCK3hCxVyY5d5bqlybFcJfGUFU/B75E72YNdwEvSHLQqNVeTTeQVFU9XVXXV9VFwHuAfzaX9UqjGe7SGLrRJF8H/G1VPQmsBS7rbmtIkncC+wNfSfLiJEv6Nn8VcN8clyztwjdUpV2dn+QcYB/g28DHu/kXAB8G7kjyDHA78E+rqrobgfx5d8P2HfRa+qvmvHKpj6NCSlKD7JaRpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalB/x9Tap4lBbk+wQAAAABJRU5ErkJggg==\n", 104 | "text/plain": [ 105 | "
" 106 | ] 107 | }, 108 | "metadata": { 109 | "needs_background": "light" 110 | }, 111 | "output_type": "display_data" 112 | } 113 | ], 114 | "source": [ 115 | "import matplotlib.pyplot as plt\n", 116 | "plt.hist(y_test_scores, bins='auto') # arguments are passed to np.histogram\n", 117 | "plt.title(\"Histogram with 'auto' bins\")\n", 118 | "plt.xlabel('HBOS')\n", 119 | "plt.show()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 110, 125 | "id": "7b0fa41b-c95c-496f-938d-9586ff32f1c5", 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/html": [ 131 | "
\n", 132 | "\n", 145 | "\n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | "
GroupCountCount %01Anomaly_Score
0Normal45891.61.9901061.970008-0.215748
1Outlier428.4-0.036965-0.0794195.836369
\n", 178 | "
" 179 | ], 180 | "text/plain": [ 181 | " Group Count Count % 0 1 Anomaly_Score\n", 182 | "0 Normal 458 91.6 1.990106 1.970008 -0.215748\n", 183 | "1 Outlier 42 8.4 -0.036965 -0.079419 5.836369" 184 | ] 185 | }, 186 | "execution_count": 110, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "threshold = 4\n", 193 | "# Let's see how many '0's and '1's.\n", 194 | "df_test = pd.DataFrame(X_test)\n", 195 | "df_columns = df_test.columns\n", 196 | "df_test['Anomaly_Score'] = y_test_scores\n", 197 | "df_test['Group'] = np.where(df_test['Anomaly_Score']< threshold, 'Normal', 'Outlier')\n", 198 | "\n", 199 | "# Now let's show the summary statistics:\n", 200 | "cnt = df_test[['Group','Anomaly_Score']].groupby('Group').count().reset_index().rename(columns={'Anomaly_Score':'Count'})\n", 201 | "cnt['Count %'] = (cnt['Count'] / cnt['Count'].sum()) * 100 # The count and count %\n", 202 | "stat = df_test.groupby('Group').mean().reset_index() # The avg.\n", 203 | "cnt.merge(stat, left_on='Group',right_on='Group') # Put the count and the avg. together" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 111, 209 | "id": "0bf7362d-5fd3-4151-a61e-b148a7607c98", 210 | "metadata": {}, 211 | "outputs": [ 212 | { 213 | "data": { 214 | "text/html": [ 215 | "
\n", 216 | "\n", 229 | "\n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | "
Pred01
Actual
0.04500
1.0842
\n", 255 | "
## OOP
import math


class cookie():
    """A round cookie described by its radius and its flavor.

    Attributes:
        radius: Radius of the cookie, in whatever length unit the caller uses.
        theFlavor: Flavor label; the demo statements below concatenate it
            into a printed sentence.
    """

    def __init__(self, r, flavor):
        """Store the radius ``r`` and the ``flavor`` label on the instance."""
        self.radius = r
        self.theFlavor = flavor

    def area(self):
        """Return the area of the cookie's circular face (pi * radius**2)."""
        # math.pi replaces the truncated literal 3.1416 for full float precision.
        return math.pi * self.radius * self.radius

    def perimeter(self):
        """Return the circumference of the cookie (2 * pi * radius)."""
        return 2 * math.pi * self.radius


# The statements below sit at module level without a __main__ guard, so they
# run (and print) whenever this module is executed or imported.
smallCookie = cookie(3,'rasin')
largeCookie = cookie(10,'chocolate')

# smallCookie
print("My small cookie is a " + smallCookie.theFlavor + " cookie.")
print("It's area is: ")
print(smallCookie.area())
print("And it's perimeter is: ")
print(smallCookie.perimeter())

# largeCookie
print("My large cookie is a " + largeCookie.theFlavor + " cookie.")
print("It's area is: ")
print(largeCookie.area())
print("And it's perimeter is: ")
print(largeCookie.perimeter())
the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us'\n", 71 | "long_set = set(long.split())\n", 72 | "len(long_set)" 73 | ], 74 | "metadata": { 75 | "colab": { 76 | "base_uri": "https://localhost:8080/" 77 | }, 78 | "id": "J9sM5XMnnHar", 79 | "outputId": "96a32bb6-fc9f-4cde-c535-594ba34b9a03" 80 | }, 81 | "execution_count": 67, 82 | "outputs": [ 83 | { 84 | "output_type": "execute_result", 85 | "data": { 86 | "text/plain": [ 87 | "28" 88 | ] 89 | }, 90 | "metadata": {}, 91 | "execution_count": 67 92 | } 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "source": [ 98 | "# Short\n", 99 | "short='It was an age of wisdom, foolishness, belief, Light, Darkness, hope, and despair, with both light and darkness.'\n", 100 | "short_set = set(short.split())" 101 | ], 102 | "metadata": { 103 | "id": "_o81DpWxnPck" 104 | }, 105 | "execution_count": 68, 106 | "outputs": [] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "source": [ 111 | "scores0 = rouge.get_scores(short, long)\n", 112 | "scores0" 113 | ], 114 | "metadata": { 115 | "colab": { 116 | "base_uri": "https://localhost:8080/" 117 | }, 118 | "id": "NiWJTFpE3qH1", 119 | "outputId": "67800911-98ee-47ac-9ec7-c4b01d3bc823" 120 | }, 121 | "execution_count": 69, 122 | "outputs": [ 123 | { 124 | "output_type": "execute_result", 125 | "data": { 126 | "text/plain": [ 127 | "[{'rouge-1': {'r': 0.39285714285714285,\n", 128 | " 'p': 0.6470588235294118,\n", 129 | " 'f': 0.4888888841876543},\n", 130 | " 'rouge-2': {'r': 0.06976744186046512,\n", 131 | " 'p': 0.17647058823529413,\n", 132 | " 'f': 0.09999999593888906},\n", 133 | " 'rouge-l': {'r': 0.39285714285714285,\n", 134 | " 'p': 0.6470588235294118,\n", 135 | " 'f': 0.4888888841876543}}]" 136 | ] 137 | }, 138 | "metadata": {}, 139 | "execution_count": 69 140 | } 141 | ] 142 | 
}, 143 | { 144 | "cell_type": "code", 145 | "source": [ 146 | "overlap = long_set.intersection(short_set)\n", 147 | "len(overlap)" 148 | ], 149 | "metadata": { 150 | "colab": { 151 | "base_uri": "https://localhost:8080/" 152 | }, 153 | "id": "jiWLlOx4nfiN", 154 | "outputId": "42ce3bbc-9293-46f3-c15e-9d434f23d3e3" 155 | }, 156 | "execution_count": 70, 157 | "outputs": [ 158 | { 159 | "output_type": "execute_result", 160 | "data": { 161 | "text/plain": [ 162 | "11" 163 | ] 164 | }, 165 | "metadata": {}, 166 | "execution_count": 70 167 | } 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "source": [ 173 | "print('recall:')\n", 174 | "recall = len(overlap) / len(long_set)\n", 175 | "recall" 176 | ], 177 | "metadata": { 178 | "colab": { 179 | "base_uri": "https://localhost:8080/" 180 | }, 181 | "id": "VkL3r7wrn23f", 182 | "outputId": "2a112612-558c-4842-d0be-7683ebc53320" 183 | }, 184 | "execution_count": 71, 185 | "outputs": [ 186 | { 187 | "output_type": "stream", 188 | "name": "stdout", 189 | "text": [ 190 | "recall:\n" 191 | ] 192 | }, 193 | { 194 | "output_type": "execute_result", 195 | "data": { 196 | "text/plain": [ 197 | "0.39285714285714285" 198 | ] 199 | }, 200 | "metadata": {}, 201 | "execution_count": 71 202 | } 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "source": [ 208 | "print('precision:')\n", 209 | "precision = len(overlap) / len(short_set)\n", 210 | "precision" 211 | ], 212 | "metadata": { 213 | "colab": { 214 | "base_uri": "https://localhost:8080/" 215 | }, 216 | "id": "7pUCas2xn79_", 217 | "outputId": "3437206f-6983-4f01-9cd7-04612c819cad" 218 | }, 219 | "execution_count": 72, 220 | "outputs": [ 221 | { 222 | "output_type": "stream", 223 | "name": "stdout", 224 | "text": [ 225 | "precision:\n" 226 | ] 227 | }, 228 | { 229 | "output_type": "execute_result", 230 | "data": { 231 | "text/plain": [ 232 | "0.6470588235294118" 233 | ] 234 | }, 235 | "metadata": {}, 236 | "execution_count": 72 237 | } 238 | ] 239 | }, 240 | { 241 | 
"cell_type": "code", 242 | "source": [ 243 | "f = 2/(1/recall+1/precision)\n", 244 | "f" 245 | ], 246 | "metadata": { 247 | "colab": { 248 | "base_uri": "https://localhost:8080/" 249 | }, 250 | "id": "GvZf7_sCo32l", 251 | "outputId": "c89e6928-d616-46bf-c072-337a5ca1ceab" 252 | }, 253 | "execution_count": 73, 254 | "outputs": [ 255 | { 256 | "output_type": "execute_result", 257 | "data": { 258 | "text/plain": [ 259 | "0.4888888888888889" 260 | ] 261 | }, 262 | "metadata": {}, 263 | "execution_count": 73 264 | } 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "source": [], 270 | "metadata": { 271 | "id": "7YfBf_2uy2_A" 272 | }, 273 | "execution_count": 73, 274 | "outputs": [] 275 | } 276 | ] 277 | } -------------------------------------------------------------------------------- /S3_sample.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "9a1b92a5-8216-4784-abcb-bbaa6d5001e8", 6 | "metadata": {}, 7 | "source": [ 8 | "## Run AWS BOTO3" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "e04f5082-e399-4927-a86e-a0270bce9542", 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "put-everything\n", 22 | "speech-aid\n" 23 | ] 24 | } 25 | ], 26 | "source": [ 27 | "import boto3\n", 28 | "\n", 29 | "# Let's use Amazon S3\n", 30 | "s3 = boto3.resource('s3')\n", 31 | "\n", 32 | "# Print out bucket names\n", 33 | "for bucket in s3.buckets.all():\n", 34 | " print(bucket.name)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "id": "8d48b45a-1d6e-4450-afaa-a98b7920ae6c", 40 | "metadata": {}, 41 | "source": [ 42 | "### Upload a file" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "id": "e263bd42", 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "s3.Object(bucket_name='put-everything', 
key='sample.mp3')" 55 | ] 56 | }, 57 | "execution_count": 2, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "data = open('C:/Users/sample.mp3', 'rb')\n", 64 | "s3.Bucket('put-everything').put_object(Key='sample.mp3', Body=data)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "id": "af2778da-762c-4f17-9350-1c7199314eea", 70 | "metadata": {}, 71 | "source": [ 72 | "### Download a file" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 3, 78 | "id": "59ef773d-bad9-47db-b363-0e00c27fcd20", 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "s3 = boto3.client('s3')\n", 83 | "ff = 'C:/Users/sun2.jpg'\n", 84 | "s3.download_file('put-everything', 'sun.jpg', 'C:/Users/sample2.mp3')" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "id": "2253bcec-15fc-44e5-8717-57529dcffd00", 90 | "metadata": {}, 91 | "source": [ 92 | "### Use AWS Transcribe" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 4, 98 | "id": "a2212c02-6e4f-4a93-aaa4-8ec146d43570", 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "What is a little bear with no teeth is called a gummy bear?\n" 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "from __future__ import print_function\n", 111 | "import time\n", 112 | "import boto3\n", 113 | "import urllib.request\n", 114 | "import json\n", 115 | "transcribe = boto3.client('transcribe')\n", 116 | "job_name = \"job10\"\n", 117 | "job_uri = \"s3://put-everything/sample.mp3\"\n", 118 | "transcribe.start_transcription_job(\n", 119 | " TranscriptionJobName=job_name,\n", 120 | " Media={'MediaFileUri': job_uri},\n", 121 | " MediaFormat='mp3',\n", 122 | " LanguageCode='en-US'\n", 123 | ")\n", 124 | "while True:\n", 125 | " status = transcribe.get_transcription_job(TranscriptionJobName=job_name)\n", 126 | " if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 
'FAILED']:\n", 127 | " response = urllib.request.urlopen(status['TranscriptionJob']['Transcript']['TranscriptFileUri'])\n", 128 | " data = json.loads(response.read())\n", 129 | " text = data['results']['transcripts'][0]['transcript']\n", 130 | " print(text)\n", 131 | " break\n" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "id": "688cb344-62f5-4eef-aa4f-bfe91cb107d8", 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "id": "13ddf2bd-568b-4080-81dc-ebbdc651da24", 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "id": "fdedff61-de70-43f5-a2d4-7677d14bf8d9", 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [] 157 | } 158 | ], 159 | "metadata": { 160 | "kernelspec": { 161 | "display_name": "Python 3 (ipykernel)", 162 | "language": "python", 163 | "name": "python3" 164 | }, 165 | "language_info": { 166 | "codemirror_mode": { 167 | "name": "ipython", 168 | "version": 3 169 | }, 170 | "file_extension": ".py", 171 | "mimetype": "text/x-python", 172 | "name": "python", 173 | "nbconvert_exporter": "python", 174 | "pygments_lexer": "ipython3", 175 | "version": "3.8.12" 176 | } 177 | }, 178 | "nbformat": 4, 179 | "nbformat_minor": 5 180 | } 181 | -------------------------------------------------------------------------------- /TA in R for article.md: -------------------------------------------------------------------------------- 1 | ## Technical Mean Reversion Anomaly 2 | 3 | We find some stocks seem reverting around a trend line, although stock price movement is complex. We notice the price of a stock in the short term can deviate from its long-term trend line. The long-term trend can be characterized by a moving average line or a regression. We often find the short-term prices deviate and then reverts back to the regression line. 
Of the various combinations of stock trading strategies, a common type of trading strategy is mean reversion. It identifies anomalous opportunities as the entry or exit points. This strategy has been quite popular among traders. 4 | 5 | - Learning objective 1: Use the R library ["quantmod"](https://cran.r-project.org/web/packages/quantmod/quantmod.pdf), ["TTR"](https://cran.r-project.org/web/packages/TTR/TTR.pdf), and ["PerformanceAnalytics"](https://cran.r-project.org/web/packages/PerformanceAnalytics/PerformanceAnalytics.pdf) 6 | - Learning objective 2: Common stock data transformation 7 | - Learning objective 3: The basic characteristics of stock returns 8 | - Learning objective 4: Common technical indicators 9 | - Learning objective 5: How to plot technical charts 10 | - Learning objective 6: Develop your trading strategy & signals 11 | - Learning objective 7: Backtesting 12 | - Learning objective 8: Use technical indicators as features for machine learning 13 | - Learning objective 9: Python [TA-lib](https://mrjbq7.github.io/ta-lib/). TA-Lib is widely used by trading software developers to perform technical analysis of financial market data. 
14 | 15 | ### Learning objective 1: Use the R library ["quantmod"](https://cran.r-project.org/web/packages/quantmod/quantmod.pdf), ["TTR"](https://cran.r-project.org/web/packages/TTR/TTR.pdf), and ["PerformanceAnalytics"](https://cran.r-project.org/web/packages/PerformanceAnalytics/PerformanceAnalytics.pdf) 16 | 17 | 18 | ```R 19 | #install.packages("PerformanceAnalytics") 20 | ``` 21 | 22 | 23 | ```R 24 | # The easiest way to get dplyr is to install the whole tidyverse: 25 | library(tidyverse) # https://www.tidyverse.org/ 26 | library(dplyr) # or just dplyr 27 | library(quantmod) 28 | library(TTR) 29 | library(PerformanceAnalytics) 30 | library("IRdisplay") 31 | ``` 32 | 33 | 34 | ```R 35 | getSymbols(c("AMZN","DAL")) 36 | ``` 37 | 38 | ‘getSymbols’ currently uses auto.assign=TRUE by default, but will 39 | use auto.assign=FALSE in 0.5-0. You will still be able to use 40 | ‘loadSymbols’ to automatically load data. getOption("getSymbols.env") 41 | and getOption("getSymbols.auto.assign") will still be checked for 42 | alternate defaults. 43 | 44 | This message is shown once per session and may be disabled by setting 45 | options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details. 46 | 47 | 48 | 49 | 50 |
    51 |
  1. 'AMZN'
  2. 52 |
  3. 'DAL'
  4. 53 |
54 | 55 | 56 | 57 | 58 | ```R 59 | df <- AMZN 60 | head(df) 61 | ``` 62 | 63 | 64 | AMZN.Open AMZN.High AMZN.Low AMZN.Close AMZN.Volume AMZN.Adjusted 65 | 2007-01-03 38.68 39.06 38.05 38.70 12405100 38.70 66 | 2007-01-04 38.59 39.14 38.26 38.90 6318400 38.90 67 | 2007-01-05 38.72 38.79 37.60 38.37 6619700 38.37 68 | 2007-01-08 38.22 38.31 37.17 37.50 6783000 37.50 69 | 2007-01-09 37.60 38.06 37.34 37.78 5703000 37.78 70 | 2007-01-10 37.49 37.70 37.07 37.15 6527500 37.15 71 | 72 | 73 | ### Learning objective 2: Common stock data transformation 74 | * These common stock data transformation can be handled easily by the functions in the quantmod library 75 | 76 | 77 | ```R 78 | df2 <- df 79 | 80 | # Returns from Open to Close, Hi to Close, or Close to Close 81 | df2$OpCl <- OpCl(df2) 82 | df2$OpOp <- OpOp(df2) 83 | df2$HiCl <- HiCl(df2) 84 | df2$ClCl <- ClCl(df2) 85 | 86 | df2$pcntOpCl1 <- Delt(Op(df2),Cl(df2),k=1) 87 | df2$pcntOpCl2 <- Delt(Op(df2),Cl(df2),k=2) 88 | df2$pcntOpCl3 <- Delt(Op(df2),Cl(df2),k=3) 89 | 90 | #One period lag of the close 91 | df2$lagCl <- Lag(Cl(df2)) 92 | df2$lag2Cl <- Lag(Cl(df2),2) 93 | df2$lag3Cl <- Lag(Cl(df2),3) 94 | 95 | # Move up the OpCl by one period 96 | df2$nextOpCl <- Next(OpCl(df2)) 97 | 98 | #head(df2) 99 | ``` 100 | 101 | 102 | ```R 103 | df.monthly <- to.monthly(df) 104 | df.monthly$month <- format(index(df.monthly),"%Y%m") 105 | df.monthly$year <- format(index(df.monthly),"%Y") 106 | head(df.monthly) 107 | ``` 108 | 109 | 110 | df.Open df.High df.Low df.Close df.Volume df.Adjusted month year 111 | Jan 2007 38.68 39.14 36.30 37.67 130435300 37.67 200701 2007 112 | Feb 2007 37.95 42.00 36.68 39.14 157975400 39.14 200702 2007 113 | Mar 2007 39.32 40.24 37.04 39.79 142153100 39.79 200703 2007 114 | Apr 2007 39.85 63.84 39.55 61.33 346287000 61.33 200704 2007 115 | May 2007 61.12 73.31 59.70 69.14 330242400 69.14 200705 2007 116 | Jun 2007 68.90 74.72 66.71 68.41 238788700 68.41 200706 2007 117 | 118 | 119 | 120 | ```R 121 | 
rtn.daily <- dailyReturn(df) # returns by day 122 | rtn.weekly <- weeklyReturn(df) # returns by week 123 | rtn.monthly <- monthlyReturn(df) # returns by month, indexed by yearmon 124 | # daily,weekly,monthly,quarterly, and yearly 125 | rtn.allperiods <- allReturns(df) # note the plural 126 | head(rtn.daily) 127 | ``` 128 | 129 | 130 | daily.returns 131 | 2007-01-03 0.0005170889 132 | 2007-01-04 0.0051679844 133 | 2007-01-05 -0.0136247551 134 | 2007-01-08 -0.0226739386 135 | 2007-01-09 0.0074666400 136 | 2007-01-10 -0.0166754107 137 | 138 | 139 | ### Learning objective 3: The basic characteristics of stock returns 140 | - A standard normal distribution has 0 mean, 1 standard deviation, and 0 excess [kurtosis](http://www.r-tutor.com/elementary-statistics/numerical-measures/kurtosis) 141 | - The distribution of a typical stock's returns has a small standard deviation and positive excess kurtosis 142 | 143 | 144 | ```R 145 | # Generate a standard normal distribution 146 | rn <- rnorm(100000) 147 | print(paste0("standard deviation: ", sd(rn))) 148 | print(paste0("Kurtosis: ", round(kurtosis(rn),2))) 149 | options(repr.plot.width = 4, repr.plot.height = 4) 150 | 151 | #hist(rn,breaks=100,prob=TRUE) 152 | #curve(dnorm(x, mean=0, sd=1), col="darkblue", lwd=2, add=TRUE ) # Overlay a standard normal distribution 153 | ``` 154 | 155 | [1] "standard deviation: 0.998879862990584" 156 | [1] "Kurtosis: 0.03" 157 | 158 | 159 | 160 | ```R 161 | print(paste0("standard deviation: ", sd(rtn.daily))) 162 | print(paste0("Kurtosis: ", round(kurtosis(rtn.daily),2))) 163 | 164 | options(repr.plot.width = 4, repr.plot.height = 4) 165 | 166 | m<-mean(rtn.daily) 167 | std<-sqrt(var(rtn.daily)) 168 | m 169 | 170 | # Overlay a standard normal distribution 171 | #curve(dnorm(x, mean=m, sd=std), col="darkblue", lwd=2, add=TRUE ) 172 | #hist(rtn.daily, breaks=100, prob=TRUE) # Make it a probability distribution 173 | ``` 174 | 175 | [1] "standard deviation: 0.024401938344768" 176 | [1] "Kurtosis: 
15.91" 177 | 178 | 179 | 180 | 0.00156416346916063 181 | 182 | 183 | 184 | ```R 185 | # A really basic boxplot. 186 | df$year <- format(index(df),"%Y") 187 | df$month <- format(index(df),"%Y%m") 188 | df3 <- data.frame(df) %>% filter(year==2014) 189 | df3$AMZN.Volume <- as.numeric(df3$AMZN.Volume) 190 | 191 | options(repr.plot.width = 6, repr.plot.height = 3) 192 | 193 | # Basic plot 194 | p <-ggplot(df3, aes(x=as.factor(month), y=AMZN.Volume)) 195 | 196 | #p + geom_boxplot(fill="slateblue", alpha=0.2) + xlab("Month") 197 | ``` 198 | 199 | 200 | ```R 201 | options(repr.plot.width = 6, repr.plot.height = 3) 202 | # Change outlier, color, shape and size 203 | p2 <- p + geom_boxplot(outlier.colour="red", outlier.shape=8, 204 | outlier.size=1) + xlab("Month") 205 | #p2 206 | ``` 207 | 208 | 209 | ```R 210 | # Box plot with dot plot 211 | #p2 + geom_dotplot(binaxis='y', stackdir='center', dotsize=0.2, binwidth=40) 212 | ``` 213 | 214 | 215 | ```R 216 | options(repr.plot.width = 6, repr.plot.height = 4) 217 | df <- AMZN 218 | df$OpCl <- OpCl(df) 219 | df$OpOp <- OpOp(df) 220 | df$HiCl <- HiCl(df) 221 | df$month <- format(index(df),"%Y%m") 222 | df$year <- format(index(df),"%Y") 223 | df_hiCl <- df[df$year==2017,] 224 | #boxplot(HiCl~month, data=df_hiCl, notch=TRUE, 225 | # col=(c("gold","darkgreen")), 226 | # main="Hi-Closed", xlab="Month") 227 | 228 | #boxplot(OpCl~month, data=df_hiCl, notch=TRUE, 229 | # col=(c("gold","darkgreen")), 230 | # main="Open-Closed", xlab="Month") 231 | ``` 232 | 233 | ### Learning objective 4: Common technical indicators 234 | 235 | #### MACD 236 | * MACD=12-Period EMA − 26-Period EMA, or "fast EMA - slow EMA" 237 | * The MACD was developed by Gerald Appel and is probably the most popular price oscillator. 238 | * It can be used as a generic oscillator for any univariate series, not only price. 
239 | * The MACD has a positive value whenever the 12-period EMA is above the 26-period EMA and a negative value when the 12-period EMA is below the 26-period EMA. A MACD that moves farther above or below its baseline indicates that the distance between the two EMAs is growing. 240 | 241 | #### RSI 242 | * Introduced by Welles Wilder Jr. in his seminal 1978 book "New Concepts in Technical Trading Systems", the relative strength index (RSI) is a popular momentum indicator. 243 | * It measures the magnitude of recent price changes to evaluate overbought or oversold conditions. 244 | * The RSI is displayed as an oscillator and can have a reading from 0 to 100. 245 | * RSI >= 70: a security is overbought or overvalued and may be primed for a trend reversal or corrective pullback in price. 246 | * RSI <= 30: an oversold or undervalued condition. 247 | * It can be applied to the price of a stock or other asset. 248 | 249 | #### Bollinger Bands 250 | * Bollinger Bands are a type of price envelope developed by John Bollinger. 251 | * Bollinger Bands are envelopes plotted at a standard deviation level above and below a simple moving average of the price. Because the distance of the bands is based on standard deviation, they adjust to volatility swings in the underlying price. 252 | * Bollinger Bands use 2 parameters, Period and Standard Deviations, StdDev. The default values are 20 for period, and 2 for standard deviations, although you may customize the combinations. 253 | * Bollinger bands help determine whether prices are high or low on a relative basis. They are used in pairs, both upper and lower bands and in conjunction with a moving average. Further, the pair of bands is not intended to be used on its own. Use the pair to confirm signals given with other indicators. 
254 | * "Distance from a moving average" or "standard deviation" apply the same concept 255 | * Click [here](https://www.fidelity.com/learning-center/trading-investing/technical-analysis/technical-indicator-guide/bollinger-bands#:~:text=Bollinger%20Bands%20are%20envelopes%20plotted,Period%20and%20Standard%20Deviations%2C%20StdDev.) for more detail 256 | 257 | 258 | ```R 259 | v <- Delt(Op(df),Cl(df),k=1:3) 260 | colnames(v) <-c("pcntOpCl1","pcntOpCl2","pcntOpCl3") 261 | df2 <- cbind(df,v) 262 | #head(df2) 263 | ``` 264 | 265 | 266 | ```R 267 | macd <- MACD(df2$AMZN.Adjusted, nFast = 12, nSlow = 26, nSig = 9, maType = "SMA", percent = FALSE) 268 | rsi <- RSI(df2$AMZN.Adjusted, n = 14, maType = "SMA") 269 | 270 | #tail(macd) 271 | ``` 272 | 273 | 274 | ```R 275 | #tail(rsi) 276 | ``` 277 | 278 | 279 | ```R 280 | d <- cbind(AMZN,macd,rsi) 281 | d$SMA12 <- SMA(d$AMZN.Adjusted,12) 282 | d$SMA26 <- SMA(d$AMZN.Adjusted,26) 283 | d <- subset(d, select = -c(AMZN.Open,AMZN.High,AMZN.Low,AMZN.Close,AMZN.Volume)) 284 | #d[50:60] 285 | ``` 286 | 287 | ### Learning objective 5: How to plot technical charts 288 | 289 | 290 | ```R 291 | options(repr.plot.width = 6, repr.plot.height = 3) 292 | #chartSeries(AMZN, subset = "last 3 months") 293 | ``` 294 | 295 | 296 | ```R 297 | options(repr.plot.width = 6, repr.plot.height = 3) 298 | #chartSeries(AMZN, subset = "2007::2008-01") 299 | ``` 300 | 301 | 302 | ```R 303 | options(repr.plot.width = 6, repr.plot.height = 3) 304 | #chartSeries(AMZN, theme = chartTheme("white")) 305 | ``` 306 | 307 | 308 | ```R 309 | options(repr.plot.width = 6, repr.plot.height = 3) 310 | #chartSeries(AMZN, subset = "2016::2018-12", TA = c(addVo(), addBBands())) #add volume and Bollinger Bands from TTR 311 | ``` 312 | 313 | 314 | ```R 315 | options(repr.plot.width = 6, repr.plot.height = 3) 316 | #chartSeries(AMZN, subset = "2018::2018-12",bar.type='hlc', 317 | # TA = c(addSMA(n=12,col="blue"),addSMA(n=26,col="red")), 318 | # theme = chartTheme("white")) 319 
| ``` 320 | 321 | 322 | ```R 323 | options(repr.plot.width = 6, repr.plot.height = 3) 324 | #chartSeries(AMZN, subset = "2018::2018-12",bar.type='hlc', 325 | # TA = c(addSMA(n=12,col="green"),addSMA(n=26,col="red"), 326 | # addMACD(),addRSI()), 327 | # theme = chartTheme("white")) 328 | ``` 329 | 330 | 331 | ```R 332 | options(repr.plot.width = 6, repr.plot.height = 3) 333 | #barChart(AMZN,subset = "2018::2018-12",bar.type='hlc') 334 | ``` 335 | 336 | 337 | ```R 338 | options(repr.plot.width = 6, repr.plot.height = 3) 339 | #candleChart(AMZN,subset = "2018::2018-06",multi.col=TRUE, theme='white') 340 | ``` 341 | 342 | 343 | ```R 344 | options(repr.plot.width = 6, repr.plot.height = 3) 345 | #chartSeries(AMZN, subset = "2018::2018-06",theme="white", TA="addVo();addBBands();addCCI()") 346 | ``` 347 | 348 | 349 | ```R 350 | options(repr.plot.width = 6, repr.plot.height = 3) 351 | #chartSeries(AMZN, subset = "2018::2018-06", 352 | # theme="white", 353 | # TA="addVo();addBBands();addCCI(); 354 | # addTA(OpCl(AMZN),col='blue', type='h') ") 355 | ``` 356 | 357 | ### Learning objective 6: Develop your trading strategy & signals 358 | 359 | #### MACD & RSI trading rule 360 | 361 | 362 | 363 | ```R 364 | macd <- MACD(AMZN$AMZN.Adjusted, nFast = 12, nSlow = 26, nSig = 9, maType = "SMA", percent = FALSE) 365 | rsi <- RSI(AMZN$AMZN.Adjusted, n = 14, maType = "SMA") 366 | #tail(macd) 367 | #tail(rsi) 368 | ``` 369 | 370 | Here we assume no transaction cost. 371 | 372 | 373 | ```R 374 | macd <- MACD(AMZN$AMZN.Adjusted, nFast = 12, nSlow = 26, nSig = 9, maType = "SMA", percent = FALSE) 375 | rsi <- RSI(AMZN$AMZN.Adjusted, n = 14, maType = "SMA") 376 | 377 | # Strategy 1: if macd>signal, enter and stay in the market. If macd 70), 1, 0) 383 | strategy2[is.na(strategy2)] <-0 384 | 385 | # Strategy 3: if oversold, enter and stay in the market. 
386 | strategy3 <- ifelse ((macd$signal > macd$macd) & (rsi$rsi < 30), 1, 0) 387 | strategy3[is.na(strategy3)] <-0 388 | 389 | 390 | # Buy-and-hold: keep it all time. So "1", not "0" 391 | bh_strategy <- rep(1,dim(macd)[1]) 392 | ``` 393 | 394 | ### Learning objective 7: Backtesting 395 | 396 | #### Annualized return 397 | * An annualized total return is the average amount earned by an investment each year over a given time period. 398 | 399 | #### Sharpe Ratio 400 | * [Sharpe Ratio](https://en.wikipedia.org/wiki/Sharpe_ratio) 401 | * [Annualized Sharpe Ratio](https://www.rdocumentation.org/packages/PerformanceAnalytics/versions/2.0.4/topics/SharpeRatio.annualized#:~:text=The%20annualized%20Sharpe%20ratio%20is,standard%20deviation%20of%20excess%20return.) 402 | * Usually, any Sharpe ratio greater than 1.0 is considered acceptable to good by investors. A ratio higher than 2.0 is rated as very good. A ratio of 3.0 or higher is considered excellent. A ratio under 1.0 is considered sub-optimal. 
403 | * "Lag": Since we are working with Closing prices, we can BUY or SELL on our signal the next day only 404 | 405 | 406 | ```R 407 | # Put in a function 408 | backtest <- function(df,from_date,to_date,strategy,strategy_name){ 409 | trade_return <- rtn.daily[index(rtn.daily)<=to_date & index(rtn.daily)>=from_date]*lag(strategy, na.pad = FALSE) 410 | cumm_return <- Return.cumulative(trade_return) 411 | annual_return <- Return.annualized(trade_return) 412 | summary(as.ts(trade_return)) 413 | SharpeRatio <- SharpeRatio(as.ts(trade_return), Rf = 0, p = 0.95, FUN = "StdDev") 414 | SharpeRatioAnnualized <- SharpeRatio.annualized(trade_return, Rf = 0) 415 | out <- as.data.frame(c(cumm_return,annual_return,SharpeRatio,SharpeRatioAnnualized)) 416 | out <- round(out,2) 417 | colnames(out) <- strategy_name 418 | row.names(out) <- c('Cumulative Return','Annualized Return','Sharpe Ratio','Annualized Sharpe Ratio') 419 | 420 | return( out ) 421 | } 422 | 423 | # Strategy 1 424 | strategy1_performance <- backtest(AMZN, from_date = '2007-01-01', to_date = '2015-12-31', strategy1,"Strategy1") 425 | strategy1_performance 426 | 427 | # Strategy 2 428 | strategy2_performance <- backtest(AMZN, from_date = '2007-01-01', to_date = '2015-12-31', strategy2,"Strategy2") 429 | strategy2_performance 430 | 431 | # Strategy 3 432 | strategy3_performance <- backtest(AMZN, from_date = '2007-01-01', to_date = '2015-12-31', strategy3,"Strategy3") 433 | strategy3_performance 434 | 435 | 436 | # Buy-and-hold strategy 437 | BH_backtest <- function(df,from_date,to_date,strategy_name){ 438 | trade_return <- rtn.daily[index(rtn.daily)<=to_date & index(rtn.daily)>=from_date] 439 | cumm_return <- Return.cumulative(trade_return) 440 | annual_return <- Return.annualized(trade_return) 441 | summary(as.ts(trade_return)) 442 | SharpeRatio <- SharpeRatio(as.ts(trade_return), Rf = 0, p = 0.95, FUN = "StdDev") 443 | SharpeRatioAnnualized <- SharpeRatio.annualized(trade_return, Rf = 0) 444 | out <- 
as.data.frame(c(cumm_return,annual_return,SharpeRatio,SharpeRatioAnnualized)) 445 | out <- round(out,2) 446 | colnames(out) <- strategy_name 447 | row.names(out) <- c('Cumulative Return','Annualized Return','Sharpe Ratio','Annualized Sharpe Ratio') 448 | 449 | return( out ) 450 | } 451 | 452 | buy_and_hold_performance <- BH_backtest(AMZN, from_date = '2007-01-01', to_date = '2015-12-31',"Buy & Hold Strategy") 453 | buy_and_hold_performance 454 | ``` 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 |
Strategy1
Cumulative Return1.23
Annualized Return0.09
Sharpe Ratio0.03
Annualized Sharpe Ratio0.33
466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 |
Strategy2
Cumulative Return-0.01
Annualized Return 0.00
Sharpe Ratio 0.00
Annualized Sharpe Ratio-0.01
479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 |
Strategy3
Cumulative Return0.73
Annualized Return0.06
Sharpe Ratio0.04
Annualized Sharpe Ratio0.55
492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 |
Buy & Hold Strategy
Cumulative Return16.47
Annualized Return 0.37
Sharpe Ratio 0.06
Annualized Sharpe Ratio 0.88
"""Streamlit app: effective federal funds rate versus Taylor-rule rates.

Reads seven CSV series (each keyed by a ``DATE`` column), aligns the quarterly
GDP series to the monthly CPI dates, derives 12-row (year-over-year) inflation
for four price indexes, applies the Taylor rule to each, and plots the result
for a date window chosen in the sidebar.
"""
import datetime
from functools import reduce

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
# !pip install streamlit
import streamlit as st

# Consumer Price Index for All Urban Consumers:
CPIAUCSL = pd.read_csv("/Datasets/CPIAUCSL.csv")
# Consumer Price Index for All Urban Consumers: All Items Less Food and Energy in U.S. City Average
CPILFESL = pd.read_csv("/CPILFESL.csv")
# Gross Domestic Product: Implicit Price Deflator
GDPDEF = pd.read_csv("/GDPDEF.csv")
# Personal Consumption Expenditures: Chain-type Price Index
PCEPI = pd.read_csv("/PCEPI.csv")
# Real Gross Domestic Product
GDPC1 = pd.read_csv("/GDPC1.csv")
# Potential GDP
GDPPOT = pd.read_csv("/GDPPOT.csv")
# Federal Funds Rate
FEDFUNDS = pd.read_csv("/FEDFUNDS.csv")


# GDP indexes are quarterly. Left-joining them onto the monthly CPI dates
# leaves NaN in the months without an observation; the forward fill below
# then carries the latest value into those months.
month = CPIAUCSL['DATE']
GDPDEF = pd.merge(month, GDPDEF, on='DATE', how='left')
GDPPOT = pd.merge(month, GDPPOT, on='DATE', how='left')
GDPC1 = pd.merge(month, GDPC1, on='DATE', how='left')

# Combine all the data
dfs = [CPIAUCSL, CPILFESL, GDPDEF, PCEPI, GDPC1, GDPPOT, FEDFUNDS]
data = reduce(lambda left, right: pd.merge(left, right, on=['DATE'], how='outer'), dfs)
# Forward filling. DataFrame.ffill() replaces fillna(method='ffill'), whose
# `method` argument is deprecated in recent pandas releases.
data = data.ffill()
data = data.dropna(how='any')  # drop the NAs of old time periods
data['GDP_gap'] = 100 * (data['GDPC1'] / data['GDPPOT'] - 1)
data['DATE'] = pd.to_datetime(data['DATE'], format='%Y-%m-%d').dt.strftime('%Y-%m')
data.index = data['DATE']

# Calculate the rate of inflation (percent) of each price index against its
# value 12 rows earlier; the result is stored under the name prefixed with 'r'.
for series in ('CPIAUCSL', 'CPILFESL', 'GDPDEF', 'PCEPI'):
    data['r' + series] = 100 * (data[series] / data[series].shift(12) - 1)


# Taylor rule formula
def taylor(inflation):
    """Add the Taylor-rule rate implied by one inflation measure to ``data``.

    Args:
        inflation: Name of a column of the module-level ``data`` frame that
            holds an inflation rate in percent (for example ``'rCPIAUCSL'``).

    Returns:
        The module-level ``data`` frame, mutated in place so that it also
        carries the column ``'ff' + inflation``.
    """
    # rate = inflation + 0.5 * output gap + 0.5 * (inflation - 2) + 2.
    # In Taylor's 1993 formulation the two 2s are the inflation target and
    # the equilibrium real rate, both in percent.
    data['ff' + inflation] = data[inflation] + 0.5 * data['GDP_gap'] + 0.5 * (data[inflation] - 2) + 2
    return data


inf_list = ['rCPIAUCSL', 'rCPILFESL', 'rGDPDEF', 'rPCEPI']
for inflation in inf_list:
    taylor(inflation)


# Display names of the four Taylor-rule series, in the same order as the
# 'ff…' columns selected below. Defined once so the column names and the
# sidebar options cannot drift apart (the option is used as a column key).
INFLATION_LABELS = [
    'CPI All Urban Consumers',
    'CPI All Urban Consumers - All Items Less Food and Energy',
    'GDP Implicit Price Deflator',
    'Personal Consumption Expenditures Price Index',
]

data2 = data[['FEDFUNDS', 'ffrCPIAUCSL', 'ffrCPILFESL', 'ffrGDPDEF', 'ffrPCEPI']].copy()
data2.columns = ['Effective Federal Funds Rate'] + INFLATION_LABELS


def _plot_rates(frame, legend_y):
    """Draw ``frame`` as a line chart with a percent y-axis and show it in the app."""
    fig = plt.figure(figsize=(16, 8))
    ax = frame.plot(kind='line', ax=fig.gca(), rot=45, fontsize=16)
    # legend_y is the vertical anchor of the legend in axes coordinates.
    ax.legend(loc='center left', bbox_to_anchor=(0, legend_y), fontsize=16)
    ax.yaxis.set_major_formatter(mtick.PercentFormatter())
    st.write(fig)


##################
# Set up sidebar #
##################
option = st.sidebar.selectbox('Select one inflation index', INFLATION_LABELS)

today = datetime.date.today()
before = today - datetime.timedelta(days=7000)
start_date = st.sidebar.date_input('Start date', before)
end_date = st.sidebar.date_input('End date', today)
if start_date < end_date:
    st.sidebar.success('Start date: `%s`\n\nEnd date:`%s`' % (start_date, end_date))
else:
    st.sidebar.error('Error: End date must fall after start date.')


###################
# Set up main app #
###################
# https://share.streamlit.io/daniellewisdl/streamlit-cheat-sheet/app.py
#progress_bar = st.progress(0)

# The index holds 'YYYY-MM' strings, so the window bounds are formatted the
# same way and compared as strings.
start_date = pd.to_datetime(start_date, format='%Y-%m-%d').strftime('%Y-%m')
end_date = pd.to_datetime(end_date, format='%Y-%m-%d').strftime('%Y-%m')

data3 = data2[(data2.index >= start_date) & (data2.index <= end_date)]
data4 = data3[['Effective Federal Funds Rate', option]]

st.title('The Taylor rule')

# First chart: the federal funds rate against all four Taylor-rule series.
_plot_rates(data3, legend_y=1.2)

# Second chart: the federal funds rate against the selected series only.
_plot_rates(data4, legend_y=1.1)
36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 97, 41 | "id": "a2ee6659-bfa0-48d2-a64c-17deed6b905a", 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/plain": [ 47 | "VGG(\n", 48 | " (features): Sequential(\n", 49 | " (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 50 | " (1): ReLU(inplace=True)\n", 51 | " (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 52 | " (3): ReLU(inplace=True)\n", 53 | " (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 54 | " (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 55 | " (6): ReLU(inplace=True)\n", 56 | " (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 57 | " (8): ReLU(inplace=True)\n", 58 | " (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 59 | " (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 60 | " (11): ReLU(inplace=True)\n", 61 | " (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 62 | " (13): ReLU(inplace=True)\n", 63 | " (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 64 | " (15): ReLU(inplace=True)\n", 65 | " (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 66 | " (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 67 | " (18): ReLU(inplace=True)\n", 68 | " (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 69 | " (20): ReLU(inplace=True)\n", 70 | " (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 71 | " (22): ReLU(inplace=True)\n", 72 | " (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 73 | " (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 74 | " (25): ReLU(inplace=True)\n", 75 | " (26): Conv2d(512, 512, kernel_size=(3, 3), 
stride=(1, 1), padding=(1, 1))\n", 76 | " (27): ReLU(inplace=True)\n", 77 | " (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 78 | " (29): ReLU(inplace=True)\n", 79 | " (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 80 | " )\n", 81 | " (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))\n", 82 | " (classifier): Sequential(\n", 83 | " (0): Linear(in_features=25088, out_features=4096, bias=True)\n", 84 | " (1): ReLU(inplace=True)\n", 85 | " (2): Dropout(p=0.5, inplace=False)\n", 86 | " (3): Linear(in_features=4096, out_features=4096, bias=True)\n", 87 | " (4): ReLU(inplace=True)\n", 88 | " (5): Dropout(p=0.5, inplace=False)\n", 89 | " (6): Linear(in_features=4096, out_features=1000, bias=True)\n", 90 | " )\n", 91 | ")" 92 | ] 93 | }, 94 | "execution_count": 97, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "vgg16" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 92, 106 | "id": "f03b113c-ae31-4deb-8df9-0f6bf7c68aeb", 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "# Read the categories\n", 111 | "# Download ImageNet labels\n", 112 | "# https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt\n", 113 | "\n", 114 | "with open(\"/Downloads/imagenet_classes.txt\", \"r\") as f:\n", 115 | " labels = [s.strip() for s in f.readlines()]" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 98, 121 | "id": "ca28c91e-c771-40b0-9e9b-9377b01b8b04", 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "name": "stdout", 126 | "output_type": "stream", 127 | "text": [ 128 | "(1600, 1071)\n" 129 | ] 130 | } 131 | ], 132 | "source": [ 133 | "import urllib\n", 134 | "#url, filename = (\"https://github.com/dataman-git/codes_for_articles/blob/master/pic/tesla.png?raw=true\", \"tesla.jpg\")\n", 135 | "url, filename = 
(\"https://cff2.earth.com/uploads/2022/01/06080341/Goldfish.jpg?raw=true\", \"goldfish.jpg\")\n", 136 | "url, filename = (\"https://cdn.britannica.com/92/152292-050-EAF28A45/Bald-eagle.jpg?raw=true\", \"eagle.jpg\")\n", 137 | "\n", 138 | "try: urllib.URLopener().retrieve(url, filename)\n", 139 | "except: urllib.request.urlretrieve(url, filename)\n", 140 | "\n", 141 | "# sample execution (requires torchvision)\n", 142 | "from PIL import Image\n", 143 | "from torchvision import transforms\n", 144 | "input_image = Image.open(filename).convert('RGB')\n", 145 | "print(input_image.size)\n", 146 | "input_image.show()\n", 147 | "\n", 148 | "#pix = np.array(im.getdata()).reshape(im.size[0], im.size[1], 3)\n", 149 | "#pix[1:10]" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 94, 155 | "id": "4dddc05b-3dc3-4fc3-9541-13c847c777ff", 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "preprocess = transforms.Compose([\n", 160 | " transforms.Resize(256),\n", 161 | " transforms.CenterCrop(224),\n", 162 | " transforms.ToTensor(),\n", 163 | " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", 164 | "])\n", 165 | "input_tensor = preprocess(input_image) \n", 166 | "input_tensor.shape # = torch.Size([3, 224, 224])\n", 167 | "input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model\n", 168 | "input_tensor.shape # = torch.Size([1, 3, 224, 224])\n", 169 | "\n", 170 | "# move the input and model to GPU for speed if available\n", 171 | "if torch.cuda.is_available():\n", 172 | " input_batch = input_batch.to('cuda')\n", 173 | " model.to('cuda')" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 95, 179 | "id": "c714e0b1-f505-4fcb-9ebc-b26400569d35", 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "output = vgg16(input_batch) # Returns a Tensor of shape (batch, num class labels)\n", 184 | "#prediction = prediction.data.numpy().argmax() # Our 
prediction will be the index of the class label with the largest value.\n", 185 | "#prediction\n", 186 | "\n", 187 | "# Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes\n", 188 | "#print(output[0])\n", 189 | "# The output has unnormalized scores. To get probabilities, you can run a softmax on it.\n", 190 | "probabilities = torch.nn.functional.softmax(output[0], dim=0)\n", 191 | "#print(probabilities)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 96, 197 | "id": "c6c8676b-219d-4c8a-afdf-82ef910d8e15", 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "name": "stdout", 202 | "output_type": "stream", 203 | "text": [ 204 | "bald eagle 0.9969350099563599\n", 205 | "vulture 0.0015524202026426792\n", 206 | "kite 0.001502618077211082\n", 207 | "albatross 3.8109526485641254e-06\n", 208 | "hornbill 1.904312739497982e-06\n" 209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "# Show top categories per image\n", 214 | "top5_prob, top5_catid = torch.topk(probabilities, 5)\n", 215 | "for i in range(top5_prob.size(0)):\n", 216 | " print(labels[top5_catid[i]], top5_prob[i].item())" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "id": "b7913a9c-5789-4162-9380-977fd928fb03", 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [] 226 | } 227 | ], 228 | "metadata": { 229 | "kernelspec": { 230 | "display_name": "Python 3", 231 | "language": "python", 232 | "name": "python3" 233 | }, 234 | "language_info": { 235 | "codemirror_mode": { 236 | "name": "ipython", 237 | "version": 3 238 | }, 239 | "file_extension": ".py", 240 | "mimetype": "text/x-python", 241 | "name": "python", 242 | "nbconvert_exporter": "python", 243 | "pygments_lexer": "ipython3", 244 | "version": "3.6.13" 245 | } 246 | }, 247 | "nbformat": 4, 248 | "nbformat_minor": 5 249 | } 250 | -------------------------------------------------------------------------------- /VeryCool.py: 
## OOP
"""A minimal class example: a round cookie with a radius and a flavor."""
import math


class cookie():
    """A circular cookie.

    Attributes:
        radius: radius of the cookie (any numeric unit).
        theFlavor: flavor name as a string.
    """

    def __init__(self, r, flavor):
        """Store the radius *r* and the *flavor* of the cookie."""
        self.radius = r
        self.theFlavor = flavor

    def __repr__(self):
        """Return a constructor-like representation for debugging."""
        return f"{type(self).__name__}({self.radius!r}, {self.theFlavor!r})"

    def area(self):
        """Return the area of the cookie, pi * r**2."""
        # math.pi replaces the truncated literal 3.1416 for full precision.
        return math.pi * self.radius * self.radius

    def perimeter(self):
        """Return the circumference of the cookie, 2 * pi * r."""
        return 2 * math.pi * self.radius


if __name__ == "__main__":
    # Demo: runs only when the file is executed directly, not when imported.
    smallCookie = cookie(3, 'rasin')
    largeCookie = cookie(10, 'chocolate')

    # smallCookie
    print("My small cookie is a " + smallCookie.theFlavor + " cookie.")
    print("It's area is: ")
    print(smallCookie.area())
    print("And it's perimeter is: ")
    print(smallCookie.perimeter())

    # largeCookie
    print("My large cookie is a " + largeCookie.theFlavor + " cookie.")
    print("It's area is: ")
    print(largeCookie.area())
    print("And it's perimeter is: ")
    print(largeCookie.perimeter())
33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "My small cookie is a rasin cookie.\n", 43 | "It's area is: \n", 44 | "28.2744\n", 45 | "And it's perimeter is: \n", 46 | "18.8496\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "print(\"My small cookie is a \" + smallCookie.theFlavor + \" cookie.\")\n", 52 | "print(\"It's area is: \")\n", 53 | "print(smallCookie.area())\n", 54 | "print(\"And it's perimeter is: \")\n", 55 | "print(smallCookie.perimeter())" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "My large cookie is a chocolate cookie.\n", 68 | "It's area is: \n", 69 | "314.16\n", 70 | "And it's perimeter is: \n", 71 | "62.832\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "print(\"My large cookie is a \" + largeCookie.theFlavor + \" cookie.\")\n", 77 | "print(\"It's area is: \")\n", 78 | "print(largeCookie.area())\n", 79 | "print(\"And it's perimeter is: \")\n", 80 | "print(largeCookie.perimeter())" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 4, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "name": "stdout", 97 | "output_type": "stream", 98 | "text": [ 99 | "I am making cookies with cookie cutters,\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "# Python program to execute \n", 105 | "def my_function(): \n", 106 | " print (\"I am making cookies with cookie cutters,\")\n", 107 | "\n", 108 | "# Test it\n", 109 | "my_function() " 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | 
"metadata": {}, 122 | "source": [ 123 | "## [Part II: Class Inheritance](https://python.plainenglish.io/learning-object-orient-programming-and-if-name-main-in-python-in-10-minutes-9b43504a739b)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 43, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "My small cookie is a rasin cookie.\n", 136 | "It's area is: \n", 137 | "28.2744\n", 138 | "And it's perimeter is: \n", 139 | "18.8496\n" 140 | ] 141 | } 142 | ], 143 | "source": [ 144 | "class cookie():\n", 145 | " def __init__(self, r,flavor):\n", 146 | " self.radius = r\n", 147 | " self.theFlavor = flavor\n", 148 | "\n", 149 | " def area(self):\n", 150 | " return 3.1416 * self.radius * self.radius\n", 151 | " \n", 152 | " def perimeter(self):\n", 153 | " return 2 * 3.1416 * self.radius\n", 154 | "\n", 155 | "smallCookie = cookie(3,'rasin')\n", 156 | "\n", 157 | "print(\"My small cookie is a \" + smallCookie.theFlavor + \" cookie.\")\n", 158 | "print(\"It's area is: \")\n", 159 | "print(smallCookie.area())\n", 160 | "print(\"And it's perimeter is: \")\n", 161 | "print(smallCookie.perimeter())" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 40, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | "We just made a glazed donut.\n", 174 | "It's area is: \n", 175 | "84.8232\n", 176 | "And it's perimeter is: \n", 177 | "56.5488\n" 178 | ] 179 | } 180 | ], 181 | "source": [ 182 | "class Doughnut(cookie):\n", 183 | " def __init__(self, r_in, r_out, topping):\n", 184 | " self.insideRadius = r_in\n", 185 | " self.outsideRadius = r_out\n", 186 | " self.topping = topping\n", 187 | "\n", 188 | " def area(self):\n", 189 | " return 3.1416 * (self.outsideRadius ** 2 - self.insideRadius ** 2)\n", 190 | " \n", 191 | " def perimeter(self):\n", 192 | " return 2 * 3.1416 * 
(self.insideRadius + self.outsideRadius)\n", 193 | "\n", 194 | "glazedDonut = Doughnut(3,6,'glazed')\n", 195 | "\n", 196 | "print(\"We just made a \" + glazedDonut.topping + \" donut.\")\n", 197 | "print(\"It's area is: \")\n", 198 | "print(glazedDonut.area())\n", 199 | "print(\"And it's perimeter is: \")\n", 200 | "print(glazedDonut.perimeter())" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 49, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "We just made a glazed chocolate donut.\n", 213 | "It's area is: \n", 214 | "84.8232\n", 215 | "And it's perimeter is: \n", 216 | "56.5488\n", 217 | "And the inside hollow area is: \n", 218 | "28.2744\n" 219 | ] 220 | } 221 | ], 222 | "source": [ 223 | "# Invoking the parent\n", 224 | "class Doughnut(cookie):\n", 225 | " def __init__(self, flavor, r_in, r_out, topping):\n", 226 | " self.insideRadius = r_in\n", 227 | " self.outsideRadius = r_out\n", 228 | " self.topping = topping\n", 229 | " \n", 230 | " # invoking the __init__ of the parent class \n", 231 | " cookie.__init__(self, r_in, flavor) \n", 232 | "\n", 233 | " def donutarea(self):\n", 234 | " return 3.1416 * (self.outsideRadius ** 2 - self.insideRadius ** 2)\n", 235 | " \n", 236 | " def donutperimeter(self):\n", 237 | " return 2 * 3.1416 * (self.insideRadius + self.outsideRadius)\n", 238 | "\n", 239 | "glazedDonut = Doughnut(flavor = 'chocolate',\n", 240 | " r_in = 3,\n", 241 | " r_out = 6,\n", 242 | " topping = 'glazed')\n", 243 | "\n", 244 | "print(\"We just made a \" + glazedDonut.topping + ' ' + glazedDonut.theFlavor + \" donut.\")\n", 245 | "print(\"It's area is: \")\n", 246 | "print(glazedDonut.donutarea())\n", 247 | "print(\"And it's perimeter is: \")\n", 248 | "print(glazedDonut.donutperimeter())\n", 249 | "print(\"And the inside hollow area is: \")\n", 250 | "print(glazedDonut.area())" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | 
"execution_count": 35, 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "ename": "AttributeError", 260 | "evalue": "'Doughnut' object has no attribute 'theFlavor'", 261 | "output_type": "error", 262 | "traceback": [ 263 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 264 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", 265 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m topping = 'glazed')\n\u001b[1;32m 21\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"We just made a \"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mglazedDonut\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtopping\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mglazedDonut\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtheFlavor\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\" donut.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"It's area is: \"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mglazedDonut\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marea\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 266 | "\u001b[0;31mAttributeError\u001b[0m: 'Doughnut' object has no attribute 'theFlavor'" 267 | ] 268 | } 269 | ], 270 | "source": [ 271 | "# An errorous example\n", 272 | "class Doughnut(cookie):\n", 273 | " def __init__(self, r, flavor, r_in, r_out, topping):\n", 274 | " self.insideRadius = r_in\n", 275 | " self.outsideRadius = r_out\n", 276 | " self.topping = topping\n", 277 | " \n", 278 | " # If you forget to invoke the __init__ of the parent class,\n", 
279 | " # you will get an error message. This is because it does not know where flavor comes from.\n", 280 | " # cookie.__init__(self, r, flavor) \n", 281 | "\n", 282 | " def area(self):\n", 283 | " return 3.1416 * (self.outsideRadius ** 2 - self.insideRadius ** 2)\n", 284 | " \n", 285 | " def perimeter(self):\n", 286 | " return 2 * 3.1416 * (self.insideRadius + self.outsideRadius)\n", 287 | "\n", 288 | "glazedDonut = Doughnut(r = 3,\n", 289 | " flavor = 'chocolate',\n", 290 | " r_in = 3,\n", 291 | " r_out = 6,\n", 292 | " topping = 'glazed')\n", 293 | "\n", 294 | "print(\"We just made a \" + glazedDonut.topping + ' ' + glazedDonut.theFlavor + \" donut.\")\n", 295 | "print(\"It's area is: \")\n", 296 | "print(glazedDonut.area())\n", 297 | "print(\"And it's perimeter is: \")\n", 298 | "print(glazedDonut.perimeter())" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 48, 304 | "metadata": {}, 305 | "outputs": [ 306 | { 307 | "name": "stdout", 308 | "output_type": "stream", 309 | "text": [ 310 | "We just made a glazed chocolate donut.\n", 311 | "It's area is: \n", 312 | "84.8232\n", 313 | "And it's perimeter is: \n", 314 | "56.5488\n", 315 | "And the inside hollow area is: \n", 316 | "28.2744\n" 317 | ] 318 | } 319 | ], 320 | "source": [ 321 | "# Use super()\n", 322 | "class Doughnut(cookie):\n", 323 | " def __init__(self, r, flavor, r_in, r_out, topping):\n", 324 | " self.insideRadius = r_in\n", 325 | " self.outsideRadius = r_out\n", 326 | " self.topping = topping\n", 327 | " \n", 328 | " # invoking the __init__ of the parent class \n", 329 | " super().__init__(r, flavor) \n", 330 | " # super().__init__(self, r, flavor) # This is wrong. 
###############################
# This program lets you       #
# - enter values in Streamlit #
# - get prediction            #
###############################
"""Streamlit front end: collect inputs, build the feature row, show the prediction.

Run with ``streamlit run deploy_model.py``.
"""
import pickle

import pandas as pd

# Location of the pickled model.
path = ''
modelname = path + '/toymodel.pkl'

# Slider bounds for the numeric inputs.
LIVINGAPARTMENTS_AVG_MIN = 0.0
LIVINGAPARTMENTS_AVG_MAX = 1.0
APARTMENTS_AVG_MIN = 0.0
APARTMENTS_AVG_MAX = 0.11697126743049956

# Occupation dummy columns, in the order they appear in the feature frame
# (and in the select box).
OCCUPATIONS = (
    'Accountants',
    'Cleaning_staff',
    'Cooking_staff',
    'Core_staff',
    'Drivers',
    'High_skill_tech_staff',
    'Laborers',
    'Managers',
    'Medicine_staff',
    'Sales_staff',
    'Security_staff',
    'OTHER',
)


def load_model(filename=modelname):
    """Unpickle and return the model stored at *filename*.

    NOTE(security): pickle.load executes arbitrary code from the file; only
    load model files that come from a trusted source.
    """
    # The context manager closes the handle the original left open.
    with open(filename, 'rb') as handle:
        return pickle.load(handle)


def build_features(apartments_avg, living_apartments_avg, occupation):
    """Return the one-row feature frame passed to the model.

    Args:
        apartments_avg: value for the ``APARTMENTS_AVG`` column.
        living_apartments_avg: value for the ``LIVINGAPARTMENTS_AVG`` column.
        occupation: selected occupation; any value not listed in
            ``OCCUPATIONS`` is counted as ``'OTHER'``.

    Returns:
        A single-row DataFrame with the two numeric columns followed by one
        0/1 dummy column per entry of ``OCCUPATIONS``.
    """
    selected = occupation if occupation in OCCUPATIONS else 'OTHER'
    row = {
        'APARTMENTS_AVG': [apartments_avg],
        'LIVINGAPARTMENTS_AVG': [living_apartments_avg],
    }
    # One-hot encode the occupation; replaces the exec()-created variables
    # and the twelve-branch if/elif chain.
    for name in OCCUPATIONS:
        row[name] = [1 if name == selected else 0]
    return pd.DataFrame(row)


def main():
    """Render the input widgets and display the prediction on request."""
    # Imported here so the helpers above stay importable without Streamlit.
    import streamlit as st

    # loading the model
    loaded_model = load_model()

    #############
    # Main page #
    #############
    st.write("The model prediction")

    # Get input values - numeric variables
    LIVINGAPARTMENTS_AVG = st.slider('Please enter the living apartments:',
                                     min_value=LIVINGAPARTMENTS_AVG_MIN,
                                     max_value=LIVINGAPARTMENTS_AVG_MAX)
    APARTMENTS_AVG = st.slider('Please enter the apartment average:',
                               min_value=APARTMENTS_AVG_MIN,
                               max_value=APARTMENTS_AVG_MAX)

    # Enter data for prediction
    Occupation = st.selectbox('Please choose Your Occupation', OCCUPATIONS)

    # when 'Predict' is clicked, make the prediction and store it
    if st.button("Get Your Prediction"):
        X = build_features(APARTMENTS_AVG, LIVINGAPARTMENTS_AVG, Occupation)

        # Making predictions
        # The model produces (p0,p1), we want p1.
        prediction = loaded_model.predict_proba(X)[:, 1]

        st.success('Your Target is {}'.format(prediction))


if __name__ == "__main__":
    main()
African chameleon 49 | Komodo dragon 50 | African crocodile 51 | American alligator 52 | triceratops 53 | thunder snake 54 | ringneck snake 55 | hognose snake 56 | green snake 57 | king snake 58 | garter snake 59 | water snake 60 | vine snake 61 | night snake 62 | boa constrictor 63 | rock python 64 | Indian cobra 65 | green mamba 66 | sea snake 67 | horned viper 68 | diamondback 69 | sidewinder 70 | trilobite 71 | harvestman 72 | scorpion 73 | black and gold garden spider 74 | barn spider 75 | garden spider 76 | black widow 77 | tarantula 78 | wolf spider 79 | tick 80 | centipede 81 | black grouse 82 | ptarmigan 83 | ruffed grouse 84 | prairie chicken 85 | peacock 86 | quail 87 | partridge 88 | African grey 89 | macaw 90 | sulphur-crested cockatoo 91 | lorikeet 92 | coucal 93 | bee eater 94 | hornbill 95 | hummingbird 96 | jacamar 97 | toucan 98 | drake 99 | red-breasted merganser 100 | goose 101 | black swan 102 | tusker 103 | echidna 104 | platypus 105 | wallaby 106 | koala 107 | wombat 108 | jellyfish 109 | sea anemone 110 | brain coral 111 | flatworm 112 | nematode 113 | conch 114 | snail 115 | slug 116 | sea slug 117 | chiton 118 | chambered nautilus 119 | Dungeness crab 120 | rock crab 121 | fiddler crab 122 | king crab 123 | American lobster 124 | spiny lobster 125 | crayfish 126 | hermit crab 127 | isopod 128 | white stork 129 | black stork 130 | spoonbill 131 | flamingo 132 | little blue heron 133 | American egret 134 | bittern 135 | crane 136 | limpkin 137 | European gallinule 138 | American coot 139 | bustard 140 | ruddy turnstone 141 | red-backed sandpiper 142 | redshank 143 | dowitcher 144 | oystercatcher 145 | pelican 146 | king penguin 147 | albatross 148 | grey whale 149 | killer whale 150 | dugong 151 | sea lion 152 | Chihuahua 153 | Japanese spaniel 154 | Maltese dog 155 | Pekinese 156 | Shih-Tzu 157 | Blenheim spaniel 158 | papillon 159 | toy terrier 160 | Rhodesian ridgeback 161 | Afghan hound 162 | basset 163 | beagle 164 | bloodhound 165 | 
bluetick 166 | black-and-tan coonhound 167 | Walker hound 168 | English foxhound 169 | redbone 170 | borzoi 171 | Irish wolfhound 172 | Italian greyhound 173 | whippet 174 | Ibizan hound 175 | Norwegian elkhound 176 | otterhound 177 | Saluki 178 | Scottish deerhound 179 | Weimaraner 180 | Staffordshire bullterrier 181 | American Staffordshire terrier 182 | Bedlington terrier 183 | Border terrier 184 | Kerry blue terrier 185 | Irish terrier 186 | Norfolk terrier 187 | Norwich terrier 188 | Yorkshire terrier 189 | wire-haired fox terrier 190 | Lakeland terrier 191 | Sealyham terrier 192 | Airedale 193 | cairn 194 | Australian terrier 195 | Dandie Dinmont 196 | Boston bull 197 | miniature schnauzer 198 | giant schnauzer 199 | standard schnauzer 200 | Scotch terrier 201 | Tibetan terrier 202 | silky terrier 203 | soft-coated wheaten terrier 204 | West Highland white terrier 205 | Lhasa 206 | flat-coated retriever 207 | curly-coated retriever 208 | golden retriever 209 | Labrador retriever 210 | Chesapeake Bay retriever 211 | German short-haired pointer 212 | vizsla 213 | English setter 214 | Irish setter 215 | Gordon setter 216 | Brittany spaniel 217 | clumber 218 | English springer 219 | Welsh springer spaniel 220 | cocker spaniel 221 | Sussex spaniel 222 | Irish water spaniel 223 | kuvasz 224 | schipperke 225 | groenendael 226 | malinois 227 | briard 228 | kelpie 229 | komondor 230 | Old English sheepdog 231 | Shetland sheepdog 232 | collie 233 | Border collie 234 | Bouvier des Flandres 235 | Rottweiler 236 | German shepherd 237 | Doberman 238 | miniature pinscher 239 | Greater Swiss Mountain dog 240 | Bernese mountain dog 241 | Appenzeller 242 | EntleBucher 243 | boxer 244 | bull mastiff 245 | Tibetan mastiff 246 | French bulldog 247 | Great Dane 248 | Saint Bernard 249 | Eskimo dog 250 | malamute 251 | Siberian husky 252 | dalmatian 253 | affenpinscher 254 | basenji 255 | pug 256 | Leonberg 257 | Newfoundland 258 | Great Pyrenees 259 | Samoyed 260 | Pomeranian 261 
| chow 262 | keeshond 263 | Brabancon griffon 264 | Pembroke 265 | Cardigan 266 | toy poodle 267 | miniature poodle 268 | standard poodle 269 | Mexican hairless 270 | timber wolf 271 | white wolf 272 | red wolf 273 | coyote 274 | dingo 275 | dhole 276 | African hunting dog 277 | hyena 278 | red fox 279 | kit fox 280 | Arctic fox 281 | grey fox 282 | tabby 283 | tiger cat 284 | Persian cat 285 | Siamese cat 286 | Egyptian cat 287 | cougar 288 | lynx 289 | leopard 290 | snow leopard 291 | jaguar 292 | lion 293 | tiger 294 | cheetah 295 | brown bear 296 | American black bear 297 | ice bear 298 | sloth bear 299 | mongoose 300 | meerkat 301 | tiger beetle 302 | ladybug 303 | ground beetle 304 | long-horned beetle 305 | leaf beetle 306 | dung beetle 307 | rhinoceros beetle 308 | weevil 309 | fly 310 | bee 311 | ant 312 | grasshopper 313 | cricket 314 | walking stick 315 | cockroach 316 | mantis 317 | cicada 318 | leafhopper 319 | lacewing 320 | dragonfly 321 | damselfly 322 | admiral 323 | ringlet 324 | monarch 325 | cabbage butterfly 326 | sulphur butterfly 327 | lycaenid 328 | starfish 329 | sea urchin 330 | sea cucumber 331 | wood rabbit 332 | hare 333 | Angora 334 | hamster 335 | porcupine 336 | fox squirrel 337 | marmot 338 | beaver 339 | guinea pig 340 | sorrel 341 | zebra 342 | hog 343 | wild boar 344 | warthog 345 | hippopotamus 346 | ox 347 | water buffalo 348 | bison 349 | ram 350 | bighorn 351 | ibex 352 | hartebeest 353 | impala 354 | gazelle 355 | Arabian camel 356 | llama 357 | weasel 358 | mink 359 | polecat 360 | black-footed ferret 361 | otter 362 | skunk 363 | badger 364 | armadillo 365 | three-toed sloth 366 | orangutan 367 | gorilla 368 | chimpanzee 369 | gibbon 370 | siamang 371 | guenon 372 | patas 373 | baboon 374 | macaque 375 | langur 376 | colobus 377 | proboscis monkey 378 | marmoset 379 | capuchin 380 | howler monkey 381 | titi 382 | spider monkey 383 | squirrel monkey 384 | Madagascar cat 385 | indri 386 | Indian elephant 387 | African 
elephant 388 | lesser panda 389 | giant panda 390 | barracouta 391 | eel 392 | coho 393 | rock beauty 394 | anemone fish 395 | sturgeon 396 | gar 397 | lionfish 398 | puffer 399 | abacus 400 | abaya 401 | academic gown 402 | accordion 403 | acoustic guitar 404 | aircraft carrier 405 | airliner 406 | airship 407 | altar 408 | ambulance 409 | amphibian 410 | analog clock 411 | apiary 412 | apron 413 | ashcan 414 | assault rifle 415 | backpack 416 | bakery 417 | balance beam 418 | balloon 419 | ballpoint 420 | Band Aid 421 | banjo 422 | bannister 423 | barbell 424 | barber chair 425 | barbershop 426 | barn 427 | barometer 428 | barrel 429 | barrow 430 | baseball 431 | basketball 432 | bassinet 433 | bassoon 434 | bathing cap 435 | bath towel 436 | bathtub 437 | beach wagon 438 | beacon 439 | beaker 440 | bearskin 441 | beer bottle 442 | beer glass 443 | bell cote 444 | bib 445 | bicycle-built-for-two 446 | bikini 447 | binder 448 | binoculars 449 | birdhouse 450 | boathouse 451 | bobsled 452 | bolo tie 453 | bonnet 454 | bookcase 455 | bookshop 456 | bottlecap 457 | bow 458 | bow tie 459 | brass 460 | brassiere 461 | breakwater 462 | breastplate 463 | broom 464 | bucket 465 | buckle 466 | bulletproof vest 467 | bullet train 468 | butcher shop 469 | cab 470 | caldron 471 | candle 472 | cannon 473 | canoe 474 | can opener 475 | cardigan 476 | car mirror 477 | carousel 478 | carpenter's kit 479 | carton 480 | car wheel 481 | cash machine 482 | cassette 483 | cassette player 484 | castle 485 | catamaran 486 | CD player 487 | cello 488 | cellular telephone 489 | chain 490 | chainlink fence 491 | chain mail 492 | chain saw 493 | chest 494 | chiffonier 495 | chime 496 | china cabinet 497 | Christmas stocking 498 | church 499 | cinema 500 | cleaver 501 | cliff dwelling 502 | cloak 503 | clog 504 | cocktail shaker 505 | coffee mug 506 | coffeepot 507 | coil 508 | combination lock 509 | computer keyboard 510 | confectionery 511 | container ship 512 | convertible 513 | corkscrew 
514 | cornet 515 | cowboy boot 516 | cowboy hat 517 | cradle 518 | crane 519 | crash helmet 520 | crate 521 | crib 522 | Crock Pot 523 | croquet ball 524 | crutch 525 | cuirass 526 | dam 527 | desk 528 | desktop computer 529 | dial telephone 530 | diaper 531 | digital clock 532 | digital watch 533 | dining table 534 | dishrag 535 | dishwasher 536 | disk brake 537 | dock 538 | dogsled 539 | dome 540 | doormat 541 | drilling platform 542 | drum 543 | drumstick 544 | dumbbell 545 | Dutch oven 546 | electric fan 547 | electric guitar 548 | electric locomotive 549 | entertainment center 550 | envelope 551 | espresso maker 552 | face powder 553 | feather boa 554 | file 555 | fireboat 556 | fire engine 557 | fire screen 558 | flagpole 559 | flute 560 | folding chair 561 | football helmet 562 | forklift 563 | fountain 564 | fountain pen 565 | four-poster 566 | freight car 567 | French horn 568 | frying pan 569 | fur coat 570 | garbage truck 571 | gasmask 572 | gas pump 573 | goblet 574 | go-kart 575 | golf ball 576 | golfcart 577 | gondola 578 | gong 579 | gown 580 | grand piano 581 | greenhouse 582 | grille 583 | grocery store 584 | guillotine 585 | hair slide 586 | hair spray 587 | half track 588 | hammer 589 | hamper 590 | hand blower 591 | hand-held computer 592 | handkerchief 593 | hard disc 594 | harmonica 595 | harp 596 | harvester 597 | hatchet 598 | holster 599 | home theater 600 | honeycomb 601 | hook 602 | hoopskirt 603 | horizontal bar 604 | horse cart 605 | hourglass 606 | iPod 607 | iron 608 | jack-o'-lantern 609 | jean 610 | jeep 611 | jersey 612 | jigsaw puzzle 613 | jinrikisha 614 | joystick 615 | kimono 616 | knee pad 617 | knot 618 | lab coat 619 | ladle 620 | lampshade 621 | laptop 622 | lawn mower 623 | lens cap 624 | letter opener 625 | library 626 | lifeboat 627 | lighter 628 | limousine 629 | liner 630 | lipstick 631 | Loafer 632 | lotion 633 | loudspeaker 634 | loupe 635 | lumbermill 636 | magnetic compass 637 | mailbag 638 | mailbox 639 | maillot 
640 | maillot 641 | manhole cover 642 | maraca 643 | marimba 644 | mask 645 | matchstick 646 | maypole 647 | maze 648 | measuring cup 649 | medicine chest 650 | megalith 651 | microphone 652 | microwave 653 | military uniform 654 | milk can 655 | minibus 656 | miniskirt 657 | minivan 658 | missile 659 | mitten 660 | mixing bowl 661 | mobile home 662 | Model T 663 | modem 664 | monastery 665 | monitor 666 | moped 667 | mortar 668 | mortarboard 669 | mosque 670 | mosquito net 671 | motor scooter 672 | mountain bike 673 | mountain tent 674 | mouse 675 | mousetrap 676 | moving van 677 | muzzle 678 | nail 679 | neck brace 680 | necklace 681 | nipple 682 | notebook 683 | obelisk 684 | oboe 685 | ocarina 686 | odometer 687 | oil filter 688 | organ 689 | oscilloscope 690 | overskirt 691 | oxcart 692 | oxygen mask 693 | packet 694 | paddle 695 | paddlewheel 696 | padlock 697 | paintbrush 698 | pajama 699 | palace 700 | panpipe 701 | paper towel 702 | parachute 703 | parallel bars 704 | park bench 705 | parking meter 706 | passenger car 707 | patio 708 | pay-phone 709 | pedestal 710 | pencil box 711 | pencil sharpener 712 | perfume 713 | Petri dish 714 | photocopier 715 | pick 716 | pickelhaube 717 | picket fence 718 | pickup 719 | pier 720 | piggy bank 721 | pill bottle 722 | pillow 723 | ping-pong ball 724 | pinwheel 725 | pirate 726 | pitcher 727 | plane 728 | planetarium 729 | plastic bag 730 | plate rack 731 | plow 732 | plunger 733 | Polaroid camera 734 | pole 735 | police van 736 | poncho 737 | pool table 738 | pop bottle 739 | pot 740 | potter's wheel 741 | power drill 742 | prayer rug 743 | printer 744 | prison 745 | projectile 746 | projector 747 | puck 748 | punching bag 749 | purse 750 | quill 751 | quilt 752 | racer 753 | racket 754 | radiator 755 | radio 756 | radio telescope 757 | rain barrel 758 | recreational vehicle 759 | reel 760 | reflex camera 761 | refrigerator 762 | remote control 763 | restaurant 764 | revolver 765 | rifle 766 | rocking chair 767 | 
rotisserie 768 | rubber eraser 769 | rugby ball 770 | rule 771 | running shoe 772 | safe 773 | safety pin 774 | saltshaker 775 | sandal 776 | sarong 777 | sax 778 | scabbard 779 | scale 780 | school bus 781 | schooner 782 | scoreboard 783 | screen 784 | screw 785 | screwdriver 786 | seat belt 787 | sewing machine 788 | shield 789 | shoe shop 790 | shoji 791 | shopping basket 792 | shopping cart 793 | shovel 794 | shower cap 795 | shower curtain 796 | ski 797 | ski mask 798 | sleeping bag 799 | slide rule 800 | sliding door 801 | slot 802 | snorkel 803 | snowmobile 804 | snowplow 805 | soap dispenser 806 | soccer ball 807 | sock 808 | solar dish 809 | sombrero 810 | soup bowl 811 | space bar 812 | space heater 813 | space shuttle 814 | spatula 815 | speedboat 816 | spider web 817 | spindle 818 | sports car 819 | spotlight 820 | stage 821 | steam locomotive 822 | steel arch bridge 823 | steel drum 824 | stethoscope 825 | stole 826 | stone wall 827 | stopwatch 828 | stove 829 | strainer 830 | streetcar 831 | stretcher 832 | studio couch 833 | stupa 834 | submarine 835 | suit 836 | sundial 837 | sunglass 838 | sunglasses 839 | sunscreen 840 | suspension bridge 841 | swab 842 | sweatshirt 843 | swimming trunks 844 | swing 845 | switch 846 | syringe 847 | table lamp 848 | tank 849 | tape player 850 | teapot 851 | teddy 852 | television 853 | tennis ball 854 | thatch 855 | theater curtain 856 | thimble 857 | thresher 858 | throne 859 | tile roof 860 | toaster 861 | tobacco shop 862 | toilet seat 863 | torch 864 | totem pole 865 | tow truck 866 | toyshop 867 | tractor 868 | trailer truck 869 | tray 870 | trench coat 871 | tricycle 872 | trimaran 873 | tripod 874 | triumphal arch 875 | trolleybus 876 | trombone 877 | tub 878 | turnstile 879 | typewriter keyboard 880 | umbrella 881 | unicycle 882 | upright 883 | vacuum 884 | vase 885 | vault 886 | velvet 887 | vending machine 888 | vestment 889 | viaduct 890 | violin 891 | volleyball 892 | waffle iron 893 | wall clock 894 | 
wallet 895 | wardrobe 896 | warplane 897 | washbasin 898 | washer 899 | water bottle 900 | water jug 901 | water tower 902 | whiskey jug 903 | whistle 904 | wig 905 | window screen 906 | window shade 907 | Windsor tie 908 | wine bottle 909 | wing 910 | wok 911 | wooden spoon 912 | wool 913 | worm fence 914 | wreck 915 | yawl 916 | yurt 917 | web site 918 | comic book 919 | crossword puzzle 920 | street sign 921 | traffic light 922 | book jacket 923 | menu 924 | plate 925 | guacamole 926 | consomme 927 | hot pot 928 | trifle 929 | ice cream 930 | ice lolly 931 | French loaf 932 | bagel 933 | pretzel 934 | cheeseburger 935 | hotdog 936 | mashed potato 937 | head cabbage 938 | broccoli 939 | cauliflower 940 | zucchini 941 | spaghetti squash 942 | acorn squash 943 | butternut squash 944 | cucumber 945 | artichoke 946 | bell pepper 947 | cardoon 948 | mushroom 949 | Granny Smith 950 | strawberry 951 | orange 952 | lemon 953 | fig 954 | pineapple 955 | banana 956 | jackfruit 957 | custard apple 958 | pomegranate 959 | hay 960 | carbonara 961 | chocolate sauce 962 | dough 963 | meat loaf 964 | pizza 965 | potpie 966 | burrito 967 | red wine 968 | espresso 969 | cup 970 | eggnog 971 | alp 972 | bubble 973 | cliff 974 | coral reef 975 | geyser 976 | lakeside 977 | promontory 978 | sandbar 979 | seashore 980 | valley 981 | volcano 982 | ballplayer 983 | groom 984 | scuba diver 985 | rapeseed 986 | daisy 987 | yellow lady's slipper 988 | corn 989 | acorn 990 | hip 991 | buckeye 992 | coral fungus 993 | agaric 994 | gyromitra 995 | stinkhorn 996 | earthstar 997 | hen-of-the-woods 998 | bolete 999 | ear 1000 | toilet tissue -------------------------------------------------------------------------------- /my_function.py: -------------------------------------------------------------------------------- 1 | # Python program to execute 2 | def my_function(): 3 | print ("I am making cookies with cookie cutters,") 4 | 5 | # Test it 6 | my_function() 
-------------------------------------------------------------------------------- /pic/tesla.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataman-git/codes_for_articles/5c34840dbefe93bf9a6de9ef0b90ba3ddce9ab45/pic/tesla.png -------------------------------------------------------------------------------- /sample.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataman-git/codes_for_articles/5c34840dbefe93bf9a6de9ef0b90ba3ddce9ab45/sample.mp3 -------------------------------------------------------------------------------- /stock.py: -------------------------------------------------------------------------------- 1 | # !pip install streamlit 2 | import streamlit as st 3 | import pandas as pd 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import yfinance as yf # https://pypi.org/project/yfinance/ 7 | from ta.volatility import BollingerBands 8 | from ta.trend import MACD 9 | from ta.momentum import RSIIndicator 10 | 11 | ################## 12 | # Set up sidebar # 13 | ################## 14 | 15 | # Add in location to select image. 
16 | 17 | option = st.sidebar.selectbox('Select one symbol', ( 'AAPL', 'MSFT',"SPY",'WMT')) 18 | 19 | 20 | import datetime 21 | 22 | today = datetime.date.today() 23 | before = today - datetime.timedelta(days=700) 24 | start_date = st.sidebar.date_input('Start date', before) 25 | end_date = st.sidebar.date_input('End date', today) 26 | if start_date < end_date: 27 | st.sidebar.success('Start date: `%s`\n\nEnd date:`%s`' % (start_date, end_date)) 28 | else: 29 | st.sidebar.error('Error: End date must fall after start date.') 30 | 31 | 32 | ############## 33 | # Stock data # 34 | ############## 35 | 36 | # https://technical-analysis-library-in-python.readthedocs.io/en/latest/ta.html#momentum-indicators 37 | 38 | df = yf.download(option,start= start_date,end= end_date, progress=False) 39 | 40 | indicator_bb = BollingerBands(df['Close']) 41 | 42 | bb = df 43 | bb['bb_h'] = indicator_bb.bollinger_hband() 44 | bb['bb_l'] = indicator_bb.bollinger_lband() 45 | bb = bb[['Close','bb_h','bb_l']] 46 | 47 | macd = MACD(df['Close']).macd() 48 | 49 | rsi = RSIIndicator(df['Close']).rsi() 50 | 51 | 52 | ################### 53 | # Set up main app # 54 | ################### 55 | 56 | st.write('Stock Bollinger Bands') 57 | 58 | st.line_chart(bb) 59 | 60 | progress_bar = st.progress(0) 61 | 62 | # https://share.streamlit.io/daniellewisdl/streamlit-cheat-sheet/app.py 63 | 64 | st.write('Stock Moving Average Convergence Divergence (MACD)') 65 | st.area_chart(macd) 66 | 67 | st.write('Stock RSI ') 68 | st.line_chart(rsi) 69 | 70 | 71 | st.write('Recent data ') 72 | st.dataframe(df.tail(10)) 73 | 74 | 75 | ################ 76 | # Download csv # 77 | ################ 78 | 79 | import base64 80 | from io import BytesIO 81 | 82 | def to_excel(df): 83 | output = BytesIO() 84 | writer = pd.ExcelWriter(output, engine='xlsxwriter') 85 | df.to_excel(writer, sheet_name='Sheet1') 86 | writer.save() 87 | processed_data = output.getvalue() 88 | return processed_data 89 | 90 | def 
get_table_download_link(df): 91 | """Generates a link allowing the data in a given panda dataframe to be downloaded 92 | in: dataframe 93 | out: href string 94 | """ 95 | val = to_excel(df) 96 | b64 = base64.b64encode(val) # val looks like b'...' 97 | return f'Download excel file' # decode b'abc' => abc 98 | 99 | st.markdown(get_table_download_link(df), unsafe_allow_html=True) 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /stock2.py: -------------------------------------------------------------------------------- 1 | # !pip3 install streamlit 2 | import streamlit as st 3 | import pandas as pd 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import yfinance as yf # https://pypi.org/project/yfinance/ 7 | 8 | ############################## 9 | # Technical Analysis Classes # 10 | ############################## 11 | 12 | # https://github.com/bukosabino/ta/blob/master/ta/utils.py 13 | class IndicatorMixin: 14 | """Util mixin indicator class""" 15 | 16 | _fillna = False 17 | 18 | def _check_fillna(self, series: pd.Series, value: int = 0) -> pd.Series: 19 | """Check if fillna flag is True. 20 | Args: 21 | series(pandas.Series): dataset 'Close' column. 22 | value(int): value to fill gaps; if -1 fill values using 'backfill' mode. 23 | Returns: 24 | pandas.Series: New feature generated. 
25 | """ 26 | if self._fillna: 27 | series_output = series.copy(deep=False) 28 | series_output = series_output.replace([np.inf, -np.inf], np.nan) 29 | if isinstance(value, int) and value == -1: 30 | series = series_output.fillna(method="ffill").fillna(value=-1) 31 | else: 32 | series = series_output.fillna(method="ffill").fillna(value) 33 | return series 34 | 35 | @staticmethod 36 | def _true_range( 37 | high: pd.Series, low: pd.Series, prev_close: pd.Series 38 | ) -> pd.Series: 39 | tr1 = high - low 40 | tr2 = (high - prev_close).abs() 41 | tr3 = (low - prev_close).abs() 42 | true_range = pd.DataFrame(data={"tr1": tr1, "tr2": tr2, "tr3": tr3}).max(axis=1) 43 | return true_range 44 | 45 | 46 | def dropna(df: pd.DataFrame) -> pd.DataFrame: 47 | """Drop rows with "Nans" values""" 48 | df = df.copy() 49 | number_cols = df.select_dtypes("number").columns.to_list() 50 | df[number_cols] = df[number_cols][df[number_cols] < math.exp(709)] # big number 51 | df[number_cols] = df[number_cols][df[number_cols] != 0.0] 52 | df = df.dropna() 53 | return df 54 | 55 | 56 | def _sma(series, periods: int, fillna: bool = False): 57 | min_periods = 0 if fillna else periods 58 | return series.rolling(window=periods, min_periods=min_periods).mean() 59 | 60 | 61 | def _ema(series, periods, fillna=False): 62 | min_periods = 0 if fillna else periods 63 | return series.ewm(span=periods, min_periods=min_periods, adjust=False).mean() 64 | 65 | 66 | def _get_min_max(series1: pd.Series, series2: pd.Series, function: str = "min"): 67 | """Find min or max value between two lists for each index""" 68 | series1 = np.array(series1) 69 | series2 = np.array(series2) 70 | if function == "min": 71 | output = np.amin([series1, series2], axis=0) 72 | elif function == "max": 73 | output = np.amax([series1, series2], axis=0) 74 | else: 75 | raise ValueError('"f" variable value should be "min" or "max"') 76 | 77 | return pd.Series(output) 78 | 79 | 80 | # 
https://github.com/bukosabino/ta/blob/master/ta/volatility.py 81 | class BollingerBands(IndicatorMixin): 82 | """Bollinger Bands 83 | https://school.stockcharts.com/doku.php?id=technical_indicators:bollinger_bands 84 | Args: 85 | close(pandas.Series): dataset 'Close' column. 86 | window(int): n period. 87 | window_dev(int): n factor standard deviation 88 | fillna(bool): if True, fill nan values. 89 | """ 90 | 91 | def __init__( 92 | self, 93 | close: pd.Series, 94 | window: int = 20, 95 | window_dev: int = 2, 96 | fillna: bool = False, 97 | ): 98 | self._close = close 99 | self._window = window 100 | self._window_dev = window_dev 101 | self._fillna = fillna 102 | self._run() 103 | 104 | def _run(self): 105 | min_periods = 0 if self._fillna else self._window 106 | self._mavg = self._close.rolling(self._window, min_periods=min_periods).mean() 107 | self._mstd = self._close.rolling(self._window, min_periods=min_periods).std( 108 | ddof=0 109 | ) 110 | self._hband = self._mavg + self._window_dev * self._mstd 111 | self._lband = self._mavg - self._window_dev * self._mstd 112 | 113 | def bollinger_mavg(self) -> pd.Series: 114 | """Bollinger Channel Middle Band 115 | Returns: 116 | pandas.Series: New feature generated. 117 | """ 118 | mavg = self._check_fillna(self._mavg, value=-1) 119 | return pd.Series(mavg, name="mavg") 120 | 121 | def bollinger_hband(self) -> pd.Series: 122 | """Bollinger Channel High Band 123 | Returns: 124 | pandas.Series: New feature generated. 125 | """ 126 | hband = self._check_fillna(self._hband, value=-1) 127 | return pd.Series(hband, name="hband") 128 | 129 | def bollinger_lband(self) -> pd.Series: 130 | """Bollinger Channel Low Band 131 | Returns: 132 | pandas.Series: New feature generated. 
133 | """ 134 | lband = self._check_fillna(self._lband, value=-1) 135 | return pd.Series(lband, name="lband") 136 | 137 | def bollinger_wband(self) -> pd.Series: 138 | """Bollinger Channel Band Width 139 | From: https://school.stockcharts.com/doku.php?id=technical_indicators:bollinger_band_width 140 | Returns: 141 | pandas.Series: New feature generated. 142 | """ 143 | wband = ((self._hband - self._lband) / self._mavg) * 100 144 | wband = self._check_fillna(wband, value=0) 145 | return pd.Series(wband, name="bbiwband") 146 | 147 | def bollinger_pband(self) -> pd.Series: 148 | """Bollinger Channel Percentage Band 149 | From: https://school.stockcharts.com/doku.php?id=technical_indicators:bollinger_band_perce 150 | Returns: 151 | pandas.Series: New feature generated. 152 | """ 153 | pband = (self._close - self._lband) / (self._hband - self._lband) 154 | pband = self._check_fillna(pband, value=0) 155 | return pd.Series(pband, name="bbipband") 156 | 157 | def bollinger_hband_indicator(self) -> pd.Series: 158 | """Bollinger Channel Indicator Crossing High Band (binary). 159 | It returns 1, if close is higher than bollinger_hband. Else, it returns 0. 160 | Returns: 161 | pandas.Series: New feature generated. 162 | """ 163 | hband = pd.Series( 164 | np.where(self._close > self._hband, 1.0, 0.0), index=self._close.index 165 | ) 166 | hband = self._check_fillna(hband, value=0) 167 | return pd.Series(hband, index=self._close.index, name="bbihband") 168 | 169 | def bollinger_lband_indicator(self) -> pd.Series: 170 | """Bollinger Channel Indicator Crossing Low Band (binary). 171 | It returns 1, if close is lower than bollinger_lband. Else, it returns 0. 172 | Returns: 173 | pandas.Series: New feature generated. 
174 | """ 175 | lband = pd.Series( 176 | np.where(self._close < self._lband, 1.0, 0.0), index=self._close.index 177 | ) 178 | lband = self._check_fillna(lband, value=0) 179 | return pd.Series(lband, name="bbilband") 180 | 181 | # https://github.com/bukosabino/ta/blob/master/ta/momentum.py 182 | class RSIIndicator(IndicatorMixin): 183 | """Relative Strength Index (RSI) 184 | Compares the magnitude of recent gains and losses over a specified time 185 | period to measure speed and change of price movements of a security. It is 186 | primarily used to attempt to identify overbought or oversold conditions in 187 | the trading of an asset. 188 | https://www.investopedia.com/terms/r/rsi.asp 189 | Args: 190 | close(pandas.Series): dataset 'Close' column. 191 | window(int): n period. 192 | fillna(bool): if True, fill nan values. 193 | """ 194 | 195 | def __init__(self, close: pd.Series, window: int = 14, fillna: bool = False): 196 | self._close = close 197 | self._window = window 198 | self._fillna = fillna 199 | self._run() 200 | 201 | def _run(self): 202 | diff = self._close.diff(1) 203 | up_direction = diff.where(diff > 0, 0.0) 204 | down_direction = -diff.where(diff < 0, 0.0) 205 | min_periods = 0 if self._fillna else self._window 206 | emaup = up_direction.ewm( 207 | alpha=1 / self._window, min_periods=min_periods, adjust=False 208 | ).mean() 209 | emadn = down_direction.ewm( 210 | alpha=1 / self._window, min_periods=min_periods, adjust=False 211 | ).mean() 212 | relative_strength = emaup / emadn 213 | self._rsi = pd.Series( 214 | np.where(emadn == 0, 100, 100 - (100 / (1 + relative_strength))), 215 | index=self._close.index, 216 | ) 217 | 218 | def rsi(self) -> pd.Series: 219 | """Relative Strength Index (RSI) 220 | Returns: 221 | pandas.Series: New feature generated. 
222 | """ 223 | rsi_series = self._check_fillna(self._rsi, value=50) 224 | return pd.Series(rsi_series, name="rsi") 225 | 226 | # https://github.com/bukosabino/ta/blob/master/ta/trend.py 227 | class MACD(IndicatorMixin): 228 | """Moving Average Convergence Divergence (MACD) 229 | Is a trend-following momentum indicator that shows the relationship between 230 | two moving averages of prices. 231 | https://school.stockcharts.com/doku.php?id=technical_indicators:moving_average_convergence_divergence_macd 232 | Args: 233 | close(pandas.Series): dataset 'Close' column. 234 | window_fast(int): n period short-term. 235 | window_slow(int): n period long-term. 236 | window_sign(int): n period to signal. 237 | fillna(bool): if True, fill nan values. 238 | """ 239 | 240 | def __init__( 241 | self, 242 | close: pd.Series, 243 | window_slow: int = 26, 244 | window_fast: int = 12, 245 | window_sign: int = 9, 246 | fillna: bool = False, 247 | ): 248 | self._close = close 249 | self._window_slow = window_slow 250 | self._window_fast = window_fast 251 | self._window_sign = window_sign 252 | self._fillna = fillna 253 | self._run() 254 | 255 | def _run(self): 256 | self._emafast = _ema(self._close, self._window_fast, self._fillna) 257 | self._emaslow = _ema(self._close, self._window_slow, self._fillna) 258 | self._macd = self._emafast - self._emaslow 259 | self._macd_signal = _ema(self._macd, self._window_sign, self._fillna) 260 | self._macd_diff = self._macd - self._macd_signal 261 | 262 | def macd(self) -> pd.Series: 263 | """MACD Line 264 | Returns: 265 | pandas.Series: New feature generated. 266 | """ 267 | macd_series = self._check_fillna(self._macd, value=0) 268 | return pd.Series( 269 | macd_series, name=f"MACD_{self._window_fast}_{self._window_slow}" 270 | ) 271 | 272 | def macd_signal(self) -> pd.Series: 273 | """Signal Line 274 | Returns: 275 | pandas.Series: New feature generated. 
276 | """ 277 | 278 | macd_signal_series = self._check_fillna(self._macd_signal, value=0) 279 | return pd.Series( 280 | macd_signal_series, 281 | name=f"MACD_sign_{self._window_fast}_{self._window_slow}", 282 | ) 283 | 284 | def macd_diff(self) -> pd.Series: 285 | """MACD Histogram 286 | Returns: 287 | pandas.Series: New feature generated. 288 | """ 289 | macd_diff_series = self._check_fillna(self._macd_diff, value=0) 290 | return pd.Series( 291 | macd_diff_series, name=f"MACD_diff_{self._window_fast}_{self._window_slow}" 292 | ) 293 | 294 | ################## 295 | # Set up sidebar # 296 | ################## 297 | 298 | # Add in location to select image. 299 | 300 | option = st.sidebar.selectbox('Select one symbol', ( 'AAPL', 'MSFT',"SPY",'WMT')) 301 | 302 | 303 | import datetime 304 | 305 | today = datetime.date.today() 306 | before = today - datetime.timedelta(days=700) 307 | start_date = st.sidebar.date_input('Start date', before) 308 | end_date = st.sidebar.date_input('End date', today) 309 | if start_date < end_date: 310 | st.sidebar.success('Start date: `%s`\n\nEnd date:`%s`' % (start_date, end_date)) 311 | else: 312 | st.sidebar.error('Error: End date must fall after start date.') 313 | 314 | 315 | ############## 316 | # Stock data # 317 | ############## 318 | 319 | # https://technical-analysis-library-in-python.readthedocs.io/en/latest/ta.html#momentum-indicators 320 | 321 | df = yf.download(option,start= start_date,end= end_date, progress=False) 322 | 323 | indicator_bb = BollingerBands(df['Close']) 324 | 325 | bb = df 326 | bb['bb_h'] = indicator_bb.bollinger_hband() 327 | bb['bb_l'] = indicator_bb.bollinger_lband() 328 | bb = bb[['Close','bb_h','bb_l']] 329 | 330 | macd = MACD(df['Close']).macd() 331 | 332 | rsi = RSIIndicator(df['Close']).rsi() 333 | 334 | 335 | ################### 336 | # Set up main app # 337 | ################### 338 | 339 | 340 | #st.write('Stock Bollinger Bands') 341 | 342 | st.line_chart(bb) 343 | 344 | progress_bar = 
st.progress(0) 345 | 346 | # https://share.streamlit.io/daniellewisdl/streamlit-cheat-sheet/app.py 347 | 348 | st.write('Stock Moving Average Convergence Divergence (MACD)') 349 | st.area_chart(macd) 350 | 351 | st.write('Stock RSI ') 352 | st.line_chart(rsi) 353 | 354 | 355 | st.write('Recent data ') 356 | st.dataframe(df.tail(10)) 357 | 358 | ################ 359 | # Download csv # 360 | ################ 361 | 362 | import base64 363 | from io import BytesIO 364 | 365 | def to_excel(df): 366 | output = BytesIO() 367 | writer = pd.ExcelWriter(output, engine='xlsxwriter') 368 | df.to_excel(writer, sheet_name='Sheet1') 369 | writer.save() 370 | processed_data = output.getvalue() 371 | return processed_data 372 | 373 | def get_table_download_link(df): 374 | """Generates a link allowing the data in a given panda dataframe to be downloaded 375 | in: dataframe 376 | out: href string 377 | """ 378 | val = to_excel(df) 379 | b64 = base64.b64encode(val) # val looks like b'...' 380 | return f'Download excel file' # decode b'abc' => abc 381 | 382 | st.markdown(get_table_download_link(df), unsafe_allow_html=True) 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | -------------------------------------------------------------------------------- /streamlit_model_performance.py: -------------------------------------------------------------------------------- 1 | # !pip install streamlit 2 | import streamlit as st 3 | import pandas as pd 4 | import numpy as np 5 | import seaborn as sns 6 | from datetime import datetime 7 | import matplotlib.pyplot as plt 8 | 9 | # Create some data 10 | df = pd.DataFrame({'state_CA':np.random.randint(low = 40, high=60, size=15), 11 | 'state_TX':np.random.randint(low = 20, high=30, size=15), 12 | 'state_NY':np.random.randint(low = 10, high=15, size=15), 13 | 'volume':np.random.randint(low = 300, high=350, size=15), 14 | 'pred':np.random.randint(low = 60, high=99, size=15), 15 | 'TP':np.random.randint(low = 68, 
high=75, size=15), 16 | 'TN':np.random.randint(low = 15, high=20, size=15), 17 | 'FP':np.random.randint(low = 3, high=5, size=15) 18 | }, 19 | index=pd.date_range(start="2019-01-01",end="2020-03-31", freq='M')) 20 | df['FN'] = 100 - df['TP'] - df['TN'] - df['FP'] 21 | df.index = df.index.strftime('%Y-%m-%d') 22 | 23 | ################## 24 | # Plots # 25 | ################## 26 | # Plot 1 # 27 | fig1, ax1 = plt.subplots(figsize=(6,4)) 28 | s1 = sns.barplot(x = df.index, y = 'state_CA', data = df, color = 'red',label='state_CA',ax=ax1) 29 | s2 = sns.barplot(x = df.index, y = 'state_TX', data = df, color = 'blue',label='state_TX',ax=ax1) 30 | s3 = sns.barplot(x = df.index, y = 'state_NY', data = df, color = 'green',label='state_NY',ax=ax1) 31 | plt.ylim(0,70) 32 | z, _ = plt.xticks(rotation=90) 33 | plt.legend(ncol=3, loc='best', frameon=True) 34 | 35 | # plot 2 36 | fig2, ax2 = plt.subplots(figsize=(6,4)) 37 | sns.set(style="whitegrid") 38 | sns.barplot(x=df.index, y="volume", color="b", data=df, ax=ax2) 39 | z, _ = plt.xticks(rotation=90) 40 | 41 | 42 | # Plot 3 43 | fig3, ax3 = plt.subplots(figsize=(6,4)) 44 | sns.set(style="whitegrid") 45 | sns.lineplot(x=df.index, y="pred", color="b", data=df, ax=ax3) 46 | plt.ylim(0,100) 47 | z, _ = plt.xticks(rotation=90) 48 | 49 | # Plot 4 50 | fig4, ax4 = plt.subplots(figsize=(6,4)) 51 | t1 = sns.lineplot(x = df.index, y = 'TP', data = df, color = 'red',label='True Positive',ax=ax4) 52 | t2 = sns.lineplot(x = df.index, y = 'TN', data = df, color = 'blue',label='True Negative',ax=ax4) 53 | t3 = sns.lineplot(x = df.index, y = 'FP', data = df, color = 'green',label='False Positive',ax=ax4) 54 | t4 = sns.lineplot(x = df.index, y = 'FN', data = df, color = 'black',label='False Negative',ax=ax4) 55 | plt.ylim(0,100) 56 | z, _ = plt.xticks(rotation=90) 57 | plt.legend(ncol=2, loc='best', frameon=True) 58 | 59 | ################## 60 | # Set up sidebar # 61 | ################## 62 | 63 | option = st.sidebar.write('More functions') 
64 | 65 | ################### 66 | # Set up main app # 67 | ################### 68 | 69 | col1, col2 = st.beta_columns(2) 70 | col1.header("State") 71 | col1.write(fig1) 72 | col2.header("Volume") 73 | col2.write(fig2) 74 | 75 | col3, col4 = st.beta_columns(2) 76 | col3.header("Prediction") 77 | col3.write(fig3) 78 | col4.header("Confusion Matrix") 79 | col4.write(fig4) 80 | -------------------------------------------------------------------------------- /use_NotCoolYet.py: -------------------------------------------------------------------------------- 1 | # 2 | import NotCoolYet 3 | 4 | greatCookie = NotCoolYet.cookie(20,'Frosting') 5 | 6 | # smallCookie 7 | print("My great cookie is a " + greatCookie.theFlavor + " cookie.") 8 | print("It's area is: ") 9 | print(greatCookie.area()) 10 | print("And it's perimeter is: ") 11 | print(greatCookie.perimeter()) 12 | -------------------------------------------------------------------------------- /use_VeryCool.py: -------------------------------------------------------------------------------- 1 | # 2 | import VeryCool 3 | 4 | greatCookie = VeryCool.cookie(20,'Frosting') 5 | 6 | # smallCookie 7 | print("My great cookie is a " + greatCookie.theFlavor + " cookie.") 8 | print("It's area is: ") 9 | print(greatCookie.area()) 10 | print("And it's perimeter is: ") 11 | print(greatCookie.perimeter()) 12 | -------------------------------------------------------------------------------- /voice-change-Ivy.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataman-git/codes_for_articles/5c34840dbefe93bf9a6de9ef0b90ba3ddce9ab45/voice-change-Ivy.mp3 -------------------------------------------------------------------------------- /voice-change-Joanna.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataman-git/codes_for_articles/5c34840dbefe93bf9a6de9ef0b90ba3ddce9ab45/voice-change-Joanna.mp3 
## Text-to-speech Wavenet

import json
import os

# Tell the Google Cloud client library which service-account key file to use.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'YOUR_JSON_FILE.json'

# Locations used by the original script; kept as defaults so existing
# behavior is unchanged.
DEFAULT_OUTPUT_DIR = "C:/Users/datas/Downloads/TTS/output/"
DEFAULT_DOC_DIR = 'C:/Users/datas/Downloads/TTS/doc/'


def synthesize_ssml(ssml,
                    booktitle,
                    language_code,
                    voice_name,
                    speaking_rate,
                    output_dir=DEFAULT_OUTPUT_DIR):
    """Synthesize speech from an SSML string and save it as an MP3 file.

    Note: ssml must be well-formed according to:
        https://www.w3.org/TR/speech-synthesis/

    Example: <speak>Hello there.</speak>

    Args:
        ssml: The SSML markup to synthesize.
        booktitle: Title used as the first part of the output file name.
        language_code: Language code passed to the voice selection,
            e.g. "en-US".
        voice_name: Name of the voice, e.g. "en-US-Wavenet-E". Names of
            voices can be retrieved with client.list_voices().
        speaking_rate: Speaking rate passed to the audio configuration.
        output_dir: Directory the MP3 is written to. Defaults to the
            directory the script originally hard-coded.

    Returns:
        The path of the MP3 file that was written.
    """
    # Imported lazily so the module can be imported without the Google
    # Cloud client library installed.
    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()

    input_text = texttospeech.SynthesisInput(ssml=ssml)

    # Note: the voice can also be specified by name.
    voice = texttospeech.VoiceSelectionParams(
        language_code=language_code,
        name=voice_name
    )

    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        speaking_rate=speaking_rate
    )

    response = client.synthesize_speech(
        input=input_text, voice=voice, audio_config=audio_config
    )

    # File name is <booktitle><voice_name><speaking_rate>.mp3, exactly as
    # the original string concatenation produced.
    out_path = os.path.join(
        output_dir, booktitle + voice_name + str(speaking_rate) + ".mp3"
    )

    # The response's audio_content is binary.
    with open(out_path, "wb") as out:
        out.write(response.audio_content)

    return out_path


def main():
    """Read the book text and synthesize it once per Wavenet voice."""
    # Get the text
    booktitle = "Dickens_A_Tale_of_Two_Cities"
    filename = DEFAULT_DOC_DIR + booktitle + ".txt"
    print(filename)
    # The `with` block closes the file; no explicit close() is needed.
    with open(filename, 'r') as f:
        text = f.read()
    print(text)

    # Same four voices, in the same order, as the original
    # Emily / Frances / Adam / Jason calls.
    voice_names = (
        "en-US-Wavenet-E",
        "en-US-Wavenet-F",
        "en-US-Wavenet-A",
        "en-US-Wavenet-J",
    )
    for voice_name in voice_names:
        synthesize_ssml(text,
                        booktitle,
                        language_code="en-US",
                        voice_name=voice_name,
                        speaking_rate=0.8)


# Run the synthesis only when executed as a script, not on import.
if __name__ == "__main__":
    main()