├── 001_Pandas.ipynb
├── 002_Numpy.ipynb
├── 003_Matrix.ipynb
├── 004_Data_PreProcessing.ipynb
├── 005_Pre_Proccessing (Part_2).ipynb
├── 006_Data_Visualization.ipynb
├── 007_Understand_Data.ipynb
├── 008_Basic_Statistics.ipynb
├── ANOVA_F_value.ipynb
├── Array_Selection_Numpy.ipynb
├── Basic_Machine_Learning_Predicts.ipynb
├── Categorical_Continuous.ipynb
├── Chi_Squared.ipynb
├── Column_Selection_Pandas.ipynb
├── DL_Title.PNG
├── Data_Cleaning_for_Machine_Learning.ipynb
├── Descriptive_Statistics.ipynb
├── Discrete_Probability_Distributions.ipynb
├── Drop_Highly_Correlated_Features.ipynb
├── Feature_Importance_Classification.ipynb
├── Feature_Importance_Continuous.ipynb
├── Features_Analysis.ipynb
├── Features_Extraction.ipynb
├── Features_Extraction_with_PCA.ipynb
├── Features_Rank.ipynb
├── Features_Scores.ipynb
├── Features_Selections.ipynb
├── Features_Selections_Stock.ipynb
├── Features_Transformation.ipynb
├── In_Sample_Out_Sample.ipynb
├── LICENSE
├── Linear_Regression_Stock.ipynb
├── Logistic_Regression_Stock.ipynb
├── Metric.ipynb
├── Nested_Cross-Validation_Part2.ipynb
├── NetworkX.ipynb
├── Poisson_Regression.ipynb
├── Principal_Component_Analysis_(PCA).ipynb
├── Principal_Component_Analysis_(PCA)_Stock.ipynb
├── Probabilities.ipynb
├── README.md
├── Scaling_and_Transformations.ipynb
├── Split_Data.ipynb
├── Stationary_Check.ipynb
├── Stationary_Check_Part_2.ipynb
├── Stock_Algorithms
    ├── 30_Regression_Models.ipynb
    ├── ARIMA_Models.ipynb
    ├── AdaBoost_Classification.ipynb
    ├── AdaBoost_Regressor.ipynb
    ├── Addictive_Model.ipynb
    ├── Algorithms.PNG
    ├── Anomaly_Detection.ipynb
    ├── Anomaly_Detection_SVM.ipynb
    ├── Artificial_Neural_Network.ipynb
    ├── Automatic_Relevance_Determination_Regression.ipynb
    ├── Bagging_Classifier.ipynb
    ├── Basic_Machine_Learning_Predicts.ipynb
    ├── Basic_Machine_Learning_Predicts_Updates.ipynb
    ├── Basic_Regressions.ipynb
    ├── Bayesian_Ridge_Regression.ipynb
    ├── Bayesian_Ridge_Regression_Part2.ipynb
    ├── Bernoulli_Restricted_Boltzmann_Machine.ipynb
    ├── Calibrate_Predicted_Probabilities.ipynb
    ├── CatBoost_Algorithms.ipynb
    ├── CatBoost_Algorithms_Part2.ipynb
    ├── Classification_Cluster.ipynb
    ├── Classification_Cluster_2.ipynb
    ├── Classification_Cluster_3.ipynb
    ├── Convolutional_Neural_Network.ipynb
    ├── Convolutional_Neural_Networks_Keras.ipynb
    ├── Cox_Proportional_Hazards.ipynb
    ├── Decision_Tree_Classifier_Visualize.ipynb
    ├── Decision_Tree_Multioutput_Regression.ipynb
    ├── Decision_Trees_Classification.ipynb
    ├── Decision_Trees_Classification_Explained.ipynb
    ├── Decision_Trees_Classification_Part2.ipynb
    ├── Decision_Trees_Classification_Part3.ipynb
    ├── Decision_Trees_Classification_Part4.ipynb
    ├── Decision_Trees_Classification_Part5.ipynb
    ├── Decision_Trees_Regression.ipynb
    ├── Decision_Trees_Regression_Part2.ipynb
    ├── Deep_Belief_Networks.ipynb
    ├── ElasticNet_Regression.ipynb
    ├── Fast_Fourier_Transformations.ipynb
    ├── Fixed_Effects_Model.ipynb
    ├── Gaussian_Regression.ipynb
    ├── Genetic_Algorithm.ipynb
    ├── Genetic_Algorithm_Part2.ipynb
    ├── Gradient_Ascent.ipynb
    ├── Gradient_Boosting_Classification.ipynb
    ├── Gradient_Boosting_Machine_(GBM).ipynb
    ├── Gradient_Boosting_Regressor.ipynb
    ├── Hierarchical_Clustering.ipynb
    ├── Huber_Regression.ipynb
    ├── Huber_Regression_Part2.ipynb
    ├── Hyperparameter_Tuning.ipynb
    ├── Implementing_Logistic_Regression.ipynb
    ├── Isotonic_Regression.ipynb
    ├── Isotonic_Regression_Linear_Regression.ipynb
    ├── K_Means.ipynb
    ├── K_Means_Clustering.ipynb
    ├── K_Means_Clustering_Part2.ipynb
    ├── K_Nearest_Neighbors.ipynb
    ├── K_Nearest_Neighbors_Multioutput_Regression.ipynb
    ├── K_Nearest_Neighbors_Part2.ipynb
    ├── LSTM_Neural_Networks.ipynb
    ├── LSTM_RNN.ipynb
    ├── LSTM_RNN_Part2.ipynb
    ├── Lasso_Regression.ipynb
    ├── Lasso_Regression_Alpha_Levels.ipynb
    ├── Lasso_Regression_Part2.ipynb
    ├── Lasso_Ridge_Regression.ipynb
    ├── Least_Angled_Regression.ipynb
    ├── Least_Squares_Regression.ipynb
    ├── Leave_One_Out_Cross_Validation.ipynb
    ├── Light_GBM.ipynb
    ├── Linear_Discriminant_Analysis.ipynb
    ├── Linear_Discriminant_Analysis_Classification.ipynb
    ├── Linear_Regression.ipynb
    ├── Linear_Regression_Classification.ipynb
    ├── Linear_Regression_Continuous.ipynb
    ├── Linear_Regression_Multioutput_Regression.ipynb
    ├── Linear_Regression_Predict_Future_Price.ipynb
    ├── Linear_Regression_Prediction.ipynb
    ├── Linear_Regression_Prediction_Part2.ipynb
    ├── Linear_Regression_Prediction_Part3.ipynb
    ├── Linear_Regression_Using_Linear_Algebra.ipynb
    ├── Linear_Regression_with_Normalize_Data.ipynb
    ├── Locally_Estimated_Scatterplot_Smoothing.ipynb
    ├── Locally_Weighted_Scatterplot_Smoothing_LOWESS.ipynb
    ├── Logistic_Model.ipynb
    ├── Logistic_Regression.ipynb
    ├── Logistic_Regression_Classification.ipynb
    ├── Logistic_Regression_Classification_Part2.ipynb
    ├── Logistic_Regression_Classification_Part3.ipynb
    ├── Logistic_Regression_Classification_Part4.ipynb
    ├── Logistic_Regression_Large_Data.ipynb
    ├── Logistic_Regression_Part2.ipynb
    ├── Mini-Batch_k-Means_Clustering.ipynb
    ├── Model_Selection.ipynb
    ├── MultiOutputRegressor.ipynb
    ├── Multioutput_Regression_With_Cross-Validation.ipynb
    ├── Multiple_Linear_Regression.ipynb
    ├── Multiple_Linear_Regression_Part2.ipynb
    ├── Multiple_Linear_Regression_with_Normalize_Data.ipynb
    ├── Multivariate_Adaptive_Regression_Splines.ipynb
    ├── Multivariate_Adaptive_Regression_Splines_Part2.ipynb
    ├── Multivariate_relationships.ipynb
    ├── Naive_Bayes_Classification.ipynb
    ├── Naive_Bayes_Multinomial_Classification.ipynb
    ├── Nearest_Neighbor_Classification.ipynb
    ├── Nested_Cross-Validation.ipynb
    ├── Nested_Cross-Validation_Part2.ipynb
    ├── NetworkX.ipynb
    ├── NetworkX_Part2.ipynb
    ├── Neural_Network_ANN.ipynb
    ├── Neural_Network_Part2.ipynb
    ├── Neural_Networks_Classification.ipynb
    ├── Neural_Networks_Regression.ipynb
    ├── Non_Linear_Least_Squares_Curve_Fitting.ipynb
    ├── Optimization_Parameters.ipynb
    ├── Ordinal_Regression.ipynb
    ├── Partial_Least_Squares_Regression_(PLSR).ipynb
    ├── Passive_Aggressive_Classification.ipynb
    ├── Passive_Aggressive_Classifier.ipynb
    ├── Passive_Aggressive_Regression.ipynb
    ├── Perceptron_Algorithm.ipynb
    ├── Polynomial_Regression.ipynb
    ├── Polynomial_Regression_Part2.ipynb
    ├── Polynomial_Regression_Part3.ipynb
    ├── Principal_Component_Classification.ipynb
    ├── Principal_Component_Regression.ipynb
    ├── PyBrain_Dataset.ipynb
    ├── PyCaret_Stock_Prediction.ipynb
    ├── PyCaret_Stock_Prediction_Part2.ipynb
    ├── PyTorch_Linear_Regression.ipynb
    ├── PyTorch_Regression.ipynb
    ├── Pynamical_Prediction.ipynb
    ├── Quantile_Regression.ipynb
    ├── Quantile_Regression_Part2.ipynb
    ├── Quasi_Poisson_Regression.ipynb
    ├── Quasi_Poisson_Regression_Part2.ipynb
    ├── RANSAC_Regression.ipynb
    ├── README.md
    ├── RNN_Tensorflow.ipynb
    ├── Radius_Neighbors_Regressor.ipynb
    ├── Random_Forests_Classification.ipynb
    ├── Random_Forests_Classification_Part2.ipynb
    ├── Random_Forests_Multioutput_Regression.ipynb
    ├── Random_Forests_Regression.ipynb
    ├── Regressor_Chain.ipynb
    ├── Ridge_Regression.ipynb
    ├── Robust_Linear_Models.ipynb
    ├── SMOTE_Near_Miss_Algorithm.ipynb
    ├── SVC_Predicted_Probabilities.ipynb
    ├── Simple_Linear_Regression.ipynb
    ├── Simple_Linear_Regression_Part2.ipynb
    ├── Simple_Linear_Regression_with_Normalize_Data.ipynb
    ├── Simple_Multiple_Linear_Regression.ipynb
    ├── Stepwise_Regression_Backward.ipynb
    ├── Stepwise_Regression_Forward.ipynb
    ├── Stochastic_Gradient_Descent_Classification.ipynb
    ├── Stochastic_Gradient_Descent_Regression.ipynb
    ├── Stochastic_Gradient_Descent_Regression_Part2.ipynb
    ├── Support_Vector_Classifiers.ipynb
    ├── Support_Vector_Machine.ipynb
    ├── Support_Vector_Machine_Part2.ipynb
    ├── TensorFlow_LinearRegression2.ipynb
    ├── TensorFlow_LinearRegressionSingle.ipynb
    ├── TensorFlow_LinearRegression_Basic.ipynb
    ├── Theil_Sen_Regression.ipynb
    ├── Time_Series_Decomposition_Random_Walks.ipynb
    ├── Time_Series_Forecasting.ipynb
    ├── Time_Series_Forecasting_Model.ipynb
    ├── TruncatedSVD.ipynb
    ├── Tweedie_Regression.ipynb
    ├── XGBoost_Algorithms.ipynb
    ├── XGBoost_Classification.ipynb
    ├── XGBoost_Classification_Part_2.ipynb
    ├── XGBoost_Regression.ipynb
    ├── XGBoost_Regressor.ipynb
    ├── XGBoost_Regressor_Part_2.ipynb
    ├── scikit-learn_Prediction.ipynb
    ├── shap_prediction.ipynb
    ├── t_SNE.ipynb
    ├── t_SNE_Part2.ipynb
    └── t_SNE_Part3.ipynb
├── Stock_Apps
    ├── README.md
    ├── Stock_Apps.PNG
    ├── Stock_ML_Data_PreProcessing_Apps.py
    ├── Stock_ML_Feature_Selection_Apps.py
    ├── Stock_ML_Predict_Apps.py
    ├── Stock_ML_Predict_Apps_Menu.py
    └── Stock_ML_Rescale_Data_Apps.py
├── Tensorflow_Basics.ipynb
├── Title.PNG
├── Train_Test_Split.ipynb
├── Train_Validate_Test.ipynb
├── Underfitting_Overfitting_Check_Regression.ipynb
├── Understand_Data.ipynb
└── Variance_Inflation_Factor.ipynb


/004_Data_PreProcessing.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "source": [
  6 |         "# Data PreProcessing"
  7 |       ],
  8 |       "metadata": {}
  9 |     },
 10 |     {
 11 |       "cell_type": "markdown",
 12 |       "source": [
 13 |         "Step 1: Importing the libraries"
 14 |       ],
 15 |       "metadata": {}
 16 |     },
 17 |     {
 18 |       "cell_type": "code",
 19 |       "source": [
 20 |         "import numpy as np\n",
 21 |         "import pandas as pd\n",
 22 |         "\n",
 23 |         "import warnings\n",
 24 |         "warnings.filterwarnings(\"ignore\")\n",
 25 |         "\n",
 26 |         "# fix_yahoo_finance is used to fetch data \n",
 27 |         "import fix_yahoo_finance as yf\n",
 28 |         "yf.pdr_override()"
 29 |       ],
 30 |       "outputs": [],
 31 |       "execution_count": 1,
 32 |       "metadata": {
 33 |         "collapsed": false,
 34 |         "outputHidden": false,
 35 |         "inputHidden": false
 36 |       }
 37 |     },
 38 |     {
 39 |       "cell_type": "markdown",
 40 |       "source": [
 41 |         "Step 2: Importing dataset"
 42 |       ],
 43 |       "metadata": {}
 44 |     },
 45 |     {
 46 |       "cell_type": "code",
 47 |       "source": [
 48 |         "# input\n",
 49 |         "symbol = 'AMD'\n",
 50 |         "start = '2014-01-01'\n",
 51 |         "end = '2018-08-27'\n",
 52 |         "\n",
 53 |         "# Read data \n",
 54 |         "dataset = yf.download(symbol,start,end)\n",
 55 |         "\n",
 56 |         "# Preview the first five rows\n",
 57 |         "dataset.head()"
 58 |       ],
 59 |       "outputs": [
 60 |         {
 61 |           "output_type": "stream",
 62 |           "name": "stdout",
 63 |           "text": [
 64 |             "[*********************100%***********************]  1 of 1 downloaded\n"
 65 |           ]
 66 |         },
 67 |         {
 68 |           "output_type": "execute_result",
 69 |           "execution_count": 2,
 70 |           "data": {
 71 |             "text/plain": [
 72 |               "            Open  High   Low  Close  Adj Close    Volume\n",
 73 |               "Date                                                    \n",
 74 |               "2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n",
 75 |               "2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n",
 76 |               "2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n",
 77 |               "2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n",
 78 |               "2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700"
 79 |             ],
 80 |             "text/html": [
 81 |               "<div>\n",
 82 |               "<style scoped>\n",
 83 |               "    .dataframe tbody tr th:only-of-type {\n",
 84 |               "        vertical-align: middle;\n",
 85 |               "    }\n",
 86 |               "\n",
 87 |               "    .dataframe tbody tr th {\n",
 88 |               "        vertical-align: top;\n",
 89 |               "    }\n",
 90 |               "\n",
 91 |               "    .dataframe thead th {\n",
 92 |               "        text-align: right;\n",
 93 |               "    }\n",
 94 |               "</style>\n",
 95 |               "<table border=\"1\" class=\"dataframe\">\n",
 96 |               "  <thead>\n",
 97 |               "    <tr style=\"text-align: right;\">\n",
 98 |               "      <th></th>\n",
 99 |               "      <th>Open</th>\n",
100 |               "      <th>High</th>\n",
101 |               "      <th>Low</th>\n",
102 |               "      <th>Close</th>\n",
103 |               "      <th>Adj Close</th>\n",
104 |               "      <th>Volume</th>\n",
105 |               "    </tr>\n",
106 |               "    <tr>\n",
107 |               "      <th>Date</th>\n",
108 |               "      <th></th>\n",
109 |               "      <th></th>\n",
110 |               "      <th></th>\n",
111 |               "      <th></th>\n",
112 |               "      <th></th>\n",
113 |               "      <th></th>\n",
114 |               "    </tr>\n",
115 |               "  </thead>\n",
116 |               "  <tbody>\n",
117 |               "    <tr>\n",
118 |               "      <th>2014-01-02</th>\n",
119 |               "      <td>3.85</td>\n",
120 |               "      <td>3.98</td>\n",
121 |               "      <td>3.84</td>\n",
122 |               "      <td>3.95</td>\n",
123 |               "      <td>3.95</td>\n",
124 |               "      <td>20548400</td>\n",
125 |               "    </tr>\n",
126 |               "    <tr>\n",
127 |               "      <th>2014-01-03</th>\n",
128 |               "      <td>3.98</td>\n",
129 |               "      <td>4.00</td>\n",
130 |               "      <td>3.88</td>\n",
131 |               "      <td>4.00</td>\n",
132 |               "      <td>4.00</td>\n",
133 |               "      <td>22887200</td>\n",
134 |               "    </tr>\n",
135 |               "    <tr>\n",
136 |               "      <th>2014-01-06</th>\n",
137 |               "      <td>4.01</td>\n",
138 |               "      <td>4.18</td>\n",
139 |               "      <td>3.99</td>\n",
140 |               "      <td>4.13</td>\n",
141 |               "      <td>4.13</td>\n",
142 |               "      <td>42398300</td>\n",
143 |               "    </tr>\n",
144 |               "    <tr>\n",
145 |               "      <th>2014-01-07</th>\n",
146 |               "      <td>4.19</td>\n",
147 |               "      <td>4.25</td>\n",
148 |               "      <td>4.11</td>\n",
149 |               "      <td>4.18</td>\n",
150 |               "      <td>4.18</td>\n",
151 |               "      <td>42932100</td>\n",
152 |               "    </tr>\n",
153 |               "    <tr>\n",
154 |               "      <th>2014-01-08</th>\n",
155 |               "      <td>4.23</td>\n",
156 |               "      <td>4.26</td>\n",
157 |               "      <td>4.14</td>\n",
158 |               "      <td>4.18</td>\n",
159 |               "      <td>4.18</td>\n",
160 |               "      <td>30678700</td>\n",
161 |               "    </tr>\n",
162 |               "  </tbody>\n",
163 |               "</table>\n",
164 |               "</div>"
165 |             ]
166 |           },
167 |           "metadata": {}
168 |         }
169 |       ],
170 |       "execution_count": 2,
171 |       "metadata": {
172 |         "collapsed": false,
173 |         "outputHidden": false,
174 |         "inputHidden": false
175 |       }
176 |     },
177 |     {
178 |       "cell_type": "code",
179 |       "source": [
180 |         "X = dataset.iloc[ : , :-1].values\n",
181 |         "Y = dataset.iloc[ : , 3].values"
182 |       ],
183 |       "outputs": [],
184 |       "execution_count": 3,
185 |       "metadata": {
186 |         "collapsed": false,
187 |         "outputHidden": false,
188 |         "inputHidden": false
189 |       }
190 |     },
191 |     {
192 |       "cell_type": "markdown",
193 |       "source": [
194 |         "Step 3: Handling the missing data"
195 |       ],
196 |       "metadata": {}
197 |     },
198 |     {
199 |       "cell_type": "code",
200 |       "source": [
201 |         "from sklearn.preprocessing import Imputer\n",
202 |         "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
203 |         "imputer = imputer.fit(X[ : , 1:3])\n",
204 |         "X[ : , 1:3] = imputer.transform(X[ : , 1:3])"
205 |       ],
206 |       "outputs": [],
207 |       "execution_count": 6,
208 |       "metadata": {
209 |         "collapsed": false,
210 |         "outputHidden": false,
211 |         "inputHidden": false
212 |       }
213 |     },
214 |     {
215 |       "cell_type": "markdown",
216 |       "source": [
217 |         "Step 4: Encoding categorical data"
218 |       ],
219 |       "metadata": {}
220 |     },
221 |     {
222 |       "cell_type": "code",
223 |       "source": [
224 |         "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
225 |         "labelencoder_X = LabelEncoder()\n",
226 |         "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])"
227 |       ],
228 |       "outputs": [],
229 |       "execution_count": 7,
230 |       "metadata": {
231 |         "collapsed": false,
232 |         "outputHidden": false,
233 |         "inputHidden": false
234 |       }
235 |     },
236 |     {
237 |       "cell_type": "code",
238 |       "source": [
239 |         "# Creating a dummy variable\n",
240 |         "onehotencoder = OneHotEncoder(categorical_features = [0])\n",
241 |         "X = onehotencoder.fit_transform(X).toarray()\n",
242 |         "labelencoder_Y = LabelEncoder()\n",
243 |         "Y =  labelencoder_Y.fit_transform(Y)"
244 |       ],
245 |       "outputs": [],
246 |       "execution_count": 8,
247 |       "metadata": {
248 |         "collapsed": false,
249 |         "outputHidden": false,
250 |         "inputHidden": false
251 |       }
252 |     },
253 |     {
254 |       "cell_type": "markdown",
255 |       "source": [
256 |         "Step 5: Splitting the dataset into training and test sets"
257 |       ],
258 |       "metadata": {}
259 |     },
260 |     {
261 |       "cell_type": "code",
262 |       "source": [
263 |         "from sklearn.model_selection import train_test_split  # sklearn.cross_validation is deprecated since 0.18 and removed in 0.20\n",
264 |         "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)"
265 |       ],
266 |       "outputs": [],
276 |       "execution_count": 9,
277 |       "metadata": {
278 |         "collapsed": false,
279 |         "outputHidden": false,
280 |         "inputHidden": false
281 |       }
282 |     },
283 |     {
284 |       "cell_type": "markdown",
285 |       "source": [
286 |         "Step 6: Feature Scaling"
287 |       ],
288 |       "metadata": {}
289 |     },
290 |     {
291 |       "cell_type": "code",
292 |       "source": [
293 |         "from sklearn.preprocessing import StandardScaler\n",
294 |         "sc_X = StandardScaler()\n",
295 |         "X_train = sc_X.fit_transform(X_train)  # learn the mean/std from the training split only\n",
296 |         "X_test = sc_X.transform(X_test)  # reuse the training statistics so both splits share one scale"
297 |       ],
298 |       "outputs": [],
299 |       "execution_count": 10,
300 |       "metadata": {
301 |         "collapsed": false,
302 |         "outputHidden": false,
303 |         "inputHidden": false
304 |       }
305 |     }
306 |   ],
307 |   "metadata": {
308 |     "kernel_info": {
309 |       "name": "python3"
310 |     },
311 |     "language_info": {
312 |       "file_extension": ".py",
313 |       "nbconvert_exporter": "python",
314 |       "version": "3.5.5",
315 |       "mimetype": "text/x-python",
316 |       "pygments_lexer": "ipython3",
317 |       "codemirror_mode": {
318 |         "version": 3,
319 |         "name": "ipython"
320 |       },
321 |       "name": "python"
322 |     },
323 |     "kernelspec": {
324 |       "name": "python3",
325 |       "language": "python",
326 |       "display_name": "Python 3"
327 |     },
328 |     "nteract": {
329 |       "version": "0.11.9"
330 |     }
331 |   },
332 |   "nbformat": 4,
333 |   "nbformat_minor": 4
334 | }


--------------------------------------------------------------------------------
/Basic_Machine_Learning_Predicts.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "source": [
  6 |         "# Simple Linear Regression for stock using scikit-learn\n"
  7 |       ],
  8 |       "metadata": {}
  9 |     },
 10 |     {
 11 |       "cell_type": "code",
 12 |       "source": [
 13 |         "import pandas as pd\n",
 14 |         "import numpy as np\n",
 15 |         "import matplotlib.pyplot as plt\n",
 16 |         "import math\n",
 17 |         "import seaborn as sns\n",
 18 |         "%matplotlib inline\n",
 19 |         "\n",
 20 |         "import warnings\n",
 21 |         "warnings.filterwarnings(\"ignore\")\n",
 22 |         "\n",
 23 |         "import fix_yahoo_finance as yf\n",
 24 |         "yf.pdr_override()"
 25 |       ],
 26 |       "outputs": [],
 27 |       "execution_count": 1,
 28 |       "metadata": {
 29 |         "collapsed": false,
 30 |         "outputHidden": false,
 31 |         "inputHidden": false
 32 |       }
 33 |     },
 34 |     {
 35 |       "cell_type": "code",
 36 |       "source": [
 37 |         "stock = 'AAPL'\n",
 38 |         "start = '2016-01-01' \n",
 39 |         "end = '2018-01-01'\n",
 40 |         "data = yf.download(stock, start, end)\n",
 41 |         "data.head()"
 42 |       ],
 43 |       "outputs": [
 44 |         {
 45 |           "output_type": "stream",
 46 |           "name": "stdout",
 47 |           "text": [
 48 |             "[*********************100%***********************]  1 of 1 downloaded\n"
 49 |           ]
 50 |         },
 51 |         {
 52 |           "output_type": "execute_result",
 53 |           "execution_count": 2,
 54 |           "data": {
 55 |             "text/plain": "                  Open        High         Low       Close   Adj Close  \\\nDate                                                                     \n2016-01-04  102.610001  105.370003  102.000000  105.349998  100.274513   \n2016-01-05  105.750000  105.849998  102.410004  102.709999   97.761681   \n2016-01-06  100.559998  102.370003   99.870003  100.699997   95.848511   \n2016-01-07   98.680000  100.129997   96.430000   96.449997   91.803276   \n2016-01-08   98.550003   99.110001   96.760002   96.959999   92.288696   \n\n              Volume  \nDate                  \n2016-01-04  67649400  \n2016-01-05  55791000  \n2016-01-06  68457400  \n2016-01-07  81094400  \n2016-01-08  70798000  ",
 56 |             "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2016-01-04</th>\n      <td>102.610001</td>\n      <td>105.370003</td>\n      <td>102.000000</td>\n      <td>105.349998</td>\n      <td>100.274513</td>\n      <td>67649400</td>\n    </tr>\n    <tr>\n      <th>2016-01-05</th>\n      <td>105.750000</td>\n      <td>105.849998</td>\n      <td>102.410004</td>\n      <td>102.709999</td>\n      <td>97.761681</td>\n      <td>55791000</td>\n    </tr>\n    <tr>\n      <th>2016-01-06</th>\n      <td>100.559998</td>\n      <td>102.370003</td>\n      <td>99.870003</td>\n      <td>100.699997</td>\n      <td>95.848511</td>\n      <td>68457400</td>\n    </tr>\n    <tr>\n      <th>2016-01-07</th>\n      <td>98.680000</td>\n      <td>100.129997</td>\n      <td>96.430000</td>\n      <td>96.449997</td>\n      <td>91.803276</td>\n      <td>81094400</td>\n    </tr>\n    <tr>\n      <th>2016-01-08</th>\n      <td>98.550003</td>\n      <td>99.110001</td>\n      <td>96.760002</td>\n      <td>96.959999</td>\n      <td>92.288696</td>\n      <td>70798000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
 57 |           },
 58 |           "metadata": {}
 59 |         }
 60 |       ],
 61 |       "execution_count": 2,
 62 |       "metadata": {
 63 |         "collapsed": false,
 64 |         "outputHidden": false,
 65 |         "inputHidden": false
 66 |       }
 67 |     },
 68 |     {
 69 |       "cell_type": "code",
 70 |       "source": [
 71 |         "df = data.reset_index()\n",
 72 |         "df.head()"
 73 |       ],
 74 |       "outputs": [
 75 |         {
 76 |           "output_type": "execute_result",
 77 |           "execution_count": 3,
 78 |           "data": {
 79 |             "text/plain": "        Date        Open        High         Low       Close   Adj Close  \\\n0 2016-01-04  102.610001  105.370003  102.000000  105.349998  100.274513   \n1 2016-01-05  105.750000  105.849998  102.410004  102.709999   97.761681   \n2 2016-01-06  100.559998  102.370003   99.870003  100.699997   95.848511   \n3 2016-01-07   98.680000  100.129997   96.430000   96.449997   91.803276   \n4 2016-01-08   98.550003   99.110001   96.760002   96.959999   92.288696   \n\n     Volume  \n0  67649400  \n1  55791000  \n2  68457400  \n3  81094400  \n4  70798000  ",
 80 |             "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Date</th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>2016-01-04</td>\n      <td>102.610001</td>\n      <td>105.370003</td>\n      <td>102.000000</td>\n      <td>105.349998</td>\n      <td>100.274513</td>\n      <td>67649400</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2016-01-05</td>\n      <td>105.750000</td>\n      <td>105.849998</td>\n      <td>102.410004</td>\n      <td>102.709999</td>\n      <td>97.761681</td>\n      <td>55791000</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2016-01-06</td>\n      <td>100.559998</td>\n      <td>102.370003</td>\n      <td>99.870003</td>\n      <td>100.699997</td>\n      <td>95.848511</td>\n      <td>68457400</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>2016-01-07</td>\n      <td>98.680000</td>\n      <td>100.129997</td>\n      <td>96.430000</td>\n      <td>96.449997</td>\n      <td>91.803276</td>\n      <td>81094400</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>2016-01-08</td>\n      <td>98.550003</td>\n      <td>99.110001</td>\n      <td>96.760002</td>\n      <td>96.959999</td>\n      <td>92.288696</td>\n      <td>70798000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
 81 |           },
 82 |           "metadata": {}
 83 |         }
 84 |       ],
 85 |       "execution_count": 3,
 86 |       "metadata": {
 87 |         "collapsed": false,
 88 |         "outputHidden": false,
 89 |         "inputHidden": false
 90 |       }
 91 |     },
 92 |     {
 93 |       "cell_type": "code",
 94 |       "source": [
 95 |         "X = df = df.drop(['Date','Close'], axis=1)  # drop(..., inplace=True) returns None, which left X as None\n",
 96 |         "y = df[['Adj Close']]  # NOTE(review): 'Adj Close' is also still a column of X, so the model is fed its own target - confirm intent"
 97 |       ],
 98 |       "outputs": [],
 99 |       "execution_count": 4,
100 |       "metadata": {
101 |         "collapsed": false,
102 |         "outputHidden": false,
103 |         "inputHidden": false
104 |       }
105 |     },
106 |     {
107 |       "cell_type": "code",
108 |       "source": [
109 |         "df = df.as_matrix()"
110 |       ],
111 |       "outputs": [],
112 |       "execution_count": 5,
113 |       "metadata": {
114 |         "collapsed": false,
115 |         "outputHidden": false,
116 |         "inputHidden": false
117 |       }
118 |     },
119 |     {
120 |       "cell_type": "code",
121 |       "source": [
122 |         "from sklearn.model_selection import train_test_split\n",
123 |         "\n",
124 |         "# Split X and y into training and test sets (25% held out for testing)\n",
125 |         "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,  random_state=0)"
126 |       ],
127 |       "outputs": [],
128 |       "execution_count": 6,
129 |       "metadata": {
130 |         "collapsed": false,
131 |         "outputHidden": false,
132 |         "inputHidden": false
133 |       }
134 |     },
135 |     {
136 |       "cell_type": "code",
137 |       "source": [
138 |         "from sklearn.linear_model import LinearRegression\n",
139 |         "\n",
140 |         "regression_model = LinearRegression()\n",
141 |         "regression_model.fit(X_train, y_train)"
142 |       ],
143 |       "outputs": [
144 |         {
145 |           "output_type": "execute_result",
146 |           "execution_count": 7,
147 |           "data": {
148 |             "text/plain": "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
149 |           },
150 |           "metadata": {}
151 |         }
152 |       ],
153 |       "execution_count": 7,
154 |       "metadata": {
155 |         "collapsed": false,
156 |         "outputHidden": false,
157 |         "inputHidden": false
158 |       }
159 |     },
160 |     {
161 |       "cell_type": "code",
162 |       "source": [
163 |         "intercept = regression_model.intercept_[0]\n",
164 |         "\n",
165 |         "print(\"The intercept for our model is {}\".format(intercept))"
166 |       ],
167 |       "outputs": [
168 |         {
169 |           "output_type": "stream",
170 |           "name": "stdout",
171 |           "text": [
172 |             "The intercept for our model is -1.2047109976265347e-09\n"
173 |           ]
174 |         }
175 |       ],
176 |       "execution_count": 8,
177 |       "metadata": {
178 |         "collapsed": false,
179 |         "outputHidden": false,
180 |         "inputHidden": false
181 |       }
182 |     },
183 |     {
184 |       "cell_type": "code",
185 |       "source": [
186 |         "regression_model.score(X_test, y_test)"
187 |       ],
188 |       "outputs": [
189 |         {
190 |           "output_type": "execute_result",
191 |           "execution_count": 9,
192 |           "data": {
193 |             "text/plain": "1.0"
194 |           },
195 |           "metadata": {}
196 |         }
197 |       ],
198 |       "execution_count": 9,
199 |       "metadata": {
200 |         "collapsed": false,
201 |         "outputHidden": false,
202 |         "inputHidden": false
203 |       }
204 |     },
205 |     {
206 |       "cell_type": "code",
207 |       "source": [
208 |         "from sklearn.metrics import mean_squared_error\n",
209 |         "\n",
210 |         "y_predict = regression_model.predict(X_test)\n",
211 |         "\n",
212 |         "regression_model_mse = mean_squared_error(y_predict, y_test)\n",
213 |         "\n",
214 |         "regression_model_mse"
215 |       ],
216 |       "outputs": [
217 |         {
218 |           "output_type": "execute_result",
219 |           "execution_count": 10,
220 |           "data": {
221 |             "text/plain": "2.8264629110010686e-19"
222 |           },
223 |           "metadata": {}
224 |         }
225 |       ],
226 |       "execution_count": 10,
227 |       "metadata": {
228 |         "collapsed": false,
229 |         "outputHidden": false,
230 |         "inputHidden": false
231 |       }
232 |     },
233 |     {
234 |       "cell_type": "code",
235 |       "source": [
236 |         "math.sqrt(regression_model_mse)"
237 |       ],
238 |       "outputs": [
239 |         {
240 |           "output_type": "execute_result",
241 |           "execution_count": 11,
242 |           "data": {
243 |             "text/plain": "5.316448919157475e-10"
244 |           },
245 |           "metadata": {}
246 |         }
247 |       ],
248 |       "execution_count": 11,
249 |       "metadata": {
250 |         "collapsed": false,
251 |         "outputHidden": false,
252 |         "inputHidden": false
253 |       }
254 |     },
255 |     {
256 |       "cell_type": "code",
257 |       "source": [
258 |         "# input the latest Open, High, Low, Close, Volume\n",
259 |         "# predicts the next day price\n",
260 |         "regression_model.predict([[167.81, 171.75, 165.19, 166.48, 37232900]])"
261 |       ],
262 |       "outputs": [
263 |         {
264 |           "output_type": "execute_result",
265 |           "execution_count": 12,
266 |           "data": {
267 |             "text/plain": "array([[166.48]])"
268 |           },
269 |           "metadata": {}
270 |         }
271 |       ],
272 |       "execution_count": 12,
273 |       "metadata": {
274 |         "collapsed": false,
275 |         "outputHidden": false,
276 |         "inputHidden": false
277 |       }
278 |     }
279 |   ],
280 |   "metadata": {
281 |     "kernel_info": {
282 |       "name": "python3"
283 |     },
284 |     "kernelspec": {
285 |       "name": "python3",
286 |       "language": "python",
287 |       "display_name": "Python 3"
288 |     },
289 |     "language_info": {
290 |       "file_extension": ".py",
291 |       "pygments_lexer": "ipython3",
292 |       "version": "3.5.5",
293 |       "mimetype": "text/x-python",
294 |       "codemirror_mode": {
295 |         "version": 3,
296 |         "name": "ipython"
297 |       },
298 |       "name": "python",
299 |       "nbconvert_exporter": "python"
300 |     },
301 |     "nteract": {
302 |       "version": "0.28.0"
303 |     }
304 |   },
305 |   "nbformat": 4,
306 |   "nbformat_minor": 4
307 | }


--------------------------------------------------------------------------------
/DL_Title.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LastAncientOne/Deep_Learning_Machine_Learning_Stock/e31e9fc689cd2cb6d74a8f2694750afd6e13bcec/DL_Title.PNG


--------------------------------------------------------------------------------
/Data_Cleaning_for_Machine_Learning.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "source": [
  6 |         "# Data Cleaning for Machine Learning with Python"
  7 |       ],
  8 |       "metadata": {
  9 |         "nteract": {
 10 |           "transient": {
 11 |             "deleting": false
 12 |           }
 13 |         }
 14 |       }
 15 |     },
 16 |     {
 17 |       "cell_type": "code",
 18 |       "source": [
 19 |         "import numpy as np\n",
 20 |         "import matplotlib.pyplot as plt\n",
 21 |         "import pandas as pd\n",
 22 |         "\n",
 23 |         "import warnings\n",
 24 |         "warnings.filterwarnings(\"ignore\")\n",
 25 |         "\n",
 26 |         "# fetch yahoo data\n",
 27 |         "import yfinance as yf\n",
 28 |         "yf.pdr_override()"
 29 |       ],
 30 |       "outputs": [],
 31 |       "execution_count": 1,
 32 |       "metadata": {
 33 |         "collapsed": true,
 34 |         "jupyter": {
 35 |           "source_hidden": false,
 36 |           "outputs_hidden": false
 37 |         },
 38 |         "nteract": {
 39 |           "transient": {
 40 |             "deleting": false
 41 |           }
 42 |         },
 43 |         "execution": {
 44 |           "iopub.status.busy": "2020-08-21T01:15:24.606Z",
 45 |           "iopub.execute_input": "2020-08-21T01:15:24.613Z",
 46 |           "iopub.status.idle": "2020-08-21T01:15:25.604Z",
 47 |           "shell.execute_reply": "2020-08-21T01:15:25.631Z"
 48 |         }
 49 |       }
 50 |     },
 51 |     {
 52 |       "cell_type": "code",
 53 |       "source": [
 54 |         "# input\n",
 55 |         "symbol = 'AMD'\n",
 56 |         "start = '2014-01-01'\n",
 57 |         "end = '2018-08-27'\n",
 58 |         "\n",
 59 |         "# Read data \n",
 60 |         "dataset = yf.download(symbol,start,end)\n",
 61 |         "\n",
 62 |         "# Only keep close columns \n",
 63 |         "dataset.head()"
 64 |       ],
 65 |       "outputs": [
 66 |         {
 67 |           "output_type": "stream",
 68 |           "name": "stdout",
 69 |           "text": [
 70 |             "[*********************100%***********************]  1 of 1 completed\n"
 71 |           ]
 72 |         },
 73 |         {
 74 |           "output_type": "execute_result",
 75 |           "execution_count": 2,
 76 |           "data": {
 77 |             "text/plain": "            Adj Close  Close  High   Low  Open    Volume\nDate                                                    \n2014-01-02       3.95   3.95  3.98  3.84  3.85  20548400\n2014-01-03       4.00   4.00  4.00  3.88  3.98  22887200\n2014-01-06       4.13   4.13  4.18  3.99  4.01  42398300\n2014-01-07       4.18   4.18  4.25  4.11  4.19  42932100\n2014-01-08       4.18   4.18  4.26  4.14  4.23  30678700",
 78 |             "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Adj Close</th>\n      <th>Close</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Open</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-02</th>\n      <td>3.95</td>\n      <td>3.95</td>\n      <td>3.98</td>\n      <td>3.84</td>\n      <td>3.85</td>\n      <td>20548400</td>\n    </tr>\n    <tr>\n      <th>2014-01-03</th>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>3.98</td>\n      <td>22887200</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.01</td>\n      <td>42398300</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.19</td>\n      <td>42932100</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.23</td>\n      <td>30678700</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
 79 |           },
 80 |           "metadata": {}
 81 |         }
 82 |       ],
 83 |       "execution_count": 2,
 84 |       "metadata": {
 85 |         "collapsed": true,
 86 |         "jupyter": {
 87 |           "source_hidden": false,
 88 |           "outputs_hidden": false
 89 |         },
 90 |         "nteract": {
 91 |           "transient": {
 92 |             "deleting": false
 93 |           }
 94 |         },
 95 |         "execution": {
 96 |           "iopub.status.busy": "2020-08-21T01:15:25.614Z",
 97 |           "iopub.execute_input": "2020-08-21T01:15:25.621Z",
 98 |           "iopub.status.idle": "2020-08-21T01:15:26.860Z",
 99 |           "shell.execute_reply": "2020-08-21T01:15:27.073Z"
100 |         }
101 |       }
102 |     },
103 |     {
104 |       "cell_type": "code",
105 |       "source": [
106 |         "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
107 |         "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
108 |         "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
109 |         "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
110 |         "dataset = dataset.dropna()"
111 |       ],
112 |       "outputs": [],
113 |       "execution_count": 3,
114 |       "metadata": {
115 |         "collapsed": true,
116 |         "jupyter": {
117 |           "source_hidden": false,
118 |           "outputs_hidden": false
119 |         },
120 |         "nteract": {
121 |           "transient": {
122 |             "deleting": false
123 |           }
124 |         },
125 |         "execution": {
126 |           "iopub.status.busy": "2020-08-21T01:15:26.872Z",
127 |           "iopub.execute_input": "2020-08-21T01:15:26.882Z",
128 |           "iopub.status.idle": "2020-08-21T01:15:26.899Z",
129 |           "shell.execute_reply": "2020-08-21T01:15:27.079Z"
130 |         }
131 |       }
132 |     },
133 |     {
134 |       "cell_type": "code",
135 |       "source": [
136 |         "# summarize the number of unique values in each column\n",
137 |         "print(dataset.nunique())"
138 |       ],
139 |       "outputs": [
140 |         {
141 |           "output_type": "stream",
142 |           "name": "stdout",
143 |           "text": [
144 |             "Adj Close             657\n",
145 |             "Close                 657\n",
146 |             "High                  644\n",
147 |             "Low                   626\n",
148 |             "Open                  638\n",
149 |             "Volume               1168\n",
150 |             "Increase_Decrease       2\n",
151 |             "Buy_Sell_on_Open        2\n",
152 |             "Buy_Sell                2\n",
153 |             "Returns              1078\n",
154 |             "dtype: int64\n"
155 |           ]
156 |         }
157 |       ],
158 |       "execution_count": 4,
159 |       "metadata": {
160 |         "collapsed": true,
161 |         "jupyter": {
162 |           "source_hidden": false,
163 |           "outputs_hidden": false
164 |         },
165 |         "nteract": {
166 |           "transient": {
167 |             "deleting": false
168 |           }
169 |         },
170 |         "execution": {
171 |           "iopub.status.busy": "2020-08-21T01:15:26.915Z",
172 |           "iopub.execute_input": "2020-08-21T01:15:26.922Z",
173 |           "iopub.status.idle": "2020-08-21T01:15:26.938Z",
174 |           "shell.execute_reply": "2020-08-21T01:15:27.085Z"
175 |         }
176 |       }
177 |     },
178 |     {
179 |       "cell_type": "code",
180 |       "source": [
181 |         "print(dataset.shape)"
182 |       ],
183 |       "outputs": [
184 |         {
185 |           "output_type": "stream",
186 |           "name": "stdout",
187 |           "text": [
188 |             "(1170, 10)\n"
189 |           ]
190 |         }
191 |       ],
192 |       "execution_count": 5,
193 |       "metadata": {
194 |         "collapsed": true,
195 |         "jupyter": {
196 |           "source_hidden": false,
197 |           "outputs_hidden": false
198 |         },
199 |         "nteract": {
200 |           "transient": {
201 |             "deleting": false
202 |           }
203 |         },
204 |         "execution": {
205 |           "iopub.status.busy": "2020-08-21T01:15:26.950Z",
206 |           "iopub.execute_input": "2020-08-21T01:15:26.957Z",
207 |           "iopub.status.idle": "2020-08-21T01:15:26.977Z",
208 |           "shell.execute_reply": "2020-08-21T01:15:27.100Z"
209 |         }
210 |       }
211 |     },
212 |     {
213 |       "cell_type": "code",
214 |       "source": [
215 |         "counts = dataset.nunique()\n",
216 |         "to_del = [i for i,v in enumerate(counts) if v == 1]\n",
217 |         "print(to_del)"
218 |       ],
219 |       "outputs": [
220 |         {
221 |           "output_type": "stream",
222 |           "name": "stdout",
223 |           "text": [
224 |             "[]\n"
225 |           ]
226 |         }
227 |       ],
228 |       "execution_count": 6,
229 |       "metadata": {
230 |         "collapsed": true,
231 |         "jupyter": {
232 |           "source_hidden": false,
233 |           "outputs_hidden": false
234 |         },
235 |         "nteract": {
236 |           "transient": {
237 |             "deleting": false
238 |           }
239 |         },
240 |         "execution": {
241 |           "iopub.status.busy": "2020-08-21T01:15:26.988Z",
242 |           "iopub.execute_input": "2020-08-21T01:15:26.993Z",
243 |           "iopub.status.idle": "2020-08-21T01:15:27.007Z",
244 |           "shell.execute_reply": "2020-08-21T01:15:27.105Z"
245 |         }
246 |       }
247 |     },
248 |     {
249 |       "cell_type": "code",
250 |       "source": [
251 |         "# drop useless columns\n",
252 |         "dataset.drop(to_del, axis=1, inplace=True)\n",
253 |         "print(dataset.shape)"
254 |       ],
255 |       "outputs": [
256 |         {
257 |           "output_type": "stream",
258 |           "name": "stdout",
259 |           "text": [
260 |             "(1170, 10)\n"
261 |           ]
262 |         }
263 |       ],
264 |       "execution_count": 7,
265 |       "metadata": {
266 |         "collapsed": true,
267 |         "jupyter": {
268 |           "source_hidden": false,
269 |           "outputs_hidden": false
270 |         },
271 |         "nteract": {
272 |           "transient": {
273 |             "deleting": false
274 |           }
275 |         },
276 |         "execution": {
277 |           "iopub.status.busy": "2020-08-21T01:15:27.016Z",
278 |           "iopub.execute_input": "2020-08-21T01:15:27.022Z",
279 |           "iopub.status.idle": "2020-08-21T01:15:27.036Z",
280 |           "shell.execute_reply": "2020-08-21T01:15:27.109Z"
281 |         }
282 |       }
283 |     }
284 |   ],
285 |   "metadata": {
286 |     "kernel_info": {
287 |       "name": "python3"
288 |     },
289 |     "language_info": {
290 |       "mimetype": "text/x-python",
291 |       "codemirror_mode": {
292 |         "name": "ipython",
293 |         "version": 3
294 |       },
295 |       "name": "python",
296 |       "version": "3.5.5",
297 |       "pygments_lexer": "ipython3",
298 |       "nbconvert_exporter": "python",
299 |       "file_extension": ".py"
300 |     },
301 |     "kernelspec": {
302 |       "argv": [
303 |         "C:\\Users\\Tin Hang\\Anaconda3\\envs\\py35\\python.exe",
304 |         "-m",
305 |         "ipykernel_launcher",
306 |         "-f",
307 |         "{connection_file}"
308 |       ],
309 |       "display_name": "Python 3",
310 |       "language": "python",
311 |       "name": "python3"
312 |     },
313 |     "nteract": {
314 |       "version": "0.24.1"
315 |     }
316 |   },
317 |   "nbformat": 4,
318 |   "nbformat_minor": 0
319 | }


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 LastAncientOne
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | [![Contributors][contributors-shield]][contributors-url]
  3 | [![Forks][forks-shield]][forks-url]
  4 | [![Stargazers][stars-shield]][stars-url]
  5 | [![Issues][issues-shield]][issues-url]
  6 | [![MIT License][license-shield]][license-url]
  7 | [![LinkedIn][linkedin-shield]][linkedin-url]
  8 | 
  9 | <a href="https://www.buymeacoffee.com/lastancientone" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" style="height: 60px !important;width: 217px !important;" ></a>
 10 | 
 11 | <!-- MARKDOWN LINKS & IMAGES -->
 12 | <!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
 13 | [contributors-shield]: https://img.shields.io/github/contributors/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge
 14 | [contributors-url]: https://github.com/LastAncientOne/Deep-Learning-Machine-Learning-Stock/graphs/contributors
 15 | [forks-shield]: https://img.shields.io/github/forks/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge
 16 | [forks-url]: https://github.com/LastAncientOne/Deep-Learning-Machine-Learning-Stock/network/members
 17 | [stars-shield]: https://img.shields.io/github/stars/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge
 18 | [stars-url]: https://github.com/LastAncientOne/Deep-Learning-Machine-Learning-Stock/stargazers
 19 | [issues-shield]: https://img.shields.io/github/issues/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge
 20 | [issues-url]: https://github.com/LastAncientOne/Deep-Learning-Machine-Learning-Stock/issues
 21 | [license-shield]: https://img.shields.io/github/license/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge
 22 | [license-url]: LICENSE
 23 | [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
 24 | [linkedin-url]: https://linkedin.com/in/tin-hang
 25 | 
 26 | 
 27 | <img src="DL_Title.PNG">  
 28 | 
 29 | <h1 align="center">Deep Learning and Machine Learning for Stock Predictions</h1>  
 30 | 
 31 | Description: This is a comprehensive study and analysis of stocks using deep learning (DL) and machine learning (ML) techniques. Both machine learning and deep learning are types of artificial intelligence (AI). The objective is to predict stock behavior by employing various machine learning and deep learning algorithms. The focus is on experimenting with stock data to understand how and why certain methods are effective, as well as identifying reasons for their potential limitations. Different stock strategies are explored within the context of machine learning and deep learning. Technical Analysis and Fundamental Analysis are utilized to predict future stock prices using these AI techniques, encompassing both long-term and short-term predictions.  
 32 | 
 33 | Machine learning is a branch of artificial intelligence that involves the development of algorithms capable of automatically adapting and generating outputs by processing structured data. On the other hand, deep learning is a subset of machine learning that employs similar algorithms but with additional layers of complexity, enabling different interpretations of the data. The network of algorithms used in deep learning is known as artificial neural networks, which mimic the interconnectedness of neural pathways in the human brain.   
 34 | 
 35 | Deep learning and machine learning are powerful approaches that have revolutionized the AI landscape. Understanding the fundamentals of these techniques and the commonly used algorithms is essential for aspiring data scientists and AI enthusiasts. Regression, as a fundamental concept in predictive modeling, plays a crucial role in analyzing and predicting continuous variables. By harnessing the capabilities of these algorithms and techniques, we can unlock incredible potential in various domains, leading to advancements and improvements in numerous industries.  
 36 | 
 37 | ### Machine Learning Step-by-Step  
 38 | 1. Collecting/Gathering Data.
 39 | 2. Preparing the Data - load data and prepare it for the machine learning training.
 40 | 3. Choosing a Model.  
 41 | 4. Training the Model.  
 42 | 5. Evaluating the Model.  
 43 | 6. Parameter Tuning.  
 44 | 7. Making Predictions.
 45 | 
 46 | ### Deep Learning Model Step-by-Step  
 47 | 1. Define the Model.  
 48 | 2. Compile the Model.  
 49 | 3. Fit the Model with training dataset.  
 50 | 4. Make Predictions.  
 51 | 
 52 | <h3 align="left">Programming Languages and Tools:</h3>
 53 | <p align="left"> </a> <a href="https://www.python.org" target="_blank"> <img src="https://raw.githubusercontent.com/devicons/devicon/master/icons/python/python-original.svg" alt="python" width="50" height="50"/>  </a> <a href="https://nteract.io/" target="_blank"> <img src="https://avatars.githubusercontent.com/u/12401040?s=200&v=4" alt="Nteract" width="50" height="50"/> </a> <a href="https://anaconda.org/" target="_blank"> <img src="https://www.clipartkey.com/mpngs/m/227-2271689_transparent-anaconda-logo-png.png" alt="Anaconda" width="50" height="50"/> </a> <a href="https://www.spyder-ide.org/" target="_blank"> <img src="https://www.pinclipart.com/picdir/middle/180-1807410_spyder-icon-clipart.png" alt="Spyder" width="50" height="50"/> </a> <a href="https://jupyter.org/" target="_blank"> <img src="https://upload.wikimedia.org/wikipedia/commons/3/38/Jupyter_logo.svg" alt="Jupyter Notebook" width="50" height="50"/> </a> <a href="https://notepad-plus-plus.org/" target="_blank"> <img src="https://logos-download.com/wp-content/uploads/2019/07/Notepad_Logo.png" alt="Notepad++" width="50" height="50"/> </a> </p>
 54 | 
 55 | ### Three main types of data: Categorical, Discrete, and Continuous variables
 56 |   1. Categorical variable(Qualitative): Label data or distinct groups.    
 57 |     Example: location, gender, material type, payment, highest level of education  
 58 |   2. Discrete variable (Class Data): Numeric variables that have a countable number of values between any two values.  
 59 |     Example: customer complaints or number of flaws or defects, Children per Household, age (number of years)  
 60 |   3. Continuous variable (Quantitative): Numeric variables that have an infinite number of values between any two values.
 61 |     Example: length of a part or the date and time a payment is received, running distance, age (infinitely accurate, using an infinite number of decimal places)  
 62 | 
 63 | ### Data Use  
 64 |   1. 'Quantitative data' can be used with all three measures of centre (mean, median, and mode) and all spread measures.  
 65 |   2. 'Class data' can be used with median and mode.  
 66 |   3. 'Qualitative data' can be used only with mode.  
 67 | 
 68 | ### Two types of problems: 
 69 |   1. Classification (predict label)  
 70 |   2. Regression (predict values)  
 71 | 
 72 | ### Bias-Variance Tradeoff  
 73 | #### Bias  
 74 | - Bias is the difference between our actual and predicted values.  
 75 | - Bias is the simple assumptions that our model makes about our data to be able to predict new data.  
 76 | - Assumptions made by a model to make a function easier to learn.   
 77 | #### Variance  
 78 | - Variance is the opposite of bias.  
 79 | - Variance is variability of model prediction for a given data point or a value that tells us the spread of our data.  
 80 | - If you train your model on training data and obtain a very low error, but upon changing the data and training the same model you obtain a very high error, the model has high variance.   
 81 | 
 82 | ### Overfitting, Underfitting, and the bias-variance tradeoff  
 83 | Overfitting is when the model memorizes the noise and fits too closely to the training set. A good fit is a model that learns the training dataset and generalizes well to the held-out dataset. Underfitting is when the model cannot establish the dominant trend within the data, resulting in training errors and poor performance of the model. 
 84 | 
 85 | #### Overfitting:   
 86 | An overfitting model performs well on the training data, fitting at or near each observation; however, the model misses the point and random noise is captured inside the model. The model has low training error and high CV error, low in-sample error and high out-of-sample error, and high variance.  
 87 |   1. High Train Accuracy   
 88 |   2. Low Test Accuracy
 89 | #### Avoiding Overfitting:  
 90 |   1. Early stopping - stop the training before the model starts learning the noise within the model.   
 91 |   2. Training with more data - adding more data will increase the accuracy of the model or can help algorithms detect the signal better.     
 92 |   3. Data augmentation - add clean and relevant data into training data.  
 93 |   4. Feature selection - Use important features within the data. Remove features. 
 94 |   5. Regularization - reduce features by using regularization methods such as L1 regularization, Lasso regularization, and dropout.  
 95 |   6. Ensemble methods - combine predictions from multiple separate models such as bagging and boosting.       
 96 |   7. Increase training data.  
 97 | #### Good fit:  
 98 |   1. High Train Accuracy   
 99 |   2. High Test Accuracy   
100 | #### Underfitting:  
101 | An underfitting model is too simple to capture the underlying logic of the data. Therefore, the model does not have strong predictive power and has low accuracy. The model has large training set error, large in-sample error, and high bias.  
102 |   1. Low Train Accuracy  
103 |   2. Low Test Accuracy   
104 | #### Avoiding Underfitting:  
105 |   1. Decrease regularization - regularization reduces the variance of a model by applying a penalty to the input parameters with the larger coefficients (e.g. L1 regularization, Lasso regularization, dropout); decreasing it lets the model fit the training data more closely.   
106 |   2. Increase the duration of training - extend the duration of training, because stopping the training too early will cause an underfit model.  
107 |   3. Feature selection - if not enough predictive features are present, adding more features or features with greater importance would improve the model.  
108 |   4. Increase the number of features - performing feature engineering  
109 |   5. Remove noise from the data    
110 | 
111 | 
112 | ## Python Reviews
113 | Steps 1 through 8 are a review of Python.  
114 | After step 8, everything you need to know relates to data analysis, data engineering, data science, machine learning, and deep learning.   
115 | Here is the link to the Python tutorial:  
116 | [Python Tutorial for Stock Analysis](https://github.com/LastAncientOne/SimpleStockAnalysisPython)
117 | 
118 | 
119 | ## List of Machine Learning Algorithms for Stock Trading  
120 | ### Most Common Regression Algorithms  
121 | 1. Linear Regression Model  
122 | 2. Logistic Regression  
123 | 3. Lasso Regression    
124 | 4. Support Vector Machines  
125 | 5. Polynomial Regression  
126 | 6. Stepwise Regression  
127 | 7. Ridge Regression  
128 | 8. Multivariate Regression Algorithm    
129 | 9. Multiple Regression Algorithm  
130 | 10. K Means Clustering Algorithm  
131 | 11. Naïve Bayes Classifier Algorithm  
132 | 12. Random Forests  
133 | 13. Decision Trees  
134 | 14. Nearest Neighbours   
135 | 15. Lasso Regression  
136 | 16. ElasticNet Regression  
137 | 17. Reinforcement Learning  
138 | 18. Artificial Intelligence    
139 | 19. MultiModal Network  
140 | 20. Biologic Intelligence  
141 | 
142 | ### Different Types of Machine Learning Algorithms and Models  
143 | Algorithms are processes and sets of instructions used to solve a class of problems. Additionally, algorithms perform computations such as calculations, data processing, automated reasoning, and other tasks. A machine learning algorithm is a method that enables systems to learn and improve automatically from experience, without the need for explicit formulation.  
144 | 
145 | # Prerequisites  
146 | Python 3.5+  
147 | Jupyter Notebook Python 3    
148 | Windows 7 or Windows 10  
149 | 
150 | ### Download Software  
151 | https://www.python.org/  
152 | 
153 | <h3 align="left"> Programming Language:</h3>
154 | <p align="left"> </a> <a href="https://www.python.org" target="_blank"> <img src="https://raw.githubusercontent.com/devicons/devicon/master/icons/python/python-original.svg" alt="python" width="80" height="80"/> </a>  
155 | 
156 | <h3 align="left">Tools:</h3>
157 | <p align="left"> </a> <a href="https://anaconda.org/" target="_blank"> <img src="https://www.clipartkey.com/mpngs/m/227-2271689_transparent-anaconda-logo-png.png" alt="Anaconda" width="80" height="80"/> </a> <a href="https://www.spyder-ide.org/" target="_blank"> <img src="https://www.kindpng.com/picc/m/86-862450_spyder-python-logo-png-transparent-png.png" alt="Spyder" width="80" height="80"/> </a> <a href="https://jupyter.org/" target="_blank"> <img src="https://upload.wikimedia.org/wikipedia/commons/3/38/Jupyter_logo.svg" alt="Jupyter Notebook" width="80" height="80"/> </a> <a href="https://notepad-plus-plus.org/" target="_blank"> <img src="https://logos-download.com/wp-content/uploads/2019/07/Notepad_Logo.png" alt="Notepad++" width="80" height="80"/> </a> <a href="https://www.jetbrains.com/pycharm/" target="_blank"> <img src="https://brandeps.com/logo-download/P/Pycharm-logo-vector-01.svg" alt="PyCharm" width="80" height="80"/> </a> </p>  
158 | 
159 | <a href="https://www.buymeacoffee.com/lastancientone"><img src="https://img.buymeacoffee.com/button-api/?text=Buy me a Book&emoji=📚&slug=lastancientone&button_colour=000000&font_colour=ffffff&font_family=Lato&outline_colour=ffffff&coffee_colour=FFDD00" /></a>  
160 | 
161 | ## Authors  
162 | ### Tin Hang
163 | 
164 | ## Disclaimer  
165 | &#x1F53B; Do not use this code for investing or trading in the stock market. However, if you are interested in the stock market, you should read :books: books that relate to the stock market, investment, or finance. On the other hand, if you are into quant or machine learning, read books about &#x1F4D8; machine trading, algorithmic trading, and quantitative trading. You should read &#x1F4D7; about Machine Learning and Deep Learning to understand the concepts, theory, and mathematics. In addition, you should read academic papers and do research online about machine learning and deep learning on :computer:  
166 | 
167 | ### Certain portions of the code may encounter issues stemming from updates or obsolescence within specific library packages. Consequently, adjustments will be necessary, contingent upon the Python package library employed. It may be imperative to either upgrade or downgrade certain libraries accordingly.  
168 | 
169 | ## 🔴 Warning: This is not financial advice; it should not be relied upon for investment or trading decisions, as it is for educational purposes only.  
170 | 


--------------------------------------------------------------------------------
/Stock_Algorithms/30_Regression_Models.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "source": [
  6 |         "# 30 Regression Models"
  7 |       ],
  8 |       "metadata": {
  9 |         "nteract": {
 10 |           "transient": {
 11 |             "deleting": false
 12 |           }
 13 |         }
 14 |       }
 15 |     },
 16 |     {
 17 |       "cell_type": "code",
 18 |       "source": [
 19 |         "from lazypredict.Supervised import LazyRegressor\n",
 20 |         "from pandas.plotting import scatter_matrix# Scikit-learn packages\n",
 21 |         "from sklearn.linear_model import LinearRegression\n",
 22 |         "from sklearn.tree import DecisionTreeRegressor\n",
 23 |         "from sklearn.ensemble import ExtraTreesRegressor\n",
 24 |         "from sklearn import metrics\n",
 25 |         "from sklearn.metrics import mean_squared_error# Hide warnings\n",
 26 |         "from sklearn.model_selection import train_test_split  \n",
 27 |         "\n",
 28 |         "import warnings\n",
 29 |         "warnings.filterwarnings(\"ignore\")\n",
 30 |         "\n",
 31 |         "import yfinance as yf\n",
 32 |         "yf.pdr_override()"
 33 |       ],
 34 |       "outputs": [
 35 |         {
 36 |           "output_type": "stream",
 37 |           "name": "stderr",
 38 |           "text": [
 39 |             "C:\\Users\\Tin Hang\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:143: FutureWarning: The sklearn.utils.testing module is  deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.utils. Anything that cannot be imported from sklearn.utils is now part of the private API.\n",
 40 |             "  warnings.warn(message, FutureWarning)\n"
 41 |           ]
 42 |         }
 43 |       ],
 44 |       "execution_count": 1,
 45 |       "metadata": {
 46 |         "collapsed": true,
 47 |         "jupyter": {
 48 |           "source_hidden": false,
 49 |           "outputs_hidden": false
 50 |         },
 51 |         "nteract": {
 52 |           "transient": {
 53 |             "deleting": false
 54 |           }
 55 |         },
 56 |         "execution": {
 57 |           "shell.execute_reply": "2021-04-28T00:38:36.736Z",
 58 |           "iopub.status.busy": "2021-04-28T00:38:34.815Z",
 59 |           "iopub.execute_input": "2021-04-28T00:38:34.824Z",
 60 |           "iopub.status.idle": "2021-04-28T00:38:36.720Z"
 61 |         }
 62 |       }
 63 |     },
 64 |     {
 65 |       "cell_type": "code",
 66 |       "source": [
 67 |         "# input\n",
 68 |         "symbol = 'AMD'\n",
 69 |         "start = '2014-01-01'\n",
 70 |         "end = '2018-08-27'\n",
 71 |         "\n",
 72 |         "# Read data \n",
 73 |         "dataset = yf.download(symbol,start,end)\n",
 74 |         "\n",
 75 |         "# Preview the first rows\n",
 76 |         "dataset.head()"
 77 |       ],
 78 |       "outputs": [
 79 |         {
 80 |           "output_type": "stream",
 81 |           "name": "stdout",
 82 |           "text": [
 83 |             "[*********************100%***********************]  1 of 1 completed\n"
 84 |           ]
 85 |         },
 86 |         {
 87 |           "output_type": "execute_result",
 88 |           "execution_count": 2,
 89 |           "data": {
 90 |             "text/plain": "            Open  High  Low  Close  Adj Close    Volume\nDate                                                   \n2014-01-02  3.85  3.98 3.84   3.95       3.95  20548400\n2014-01-03  3.98  4.00 3.88   4.00       4.00  22887200\n2014-01-06  4.01  4.18 3.99   4.13       4.13  42398300\n2014-01-07  4.19  4.25 4.11   4.18       4.18  42932100\n2014-01-08  4.23  4.26 4.14   4.18       4.18  30678700",
 91 |             "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-02</th>\n      <td>3.85</td>\n      <td>3.98</td>\n      <td>3.84</td>\n      <td>3.95</td>\n      <td>3.95</td>\n      <td>20548400</td>\n    </tr>\n    <tr>\n      <th>2014-01-03</th>\n      <td>3.98</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>22887200</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.01</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>42398300</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.19</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>42932100</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.23</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>30678700</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
 92 |           },
 93 |           "metadata": {}
 94 |         }
 95 |       ],
 96 |       "execution_count": 2,
 97 |       "metadata": {
 98 |         "collapsed": true,
 99 |         "jupyter": {
100 |           "source_hidden": false,
101 |           "outputs_hidden": false
102 |         },
103 |         "nteract": {
104 |           "transient": {
105 |             "deleting": false
106 |           }
107 |         },
108 |         "execution": {
109 |           "iopub.status.busy": "2021-04-28T00:38:36.725Z",
110 |           "iopub.execute_input": "2021-04-28T00:38:36.729Z",
111 |           "iopub.status.idle": "2021-04-28T00:38:37.064Z",
112 |           "shell.execute_reply": "2021-04-28T00:38:37.060Z"
113 |         }
114 |       }
115 |     },
116 |     {
117 |       "cell_type": "code",
118 |       "source": [
119 |         "# Creating train test split\n",
120 |         "X = dataset.drop(columns=['Adj Close'])\n",
121 |         "y = dataset['Adj Close']\n",
122 |         "\n",
123 |         "offset = int(X.shape[0] * 0.9)\n",
124 |         "\n",
125 |         "X_train, y_train = X[:offset], y[:offset]\n",
126 |         "X_test, y_test = X[offset:], y[offset:]"
127 |       ],
128 |       "outputs": [],
129 |       "execution_count": 3,
130 |       "metadata": {
131 |         "collapsed": true,
132 |         "jupyter": {
133 |           "source_hidden": false,
134 |           "outputs_hidden": false
135 |         },
136 |         "nteract": {
137 |           "transient": {
138 |             "deleting": false
139 |           }
140 |         },
141 |         "execution": {
142 |           "iopub.status.busy": "2021-04-28T00:38:37.069Z",
143 |           "iopub.execute_input": "2021-04-28T00:38:37.071Z",
144 |           "iopub.status.idle": "2021-04-28T00:38:37.076Z",
145 |           "shell.execute_reply": "2021-04-28T00:38:37.092Z"
146 |         }
147 |       }
148 |     },
149 |     {
150 |       "cell_type": "code",
151 |       "source": [
152 |         "reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None)\n",
153 |         "models, predictions = reg.fit(X_train, X_test, y_train, y_test)\n",
154 |         "print(models)"
155 |       ],
156 |       "outputs": [
157 |         {
158 |           "output_type": "stream",
159 |           "name": "stderr",
160 |           "text": [
161 |             "100%|██████████| 42/42 [00:02<00:00, 17.49it/s]\n"
162 |           ]
163 |         },
164 |         {
165 |           "output_type": "stream",
166 |           "name": "stdout",
167 |           "text": [
168 |             "                               Adjusted R-Squared  R-Squared  RMSE  Time Taken\n",
169 |             "Model                                                                         \n",
170 |             "RANSACRegressor                              1.00       1.00  0.00        0.01\n",
171 |             "Lars                                         1.00       1.00  0.00        0.01\n",
172 |             "HuberRegressor                               1.00       1.00  0.00        0.05\n",
173 |             "LassoLarsCV                                  1.00       1.00  0.00        0.01\n",
174 |             "LassoLarsIC                                  1.00       1.00  0.00        0.01\n",
175 |             "LinearRegression                             1.00       1.00  0.00        0.01\n",
176 |             "TransformedTargetRegressor                   1.00       1.00  0.00        0.01\n",
177 |             "LarsCV                                       1.00       1.00  0.00        0.01\n",
178 |             "OrthogonalMatchingPursuit                    1.00       1.00  0.00        0.01\n",
179 |             "OrthogonalMatchingPursuitCV                  1.00       1.00  0.00        0.01\n",
180 |             "BayesianRidge                                1.00       1.00  0.00        0.01\n",
181 |             "LinearSVR                                    1.00       1.00  0.00        0.02\n",
182 |             "RidgeCV                                      1.00       1.00  0.03        0.01\n",
183 |             "Ridge                                        1.00       1.00  0.10        0.01\n",
184 |             "PassiveAggressiveRegressor                   1.00       1.00  0.11        0.01\n",
185 |             "LassoCV                                      1.00       1.00  0.11        0.05\n",
186 |             "ElasticNetCV                                 1.00       1.00  0.17        0.05\n",
187 |             "SGDRegressor                                 1.00       1.00  0.21        0.01\n",
188 |             "MLPRegressor                                 1.00       1.00  0.21        0.79\n",
189 |             "GeneralizedLinearRegressor                   0.70       0.71  1.87        0.01\n",
190 |             "TweedieRegressor                             0.70       0.71  1.87        0.01\n",
191 |             "ElasticNet                                   0.67       0.68  1.97        0.01\n",
192 |             "Lasso                                        0.63       0.64  2.09        0.01\n",
193 |             "GradientBoostingRegressor                    0.62       0.64  2.11        0.10\n",
194 |             "XGBRegressor                                 0.61       0.63  2.13        0.06\n",
195 |             "ExtraTreesRegressor                          0.59       0.61  2.19        0.15\n",
196 |             "DecisionTreeRegressor                        0.57       0.59  2.23        0.01\n",
197 |             "BaggingRegressor                             0.57       0.59  2.23        0.03\n",
198 |             "RandomForestRegressor                        0.57       0.59  2.24        0.21\n",
199 |             "ExtraTreeRegressor                           0.46       0.49  2.50        0.01\n",
200 |             "KNeighborsRegressor                          0.46       0.48  2.52        0.01\n",
201 |             "LGBMRegressor                                0.45       0.47  2.54        0.05\n",
202 |             "HistGradientBoostingRegressor                0.45       0.47  2.54        0.42\n",
203 |             "AdaBoostRegressor                            0.26       0.29  2.94        0.03\n",
204 |             "NuSVR                                       -0.24      -0.19  3.81        0.06\n",
205 |             "SVR                                         -0.28      -0.22  3.86        0.02\n",
206 |             "GammaRegressor                              -0.35      -0.29  3.97        0.01\n",
207 |             "GaussianProcessRegressor                    -1.86      -1.74  5.78        0.07\n",
208 |             "KernelRidge                                 -2.29      -2.15  6.20        0.04\n",
209 |             "PoissonRegressor                            -3.20      -3.02  7.01        0.01\n",
210 |             "DummyRegressor                              -5.70      -5.42  8.85        0.01\n",
211 |             "LassoLars                                   -5.70      -5.42  8.85        0.01\n"
212 |           ]
213 |         }
214 |       ],
215 |       "execution_count": 4,
216 |       "metadata": {
217 |         "collapsed": true,
218 |         "jupyter": {
219 |           "source_hidden": false,
220 |           "outputs_hidden": false
221 |         },
222 |         "nteract": {
223 |           "transient": {
224 |             "deleting": false
225 |           }
226 |         },
227 |         "execution": {
228 |           "iopub.status.busy": "2021-04-28T00:38:37.080Z",
229 |           "iopub.execute_input": "2021-04-28T00:38:37.084Z",
230 |           "iopub.status.idle": "2021-04-28T00:38:39.506Z",
231 |           "shell.execute_reply": "2021-04-28T00:38:39.501Z"
232 |         }
233 |       }
234 |     }
235 |   ],
236 |   "metadata": {
237 |     "kernel_info": {
238 |       "name": "python3"
239 |     },
240 |     "language_info": {
241 |       "name": "python",
242 |       "version": "3.6.12",
243 |       "mimetype": "text/x-python",
244 |       "codemirror_mode": {
245 |         "name": "ipython",
246 |         "version": 3
247 |       },
248 |       "pygments_lexer": "ipython3",
249 |       "nbconvert_exporter": "python",
250 |       "file_extension": ".py"
251 |     },
252 |     "kernelspec": {
253 |       "argv": [
254 |         "C:/Users/Tin Hang/Anaconda3\\python.exe",
255 |         "-m",
256 |         "ipykernel_launcher",
257 |         "-f",
258 |         "{connection_file}"
259 |       ],
260 |       "display_name": "Python 3",
261 |       "language": "python",
262 |       "name": "python3"
263 |     },
264 |     "nteract": {
265 |       "version": "0.28.0"
266 |     }
267 |   },
268 |   "nbformat": 4,
269 |   "nbformat_minor": 0
270 | }


--------------------------------------------------------------------------------
/Stock_Algorithms/Algorithms.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LastAncientOne/Deep_Learning_Machine_Learning_Stock/e31e9fc689cd2cb6d74a8f2694750afd6e13bcec/Stock_Algorithms/Algorithms.PNG


--------------------------------------------------------------------------------
/Stock_Algorithms/Genetic_Algorithm.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "source": [
  6 |         "# Genetic Algorithm"
  7 |       ],
  8 |       "metadata": {
  9 |         "nteract": {
 10 |           "transient": {
 11 |             "deleting": false
 12 |           }
 13 |         }
 14 |       }
 15 |     },
 16 |     {
 17 |       "cell_type": "code",
 18 |       "source": [
 19 |         "import numpy as np\n",
 20 |         "import matplotlib.pyplot as plt\n",
 21 |         "import pandas as pd\n",
 22 |         "\n",
 23 |         "# yahoo finance is used to fetch data \n",
 24 |         "import yfinance as yf\n",
 25 |         "yf.pdr_override()"
 26 |       ],
 27 |       "outputs": [],
 28 |       "execution_count": 1,
 29 |       "metadata": {
 30 |         "collapsed": true,
 31 |         "jupyter": {
 32 |           "source_hidden": false,
 33 |           "outputs_hidden": false
 34 |         },
 35 |         "nteract": {
 36 |           "transient": {
 37 |             "deleting": false
 38 |           }
 39 |         },
 40 |         "execution": {
 41 |           "iopub.status.busy": "2023-10-31T22:01:03.828Z",
 42 |           "iopub.execute_input": "2023-10-31T22:01:03.832Z",
 43 |           "shell.execute_reply": "2023-10-31T22:01:04.394Z",
 44 |           "iopub.status.idle": "2023-10-31T22:01:04.399Z"
 45 |         }
 46 |       }
 47 |     },
 48 |     {
 49 |       "cell_type": "code",
 50 |       "source": [
 51 |         "# input\n",
 52 |         "symbol = 'AMD'\n",
 53 |         "start = '2014-01-01'\n",
 54 |         "end = '2019-01-01'\n",
 55 |         "\n",
 56 |         "# Read data \n",
 57 |         "dataset = yf.download(symbol,start,end)"
 58 |       ],
 59 |       "outputs": [
 60 |         {
 61 |           "output_type": "stream",
 62 |           "name": "stdout",
 63 |           "text": [
 64 |             "[*********************100%***********************]  1 of 1 completed\n"
 65 |           ]
 66 |         }
 67 |       ],
 68 |       "execution_count": 2,
 69 |       "metadata": {
 70 |         "collapsed": true,
 71 |         "jupyter": {
 72 |           "source_hidden": false,
 73 |           "outputs_hidden": false
 74 |         },
 75 |         "nteract": {
 76 |           "transient": {
 77 |             "deleting": false
 78 |           }
 79 |         },
 80 |         "execution": {
 81 |           "iopub.status.busy": "2023-10-31T22:01:04.404Z",
 82 |           "iopub.execute_input": "2023-10-31T22:01:04.406Z",
 83 |           "shell.execute_reply": "2023-10-31T22:01:04.798Z",
 84 |           "iopub.status.idle": "2023-10-31T22:01:04.805Z"
 85 |         }
 86 |       }
 87 |     },
 88 |     {
 89 |       "cell_type": "code",
 90 |       "source": [
 91 |         "dataset['Open_Close'] = (dataset['Open'] - dataset['Adj Close'])/dataset['Open']\n",
 92 |         "dataset['High_Low'] = (dataset['High'] - dataset['Low'])/dataset['Low']\n",
 93 |         "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
 94 |         "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
 95 |         "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
 96 |         "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
 97 |         "dataset = dataset.dropna()\n",
 98 |         "dataset.head()\n",
 99 |         "\n",
100 |         "# View Columns\n",
101 |         "dataset.head()"
102 |       ],
103 |       "outputs": [
104 |         {
105 |           "output_type": "execute_result",
106 |           "execution_count": 3,
107 |           "data": {
108 |             "text/plain": "            Open  High   Low  Close  Adj Close    Volume  Open_Close  \\\nDate                                                                   \n2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200   -0.005025   \n2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300   -0.029925   \n2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100    0.002387   \n2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700    0.011820   \n2014-01-09  4.20  4.23  4.05   4.09       4.09  30667600    0.026190   \n\n            High_Low  Increase_Decrease  Buy_Sell_on_Open  Buy_Sell   Returns  \nDate                                                                           \n2014-01-03  0.030928                  1                 1         1  0.012658  \n2014-01-06  0.047619                  1                 1         1  0.032500  \n2014-01-07  0.034063                  0                 1         0  0.012106  \n2014-01-08  0.028986                  0                 0         0  0.000000  \n2014-01-09  0.044444                  0                 0         1 -0.021531  ",
109 |             "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n      <th>Open_Close</th>\n      <th>High_Low</th>\n      <th>Increase_Decrease</th>\n      <th>Buy_Sell_on_Open</th>\n      <th>Buy_Sell</th>\n      <th>Returns</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-03</th>\n      <td>3.98</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>22887200</td>\n      <td>-0.005025</td>\n      <td>0.030928</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.012658</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.01</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>42398300</td>\n      <td>-0.029925</td>\n      <td>0.047619</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.032500</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.19</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>42932100</td>\n      <td>0.002387</td>\n      <td>0.034063</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0.012106</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.23</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      
<td>4.18</td>\n      <td>4.18</td>\n      <td>30678700</td>\n      <td>0.011820</td>\n      <td>0.028986</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>2014-01-09</th>\n      <td>4.20</td>\n      <td>4.23</td>\n      <td>4.05</td>\n      <td>4.09</td>\n      <td>4.09</td>\n      <td>30667600</td>\n      <td>0.026190</td>\n      <td>0.044444</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>-0.021531</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
110 |           },
111 |           "metadata": {}
112 |         }
113 |       ],
114 |       "execution_count": 3,
115 |       "metadata": {
116 |         "collapsed": true,
117 |         "jupyter": {
118 |           "source_hidden": false,
119 |           "outputs_hidden": false
120 |         },
121 |         "nteract": {
122 |           "transient": {
123 |             "deleting": false
124 |           }
125 |         },
126 |         "execution": {
127 |           "iopub.status.busy": "2023-10-31T22:01:04.810Z",
128 |           "iopub.execute_input": "2023-10-31T22:01:04.815Z",
129 |           "iopub.status.idle": "2023-10-31T22:01:04.835Z",
130 |           "shell.execute_reply": "2023-10-31T22:01:04.859Z"
131 |         }
132 |       }
133 |     },
134 |     {
135 |       "cell_type": "code",
136 |       "source": [
137 |         "# Define a placeholder for the fitness function (you should implement this)\n",
138 |         "def fitness_function(individual):\n",
139 |         "    # Calculate the fitness of the individual\n",
140 |         "    # You should define your fitness function based on your problem\n",
141 |         "    return 0  # Replace this with your actual fitness calculation\n",
142 |         "\n",
143 |         "def genetic_algorithm(population_size, num_generations, mutation_rate):\n",
144 |         "    population = np.random.randint(2, size=(population_size, 4))  # Initialize the population\n",
145 |         "\n",
146 |         "    for generation in range(num_generations):\n",
147 |         "        fitness_scores = [fitness_function(individual) for individual in population]\n",
148 |         "        best_individual = population[np.argmax(fitness_scores)]\n",
149 |         "        best_fitness = max(fitness_scores)\n",
150 |         "\n",
151 |         "        if generation % 10 == 0:\n",
152 |         "            print(f\"Generation {generation}: Best Fitness = {best_fitness}\")\n",
153 |         "\n",
154 |         "        # Create a new population using mutation and crossover\n",
155 |         "        new_population = []\n",
156 |         "\n",
157 |         "        for _ in range(population_size):\n",
158 |         "            parent1 = population[np.random.choice(range(population_size))]\n",
159 |         "            parent2 = population[np.random.choice(range(population_size))]\n",
160 |         "            crossover_point = np.random.randint(4)\n",
161 |         "            child = np.zeros(4)  # Initialize child as an array of zeros\n",
162 |         "            child[:crossover_point] = parent1[:crossover_point]\n",
163 |         "            child[crossover_point:] = parent2[crossover_point:]\n",
164 |         "            # Apply mutation\n",
165 |         "            mutation_mask = (np.random.rand(4) < mutation_rate).astype(int)\n",
166 |         "            child = (child + mutation_mask) % 2\n",
167 |         "            new_population.append(child)\n",
168 |         "\n",
169 |         "        population = np.array(new_population)\n",
170 |         "\n",
171 |         "    return best_individual, best_fitness"
172 |       ],
173 |       "outputs": [],
174 |       "execution_count": 4,
175 |       "metadata": {
176 |         "collapsed": true,
177 |         "jupyter": {
178 |           "source_hidden": false,
179 |           "outputs_hidden": false
180 |         },
181 |         "nteract": {
182 |           "transient": {
183 |             "deleting": false
184 |           }
185 |         },
186 |         "execution": {
187 |           "iopub.status.busy": "2023-10-31T22:01:04.840Z",
188 |           "iopub.execute_input": "2023-10-31T22:01:04.842Z",
189 |           "iopub.status.idle": "2023-10-31T22:01:04.848Z",
190 |           "shell.execute_reply": "2023-10-31T22:01:04.863Z"
191 |         }
192 |       }
193 |     },
194 |     {
195 |       "cell_type": "code",
196 |       "source": [
197 |         "# Set genetic algorithm parameters\n",
198 |         "population_size = 100\n",
199 |         "num_generations = 100\n",
200 |         "mutation_rate = 0.01\n",
201 |         "\n",
202 |         "# Run the genetic algorithm\n",
203 |         "best_individual, best_fitness = genetic_algorithm(population_size, num_generations, mutation_rate)\n",
204 |         "\n",
205 |         "print(\"Best Individual:\", best_individual)\n",
206 |         "print(\"Best Fitness:\", best_fitness)"
207 |       ],
208 |       "outputs": [
209 |         {
210 |           "output_type": "stream",
211 |           "name": "stdout",
212 |           "text": [
213 |             "Generation 0: Best Fitness = 0\n",
214 |             "Generation 10: Best Fitness = 0\n",
215 |             "Generation 20: Best Fitness = 0\n",
216 |             "Generation 30: Best Fitness = 0\n",
217 |             "Generation 40: Best Fitness = 0\n",
218 |             "Generation 50: Best Fitness = 0\n",
219 |             "Generation 60: Best Fitness = 0\n",
220 |             "Generation 70: Best Fitness = 0\n",
221 |             "Generation 80: Best Fitness = 0\n",
222 |             "Generation 90: Best Fitness = 0\n",
223 |             "Best Individual: [1. 1. 1. 1.]\n",
224 |             "Best Fitness: 0\n"
225 |           ]
226 |         }
227 |       ],
228 |       "execution_count": 5,
229 |       "metadata": {
230 |         "collapsed": true,
231 |         "jupyter": {
232 |           "source_hidden": false,
233 |           "outputs_hidden": false
234 |         },
235 |         "nteract": {
236 |           "transient": {
237 |             "deleting": false
238 |           }
239 |         },
240 |         "execution": {
241 |           "iopub.status.busy": "2023-10-31T22:01:04.852Z",
242 |           "iopub.execute_input": "2023-10-31T22:01:04.855Z",
243 |           "iopub.status.idle": "2023-10-31T22:01:05.294Z",
244 |           "shell.execute_reply": "2023-10-31T22:01:05.301Z"
245 |         }
246 |       }
247 |     }
248 |   ],
249 |   "metadata": {
250 |     "kernel_info": {
251 |       "name": "python3"
252 |     },
253 |     "language_info": {
254 |       "name": "python",
255 |       "version": "3.7.6",
256 |       "mimetype": "text/x-python",
257 |       "codemirror_mode": {
258 |         "name": "ipython",
259 |         "version": 3
260 |       },
261 |       "pygments_lexer": "ipython3",
262 |       "nbconvert_exporter": "python",
263 |       "file_extension": ".py"
264 |     },
265 |     "kernelspec": {
266 |       "argv": [
267 |         "C:/Users/Tin Hang/anaconda3\\python.exe",
268 |         "-m",
269 |         "ipykernel_launcher",
270 |         "-f",
271 |         "{connection_file}"
272 |       ],
273 |       "display_name": "Python 3",
274 |       "language": "python",
275 |       "name": "python3"
276 |     },
277 |     "nteract": {
278 |       "version": "0.28.0"
279 |     }
280 |   },
281 |   "nbformat": 4,
282 |   "nbformat_minor": 0
283 | }


--------------------------------------------------------------------------------
/Stock_Algorithms/Gradient_Boosting_Regressor.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "source": [
  6 |         "# Gradient Boosting Regressor"
  7 |       ],
  8 |       "metadata": {}
  9 |     },
 10 |     {
 11 |       "cell_type": "code",
 12 |       "source": [
 13 |         "import numpy as np\n",
 14 |         "import matplotlib.pyplot as plt\n",
 15 |         "import pandas as pd\n",
 16 |         "\n",
 17 |         "import warnings\n",
 18 |         "warnings.filterwarnings(\"ignore\")\n",
 19 |         "\n",
 20 |         "# fix_yahoo_finance is used to fetch data \n",
 21 |         "import fix_yahoo_finance as yf\n",
 22 |         "yf.pdr_override()"
 23 |       ],
 24 |       "outputs": [],
 25 |       "execution_count": null,
 26 |       "metadata": {
 27 |         "collapsed": false,
 28 |         "outputHidden": false,
 29 |         "inputHidden": false
 30 |       }
 31 |     },
 32 |     {
 33 |       "cell_type": "code",
 34 |       "source": [
 35 |         "# input\n",
 36 |         "symbol = 'AMD'\n",
 37 |         "start = '2007-01-01'\n",
 38 |         "end = '2018-11-16'\n",
 39 |         "\n",
 40 |         "# Read data \n",
 41 |         "dataset = yf.download(symbol,start,end)\n",
 42 |         "\n",
 43 |         "# View Columns\n",
 44 |         "dataset.head()"
 45 |       ],
 46 |       "outputs": [],
 47 |       "execution_count": null,
 48 |       "metadata": {
 49 |         "collapsed": false,
 50 |         "outputHidden": false,
 51 |         "inputHidden": false
 52 |       }
 53 |     },
 54 |     {
 55 |       "cell_type": "code",
 56 |       "source": [
 57 |         "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
 58 |         "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
 59 |         "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
 60 |         "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
 61 |         "dataset = dataset.dropna()\n",
 62 |         "dataset.head()"
 63 |       ],
 64 |       "outputs": [],
 65 |       "execution_count": null,
 66 |       "metadata": {
 67 |         "collapsed": false,
 68 |         "outputHidden": false,
 69 |         "inputHidden": false
 70 |       }
 71 |     },
 72 |     {
 73 |       "cell_type": "code",
 74 |       "source": [
 75 |         "X = dataset[['Open', 'High', 'Low', 'Volume']].values\n",
 76 |         "y = dataset['Adj Close'].values"
 77 |       ],
 78 |       "outputs": [],
 79 |       "execution_count": null,
 80 |       "metadata": {
 81 |         "collapsed": false,
 82 |         "outputHidden": false,
 83 |         "inputHidden": false
 84 |       }
 85 |     },
 86 |     {
 87 |       "cell_type": "code",
 88 |       "source": [
 89 |         "# from sklearn.cross_validation import train_test_split\n",
 90 |         "from sklearn.model_selection import train_test_split\n",
 91 |         "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1/4, random_state = 0) "
 92 |       ],
 93 |       "outputs": [],
 94 |       "execution_count": null,
 95 |       "metadata": {
 96 |         "collapsed": false,
 97 |         "outputHidden": false,
 98 |         "inputHidden": false
 99 |       }
100 |     },
101 |     {
102 |       "cell_type": "code",
103 |       "source": [
104 |         "from sklearn.ensemble import GradientBoostingRegressor\n",
105 |         "\n",
106 |         "gb = GradientBoostingRegressor(max_depth=4, \n",
107 |         "            n_estimators=200,\n",
108 |         "            random_state=2)"
109 |       ],
110 |       "outputs": [],
111 |       "execution_count": null,
112 |       "metadata": {
113 |         "collapsed": false,
114 |         "outputHidden": false,
115 |         "inputHidden": false
116 |       }
117 |     },
118 |     {
119 |       "cell_type": "code",
120 |       "source": [
121 |         "# Fit gb to the training set\n",
122 |         "gb.fit(X_train, y_train)\n",
123 |         "\n",
124 |         "# Predict test set labels\n",
125 |         "y_pred = gb.predict(X_test)"
126 |       ],
127 |       "outputs": [],
128 |       "execution_count": null,
129 |       "metadata": {
130 |         "collapsed": false,
131 |         "outputHidden": false,
132 |         "inputHidden": false
133 |       }
134 |     },
135 |     {
136 |       "cell_type": "code",
137 |       "source": [
138 |         "from sklearn.metrics import mean_squared_error as MSE\n",
139 |         "\n",
140 |         "# Compute MSE\n",
141 |         "mse_test = MSE(y_test, y_pred)\n",
142 |         "\n",
143 |         "# Compute RMSE\n",
144 |         "rmse_test = mse_test**(1/2)\n",
145 |         "\n",
146 |         "# Print RMSE\n",
147 |         "print('Test set RMSE of gb: {:.3f}'.format(rmse_test))"
148 |       ],
149 |       "outputs": [],
150 |       "execution_count": null,
151 |       "metadata": {
152 |         "collapsed": false,
153 |         "outputHidden": false,
154 |         "inputHidden": false
155 |       }
156 |     }
157 |   ],
158 |   "metadata": {
159 |     "kernel_info": {
160 |       "name": "python3"
161 |     },
162 |     "language_info": {
163 |       "pygments_lexer": "ipython3",
164 |       "version": "3.5.5",
165 |       "codemirror_mode": {
166 |         "version": 3,
167 |         "name": "ipython"
168 |       },
169 |       "nbconvert_exporter": "python",
170 |       "name": "python",
171 |       "mimetype": "text/x-python",
172 |       "file_extension": ".py"
173 |     },
174 |     "kernelspec": {
175 |       "name": "python3",
176 |       "language": "python",
177 |       "display_name": "Python 3"
178 |     },
179 |     "nteract": {
180 |       "version": "0.12.2"
181 |     }
182 |   },
183 |   "nbformat": 4,
184 |   "nbformat_minor": 4
185 | }


--------------------------------------------------------------------------------
/Stock_Algorithms/Hyperparameter_Tuning.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "source": [
  6 |         "# Hyperparameter Tuning"
  7 |       ],
  8 |       "metadata": {}
  9 |     },
 10 |     {
 11 |       "cell_type": "markdown",
 12 |       "source": [
 13 |         "In machine learning, a hyperparameter is a parameter whose value is set before the learning process begins. By contrast, the values of other parameters are derived via training. Different model training algorithms require different hyperparameters, some simple algorithms require none. (Wikipedia)\n",
 14 |         "\n",
 15 |         "\n",
 16 |         "Hyperparameter tuning optimizes a single target variable, which is called the \"hyperparameter metric.\" \n",
 17 |         "\n",
 18 |         "A model hyperparameter is a configuration that is external to the model and whose value cannot be estimated from data.\n"
 19 |       ],
 20 |       "metadata": {}
 21 |     },
 22 |     {
 23 |       "cell_type": "code",
 24 |       "source": [
 25 |         "import numpy as np\n",
 26 |         "import pandas as pd\n",
 27 |         "import matplotlib.pyplot as plt\n",
 28 |         "\n",
 29 |         "import warnings\n",
 30 |         "warnings.filterwarnings(\"ignore\")\n",
 31 |         "\n",
 32 |         "# yahoo finance is used to fetch data \n",
 33 |         "import yfinance as yf\n",
 34 |         "yf.pdr_override()"
 35 |       ],
 36 |       "outputs": [],
 37 |       "execution_count": 1,
 38 |       "metadata": {
 39 |         "collapsed": false,
 40 |         "outputHidden": false,
 41 |         "inputHidden": false
 42 |       }
 43 |     },
 44 |     {
 45 |       "cell_type": "code",
 46 |       "source": [
 47 |         "# input\n",
 48 |         "symbol = 'AMD'\n",
 49 |         "start = '2014-01-01'\n",
 50 |         "end = '2018-08-27'\n",
 51 |         "\n",
 52 |         "# Read data \n",
 53 |         "dataset = yf.download(symbol,start,end)\n",
 54 |         "\n",
 55 |         "# View Columns\n",
 56 |         "dataset.head()"
 57 |       ],
 58 |       "outputs": [
 59 |         {
 60 |           "output_type": "stream",
 61 |           "name": "stdout",
 62 |           "text": [
 63 |             "[*********************100%***********************]  1 of 1 downloaded\n"
 64 |           ]
 65 |         },
 66 |         {
 67 |           "output_type": "execute_result",
 68 |           "execution_count": 2,
 69 |           "data": {
 70 |             "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-02</th>\n      <td>3.85</td>\n      <td>3.98</td>\n      <td>3.84</td>\n      <td>3.95</td>\n      <td>3.95</td>\n      <td>20548400</td>\n    </tr>\n    <tr>\n      <th>2014-01-03</th>\n      <td>3.98</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>22887200</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.01</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>42398300</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.19</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>42932100</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.23</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>30678700</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
 71 |             "text/plain": "            Open  High   Low  Close  Adj Close    Volume\nDate                                                    \n2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700"
 72 |           },
 73 |           "metadata": {}
 74 |         }
 75 |       ],
 76 |       "execution_count": 2,
 77 |       "metadata": {
 78 |         "collapsed": false,
 79 |         "outputHidden": false,
 80 |         "inputHidden": false
 81 |       }
 82 |     },
 83 |     {
 84 |       "cell_type": "code",
 85 |       "source": [
 86 |         "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
 87 |         "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
 88 |         "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
 89 |         "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
 90 |         "dataset = dataset.dropna()\n",
 91 |         "dataset.head()"
 92 |       ],
 93 |       "outputs": [
 94 |         {
 95 |           "output_type": "execute_result",
 96 |           "execution_count": 3,
 97 |           "data": {
 98 |             "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n      <th>Increase_Decrease</th>\n      <th>Buy_Sell_on_Open</th>\n      <th>Buy_Sell</th>\n      <th>Returns</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-03</th>\n      <td>3.98</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>22887200</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.012658</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.01</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>42398300</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.032500</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.19</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>42932100</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0.012107</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.23</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>30678700</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>2014-01-09</th>\n      <td>4.20</td>\n      <td>4.23</td>\n      
<td>4.05</td>\n      <td>4.09</td>\n      <td>4.09</td>\n      <td>30667600</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>-0.021531</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
 99 |             "text/plain": "            Open  High   Low  Close  Adj Close    Volume  Increase_Decrease  \\\nDate                                                                          \n2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200                  1   \n2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300                  1   \n2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100                  0   \n2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700                  0   \n2014-01-09  4.20  4.23  4.05   4.09       4.09  30667600                  0   \n\n            Buy_Sell_on_Open  Buy_Sell   Returns  \nDate                                              \n2014-01-03                 1         1  0.012658  \n2014-01-06                 1         1  0.032500  \n2014-01-07                 1         0  0.012107  \n2014-01-08                 0         0  0.000000  \n2014-01-09                 0         1 -0.021531  "
100 |           },
101 |           "metadata": {}
102 |         }
103 |       ],
104 |       "execution_count": 3,
105 |       "metadata": {
106 |         "collapsed": false,
107 |         "outputHidden": false,
108 |         "inputHidden": false
109 |       }
110 |     },
111 |     {
112 |       "cell_type": "code",
113 |       "source": [
114 |         "X = np.array(dataset['Open']).reshape(-1, 1)\n",
115 |         "y = np.array(dataset['Buy_Sell']).reshape(-1, 1)"
116 |       ],
117 |       "outputs": [],
118 |       "execution_count": 4,
119 |       "metadata": {
120 |         "collapsed": false,
121 |         "outputHidden": false,
122 |         "inputHidden": false
123 |       }
124 |     },
125 |     {
126 |       "cell_type": "code",
127 |       "source": [
128 |         "# Create logistic regression\n",
129 |         "from sklearn import linear_model\n",
130 |         "\n",
131 |         "logistic = linear_model.LogisticRegression()"
132 |       ],
133 |       "outputs": [],
134 |       "execution_count": 5,
135 |       "metadata": {
136 |         "collapsed": false,
137 |         "outputHidden": false,
138 |         "inputHidden": false
139 |       }
140 |     },
141 |     {
142 |       "cell_type": "code",
143 |       "source": [
144 |         "# Create logistic regression\n",
145 |         "logistic = linear_model.LogisticRegression()"
146 |       ],
147 |       "outputs": [],
148 |       "execution_count": 6,
149 |       "metadata": {
150 |         "collapsed": false,
151 |         "outputHidden": false,
152 |         "inputHidden": false
153 |       }
154 |     },
155 |     {
156 |       "cell_type": "code",
157 |       "source": [
158 |         "# Create regularization penalty space\n",
159 |         "penalty = ['l1', 'l2']\n",
160 |         "\n",
161 |         "# Create regularization hyperparameter space\n",
162 |         "C = np.logspace(0, 4, 10)\n",
163 |         "\n",
164 |         "# Create hyperparameter options\n",
165 |         "hyperparameters = dict(C=C, penalty=penalty)"
166 |       ],
167 |       "outputs": [],
168 |       "execution_count": 7,
169 |       "metadata": {
170 |         "collapsed": false,
171 |         "outputHidden": false,
172 |         "inputHidden": false
173 |       }
174 |     },
175 |     {
176 |       "cell_type": "code",
177 |       "source": [
178 |         "# Create grid search using 5-fold cross validation\n",
179 |         "from sklearn.model_selection import GridSearchCV\n",
180 |         "\n",
181 |         "clf = GridSearchCV(logistic, hyperparameters, cv=5, verbose=0)"
182 |       ],
183 |       "outputs": [],
184 |       "execution_count": 8,
185 |       "metadata": {
186 |         "collapsed": false,
187 |         "outputHidden": false,
188 |         "inputHidden": false
189 |       }
190 |     },
191 |     {
192 |       "cell_type": "code",
193 |       "source": [
194 |         "# Fit grid search\n",
195 |         "best_model = clf.fit(X, y)"
196 |       ],
197 |       "outputs": [],
198 |       "execution_count": 9,
199 |       "metadata": {
200 |         "collapsed": false,
201 |         "outputHidden": false,
202 |         "inputHidden": false
203 |       }
204 |     },
205 |     {
206 |       "cell_type": "code",
207 |       "source": [
208 |         "# View best hyperparameters\n",
209 |         "print('Best Penalty:', best_model.best_estimator_.get_params()['penalty'])\n",
210 |         "print('Best C:', best_model.best_estimator_.get_params()['C'])"
211 |       ],
212 |       "outputs": [
213 |         {
214 |           "output_type": "stream",
215 |           "name": "stdout",
216 |           "text": [
217 |             "Best Penalty: l1\n",
218 |             "Best C: 1.0\n"
219 |           ]
220 |         }
221 |       ],
222 |       "execution_count": 10,
223 |       "metadata": {
224 |         "collapsed": false,
225 |         "outputHidden": false,
226 |         "inputHidden": false
227 |       }
228 |     },
229 |     {
230 |       "cell_type": "code",
231 |       "source": [
232 |         "# Predict target vector\n",
233 |         "best_model.predict(X)"
234 |       ],
235 |       "outputs": [
236 |         {
237 |           "output_type": "execute_result",
238 |           "execution_count": 11,
239 |           "data": {
240 |             "text/plain": "array([0, 0, 0, ..., 1, 1, 1])"
241 |           },
242 |           "metadata": {}
243 |         }
244 |       ],
245 |       "execution_count": 11,
246 |       "metadata": {
247 |         "collapsed": false,
248 |         "outputHidden": false,
249 |         "inputHidden": false
250 |       }
251 |     }
252 |   ],
253 |   "metadata": {
254 |     "kernel_info": {
255 |       "name": "python3"
256 |     },
257 |     "language_info": {
258 |       "codemirror_mode": {
259 |         "version": 3,
260 |         "name": "ipython"
261 |       },
262 |       "name": "python",
263 |       "file_extension": ".py",
264 |       "mimetype": "text/x-python",
265 |       "nbconvert_exporter": "python",
266 |       "version": "3.5.5",
267 |       "pygments_lexer": "ipython3"
268 |     },
269 |     "kernelspec": {
270 |       "name": "python3",
271 |       "language": "python",
272 |       "display_name": "Python 3"
273 |     },
274 |     "nteract": {
275 |       "version": "0.28.0"
276 |     }
277 |   },
278 |   "nbformat": 4,
279 |   "nbformat_minor": 4
280 | }


--------------------------------------------------------------------------------
/Stock_Algorithms/K_Means_Clustering_Part2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "source": [
  6 |         "# K Means Clustering Part 2"
  7 |       ],
  8 |       "metadata": {
  9 |         "nteract": {
 10 |           "transient": {
 11 |             "deleting": false
 12 |           }
 13 |         }
 14 |       }
 15 |     },
 16 |     {
 17 |       "cell_type": "code",
 18 |       "source": [
 19 |         "import pandas as pd\n",
 20 |         "import numpy as np\n",
 21 |         "import  pylab as pl\n",
 22 |         "import datetime as dt\n",
 23 |         "from math import sqrt\n",
 24 |         "\n",
 25 |         "import warnings\n",
 26 |         "warnings.filterwarnings(\"ignore\")\n",
 27 |         "\n",
 28 |         "# yahoo finance used to fetch data \n",
 29 |         "import yfinance as yf\n",
 30 |         "yf.pdr_override()\n",
 31 |         "\n",
 32 |         "from sklearn.cluster import KMeans"
 33 |       ],
 34 |       "outputs": [],
 35 |       "execution_count": 1,
 36 |       "metadata": {
 37 |         "collapsed": true,
 38 |         "jupyter": {
 39 |           "source_hidden": false,
 40 |           "outputs_hidden": false
 41 |         },
 42 |         "nteract": {
 43 |           "transient": {
 44 |             "deleting": false
 45 |           }
 46 |         },
 47 |         "execution": {
 48 |           "iopub.status.busy": "2022-04-07T00:41:22.144Z",
 49 |           "iopub.execute_input": "2022-04-07T00:41:22.149Z",
 50 |           "iopub.status.idle": "2022-04-07T00:41:23.358Z",
 51 |           "shell.execute_reply": "2022-04-07T00:41:23.351Z"
 52 |         }
 53 |       }
 54 |     },
 55 |     {
 56 |       "cell_type": "code",
 57 |       "source": [
 58 |         "stocks = si.tickers_dow()\n",
 59 |         "stocks"
 60 |       ],
 61 |       "outputs": [
 62 |         {
 63 |           "output_type": "error",
 64 |           "ename": "NameError",
 65 |           "evalue": "name 'si' is not defined",
 66 |           "traceback": [
 67 |             "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
 68 |             "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
 69 |             "\u001b[1;32m<ipython-input-2-b3022ca3123f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mstocks\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msi\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtickers_dow\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[0mstocks\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
 70 |             "\u001b[1;31mNameError\u001b[0m: name 'si' is not defined"
 71 |           ]
 72 |         }
 73 |       ],
 74 |       "execution_count": 2,
 75 |       "metadata": {
 76 |         "collapsed": true,
 77 |         "jupyter": {
 78 |           "source_hidden": false,
 79 |           "outputs_hidden": false
 80 |         },
 81 |         "nteract": {
 82 |           "transient": {
 83 |             "deleting": false
 84 |           }
 85 |         },
 86 |         "execution": {
 87 |           "iopub.status.busy": "2022-04-07T00:41:23.366Z",
 88 |           "iopub.execute_input": "2022-04-07T00:41:23.371Z",
 89 |           "iopub.status.idle": "2022-04-07T00:37:28.222Z",
 90 |           "shell.execute_reply": "2022-04-07T00:37:28.230Z"
 91 |         }
 92 |       }
 93 |     },
 94 |     {
 95 |       "cell_type": "code",
 96 |       "source": [
 97 |         "start = dt.datetime(2020, 1, 1)\n",
 98 |         "now = dt.datetime.now()\n"
 99 |       ],
100 |       "outputs": [],
101 |       "execution_count": null,
102 |       "metadata": {
103 |         "collapsed": true,
104 |         "jupyter": {
105 |           "source_hidden": false,
106 |           "outputs_hidden": false
107 |         },
108 |         "nteract": {
109 |           "transient": {
110 |             "deleting": false
111 |           }
112 |         },
113 |         "execution": {
114 |           "iopub.status.busy": "2022-04-07T00:37:30.620Z",
115 |           "iopub.execute_input": "2022-04-07T00:37:30.623Z",
116 |           "shell.execute_reply": "2022-04-07T00:37:30.635Z",
117 |           "iopub.status.idle": "2022-04-07T00:37:30.628Z"
118 |         }
119 |       }
120 |     },
121 |     {
122 |       "cell_type": "code",
123 |       "source": [
124 |         "df = yf.download(stocks, start, now)['Adj Close']\n",
125 |         "df.head()"
126 |       ],
127 |       "outputs": [],
128 |       "execution_count": null,
129 |       "metadata": {
130 |         "collapsed": true,
131 |         "jupyter": {
132 |           "source_hidden": false,
133 |           "outputs_hidden": false
134 |         },
135 |         "nteract": {
136 |           "transient": {
137 |             "deleting": false
138 |           }
139 |         },
140 |         "execution": {
141 |           "iopub.status.busy": "2022-04-07T00:37:47.765Z",
142 |           "iopub.execute_input": "2022-04-07T00:37:47.768Z",
143 |           "iopub.status.idle": "2022-04-07T00:37:50.505Z",
144 |           "shell.execute_reply": "2022-04-07T00:37:50.553Z"
145 |         }
146 |       }
147 |     },
148 |     {
149 |       "cell_type": "code",
150 |       "source": [
151 |         "returns = df.pct_change().mean() * 252\n",
152 |         "variance = df.pct_change().std() * sqrt(252)\n",
153 |         "returns.columns = [\"Returns\"]\n",
154 |         "variance.columns = [\"Variance\"]"
155 |       ],
156 |       "outputs": [],
157 |       "execution_count": null,
158 |       "metadata": {
159 |         "collapsed": true,
160 |         "jupyter": {
161 |           "source_hidden": false,
162 |           "outputs_hidden": false
163 |         },
164 |         "nteract": {
165 |           "transient": {
166 |             "deleting": false
167 |           }
168 |         },
169 |         "execution": {
170 |           "iopub.status.busy": "2022-04-07T00:38:41.490Z",
171 |           "iopub.execute_input": "2022-04-07T00:38:41.493Z",
172 |           "iopub.status.idle": "2022-04-07T00:38:41.500Z",
173 |           "shell.execute_reply": "2022-04-07T00:38:41.516Z"
174 |         }
175 |       }
176 |     },
177 |     {
178 |       "cell_type": "code",
179 |       "source": [
180 |         "ret_var = pd.concat([returns, variance], axis = 1).dropna()\n",
181 |         "ret_var.columns = [\"Returns\", \"Variance\"]"
182 |       ],
183 |       "outputs": [],
184 |       "execution_count": null,
185 |       "metadata": {
186 |         "collapsed": true,
187 |         "jupyter": {
188 |           "source_hidden": false,
189 |           "outputs_hidden": false
190 |         },
191 |         "nteract": {
192 |           "transient": {
193 |             "deleting": false
194 |           }
195 |         },
196 |         "execution": {
197 |           "iopub.status.busy": "2022-04-07T00:38:55.905Z",
198 |           "iopub.execute_input": "2022-04-07T00:38:55.908Z",
199 |           "shell.execute_reply": "2022-04-07T00:38:55.919Z",
200 |           "iopub.status.idle": "2022-04-07T00:38:55.926Z"
201 |         }
202 |       }
203 |     },
204 |     {
205 |       "cell_type": "code",
206 |       "source": [
207 |         "X =  ret_var.values\n",
208 |         "sse = []\n",
209 |         "\n",
210 |         "for k in range(2,15):\n",
211 |         "    \n",
212 |         "    kmeans = KMeans(n_clusters = k)\n",
213 |         "    kmeans.fit(X)\n",
214 |         "    \n",
215 |         "    sse.append(kmeans.inertia_) #SSE for each n_clusters\n",
216 |         "pl.plot(range(2,15), sse)\n",
217 |         "pl.title(\"Elbow Curve\")\n",
218 |         "pl.subplots()\n",
219 |         "pl.show()"
220 |       ],
221 |       "outputs": [],
222 |       "execution_count": null,
223 |       "metadata": {
224 |         "collapsed": true,
225 |         "jupyter": {
226 |           "source_hidden": false,
227 |           "outputs_hidden": false
228 |         },
229 |         "nteract": {
230 |           "transient": {
231 |             "deleting": false
232 |           }
233 |         },
234 |         "execution": {
235 |           "iopub.status.busy": "2022-04-07T00:39:58.118Z",
236 |           "iopub.execute_input": "2022-04-07T00:39:58.122Z",
237 |           "shell.execute_reply": "2022-04-07T00:39:58.523Z",
238 |           "iopub.status.idle": "2022-04-07T00:39:58.515Z"
239 |         }
240 |       }
241 |     },
242 |     {
243 |       "cell_type": "code",
244 |       "source": [
245 |         "kmeans = KMeans(n_clusters = 5).fit(X)\n",
246 |         "centroids = kmeans.cluster_centers_\n",
247 |         "pl.scatter(X[:,0],X[:,1], c = kmeans.labels_, cmap =\"rainbow\")\n",
248 |         "pl.show()"
249 |       ],
250 |       "outputs": [],
251 |       "execution_count": null,
252 |       "metadata": {
253 |         "collapsed": true,
254 |         "jupyter": {
255 |           "source_hidden": false,
256 |           "outputs_hidden": false
257 |         },
258 |         "nteract": {
259 |           "transient": {
260 |             "deleting": false
261 |           }
262 |         },
263 |         "execution": {
264 |           "iopub.status.busy": "2022-04-07T00:40:02.008Z",
265 |           "iopub.execute_input": "2022-04-07T00:40:02.013Z",
266 |           "shell.execute_reply": "2022-04-07T00:40:02.101Z",
267 |           "iopub.status.idle": "2022-04-07T00:40:02.108Z"
268 |         }
269 |       }
270 |     },
271 |     {
272 |       "cell_type": "code",
273 |       "source": [
274 |         "X = ret_var.values\n",
275 |         "kmeans =KMeans(n_clusters = 5).fit(X)\n",
276 |         "centroids = kmeans.cluster_centers_\n",
277 |         "pl.scatter(X[:,0],X[:,1], c = kmeans.labels_, cmap =\"rainbow\")\n",
278 |         "pl.show()"
279 |       ],
280 |       "outputs": [],
281 |       "execution_count": null,
282 |       "metadata": {
283 |         "collapsed": true,
284 |         "jupyter": {
285 |           "source_hidden": false,
286 |           "outputs_hidden": false
287 |         },
288 |         "nteract": {
289 |           "transient": {
290 |             "deleting": false
291 |           }
292 |         },
293 |         "execution": {
294 |           "iopub.status.busy": "2022-04-07T00:40:12.616Z",
295 |           "iopub.execute_input": "2022-04-07T00:40:12.620Z",
296 |           "shell.execute_reply": "2022-04-07T00:40:12.703Z",
297 |           "iopub.status.idle": "2022-04-07T00:40:12.709Z"
298 |         }
299 |       }
300 |     },
301 |     {
302 |       "cell_type": "code",
303 |       "source": [
304 |         "Companies = pd.DataFrame(ret_var.index)\n",
305 |         "cluster_labels = pd.DataFrame(kmeans.labels_)\n",
306 |         "df = pd.concat([Companies, cluster_labels],axis = 1)\n",
307 |         "df.columns = ['Stock', 'Cluster Labels']\n",
308 |         "df.set_index('Stock')"
309 |       ],
310 |       "outputs": [],
311 |       "execution_count": null,
312 |       "metadata": {
313 |         "collapsed": true,
314 |         "jupyter": {
315 |           "source_hidden": false,
316 |           "outputs_hidden": false
317 |         },
318 |         "nteract": {
319 |           "transient": {
320 |             "deleting": false
321 |           }
322 |         },
323 |         "execution": {
324 |           "iopub.status.busy": "2022-04-07T00:41:01.920Z",
325 |           "iopub.execute_input": "2022-04-07T00:41:01.925Z",
326 |           "iopub.status.idle": "2022-04-07T00:41:01.936Z",
327 |           "shell.execute_reply": "2022-04-07T00:41:01.950Z"
328 |         }
329 |       }
330 |     },
331 |     {
332 |       "cell_type": "code",
333 |       "source": [
334 |         "df"
335 |       ],
336 |       "outputs": [],
337 |       "execution_count": null,
338 |       "metadata": {
339 |         "collapsed": true,
340 |         "jupyter": {
341 |           "source_hidden": false,
342 |           "outputs_hidden": false
343 |         },
344 |         "nteract": {
345 |           "transient": {
346 |             "deleting": false
347 |           }
348 |         },
349 |         "execution": {
350 |           "iopub.status.busy": "2022-04-07T00:41:12.197Z",
351 |           "iopub.execute_input": "2022-04-07T00:41:12.202Z",
352 |           "iopub.status.idle": "2022-04-07T00:41:12.212Z",
353 |           "shell.execute_reply": "2022-04-07T00:41:12.226Z"
354 |         }
355 |       }
356 |     }
357 |   ],
358 |   "metadata": {
359 |     "kernel_info": {
360 |       "name": "python3"
361 |     },
362 |     "language_info": {
363 |       "name": "python",
364 |       "version": "3.6.13",
365 |       "mimetype": "text/x-python",
366 |       "codemirror_mode": {
367 |         "name": "ipython",
368 |         "version": 3
369 |       },
370 |       "pygments_lexer": "ipython3",
371 |       "nbconvert_exporter": "python",
372 |       "file_extension": ".py"
373 |     },
374 |     "kernelspec": {
375 |       "argv": [
376 |         "C:/Users/Tin Hang/Anaconda3\\python.exe",
377 |         "-m",
378 |         "ipykernel_launcher",
379 |         "-f",
380 |         "{connection_file}"
381 |       ],
382 |       "display_name": "Python 3",
383 |       "language": "python",
384 |       "name": "python3"
385 |     },
386 |     "nteract": {
387 |       "version": "0.28.0"
388 |     }
389 |   },
390 |   "nbformat": 4,
391 |   "nbformat_minor": 0
392 | }


--------------------------------------------------------------------------------
/Stock_Algorithms/Multiple_Linear_Regression_with_Normalize_Data.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "source": [
  6 |         "# Multiple Linear Regression with Normalize Data"
  7 |       ],
  8 |       "metadata": {}
  9 |     },
 10 |     {
 11 |       "cell_type": "code",
 12 |       "source": [
 13 |         "# Importing the libraries\n",
 14 |         "import pandas as pd\n",
 15 |         "import numpy as np\n",
 16 |         "import matplotlib.pyplot as plt\n",
 17 |         "\n",
 18 |         "import warnings\n",
 19 |         "warnings.filterwarnings(\"ignore\")\n",
 20 |         "\n",
 21 |         "# fix_yahoo_finance is used to fetch data \n",
 22 |         "import fix_yahoo_finance as yf\n",
 23 |         "yf.pdr_override()"
 24 |       ],
 25 |       "outputs": [],
 26 |       "execution_count": 1,
 27 |       "metadata": {
 28 |         "collapsed": false,
 29 |         "outputHidden": false,
 30 |         "inputHidden": false
 31 |       }
 32 |     },
 33 |     {
 34 |       "cell_type": "code",
 35 |       "source": [
 36 |         "# input\n",
 37 |         "symbol = 'AMD'\n",
 38 |         "start = '2014-01-01'\n",
 39 |         "end = '2018-08-27'\n",
 40 |         "\n",
 41 |         "# Read data \n",
 42 |         "dataset = yf.download(symbol,start,end)\n",
 43 |         "\n",
 44 |         "# View columns \n",
 45 |         "dataset.head()"
 46 |       ],
 47 |       "outputs": [
 48 |         {
 49 |           "output_type": "stream",
 50 |           "name": "stdout",
 51 |           "text": [
 52 |             "[*********************100%***********************]  1 of 1 downloaded\n"
 53 |           ]
 54 |         },
 55 |         {
 56 |           "output_type": "execute_result",
 57 |           "execution_count": 2,
 58 |           "data": {
 59 |             "text/plain": [
 60 |               "            Open  High   Low  Close  Adj Close    Volume\n",
 61 |               "Date                                                    \n",
 62 |               "2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n",
 63 |               "2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n",
 64 |               "2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n",
 65 |               "2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n",
 66 |               "2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700"
 67 |             ],
 68 |             "text/html": [
 69 |               "<div>\n",
 70 |               "<style scoped>\n",
 71 |               "    .dataframe tbody tr th:only-of-type {\n",
 72 |               "        vertical-align: middle;\n",
 73 |               "    }\n",
 74 |               "\n",
 75 |               "    .dataframe tbody tr th {\n",
 76 |               "        vertical-align: top;\n",
 77 |               "    }\n",
 78 |               "\n",
 79 |               "    .dataframe thead th {\n",
 80 |               "        text-align: right;\n",
 81 |               "    }\n",
 82 |               "</style>\n",
 83 |               "<table border=\"1\" class=\"dataframe\">\n",
 84 |               "  <thead>\n",
 85 |               "    <tr style=\"text-align: right;\">\n",
 86 |               "      <th></th>\n",
 87 |               "      <th>Open</th>\n",
 88 |               "      <th>High</th>\n",
 89 |               "      <th>Low</th>\n",
 90 |               "      <th>Close</th>\n",
 91 |               "      <th>Adj Close</th>\n",
 92 |               "      <th>Volume</th>\n",
 93 |               "    </tr>\n",
 94 |               "    <tr>\n",
 95 |               "      <th>Date</th>\n",
 96 |               "      <th></th>\n",
 97 |               "      <th></th>\n",
 98 |               "      <th></th>\n",
 99 |               "      <th></th>\n",
100 |               "      <th></th>\n",
101 |               "      <th></th>\n",
102 |               "    </tr>\n",
103 |               "  </thead>\n",
104 |               "  <tbody>\n",
105 |               "    <tr>\n",
106 |               "      <th>2014-01-02</th>\n",
107 |               "      <td>3.85</td>\n",
108 |               "      <td>3.98</td>\n",
109 |               "      <td>3.84</td>\n",
110 |               "      <td>3.95</td>\n",
111 |               "      <td>3.95</td>\n",
112 |               "      <td>20548400</td>\n",
113 |               "    </tr>\n",
114 |               "    <tr>\n",
115 |               "      <th>2014-01-03</th>\n",
116 |               "      <td>3.98</td>\n",
117 |               "      <td>4.00</td>\n",
118 |               "      <td>3.88</td>\n",
119 |               "      <td>4.00</td>\n",
120 |               "      <td>4.00</td>\n",
121 |               "      <td>22887200</td>\n",
122 |               "    </tr>\n",
123 |               "    <tr>\n",
124 |               "      <th>2014-01-06</th>\n",
125 |               "      <td>4.01</td>\n",
126 |               "      <td>4.18</td>\n",
127 |               "      <td>3.99</td>\n",
128 |               "      <td>4.13</td>\n",
129 |               "      <td>4.13</td>\n",
130 |               "      <td>42398300</td>\n",
131 |               "    </tr>\n",
132 |               "    <tr>\n",
133 |               "      <th>2014-01-07</th>\n",
134 |               "      <td>4.19</td>\n",
135 |               "      <td>4.25</td>\n",
136 |               "      <td>4.11</td>\n",
137 |               "      <td>4.18</td>\n",
138 |               "      <td>4.18</td>\n",
139 |               "      <td>42932100</td>\n",
140 |               "    </tr>\n",
141 |               "    <tr>\n",
142 |               "      <th>2014-01-08</th>\n",
143 |               "      <td>4.23</td>\n",
144 |               "      <td>4.26</td>\n",
145 |               "      <td>4.14</td>\n",
146 |               "      <td>4.18</td>\n",
147 |               "      <td>4.18</td>\n",
148 |               "      <td>30678700</td>\n",
149 |               "    </tr>\n",
150 |               "  </tbody>\n",
151 |               "</table>\n",
152 |               "</div>"
153 |             ]
154 |           },
155 |           "metadata": {}
156 |         }
157 |       ],
158 |       "execution_count": 2,
159 |       "metadata": {
160 |         "collapsed": false,
161 |         "outputHidden": false,
162 |         "inputHidden": false
163 |       }
164 |     },
165 |     {
166 |       "cell_type": "code",
167 |       "source": [
168 |         "X = dataset.iloc[ : , 0:4].values\n",
169 |         "Y = np.asanyarray(dataset[['Adj Close']])"
170 |       ],
171 |       "outputs": [],
172 |       "execution_count": 3,
173 |       "metadata": {
174 |         "collapsed": false,
175 |         "outputHidden": false,
176 |         "inputHidden": false
177 |       }
178 |     },
179 |     {
180 |       "cell_type": "code",
181 |       "source": [
182 |         "from sklearn import preprocessing\n",
183 |         "\n",
184 |         "# normalize the data attributes\n",
185 |         "normalized_X = preprocessing.normalize(X)"
186 |       ],
187 |       "outputs": [],
188 |       "execution_count": 4,
189 |       "metadata": {
190 |         "collapsed": false,
191 |         "outputHidden": false,
192 |         "inputHidden": false
193 |       }
194 |     },
195 |     {
196 |       "cell_type": "code",
197 |       "source": [
198 |         "X = normalized_X[: , 1:]"
199 |       ],
200 |       "outputs": [],
201 |       "execution_count": 5,
202 |       "metadata": {
203 |         "collapsed": false,
204 |         "outputHidden": false,
205 |         "inputHidden": false
206 |       }
207 |     },
208 |     {
209 |       "cell_type": "code",
210 |       "source": [
211 |         "# Splitting the dataset into the Training set and Test set\n",
212 |         "from sklearn.model_selection import train_test_split\n",
213 |         "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)"
214 |       ],
215 |       "outputs": [],
216 |       "execution_count": 6,
217 |       "metadata": {
218 |         "collapsed": false,
219 |         "outputHidden": false,
220 |         "inputHidden": false
221 |       }
222 |     },
223 |     {
224 |       "cell_type": "code",
225 |       "source": [
226 |         "from sklearn.linear_model import LinearRegression\n",
227 |         "regressor = LinearRegression()\n",
228 |         "regressor.fit(X_train, Y_train)"
229 |       ],
230 |       "outputs": [
231 |         {
232 |           "output_type": "execute_result",
233 |           "execution_count": 7,
234 |           "data": {
235 |             "text/plain": [
236 |               "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
237 |               "         normalize=False)"
238 |             ]
239 |           },
240 |           "metadata": {}
241 |         }
242 |       ],
243 |       "execution_count": 7,
244 |       "metadata": {
245 |         "collapsed": false,
246 |         "outputHidden": false,
247 |         "inputHidden": false
248 |       }
249 |     },
250 |     {
251 |       "cell_type": "code",
252 |       "source": [
253 |         "y_pred = regressor.predict(X_test)"
254 |       ],
255 |       "outputs": [],
256 |       "execution_count": 8,
257 |       "metadata": {
258 |         "collapsed": false,
259 |         "outputHidden": false,
260 |         "inputHidden": false
261 |       }
262 |     },
263 |     {
264 |       "cell_type": "code",
265 |       "source": [
266 |         "from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, r2_score\n",
267 |         "ex_var_score = explained_variance_score(Y_test, y_pred)\n",
268 |         "m_absolute_error = mean_absolute_error(Y_test, y_pred)\n",
269 |         "m_squared_error = mean_squared_error(Y_test, y_pred)\n",
270 |         "r_2_score = r2_score(Y_test, y_pred)\n",
271 |         "\n",
272 |         "print(\"Explained Variance Score: \"+str(ex_var_score))\n",
273 |         "print(\"Mean Absolute Error \"+str(m_absolute_error))\n",
274 |         "print(\"Mean Squared Error \"+str(m_squared_error))\n",
275 |         "print(\"R Squared Error \"+str(r_2_score))"
276 |       ],
277 |       "outputs": [
278 |         {
279 |           "output_type": "stream",
280 |           "name": "stdout",
281 |           "text": [
282 |             "Explained Variance Score: 0.0145762414645\n",
283 |             "Mean Absolute Error 4.3559157043\n",
284 |             "Mean Squared Error 22.546676437\n",
285 |             "R Squared Error 0.0145752513278\n"
286 |           ]
287 |         }
288 |       ],
289 |       "execution_count": 9,
290 |       "metadata": {
291 |         "collapsed": false,
292 |         "outputHidden": false,
293 |         "inputHidden": false
294 |       }
295 |     },
296 |     {
297 |       "cell_type": "code",
298 |       "source": [
299 |         "print ('Coefficients: ', regressor.coef_)\n",
300 |         "print(\"Mean squared error: %.2f\"\n",
301 |         "      % np.mean((y_pred - Y_test) ** 2))\n",
302 |         "\n",
303 |         "# R^2 (coefficient of determination) on the held-out targets: 1 is perfect prediction\n",
304 |         "print('R^2 score: %.2f' % regressor.score(X_test, Y_test))"
305 |       ],
306 |       "outputs": [
307 |         {
308 |           "output_type": "stream",
309 |           "name": "stdout",
310 |           "text": [
311 |             "Coefficients:  [[-79.79361894 -53.18582378  15.74315198]]\n",
312 |             "Mean squared error: 22.55\n",
313 |             "R^2 score: 0.01\n"
314 |           ]
315 |         }
316 |       ],
317 |       "execution_count": 10,
318 |       "metadata": {
319 |         "collapsed": false,
320 |         "outputHidden": false,
321 |         "inputHidden": false
322 |       }
323 |     },
324 |     {
325 |       "cell_type": "code",
326 |       "source": [
327 |         "print('Multiple Linear Score:', regressor.score(X_test, Y_test))"
328 |       ],
329 |       "outputs": [
330 |         {
331 |           "output_type": "stream",
332 |           "name": "stdout",
333 |           "text": [
334 |             "Multiple Linear Score: 0.0145752513278\n"
335 |           ]
336 |         }
337 |       ],
338 |       "execution_count": 12,
339 |       "metadata": {
340 |         "collapsed": false,
341 |         "outputHidden": false,
342 |         "inputHidden": false
343 |       }
344 |     }
345 |   ],
346 |   "metadata": {
347 |     "kernel_info": {
348 |       "name": "python3"
349 |     },
350 |     "language_info": {
351 |       "codemirror_mode": {
352 |         "name": "ipython",
353 |         "version": 3
354 |       },
355 |       "name": "python",
356 |       "nbconvert_exporter": "python",
357 |       "file_extension": ".py",
358 |       "pygments_lexer": "ipython3",
359 |       "version": "3.5.5",
360 |       "mimetype": "text/x-python"
361 |     },
362 |     "kernelspec": {
363 |       "name": "python3",
364 |       "language": "python",
365 |       "display_name": "Python 3"
366 |     },
367 |     "nteract": {
368 |       "version": "0.14.5"
369 |     }
370 |   },
371 |   "nbformat": 4,
372 |   "nbformat_minor": 0
373 | }


--------------------------------------------------------------------------------
/Stock_Algorithms/Principal_Component_Regression.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "source": [
  6 |         "# Principal Component Regression (PCR)"
  7 |       ],
  8 |       "metadata": {}
  9 |     },
 10 |     {
 11 |       "cell_type": "code",
 12 |       "source": [
 13 |         "import numpy as np\n",
 14 |         "import matplotlib.pyplot as plt\n",
 15 |         "import pandas as pd\n",
 16 |         "\n",
 17 |         "import warnings\n",
 18 |         "warnings.filterwarnings(\"ignore\")\n",
 19 |         "\n",
 20 |         "# fix_yahoo_finance is used to fetch data \n",
 21 |         "import fix_yahoo_finance as yf\n",
 22 |         "yf.pdr_override()"
 23 |       ],
 24 |       "outputs": [],
 25 |       "execution_count": 1,
 26 |       "metadata": {
 27 |         "collapsed": false,
 28 |         "outputHidden": false,
 29 |         "inputHidden": false
 30 |       }
 31 |     },
 32 |     {
 33 |       "cell_type": "code",
 34 |       "source": [
 35 |         "# input\n",
 36 |         "symbol = 'AMD'\n",
 37 |         "start = '2014-01-01'\n",
 38 |         "end = '2019-01-01'\n",
 39 |         "\n",
 40 |         "# Read data \n",
 41 |         "dataset = yf.download(symbol,start,end)\n",
 42 |         "\n",
 43 |         "# View Columns\n",
 44 |         "dataset.head()"
 45 |       ],
 46 |       "outputs": [
 47 |         {
 48 |           "output_type": "stream",
 49 |           "name": "stdout",
 50 |           "text": [
 51 |             "[*********************100%***********************]  1 of 1 downloaded\n"
 52 |           ]
 53 |         },
 54 |         {
 55 |           "output_type": "execute_result",
 56 |           "execution_count": 2,
 57 |           "data": {
 58 |             "text/html": [
 59 |               "<div>\n",
 60 |               "<style scoped>\n",
 61 |               "    .dataframe tbody tr th:only-of-type {\n",
 62 |               "        vertical-align: middle;\n",
 63 |               "    }\n",
 64 |               "\n",
 65 |               "    .dataframe tbody tr th {\n",
 66 |               "        vertical-align: top;\n",
 67 |               "    }\n",
 68 |               "\n",
 69 |               "    .dataframe thead th {\n",
 70 |               "        text-align: right;\n",
 71 |               "    }\n",
 72 |               "</style>\n",
 73 |               "<table border=\"1\" class=\"dataframe\">\n",
 74 |               "  <thead>\n",
 75 |               "    <tr style=\"text-align: right;\">\n",
 76 |               "      <th></th>\n",
 77 |               "      <th>Open</th>\n",
 78 |               "      <th>High</th>\n",
 79 |               "      <th>Low</th>\n",
 80 |               "      <th>Close</th>\n",
 81 |               "      <th>Adj Close</th>\n",
 82 |               "      <th>Volume</th>\n",
 83 |               "    </tr>\n",
 84 |               "    <tr>\n",
 85 |               "      <th>Date</th>\n",
 86 |               "      <th></th>\n",
 87 |               "      <th></th>\n",
 88 |               "      <th></th>\n",
 89 |               "      <th></th>\n",
 90 |               "      <th></th>\n",
 91 |               "      <th></th>\n",
 92 |               "    </tr>\n",
 93 |               "  </thead>\n",
 94 |               "  <tbody>\n",
 95 |               "    <tr>\n",
 96 |               "      <th>2014-01-02</th>\n",
 97 |               "      <td>3.85</td>\n",
 98 |               "      <td>3.98</td>\n",
 99 |               "      <td>3.84</td>\n",
100 |               "      <td>3.95</td>\n",
101 |               "      <td>3.95</td>\n",
102 |               "      <td>20548400</td>\n",
103 |               "    </tr>\n",
104 |               "    <tr>\n",
105 |               "      <th>2014-01-03</th>\n",
106 |               "      <td>3.98</td>\n",
107 |               "      <td>4.00</td>\n",
108 |               "      <td>3.88</td>\n",
109 |               "      <td>4.00</td>\n",
110 |               "      <td>4.00</td>\n",
111 |               "      <td>22887200</td>\n",
112 |               "    </tr>\n",
113 |               "    <tr>\n",
114 |               "      <th>2014-01-06</th>\n",
115 |               "      <td>4.01</td>\n",
116 |               "      <td>4.18</td>\n",
117 |               "      <td>3.99</td>\n",
118 |               "      <td>4.13</td>\n",
119 |               "      <td>4.13</td>\n",
120 |               "      <td>42398300</td>\n",
121 |               "    </tr>\n",
122 |               "    <tr>\n",
123 |               "      <th>2014-01-07</th>\n",
124 |               "      <td>4.19</td>\n",
125 |               "      <td>4.25</td>\n",
126 |               "      <td>4.11</td>\n",
127 |               "      <td>4.18</td>\n",
128 |               "      <td>4.18</td>\n",
129 |               "      <td>42932100</td>\n",
130 |               "    </tr>\n",
131 |               "    <tr>\n",
132 |               "      <th>2014-01-08</th>\n",
133 |               "      <td>4.23</td>\n",
134 |               "      <td>4.26</td>\n",
135 |               "      <td>4.14</td>\n",
136 |               "      <td>4.18</td>\n",
137 |               "      <td>4.18</td>\n",
138 |               "      <td>30678700</td>\n",
139 |               "    </tr>\n",
140 |               "  </tbody>\n",
141 |               "</table>\n",
142 |               "</div>"
143 |             ],
144 |             "text/plain": [
145 |               "            Open  High   Low  Close  Adj Close    Volume\n",
146 |               "Date                                                    \n",
147 |               "2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n",
148 |               "2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n",
149 |               "2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n",
150 |               "2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n",
151 |               "2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700"
152 |             ]
153 |           },
154 |           "metadata": {}
155 |         }
156 |       ],
157 |       "execution_count": 2,
158 |       "metadata": {
159 |         "collapsed": false,
160 |         "outputHidden": false,
161 |         "inputHidden": false
162 |       }
163 |     },
164 |     {
165 |       "cell_type": "code",
166 |       "source": [
167 |         "X = dataset.iloc[ : , 1].values\n",
168 |         "Y = dataset.iloc[ : , 4].values"
169 |       ],
170 |       "outputs": [],
171 |       "execution_count": 3,
172 |       "metadata": {
173 |         "collapsed": false,
174 |         "outputHidden": false,
175 |         "inputHidden": false
176 |       }
177 |     },
178 |     {
179 |       "cell_type": "code",
180 |       "source": [
181 |         "print(X.shape)\n",
182 |         "print(Y.shape)"
183 |       ],
184 |       "outputs": [
185 |         {
186 |           "output_type": "stream",
187 |           "name": "stdout",
188 |           "text": [
189 |             "(1258,)\n",
190 |             "(1258,)\n"
191 |           ]
192 |         }
193 |       ],
194 |       "execution_count": 4,
195 |       "metadata": {
196 |         "collapsed": false,
197 |         "outputHidden": false,
198 |         "inputHidden": false
199 |       }
200 |     },
201 |     {
202 |       "cell_type": "code",
203 |       "source": [
204 |         "X = np.array(X).reshape(-1, 1)\n",
205 |         "Y = np.array(Y).reshape(-1, 1)"
206 |       ],
207 |       "outputs": [],
208 |       "execution_count": 5,
209 |       "metadata": {
210 |         "collapsed": false,
211 |         "outputHidden": false,
212 |         "inputHidden": false
213 |       }
214 |     },
215 |     {
216 |       "cell_type": "code",
217 |       "source": [
218 |         "from sklearn.decomposition import PCA"
219 |       ],
220 |       "outputs": [],
221 |       "execution_count": 6,
222 |       "metadata": {
223 |         "collapsed": false,
224 |         "outputHidden": false,
225 |         "inputHidden": false
226 |       }
227 |     },
228 |     {
229 |       "cell_type": "code",
230 |       "source": [
231 |         "pca = PCA(n_components=1)\n",
232 |         "pca.fit(X)"
233 |       ],
234 |       "outputs": [
235 |         {
236 |           "output_type": "execute_result",
237 |           "execution_count": 7,
238 |           "data": {
239 |             "text/plain": [
240 |               "PCA(copy=True, iterated_power='auto', n_components=1, random_state=None,\n",
241 |               "  svd_solver='auto', tol=0.0, whiten=False)"
242 |             ]
243 |           },
244 |           "metadata": {}
245 |         }
246 |       ],
247 |       "execution_count": 7,
248 |       "metadata": {
249 |         "collapsed": false,
250 |         "outputHidden": false,
251 |         "inputHidden": false
252 |       }
253 |     },
254 |     {
255 |       "cell_type": "code",
256 |       "source": [
257 |         "print(pca.explained_variance_ratio_)"
258 |       ],
259 |       "outputs": [
260 |         {
261 |           "output_type": "stream",
262 |           "name": "stdout",
263 |           "text": [
264 |             "[ 1.]\n"
265 |           ]
266 |         }
267 |       ],
268 |       "execution_count": 8,
269 |       "metadata": {
270 |         "collapsed": false,
271 |         "outputHidden": false,
272 |         "inputHidden": false
273 |       }
274 |     },
275 |     {
276 |       "cell_type": "code",
277 |       "source": [
278 |         "print(pca.singular_values_)"
279 |       ],
280 |       "outputs": [
281 |         {
282 |           "output_type": "stream",
283 |           "name": "stdout",
284 |           "text": [
285 |             "[ 236.05044323]\n"
286 |           ]
287 |         }
288 |       ],
289 |       "execution_count": 9,
290 |       "metadata": {
291 |         "collapsed": false,
292 |         "outputHidden": false,
293 |         "inputHidden": false
294 |       }
295 |     },
296 |     {
297 |       "cell_type": "code",
298 |       "source": [
299 |         "pca = PCA(n_components=1, svd_solver='full')\n",
300 |         "pca.fit(X)"
301 |       ],
302 |       "outputs": [
303 |         {
304 |           "output_type": "execute_result",
305 |           "execution_count": 10,
306 |           "data": {
307 |             "text/plain": [
308 |               "PCA(copy=True, iterated_power='auto', n_components=1, random_state=None,\n",
309 |               "  svd_solver='full', tol=0.0, whiten=False)"
310 |             ]
311 |           },
312 |           "metadata": {}
313 |         }
314 |       ],
315 |       "execution_count": 10,
316 |       "metadata": {
317 |         "collapsed": false,
318 |         "outputHidden": false,
319 |         "inputHidden": false
320 |       }
321 |     },
322 |     {
323 |       "cell_type": "code",
324 |       "source": [
325 |         "print(pca.explained_variance_ratio_)"
326 |       ],
327 |       "outputs": [
328 |         {
329 |           "output_type": "stream",
330 |           "name": "stdout",
331 |           "text": [
332 |             "[ 1.]\n"
333 |           ]
334 |         }
335 |       ],
336 |       "execution_count": 11,
337 |       "metadata": {
338 |         "collapsed": false,
339 |         "outputHidden": false,
340 |         "inputHidden": false
341 |       }
342 |     },
343 |     {
344 |       "cell_type": "code",
345 |       "source": [
346 |         "print(pca.singular_values_)"
347 |       ],
348 |       "outputs": [
349 |         {
350 |           "output_type": "stream",
351 |           "name": "stdout",
352 |           "text": [
353 |             "[ 236.05044323]\n"
354 |           ]
355 |         }
356 |       ],
357 |       "execution_count": 12,
358 |       "metadata": {
359 |         "collapsed": false,
360 |         "outputHidden": false,
361 |         "inputHidden": false
362 |       }
363 |     },
364 |     {
365 |       "cell_type": "code",
366 |       "source": [
367 |         "pca.score(X, y=None)"
368 |       ],
369 |       "outputs": [
370 |         {
371 |           "output_type": "execute_result",
372 |           "execution_count": 13,
373 |           "data": {
374 |             "text/plain": [
375 |               "-3.3143449973330785"
376 |             ]
377 |           },
378 |           "metadata": {}
379 |         }
380 |       ],
381 |       "execution_count": 13,
382 |       "metadata": {
383 |         "collapsed": false,
384 |         "outputHidden": false,
385 |         "inputHidden": false
386 |       }
387 |     }
388 |   ],
389 |   "metadata": {
390 |     "kernel_info": {
391 |       "name": "python3"
392 |     },
393 |     "language_info": {
394 |       "name": "python",
395 |       "codemirror_mode": {
396 |         "name": "ipython",
397 |         "version": 3
398 |       },
399 |       "version": "3.5.5",
400 |       "file_extension": ".py",
401 |       "nbconvert_exporter": "python",
402 |       "mimetype": "text/x-python",
403 |       "pygments_lexer": "ipython3"
404 |     },
405 |     "kernelspec": {
406 |       "name": "python3",
407 |       "language": "python",
408 |       "display_name": "Python 3"
409 |     },
410 |     "nteract": {
411 |       "version": "0.15.0"
412 |     }
413 |   },
414 |   "nbformat": 4,
415 |   "nbformat_minor": 4
416 | }


--------------------------------------------------------------------------------
/Stock_Algorithms/Quasi_Poisson_Regression.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "source": [
  6 |         "# Quasi-Poisson Regression"
  7 |       ],
  8 |       "metadata": {}
  9 |     },
 10 |     {
 11 |       "cell_type": "code",
 12 |       "source": [
 13 |         "import numpy as np\n",
 14 |         "import matplotlib.pyplot as plt\n",
 15 |         "import pandas as pd\n",
 16 |         "\n",
 17 |         "import warnings\n",
 18 |         "warnings.filterwarnings(\"ignore\")\n",
 19 |         "\n",
 20 |         "# yahoo finance is used to fetch data \n",
 21 |         "import yfinance as yf\n",
 22 |         "yf.pdr_override()"
 23 |       ],
 24 |       "outputs": [],
 25 |       "execution_count": 1,
 26 |       "metadata": {
 27 |         "collapsed": false,
 28 |         "outputHidden": false,
 29 |         "inputHidden": false,
 30 |         "execution": {
 31 |           "iopub.status.busy": "2021-04-24T04:48:24.336Z",
 32 |           "iopub.execute_input": "2021-04-24T04:48:24.341Z",
 33 |           "iopub.status.idle": "2021-04-24T04:48:25.077Z",
 34 |           "shell.execute_reply": "2021-04-24T04:48:25.095Z"
 35 |         }
 36 |       }
 37 |     },
 38 |     {
 39 |       "cell_type": "code",
 40 |       "source": [
 41 |         "# input\n",
 42 |         "symbol = 'AMD'\n",
 43 |         "start = '2014-01-01'\n",
 44 |         "end = '2018-08-27'\n",
 45 |         "\n",
 46 |         "# Read data \n",
 47 |         "dataset = yf.download(symbol,start,end)\n",
 48 |         "\n",
 49 |         "# View Columns\n",
 50 |         "dataset.head()"
 51 |       ],
 52 |       "outputs": [
 53 |         {
 54 |           "output_type": "stream",
 55 |           "name": "stdout",
 56 |           "text": [
 57 |             "[*********************100%***********************]  1 of 1 completed\n"
 58 |           ]
 59 |         },
 60 |         {
 61 |           "output_type": "execute_result",
 62 |           "execution_count": 2,
 63 |           "data": {
 64 |             "text/plain": "            Open  High   Low  Close  Adj Close    Volume\nDate                                                    \n2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700",
 65 |             "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-02</th>\n      <td>3.85</td>\n      <td>3.98</td>\n      <td>3.84</td>\n      <td>3.95</td>\n      <td>3.95</td>\n      <td>20548400</td>\n    </tr>\n    <tr>\n      <th>2014-01-03</th>\n      <td>3.98</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>22887200</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.01</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>42398300</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.19</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>42932100</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.23</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>30678700</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
 66 |           },
 67 |           "metadata": {}
 68 |         }
 69 |       ],
 70 |       "execution_count": 2,
 71 |       "metadata": {
 72 |         "collapsed": false,
 73 |         "outputHidden": false,
 74 |         "inputHidden": false,
 75 |         "execution": {
 76 |           "iopub.status.busy": "2021-04-24T04:48:25.082Z",
 77 |           "iopub.execute_input": "2021-04-24T04:48:25.086Z",
 78 |           "iopub.status.idle": "2021-04-24T04:48:25.845Z",
 79 |           "shell.execute_reply": "2021-04-24T04:48:25.905Z"
 80 |         }
 81 |       }
 82 |     },
 83 |     {
 84 |       "cell_type": "code",
 85 |       "source": [
 86 |         "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
 87 |         "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
 88 |         "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
 89 |         "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
 90 |         "dataset = dataset.dropna()"
 91 |       ],
 92 |       "outputs": [],
 93 |       "execution_count": 3,
 94 |       "metadata": {
 95 |         "collapsed": true,
 96 |         "jupyter": {
 97 |           "source_hidden": false,
 98 |           "outputs_hidden": false
 99 |         },
100 |         "nteract": {
101 |           "transient": {
102 |             "deleting": false
103 |           }
104 |         },
105 |         "execution": {
106 |           "iopub.status.busy": "2021-04-24T04:48:25.851Z",
107 |           "iopub.execute_input": "2021-04-24T04:48:25.854Z",
108 |           "shell.execute_reply": "2021-04-24T04:48:25.909Z",
109 |           "iopub.status.idle": "2021-04-24T04:48:25.919Z"
110 |         }
111 |       }
112 |     },
113 |     {
114 |       "cell_type": "code",
115 |       "source": [
116 |         "from statsmodels.genmod.generalized_estimating_equations import GEE\n",
117 |         "from statsmodels.genmod.cov_struct import (Exchangeable,\n",
118 |         "    Independence,Autoregressive)\n",
119 |         "from statsmodels.genmod.families import Poisson"
120 |       ],
121 |       "outputs": [],
122 |       "execution_count": 5,
123 |       "metadata": {
124 |         "collapsed": true,
125 |         "jupyter": {
126 |           "source_hidden": false,
127 |           "outputs_hidden": false
128 |         },
129 |         "nteract": {
130 |           "transient": {
131 |             "deleting": false
132 |           }
133 |         },
134 |         "execution": {
135 |           "iopub.status.busy": "2021-04-24T04:48:25.942Z",
136 |           "iopub.execute_input": "2021-04-24T04:48:25.945Z",
137 |           "iopub.status.idle": "2021-04-24T04:48:26.382Z",
138 |           "shell.execute_reply": "2021-04-24T04:48:26.394Z"
139 |         }
140 |       }
141 |     },
142 |     {
143 |       "cell_type": "code",
144 |       "source": [
145 |         "fam = Poisson()\n",
146 |         "ind = Independence()\n",
147 |         "model1 = GEE.from_formula(\"Increase_Decrease ~ Returns + Buy_Sell_on_Open + Open\", 'Buy_Sell', dataset, cov_struct=ind, family=fam)\n",
148 |         "result1 = model1.fit()\n",
149 |         "print(result1.summary())"
150 |       ],
151 |       "outputs": [
152 |         {
153 |           "output_type": "stream",
154 |           "name": "stdout",
155 |           "text": [
156 |             "                               GEE Regression Results                              \n",
157 |             "===================================================================================\n",
158 |             "Dep. Variable:           Increase_Decrease   No. Observations:                 1170\n",
159 |             "Model:                                 GEE   No. clusters:                        2\n",
160 |             "Method:                        Generalized   Min. cluster size:                 584\n",
161 |             "                      Estimating Equations   Max. cluster size:                 586\n",
162 |             "Family:                            Poisson   Mean cluster size:               585.0\n",
163 |             "Dependence structure:         Independence   Num. iterations:                     2\n",
164 |             "Date:                     Fri, 23 Apr 2021   Scale:                           1.000\n",
165 |             "Covariance type:                    robust   Time:                         21:51:04\n",
166 |             "====================================================================================\n",
167 |             "                       coef    std err          z      P>|z|      [0.025      0.975]\n",
168 |             "------------------------------------------------------------------------------------\n",
169 |             "Intercept           -0.7826      0.017    -45.953      0.000      -0.816      -0.749\n",
170 |             "Returns              0.9742      1.267      0.769      0.442      -1.508       3.457\n",
171 |             "Buy_Sell_on_Open    -0.0671      0.172     -0.390      0.696      -0.404       0.270\n",
172 |             "Open                 0.0036      0.003      1.180      0.238      -0.002       0.010\n",
173 |             "==============================================================================\n",
174 |             "Skew:                          0.1802   Kurtosis:                      -1.9614\n",
175 |             "Centered skew:                 0.1789   Centered kurtosis:             -1.9459\n",
176 |             "==============================================================================\n"
177 |           ]
178 |         }
179 |       ],
180 |       "execution_count": 11,
181 |       "metadata": {
182 |         "collapsed": true,
183 |         "jupyter": {
184 |           "source_hidden": false,
185 |           "outputs_hidden": false
186 |         },
187 |         "nteract": {
188 |           "transient": {
189 |             "deleting": false
190 |           }
191 |         },
192 |         "execution": {
193 |           "iopub.status.busy": "2021-04-24T04:51:04.044Z",
194 |           "iopub.execute_input": "2021-04-24T04:51:04.048Z",
195 |           "iopub.status.idle": "2021-04-24T04:51:04.105Z",
196 |           "shell.execute_reply": "2021-04-24T04:51:04.108Z"
197 |         }
198 |       }
199 |     }
200 |   ],
201 |   "metadata": {
202 |     "kernel_info": {
203 |       "name": "python3"
204 |     },
205 |     "language_info": {
206 |       "name": "python",
207 |       "version": "3.6.12",
208 |       "mimetype": "text/x-python",
209 |       "codemirror_mode": {
210 |         "name": "ipython",
211 |         "version": 3
212 |       },
213 |       "pygments_lexer": "ipython3",
214 |       "nbconvert_exporter": "python",
215 |       "file_extension": ".py"
216 |     },
217 |     "kernelspec": {
218 |       "name": "python3",
219 |       "language": "python",
220 |       "display_name": "Python 3"
221 |     },
222 |     "nteract": {
223 |       "version": "0.28.0"
224 |     }
225 |   },
226 |   "nbformat": 4,
227 |   "nbformat_minor": 4
228 | }


--------------------------------------------------------------------------------
/Stock_Algorithms/README.md:
--------------------------------------------------------------------------------
  1 | <img src="Algorithms.PNG">
  2 | 
  3 | ### Description:  
  4 | #### Machine learning algorithms are programs that use math and logic to adjust themselves to perform better as they are exposed to more data. The “learning” part of machine learning means that those programs change how they process data over time, much as humans change how they process data by learning.  
  5 | 
  6 | ### List of Commonly Used Algorithms:  
  7 | #### Linear Regression: A simple yet powerful algorithm that models the relationship between a dependent variable and one or more independent variables by fitting a linear equation to the data.  
  8 | #### Logistic Regression: Widely used for binary classification problems, logistic regression estimates the probability of an event occurring based on a set of input features.  
  9 | #### Decision Trees: These algorithms use a hierarchical structure of nodes and branches to make decisions by splitting the data based on different features. They are interpretable and often form the building blocks of ensemble methods.  
 10 | #### Random Forest: An ensemble learning method that combines multiple decision trees to make predictions. It improves upon decision trees by reducing overfitting and increasing accuracy.   
 11 | #### Support Vector Machines (SVM): SVMs aim to find the optimal hyperplane that separates data points into different classes. They work well for both linearly separable and non-linearly separable data.   
 12 | #### K-Nearest Neighbors (KNN): KNN is a non-parametric algorithm that classifies data based on the majority vote of its nearest neighbors in the feature space.  
 13 | #### Naive Bayes: A probabilistic algorithm that uses Bayes' theorem to make predictions. Despite its simplicity, it performs remarkably well in text classification and spam filtering.
 14 | 
 15 | ### Regression in Machine Learning:  
 16 | #### Regression is a key concept in machine learning used for predicting continuous or numerical values. It analyzes the relationship between independent variables (features) and a dependent variable (target) to build a predictive model.  
 17 | ### Common regression techniques include:  
 18 | #### Linear Regression: The most basic form of regression, it fits a linear equation to the data by minimizing the sum of squared residuals.  
 19 | #### Polynomial Regression: Extends linear regression by introducing polynomial terms to capture non-linear relationships between variables.   
 20 | #### Ridge Regression: Adds a regularization term to linear regression to prevent overfitting by penalizing large coefficient values.   
 21 | #### Lasso Regression: Similar to ridge regression, but it uses the absolute value of the coefficients instead of squared values, leading to sparse solutions.    
 22 | #### Support Vector Regression (SVR): An extension of SVMs for regression tasks. SVR finds a hyperplane that maximizes the margin of support vectors while minimizing the error on the training data.
 23 | 
 24 | # :large_blue_diamond: List of Algorithms :large_blue_diamond:  
 25 | :arrow_forward: AdaBoost Classification  
 26 | :arrow_forward: AdaBoost Regressor   
 27 | :arrow_forward: Anomaly Detection  
 28 | :arrow_forward: Apriori Algorithm  
 29 | :arrow_forward: Artificial Neural Network   
 30 | :arrow_forward: Bagging Classifier   
 31 | :arrow_forward: Bayesian Ridge Regression  
 32 | :arrow_forward: Bernoulli Restricted Boltzmann Machine  
 33 | :arrow_forward: CatBoost Algorithms    
 34 | :arrow_forward: Classification and Clustering  
 35 | :arrow_forward: Clustering Algorithms  
 36 | :arrow_forward: CART (Classification and Regression Trees)     
 37 | :arrow_forward: Decision Tree Classification   
 38 | :arrow_forward: Decision Tree Regression  
 39 | :arrow_forward: Dimensionality Reduction Algorithms  
 40 | :arrow_forward: Ensemble Learning Algorithms  
 41 | :arrow_forward: Explanatory Algorithms  
 42 | :arrow_forward: Gradient Boosting Classification  
 43 | :arrow_forward: Generative Adversarial Networks (GANs)    
 44 | :arrow_forward: K-Means Clustering Algorithm   
 45 | :arrow_forward: K-Nearest Neighbors Algorithm  
 46 | :arrow_forward: Logistic Regression    
 47 | :arrow_forward: Linear Regression    
 48 | :arrow_forward: NetworkX  
 49 | :arrow_forward: Neural Networks Regression  
 50 | :arrow_forward: Quantile Regression  
 51 | :arrow_forward: Partial Least Squares Regression (PLSR)  
 52 | :arrow_forward: Polynomial Regression    
 53 | :arrow_forward: Principal Component Classification  
 54 | :arrow_forward: Principal Component Regression  
 55 | :arrow_forward: Random Forest Classification  
 56 | :arrow_forward: Random Forest Regression   
 57 | :arrow_forward: RNN Tensorflow  
 58 | :arrow_forward: Ridge Regression  
 59 | :arrow_forward: Similarity Algorithms  
 60 | :arrow_forward: Support Vector Machines (SVM)  
 61 | :arrow_forward: Tensorflow  
 62 | :arrow_forward: Time Series  
 63 | :arrow_forward: XGBoost  
 64 | 
 65 | ###  AdaBoost
 66 | AdaBoost is short for Adaptive Boosting and is a statistical classification meta-algorithm created by Yoav Freund and Robert Schapire in 1995. The meta-estimator begins by fitting a classifier on the original dataset and then fits additional copies of the classifier on the same dataset. However, the weights of incorrectly classified instances are adjusted such that subsequent classifiers focus more on difficult cases.      
 67 | 
 68 | ### Anomaly Detection 
 69 | Anomaly detection is the task of identifying data points that don't fit the normal patterns. It is used for identifying rare items, events or observations which deviate significantly from the majority of the data and do not conform to a well-defined notion of normal behaviour.  
 70 | 
 71 | ### Artificial Neural Network  
 72 | Artificial neural networks (ANNs) consist of input, hidden, and output layers with connected neurons (nodes) to simulate the human brain. Each node or artificial neuron is connected to another and has an associated weight and threshold. If the output of any individual node is above the specified threshold value, that node is activated and sends data to the next layer of the network. Otherwise, no data is passed along to the next layer of the network.  
 73 | 
 74 | ### Bagging classifier  
 75 | The Bagging classifier is an ensemble meta-estimator that fits base classifiers each on random subsets of the original dataset and then aggregates their individual predictions (either by voting or by averaging) to form a final prediction.  
 76 | 
 77 | ### Bayesian Ridge Regression  
 78 | Bayesian Ridge Regression is a form of linear regression in which the statistical analysis is undertaken within the context of Bayesian inference. It provides a natural mechanism to cope with insufficient or poorly distributed data by formulating linear regression using probability distributions rather than point estimates.  
 79 | 
 80 | ### Bernoulli Restricted Boltzmann Machine   
 81 | Bernoulli Restricted Boltzmann Machine (RBM) is a generative stochastic artificial neural network that can learn a probability distribution over its set of inputs.  
 82 | 
 83 | ### Decision Tree  
 84 | The Decision Tree algorithm is a supervised machine learning technique and is used for both classification and regression. Decision Tree uses multiple algorithms to decide whether to split a node into two or more sub-nodes. The creation of sub-nodes increases the homogeneity of the resultant sub-nodes; in other words, the purity of each node increases with respect to the target variable.  
 85 | 
 86 | ### Gradient Boosting Algorithm    
 87 | Gradient Boosting is a machine learning technique used in regression and classification. Gradient boosting works by building simple or weak prediction models sequentially, where each model tries to predict the error left over by the previous model. Because of this sequential error-fitting, the algorithm can be prone to overfitting if it is not regularized.  
 88 | 
 89 | ### K-Means Clustering Algorithm  
 90 | K-Means clustering is an unsupervised machine learning algorithm that groups data points into k clusters around their nearest centroid, and is used to solve complex machine learning problems.  
 91 | 
 92 | ### K-Nearest Neighbors Algorithm  
 93 | K-Nearest Neighbors (KNN or k-NN) is a non-parametric, supervised learning classifier, which uses proximity to make classifications or predictions about the grouping of an individual data point.  
 94 | 
 95 | ### Logistic Regression  
 96 | Logistic Regression is used to estimate the probability of an event occurring, such as voted or didn't vote, based on a given dataset of independent variables. Since the outcome is a probability, the dependent variable is bounded between 0 and 1.  
 97 | 
 98 | ### Linear Regression   
 99 | Linear Regression is used to model the relationship between two variables by fitting a linear equation to observed data. One variable is considered to be an explanatory variable, and the other is considered to be a dependent variable.  
100 | 
101 | ### NetworkX  
102 | NetworkX provides classes for graphs which allow multiple edges between any pair of nodes. The MultiGraph and MultiDiGraph classes allow you to add the same edge twice, possibly with different edge data. NetworkX can be powerful for some applications; however, many algorithms are not well defined on such graphs.  
103 | 
104 | ### Neural Networks Regression  
105 | Linear regression can only learn the linear relationship between the features and target and therefore cannot learn the complex non-linear relationship. In order to learn the complex non-linear relationship between the features and target, we are in need of other techniques. Neural Networks Regression is one such technique: it uses an artificial neural network to predict a continuous target value.  
106 | 
107 | ## Boosting Algorithms:  
108 | ### Gradient Boosting Machines (GBM):  
109 | Gradient boosting is a machine learning technique used in regression and classification tasks, among others. It gives a prediction model in the form of an ensemble of weak prediction models, which are typically decision trees. When a decision tree is the weak learner, the resulting algorithm is called gradient-boosted trees; it usually outperforms random forests. A gradient-boosted trees model is built in a stage-wise fashion as in other boosting methods, but it generalizes the other methods by allowing optimization of an arbitrary differentiable loss function.  
110 | 
111 | ### XGBoost:  
112 | XGBoost is a scalable and highly accurate implementation of gradient boosting that pushes the limits of computing power for boosted tree algorithms, being built largely to improve machine learning model performance and computational speed.  
113 | 
114 | ### LightGBM:  
115 | LightGBM is a gradient boosting framework that uses tree-based learning algorithms.  
116 | 
117 | ### Catboost:    
118 | CatBoost is an algorithm for gradient boosting on decision trees.  
119 | 
120 | ### Classification and Clustering  
121 | Classification examples are Logistic regression, Naive Bayes classifier, Support vector machines, and other algorithms relating to classification. Clustering examples are the k-means clustering algorithm, Fuzzy c-means clustering algorithm, Gaussian (EM) clustering algorithm, and other algorithms relating to clustering.  
122 | 
123 | ## Authors  
124 | ### * Tin Hang  
125 | 
126 | ## 🔴 Warning: This is not financial advice. Do not use this for investing or trading purposes. It is for educational purposes only.  
127 | 


--------------------------------------------------------------------------------
/Stock_Algorithms/TruncatedSVD.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "source": [
  6 |         "# Dimensionality reduction using truncated SVD (aka LSA)"
  7 |       ],
  8 |       "metadata": {}
  9 |     },
 10 |     {
 11 |       "cell_type": "code",
 12 |       "source": [
 13 |         "import numpy as np\n",
 14 |         "import matplotlib.pyplot as plt\n",
 15 |         "import pandas as pd\n",
 16 |         "\n",
 17 |         "import warnings\n",
 18 |         "warnings.filterwarnings(\"ignore\")\n",
 19 |         "\n",
 20 |         "# fix_yahoo_finance is used to fetch data \n",
 21 |         "import fix_yahoo_finance as yf\n",
 22 |         "yf.pdr_override()"
 23 |       ],
 24 |       "outputs": [],
 25 |       "execution_count": 1,
 26 |       "metadata": {
 27 |         "collapsed": false,
 28 |         "outputHidden": false,
 29 |         "inputHidden": false
 30 |       }
 31 |     },
 32 |     {
 33 |       "cell_type": "code",
 34 |       "source": [
 35 |         "# input\n",
 36 |         "symbol = 'AMD'\n",
 37 |         "start = '2014-01-01'\n",
 38 |         "end = '2019-01-01'\n",
 39 |         "\n",
 40 |         "# Read data \n",
 41 |         "dataset = yf.download(symbol,start,end)\n",
 42 |         "\n",
 43 |         "# View Columns\n",
 44 |         "dataset.head()"
 45 |       ],
 46 |       "outputs": [
 47 |         {
 48 |           "output_type": "stream",
 49 |           "name": "stdout",
 50 |           "text": [
 51 |             "[*********************100%***********************]  1 of 1 downloaded\n"
 52 |           ]
 53 |         },
 54 |         {
 55 |           "output_type": "execute_result",
 56 |           "execution_count": 2,
 57 |           "data": {
 58 |             "text/html": [
 59 |               "<div>\n",
 60 |               "<style scoped>\n",
 61 |               "    .dataframe tbody tr th:only-of-type {\n",
 62 |               "        vertical-align: middle;\n",
 63 |               "    }\n",
 64 |               "\n",
 65 |               "    .dataframe tbody tr th {\n",
 66 |               "        vertical-align: top;\n",
 67 |               "    }\n",
 68 |               "\n",
 69 |               "    .dataframe thead th {\n",
 70 |               "        text-align: right;\n",
 71 |               "    }\n",
 72 |               "</style>\n",
 73 |               "<table border=\"1\" class=\"dataframe\">\n",
 74 |               "  <thead>\n",
 75 |               "    <tr style=\"text-align: right;\">\n",
 76 |               "      <th></th>\n",
 77 |               "      <th>Open</th>\n",
 78 |               "      <th>High</th>\n",
 79 |               "      <th>Low</th>\n",
 80 |               "      <th>Close</th>\n",
 81 |               "      <th>Adj Close</th>\n",
 82 |               "      <th>Volume</th>\n",
 83 |               "    </tr>\n",
 84 |               "    <tr>\n",
 85 |               "      <th>Date</th>\n",
 86 |               "      <th></th>\n",
 87 |               "      <th></th>\n",
 88 |               "      <th></th>\n",
 89 |               "      <th></th>\n",
 90 |               "      <th></th>\n",
 91 |               "      <th></th>\n",
 92 |               "    </tr>\n",
 93 |               "  </thead>\n",
 94 |               "  <tbody>\n",
 95 |               "    <tr>\n",
 96 |               "      <th>2014-01-02</th>\n",
 97 |               "      <td>3.85</td>\n",
 98 |               "      <td>3.98</td>\n",
 99 |               "      <td>3.84</td>\n",
100 |               "      <td>3.95</td>\n",
101 |               "      <td>3.95</td>\n",
102 |               "      <td>20548400</td>\n",
103 |               "    </tr>\n",
104 |               "    <tr>\n",
105 |               "      <th>2014-01-03</th>\n",
106 |               "      <td>3.98</td>\n",
107 |               "      <td>4.00</td>\n",
108 |               "      <td>3.88</td>\n",
109 |               "      <td>4.00</td>\n",
110 |               "      <td>4.00</td>\n",
111 |               "      <td>22887200</td>\n",
112 |               "    </tr>\n",
113 |               "    <tr>\n",
114 |               "      <th>2014-01-06</th>\n",
115 |               "      <td>4.01</td>\n",
116 |               "      <td>4.18</td>\n",
117 |               "      <td>3.99</td>\n",
118 |               "      <td>4.13</td>\n",
119 |               "      <td>4.13</td>\n",
120 |               "      <td>42398300</td>\n",
121 |               "    </tr>\n",
122 |               "    <tr>\n",
123 |               "      <th>2014-01-07</th>\n",
124 |               "      <td>4.19</td>\n",
125 |               "      <td>4.25</td>\n",
126 |               "      <td>4.11</td>\n",
127 |               "      <td>4.18</td>\n",
128 |               "      <td>4.18</td>\n",
129 |               "      <td>42932100</td>\n",
130 |               "    </tr>\n",
131 |               "    <tr>\n",
132 |               "      <th>2014-01-08</th>\n",
133 |               "      <td>4.23</td>\n",
134 |               "      <td>4.26</td>\n",
135 |               "      <td>4.14</td>\n",
136 |               "      <td>4.18</td>\n",
137 |               "      <td>4.18</td>\n",
138 |               "      <td>30678700</td>\n",
139 |               "    </tr>\n",
140 |               "  </tbody>\n",
141 |               "</table>\n",
142 |               "</div>"
143 |             ],
144 |             "text/plain": [
145 |               "            Open  High   Low  Close  Adj Close    Volume\n",
146 |               "Date                                                    \n",
147 |               "2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n",
148 |               "2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n",
149 |               "2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n",
150 |               "2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n",
151 |               "2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700"
152 |             ]
153 |           },
154 |           "metadata": {}
155 |         }
156 |       ],
157 |       "execution_count": 2,
158 |       "metadata": {
159 |         "collapsed": false,
160 |         "outputHidden": false,
161 |         "inputHidden": false
162 |       }
163 |     },
164 |     {
165 |       "cell_type": "code",
166 |       "source": [
167 |         "X = dataset.iloc[ : , 1:4].values\n",
168 |         "Y = dataset.iloc[ : , 4].values"
169 |       ],
170 |       "outputs": [],
171 |       "execution_count": 23,
172 |       "metadata": {
173 |         "collapsed": false,
174 |         "outputHidden": false,
175 |         "inputHidden": false
176 |       }
177 |     },
178 |     {
179 |       "cell_type": "code",
180 |       "source": [
181 |         "print(X.shape)\n",
182 |         "print(Y.shape)"
183 |       ],
184 |       "outputs": [
185 |         {
186 |           "output_type": "stream",
187 |           "name": "stdout",
188 |           "text": [
189 |             "(1258, 3)\n",
190 |             "(1258,)\n"
191 |           ]
192 |         }
193 |       ],
194 |       "execution_count": 24,
195 |       "metadata": {
196 |         "collapsed": false,
197 |         "outputHidden": false,
198 |         "inputHidden": false
199 |       }
200 |     },
201 |     {
202 |       "cell_type": "code",
203 |       "source": [
204 |         "X = np.array(X).reshape(1258,-1)\n",
205 |         "Y = np.array(Y).reshape(1258,-1)"
206 |       ],
207 |       "outputs": [],
208 |       "execution_count": 25,
209 |       "metadata": {
210 |         "collapsed": false,
211 |         "outputHidden": false,
212 |         "inputHidden": false
213 |       }
214 |     },
215 |     {
216 |       "cell_type": "code",
217 |       "source": [
218 |         "from sklearn.decomposition import TruncatedSVD"
219 |       ],
220 |       "outputs": [],
221 |       "execution_count": 19,
222 |       "metadata": {
223 |         "collapsed": false,
224 |         "outputHidden": false,
225 |         "inputHidden": false
226 |       }
227 |     },
228 |     {
229 |       "cell_type": "code",
230 |       "source": [
231 |         "svd = TruncatedSVD(n_components=1, n_iter=2, random_state=42)\n",
232 |         "svd.fit(X)  "
233 |       ],
234 |       "outputs": [
235 |         {
236 |           "output_type": "execute_result",
237 |           "execution_count": 27,
238 |           "data": {
239 |             "text/plain": [
240 |               "TruncatedSVD(algorithm='randomized', n_components=1, n_iter=2,\n",
241 |               "       random_state=42, tol=0.0)"
242 |             ]
243 |           },
244 |           "metadata": {}
245 |         }
246 |       ],
247 |       "execution_count": 27,
248 |       "metadata": {
249 |         "collapsed": false,
250 |         "outputHidden": false,
251 |         "inputHidden": false
252 |       }
253 |     },
254 |     {
255 |       "cell_type": "code",
256 |       "source": [
257 |         "print(svd.explained_variance_ratio_)"
258 |       ],
259 |       "outputs": [
260 |         {
261 |           "output_type": "stream",
262 |           "name": "stdout",
263 |           "text": [
264 |             "[ 0.99951387]\n"
265 |           ]
266 |         }
267 |       ],
268 |       "execution_count": 28,
269 |       "metadata": {
270 |         "collapsed": false,
271 |         "outputHidden": false,
272 |         "inputHidden": false
273 |       }
274 |     },
275 |     {
276 |       "cell_type": "code",
277 |       "source": [
278 |         "print(svd.singular_values_) "
279 |       ],
280 |       "outputs": [
281 |         {
282 |           "output_type": "stream",
283 |           "name": "stdout",
284 |           "text": [
285 |             "[ 639.8979399]\n"
286 |           ]
287 |         }
288 |       ],
289 |       "execution_count": 29,
290 |       "metadata": {
291 |         "collapsed": false,
292 |         "outputHidden": false,
293 |         "inputHidden": false
294 |       }
295 |     }
296 |   ],
297 |   "metadata": {
298 |     "kernel_info": {
299 |       "name": "python3"
300 |     },
301 |     "language_info": {
302 |       "version": "3.5.5",
303 |       "codemirror_mode": {
304 |         "version": 3,
305 |         "name": "ipython"
306 |       },
307 |       "pygments_lexer": "ipython3",
308 |       "file_extension": ".py",
309 |       "nbconvert_exporter": "python",
310 |       "name": "python",
311 |       "mimetype": "text/x-python"
312 |     },
313 |     "kernelspec": {
314 |       "name": "python3",
315 |       "language": "python",
316 |       "display_name": "Python 3"
317 |     },
318 |     "nteract": {
319 |       "version": "0.12.2"
320 |     }
321 |   },
322 |   "nbformat": 4,
323 |   "nbformat_minor": 4
324 | }


--------------------------------------------------------------------------------
/Stock_Apps/README.md:
--------------------------------------------------------------------------------
 1 | <img src="Stock_Apps.PNG">
 2 | 
 3 | 
 4 | # Stock Applications & Software  
 5 | 
 6 | ## Descriptions:
 7 | Different types of algorithms for stock predictions
 8 | 
 9 | __Input__  
10 | 1. Input the stock starting date  
11 | 2. Input the stock ending date  
12 | 3. Input the stock symbol  
13 | 4. Choose Algorithms for Stock Prediction  
14 | 
15 | ## Authors  
16 | ### * Tin Hang  
17 | 


--------------------------------------------------------------------------------
/Stock_Apps/Stock_Apps.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LastAncientOne/Deep_Learning_Machine_Learning_Stock/e31e9fc689cd2cb6d74a8f2694750afd6e13bcec/Stock_Apps/Stock_Apps.PNG


--------------------------------------------------------------------------------
/Stock_Apps/Stock_ML_Data_PreProcessing_Apps.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sat May 25 14:21:27 2019
  4 | 
  5 | @author: Tin
  6 | """
  7 | import numpy as np
  8 | import pandas as pd
  9 | import datetime
 10 | 
 11 | from sklearn.preprocessing import MinMaxScaler
 12 | from sklearn.preprocessing import Binarizer
 13 | from sklearn.preprocessing import StandardScaler
 14 | from sklearn.model_selection import train_test_split
 15 | 
 16 | 
 17 | import warnings
 18 | warnings.filterwarnings("ignore")
 19 | 
 20 | # yahoo finance used to fetch data 
 21 | import yfinance as yf
 22 | yf.pdr_override()
 23 | 
 24 | options = " Data Preprocessing, Exit".split(",")
 25 | 
 26 | # Input Start Date
 27 | def start_date():
 28 |     date_entry = input('Enter a starting date in MM/DD/YYYY format: ')
 29 |     start = datetime.datetime.strptime(date_entry,'%m/%d/%Y')
 30 |     start = start.strftime('%Y-%m-%d')
 31 |     return start
 32 | 
 33 | # Input End Date
 34 | def end_date():
 35 |     date_entry = input('Enter a ending date in MM/DD/YYYY format: ')
 36 |     end = datetime.datetime.strptime(date_entry,'%m/%d/%Y')
 37 |     end = end.strftime('%Y-%m-%d')
 38 |     return end
 39 | 
 40 | # Input Symbols
 41 | def input_symbol():
 42 |     symbol = input("Enter symbol: ").upper()
 43 |     return symbol
 44 | 
 45 | # Rescaled Dataset
 46 | def Rescale_Dataset():
 47 |     s = start_date() 
 48 |     e = end_date()
 49 |     sym = input_symbol()
 50 |     df = yf.download(sym, s, e)
 51 |     array = df.values
 52 |     X = array[:,0:5]
 53 |     Y = array[:,5]
 54 |     # initialising the MinMaxScaler
 55 |     scaler = MinMaxScaler(feature_range=(0, 1))
 56 |     # learning the statistical parameters for each of the data and transforming
 57 |     rescaledX = scaler.fit_transform(X)
 58 |     np.set_printoptions(precision=3)
 59 |     print('Rescaled values between 0 to 1')
 60 |     print(rescaledX[0:5,:])
 61 |     print("")
 62 |     # Splitting the datasets into training sets and Test sets
 63 |     X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
 64 |     sc_X = StandardScaler()
 65 |     # Splitting the datasets into training sets and Test sets
 66 |     X_train = sc_X.fit_transform(X_train)
 67 |     X_test = sc_X.fit_transform(X_test)
 68 |     print("Training Dataset")
 69 |     print(X_train)
 70 |     print("")
 71 |     print(Y_train)
 72 |     print("")
 73 |     print("Testing Dataset")
 74 |     print(X_test)
 75 |     print("")
 76 |     print(Y_test)
 77 |     print("")
 78 |     ans = ['1', '2'] 
 79 |     user_input=input("""                  
 80 | What would you like to do next? Enter option 1 or 2.  
 81 | 1. Menu
 82 | 2. Exit
 83 | Command: """)   
 84 |     while user_input not in ans:
 85 |         print("Error: Please enter a a valid option 1-2")
 86 |         user_input=input("Command: ")
 87 |     if user_input=="1":
 88 |         menu()
 89 |     elif user_input=="2":
 90 |         exit()    
 91 |         
 92 |         
 93 | #***********************************************************************************************************************#     
 94 | # Binarize Data 
 95 | def Binarize_Dataset():
 96 |     s = start_date() 
 97 |     e = end_date()
 98 |     sym = input_symbol()
 99 |     df = yf.download(sym, s, e)
100 |     array = df.values
101 |     X = array[:,0:5]
102 |     Y = array[:,5]
103 |     # initialising the binarize
104 |     binarizer = Binarizer(threshold = 0.0).fit(X)
105 |     binaryX = binarizer.transform(X)
106 |     np.set_printoptions(precision=3)
107 |     print('Binarize values equal or less than 0 are marked 0 and all of those above 0 are marked 1')
108 |     print(binaryX[0:5,:])
109 |     print("")
110 |     # Splitting the datasets into training sets and Test sets
111 |     X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
112 |     sc_X = StandardScaler()
113 |     # Splitting the datasets into training sets and Test sets
114 |     X_train = sc_X.fit_transform(X_train)
115 |     X_test = sc_X.fit_transform(X_test)
116 |     print("Training Dataset")
117 |     print(X_train)
118 |     print("")
119 |     print(Y_train)
120 |     print("")
121 |     print("Testing Dataset")
122 |     print(X_test)
123 |     print("")
124 |     print(Y_test)
125 |     print("")
126 |     ans = ['1', '2'] 
127 |     user_input=input("""                  
128 | What would you like to do next? Enter option 1 or 2.  
129 | 1. Menu
130 | 2. Exit
131 | Command: """)   
132 |     while user_input not in ans:
133 |         print("Error: Please enter a a valid option 1-2")
134 |         user_input=input("Command: ")
135 |     if user_input=="1":
136 |         menu()
137 |     elif user_input=="2":
138 |         exit()    
139 |         
140 | 
141 | #***********************************************************************************************************************#     
142 | # Standardize Data  
def Standardize_Dataset():
    """Download price data and print standardized views of it.

    Prompts for a start date, an end date and a symbol, downloads the data
    with ``yf.download`` and then prints:

    * the first five rows of the features standardized over the whole set, and
    * an 80/20 train/test split whose features are standardized.

    Afterwards the user chooses between returning to ``menu()`` and exiting.

    Returns:
        None
    """
    s = start_date()
    e = end_date()
    sym = input_symbol()
    df = yf.download(sym, s, e)
    array = df.values
    # NOTE(review): assumes the download has at least six columns; with the
    # classic Open/High/Low/Close/Adj Close/Volume layout the target would be
    # Volume - confirm against the yfinance version in use.
    X = array[:, 0:5]
    Y = array[:, 5]
    # initialising the standardize
    scaler = StandardScaler().fit(X)
    rescaledX = scaler.transform(X)
    np.set_printoptions(precision=3)
    print('Standardize values with a mean of 0 and a standard deviation of 1')
    print(rescaledX[0:5, :])
    print("")
    # Splitting the datasets into training sets and Test sets
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
    sc_X = StandardScaler()
    # Fit the scaler on the training features only, then apply the very same
    # mean/std to the test features. Re-fitting on the test split would scale
    # the two sets inconsistently.
    X_train = sc_X.fit_transform(X_train)
    X_test = sc_X.transform(X_test)
    print("Training Dataset")
    print(X_train)
    print("")
    print(Y_train)
    print("")
    print("Testing Dataset")
    print(X_test)
    print("")
    print(Y_test)
    print("")
    ans = ['1', '2']
    user_input = input("""                  
What would you like to do next? Enter option 1 or 2.  
1. Menu
2. Exit
Command: """)
    # Keep asking until one of the two valid options is entered.
    while user_input not in ans:
        print("Error: Please enter a a valid option 1-2")
        user_input = input("Command: ")
    if user_input == "1":
        menu()
    elif user_input == "2":
        exit()
187 |         
188 | 
189 | 
190 | 
191 | #***********************************************************************************************************************#
192 | #******************************************************* Menu **********************************************************#
193 | #***********************************************************************************************************************#  
def menu():
    """Print the preprocessing menu and run the option the user selects.

    Valid commands are ``1``-``4`` and ``0``; anything else is rejected and
    the user is asked again. Options 1-3 run the matching dataset routine,
    4 returns to ``beginning()`` and 0 exits the program.

    Returns:
        None
    """
    ans = ['1', '2', '3', '4', '0']
    print(""" 
              
                           MENU
                     PREPROCESSING DATASET       
                  ---------------------------
                  1. Rescaled Data
                  2. Binarize Data 
                  3. Standardize Data  
                  4. Beginning Menu
                  0. Exit the Program
                  """)
    # The prompt and error text list the full accepted range (0-4).
    user_input = input("Command (0-4): ")
    while user_input not in ans:
        print("Error: Please enter a valid option 0-4")
        user_input = input("Command: ")
    if user_input == '1':
        # The function defined in this file is Rescale_Dataset; the previous
        # spelling (Rescaled_Dataset) does not exist and raised NameError.
        Rescale_Dataset()
    elif user_input == '2':
        Binarize_Dataset()
    elif user_input == '3':
        Standardize_Dataset()
    elif user_input == "4":
        beginning()
    elif user_input == "0":
        exit()
221 |         
222 |         
223 | #***********************************************************************************************************************#    
224 | #*************************************************** Start of Program **************************************************# 
225 | #***********************************************************************************************************************#  
def beginning():
    """Show the welcome screen and either open the menu or quit.

    The user must answer ``1`` (open ``menu()``) or ``2`` (exit); any other
    answer is rejected and the prompt is repeated.

    Returns:
        None
    """
    print()
    print("----------Welcome to Preprocessing Dataset--------")
    print("""
Please choose option 1 or 2
              
1. Menu
2. Exit Program 
---------------------------------------------""")
    valid_choices = ['1', '2']
    choice = input("What is your Option?: ")
    # Re-prompt until the answer is one of the two valid choices.
    while choice not in valid_choices:
        print("Error: Please enter a a valid option 1-2")
        choice = input("Command: ")
    if choice == "2":
        exit()
    else:
        menu()


#***********************************************************************************************************************#     
# Launch the interactive program.
beginning()


--------------------------------------------------------------------------------
/Stock_Apps/Stock_ML_Predict_Apps.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sat May 25 14:21:27 2019
  4 | 
  5 | @author: Tin
  6 | """
  7 | import numpy as np
  8 | import matplotlib.pyplot as plt
  9 | 
 10 | import datetime
 11 | 
 12 | # Machine Learning Libraries
 13 | from sklearn.linear_model import LinearRegression
 14 | from sklearn.linear_model import LogisticRegression
 15 | from sklearn.model_selection import train_test_split
 16 | from sklearn import metrics
 17 | from sklearn.model_selection import cross_val_score
 18 | from sklearn.svm import SVR 
 19 | 
 20 | import warnings
 21 | warnings.filterwarnings("ignore")
 22 | 
 23 | # yahoo finance used to fetch data 
 24 | import yfinance as yf
 25 | yf.pdr_override()
 26 | 
 27 | options = " Stock Linear Regression Prediction, Stock Logistic Regression Prediction, Support Vector Regression, Exit".split(",")
 28 | 
 29 | # Input Start Date
 30 | def start_date():
 31 |     date_entry = input('Enter a starting date in MM/DD/YYYY format: ')
 32 |     start = datetime.datetime.strptime(date_entry,'%m/%d/%Y')
 33 |     start = start.strftime('%Y-%m-%d')
 34 |     return start
 35 | 
 36 | # Input End Date
 37 | def end_date():
 38 |     date_entry = input('Enter a ending date in MM/DD/YYYY format: ')
 39 |     end = datetime.datetime.strptime(date_entry,'%m/%d/%Y')
 40 |     end = end.strftime('%Y-%m-%d')
 41 |     return end
 42 | 
 43 | # Input Symbols
 44 | def input_symbol():
 45 |     symbol = input("Enter symbol: ").upper()
 46 |     return symbol
 47 | 
 48 | # Logistic Regression
 49 | def stock_logistic_regression():
 50 |     s = start_date() 
 51 |     e = end_date()
 52 |     sym = input_symbol()
 53 |     df = yf.download(sym, s, e)
 54 |  
 55 |     df = df.drop(['Date'], axis=1)
 56 |     X = df.loc[:, df.columns != 'Adj Close']
 57 |     y = np.where (df['Adj Close'].shift(-1) > df['Adj Close'],1,-1)
 58 | 
 59 |     split = int(0.7*len(df))
 60 |     X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:]
 61 |     model = LogisticRegression()
 62 |     model = model.fit(X_train,y_train)
 63 |     predicted = model.predict(X_test)
 64 |     print(metrics.confusion_matrix(y_test, predicted))
 65 |     print(metrics.classification_report(y_test, predicted))
 66 |     print(model.score(X_test,y_test))
 67 |     cross_val = cross_val_score(LogisticRegression(), X, y, scoring='accuracy', cv=10)
 68 |     print(cross_val)
 69 |     print(cross_val.mean())
 70 |     return
 71 | 
 72 | # Linear Regression
 73 | def stock_linear_regression():
 74 |     s = start_date() 
 75 |     e = end_date()
 76 |     sym = input_symbol()
 77 |     df = yf.download(sym, s, e)
 78 |     n = len(df.index)
 79 |     X = np.array(df['Open']).reshape(n,-1)
 80 |     Y = np.array(df['Adj Close']).reshape(n,-1)
 81 |     lr = LinearRegression()
 82 |     lr.fit(X, Y)
 83 |     lr.predict(X)
 84 |     
 85 |     plt.figure(figsize=(12,8))
 86 |     plt.scatter(df['Adj Close'], lr.predict(X))
 87 |     plt.plot(X, lr.predict(X), color = 'red')
 88 |     plt.xlabel('Prices')
 89 |     plt.ylabel('Predicted Prices')
 90 |     plt.grid()
 91 |     plt.title(sym + ' Prices vs Predicted Prices')
 92 |     plt.show()
 93 |     print('____________Summary:____________')      
 94 |     print('Estimate intercept coefficient:', lr.intercept_)
 95 |     print('Number of coefficients:', len(lr.coef_))
 96 |     print('Accuracy Score:', lr.score(X, Y))
 97 |     print("")
 98 |     return
 99 | 
100 | # Support Vector Regression
def stock_svr():
    """Fit linear, polynomial and RBF SVR models on the price series.

    Prompts for a date range and symbol, downloads the data with
    ``yf.download``, fits three ``SVR`` models of ``Adj Close`` against the
    frame's index, plots the data with each model's fit, and prints each
    model's prediction for the query value ``31``.

    Returns:
        None
    """
    s = start_date()
    e = end_date()
    sym = input_symbol()
    df = yf.download(sym, s, e)
    # Column vector with one row per entry of the index, shape (n, 1).
    dates = np.reshape(df.index, (len(df.index), 1))
    # Query point as a (1, 1) array. The original called len() on the int 31,
    # which raises TypeError.
    # NOTE(review): `dates` comes from the frame index while the query is the
    # bare number 31 - confirm both are meant to be on the same scale.
    x = np.reshape([31], (1, 1))
    prices = df['Adj Close']
    svr_lin = SVR(kernel='linear', C=1e3)
    svr_poly = SVR(kernel='poly', C=1e3, degree=2)
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)

    # Fit regression model
    svr_lin.fit(dates, prices)
    svr_poly.fit(dates, prices)
    svr_rbf.fit(dates, prices)

    plt.figure(figsize=(12,8))
    plt.scatter(dates, prices, c='k', label='Data')
    plt.plot(dates, svr_lin.predict(dates), c='g', label='Linear model')
    plt.plot(dates, svr_rbf.predict(dates), c='r', label='RBF model')
    plt.plot(dates, svr_poly.predict(dates), c='b', label='Polynomial model')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()
    print('____________Summary:____________')
    # Each label now reports its own model (Linear and RBF were swapped).
    print('Linear Model:', svr_lin.predict(x)[0])
    print('RBF Model:', svr_rbf.predict(x)[0])
    print('Polynomial Model:', svr_poly.predict(x)[0])
    print("")
    return
135 | 
136 |     
def main():
    """Run the text menu loop until the user picks the Exit option.

    Each pass prints the numbered entries of the module-level ``options``
    list, reads a number and runs the matching prediction routine. Input
    that is not an integer is reported and the menu is shown again; integers
    outside 1-4 simply redisplay the menu.

    Returns:
        None
    """
    run_program = True
    while run_program:
        print("__________Stock Price Prediction__________")
        print("Choose Options:")
        for number, label in enumerate(options, start=1):
            print("{} - {}".format(number, label))
        try:
            choice = int(input())
        except ValueError:
            # Non-numeric input used to crash the program; ask again instead.
            print("Error: Please enter a number between 1 and {}".format(len(options)))
            continue

        if choice == 1:
            print("____________Linear Regression_____________")
            stock_linear_regression()
        elif choice == 2:
            print("____________Logistic Regression_____________")
            stock_logistic_regression()
        elif choice == 3:
            print("____________Support Vector Regression_____________")
            # Option 3 is Support Vector Regression; it previously ran the
            # logistic regression routine by mistake.
            stock_svr()
        elif choice == 4:
            run_program = False


if __name__ == "__main__":
    main()
161 | 


--------------------------------------------------------------------------------
/Stock_Apps/Stock_ML_Predict_Apps_Menu.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Fri Dec 20 19:44:59 2019
  4 | 
  5 | @author: Tin
  6 | """
  7 | 
  8 | import numpy as np
  9 | import matplotlib.pyplot as plt
 10 | 
 11 | import datetime
 12 | 
 13 | # Machine Learning Libraries
 14 | from sklearn.linear_model import LinearRegression
 15 | from sklearn.linear_model import LogisticRegression
 16 | # from sklearn.model_selection import train_test_split
 17 | from sklearn import metrics
 18 | from sklearn.model_selection import cross_validate
 19 | from sklearn.svm import SVR 
 20 | 
 21 | import warnings
 22 | warnings.filterwarnings("ignore")
 23 | 
 24 | # yahoo finance used to fetch data 
 25 | import yfinance as yf
 26 | yf.pdr_override()
 27 | 
 28 | options = " Stock Linear Regression Prediction, Stock Logistic Regression Prediction, Support Vector Regression, Exit".split(",")
 29 | 
 30 | # Input Start Date
 31 | def start_date():
 32 |     date_entry = input('Enter a starting date in MM/DD/YYYY format: ')
 33 |     start = datetime.datetime.strptime(date_entry,'%m/%d/%Y')
 34 |     start = start.strftime('%Y-%m-%d')
 35 |     return start
 36 | 
 37 | # Input End Date
 38 | def end_date():
 39 |     date_entry = input('Enter a ending date in MM/DD/YYYY format: ')
 40 |     end = datetime.datetime.strptime(date_entry,'%m/%d/%Y')
 41 |     end = end.strftime('%Y-%m-%d')
 42 |     return end
 43 | 
 44 | # Input Symbols
 45 | def input_symbol():
 46 |     symbol = input("Enter symbol: ").upper()
 47 |     return symbol
 48 | 
 49 | # Logistic Regression
 50 | def stock_logistic_regression():
 51 |     s = start_date() 
 52 |     e = end_date()
 53 |     sym = input_symbol()
 54 |     df = yf.download(sym, s, e)
 55 |  
 56 |     df = df.drop(['Date'], axis=1)
 57 |     X = df.loc[:, df.columns != 'Adj Close']
 58 |     y = np.where (df['Adj Close'].shift(-1) > df['Adj Close'],1,-1)
 59 | 
 60 |     split = int(0.7*len(df))
 61 |     X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:]
 62 |     model = LogisticRegression()
 63 |     model = model.fit(X_train,y_train)
 64 |     predicted = model.predict(X_test)
 65 |     print(metrics.confusion_matrix(y_test, predicted))
 66 |     print(metrics.classification_report(y_test, predicted))
 67 |     print(model.score(X_test,y_test))
 68 |     cross_val = cross_validate(LogisticRegression(), X, y, scoring='accuracy', cv=10)
 69 |     print('_____________Summary:_____________') 
 70 |     print(cross_val)
 71 |     print(cross_val.mean())
 72 |     print("")
 73 |     ans = ['1', '2'] 
 74 |     user_input=input("""                  
 75 | What would you like to do next? Enter option 1 or 2.  
 76 | 1. Menu
 77 | 2. Exit
 78 | Command: """)   
 79 |     while user_input not in ans:
 80 |         print("Error: Please enter a a valid option 1-2")
 81 |         user_input=input("Command: ")
 82 |     if user_input=="1":
 83 |         menu()
 84 |     elif user_input=="2":
 85 |         exit()    
 86 |         
 87 | 
 88 | # Linear Regression
 89 | def stock_linear_regression():
 90 |     s = start_date() 
 91 |     e = end_date()
 92 |     sym = input_symbol()
 93 |     df = yf.download(sym, s, e)
 94 |     n = len(df.index)
 95 |     X = np.array(df['Open']).reshape(n,-1)
 96 |     Y = np.array(df['Adj Close']).reshape(n,-1)
 97 |     lr = LinearRegression()
 98 |     lr.fit(X, Y)
 99 |     lr.predict(X)
100 |     
101 |     plt.figure(figsize=(12,8))
102 |     plt.scatter(df['Adj Close'], lr.predict(X))
103 |     plt.plot(X, lr.predict(X), color = 'red')
104 |     plt.xlabel('Prices')
105 |     plt.ylabel('Predicted Prices')
106 |     plt.grid()
107 |     plt.title(sym + ' Prices vs Predicted Prices')
108 |     plt.show()
109 |     print('_____________Summary:_____________')       
110 |     print('Estimate intercept coefficient:', lr.intercept_)
111 |     print('Number of coefficients:', len(lr.coef_))
112 |     print('Accuracy Score:', lr.score(X, Y))
113 |     print("")
114 |     ans = ['1', '2'] 
115 |     user_input=input("""                  
116 | What would you like to do next? Enter option 1 or 2.  
117 | 1. Menu
118 | 2. Exit
119 | Command: """)   
120 |     while user_input not in ans:
121 |         print("Error: Please enter a a valid option 1-2")
122 |         user_input=input("Command: ")
123 |     if user_input=="1":
124 |         menu()
125 |     elif user_input=="2":
126 |         exit()    
127 |         
128 | 
129 | # Support Vector Regression
def stock_svr():
    """Fit linear, polynomial and RBF SVR models on the price series.

    Prompts for a date range and symbol, downloads the data with
    ``yf.download``, fits three ``SVR`` models of ``Adj Close`` against the
    frame's index, plots the data with each model's fit, and prints each
    model's prediction for the query value ``31``. Afterwards the user
    chooses between returning to ``menu()`` and exiting.

    Returns:
        None
    """
    s = start_date()
    e = end_date()
    sym = input_symbol()
    df = yf.download(sym, s, e)
    # Column vector with one row per entry of the index, shape (n, 1).
    dates = np.reshape(df.index, (len(df.index), 1))
    # Query point as a (1, 1) array. The original called len() on the int 31,
    # which raises TypeError.
    # NOTE(review): `dates` comes from the frame index while the query is the
    # bare number 31 - confirm both are meant to be on the same scale.
    x = np.reshape([31], (1, 1))
    prices = df['Adj Close']
    svr_lin = SVR(kernel='linear', C=1e3)
    svr_poly = SVR(kernel='poly', C=1e3, degree=2)
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)

    # Fit regression model
    svr_lin.fit(dates, prices)
    svr_poly.fit(dates, prices)
    svr_rbf.fit(dates, prices)

    plt.figure(figsize=(12,8))
    plt.scatter(dates, prices, c='k', label='Data')
    plt.plot(dates, svr_lin.predict(dates), c='g', label='Linear model')
    plt.plot(dates, svr_rbf.predict(dates), c='r', label='RBF model')
    plt.plot(dates, svr_poly.predict(dates), c='b', label='Polynomial model')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()
    print('_____________Summary:_____________')
    # Each label now reports its own model (Linear and RBF were swapped).
    print('Linear Model:', svr_lin.predict(x)[0])
    print('RBF Model:', svr_rbf.predict(x)[0])
    print('Polynomial Model:', svr_poly.predict(x)[0])
    print("")
    ans = ['1', '2']
    user_input = input("""                  
What would you like to do next? Enter option 1 or 2.  
1. Menu
2. Exit
Command: """)
    # Keep asking until one of the two valid options is entered.
    while user_input not in ans:
        print("Error: Please enter a a valid option 1-2")
        user_input = input("Command: ")
    if user_input == "1":
        menu()
    elif user_input == "2":
        exit()
176 |         
177 | 
178 | #***********************************************************************************************************************#
179 | #******************************************************* Menu **********************************************************#
180 | #***********************************************************************************************************************#  
def menu():
    """Print the prediction menu and run the option the user selects.

    Valid commands are ``1``-``4`` and ``0``; anything else is rejected and
    the user is asked again. Options 1-3 run the matching model routine,
    4 returns to ``beginning()`` and 0 exits the program.

    Returns:
        None
    """
    ans = ['1', '2', '3', '4', '0']
    print(""" 
              
                           MENU
                  MACHINE LEARNING PREDICTION        
                  ---------------------------
                  1.Linear Regression
                  2.Logistic Regressions
                  3.Support Vector Regression
                  4.Beginning Menu
                  0.Exit the Program
                  """)
    # The prompt and error text list the full accepted range (0-4); they
    # previously said 0-3 although option 4 is valid.
    user_input = input("Command (0-4): ")
    while user_input not in ans:
        print("Error: Please enter a valid option 0-4")
        user_input = input("Command: ")
    if user_input == '1':
        stock_linear_regression()
    elif user_input == '2':
        stock_logistic_regression()
    elif user_input == '3':
        stock_svr()
    elif user_input == "4":
        beginning()
    elif user_input == "0":
        exit()
208 |         
209 |         
210 | #***********************************************************************************************************************#    
211 | #*************************************************** Start of Program **************************************************# 
212 | #***********************************************************************************************************************#  
def beginning():
    """Show the welcome screen and either open the menu or quit.

    The user must answer ``1`` (open ``menu()``) or ``2`` (exit); any other
    answer is rejected and the prompt is repeated.

    Returns:
        None
    """
    print()
    print("----------Welcome to Machine Learning Predictions--------")
    print("""
Please choose option 1 or 2
              
1. Menu
2. Exit Program 

---------------------------------------------""")
    valid_choices = ['1', '2']
    choice = input("What is your Option?: ")
    # Re-prompt until the answer is one of the two valid choices.
    while choice not in valid_choices:
        print("Error: Please enter a a valid option 1-2")
        choice = input("Command: ")
    if choice == "2":
        exit()
    else:
        menu()


#***********************************************************************************************************************#     
# Launch the interactive program.
beginning()
236 | 


--------------------------------------------------------------------------------
/Stock_Apps/Stock_ML_Rescale_Data_Apps.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Sat May 25 14:21:27 2019
 4 | 
 5 | @author: Tin
 6 | """
 7 | import numpy as np
 8 | import pandas as pd
 9 | import datetime
10 | 
11 | from sklearn.preprocessing import MinMaxScaler
12 | from sklearn.preprocessing import StandardScaler
13 | from sklearn.model_selection import train_test_split
14 | 
15 | 
16 | import warnings
17 | warnings.filterwarnings("ignore")
18 | 
19 | # yahoo finance used to fetch data 
20 | import yfinance as yf
21 | yf.pdr_override()
22 | 
23 | options = " Data Preprocessing, Exit".split(",")
24 | 
25 | # Input Start Date
def start_date():
    """Prompt for the first day of the range and normalise its format.

    Returns:
        str: The ``MM/DD/YYYY`` input rewritten as ``YYYY-MM-DD``.

    Raises:
        ValueError: If the input does not parse as ``MM/DD/YYYY``.
    """
    source_format, target_format = '%m/%d/%Y', '%Y-%m-%d'
    entered = input('Enter a starting date in MM/DD/YYYY format: ')
    moment = datetime.datetime.strptime(entered, source_format)
    return moment.strftime(target_format)
31 | 
32 | # Input End Date
def end_date():
    """Prompt for the last day of the range and normalise its format.

    Returns:
        str: The ``MM/DD/YYYY`` input rewritten as ``YYYY-MM-DD``.

    Raises:
        ValueError: If the input does not parse as ``MM/DD/YYYY``.
    """
    source_format, target_format = '%m/%d/%Y', '%Y-%m-%d'
    entered = input('Enter a ending date in MM/DD/YYYY format: ')
    moment = datetime.datetime.strptime(entered, source_format)
    return moment.strftime(target_format)
38 | 
39 | # Input Symbols
def input_symbol():
    """Prompt for a ticker symbol and hand it back upper-cased.

    Returns:
        str: The typed text after ``str.upper``.
    """
    entered = input("Enter symbol: ")
    ticker = entered.upper()
    return ticker
43 | 
def preprocessing_dataset():
    """Download price data and print Min-Max rescaled and standardized views.

    Prompts for a start date, an end date and a symbol, downloads the data
    with ``yf.download`` and then prints:

    * the first five rows of the features rescaled to the 0..1 range, and
    * an 80/20 train/test split whose features are standardized.

    Returns:
        None
    """
    s = start_date()
    e = end_date()
    sym = input_symbol()
    df = yf.download(sym, s, e)
    array = df.values
    # NOTE(review): assumes the download has at least six columns; with the
    # classic Open/High/Low/Close/Adj Close/Volume layout the target would be
    # Volume - confirm against the yfinance version in use.
    X = array[:, 0:5]
    Y = array[:, 5]
    # initialising the MinMaxScaler
    scaler = MinMaxScaler(feature_range=(0, 1))
    # learning the statistical parameters for each of the data and transforming
    rescaledX = scaler.fit_transform(X)
    np.set_printoptions(precision=3)
    print('Rescaled values between 0 to 1')
    print(rescaledX[0:5, :])
    print("")
    # Splitting the datasets into training sets and Test sets
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
    sc_X = StandardScaler()
    # Fit the scaler on the training features only, then apply the very same
    # mean/std to the test features. Re-fitting on the test split would scale
    # the two sets inconsistently.
    X_train = sc_X.fit_transform(X_train)
    X_test = sc_X.transform(X_test)
    print("Training Dataset")
    print(X_train)
    print("")
    print(Y_train)
    print("")
    print("Testing Dataset")
    print(X_test)
    print("")
    print(Y_test)
    return
76 | 
77 |     
def main():
    """Run the text menu loop until the user picks the Exit option.

    Each pass prints the numbered entries of the module-level ``options``
    list and reads a number: 1 runs ``preprocessing_dataset()`` and 2 ends
    the loop. Input that is not an integer is reported and the menu is shown
    again; other integers simply redisplay the menu.

    Returns:
        None
    """
    run_program = True
    while run_program:
        print("")
        print("__________Preprocessing Dataset__________")
        print("")
        print("Choose Options:")
        print("")
        for number, label in enumerate(options, start=1):
            print("{} - {}".format(number, label))
        try:
            choice = int(input())
        except ValueError:
            # Non-numeric input used to crash the program; ask again instead.
            print("Error: Please enter a number between 1 and {}".format(len(options)))
            continue

        if choice == 1:
            preprocessing_dataset()
        elif choice == 2:
            run_program = False


if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/Tensorflow_Basics.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "source": [
  6 |         "# Tensorflow Basic"
  7 |       ],
  8 |       "metadata": {}
  9 |     },
 10 |     {
 11 |       "cell_type": "code",
 12 |       "source": [
 13 |         "import tensorflow as tf"
 14 |       ],
 15 |       "outputs": [],
 16 |       "execution_count": 14,
 17 |       "metadata": {
 18 |         "collapsed": false,
 19 |         "outputHidden": false,
 20 |         "inputHidden": false
 21 |       }
 22 |     },
 23 |     {
 24 |       "cell_type": "code",
 25 |       "source": [
 26 |         "x = tf.constant(2)\n",
 27 |         "y = tf.constant(4)"
 28 |       ],
 29 |       "outputs": [],
 30 |       "execution_count": 2,
 31 |       "metadata": {
 32 |         "collapsed": false,
 33 |         "outputHidden": false,
 34 |         "inputHidden": false
 35 |       }
 36 |     },
 37 |     {
 38 |       "cell_type": "code",
 39 |       "source": [
 40 |         "with tf.Session() as sess:\n",
 41 |         "    print(\"x: %i\" % sess.run(x), \"y: %i\" % sess.run(y))\n",
 42 |         "    print(\"Addition with constants: %i\" % sess.run(x+y))\n",
 43 |         "    print(\"Multiplication with constants: %i\" % sess.run(x*y))"
 44 |       ],
 45 |       "outputs": [
 46 |         {
 47 |           "output_type": "stream",
 48 |           "name": "stdout",
 49 |           "text": [
 50 |             "x: 2 y: 4\n",
 51 |             "Addition with constants: 6\n",
 52 |             "Multiplication with constants: 8\n"
 53 |           ]
 54 |         }
 55 |       ],
 56 |       "execution_count": 3,
 57 |       "metadata": {
 58 |         "collapsed": false,
 59 |         "outputHidden": false,
 60 |         "inputHidden": false
 61 |       }
 62 |     },
 63 |     {
 64 |       "cell_type": "code",
 65 |       "source": [
 66 |         "add = tf.add(x, y)\n",
 67 |         "sess = tf.Session()\n",
 68 |         "value_of_add = sess.run(add)\n",
 69 |         "print(value_of_add)\n",
 70 |         "sess.close()"
 71 |       ],
 72 |       "outputs": [
 73 |         {
 74 |           "output_type": "stream",
 75 |           "name": "stdout",
 76 |           "text": [
 77 |             "6\n"
 78 |           ]
 79 |         }
 80 |       ],
 81 |       "execution_count": 4,
 82 |       "metadata": {
 83 |         "collapsed": false,
 84 |         "outputHidden": false,
 85 |         "inputHidden": false
 86 |       }
 87 |     },
 88 |     {
 89 |       "cell_type": "code",
 90 |       "source": [
 91 |         "sub = tf.subtract(x, y)\n",
 92 |         "sess = tf.Session()\n",
 93 |         "value_of_sub = sess.run(sub)\n",
 94 |         "print(value_of_sub)\n",
 95 |         "sess.close()"
 96 |       ],
 97 |       "outputs": [
 98 |         {
 99 |           "output_type": "stream",
100 |           "name": "stdout",
101 |           "text": [
102 |             "-2\n"
103 |           ]
104 |         }
105 |       ],
106 |       "execution_count": 5,
107 |       "metadata": {
108 |         "collapsed": false,
109 |         "outputHidden": false,
110 |         "inputHidden": false
111 |       }
112 |     },
113 |     {
114 |       "cell_type": "code",
115 |       "source": [
116 |         "mult = tf.multiply(x, y)\n",
117 |         "sess = tf.Session()\n",
118 |         "value_of_mult = sess.run(mult)\n",
119 |         "print(value_of_mult)\n",
120 |         "sess.close()"
121 |       ],
122 |       "outputs": [
123 |         {
124 |           "output_type": "stream",
125 |           "name": "stdout",
126 |           "text": [
127 |             "8\n"
128 |           ]
129 |         }
130 |       ],
131 |       "execution_count": 6,
132 |       "metadata": {
133 |         "collapsed": false,
134 |         "outputHidden": false,
135 |         "inputHidden": false
136 |       }
137 |     },
138 |     {
139 |       "cell_type": "code",
140 |       "source": [
141 |         "div = tf.divide(x, y)\n",
142 |         "sess = tf.Session()\n",
143 |         "value_of_div = sess.run(div)\n",
144 |         "print(value_of_div)\n",
145 |         "sess.close()"
146 |       ],
147 |       "outputs": [
148 |         {
149 |           "output_type": "stream",
150 |           "name": "stdout",
151 |           "text": [
152 |             "0.5\n"
153 |           ]
154 |         }
155 |       ],
156 |       "execution_count": 7,
157 |       "metadata": {
158 |         "collapsed": false,
159 |         "outputHidden": false,
160 |         "inputHidden": false
161 |       }
162 |     },
163 |     {
164 |       "cell_type": "code",
165 |       "source": [
166 |         "# y = W.x + b\n",
167 |         "x = tf.constant(-2.0, name=\"x\", dtype=tf.float32)\n",
168 |         "W = tf.constant(8.0, name=\"W\", dtype=tf.float32)\n",
169 |         "b = tf.constant(10.0, name=\"b\", dtype=tf.float32)\n",
170 |         "\n",
171 |         "y = tf.Variable(tf.add(tf.multiply(W, x), b))\n",
172 |         "\n",
173 |         "init = tf.global_variables_initializer()\n",
174 |         "\n",
175 |         "with tf.Session() as session:\n",
176 |         "    session.run(init)\n",
177 |         "    print(session.run(y))   "
178 |       ],
179 |       "outputs": [
180 |         {
181 |           "output_type": "stream",
182 |           "name": "stdout",
183 |           "text": [
184 |             "-6.0\n"
185 |           ]
186 |         }
187 |       ],
188 |       "execution_count": 8,
189 |       "metadata": {
190 |         "collapsed": false,
191 |         "outputHidden": false,
192 |         "inputHidden": false
193 |       }
194 |     },
195 |     {
196 |       "cell_type": "code",
197 |       "source": [
198 |         "a = tf.constant(2.8)\n",
199 |         "b = tf.constant(4.3)\n",
200 |         "\n",
201 |         "# Basic Math\n",
202 |         "total = a + b\n",
203 |         "diff = a - b\n",
204 |         "quot = tf.div(a, b)\n",
205 |         "prod = tf.multiply(a, b)\n",
206 |         "\n",
207 |         "# Matrix Multiplication\n",
208 |         "c = tf.constant([[1,2], [3,4], [5,6]])\n",
209 |         "d = tf.constant([[9,8,7], [6,5,4]])\n",
210 |         "matrix_prod = tf.matmul(c, d)\n",
211 |         "\n",
212 |         "# Execute the Session\n",
213 |         "with tf.Session() as sess:\n",
214 |         "    print(\"Sum: %f\" % sess.run(total))\n",
215 |         "    print(\"Difference: %f\" % sess.run(diff))\n",
216 |         "    print(\"Division: %f\" % sess.run(quot))\n",
217 |         "    print(\"Multiplication: %f\" % sess.run(prod))\n",
218 |         "    print(\"Matrix prouct: \",  sess.run(matrix_prod))\n",
219 |         "    print(\"Round: %f\" % sess.run(tf.round(a)))\n",
220 |         "    print(\"Round: %f\" % sess.run(tf.round(b)))"
221 |       ],
222 |       "outputs": [
223 |         {
224 |           "output_type": "stream",
225 |           "name": "stdout",
226 |           "text": [
227 |             "Sum: 7.100000\n",
228 |             "Difference: -1.500000\n",
229 |             "Division: 0.651163\n",
230 |             "Multiplication: 12.040000\n",
231 |             "Matrix prouct:  [[21 18 15]\n",
232 |             " [51 44 37]\n",
233 |             " [81 70 59]]\n",
234 |             "Round: 3.000000\n",
235 |             "Round: 4.000000\n"
236 |           ]
237 |         }
238 |       ],
239 |       "execution_count": 9,
240 |       "metadata": {}
241 |     },
242 |     {
243 |       "cell_type": "code",
244 |       "source": [
245 |         "# 2-D tensor\n",
246 |         "a = tf.constant([1, 2, 3, 4, 5, 6], shape=[2, 3])\n",
247 |         "\n",
248 |         "with tf.Session() as sess:\n",
249 |         "    print(\"2-D tensor:\")\n",
250 |         "    print(sess.run(a))"
251 |       ],
252 |       "outputs": [
253 |         {
254 |           "output_type": "stream",
255 |           "name": "stdout",
256 |           "text": [
257 |             "2-D tensor:\n",
258 |             "[[1 2 3]\n",
259 |             " [4 5 6]]\n"
260 |           ]
261 |         }
262 |       ],
263 |       "execution_count": 10,
264 |       "metadata": {
265 |         "collapsed": false,
266 |         "outputHidden": false,
267 |         "inputHidden": false
268 |       }
269 |     },
270 |     {
271 |       "cell_type": "code",
272 |       "source": [
273 |         "# 2-D tensor\n",
274 |         "b = tf.constant([7, 8, 9, 10, 11, 12], shape=[3, 2])\n",
275 |         "\n",
276 |         "with tf.Session() as sess:\n",
277 |         "    print(\"2-D tensor:\")\n",
278 |         "    print(sess.run(b))"
279 |       ],
280 |       "outputs": [
281 |         {
282 |           "output_type": "stream",
283 |           "name": "stdout",
284 |           "text": [
285 |             "2-D tensor:\n",
286 |             "[[ 7  8]\n",
287 |             " [ 9 10]\n",
288 |             " [11 12]]\n"
289 |           ]
290 |         }
291 |       ],
292 |       "execution_count": 11,
293 |       "metadata": {
294 |         "collapsed": false,
295 |         "outputHidden": false,
296 |         "inputHidden": false
297 |       }
298 |     },
299 |     {
300 |       "cell_type": "code",
301 |       "source": [
302 |         "c = tf.matmul(a, b)\n",
303 |         "\n",
304 |         "with tf.Session() as sess:\n",
305 |         "    print(\"a * b:\")\n",
306 |         "    print(sess.run(c))"
307 |       ],
308 |       "outputs": [
309 |         {
310 |           "output_type": "stream",
311 |           "name": "stdout",
312 |           "text": [
313 |             "a * b:\n",
314 |             "[[ 58  64]\n",
315 |             " [139 154]]\n"
316 |           ]
317 |         }
318 |       ],
319 |       "execution_count": 12,
320 |       "metadata": {
321 |         "collapsed": false,
322 |         "outputHidden": false,
323 |         "inputHidden": false
324 |       }
325 |     },
326 |     {
327 |       "cell_type": "code",
328 |       "source": [
329 |         "# 3-D tensor\n",
330 |         "import numpy as np\n",
331 |         "x = tf.constant(np.arange(1, 13, dtype=np.int32),\n",
332 |         "                shape=[2, 2, 3])\n",
333 |         "\n",
334 |         "y = tf.constant(np.arange(13, 25, dtype=np.int32),\n",
335 |         "                shape=[2, 3, 2])\n",
336 |         "\n",
337 |         "z = tf.constant(np.arange(1, 13, dtype=np.int32),\n",
338 |         "                shape=[2, 2, 3])\n",
339 |         "\n",
340 |         "with tf.Session() as sess:\n",
341 |         "    print(\"3-D tensor:\")\n",
342 |         "    print('--------------------')\n",
343 |         "    print(sess.run(x))\n",
344 |         "    print('--------------------')\n",
345 |         "    print(sess.run(y))\n",
346 |         "    print('--------------------')\n",
347 |         "    print('3-D Multiplication:')\n",
348 |         "    print(sess.run(tf.matmul(x,y)))\n",
349 |         "    print('--------------------')\n",
350 |         "    print('Dot Product:')\n",
351 |         "    print(sess.run(tf.tensordot(x,z, 3)))"
352 |       ],
353 |       "outputs": [
354 |         {
355 |           "output_type": "stream",
356 |           "name": "stdout",
357 |           "text": [
358 |             "3-D tensor:\n",
359 |             "--------------------\n",
360 |             "[[[ 1  2  3]\n",
361 |             "  [ 4  5  6]]\n",
362 |             "\n",
363 |             " [[ 7  8  9]\n",
364 |             "  [10 11 12]]]\n",
365 |             "--------------------\n",
366 |             "[[[13 14]\n",
367 |             "  [15 16]\n",
368 |             "  [17 18]]\n",
369 |             "\n",
370 |             " [[19 20]\n",
371 |             "  [21 22]\n",
372 |             "  [23 24]]]\n",
373 |             "--------------------\n",
374 |             "3-D Multiplication:\n",
375 |             "[[[ 94 100]\n",
376 |             "  [229 244]]\n",
377 |             "\n",
378 |             " [[508 532]\n",
379 |             "  [697 730]]]\n",
380 |             "--------------------\n",
381 |             "Dot Product:\n",
382 |             "650\n"
383 |           ]
384 |         }
385 |       ],
386 |       "execution_count": 13,
387 |       "metadata": {
388 |         "collapsed": false,
389 |         "outputHidden": false,
390 |         "inputHidden": false
391 |       }
392 |     }
393 |   ],
394 |   "metadata": {
395 |     "kernel_info": {
396 |       "name": "python3"
397 |     },
398 |     "language_info": {
399 |       "pygments_lexer": "ipython3",
400 |       "nbconvert_exporter": "python",
401 |       "codemirror_mode": {
402 |         "version": 3,
403 |         "name": "ipython"
404 |       },
405 |       "version": "3.5.5",
406 |       "name": "python",
407 |       "file_extension": ".py",
408 |       "mimetype": "text/x-python"
409 |     },
410 |     "kernelspec": {
411 |       "name": "python3",
412 |       "language": "python",
413 |       "display_name": "Python 3"
414 |     },
415 |     "nteract": {
416 |       "version": "0.15.0"
417 |     }
418 |   },
419 |   "nbformat": 4,
420 |   "nbformat_minor": 4
421 | }


--------------------------------------------------------------------------------
/Title.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LastAncientOne/Deep_Learning_Machine_Learning_Stock/e31e9fc689cd2cb6d74a8f2694750afd6e13bcec/Title.PNG


--------------------------------------------------------------------------------
/Variance_Inflation_Factor.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "source": [
  6 |         "# Variance Inflation Factor (VIF)"
  7 |       ],
  8 |       "metadata": {
  9 |         "nteract": {
 10 |           "transient": {
 11 |             "deleting": false
 12 |           }
 13 |         }
 14 |       }
 15 |     },
 16 |     {
 17 |       "cell_type": "markdown",
 18 |       "source": [
 19 |         "In statistics, the variance inflation factor (VIF) is the quotient of the variance in a model with multiple terms by the variance of a model with one term alone. It quantifies the severity of multicollinearity in an ordinary least squares regression analysis. It provides an index that measures how much the variance (the square of the estimate's standard deviation) of an estimated regression coefficient is increased because of collinearity.\n",
 19 |         "\n",
 19 |         "Source: https://en.wikipedia.org/wiki/Variance_inflation_factor"
 20 |       ],
 21 |       "metadata": {
 22 |         "nteract": {
 23 |           "transient": {
 24 |             "deleting": false
 25 |           }
 26 |         }
 27 |       }
 28 |     },
 29 |     {
 30 |       "cell_type": "code",
 31 |       "source": [
 32 |         "import numpy as np\n",
 33 |         "import matplotlib.pyplot as plt\n",
 34 |         "import pandas as pd\n",
 35 |         "\n",
 36 |         "import warnings\n",
 37 |         "warnings.filterwarnings(\"ignore\")\n",
 38 |         "\n",
 39 |         "# fetch yahoo data\n",
 40 |         "import yfinance as yf\n",
 41 |         "yf.pdr_override()"
 42 |       ],
 43 |       "outputs": [],
 44 |       "execution_count": 1,
 45 |       "metadata": {
 46 |         "collapsed": true,
 47 |         "jupyter": {
 48 |           "source_hidden": false,
 49 |           "outputs_hidden": false
 50 |         },
 51 |         "nteract": {
 52 |           "transient": {
 53 |             "deleting": false
 54 |           }
 55 |         },
 56 |         "execution": {
 57 |           "iopub.status.busy": "2020-08-09T21:00:54.254Z",
 58 |           "iopub.execute_input": "2020-08-09T21:00:54.261Z",
 59 |           "iopub.status.idle": "2020-08-09T21:00:55.369Z",
 60 |           "shell.execute_reply": "2020-08-09T21:00:55.394Z"
 61 |         }
 62 |       }
 63 |     },
 64 |     {
 65 |       "cell_type": "code",
 66 |       "source": [
 67 |         "# input\n",
 68 |         "symbol = 'AMD'\n",
 69 |         "start = '2014-01-01'\n",
 70 |         "end = '2018-08-27'\n",
 71 |         "\n",
 72 |         "# Read data \n",
 73 |         "dataset = yf.download(symbol,start,end)\n",
 74 |         "\n",
 75 |         "# Only keep close columns \n",
 76 |         "dataset.head()"
 77 |       ],
 78 |       "outputs": [
 79 |         {
 80 |           "output_type": "stream",
 81 |           "name": "stdout",
 82 |           "text": [
 83 |             "[*********************100%***********************]  1 of 1 completed\n"
 84 |           ]
 85 |         },
 86 |         {
 87 |           "output_type": "execute_result",
 88 |           "execution_count": 2,
 89 |           "data": {
 90 |             "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Adj Close</th>\n      <th>Close</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Open</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-02</th>\n      <td>3.95</td>\n      <td>3.95</td>\n      <td>3.98</td>\n      <td>3.84</td>\n      <td>3.85</td>\n      <td>20548400</td>\n    </tr>\n    <tr>\n      <th>2014-01-03</th>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>3.98</td>\n      <td>22887200</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.01</td>\n      <td>42398300</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.19</td>\n      <td>42932100</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.23</td>\n      <td>30678700</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
 91 |             "text/plain": "            Adj Close  Close  High   Low  Open    Volume\nDate                                                    \n2014-01-02       3.95   3.95  3.98  3.84  3.85  20548400\n2014-01-03       4.00   4.00  4.00  3.88  3.98  22887200\n2014-01-06       4.13   4.13  4.18  3.99  4.01  42398300\n2014-01-07       4.18   4.18  4.25  4.11  4.19  42932100\n2014-01-08       4.18   4.18  4.26  4.14  4.23  30678700"
 92 |           },
 93 |           "metadata": {}
 94 |         }
 95 |       ],
 96 |       "execution_count": 2,
 97 |       "metadata": {
 98 |         "collapsed": true,
 99 |         "jupyter": {
100 |           "source_hidden": false,
101 |           "outputs_hidden": false
102 |         },
103 |         "nteract": {
104 |           "transient": {
105 |             "deleting": false
106 |           }
107 |         },
108 |         "execution": {
109 |           "iopub.status.busy": "2020-08-09T21:00:55.376Z",
110 |           "iopub.execute_input": "2020-08-09T21:00:55.383Z",
111 |           "iopub.status.idle": "2020-08-09T21:00:56.732Z",
112 |           "shell.execute_reply": "2020-08-09T21:00:56.802Z"
113 |         }
114 |       }
115 |     },
116 |     {
117 |       "cell_type": "code",
118 |       "source": [
119 |         "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
120 |         "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
121 |         "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
122 |         "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
123 |         "dataset = dataset.dropna()"
124 |       ],
125 |       "outputs": [],
126 |       "execution_count": 3,
127 |       "metadata": {
128 |         "collapsed": true,
129 |         "jupyter": {
130 |           "source_hidden": false,
131 |           "outputs_hidden": false
132 |         },
133 |         "nteract": {
134 |           "transient": {
135 |             "deleting": false
136 |           }
137 |         },
138 |         "execution": {
139 |           "iopub.status.busy": "2020-08-09T21:00:56.741Z",
140 |           "iopub.execute_input": "2020-08-09T21:00:56.747Z",
141 |           "iopub.status.idle": "2020-08-09T21:00:56.761Z",
142 |           "shell.execute_reply": "2020-08-09T21:00:56.806Z"
143 |         }
144 |       }
145 |     },
146 |     {
147 |       "cell_type": "code",
148 |       "source": [
149 |         "from statsmodels.stats.outliers_influence import variance_inflation_factor"
150 |       ],
151 |       "outputs": [],
152 |       "execution_count": 4,
153 |       "metadata": {
154 |         "collapsed": true,
155 |         "jupyter": {
156 |           "source_hidden": false,
157 |           "outputs_hidden": false
158 |         },
159 |         "nteract": {
160 |           "transient": {
161 |             "deleting": false
162 |           }
163 |         },
164 |         "execution": {
165 |           "iopub.status.busy": "2020-08-09T21:00:56.774Z",
166 |           "iopub.execute_input": "2020-08-09T21:00:56.780Z",
167 |           "iopub.status.idle": "2020-08-09T21:00:57.253Z",
168 |           "shell.execute_reply": "2020-08-09T21:00:57.355Z"
169 |         }
170 |       }
171 |     },
172 |     {
173 |       "cell_type": "code",
174 |       "source": [
175 |         "X = dataset"
176 |       ],
177 |       "outputs": [],
178 |       "execution_count": 5,
179 |       "metadata": {
180 |         "collapsed": true,
181 |         "jupyter": {
182 |           "source_hidden": false,
183 |           "outputs_hidden": false
184 |         },
185 |         "nteract": {
186 |           "transient": {
187 |             "deleting": false
188 |           }
189 |         },
190 |         "execution": {
191 |           "iopub.status.busy": "2020-08-09T21:00:57.264Z",
192 |           "iopub.execute_input": "2020-08-09T21:00:57.271Z",
193 |           "iopub.status.idle": "2020-08-09T21:00:57.287Z",
194 |           "shell.execute_reply": "2020-08-09T21:00:57.358Z"
195 |         }
196 |       }
197 |     },
198 |     {
199 |       "cell_type": "code",
200 |       "source": [
201 |         "vif = pd.DataFrame()\n",
202 |         "vif[\"VIF Factor\"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]\n",
203 |         "vif[\"features\"] = X.columns"
204 |       ],
205 |       "outputs": [],
206 |       "execution_count": 6,
207 |       "metadata": {
208 |         "collapsed": true,
209 |         "jupyter": {
210 |           "source_hidden": false,
211 |           "outputs_hidden": false
212 |         },
213 |         "nteract": {
214 |           "transient": {
215 |             "deleting": false
216 |           }
217 |         },
218 |         "execution": {
219 |           "iopub.status.busy": "2020-08-09T21:00:57.302Z",
220 |           "iopub.execute_input": "2020-08-09T21:00:57.307Z",
221 |           "iopub.status.idle": "2020-08-09T21:00:57.316Z",
222 |           "shell.execute_reply": "2020-08-09T21:00:57.361Z"
223 |         }
224 |       }
225 |     },
226 |     {
227 |       "cell_type": "code",
228 |       "source": [
229 |         "vif"
230 |       ],
231 |       "outputs": [
232 |         {
233 |           "output_type": "execute_result",
234 |           "execution_count": 7,
235 |           "data": {
236 |             "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>VIF Factor</th>\n      <th>features</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>inf</td>\n      <td>Adj Close</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>inf</td>\n      <td>Close</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1.321329e+04</td>\n      <td>High</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>9.797402e+03</td>\n      <td>Low</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>8.486690e+03</td>\n      <td>Open</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>8.249018e+00</td>\n      <td>Volume</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>1.718758e+00</td>\n      <td>Increase_Decrease</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>2.503856e+00</td>\n      <td>Buy_Sell_on_Open</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>1.869328e+00</td>\n      <td>Buy_Sell</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>1.946329e+00</td>\n      <td>Returns</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
237 |             "text/plain": "     VIF Factor           features\n0           inf          Adj Close\n1           inf              Close\n2  1.321329e+04               High\n3  9.797402e+03                Low\n4  8.486690e+03               Open\n5  8.249018e+00             Volume\n6  1.718758e+00  Increase_Decrease\n7  2.503856e+00   Buy_Sell_on_Open\n8  1.869328e+00           Buy_Sell\n9  1.946329e+00            Returns"
238 |           },
239 |           "metadata": {}
240 |         }
241 |       ],
242 |       "execution_count": 7,
243 |       "metadata": {
244 |         "collapsed": true,
245 |         "jupyter": {
246 |           "source_hidden": false,
247 |           "outputs_hidden": false
248 |         },
249 |         "nteract": {
250 |           "transient": {
251 |             "deleting": false
252 |           }
253 |         },
254 |         "execution": {
255 |           "iopub.status.busy": "2020-08-09T21:00:57.323Z",
256 |           "iopub.execute_input": "2020-08-09T21:00:57.329Z",
257 |           "iopub.status.idle": "2020-08-09T21:00:57.341Z",
258 |           "shell.execute_reply": "2020-08-09T21:00:57.366Z"
259 |         }
260 |       }
261 |     }
262 |   ],
263 |   "metadata": {
264 |     "kernel_info": {
265 |       "name": "python3"
266 |     },
267 |     "language_info": {
268 |       "file_extension": ".py",
269 |       "name": "python",
270 |       "nbconvert_exporter": "python",
271 |       "version": "3.5.5",
272 |       "mimetype": "text/x-python",
273 |       "codemirror_mode": {
274 |         "version": 3,
275 |         "name": "ipython"
276 |       },
277 |       "pygments_lexer": "ipython3"
278 |     },
279 |     "kernelspec": {
280 |       "argv": [
281 |         "C:\\Users\\Tin Hang\\Anaconda3\\envs\\py35\\python.exe",
282 |         "-m",
283 |         "ipykernel_launcher",
284 |         "-f",
285 |         "{connection_file}"
286 |       ],
287 |       "display_name": "Python 3",
288 |       "language": "python",
289 |       "name": "python3"
290 |     },
291 |     "nteract": {
292 |       "version": "0.24.1"
293 |     }
294 |   },
295 |   "nbformat": 4,
296 |   "nbformat_minor": 0
297 | }


--------------------------------------------------------------------------------