├── 001_Pandas.ipynb ├── 002_Numpy.ipynb ├── 003_Matrix.ipynb ├── 004_Data_PreProcessing.ipynb ├── 005_Pre_Proccessing (Part_2).ipynb ├── 006_Data_Visualization.ipynb ├── 007_Understand_Data.ipynb ├── 008_Basic_Statistics.ipynb ├── ANOVA_F_value.ipynb ├── Array_Selection_Numpy.ipynb ├── Basic_Machine_Learning_Predicts.ipynb ├── Categorical_Continuous.ipynb ├── Chi_Squared.ipynb ├── Column_Selection_Pandas.ipynb ├── DL_Title.PNG ├── Data_Cleaning_for_Machine_Learning.ipynb ├── Descriptive_Statistics.ipynb ├── Discrete_Probability_Distributions.ipynb ├── Drop_Highly_Correlated_Features.ipynb ├── Feature_Importance_Classification.ipynb ├── Feature_Importance_Continuous.ipynb ├── Features_Analysis.ipynb ├── Features_Extraction.ipynb ├── Features_Extraction_with_PCA.ipynb ├── Features_Rank.ipynb ├── Features_Scores.ipynb ├── Features_Selections.ipynb ├── Features_Selections_Stock.ipynb ├── Features_Transformation.ipynb ├── In_Sample_Out_Sample.ipynb ├── LICENSE ├── Linear_Regression_Stock.ipynb ├── Logistic_Regression_Stock.ipynb ├── Metric.ipynb ├── Nested_Cross-Validation_Part2.ipynb ├── NetworkX.ipynb ├── Poisson_Regression.ipynb ├── Principal_Component_Analysis_(PCA).ipynb ├── Principal_Component_Analysis_(PCA)_Stock.ipynb ├── Probabilities.ipynb ├── README.md ├── Scaling_and_Transformations.ipynb ├── Split_Data.ipynb ├── Stationary_Check.ipynb ├── Stationary_Check_Part_2.ipynb ├── Stock_Algorithms ├── 30_Regression_Models.ipynb ├── ARIMA_Models.ipynb ├── AdaBoost_Classification.ipynb ├── AdaBoost_Regressor.ipynb ├── Addictive_Model.ipynb ├── Algorithms.PNG ├── Anomaly_Detection.ipynb ├── Anomaly_Detection_SVM.ipynb ├── Artificial_Neural_Network.ipynb ├── Automatic_Relevance_Determination_Regression.ipynb ├── Bagging_Classifier.ipynb ├── Basic_Machine_Learning_Predicts.ipynb ├── Basic_Machine_Learning_Predicts_Updates.ipynb ├── Basic_Regressions.ipynb ├── Bayesian_Ridge_Regression.ipynb ├── Bayesian_Ridge_Regression_Part2.ipynb ├── 
Bernoulli_Restricted_Boltzmann_Machine.ipynb ├── Calibrate_Predicted_Probabilities.ipynb ├── CatBoost_Algorithms.ipynb ├── CatBoost_Algorithms_Part2.ipynb ├── Classification_Cluster.ipynb ├── Classification_Cluster_2.ipynb ├── Classification_Cluster_3.ipynb ├── Convolutional_Neural_Network.ipynb ├── Convolutional_Neural_Networks_Keras.ipynb ├── Cox_Proportional_Hazards.ipynb ├── Decision_Tree_Classifier_Visualize.ipynb ├── Decision_Tree_Multioutput_Regression.ipynb ├── Decision_Trees_Classification.ipynb ├── Decision_Trees_Classification_Explained.ipynb ├── Decision_Trees_Classification_Part2.ipynb ├── Decision_Trees_Classification_Part3.ipynb ├── Decision_Trees_Classification_Part4.ipynb ├── Decision_Trees_Classification_Part5.ipynb ├── Decision_Trees_Regression.ipynb ├── Decision_Trees_Regression_Part2.ipynb ├── Deep_Belief_Networks.ipynb ├── ElasticNet_Regression.ipynb ├── Fast_Fourier_Transformations.ipynb ├── Fixed_Effects_Model.ipynb ├── Gaussian_Regression.ipynb ├── Genetic_Algorithm.ipynb ├── Genetic_Algorithm_Part2.ipynb ├── Gradient_Ascent.ipynb ├── Gradient_Boosting_Classification.ipynb ├── Gradient_Boosting_Machine_(GBM).ipynb ├── Gradient_Boosting_Regressor.ipynb ├── Hierarchical_Clustering.ipynb ├── Huber_Regression.ipynb ├── Huber_Regression_Part2.ipynb ├── Hyperparameter_Tuning.ipynb ├── Implementing_Logistic_Regression.ipynb ├── Isotonic_Regression.ipynb ├── Isotonic_Regression_Linear_Regression.ipynb ├── K_Means.ipynb ├── K_Means_Clustering.ipynb ├── K_Means_Clustering_Part2.ipynb ├── K_Nearest_Neighbors.ipynb ├── K_Nearest_Neighbors_Multioutput_Regression.ipynb ├── K_Nearest_Neighbors_Part2.ipynb ├── LSTM_Neural_Networks.ipynb ├── LSTM_RNN.ipynb ├── LSTM_RNN_Part2.ipynb ├── Lasso_Regression.ipynb ├── Lasso_Regression_Alpha_Levels.ipynb ├── Lasso_Regression_Part2.ipynb ├── Lasso_Ridge_Regression.ipynb ├── Least_Angled_Regression.ipynb ├── Least_Squares_Regression.ipynb ├── Leave_One_Out_Cross_Validation.ipynb ├── Light_GBM.ipynb ├── 
Linear_Discriminant_Analysis.ipynb ├── Linear_Discriminant_Analysis_Classification.ipynb ├── Linear_Regression.ipynb ├── Linear_Regression_Classification.ipynb ├── Linear_Regression_Continuous.ipynb ├── Linear_Regression_Multioutput_Regression.ipynb ├── Linear_Regression_Predict_Future_Price.ipynb ├── Linear_Regression_Prediction.ipynb ├── Linear_Regression_Prediction_Part2.ipynb ├── Linear_Regression_Prediction_Part3.ipynb ├── Linear_Regression_Using_Linear_Algebra.ipynb ├── Linear_Regression_with_Normalize_Data.ipynb ├── Locally_Estimated_Scatterplot_Smoothing.ipynb ├── Locally_Weighted_Scatterplot_Smoothing_LOWESS.ipynb ├── Logistic_Model.ipynb ├── Logistic_Regression.ipynb ├── Logistic_Regression_Classification.ipynb ├── Logistic_Regression_Classification_Part2.ipynb ├── Logistic_Regression_Classification_Part3.ipynb ├── Logistic_Regression_Classification_Part4.ipynb ├── Logistic_Regression_Large_Data.ipynb ├── Logistic_Regression_Part2.ipynb ├── Mini-Batch_k-Means_Clustering.ipynb ├── Model_Selection.ipynb ├── MultiOutputRegressor.ipynb ├── Multioutput_Regression_With_Cross-Validation.ipynb ├── Multiple_Linear_Regression.ipynb ├── Multiple_Linear_Regression_Part2.ipynb ├── Multiple_Linear_Regression_with_Normalize_Data.ipynb ├── Multivariate_Adaptive_Regression_Splines.ipynb ├── Multivariate_Adaptive_Regression_Splines_Part2.ipynb ├── Multivariate_relationships.ipynb ├── Naive_Bayes_Classification.ipynb ├── Naive_Bayes_Multinomial_Classification.ipynb ├── Nearest_Neighbor_Classification.ipynb ├── Nested_Cross-Validation.ipynb ├── Nested_Cross-Validation_Part2.ipynb ├── NetworkX.ipynb ├── NetworkX_Part2.ipynb ├── Neural_Network_ANN.ipynb ├── Neural_Network_Part2.ipynb ├── Neural_Networks_Classification.ipynb ├── Neural_Networks_Regression.ipynb ├── Non_Linear_Least_Squares_Curve_Fitting.ipynb ├── Optimization_Parameters.ipynb ├── Ordinal_Regression.ipynb ├── Partial_Least_Squares_Regression_(PLSR).ipynb ├── Passive_Aggressive_Classification.ipynb ├── 
Passive_Aggressive_Classifier.ipynb ├── Passive_Aggressive_Regression.ipynb ├── Perceptron_Algorithm.ipynb ├── Polynomial_Regression.ipynb ├── Polynomial_Regression_Part2.ipynb ├── Polynomial_Regression_Part3.ipynb ├── Principal_Component_Classification.ipynb ├── Principal_Component_Regression.ipynb ├── PyBrain_Dataset.ipynb ├── PyCaret_Stock_Prediction.ipynb ├── PyCaret_Stock_Prediction_Part2.ipynb ├── PyTorch_Linear_Regression.ipynb ├── PyTorch_Regression.ipynb ├── Pynamical_Prediction.ipynb ├── Quantile_Regression.ipynb ├── Quantile_Regression_Part2.ipynb ├── Quasi_Poisson_Regression.ipynb ├── Quasi_Poisson_Regression_Part2.ipynb ├── RANSAC_Regression.ipynb ├── README.md ├── RNN_Tensorflow.ipynb ├── Radius_Neighbors_Regressor.ipynb ├── Random_Forests_Classification.ipynb ├── Random_Forests_Classification_Part2.ipynb ├── Random_Forests_Multioutput_Regression.ipynb ├── Random_Forests_Regression.ipynb ├── Regressor_Chain.ipynb ├── Ridge_Regression.ipynb ├── Robust_Linear_Models.ipynb ├── SMOTE_Near_Miss_Algorithm.ipynb ├── SVC_Predicted_Probabilities.ipynb ├── Simple_Linear_Regression.ipynb ├── Simple_Linear_Regression_Part2.ipynb ├── Simple_Linear_Regression_with_Normalize_Data.ipynb ├── Simple_Multiple_Linear_Regression.ipynb ├── Stepwise_Regression_Backward.ipynb ├── Stepwise_Regression_Forward.ipynb ├── Stochastic_Gradient_Descent_Classification.ipynb ├── Stochastic_Gradient_Descent_Regression.ipynb ├── Stochastic_Gradient_Descent_Regression_Part2.ipynb ├── Support_Vector_Classifiers.ipynb ├── Support_Vector_Machine.ipynb ├── Support_Vector_Machine_Part2.ipynb ├── TensorFlow_LinearRegression2.ipynb ├── TensorFlow_LinearRegressionSingle.ipynb ├── TensorFlow_LinearRegression_Basic.ipynb ├── Theil_Sen_Regression.ipynb ├── Time_Series_Decomposition_Random_Walks.ipynb ├── Time_Series_Forecasting.ipynb ├── Time_Series_Forecasting_Model.ipynb ├── TruncatedSVD.ipynb ├── Tweedie_Regression.ipynb ├── XGBoost_Algorithms.ipynb ├── XGBoost_Classification.ipynb ├── 
XGBoost_Classification_Part_2.ipynb ├── XGBoost_Regression.ipynb ├── XGBoost_Regressor.ipynb ├── XGBoost_Regressor_Part_2.ipynb ├── scikit-learn_Prediction.ipynb ├── shap_prediction.ipynb ├── t_SNE.ipynb ├── t_SNE_Part2.ipynb └── t_SNE_Part3.ipynb ├── Stock_Apps ├── README.md ├── Stock_Apps.PNG ├── Stock_ML_Data_PreProcessing_Apps.py ├── Stock_ML_Feature_Selection_Apps.py ├── Stock_ML_Predict_Apps.py ├── Stock_ML_Predict_Apps_Menu.py └── Stock_ML_Rescale_Data_Apps.py ├── Tensorflow_Basics.ipynb ├── Title.PNG ├── Train_Test_Split.ipynb ├── Train_Validate_Test.ipynb ├── Underfitting_Overfitting_Check_Regression.ipynb ├── Understand_Data.ipynb └── Variance_Inflation_Factor.ipynb /004_Data_PreProcessing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# Data PreProcessing" 7 | ], 8 | "metadata": {} 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "source": [ 13 | "Step 1: Importing the libraries" 14 | ], 15 | "metadata": {} 16 | }, 17 | { 18 | "cell_type": "code", 19 | "source": [ 20 | "import numpy as np\n", 21 | "import pandas as pd\n", 22 | "\n", 23 | "import warnings\n", 24 | "warnings.filterwarnings(\"ignore\")\n", 25 | "\n", 26 | "# fix_yahoo_finance is used to fetch data \n", 27 | "import fix_yahoo_finance as yf\n", 28 | "yf.pdr_override()" 29 | ], 30 | "outputs": [], 31 | "execution_count": 1, 32 | "metadata": { 33 | "collapsed": false, 34 | "outputHidden": false, 35 | "inputHidden": false 36 | } 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "source": [ 41 | "Step 2: Importing dataset" 42 | ], 43 | "metadata": {} 44 | }, 45 | { 46 | "cell_type": "code", 47 | "source": [ 48 | "# input\n", 49 | "symbol = 'AMD'\n", 50 | "start = '2014-01-01'\n", 51 | "end = '2018-08-27'\n", 52 | "\n", 53 | "# Read data \n", 54 | "dataset = yf.download(symbol,start,end)\n", 55 | "\n", 56 | "# Only keep close columns \n", 57 | "dataset.head()" 58 | ], 59 | 
"outputs": [ 60 | { 61 | "output_type": "stream", 62 | "name": "stdout", 63 | "text": [ 64 | "[*********************100%***********************] 1 of 1 downloaded\n" 65 | ] 66 | }, 67 | { 68 | "output_type": "execute_result", 69 | "execution_count": 2, 70 | "data": { 71 | "text/plain": [ 72 | " Open High Low Close Adj Close Volume\n", 73 | "Date \n", 74 | "2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n", 75 | "2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n", 76 | "2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n", 77 | "2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n", 78 | "2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700" 79 | ], 80 | "text/html": [ 81 | "
\n", 82 | "\n", 95 | "\n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | "
OpenHighLowCloseAdj CloseVolume
Date
2014-01-023.853.983.843.953.9520548400
2014-01-033.984.003.884.004.0022887200
2014-01-064.014.183.994.134.1342398300
2014-01-074.194.254.114.184.1842932100
2014-01-084.234.264.144.184.1830678700
\n", 164 | "
" 165 | ] 166 | }, 167 | "metadata": {} 168 | } 169 | ], 170 | "execution_count": 2, 171 | "metadata": { 172 | "collapsed": false, 173 | "outputHidden": false, 174 | "inputHidden": false 175 | } 176 | }, 177 | { 178 | "cell_type": "code", 179 | "source": [ 180 | "X = dataset.iloc[ : , :-1].values\n", 181 | "Y = dataset.iloc[ : , 3].values" 182 | ], 183 | "outputs": [], 184 | "execution_count": 3, 185 | "metadata": { 186 | "collapsed": false, 187 | "outputHidden": false, 188 | "inputHidden": false 189 | } 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "source": [ 194 | "Step 3: Handling the missing data" 195 | ], 196 | "metadata": {} 197 | }, 198 | { 199 | "cell_type": "code", 200 | "source": [ 201 | "from sklearn.preprocessing import Imputer\n", 202 | "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n", 203 | "imputer = imputer.fit(X[ : , 1:3])\n", 204 | "X[ : , 1:3] = imputer.transform(X[ : , 1:3])" 205 | ], 206 | "outputs": [], 207 | "execution_count": 6, 208 | "metadata": { 209 | "collapsed": false, 210 | "outputHidden": false, 211 | "inputHidden": false 212 | } 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "source": [ 217 | "Step 4: Encoding categorical data" 218 | ], 219 | "metadata": {} 220 | }, 221 | { 222 | "cell_type": "code", 223 | "source": [ 224 | "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", 225 | "labelencoder_X = LabelEncoder()\n", 226 | "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])" 227 | ], 228 | "outputs": [], 229 | "execution_count": 7, 230 | "metadata": { 231 | "collapsed": false, 232 | "outputHidden": false, 233 | "inputHidden": false 234 | } 235 | }, 236 | { 237 | "cell_type": "code", 238 | "source": [ 239 | "# Creating a dummy variable\n", 240 | "onehotencoder = OneHotEncoder(categorical_features = [0])\n", 241 | "X = onehotencoder.fit_transform(X).toarray()\n", 242 | "labelencoder_Y = LabelEncoder()\n", 243 | "Y = labelencoder_Y.fit_transform(Y)" 244 | ], 245 | "outputs": 
[], 246 | "execution_count": 8, 247 | "metadata": { 248 | "collapsed": false, 249 | "outputHidden": false, 250 | "inputHidden": false 251 | } 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "source": [ 256 | "Step 5: Splitting the datasets into training sets and Test sets" 257 | ], 258 | "metadata": {} 259 | }, 260 | { 261 | "cell_type": "code", 262 | "source": [ 263 | "from sklearn.model_selection import train_test_split\n", 264 | "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)" 265 | ], 266 | "outputs": [ 267 | { 268 | "output_type": "stream", 269 | "name": "stderr", 270 | "text": [ 271 | "C:\\Users\\Tin Hang\\Anaconda3\\envs\\py35\\lib\\site-packages\\sklearn\\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. 
This module will be removed in 0.20.\n", 272 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n" 273 | ] 274 | } 275 | ], 276 | "execution_count": 9, 277 | "metadata": { 278 | "collapsed": false, 279 | "outputHidden": false, 280 | "inputHidden": false 281 | } 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "source": [ 286 | "Step 6: Feature Scaling" 287 | ], 288 | "metadata": {} 289 | }, 290 | { 291 | "cell_type": "code", 292 | "source": [ 293 | "from sklearn.preprocessing import StandardScaler\n", 294 | "sc_X = StandardScaler()\n", 295 | "X_train = sc_X.fit_transform(X_train)\n", 296 | "X_test = sc_X.transform(X_test)" 297 | ], 298 | "outputs": [], 299 | "execution_count": 10, 300 | "metadata": { 301 | "collapsed": false, 302 | "outputHidden": false, 303 | "inputHidden": false 304 | } 305 | } 306 | ], 307 | "metadata": { 308 | "kernel_info": { 309 | "name": "python3" 310 | }, 311 | "language_info": { 312 | "file_extension": ".py", 313 | "nbconvert_exporter": "python", 314 | "version": "3.5.5", 315 | "mimetype": "text/x-python", 316 | "pygments_lexer": "ipython3", 317 | "codemirror_mode": { 318 | "version": 3, 319 | "name": "ipython" 320 | }, 321 | "name": "python" 322 | }, 323 | "kernelspec": { 324 | "name": "python3", 325 | "language": "python", 326 | "display_name": "Python 3" 327 | }, 328 | "nteract": { 329 | "version": "0.11.9" 330 | } 331 | }, 332 | "nbformat": 4, 333 | "nbformat_minor": 4 334 | } -------------------------------------------------------------------------------- /Basic_Machine_Learning_Predicts.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# Simple Linear Regression for stock using scikit-learn\n" 7 | ], 8 | "metadata": {} 9 | }, 10 | { 11 | "cell_type": "code", 12 | "source": [ 13 | "import pandas as pd\n", 14 | "import numpy as np\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "import math\n", 
17 | "import seaborn as sns\n", 18 | "%matplotlib inline\n", 19 | "\n", 20 | "import warnings\n", 21 | "warnings.filterwarnings(\"ignore\")\n", 22 | "\n", 23 | "import fix_yahoo_finance as yf\n", 24 | "yf.pdr_override()" 25 | ], 26 | "outputs": [], 27 | "execution_count": 1, 28 | "metadata": { 29 | "collapsed": false, 30 | "outputHidden": false, 31 | "inputHidden": false 32 | } 33 | }, 34 | { 35 | "cell_type": "code", 36 | "source": [ 37 | "stock = 'AAPL'\n", 38 | "start = '2016-01-01' \n", 39 | "end = '2018-01-01'\n", 40 | "data = yf.download(stock, start, end)\n", 41 | "data.head()" 42 | ], 43 | "outputs": [ 44 | { 45 | "output_type": "stream", 46 | "name": "stdout", 47 | "text": [ 48 | "[*********************100%***********************] 1 of 1 downloaded\n" 49 | ] 50 | }, 51 | { 52 | "output_type": "execute_result", 53 | "execution_count": 2, 54 | "data": { 55 | "text/plain": " Open High Low Close Adj Close \\\nDate \n2016-01-04 102.610001 105.370003 102.000000 105.349998 100.274513 \n2016-01-05 105.750000 105.849998 102.410004 102.709999 97.761681 \n2016-01-06 100.559998 102.370003 99.870003 100.699997 95.848511 \n2016-01-07 98.680000 100.129997 96.430000 96.449997 91.803276 \n2016-01-08 98.550003 99.110001 96.760002 96.959999 92.288696 \n\n Volume \nDate \n2016-01-04 67649400 \n2016-01-05 55791000 \n2016-01-06 68457400 \n2016-01-07 81094400 \n2016-01-08 70798000 ", 56 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
OpenHighLowCloseAdj CloseVolume
Date
2016-01-04102.610001105.370003102.000000105.349998100.27451367649400
2016-01-05105.750000105.849998102.410004102.70999997.76168155791000
2016-01-06100.559998102.37000399.870003100.69999795.84851168457400
2016-01-0798.680000100.12999796.43000096.44999791.80327681094400
2016-01-0898.55000399.11000196.76000296.95999992.28869670798000
\n
" 57 | }, 58 | "metadata": {} 59 | } 60 | ], 61 | "execution_count": 2, 62 | "metadata": { 63 | "collapsed": false, 64 | "outputHidden": false, 65 | "inputHidden": false 66 | } 67 | }, 68 | { 69 | "cell_type": "code", 70 | "source": [ 71 | "df = data.reset_index()\n", 72 | "df.head()" 73 | ], 74 | "outputs": [ 75 | { 76 | "output_type": "execute_result", 77 | "execution_count": 3, 78 | "data": { 79 | "text/plain": " Date Open High Low Close Adj Close \\\n0 2016-01-04 102.610001 105.370003 102.000000 105.349998 100.274513 \n1 2016-01-05 105.750000 105.849998 102.410004 102.709999 97.761681 \n2 2016-01-06 100.559998 102.370003 99.870003 100.699997 95.848511 \n3 2016-01-07 98.680000 100.129997 96.430000 96.449997 91.803276 \n4 2016-01-08 98.550003 99.110001 96.760002 96.959999 92.288696 \n\n Volume \n0 67649400 \n1 55791000 \n2 68457400 \n3 81094400 \n4 70798000 ", 80 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
DateOpenHighLowCloseAdj CloseVolume
02016-01-04102.610001105.370003102.000000105.349998100.27451367649400
12016-01-05105.750000105.849998102.410004102.70999997.76168155791000
22016-01-06100.559998102.37000399.870003100.69999795.84851168457400
32016-01-0798.680000100.12999796.43000096.44999791.80327681094400
42016-01-0898.55000399.11000196.76000296.95999992.28869670798000
\n
" 81 | }, 82 | "metadata": {} 83 | } 84 | ], 85 | "execution_count": 3, 86 | "metadata": { 87 | "collapsed": false, 88 | "outputHidden": false, 89 | "inputHidden": false 90 | } 91 | }, 92 | { 93 | "cell_type": "code", 94 | "source": [ 95 | "X = df.drop(['Date','Close'], axis=1)\n", 96 | "y = df[['Adj Close']]" 97 | ], 98 | "outputs": [], 99 | "execution_count": 4, 100 | "metadata": { 101 | "collapsed": false, 102 | "outputHidden": false, 103 | "inputHidden": false 104 | } 105 | }, 106 | { 107 | "cell_type": "code", 108 | "source": [ 109 | "df = df.values" 110 | ], 111 | "outputs": [], 112 | "execution_count": 5, 113 | "metadata": { 114 | "collapsed": false, 115 | "outputHidden": false, 116 | "inputHidden": false 117 | } 118 | }, 119 | { 120 | "cell_type": "code", 121 | "source": [ 122 | "from sklearn.model_selection import train_test_split\n", 123 | "\n", 124 | "# Split X and y into training and test sets\n", 125 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)" 126 | ], 127 | "outputs": [], 128 | "execution_count": 6, 129 | "metadata": { 130 | "collapsed": false, 131 | "outputHidden": false, 132 | "inputHidden": false 133 | } 134 | }, 135 | { 136 | "cell_type": "code", 137 | "source": [ 138 | "from sklearn.linear_model import LinearRegression\n", 139 | "\n", 140 | "regression_model = LinearRegression()\n", 141 | "regression_model.fit(X_train, y_train)" 142 | ], 143 | "outputs": [ 144 | { 145 | "output_type": "execute_result", 146 | "execution_count": 7, 147 | "data": { 148 | "text/plain": "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)" 149 | }, 150 | "metadata": {} 151 | } 152 | ], 153 | "execution_count": 7, 154 | "metadata": { 155 | "collapsed": false, 156 | "outputHidden": false, 157 | "inputHidden": false 158 | } 159 | }, 160 | { 161 | "cell_type": "code", 162 | "source": [ 163 | "intercept = regression_model.intercept_[0]\n", 164 | "\n", 165 | "print(\"The intercept for our model is 
{}\".format(intercept))" 166 | ], 167 | "outputs": [ 168 | { 169 | "output_type": "stream", 170 | "name": "stdout", 171 | "text": [ 172 | "The intercept for our model is -1.2047109976265347e-09\n" 173 | ] 174 | } 175 | ], 176 | "execution_count": 8, 177 | "metadata": { 178 | "collapsed": false, 179 | "outputHidden": false, 180 | "inputHidden": false 181 | } 182 | }, 183 | { 184 | "cell_type": "code", 185 | "source": [ 186 | "regression_model.score(X_test, y_test)" 187 | ], 188 | "outputs": [ 189 | { 190 | "output_type": "execute_result", 191 | "execution_count": 9, 192 | "data": { 193 | "text/plain": "1.0" 194 | }, 195 | "metadata": {} 196 | } 197 | ], 198 | "execution_count": 9, 199 | "metadata": { 200 | "collapsed": false, 201 | "outputHidden": false, 202 | "inputHidden": false 203 | } 204 | }, 205 | { 206 | "cell_type": "code", 207 | "source": [ 208 | "from sklearn.metrics import mean_squared_error\n", 209 | "\n", 210 | "y_predict = regression_model.predict(X_test)\n", 211 | "\n", 212 | "regression_model_mse = mean_squared_error(y_predict, y_test)\n", 213 | "\n", 214 | "regression_model_mse" 215 | ], 216 | "outputs": [ 217 | { 218 | "output_type": "execute_result", 219 | "execution_count": 10, 220 | "data": { 221 | "text/plain": "2.8264629110010686e-19" 222 | }, 223 | "metadata": {} 224 | } 225 | ], 226 | "execution_count": 10, 227 | "metadata": { 228 | "collapsed": false, 229 | "outputHidden": false, 230 | "inputHidden": false 231 | } 232 | }, 233 | { 234 | "cell_type": "code", 235 | "source": [ 236 | "math.sqrt(regression_model_mse)" 237 | ], 238 | "outputs": [ 239 | { 240 | "output_type": "execute_result", 241 | "execution_count": 11, 242 | "data": { 243 | "text/plain": "5.316448919157475e-10" 244 | }, 245 | "metadata": {} 246 | } 247 | ], 248 | "execution_count": 11, 249 | "metadata": { 250 | "collapsed": false, 251 | "outputHidden": false, 252 | "inputHidden": false 253 | } 254 | }, 255 | { 256 | "cell_type": "code", 257 | "source": [ 258 | "# input the 
latest Open, High, Low, Adj Close, Volume\n", 259 | "# predicts the Adj Close for that same row (not a next-day forecast)\n", 260 | "regression_model.predict([[167.81, 171.75, 165.19, 166.48, 37232900]])" 261 | ], 262 | "outputs": [ 263 | { 264 | "output_type": "execute_result", 265 | "execution_count": 12, 266 | "data": { 267 | "text/plain": "array([[166.48]])" 268 | }, 269 | "metadata": {} 270 | } 271 | ], 272 | "execution_count": 12, 273 | "metadata": { 274 | "collapsed": false, 275 | "outputHidden": false, 276 | "inputHidden": false 277 | } 278 | } 279 | ], 280 | "metadata": { 281 | "kernel_info": { 282 | "name": "python3" 283 | }, 284 | "kernelspec": { 285 | "name": "python3", 286 | "language": "python", 287 | "display_name": "Python 3" 288 | }, 289 | "language_info": { 290 | "file_extension": ".py", 291 | "pygments_lexer": "ipython3", 292 | "version": "3.5.5", 293 | "mimetype": "text/x-python", 294 | "codemirror_mode": { 295 | "version": 3, 296 | "name": "ipython" 297 | }, 298 | "name": "python", 299 | "nbconvert_exporter": "python" 300 | }, 301 | "nteract": { 302 | "version": "0.28.0" 303 | } 304 | }, 305 | "nbformat": 4, 306 | "nbformat_minor": 4 307 | } -------------------------------------------------------------------------------- /DL_Title.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LastAncientOne/Deep_Learning_Machine_Learning_Stock/e31e9fc689cd2cb6d74a8f2694750afd6e13bcec/DL_Title.PNG -------------------------------------------------------------------------------- /Data_Cleaning_for_Machine_Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# Data Cleaning for Machine Learning with Python" 7 | ], 8 | "metadata": { 9 | "nteract": { 10 | "transient": { 11 | "deleting": false 12 | } 13 | } 14 | } 15 | }, 16 | { 17 | "cell_type": "code", 18 | "source": [ 19 | "import numpy as np\n", 20 | "import 
matplotlib.pyplot as plt\n", 21 | "import pandas as pd\n", 22 | "\n", 23 | "import warnings\n", 24 | "warnings.filterwarnings(\"ignore\")\n", 25 | "\n", 26 | "# fetch yahoo data\n", 27 | "import yfinance as yf\n", 28 | "yf.pdr_override()" 29 | ], 30 | "outputs": [], 31 | "execution_count": 1, 32 | "metadata": { 33 | "collapsed": true, 34 | "jupyter": { 35 | "source_hidden": false, 36 | "outputs_hidden": false 37 | }, 38 | "nteract": { 39 | "transient": { 40 | "deleting": false 41 | } 42 | }, 43 | "execution": { 44 | "iopub.status.busy": "2020-08-21T01:15:24.606Z", 45 | "iopub.execute_input": "2020-08-21T01:15:24.613Z", 46 | "iopub.status.idle": "2020-08-21T01:15:25.604Z", 47 | "shell.execute_reply": "2020-08-21T01:15:25.631Z" 48 | } 49 | } 50 | }, 51 | { 52 | "cell_type": "code", 53 | "source": [ 54 | "# input\n", 55 | "symbol = 'AMD'\n", 56 | "start = '2014-01-01'\n", 57 | "end = '2018-08-27'\n", 58 | "\n", 59 | "# Read data \n", 60 | "dataset = yf.download(symbol,start,end)\n", 61 | "\n", 62 | "# Only keep close columns \n", 63 | "dataset.head()" 64 | ], 65 | "outputs": [ 66 | { 67 | "output_type": "stream", 68 | "name": "stdout", 69 | "text": [ 70 | "[*********************100%***********************] 1 of 1 completed\n" 71 | ] 72 | }, 73 | { 74 | "output_type": "execute_result", 75 | "execution_count": 2, 76 | "data": { 77 | "text/plain": " Adj Close Close High Low Open Volume\nDate \n2014-01-02 3.95 3.95 3.98 3.84 3.85 20548400\n2014-01-03 4.00 4.00 4.00 3.88 3.98 22887200\n2014-01-06 4.13 4.13 4.18 3.99 4.01 42398300\n2014-01-07 4.18 4.18 4.25 4.11 4.19 42932100\n2014-01-08 4.18 4.18 4.26 4.14 4.23 30678700", 78 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Adj CloseCloseHighLowOpenVolume
Date
2014-01-023.953.953.983.843.8520548400
2014-01-034.004.004.003.883.9822887200
2014-01-064.134.134.183.994.0142398300
2014-01-074.184.184.254.114.1942932100
2014-01-084.184.184.264.144.2330678700
\n
" 79 | }, 80 | "metadata": {} 81 | } 82 | ], 83 | "execution_count": 2, 84 | "metadata": { 85 | "collapsed": true, 86 | "jupyter": { 87 | "source_hidden": false, 88 | "outputs_hidden": false 89 | }, 90 | "nteract": { 91 | "transient": { 92 | "deleting": false 93 | } 94 | }, 95 | "execution": { 96 | "iopub.status.busy": "2020-08-21T01:15:25.614Z", 97 | "iopub.execute_input": "2020-08-21T01:15:25.621Z", 98 | "iopub.status.idle": "2020-08-21T01:15:26.860Z", 99 | "shell.execute_reply": "2020-08-21T01:15:27.073Z" 100 | } 101 | } 102 | }, 103 | { 104 | "cell_type": "code", 105 | "source": [ 106 | "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n", 107 | "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n", 108 | "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n", 109 | "dataset['Returns'] = dataset['Adj Close'].pct_change()\n", 110 | "dataset = dataset.dropna()" 111 | ], 112 | "outputs": [], 113 | "execution_count": 3, 114 | "metadata": { 115 | "collapsed": true, 116 | "jupyter": { 117 | "source_hidden": false, 118 | "outputs_hidden": false 119 | }, 120 | "nteract": { 121 | "transient": { 122 | "deleting": false 123 | } 124 | }, 125 | "execution": { 126 | "iopub.status.busy": "2020-08-21T01:15:26.872Z", 127 | "iopub.execute_input": "2020-08-21T01:15:26.882Z", 128 | "iopub.status.idle": "2020-08-21T01:15:26.899Z", 129 | "shell.execute_reply": "2020-08-21T01:15:27.079Z" 130 | } 131 | } 132 | }, 133 | { 134 | "cell_type": "code", 135 | "source": [ 136 | "# summarize the number of unique values in each column\n", 137 | "print(dataset.nunique())" 138 | ], 139 | "outputs": [ 140 | { 141 | "output_type": "stream", 142 | "name": "stdout", 143 | "text": [ 144 | "Adj Close 657\n", 145 | "Close 657\n", 146 | "High 644\n", 147 | "Low 626\n", 148 | "Open 638\n", 149 | "Volume 1168\n", 150 | "Increase_Decrease 2\n", 151 | "Buy_Sell_on_Open 2\n", 152 | 
"Buy_Sell 2\n", 153 | "Returns 1078\n", 154 | "dtype: int64\n" 155 | ] 156 | } 157 | ], 158 | "execution_count": 4, 159 | "metadata": { 160 | "collapsed": true, 161 | "jupyter": { 162 | "source_hidden": false, 163 | "outputs_hidden": false 164 | }, 165 | "nteract": { 166 | "transient": { 167 | "deleting": false 168 | } 169 | }, 170 | "execution": { 171 | "iopub.status.busy": "2020-08-21T01:15:26.915Z", 172 | "iopub.execute_input": "2020-08-21T01:15:26.922Z", 173 | "iopub.status.idle": "2020-08-21T01:15:26.938Z", 174 | "shell.execute_reply": "2020-08-21T01:15:27.085Z" 175 | } 176 | } 177 | }, 178 | { 179 | "cell_type": "code", 180 | "source": [ 181 | "print(dataset.shape)" 182 | ], 183 | "outputs": [ 184 | { 185 | "output_type": "stream", 186 | "name": "stdout", 187 | "text": [ 188 | "(1170, 10)\n" 189 | ] 190 | } 191 | ], 192 | "execution_count": 5, 193 | "metadata": { 194 | "collapsed": true, 195 | "jupyter": { 196 | "source_hidden": false, 197 | "outputs_hidden": false 198 | }, 199 | "nteract": { 200 | "transient": { 201 | "deleting": false 202 | } 203 | }, 204 | "execution": { 205 | "iopub.status.busy": "2020-08-21T01:15:26.950Z", 206 | "iopub.execute_input": "2020-08-21T01:15:26.957Z", 207 | "iopub.status.idle": "2020-08-21T01:15:26.977Z", 208 | "shell.execute_reply": "2020-08-21T01:15:27.100Z" 209 | } 210 | } 211 | }, 212 | { 213 | "cell_type": "code", 214 | "source": [ 215 | "counts = dataset.nunique()\n", 216 | "to_del = [i for i,v in enumerate(counts) if v == 1]\n", 217 | "print(to_del)" 218 | ], 219 | "outputs": [ 220 | { 221 | "output_type": "stream", 222 | "name": "stdout", 223 | "text": [ 224 | "[]\n" 225 | ] 226 | } 227 | ], 228 | "execution_count": 6, 229 | "metadata": { 230 | "collapsed": true, 231 | "jupyter": { 232 | "source_hidden": false, 233 | "outputs_hidden": false 234 | }, 235 | "nteract": { 236 | "transient": { 237 | "deleting": false 238 | } 239 | }, 240 | "execution": { 241 | "iopub.status.busy": "2020-08-21T01:15:26.988Z", 242 | 
"iopub.execute_input": "2020-08-21T01:15:26.993Z", 243 | "iopub.status.idle": "2020-08-21T01:15:27.007Z", 244 | "shell.execute_reply": "2020-08-21T01:15:27.105Z" 245 | } 246 | } 247 | }, 248 | { 249 | "cell_type": "code", 250 | "source": [ 251 | "# drop useless columns\n", 252 | "dataset.drop(to_del, axis=1, inplace=True)\n", 253 | "print(dataset.shape)" 254 | ], 255 | "outputs": [ 256 | { 257 | "output_type": "stream", 258 | "name": "stdout", 259 | "text": [ 260 | "(1170, 10)\n" 261 | ] 262 | } 263 | ], 264 | "execution_count": 7, 265 | "metadata": { 266 | "collapsed": true, 267 | "jupyter": { 268 | "source_hidden": false, 269 | "outputs_hidden": false 270 | }, 271 | "nteract": { 272 | "transient": { 273 | "deleting": false 274 | } 275 | }, 276 | "execution": { 277 | "iopub.status.busy": "2020-08-21T01:15:27.016Z", 278 | "iopub.execute_input": "2020-08-21T01:15:27.022Z", 279 | "iopub.status.idle": "2020-08-21T01:15:27.036Z", 280 | "shell.execute_reply": "2020-08-21T01:15:27.109Z" 281 | } 282 | } 283 | } 284 | ], 285 | "metadata": { 286 | "kernel_info": { 287 | "name": "python3" 288 | }, 289 | "language_info": { 290 | "mimetype": "text/x-python", 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 3 294 | }, 295 | "name": "python", 296 | "version": "3.5.5", 297 | "pygments_lexer": "ipython3", 298 | "nbconvert_exporter": "python", 299 | "file_extension": ".py" 300 | }, 301 | "kernelspec": { 302 | "argv": [ 303 | "C:\\Users\\Tin Hang\\Anaconda3\\envs\\py35\\python.exe", 304 | "-m", 305 | "ipykernel_launcher", 306 | "-f", 307 | "{connection_file}" 308 | ], 309 | "display_name": "Python 3", 310 | "language": "python", 311 | "name": "python3" 312 | }, 313 | "nteract": { 314 | "version": "0.24.1" 315 | } 316 | }, 317 | "nbformat": 4, 318 | "nbformat_minor": 0 319 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 
| 3 | Copyright (c) 2018 LastAncientOne 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![Contributors][contributors-shield]][contributors-url] 3 | [![Forks][forks-shield]][forks-url] 4 | [![Stargazers][stars-shield]][stars-url] 5 | [![Issues][issues-shield]][issues-url] 6 | [![MIT License][license-shield]][license-url] 7 | [![LinkedIn][linkedin-shield]][linkedin-url] 8 | 9 | Buy Me A Coffee 10 | 11 | 12 | 13 | [contributors-shield]: https://img.shields.io/github/contributors/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge 14 | [contributors-url]: https://github.com/LastAncientOne/Deep-Learning-Machine-Learning-Stock/graphs/contributors 15 | [forks-shield]: https://img.shields.io/github/forks/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge 16 | [forks-url]: https://github.com/LastAncientOne/Deep-Learning-Machine-Learning-Stock/network/members 17 | [stars-shield]: https://img.shields.io/github/stars/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge 18 | [stars-url]: https://github.com/LastAncientOne/Deep-Learning-Machine-Learning-Stock/stargazers 19 | [issues-shield]: https://img.shields.io/github/issues/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge 20 | [issues-url]: https://github.com/LastAncientOne/Deep-Learning-Machine-Learning-Stock/issues 21 | [license-shield]: https://img.shields.io/github/license/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge 22 | [license-url]: LICENSE 23 | [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555 24 | [linkedin-url]: https://linkedin.com/in/tin-hang 25 | 26 | 27 | 28 | 29 |

Deep Learning and Machine Learning for Stock Predictions

30 | 31 | Description: This is a comprehensive study and analysis of stocks using deep learning (DL) and machine learning (ML) techniques. Both machine learning and deep learning are types of artificial intelligence (AI). The objective is to predict stock behavior by employing various machine learning and deep learning algorithms. The focus is on experimenting with stock data to understand how and why certain methods are effective, as well as identifying reasons for their potential limitations. Different stock strategies are explored within the context of machine learning and deep learning. Technical Analysis and Fundamental Analysis are utilized to predict future stock prices using these AI techniques, encompassing both long-term and short-term predictions. 32 | 33 | Machine learning is a branch of artificial intelligence that involves the development of algorithms capable of automatically adapting and generating outputs by processing structured data. On the other hand, deep learning is a subset of machine learning that employs similar algorithms but with additional layers of complexity, enabling different interpretations of the data. The network of algorithms used in deep learning is known as artificial neural networks, which mimic the interconnectedness of neural pathways in the human brain. 34 | 35 | Deep learning and machine learning are powerful approaches that have revolutionized the AI landscape. Understanding the fundamentals of these techniques and the commonly used algorithms is essential for aspiring data scientists and AI enthusiasts. Regression, as a fundamental concept in predictive modeling, plays a crucial role in analyzing and predicting continuous variables. By harnessing the capabilities of these algorithms and techniques, we can unlock incredible potential in various domains, leading to advancements and improvements in numerous industries. 36 | 37 | ### Machine Learning Step-by-Step 38 | 1. Collecting/Gathering Data. 39 | 2. 
Preparing the Data - load the data and prepare it for machine learning training. 40 | 3. Choosing a Model. 41 | 4. Training the Model. 42 | 5. Evaluating the Model. 43 | 6. Parameter Tuning. 44 | 7. Make Predictions. 45 | 46 | ### Deep Learning Model Step-by-Step 47 | 1. Define the Model. 48 | 2. Compile the Model. 49 | 3. Fit the Model with the training dataset. 50 | 4. Make Predictions. 51 | 52 |

Programming Languages and Tools:

53 |

python Nteract Anaconda Spyder Jupyter Notebook Notepad++

54 | 55 | ### Three main types of data: Categorical, Discrete, and Continuous variables 56 | 1. Categorical variable (Qualitative): Label data or distinct groups. 57 | Example: location, gender, material type, payment, highest level of education 58 | 2. Discrete variable (Class Data): Numeric variables that have a countable number of values between any two values. 59 | Example: customer complaints or number of flaws or defects, Children per Household, age (number of years) 60 | 3. Continuous variable (Quantitative): Numeric variables that have an infinite number of values between any two values. 61 | Example: length of a part or the date and time a payment is received, running distance, age (infinitely accurate and using an infinite number of decimal places) 62 | 63 | ### Data Use 64 | 1. 'Quantitative data' is used with all three centre measures (mean, median and mode) and all spread measures. 65 | 2. 'Class data' is used with the median and mode. 66 | 3. 'Qualitative data' is used only with the mode. 67 | 68 | ### Two types of problems: 69 | 1. Classification (predict label) 70 | 2. Regression (predict values) 71 | 72 | ### Bias-Variance Tradeoff 73 | #### Bias 74 | - Bias is the difference between our actual and predicted values. 75 | - Bias is the set of simple assumptions that our model makes about our data to be able to predict new data. 76 | - Assumptions made by a model to make a function easier to learn. 77 | #### Variance 78 | - Variance is the opposite of bias. 79 | - Variance is the variability of the model's prediction for a given data point, or a value that tells us the spread of our data. 80 | - If you train your model on training data and obtain a very low error, but obtain a high error upon changing the data and then training the same model, the model has high variance. 81 | 82 | ### Overfitting, Underfitting, and the bias-variance tradeoff 83 | Overfitting is when the model memorizes the noise and fits too closely to the training set. 
A good fit is a model that learns the training dataset and generalizes well to the hold-out dataset. Underfitting is when the model cannot establish the dominant trend within the data, resulting in training errors and poor performance of the model. 84 | 85 | #### Overfitting: 86 | An overfitted model fits the training data well, matching or coming close to each observation; however, the model misses the point and random noise is captured inside the model. The model has low training error and high CV error, low in-sample error and high out-of-sample error, and high variance. 87 | 1. High Train Accuracy 88 | 2. Low Test Accuracy 89 | #### Avoiding Overfitting: 90 | 1. Early stopping - stop the training before the model starts learning the noise within the data. 91 | 2. Training with more data - adding more data will increase the accuracy of the model or can help algorithms detect the signal better. 92 | 3. Data augmentation - add clean and relevant data into the training data. 93 | 4. Feature selection - Use important features within the data. Remove unimportant features. 94 | 5. Regularization - reduce features by using regularization methods such as L1 regularization, Lasso regularization, and dropout. 95 | 6. Ensemble methods - combine predictions from multiple separate models such as bagging and boosting. 96 | 7. Increase training data. 97 | #### Good fit: 98 | 1. High Train Accuracy 99 | 2. High Test Accuracy 100 | #### Underfitting: 101 | An underfitted model is not perfect, so it does not capture the underlying logic of the data. Therefore, the model has low accuracy and does not have strong predictive power. The model has large training set error, large in-sample error, and high bias. 102 | 1. Low Train Accuracy 103 | 2. Low Test Accuracy 104 | #### Avoiding Underfitting: 105 | 1. Decrease regularization - regularization reduces the variance of a model by applying a penalty to the input parameters with the larger coefficients (such as L1 regularization, Lasso regularization, dropout, etc.); decreasing it lets the model fit the data more closely. 
106 | 2. Increase the duration of training - extend the duration of training, because stopping the training early will cause an underfit model. 107 | 3. Feature selection - if not enough predictive features are present, adding more features or features with greater importance would improve the model. 108 | 4. Increase the number of features - performing feature engineering 109 | 5. Remove noise from the data 110 | 111 | 112 | ## Python Reviews 113 | Steps 1 through 8 are a review of Python. 114 | After step 8, everything you need to know relates to data analysis, data engineering, data science, machine learning, and deep learning. 115 | Here is the link to the Python tutorial: 116 | [Python Tutorial for Stock Analysis](https://github.com/LastAncientOne/SimpleStockAnalysisPython) 117 | 118 | 119 | ## List of Machine Learning Algorithms for Stock Trading 120 | ### Most Common Regression Algorithms 121 | 1. Linear Regression Model 122 | 2. Logistic Regression 123 | 3. Lasso Regression 124 | 4. Support Vector Machines 125 | 5. Polynomial Regression 126 | 6. Stepwise Regression 127 | 7. Ridge Regression 128 | 8. Multivariate Regression Algorithm 129 | 9. Multiple Regression Algorithm 130 | 10. K Means Clustering Algorithm 131 | 11. Naïve Bayes Classifier Algorithm 132 | 12. Random Forests 133 | 13. Decision Trees 134 | 14. Nearest Neighbours 135 | 15. Lasso Regression 136 | 16. ElasticNet Regression 137 | 17. Reinforcement Learning 138 | 18. Artificial Intelligence 139 | 19. MultiModal Network 140 | 20. Biologic Intelligence 141 | 142 | ### Different Types of Machine Learning Algorithms and Models 143 | Algorithms are processes and sets of instructions used to solve a class of problems. Additionally, algorithms perform computations such as calculations, data processing, automated reasoning, and other tasks. A machine learning algorithm is a method that enables systems to learn and improve automatically from experience, without the need for explicit formulation. 
144 | 145 | # Prerequisites 146 | Python 3.5+ 147 | Jupyter Notebook Python 3 148 | Windows 7 or Windows 10 149 | 150 | ### Download Software 151 | https://www.python.org/ 152 | 153 |

Programming Language:

154 |

python 155 | 156 |

Tools:

157 |

Anaconda Spyder Jupyter Notebook Notepad++ Notepad++

158 | 159 | 160 | 161 | ## Authors 162 | ### Tin Hang 163 | 164 | ## Disclaimer 165 | 🔻 Do not use this code for investing or trading in the stock market. However, if you are interested in the stock market, you should read :books: books that relate to the stock market, investment, or finance. On the other hand, if you are into quant or machine learning, read books about 📘 machine trading, algorithmic trading, and quantitative trading. You should read 📗 about Machine Learning and Deep Learning to understand the concepts, theory, and the mathematics. On the other hand, you should read academic papers and do research online about machine learning and deep learning on :computer: 166 | 167 | ### Certain portions of the code may encounter issues stemming from updates or obsolescence within specific library packages. Consequently, adjustments will be necessary, contingent upon the Python package library employed. It may be imperative to either upgrade or downgrade certain libraries accordingly. 168 | 169 | ## 🔴 Warning: This is not financial advice; it should not be relied upon for investment or trading decisions, as it is for educational purposes only. 
170 | -------------------------------------------------------------------------------- /Stock_Algorithms/30_Regression_Models.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# 30 Regression Models" 7 | ], 8 | "metadata": { 9 | "nteract": { 10 | "transient": { 11 | "deleting": false 12 | } 13 | } 14 | } 15 | }, 16 | { 17 | "cell_type": "code", 18 | "source": [ 19 | "from lazypredict.Supervised import LazyRegressor\n", 20 | "from pandas.plotting import scatter_matrix# Scikit-learn packages\n", 21 | "from sklearn.linear_model import LinearRegression\n", 22 | "from sklearn.tree import DecisionTreeRegressor\n", 23 | "from sklearn.ensemble import ExtraTreesRegressor\n", 24 | "from sklearn import metrics\n", 25 | "from sklearn.metrics import mean_squared_error# Hide warnings\n", 26 | "from sklearn.model_selection import train_test_split \n", 27 | "\n", 28 | "import warnings\n", 29 | "warnings.filterwarnings(\"ignore\")\n", 30 | "\n", 31 | "import yfinance as yf\n", 32 | "yf.pdr_override()" 33 | ], 34 | "outputs": [ 35 | { 36 | "output_type": "stream", 37 | "name": "stderr", 38 | "text": [ 39 | "C:\\Users\\Tin Hang\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:143: FutureWarning: The sklearn.utils.testing module is deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.utils. 
Anything that cannot be imported from sklearn.utils is now part of the private API.\n", 40 | " warnings.warn(message, FutureWarning)\n" 41 | ] 42 | } 43 | ], 44 | "execution_count": 1, 45 | "metadata": { 46 | "collapsed": true, 47 | "jupyter": { 48 | "source_hidden": false, 49 | "outputs_hidden": false 50 | }, 51 | "nteract": { 52 | "transient": { 53 | "deleting": false 54 | } 55 | }, 56 | "execution": { 57 | "shell.execute_reply": "2021-04-28T00:38:36.736Z", 58 | "iopub.status.busy": "2021-04-28T00:38:34.815Z", 59 | "iopub.execute_input": "2021-04-28T00:38:34.824Z", 60 | "iopub.status.idle": "2021-04-28T00:38:36.720Z" 61 | } 62 | } 63 | }, 64 | { 65 | "cell_type": "code", 66 | "source": [ 67 | "# input\n", 68 | "symbol = 'AMD'\n", 69 | "start = '2014-01-01'\n", 70 | "end = '2018-08-27'\n", 71 | "\n", 72 | "# Read data \n", 73 | "dataset = yf.download(symbol,start,end)\n", 74 | "\n", 75 | "# Only keep close columns \n", 76 | "dataset.head()" 77 | ], 78 | "outputs": [ 79 | { 80 | "output_type": "stream", 81 | "name": "stdout", 82 | "text": [ 83 | "[*********************100%***********************] 1 of 1 completed\n" 84 | ] 85 | }, 86 | { 87 | "output_type": "execute_result", 88 | "execution_count": 2, 89 | "data": { 90 | "text/plain": " Open High Low Close Adj Close Volume\nDate \n2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700", 91 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
OpenHighLowCloseAdj CloseVolume
Date
2014-01-023.853.983.843.953.9520548400
2014-01-033.984.003.884.004.0022887200
2014-01-064.014.183.994.134.1342398300
2014-01-074.194.254.114.184.1842932100
2014-01-084.234.264.144.184.1830678700
\n
" 92 | }, 93 | "metadata": {} 94 | } 95 | ], 96 | "execution_count": 2, 97 | "metadata": { 98 | "collapsed": true, 99 | "jupyter": { 100 | "source_hidden": false, 101 | "outputs_hidden": false 102 | }, 103 | "nteract": { 104 | "transient": { 105 | "deleting": false 106 | } 107 | }, 108 | "execution": { 109 | "iopub.status.busy": "2021-04-28T00:38:36.725Z", 110 | "iopub.execute_input": "2021-04-28T00:38:36.729Z", 111 | "iopub.status.idle": "2021-04-28T00:38:37.064Z", 112 | "shell.execute_reply": "2021-04-28T00:38:37.060Z" 113 | } 114 | } 115 | }, 116 | { 117 | "cell_type": "code", 118 | "source": [ 119 | "# Creating train test split\n", 120 | "X = dataset.drop(columns=['Adj Close'])\n", 121 | "y = dataset['Adj Close']\n", 122 | "\n", 123 | "offset = int(X.shape[0] * 0.9)\n", 124 | "\n", 125 | "X_train, y_train = X[:offset], y[:offset]\n", 126 | "X_test, y_test = X[offset:], y[offset:]" 127 | ], 128 | "outputs": [], 129 | "execution_count": 3, 130 | "metadata": { 131 | "collapsed": true, 132 | "jupyter": { 133 | "source_hidden": false, 134 | "outputs_hidden": false 135 | }, 136 | "nteract": { 137 | "transient": { 138 | "deleting": false 139 | } 140 | }, 141 | "execution": { 142 | "iopub.status.busy": "2021-04-28T00:38:37.069Z", 143 | "iopub.execute_input": "2021-04-28T00:38:37.071Z", 144 | "iopub.status.idle": "2021-04-28T00:38:37.076Z", 145 | "shell.execute_reply": "2021-04-28T00:38:37.092Z" 146 | } 147 | } 148 | }, 149 | { 150 | "cell_type": "code", 151 | "source": [ 152 | "reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None)\n", 153 | "models, predictions = reg.fit(X_train, X_test, y_train, y_test)\n", 154 | "print(models)" 155 | ], 156 | "outputs": [ 157 | { 158 | "output_type": "stream", 159 | "name": "stderr", 160 | "text": [ 161 | "100%|██████████| 42/42 [00:02<00:00, 17.49it/s]\n" 162 | ] 163 | }, 164 | { 165 | "output_type": "stream", 166 | "name": "stdout", 167 | "text": [ 168 | " Adjusted R-Squared R-Squared RMSE Time Taken\n", 169 | 
"Model \n", 170 | "RANSACRegressor 1.00 1.00 0.00 0.01\n", 171 | "Lars 1.00 1.00 0.00 0.01\n", 172 | "HuberRegressor 1.00 1.00 0.00 0.05\n", 173 | "LassoLarsCV 1.00 1.00 0.00 0.01\n", 174 | "LassoLarsIC 1.00 1.00 0.00 0.01\n", 175 | "LinearRegression 1.00 1.00 0.00 0.01\n", 176 | "TransformedTargetRegressor 1.00 1.00 0.00 0.01\n", 177 | "LarsCV 1.00 1.00 0.00 0.01\n", 178 | "OrthogonalMatchingPursuit 1.00 1.00 0.00 0.01\n", 179 | "OrthogonalMatchingPursuitCV 1.00 1.00 0.00 0.01\n", 180 | "BayesianRidge 1.00 1.00 0.00 0.01\n", 181 | "LinearSVR 1.00 1.00 0.00 0.02\n", 182 | "RidgeCV 1.00 1.00 0.03 0.01\n", 183 | "Ridge 1.00 1.00 0.10 0.01\n", 184 | "PassiveAggressiveRegressor 1.00 1.00 0.11 0.01\n", 185 | "LassoCV 1.00 1.00 0.11 0.05\n", 186 | "ElasticNetCV 1.00 1.00 0.17 0.05\n", 187 | "SGDRegressor 1.00 1.00 0.21 0.01\n", 188 | "MLPRegressor 1.00 1.00 0.21 0.79\n", 189 | "GeneralizedLinearRegressor 0.70 0.71 1.87 0.01\n", 190 | "TweedieRegressor 0.70 0.71 1.87 0.01\n", 191 | "ElasticNet 0.67 0.68 1.97 0.01\n", 192 | "Lasso 0.63 0.64 2.09 0.01\n", 193 | "GradientBoostingRegressor 0.62 0.64 2.11 0.10\n", 194 | "XGBRegressor 0.61 0.63 2.13 0.06\n", 195 | "ExtraTreesRegressor 0.59 0.61 2.19 0.15\n", 196 | "DecisionTreeRegressor 0.57 0.59 2.23 0.01\n", 197 | "BaggingRegressor 0.57 0.59 2.23 0.03\n", 198 | "RandomForestRegressor 0.57 0.59 2.24 0.21\n", 199 | "ExtraTreeRegressor 0.46 0.49 2.50 0.01\n", 200 | "KNeighborsRegressor 0.46 0.48 2.52 0.01\n", 201 | "LGBMRegressor 0.45 0.47 2.54 0.05\n", 202 | "HistGradientBoostingRegressor 0.45 0.47 2.54 0.42\n", 203 | "AdaBoostRegressor 0.26 0.29 2.94 0.03\n", 204 | "NuSVR -0.24 -0.19 3.81 0.06\n", 205 | "SVR -0.28 -0.22 3.86 0.02\n", 206 | "GammaRegressor -0.35 -0.29 3.97 0.01\n", 207 | "GaussianProcessRegressor -1.86 -1.74 5.78 0.07\n", 208 | "KernelRidge -2.29 -2.15 6.20 0.04\n", 209 | "PoissonRegressor -3.20 -3.02 7.01 0.01\n", 210 | "DummyRegressor -5.70 -5.42 8.85 0.01\n", 211 | "LassoLars -5.70 -5.42 8.85 0.01\n" 212 | ] 
213 | } 214 | ], 215 | "execution_count": 4, 216 | "metadata": { 217 | "collapsed": true, 218 | "jupyter": { 219 | "source_hidden": false, 220 | "outputs_hidden": false 221 | }, 222 | "nteract": { 223 | "transient": { 224 | "deleting": false 225 | } 226 | }, 227 | "execution": { 228 | "iopub.status.busy": "2021-04-28T00:38:37.080Z", 229 | "iopub.execute_input": "2021-04-28T00:38:37.084Z", 230 | "iopub.status.idle": "2021-04-28T00:38:39.506Z", 231 | "shell.execute_reply": "2021-04-28T00:38:39.501Z" 232 | } 233 | } 234 | } 235 | ], 236 | "metadata": { 237 | "kernel_info": { 238 | "name": "python3" 239 | }, 240 | "language_info": { 241 | "name": "python", 242 | "version": "3.6.12", 243 | "mimetype": "text/x-python", 244 | "codemirror_mode": { 245 | "name": "ipython", 246 | "version": 3 247 | }, 248 | "pygments_lexer": "ipython3", 249 | "nbconvert_exporter": "python", 250 | "file_extension": ".py" 251 | }, 252 | "kernelspec": { 253 | "argv": [ 254 | "C:/Users/Tin Hang/Anaconda3\\python.exe", 255 | "-m", 256 | "ipykernel_launcher", 257 | "-f", 258 | "{connection_file}" 259 | ], 260 | "display_name": "Python 3", 261 | "language": "python", 262 | "name": "python3" 263 | }, 264 | "nteract": { 265 | "version": "0.28.0" 266 | } 267 | }, 268 | "nbformat": 4, 269 | "nbformat_minor": 0 270 | } -------------------------------------------------------------------------------- /Stock_Algorithms/Algorithms.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LastAncientOne/Deep_Learning_Machine_Learning_Stock/e31e9fc689cd2cb6d74a8f2694750afd6e13bcec/Stock_Algorithms/Algorithms.PNG -------------------------------------------------------------------------------- /Stock_Algorithms/Genetic_Algorithm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# Genetic Algorithm" 7 | ], 8 | "metadata": { 9 | "nteract": 
{ 10 | "transient": { 11 | "deleting": false 12 | } 13 | } 14 | } 15 | }, 16 | { 17 | "cell_type": "code", 18 | "source": [ 19 | "import numpy as np\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import pandas as pd\n", 22 | "\n", 23 | "# yahoo finance is used to fetch data \n", 24 | "import yfinance as yf\n", 25 | "yf.pdr_override()" 26 | ], 27 | "outputs": [], 28 | "execution_count": 1, 29 | "metadata": { 30 | "collapsed": true, 31 | "jupyter": { 32 | "source_hidden": false, 33 | "outputs_hidden": false 34 | }, 35 | "nteract": { 36 | "transient": { 37 | "deleting": false 38 | } 39 | }, 40 | "execution": { 41 | "iopub.status.busy": "2023-10-31T22:01:03.828Z", 42 | "iopub.execute_input": "2023-10-31T22:01:03.832Z", 43 | "shell.execute_reply": "2023-10-31T22:01:04.394Z", 44 | "iopub.status.idle": "2023-10-31T22:01:04.399Z" 45 | } 46 | } 47 | }, 48 | { 49 | "cell_type": "code", 50 | "source": [ 51 | "# input\n", 52 | "symbol = 'AMD'\n", 53 | "start = '2014-01-01'\n", 54 | "end = '2019-01-01'\n", 55 | "\n", 56 | "# Read data \n", 57 | "dataset = yf.download(symbol,start,end)" 58 | ], 59 | "outputs": [ 60 | { 61 | "output_type": "stream", 62 | "name": "stdout", 63 | "text": [ 64 | "[*********************100%***********************] 1 of 1 completed\n" 65 | ] 66 | } 67 | ], 68 | "execution_count": 2, 69 | "metadata": { 70 | "collapsed": true, 71 | "jupyter": { 72 | "source_hidden": false, 73 | "outputs_hidden": false 74 | }, 75 | "nteract": { 76 | "transient": { 77 | "deleting": false 78 | } 79 | }, 80 | "execution": { 81 | "iopub.status.busy": "2023-10-31T22:01:04.404Z", 82 | "iopub.execute_input": "2023-10-31T22:01:04.406Z", 83 | "shell.execute_reply": "2023-10-31T22:01:04.798Z", 84 | "iopub.status.idle": "2023-10-31T22:01:04.805Z" 85 | } 86 | } 87 | }, 88 | { 89 | "cell_type": "code", 90 | "source": [ 91 | "dataset['Open_Close'] = (dataset['Open'] - dataset['Adj Close'])/dataset['Open']\n", 92 | "dataset['High_Low'] = (dataset['High'] - 
dataset['Low'])/dataset['Low']\n", 93 | "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n", 94 | "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n", 95 | "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n", 96 | "dataset['Returns'] = dataset['Adj Close'].pct_change()\n", 97 | "dataset = dataset.dropna()\n", 98 | "dataset.head()\n", 99 | "\n", 100 | "# View Columns\n", 101 | "dataset.head()" 102 | ], 103 | "outputs": [ 104 | { 105 | "output_type": "execute_result", 106 | "execution_count": 3, 107 | "data": { 108 | "text/plain": " Open High Low Close Adj Close Volume Open_Close \\\nDate \n2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200 -0.005025 \n2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300 -0.029925 \n2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100 0.002387 \n2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700 0.011820 \n2014-01-09 4.20 4.23 4.05 4.09 4.09 30667600 0.026190 \n\n High_Low Increase_Decrease Buy_Sell_on_Open Buy_Sell Returns \nDate \n2014-01-03 0.030928 1 1 1 0.012658 \n2014-01-06 0.047619 1 1 1 0.032500 \n2014-01-07 0.034063 0 1 0 0.012106 \n2014-01-08 0.028986 0 0 0 0.000000 \n2014-01-09 0.044444 0 0 1 -0.021531 ", 109 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
OpenHighLowCloseAdj CloseVolumeOpen_CloseHigh_LowIncrease_DecreaseBuy_Sell_on_OpenBuy_SellReturns
Date
2014-01-033.984.003.884.004.0022887200-0.0050250.0309281110.012658
2014-01-064.014.183.994.134.1342398300-0.0299250.0476191110.032500
2014-01-074.194.254.114.184.18429321000.0023870.0340630100.012106
2014-01-084.234.264.144.184.18306787000.0118200.0289860000.000000
2014-01-094.204.234.054.094.09306676000.0261900.044444001-0.021531
\n
" 110 | }, 111 | "metadata": {} 112 | } 113 | ], 114 | "execution_count": 3, 115 | "metadata": { 116 | "collapsed": true, 117 | "jupyter": { 118 | "source_hidden": false, 119 | "outputs_hidden": false 120 | }, 121 | "nteract": { 122 | "transient": { 123 | "deleting": false 124 | } 125 | }, 126 | "execution": { 127 | "iopub.status.busy": "2023-10-31T22:01:04.810Z", 128 | "iopub.execute_input": "2023-10-31T22:01:04.815Z", 129 | "iopub.status.idle": "2023-10-31T22:01:04.835Z", 130 | "shell.execute_reply": "2023-10-31T22:01:04.859Z" 131 | } 132 | } 133 | }, 134 | { 135 | "cell_type": "code", 136 | "source": [ 137 | "# Define a placeholder for the fitness function (you should implement this)\n", 138 | "def fitness_function(individual):\n", 139 | " # Calculate the fitness of the individual\n", 140 | " # You should define your fitness function based on your problem\n", 141 | " return 0 # Replace this with your actual fitness calculation\n", 142 | "\n", 143 | "def genetic_algorithm(population_size, num_generations, mutation_rate):\n", 144 | " population = np.random.randint(2, size=(population_size, 4)) # Initialize the population\n", 145 | "\n", 146 | " for generation in range(num_generations):\n", 147 | " fitness_scores = [fitness_function(individual) for individual in population]\n", 148 | " best_individual = population[np.argmax(fitness_scores)]\n", 149 | " best_fitness = max(fitness_scores)\n", 150 | "\n", 151 | " if generation % 10 == 0:\n", 152 | " print(f\"Generation {generation}: Best Fitness = {best_fitness}\")\n", 153 | "\n", 154 | " # Create a new population using mutation and crossover\n", 155 | " new_population = []\n", 156 | "\n", 157 | " for _ in range(population_size):\n", 158 | " parent1 = population[np.random.choice(range(population_size))]\n", 159 | " parent2 = population[np.random.choice(range(population_size))]\n", 160 | " crossover_point = np.random.randint(4)\n", 161 | " child = np.zeros(4) # Initialize child as an array of zeros\n", 162 | " 
child[:crossover_point] = parent1[:crossover_point]\n", 163 | " child[crossover_point:] = parent2[crossover_point:]\n", 164 | " # Apply mutation\n", 165 | " mutation_mask = (np.random.rand(4) < mutation_rate).astype(int)\n", 166 | " child = (child + mutation_mask) % 2\n", 167 | " new_population.append(child)\n", 168 | "\n", 169 | " population = np.array(new_population)\n", 170 | "\n", 171 | " return best_individual, best_fitness" 172 | ], 173 | "outputs": [], 174 | "execution_count": 4, 175 | "metadata": { 176 | "collapsed": true, 177 | "jupyter": { 178 | "source_hidden": false, 179 | "outputs_hidden": false 180 | }, 181 | "nteract": { 182 | "transient": { 183 | "deleting": false 184 | } 185 | }, 186 | "execution": { 187 | "iopub.status.busy": "2023-10-31T22:01:04.840Z", 188 | "iopub.execute_input": "2023-10-31T22:01:04.842Z", 189 | "iopub.status.idle": "2023-10-31T22:01:04.848Z", 190 | "shell.execute_reply": "2023-10-31T22:01:04.863Z" 191 | } 192 | } 193 | }, 194 | { 195 | "cell_type": "code", 196 | "source": [ 197 | "# Set genetic algorithm parameters\n", 198 | "population_size = 100\n", 199 | "num_generations = 100\n", 200 | "mutation_rate = 0.01\n", 201 | "\n", 202 | "# Run the genetic algorithm\n", 203 | "best_individual, best_fitness = genetic_algorithm(population_size, num_generations, mutation_rate)\n", 204 | "\n", 205 | "print(\"Best Individual:\", best_individual)\n", 206 | "print(\"Best Fitness:\", best_fitness)" 207 | ], 208 | "outputs": [ 209 | { 210 | "output_type": "stream", 211 | "name": "stdout", 212 | "text": [ 213 | "Generation 0: Best Fitness = 0\n", 214 | "Generation 10: Best Fitness = 0\n", 215 | "Generation 20: Best Fitness = 0\n", 216 | "Generation 30: Best Fitness = 0\n", 217 | "Generation 40: Best Fitness = 0\n", 218 | "Generation 50: Best Fitness = 0\n", 219 | "Generation 60: Best Fitness = 0\n", 220 | "Generation 70: Best Fitness = 0\n", 221 | "Generation 80: Best Fitness = 0\n", 222 | "Generation 90: Best Fitness = 0\n", 223 | "Best 
Individual: [1. 1. 1. 1.]\n", 224 | "Best Fitness: 0\n" 225 | ] 226 | } 227 | ], 228 | "execution_count": 5, 229 | "metadata": { 230 | "collapsed": true, 231 | "jupyter": { 232 | "source_hidden": false, 233 | "outputs_hidden": false 234 | }, 235 | "nteract": { 236 | "transient": { 237 | "deleting": false 238 | } 239 | }, 240 | "execution": { 241 | "iopub.status.busy": "2023-10-31T22:01:04.852Z", 242 | "iopub.execute_input": "2023-10-31T22:01:04.855Z", 243 | "iopub.status.idle": "2023-10-31T22:01:05.294Z", 244 | "shell.execute_reply": "2023-10-31T22:01:05.301Z" 245 | } 246 | } 247 | } 248 | ], 249 | "metadata": { 250 | "kernel_info": { 251 | "name": "python3" 252 | }, 253 | "language_info": { 254 | "name": "python", 255 | "version": "3.7.6", 256 | "mimetype": "text/x-python", 257 | "codemirror_mode": { 258 | "name": "ipython", 259 | "version": 3 260 | }, 261 | "pygments_lexer": "ipython3", 262 | "nbconvert_exporter": "python", 263 | "file_extension": ".py" 264 | }, 265 | "kernelspec": { 266 | "argv": [ 267 | "C:/Users/Tin Hang/anaconda3\\python.exe", 268 | "-m", 269 | "ipykernel_launcher", 270 | "-f", 271 | "{connection_file}" 272 | ], 273 | "display_name": "Python 3", 274 | "language": "python", 275 | "name": "python3" 276 | }, 277 | "nteract": { 278 | "version": "0.28.0" 279 | } 280 | }, 281 | "nbformat": 4, 282 | "nbformat_minor": 0 283 | } -------------------------------------------------------------------------------- /Stock_Algorithms/Gradient_Boosting_Regressor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# Gradient Boosting Regressor" 7 | ], 8 | "metadata": {} 9 | }, 10 | { 11 | "cell_type": "code", 12 | "source": [ 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import pandas as pd\n", 16 | "\n", 17 | "import warnings\n", 18 | "warnings.filterwarnings(\"ignore\")\n", 19 | "\n", 20 | "# fix_yahoo_finance is used 
to fetch data \n", 21 | "import fix_yahoo_finance as yf\n", 22 | "yf.pdr_override()" 23 | ], 24 | "outputs": [], 25 | "execution_count": null, 26 | "metadata": { 27 | "collapsed": false, 28 | "outputHidden": false, 29 | "inputHidden": false 30 | } 31 | }, 32 | { 33 | "cell_type": "code", 34 | "source": [ 35 | "# input\n", 36 | "symbol = 'AMD'\n", 37 | "start = '2007-01-01'\n", 38 | "end = '2018-11-16'\n", 39 | "\n", 40 | "# Read data \n", 41 | "dataset = yf.download(symbol,start,end)\n", 42 | "\n", 43 | "# View Columns\n", 44 | "dataset.head()" 45 | ], 46 | "outputs": [], 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false, 50 | "outputHidden": false, 51 | "inputHidden": false 52 | } 53 | }, 54 | { 55 | "cell_type": "code", 56 | "source": [ 57 | "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n", 58 | "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n", 59 | "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n", 60 | "dataset['Returns'] = dataset['Adj Close'].pct_change()\n", 61 | "dataset = dataset.dropna()\n", 62 | "dataset.head()" 63 | ], 64 | "outputs": [], 65 | "execution_count": null, 66 | "metadata": { 67 | "collapsed": false, 68 | "outputHidden": false, 69 | "inputHidden": false 70 | } 71 | }, 72 | { 73 | "cell_type": "code", 74 | "source": [ 75 | "X = dataset[['Open', 'High', 'Low', 'Volume']].values\n", 76 | "y = dataset['Adj Close'].values" 77 | ], 78 | "outputs": [], 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": false, 82 | "outputHidden": false, 83 | "inputHidden": false 84 | } 85 | }, 86 | { 87 | "cell_type": "code", 88 | "source": [ 89 | "# from sklearn.cross_validation import train_test_split\n", 90 | "from sklearn.model_selection import train_test_split\n", 91 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1/4, random_state = 0) " 92 | ], 93 | "outputs": [],
94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false, 97 | "outputHidden": false, 98 | "inputHidden": false 99 | } 100 | }, 101 | { 102 | "cell_type": "code", 103 | "source": [ 104 | "from sklearn.ensemble import GradientBoostingRegressor\n", 105 | "\n", 106 | "gb = GradientBoostingRegressor(max_depth=4, \n", 107 | " n_estimators=200,\n", 108 | " random_state=2)" 109 | ], 110 | "outputs": [], 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false, 114 | "outputHidden": false, 115 | "inputHidden": false 116 | } 117 | }, 118 | { 119 | "cell_type": "code", 120 | "source": [ 121 | "# Fit gb to the training set\n", 122 | "gb.fit(X_train, y_train)\n", 123 | "\n", 124 | "# Predict test set labels\n", 125 | "y_pred = gb.predict(X_test)" 126 | ], 127 | "outputs": [], 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": false, 131 | "outputHidden": false, 132 | "inputHidden": false 133 | } 134 | }, 135 | { 136 | "cell_type": "code", 137 | "source": [ 138 | "from sklearn.metrics import mean_squared_error as MSE\n", 139 | "\n", 140 | "# Compute MSE\n", 141 | "mse_test = MSE(y_test, y_pred)\n", 142 | "\n", 143 | "# Compute RMSE\n", 144 | "rmse_test = mse_test**(1/2)\n", 145 | "\n", 146 | "# Print RMSE\n", 147 | "print('Test set RMSE of gb: {:.3f}'.format(rmse_test))" 148 | ], 149 | "outputs": [], 150 | "execution_count": null, 151 | "metadata": { 152 | "collapsed": false, 153 | "outputHidden": false, 154 | "inputHidden": false 155 | } 156 | } 157 | ], 158 | "metadata": { 159 | "kernel_info": { 160 | "name": "python3" 161 | }, 162 | "language_info": { 163 | "pygments_lexer": "ipython3", 164 | "version": "3.5.5", 165 | "codemirror_mode": { 166 | "version": 3, 167 | "name": "ipython" 168 | }, 169 | "nbconvert_exporter": "python", 170 | "name": "python", 171 | "mimetype": "text/x-python", 172 | "file_extension": ".py" 173 | }, 174 | "kernelspec": { 175 | "name": "python3", 176 | "language": "python", 177 | "display_name": 
"Python 3" 178 | }, 179 | "nteract": { 180 | "version": "0.12.2" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 4 185 | } -------------------------------------------------------------------------------- /Stock_Algorithms/Hyperparameter_Tuning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# Hyperparameter Tuning" 7 | ], 8 | "metadata": {} 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "source": [ 13 | "In machine learning, a hyperparameter is a parameter whose value is set before the learning process begins. By contrast, the values of other parameters are derived via training. Different model training algorithms require different hyperparameters, some simple algorithms require none. (Wikipedia)\n", 14 | "\n", 15 | "\n", 16 | "Hyperparameter tuning optimizes a single target variable, called the \"hyperparameter metric.\" \n", 17 | "\n", 18 | "A model hyperparameter is a configuration setting whose value cannot be estimated from data.\n" 19 | ], 20 | "metadata": {} 21 | }, 22 | { 23 | "cell_type": "code", 24 | "source": [ 25 | "import numpy as np\n", 26 | "import pandas as pd\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "\n", 29 | "import warnings\n", 30 | "warnings.filterwarnings(\"ignore\")\n", 31 | "\n", 32 | "# yahoo finance is used to fetch data \n", 33 | "import yfinance as yf\n", 34 | "yf.pdr_override()" 35 | ], 36 | "outputs": [], 37 | "execution_count": 1, 38 | "metadata": { 39 | "collapsed": false, 40 | "outputHidden": false, 41 | "inputHidden": false 42 | } 43 | }, 44 | { 45 | "cell_type": "code", 46 | "source": [ 47 | "# input\n", 48 | "symbol = 'AMD'\n", 49 | "start = '2014-01-01'\n", 50 | "end = '2018-08-27'\n", 51 | "\n", 52 | "# Read data \n", 53 | "dataset = yf.download(symbol,start,end)\n", 54 | "\n", 55 | "# View Columns\n", 56 | "dataset.head()" 57 | ], 58 | "outputs": [ 59 | { 60 | "output_type": "stream", 61 |
"name": "stdout", 62 | "text": [ 63 | "[*********************100%***********************] 1 of 1 downloaded\n" 64 | ] 65 | }, 66 | { 67 | "output_type": "execute_result", 68 | "execution_count": 2, 69 | "data": { 70 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
OpenHighLowCloseAdj CloseVolume
Date
2014-01-023.853.983.843.953.9520548400
2014-01-033.984.003.884.004.0022887200
2014-01-064.014.183.994.134.1342398300
2014-01-074.194.254.114.184.1842932100
2014-01-084.234.264.144.184.1830678700
\n
", 71 | "text/plain": " Open High Low Close Adj Close Volume\nDate \n2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700" 72 | }, 73 | "metadata": {} 74 | } 75 | ], 76 | "execution_count": 2, 77 | "metadata": { 78 | "collapsed": false, 79 | "outputHidden": false, 80 | "inputHidden": false 81 | } 82 | }, 83 | { 84 | "cell_type": "code", 85 | "source": [ 86 | "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n", 87 | "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n", 88 | "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n", 89 | "dataset['Returns'] = dataset['Adj Close'].pct_change()\n", 90 | "dataset = dataset.dropna()\n", 91 | "dataset.head()" 92 | ], 93 | "outputs": [ 94 | { 95 | "output_type": "execute_result", 96 | "execution_count": 3, 97 | "data": { 98 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
OpenHighLowCloseAdj CloseVolumeIncrease_DecreaseBuy_Sell_on_OpenBuy_SellReturns
Date
2014-01-033.984.003.884.004.00228872001110.012658
2014-01-064.014.183.994.134.13423983001110.032500
2014-01-074.194.254.114.184.18429321000100.012107
2014-01-084.234.264.144.184.18306787000000.000000
2014-01-094.204.234.054.094.0930667600001-0.021531
\n
", 99 | "text/plain": " Open High Low Close Adj Close Volume Increase_Decrease \\\nDate \n2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200 1 \n2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300 1 \n2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100 0 \n2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700 0 \n2014-01-09 4.20 4.23 4.05 4.09 4.09 30667600 0 \n\n Buy_Sell_on_Open Buy_Sell Returns \nDate \n2014-01-03 1 1 0.012658 \n2014-01-06 1 1 0.032500 \n2014-01-07 1 0 0.012107 \n2014-01-08 0 0 0.000000 \n2014-01-09 0 1 -0.021531 " 100 | }, 101 | "metadata": {} 102 | } 103 | ], 104 | "execution_count": 3, 105 | "metadata": { 106 | "collapsed": false, 107 | "outputHidden": false, 108 | "inputHidden": false 109 | } 110 | }, 111 | { 112 | "cell_type": "code", 113 | "source": [ 114 | "X = np.array(dataset['Open']).reshape(-1, 1)\n", 115 | "y = np.array(dataset['Buy_Sell']).reshape(-1, 1)" 116 | ], 117 | "outputs": [], 118 | "execution_count": 4, 119 | "metadata": { 120 | "collapsed": false, 121 | "outputHidden": false, 122 | "inputHidden": false 123 | } 124 | }, 125 | { 126 | "cell_type": "code", 127 | "source": [ 128 | "# Create logistic regression\n", 129 | "from sklearn import linear_model\n", 130 | "\n", 131 | "logistic = linear_model.LogisticRegression()" 132 | ], 133 | "outputs": [], 134 | "execution_count": 5, 135 | "metadata": { 136 | "collapsed": false, 137 | "outputHidden": false, 138 | "inputHidden": false 139 | } 140 | }, 141 | { 142 | "cell_type": "code", 143 | "source": [ 144 | "# Create logistic regression\n", 145 | "logistic = linear_model.LogisticRegression()" 146 | ], 147 | "outputs": [], 148 | "execution_count": 6, 149 | "metadata": { 150 | "collapsed": false, 151 | "outputHidden": false, 152 | "inputHidden": false 153 | } 154 | }, 155 | { 156 | "cell_type": "code", 157 | "source": [ 158 | "# Create regularization penalty space\n", 159 | "penalty = ['l1', 'l2']\n", 160 | "\n", 161 | "# Create regularization hyperparameter space\n", 162 | "C = np.logspace(0, 4, 10)\n", 163 | 
"\n", 164 | "# Create hyperparameter options\n", 165 | "hyperparameters = dict(C=C, penalty=penalty)" 166 | ], 167 | "outputs": [], 168 | "execution_count": 7, 169 | "metadata": { 170 | "collapsed": false, 171 | "outputHidden": false, 172 | "inputHidden": false 173 | } 174 | }, 175 | { 176 | "cell_type": "code", 177 | "source": [ 178 | "# Create grid search using 5-fold cross validation\n", 179 | "from sklearn.model_selection import GridSearchCV\n", 180 | "\n", 181 | "clf = GridSearchCV(logistic, hyperparameters, cv=5, verbose=0)" 182 | ], 183 | "outputs": [], 184 | "execution_count": 8, 185 | "metadata": { 186 | "collapsed": false, 187 | "outputHidden": false, 188 | "inputHidden": false 189 | } 190 | }, 191 | { 192 | "cell_type": "code", 193 | "source": [ 194 | "# Fit grid search\n", 195 | "best_model = clf.fit(X, y)" 196 | ], 197 | "outputs": [], 198 | "execution_count": 9, 199 | "metadata": { 200 | "collapsed": false, 201 | "outputHidden": false, 202 | "inputHidden": false 203 | } 204 | }, 205 | { 206 | "cell_type": "code", 207 | "source": [ 208 | "# View best hyperparameters\n", 209 | "print('Best Penalty:', best_model.best_estimator_.get_params()['penalty'])\n", 210 | "print('Best C:', best_model.best_estimator_.get_params()['C'])" 211 | ], 212 | "outputs": [ 213 | { 214 | "output_type": "stream", 215 | "name": "stdout", 216 | "text": [ 217 | "Best Penalty: l1\n", 218 | "Best C: 1.0\n" 219 | ] 220 | } 221 | ], 222 | "execution_count": 10, 223 | "metadata": { 224 | "collapsed": false, 225 | "outputHidden": false, 226 | "inputHidden": false 227 | } 228 | }, 229 | { 230 | "cell_type": "code", 231 | "source": [ 232 | "# Predict target vector\n", 233 | "best_model.predict(X)" 234 | ], 235 | "outputs": [ 236 | { 237 | "output_type": "execute_result", 238 | "execution_count": 11, 239 | "data": { 240 | "text/plain": "array([0, 0, 0, ..., 1, 1, 1])" 241 | }, 242 | "metadata": {} 243 | } 244 | ], 245 | "execution_count": 11, 246 | "metadata": { 247 | "collapsed": false, 
248 | "outputHidden": false, 249 | "inputHidden": false 250 | } 251 | } 252 | ], 253 | "metadata": { 254 | "kernel_info": { 255 | "name": "python3" 256 | }, 257 | "language_info": { 258 | "codemirror_mode": { 259 | "version": 3, 260 | "name": "ipython" 261 | }, 262 | "name": "python", 263 | "file_extension": ".py", 264 | "mimetype": "text/x-python", 265 | "nbconvert_exporter": "python", 266 | "version": "3.5.5", 267 | "pygments_lexer": "ipython3" 268 | }, 269 | "kernelspec": { 270 | "name": "python3", 271 | "language": "python", 272 | "display_name": "Python 3" 273 | }, 274 | "nteract": { 275 | "version": "0.28.0" 276 | } 277 | }, 278 | "nbformat": 4, 279 | "nbformat_minor": 4 280 | } -------------------------------------------------------------------------------- /Stock_Algorithms/K_Means_Clustering_Part2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# K Means Clustering Part 2" 7 | ], 8 | "metadata": { 9 | "nteract": { 10 | "transient": { 11 | "deleting": false 12 | } 13 | } 14 | } 15 | }, 16 | { 17 | "cell_type": "code", 18 | "source": [ 19 | "import pandas as pd\n", 20 | "import numpy as np\n", 21 | "import pylab as pl\n", 22 | "import datetime as dt\n", 23 | "from math import sqrt\n", 24 | "\n", 25 | "import warnings\n", 26 | "warnings.filterwarnings(\"ignore\")\n", 27 | "\n", 28 | "# yahoo finance used to fetch data \n", 29 | "import yfinance as yf\n", 30 | "yf.pdr_override()\n", 31 | "\n", 32 | "from sklearn.cluster import KMeans" 33 | ], 34 | "outputs": [], 35 | "execution_count": 1, 36 | "metadata": { 37 | "collapsed": true, 38 | "jupyter": { 39 | "source_hidden": false, 40 | "outputs_hidden": false 41 | }, 42 | "nteract": { 43 | "transient": { 44 | "deleting": false 45 | } 46 | }, 47 | "execution": { 48 | "iopub.status.busy": "2022-04-07T00:41:22.144Z", 49 | "iopub.execute_input": "2022-04-07T00:41:22.149Z", 50 | "iopub.status.idle": 
"2022-04-07T00:41:23.358Z", 51 | "shell.execute_reply": "2022-04-07T00:41:23.351Z" 52 | } 53 | } 54 | }, 55 | { 56 | "cell_type": "code", 57 | "source": [ 58 | "stocks = si.tickers_dow()\n", 59 | "stocks" 60 | ], 61 | "outputs": [ 62 | { 63 | "output_type": "error", 64 | "ename": "NameError", 65 | "evalue": "name 'si' is not defined", 66 | "traceback": [ 67 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 68 | "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", 69 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mstocks\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msi\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtickers_dow\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mstocks\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 70 | "\u001b[1;31mNameError\u001b[0m: name 'si' is not defined" 71 | ] 72 | } 73 | ], 74 | "execution_count": 2, 75 | "metadata": { 76 | "collapsed": true, 77 | "jupyter": { 78 | "source_hidden": false, 79 | "outputs_hidden": false 80 | }, 81 | "nteract": { 82 | "transient": { 83 | "deleting": false 84 | } 85 | }, 86 | "execution": { 87 | "iopub.status.busy": "2022-04-07T00:41:23.366Z", 88 | "iopub.execute_input": "2022-04-07T00:41:23.371Z", 89 | "iopub.status.idle": "2022-04-07T00:37:28.222Z", 90 | "shell.execute_reply": "2022-04-07T00:37:28.230Z" 91 | } 92 | } 93 | }, 94 | { 95 | "cell_type": "code", 96 | "source": [ 97 | "start = dt.datetime(2020, 1, 1)\n", 98 | "now = dt.datetime.now()\n" 99 | ], 100 | "outputs": [], 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": true, 104 | "jupyter": { 105 | "source_hidden": false, 106 | "outputs_hidden": false 107 | }, 108 | "nteract": { 109 | "transient": { 110 | "deleting": false 111 | } 112 | }, 113 | "execution": { 114 | "iopub.status.busy": 
"2022-04-07T00:37:30.620Z", 115 | "iopub.execute_input": "2022-04-07T00:37:30.623Z", 116 | "shell.execute_reply": "2022-04-07T00:37:30.635Z", 117 | "iopub.status.idle": "2022-04-07T00:37:30.628Z" 118 | } 119 | } 120 | }, 121 | { 122 | "cell_type": "code", 123 | "source": [ 124 | "df = yf.download(stocks, start, now)['Adj Close']\n", 125 | "df.head()" 126 | ], 127 | "outputs": [], 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": true, 131 | "jupyter": { 132 | "source_hidden": false, 133 | "outputs_hidden": false 134 | }, 135 | "nteract": { 136 | "transient": { 137 | "deleting": false 138 | } 139 | }, 140 | "execution": { 141 | "iopub.status.busy": "2022-04-07T00:37:47.765Z", 142 | "iopub.execute_input": "2022-04-07T00:37:47.768Z", 143 | "iopub.status.idle": "2022-04-07T00:37:50.505Z", 144 | "shell.execute_reply": "2022-04-07T00:37:50.553Z" 145 | } 146 | } 147 | }, 148 | { 149 | "cell_type": "code", 150 | "source": [ 151 | "returns = df.pct_change().mean() * 252\n", 152 | "variance = df.pct_change().std() * sqrt(252)\n", 153 | "returns.columns = [\"Returns\"]\n", 154 | "variance.columns = [\"Variance\"]" 155 | ], 156 | "outputs": [], 157 | "execution_count": null, 158 | "metadata": { 159 | "collapsed": true, 160 | "jupyter": { 161 | "source_hidden": false, 162 | "outputs_hidden": false 163 | }, 164 | "nteract": { 165 | "transient": { 166 | "deleting": false 167 | } 168 | }, 169 | "execution": { 170 | "iopub.status.busy": "2022-04-07T00:38:41.490Z", 171 | "iopub.execute_input": "2022-04-07T00:38:41.493Z", 172 | "iopub.status.idle": "2022-04-07T00:38:41.500Z", 173 | "shell.execute_reply": "2022-04-07T00:38:41.516Z" 174 | } 175 | } 176 | }, 177 | { 178 | "cell_type": "code", 179 | "source": [ 180 | "ret_var = pd.concat([returns, variance], axis = 1).dropna()\n", 181 | "ret_var.columns = [\"Returns\", \"Variance\"]" 182 | ], 183 | "outputs": [], 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": true, 187 | "jupyter": { 188 | 
"source_hidden": false, 189 | "outputs_hidden": false 190 | }, 191 | "nteract": { 192 | "transient": { 193 | "deleting": false 194 | } 195 | }, 196 | "execution": { 197 | "iopub.status.busy": "2022-04-07T00:38:55.905Z", 198 | "iopub.execute_input": "2022-04-07T00:38:55.908Z", 199 | "shell.execute_reply": "2022-04-07T00:38:55.919Z", 200 | "iopub.status.idle": "2022-04-07T00:38:55.926Z" 201 | } 202 | } 203 | }, 204 | { 205 | "cell_type": "code", 206 | "source": [ 207 | "X = ret_var.values\n", 208 | "sse = []\n", 209 | "\n", 210 | "for k in range(2,15):\n", 211 | " \n", 212 | " kmeans = KMeans(n_clusters = k)\n", 213 | " kmeans.fit(X)\n", 214 | " \n", 215 | " sse.append(kmeans.inertia_) #SSE for each n_clusters\n", 216 | "pl.plot(range(2,15), sse)\n", 217 | "pl.title(\"Elbow Curve\")\n", 218 | "pl.subplots()\n", 219 | "pl.show()" 220 | ], 221 | "outputs": [], 222 | "execution_count": null, 223 | "metadata": { 224 | "collapsed": true, 225 | "jupyter": { 226 | "source_hidden": false, 227 | "outputs_hidden": false 228 | }, 229 | "nteract": { 230 | "transient": { 231 | "deleting": false 232 | } 233 | }, 234 | "execution": { 235 | "iopub.status.busy": "2022-04-07T00:39:58.118Z", 236 | "iopub.execute_input": "2022-04-07T00:39:58.122Z", 237 | "shell.execute_reply": "2022-04-07T00:39:58.523Z", 238 | "iopub.status.idle": "2022-04-07T00:39:58.515Z" 239 | } 240 | } 241 | }, 242 | { 243 | "cell_type": "code", 244 | "source": [ 245 | "kmeans = KMeans(n_clusters = 5).fit(X)\n", 246 | "centroids = kmeans.cluster_centers_\n", 247 | "pl.scatter(X[:,0],X[:,1], c = kmeans.labels_, cmap =\"rainbow\")\n", 248 | "pl.show()" 249 | ], 250 | "outputs": [], 251 | "execution_count": null, 252 | "metadata": { 253 | "collapsed": true, 254 | "jupyter": { 255 | "source_hidden": false, 256 | "outputs_hidden": false 257 | }, 258 | "nteract": { 259 | "transient": { 260 | "deleting": false 261 | } 262 | }, 263 | "execution": { 264 | "iopub.status.busy": "2022-04-07T00:40:02.008Z", 265 | 
"iopub.execute_input": "2022-04-07T00:40:02.013Z", 266 | "shell.execute_reply": "2022-04-07T00:40:02.101Z", 267 | "iopub.status.idle": "2022-04-07T00:40:02.108Z" 268 | } 269 | } 270 | }, 271 | { 272 | "cell_type": "code", 273 | "source": [ 274 | "X = ret_var.values\n", 275 | "kmeans =KMeans(n_clusters = 5).fit(X)\n", 276 | "centroids = kmeans.cluster_centers_\n", 277 | "pl.scatter(X[:,0],X[:,1], c = kmeans.labels_, cmap =\"rainbow\")\n", 278 | "pl.show()" 279 | ], 280 | "outputs": [], 281 | "execution_count": null, 282 | "metadata": { 283 | "collapsed": true, 284 | "jupyter": { 285 | "source_hidden": false, 286 | "outputs_hidden": false 287 | }, 288 | "nteract": { 289 | "transient": { 290 | "deleting": false 291 | } 292 | }, 293 | "execution": { 294 | "iopub.status.busy": "2022-04-07T00:40:12.616Z", 295 | "iopub.execute_input": "2022-04-07T00:40:12.620Z", 296 | "shell.execute_reply": "2022-04-07T00:40:12.703Z", 297 | "iopub.status.idle": "2022-04-07T00:40:12.709Z" 298 | } 299 | } 300 | }, 301 | { 302 | "cell_type": "code", 303 | "source": [ 304 | "Companies = pd.DataFrame(ret_var.index)\n", 305 | "cluster_labels = pd.DataFrame(kmeans.labels_)\n", 306 | "df = pd.concat([Companies, cluster_labels],axis = 1)\n", 307 | "df.columns = ['Stock', 'Cluster Labels']\n", 308 | "df.set_index('Stock')" 309 | ], 310 | "outputs": [], 311 | "execution_count": null, 312 | "metadata": { 313 | "collapsed": true, 314 | "jupyter": { 315 | "source_hidden": false, 316 | "outputs_hidden": false 317 | }, 318 | "nteract": { 319 | "transient": { 320 | "deleting": false 321 | } 322 | }, 323 | "execution": { 324 | "iopub.status.busy": "2022-04-07T00:41:01.920Z", 325 | "iopub.execute_input": "2022-04-07T00:41:01.925Z", 326 | "iopub.status.idle": "2022-04-07T00:41:01.936Z", 327 | "shell.execute_reply": "2022-04-07T00:41:01.950Z" 328 | } 329 | } 330 | }, 331 | { 332 | "cell_type": "code", 333 | "source": [ 334 | "df" 335 | ], 336 | "outputs": [], 337 | "execution_count": null, 338 | "metadata": { 
339 | "collapsed": true, 340 | "jupyter": { 341 | "source_hidden": false, 342 | "outputs_hidden": false 343 | }, 344 | "nteract": { 345 | "transient": { 346 | "deleting": false 347 | } 348 | }, 349 | "execution": { 350 | "iopub.status.busy": "2022-04-07T00:41:12.197Z", 351 | "iopub.execute_input": "2022-04-07T00:41:12.202Z", 352 | "iopub.status.idle": "2022-04-07T00:41:12.212Z", 353 | "shell.execute_reply": "2022-04-07T00:41:12.226Z" 354 | } 355 | } 356 | } 357 | ], 358 | "metadata": { 359 | "kernel_info": { 360 | "name": "python3" 361 | }, 362 | "language_info": { 363 | "name": "python", 364 | "version": "3.6.13", 365 | "mimetype": "text/x-python", 366 | "codemirror_mode": { 367 | "name": "ipython", 368 | "version": 3 369 | }, 370 | "pygments_lexer": "ipython3", 371 | "nbconvert_exporter": "python", 372 | "file_extension": ".py" 373 | }, 374 | "kernelspec": { 375 | "argv": [ 376 | "C:/Users/Tin Hang/Anaconda3\\python.exe", 377 | "-m", 378 | "ipykernel_launcher", 379 | "-f", 380 | "{connection_file}" 381 | ], 382 | "display_name": "Python 3", 383 | "language": "python", 384 | "name": "python3" 385 | }, 386 | "nteract": { 387 | "version": "0.28.0" 388 | } 389 | }, 390 | "nbformat": 4, 391 | "nbformat_minor": 0 392 | } -------------------------------------------------------------------------------- /Stock_Algorithms/Multiple_Linear_Regression_with_Normalize_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# Multiple Linear Regression with Normalize Data" 7 | ], 8 | "metadata": {} 9 | }, 10 | { 11 | "cell_type": "code", 12 | "source": [ 13 | "# Importing the libraries\n", 14 | "import pandas as pd\n", 15 | "import numpy as np\n", 16 | "import matplotlib.pyplot as plt\n", 17 | "\n", 18 | "import warnings\n", 19 | "warnings.filterwarnings(\"ignore\")\n", 20 | "\n", 21 | "# fix_yahoo_finance is used to fetch data \n", 22 | "import fix_yahoo_finance as 
yf\n", 23 | "yf.pdr_override()" 24 | ], 25 | "outputs": [], 26 | "execution_count": 1, 27 | "metadata": { 28 | "collapsed": false, 29 | "outputHidden": false, 30 | "inputHidden": false 31 | } 32 | }, 33 | { 34 | "cell_type": "code", 35 | "source": [ 36 | "# input\n", 37 | "symbol = 'AMD'\n", 38 | "start = '2014-01-01'\n", 39 | "end = '2018-08-27'\n", 40 | "\n", 41 | "# Read data \n", 42 | "dataset = yf.download(symbol,start,end)\n", 43 | "\n", 44 | "# View columns \n", 45 | "dataset.head()" 46 | ], 47 | "outputs": [ 48 | { 49 | "output_type": "stream", 50 | "name": "stdout", 51 | "text": [ 52 | "[*********************100%***********************] 1 of 1 downloaded\n" 53 | ] 54 | }, 55 | { 56 | "output_type": "execute_result", 57 | "execution_count": 2, 58 | "data": { 59 | "text/plain": [ 60 | " Open High Low Close Adj Close Volume\n", 61 | "Date \n", 62 | "2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n", 63 | "2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n", 64 | "2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n", 65 | "2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n", 66 | "2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700" 67 | ], 68 | "text/html": [ 69 | "
\n", 70 | "\n", 83 | "\n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | "
OpenHighLowCloseAdj CloseVolume
Date
2014-01-023.853.983.843.953.9520548400
2014-01-033.984.003.884.004.0022887200
2014-01-064.014.183.994.134.1342398300
2014-01-074.194.254.114.184.1842932100
2014-01-084.234.264.144.184.1830678700
\n", 152 | "
" 153 | ] 154 | }, 155 | "metadata": {} 156 | } 157 | ], 158 | "execution_count": 2, 159 | "metadata": { 160 | "collapsed": false, 161 | "outputHidden": false, 162 | "inputHidden": false 163 | } 164 | }, 165 | { 166 | "cell_type": "code", 167 | "source": [ 168 | "X = dataset.iloc[ : , 0:4].values\n", 169 | "Y = np.asanyarray(dataset[['Adj Close']])" 170 | ], 171 | "outputs": [], 172 | "execution_count": 3, 173 | "metadata": { 174 | "collapsed": false, 175 | "outputHidden": false, 176 | "inputHidden": false 177 | } 178 | }, 179 | { 180 | "cell_type": "code", 181 | "source": [ 182 | "from sklearn import preprocessing\n", 183 | "\n", 184 | "# normalize the data attributes\n", 185 | "normalized_X = preprocessing.normalize(X)" 186 | ], 187 | "outputs": [], 188 | "execution_count": 4, 189 | "metadata": { 190 | "collapsed": false, 191 | "outputHidden": false, 192 | "inputHidden": false 193 | } 194 | }, 195 | { 196 | "cell_type": "code", 197 | "source": [ 198 | "X = normalized_X[: , 1:]" 199 | ], 200 | "outputs": [], 201 | "execution_count": 5, 202 | "metadata": { 203 | "collapsed": false, 204 | "outputHidden": false, 205 | "inputHidden": false 206 | } 207 | }, 208 | { 209 | "cell_type": "code", 210 | "source": [ 211 | "# Splitting the dataset into the Training set and Test set\n", 212 | "from sklearn.model_selection import train_test_split\n", 213 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)" 214 | ], 215 | "outputs": [], 216 | "execution_count": 6, 217 | "metadata": { 218 | "collapsed": false, 219 | "outputHidden": false, 220 | "inputHidden": false 221 | } 222 | }, 223 | { 224 | "cell_type": "code", 225 | "source": [ 226 | "from sklearn.linear_model import LinearRegression\n", 227 | "regressor = LinearRegression()\n", 228 | "regressor.fit(X_train, Y_train)" 229 | ], 230 | "outputs": [ 231 | { 232 | "output_type": "execute_result", 233 | "execution_count": 7, 234 | "data": { 235 | "text/plain": [ 236 | 
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", 237 | " normalize=False)" 238 | ] 239 | }, 240 | "metadata": {} 241 | } 242 | ], 243 | "execution_count": 7, 244 | "metadata": { 245 | "collapsed": false, 246 | "outputHidden": false, 247 | "inputHidden": false 248 | } 249 | }, 250 | { 251 | "cell_type": "code", 252 | "source": [ 253 | "y_pred = regressor.predict(X_test)" 254 | ], 255 | "outputs": [], 256 | "execution_count": 8, 257 | "metadata": { 258 | "collapsed": false, 259 | "outputHidden": false, 260 | "inputHidden": false 261 | } 262 | }, 263 | { 264 | "cell_type": "code", 265 | "source": [ 266 | "from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, r2_score\n", 267 | "ex_var_score = explained_variance_score(Y_test, y_pred)\n", 268 | "m_absolute_error = mean_absolute_error(Y_test, y_pred)\n", 269 | "m_squared_error = mean_squared_error(Y_test, y_pred)\n", 270 | "r_2_score = r2_score(Y_test, y_pred)\n", 271 | "\n", 272 | "print(\"Explained Variance Score: \"+str(ex_var_score))\n", 273 | "print(\"Mean Absolute Error \"+str(m_absolute_error))\n", 274 | "print(\"Mean Squared Error \"+str(m_squared_error))\n", 275 | "print(\"R Squared Error \"+str(r_2_score))" 276 | ], 277 | "outputs": [ 278 | { 279 | "output_type": "stream", 280 | "name": "stdout", 281 | "text": [ 282 | "Explained Variance Score: 0.0145762414645\n", 283 | "Mean Absolute Error 4.3559157043\n", 284 | "Mean Squared Error 22.546676437\n", 285 | "R Squared Error 0.0145752513278\n" 286 | ] 287 | } 288 | ], 289 | "execution_count": 9, 290 | "metadata": { 291 | "collapsed": false, 292 | "outputHidden": false, 293 | "inputHidden": false 294 | } 295 | }, 296 | { 297 | "cell_type": "code", 298 | "source": [ 299 | "print ('Coefficients: ', regressor.coef_)\n", 300 | "print(\"Residual sum of squares: %.2f\"\n", 301 | " % np.mean((y_pred - Y_test) ** 2))\n", 302 | "\n", 303 | "# Explained variance score: 1 is perfect prediction\n", 304 | 
"print('Variance score: %.2f' % regressor.score(X_test, y_pred))" 305 | ], 306 | "outputs": [ 307 | { 308 | "output_type": "stream", 309 | "name": "stdout", 310 | "text": [ 311 | "Coefficients: [[-79.79361894 -53.18582378 15.74315198]]\n", 312 | "Residual sum of squares: 22.55\n", 313 | "Variance score: 1.00\n" 314 | ] 315 | } 316 | ], 317 | "execution_count": 10, 318 | "metadata": { 319 | "collapsed": false, 320 | "outputHidden": false, 321 | "inputHidden": false 322 | } 323 | }, 324 | { 325 | "cell_type": "code", 326 | "source": [ 327 | "print('Multiple Linear Score:', regressor.score(X_test, y_pred))" 328 | ], 329 | "outputs": [ 330 | { 331 | "output_type": "stream", 332 | "name": "stdout", 333 | "text": [ 334 | "Multiple Linear Score: 0.0145752513278\n" 335 | ] 336 | } 337 | ], 338 | "execution_count": 12, 339 | "metadata": { 340 | "collapsed": false, 341 | "outputHidden": false, 342 | "inputHidden": false 343 | } 344 | } 345 | ], 346 | "metadata": { 347 | "kernel_info": { 348 | "name": "python3" 349 | }, 350 | "language_info": { 351 | "codemirror_mode": { 352 | "name": "ipython", 353 | "version": 3 354 | }, 355 | "name": "python", 356 | "nbconvert_exporter": "python", 357 | "file_extension": ".py", 358 | "pygments_lexer": "ipython3", 359 | "version": "3.5.5", 360 | "mimetype": "text/x-python" 361 | }, 362 | "kernelspec": { 363 | "name": "python3", 364 | "language": "python", 365 | "display_name": "Python 3" 366 | }, 367 | "nteract": { 368 | "version": "0.14.5" 369 | } 370 | }, 371 | "nbformat": 4, 372 | "nbformat_minor": 0 373 | } -------------------------------------------------------------------------------- /Stock_Algorithms/Principal_Component_Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# Principal Component Regression (PCR)" 7 | ], 8 | "metadata": {} 9 | }, 10 | { 11 | "cell_type": "code", 12 | "source": [ 13 | "import numpy 
as np\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import pandas as pd\n", 16 | "\n", 17 | "import warnings\n", 18 | "warnings.filterwarnings(\"ignore\")\n", 19 | "\n", 20 | "# fix_yahoo_finance is used to fetch data \n", 21 | "import fix_yahoo_finance as yf\n", 22 | "yf.pdr_override()" 23 | ], 24 | "outputs": [], 25 | "execution_count": 1, 26 | "metadata": { 27 | "collapsed": false, 28 | "outputHidden": false, 29 | "inputHidden": false 30 | } 31 | }, 32 | { 33 | "cell_type": "code", 34 | "source": [ 35 | "# input\n", 36 | "symbol = 'AMD'\n", 37 | "start = '2014-01-01'\n", 38 | "end = '2019-01-01'\n", 39 | "\n", 40 | "# Read data \n", 41 | "dataset = yf.download(symbol,start,end)\n", 42 | "\n", 43 | "# View Columns\n", 44 | "dataset.head()" 45 | ], 46 | "outputs": [ 47 | { 48 | "output_type": "stream", 49 | "name": "stdout", 50 | "text": [ 51 | "[*********************100%***********************] 1 of 1 downloaded\n" 52 | ] 53 | }, 54 | { 55 | "output_type": "execute_result", 56 | "execution_count": 2, 57 | "data": { 58 | "text/html": [ 59 | "
\n", 60 | "\n", 73 | "\n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | "
OpenHighLowCloseAdj CloseVolume
Date
2014-01-023.853.983.843.953.9520548400
2014-01-033.984.003.884.004.0022887200
2014-01-064.014.183.994.134.1342398300
2014-01-074.194.254.114.184.1842932100
2014-01-084.234.264.144.184.1830678700
\n", 142 | "
" 143 | ], 144 | "text/plain": [ 145 | " Open High Low Close Adj Close Volume\n", 146 | "Date \n", 147 | "2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n", 148 | "2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n", 149 | "2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n", 150 | "2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n", 151 | "2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700" 152 | ] 153 | }, 154 | "metadata": {} 155 | } 156 | ], 157 | "execution_count": 2, 158 | "metadata": { 159 | "collapsed": false, 160 | "outputHidden": false, 161 | "inputHidden": false 162 | } 163 | }, 164 | { 165 | "cell_type": "code", 166 | "source": [ 167 | "X = dataset.iloc[ : , 1].values\n", 168 | "Y = dataset.iloc[ : , 4].values" 169 | ], 170 | "outputs": [], 171 | "execution_count": 3, 172 | "metadata": { 173 | "collapsed": false, 174 | "outputHidden": false, 175 | "inputHidden": false 176 | } 177 | }, 178 | { 179 | "cell_type": "code", 180 | "source": [ 181 | "print(X.shape)\n", 182 | "print(Y.shape)" 183 | ], 184 | "outputs": [ 185 | { 186 | "output_type": "stream", 187 | "name": "stdout", 188 | "text": [ 189 | "(1258,)\n", 190 | "(1258,)\n" 191 | ] 192 | } 193 | ], 194 | "execution_count": 4, 195 | "metadata": { 196 | "collapsed": false, 197 | "outputHidden": false, 198 | "inputHidden": false 199 | } 200 | }, 201 | { 202 | "cell_type": "code", 203 | "source": [ 204 | "X = np.array(X).reshape(1258,-1)\n", 205 | "Y = np.array(Y).reshape(1258,-1)" 206 | ], 207 | "outputs": [], 208 | "execution_count": 5, 209 | "metadata": { 210 | "collapsed": false, 211 | "outputHidden": false, 212 | "inputHidden": false 213 | } 214 | }, 215 | { 216 | "cell_type": "code", 217 | "source": [ 218 | "from sklearn.decomposition import PCA" 219 | ], 220 | "outputs": [], 221 | "execution_count": 6, 222 | "metadata": { 223 | "collapsed": false, 224 | "outputHidden": false, 225 | "inputHidden": false 226 | } 227 | }, 228 | { 229 | "cell_type": "code", 230 | "source": [ 231 | "pca = PCA(n_components=1)\n", 232 | 
"pca.fit(X)" 233 | ], 234 | "outputs": [ 235 | { 236 | "output_type": "execute_result", 237 | "execution_count": 7, 238 | "data": { 239 | "text/plain": [ 240 | "PCA(copy=True, iterated_power='auto', n_components=1, random_state=None,\n", 241 | " svd_solver='auto', tol=0.0, whiten=False)" 242 | ] 243 | }, 244 | "metadata": {} 245 | } 246 | ], 247 | "execution_count": 7, 248 | "metadata": { 249 | "collapsed": false, 250 | "outputHidden": false, 251 | "inputHidden": false 252 | } 253 | }, 254 | { 255 | "cell_type": "code", 256 | "source": [ 257 | "print(pca.explained_variance_ratio_)" 258 | ], 259 | "outputs": [ 260 | { 261 | "output_type": "stream", 262 | "name": "stdout", 263 | "text": [ 264 | "[ 1.]\n" 265 | ] 266 | } 267 | ], 268 | "execution_count": 8, 269 | "metadata": { 270 | "collapsed": false, 271 | "outputHidden": false, 272 | "inputHidden": false 273 | } 274 | }, 275 | { 276 | "cell_type": "code", 277 | "source": [ 278 | "print(pca.singular_values_)" 279 | ], 280 | "outputs": [ 281 | { 282 | "output_type": "stream", 283 | "name": "stdout", 284 | "text": [ 285 | "[ 236.05044323]\n" 286 | ] 287 | } 288 | ], 289 | "execution_count": 9, 290 | "metadata": { 291 | "collapsed": false, 292 | "outputHidden": false, 293 | "inputHidden": false 294 | } 295 | }, 296 | { 297 | "cell_type": "code", 298 | "source": [ 299 | "pca = PCA(n_components=1, svd_solver='full')\n", 300 | "pca.fit(X)" 301 | ], 302 | "outputs": [ 303 | { 304 | "output_type": "execute_result", 305 | "execution_count": 10, 306 | "data": { 307 | "text/plain": [ 308 | "PCA(copy=True, iterated_power='auto', n_components=1, random_state=None,\n", 309 | " svd_solver='full', tol=0.0, whiten=False)" 310 | ] 311 | }, 312 | "metadata": {} 313 | } 314 | ], 315 | "execution_count": 10, 316 | "metadata": { 317 | "collapsed": false, 318 | "outputHidden": false, 319 | "inputHidden": false 320 | } 321 | }, 322 | { 323 | "cell_type": "code", 324 | "source": [ 325 | "print(pca.explained_variance_ratio_)" 326 | ], 327 | 
"outputs": [ 328 | { 329 | "output_type": "stream", 330 | "name": "stdout", 331 | "text": [ 332 | "[ 1.]\n" 333 | ] 334 | } 335 | ], 336 | "execution_count": 11, 337 | "metadata": { 338 | "collapsed": false, 339 | "outputHidden": false, 340 | "inputHidden": false 341 | } 342 | }, 343 | { 344 | "cell_type": "code", 345 | "source": [ 346 | "print(pca.singular_values_)" 347 | ], 348 | "outputs": [ 349 | { 350 | "output_type": "stream", 351 | "name": "stdout", 352 | "text": [ 353 | "[ 236.05044323]\n" 354 | ] 355 | } 356 | ], 357 | "execution_count": 12, 358 | "metadata": { 359 | "collapsed": false, 360 | "outputHidden": false, 361 | "inputHidden": false 362 | } 363 | }, 364 | { 365 | "cell_type": "code", 366 | "source": [ 367 | "pca.score(X, y=None)" 368 | ], 369 | "outputs": [ 370 | { 371 | "output_type": "execute_result", 372 | "execution_count": 13, 373 | "data": { 374 | "text/plain": [ 375 | "-3.3143449973330785" 376 | ] 377 | }, 378 | "metadata": {} 379 | } 380 | ], 381 | "execution_count": 13, 382 | "metadata": { 383 | "collapsed": false, 384 | "outputHidden": false, 385 | "inputHidden": false 386 | } 387 | } 388 | ], 389 | "metadata": { 390 | "kernel_info": { 391 | "name": "python3" 392 | }, 393 | "language_info": { 394 | "name": "python", 395 | "codemirror_mode": { 396 | "name": "ipython", 397 | "version": 3 398 | }, 399 | "version": "3.5.5", 400 | "file_extension": ".py", 401 | "nbconvert_exporter": "python", 402 | "mimetype": "text/x-python", 403 | "pygments_lexer": "ipython3" 404 | }, 405 | "kernelspec": { 406 | "name": "python3", 407 | "language": "python", 408 | "display_name": "Python 3" 409 | }, 410 | "nteract": { 411 | "version": "0.15.0" 412 | } 413 | }, 414 | "nbformat": 4, 415 | "nbformat_minor": 4 416 | } -------------------------------------------------------------------------------- /Stock_Algorithms/Quasi_Poisson_Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
"cell_type": "markdown", 5 | "source": [ 6 | "# Quasi-Poisson Regression" 7 | ], 8 | "metadata": {} 9 | }, 10 | { 11 | "cell_type": "code", 12 | "source": [ 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import pandas as pd\n", 16 | "\n", 17 | "import warnings\n", 18 | "warnings.filterwarnings(\"ignore\")\n", 19 | "\n", 20 | "# yahoo finance is used to fetch data \n", 21 | "import yfinance as yf\n", 22 | "yf.pdr_override()" 23 | ], 24 | "outputs": [], 25 | "execution_count": 1, 26 | "metadata": { 27 | "collapsed": false, 28 | "outputHidden": false, 29 | "inputHidden": false, 30 | "execution": { 31 | "iopub.status.busy": "2021-04-24T04:48:24.336Z", 32 | "iopub.execute_input": "2021-04-24T04:48:24.341Z", 33 | "iopub.status.idle": "2021-04-24T04:48:25.077Z", 34 | "shell.execute_reply": "2021-04-24T04:48:25.095Z" 35 | } 36 | } 37 | }, 38 | { 39 | "cell_type": "code", 40 | "source": [ 41 | "# input\n", 42 | "symbol = 'AMD'\n", 43 | "start = '2014-01-01'\n", 44 | "end = '2018-08-27'\n", 45 | "\n", 46 | "# Read data \n", 47 | "dataset = yf.download(symbol,start,end)\n", 48 | "\n", 49 | "# View Columns\n", 50 | "dataset.head()" 51 | ], 52 | "outputs": [ 53 | { 54 | "output_type": "stream", 55 | "name": "stdout", 56 | "text": [ 57 | "[*********************100%***********************] 1 of 1 completed\n" 58 | ] 59 | }, 60 | { 61 | "output_type": "execute_result", 62 | "execution_count": 2, 63 | "data": { 64 | "text/plain": " Open High Low Close Adj Close Volume\nDate \n2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700", 65 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
OpenHighLowCloseAdj CloseVolume
Date
2014-01-023.853.983.843.953.9520548400
2014-01-033.984.003.884.004.0022887200
2014-01-064.014.183.994.134.1342398300
2014-01-074.194.254.114.184.1842932100
2014-01-084.234.264.144.184.1830678700
\n
" 66 | }, 67 | "metadata": {} 68 | } 69 | ], 70 | "execution_count": 2, 71 | "metadata": { 72 | "collapsed": false, 73 | "outputHidden": false, 74 | "inputHidden": false, 75 | "execution": { 76 | "iopub.status.busy": "2021-04-24T04:48:25.082Z", 77 | "iopub.execute_input": "2021-04-24T04:48:25.086Z", 78 | "iopub.status.idle": "2021-04-24T04:48:25.845Z", 79 | "shell.execute_reply": "2021-04-24T04:48:25.905Z" 80 | } 81 | } 82 | }, 83 | { 84 | "cell_type": "code", 85 | "source": [ 86 | "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n", 87 | "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n", 88 | "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n", 89 | "dataset['Returns'] = dataset['Adj Close'].pct_change()\n", 90 | "dataset = dataset.dropna()" 91 | ], 92 | "outputs": [], 93 | "execution_count": 3, 94 | "metadata": { 95 | "collapsed": true, 96 | "jupyter": { 97 | "source_hidden": false, 98 | "outputs_hidden": false 99 | }, 100 | "nteract": { 101 | "transient": { 102 | "deleting": false 103 | } 104 | }, 105 | "execution": { 106 | "iopub.status.busy": "2021-04-24T04:48:25.851Z", 107 | "iopub.execute_input": "2021-04-24T04:48:25.854Z", 108 | "shell.execute_reply": "2021-04-24T04:48:25.909Z", 109 | "iopub.status.idle": "2021-04-24T04:48:25.919Z" 110 | } 111 | } 112 | }, 113 | { 114 | "cell_type": "code", 115 | "source": [ 116 | "from statsmodels.genmod.generalized_estimating_equations import GEE\n", 117 | "from statsmodels.genmod.cov_struct import (Exchangeable,\n", 118 | " Independence,Autoregressive)\n", 119 | "from statsmodels.genmod.families import Poisson" 120 | ], 121 | "outputs": [], 122 | "execution_count": 5, 123 | "metadata": { 124 | "collapsed": true, 125 | "jupyter": { 126 | "source_hidden": false, 127 | "outputs_hidden": false 128 | }, 129 | "nteract": { 130 | "transient": { 131 | "deleting": false 132 | } 133 | }, 134 | 
"execution": { 135 | "iopub.status.busy": "2021-04-24T04:48:25.942Z", 136 | "iopub.execute_input": "2021-04-24T04:48:25.945Z", 137 | "iopub.status.idle": "2021-04-24T04:48:26.382Z", 138 | "shell.execute_reply": "2021-04-24T04:48:26.394Z" 139 | } 140 | } 141 | }, 142 | { 143 | "cell_type": "code", 144 | "source": [ 145 | "fam = Poisson()\n", 146 | "ind = Independence()\n", 147 | "model1 = GEE.from_formula(\"Increase_Decrease ~ Returns + Buy_Sell_on_Open + Open\", 'Buy_Sell', dataset, cov_struct=ind, family=fam)\n", 148 | "result1 = model1.fit()\n", 149 | "print(result1.summary())" 150 | ], 151 | "outputs": [ 152 | { 153 | "output_type": "stream", 154 | "name": "stdout", 155 | "text": [ 156 | " GEE Regression Results \n", 157 | "===================================================================================\n", 158 | "Dep. Variable: Increase_Decrease No. Observations: 1170\n", 159 | "Model: GEE No. clusters: 2\n", 160 | "Method: Generalized Min. cluster size: 584\n", 161 | " Estimating Equations Max. cluster size: 586\n", 162 | "Family: Poisson Mean cluster size: 585.0\n", 163 | "Dependence structure: Independence Num. 
iterations: 2\n", 164 | "Date: Fri, 23 Apr 2021 Scale: 1.000\n", 165 | "Covariance type: robust Time: 21:51:04\n", 166 | "====================================================================================\n", 167 | " coef std err z P>|z| [0.025 0.975]\n", 168 | "------------------------------------------------------------------------------------\n", 169 | "Intercept -0.7826 0.017 -45.953 0.000 -0.816 -0.749\n", 170 | "Returns 0.9742 1.267 0.769 0.442 -1.508 3.457\n", 171 | "Buy_Sell_on_Open -0.0671 0.172 -0.390 0.696 -0.404 0.270\n", 172 | "Open 0.0036 0.003 1.180 0.238 -0.002 0.010\n", 173 | "==============================================================================\n", 174 | "Skew: 0.1802 Kurtosis: -1.9614\n", 175 | "Centered skew: 0.1789 Centered kurtosis: -1.9459\n", 176 | "==============================================================================\n" 177 | ] 178 | } 179 | ], 180 | "execution_count": 11, 181 | "metadata": { 182 | "collapsed": true, 183 | "jupyter": { 184 | "source_hidden": false, 185 | "outputs_hidden": false 186 | }, 187 | "nteract": { 188 | "transient": { 189 | "deleting": false 190 | } 191 | }, 192 | "execution": { 193 | "iopub.status.busy": "2021-04-24T04:51:04.044Z", 194 | "iopub.execute_input": "2021-04-24T04:51:04.048Z", 195 | "iopub.status.idle": "2021-04-24T04:51:04.105Z", 196 | "shell.execute_reply": "2021-04-24T04:51:04.108Z" 197 | } 198 | } 199 | } 200 | ], 201 | "metadata": { 202 | "kernel_info": { 203 | "name": "python3" 204 | }, 205 | "language_info": { 206 | "name": "python", 207 | "version": "3.6.12", 208 | "mimetype": "text/x-python", 209 | "codemirror_mode": { 210 | "name": "ipython", 211 | "version": 3 212 | }, 213 | "pygments_lexer": "ipython3", 214 | "nbconvert_exporter": "python", 215 | "file_extension": ".py" 216 | }, 217 | "kernelspec": { 218 | "name": "python3", 219 | "language": "python", 220 | "display_name": "Python 3" 221 | }, 222 | "nteract": { 223 | "version": "0.28.0" 224 | } 225 | }, 226 | "nbformat": 
4, 227 | "nbformat_minor": 4 228 | } -------------------------------------------------------------------------------- /Stock_Algorithms/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### Description: 4 | #### Machine learning algorithms are programs that use math and logic to adjust themselves to perform better as they are exposed to more data. The “learning” part of machine learning means that those programs change how they process data over time, much as humans change how they process data by learning. 5 | 6 | ### List of Commonly Used Algorithms: 7 | #### Linear Regression: A simple yet powerful algorithm that models the relationship between a dependent variable and one or more independent variables by fitting a linear equation to the data. 8 | #### Logistic Regression: Widely used for binary classification problems, logistic regression estimates the probability of an event occurring based on a set of input features. 9 | #### Decision Trees: These algorithms use a hierarchical structure of nodes and branches to make decisions by splitting the data based on different features. They are interpretable and often form the building blocks of ensemble methods. 10 | #### Random Forest: An ensemble learning method that combines multiple decision trees to make predictions. It improves upon decision trees by reducing overfitting and increasing accuracy. 11 | #### Support Vector Machines (SVM): SVMs aim to find the optimal hyperplane that separates data points into different classes. They work well for both linearly separable and non-linearly separable data. 12 | #### K-Nearest Neighbors (KNN): KNN is a non-parametric algorithm that classifies data based on the majority vote of its nearest neighbors in the feature space. 13 | #### Naive Bayes: A probabilistic algorithm that uses Bayes' theorem to make predictions. Despite its simplicity, it performs remarkably well in text classification and spam filtering. 
14 | 15 | ### Regression in Machine Learning: 16 | #### Regression is a key concept in machine learning used for predicting continuous or numerical values. It analyzes the relationship between independent variables (features) and a dependent variable (target) to build a predictive model. 17 | ### Common regression techniques include: 18 | #### Linear Regression: The most basic form of regression, it fits a linear equation to the data by minimizing the sum of squared residuals. 19 | #### Polynomial Regression: Extends linear regression by introducing polynomial terms to capture non-linear relationships between variables. 20 | #### Ridge Regression: Adds a regularization term to linear regression to prevent overfitting by penalizing large coefficient values. 21 | #### Lasso Regression: Similar to ridge regression, but it uses the absolute value of the coefficients instead of squared values, leading to sparse solutions. 22 | #### Support Vector Regression (SVR): An extension of SVMs for regression tasks. SVR finds a hyperplane that maximizes the margin of support vectors while minimizing the error on the training data. 
23 | 24 | # :large_blue_diamond: List of Algorithms :large_blue_diamond: 25 | :arrow_forward: AdaBoost Classification 26 | :arrow_forward: AdaBoost Regressor 27 | :arrow_forward: Anomaly Detection 28 | :arrow_forward: Apriori Algorithm 29 | :arrow_forward: Artificial Neural Network 30 | :arrow_forward: Bagging Classifier 31 | :arrow_forward: Bayesian Ridge Regression 32 | :arrow_forward: Bernoulli Restricted Boltzmann Machine 33 | :arrow_forward: CatBoost Algorithms 34 | :arrow_forward: Classification and Clustering 35 | :arrow_forward: Clustering Algorithms 36 | :arrow_forward: CART (Classification and Regression Trees) 37 | :arrow_forward: Decision Tree Classification 38 | :arrow_forward: Decision Tree Regression 39 | :arrow_forward: Dimensionality Reduction Algorithms 40 | :arrow_forward: Ensemble Learning Algorithms 41 | :arrow_forward: Explanatory Algorithms 42 | :arrow_forward: Gradient Boosting Classification 43 | :arrow_forward: Generative Adversarial Networks (GANs) 44 | :arrow_forward: K-Means Clustering Algorithm 45 | :arrow_forward: K-Nearest Neighbors Algorithm 46 | :arrow_forward: Logistic Regression 47 | :arrow_forward: Linear Regression 48 | :arrow_forward: NetworkX 49 | :arrow_forward: Neural Networks Regression 50 | :arrow_forward: Quantile Regression 51 | :arrow_forward: Partial Least Squares Regression (PLSR) 52 | :arrow_forward: Polynomial Regression 53 | :arrow_forward: Principal Component Classification 54 | :arrow_forward: Principal Component Regression 55 | :arrow_forward: Random Forest Classification 56 | :arrow_forward: Random Forest Regression 57 | :arrow_forward: RNN Tensorflow 58 | :arrow_forward: Ridge Regression 59 | :arrow_forward: Similarity Algorithms 60 | :arrow_forward: Support Vector Machines (SVM) 61 | :arrow_forward: Tensorflow 62 | :arrow_forward: Time Series 63 | :arrow_forward: XGBoost 64 | 65 | ### AdaBoost 66 | AdaBoost is short for Adaptive Boosting and is a statistical classification meta-algorithm created by Yoav 
Freund and Robert Schapire in 1995. The meta-estimator begins by fitting a classifier on the original dataset and then fits additional copies of the classifier on the same dataset. However, the weights of incorrectly classified instances are adjusted such that subsequent classifiers focus more on difficult cases. 67 | 68 | ### Anomaly Detection 69 | Anomaly detection is identifying data points in data that don't fit the normal patterns. It is used for identifying rare items, events or observations which deviate significantly from the majority of the data and do not conform to a well-defined notion of normal behaviour. Each node or artificial neuron is connected to another and has an associated weight and threshold. If the output of any individual node is above the specified threshold value, that node is activated and sends data to the next layer of the network. Otherwise, no data is passed along to the next layer of the network. 70 | 71 | ### Artificial Neural Network 72 | Artificial neural networks (ANNs) consist of input, hidden, and output layers with connected neurons (nodes) to simulate the human brain. 73 | 74 | ### Bagging classifier 75 | The Bagging classifier is an ensemble meta-estimator that fits base classifiers each on random subsets of the original dataset and then aggregates their individual predictions (either by voting or by averaging) to form a final prediction. 76 | 77 | ### Bayesian Ridge Regression 78 | Bayesian Ridge Regression is similar to linear regression in which the statistical analysis is undertaken within the context of Bayesian inference. It allows a natural mechanism to survive insufficient data or poorly distributed data by formulating linear regression using probability distributions rather than point estimates. 79 | 80 | ### Bernoulli Restricted Boltzmann Machine 81 | Bernoulli Restricted Boltzmann Machine (RBM) is a generative stochastic artificial neural network that can learn a probability distribution over its set of inputs. 
82 | 83 | ### Decision Tree 84 | The Decision Tree algorithm is a supervised machine learning technique and is used for both classification and regression. Decision Tree uses multiple algorithms to decide to split a node into two or more sub-nodes. The creation of sub-nodes increases the homogeneity of resultant sub-nodes. In other words, the purity of the node increases with respect to the target variable. 85 | 86 | ### Gradient Boosting Algorithm 87 | Gradient Boosting is a machine learning technique used in regression and classification. Gradient boosting works on building simple or weak prediction models sequentially where each model tries to predict the error left over by the previous model, such as overfitting. 88 | 89 | ### K-Means Clustering Algorithm 90 | K-Means clustering is an unsupervised machine learning algorithm and is used to solve complex machine learning problems. 91 | 92 | ### K-Nearest Neighbors Algorithm 93 | K-Nearest Neighbors (KNN or k-NN) is a non-parametric, supervised learning classifier, which uses proximity to make classifications or predictions about the grouping of an individual data point. 94 | 95 | ### Logistic Regression 96 | Logistic Regression is used to estimate the probability of an event occurring, such as voted or didn't vote, based on a given dataset of independent variables. Since the outcome is a probability, the dependent variable is bounded between 0 and 1. 97 | 98 | ### Linear Regression 99 | Linear Regression is used to model the relationship between two variables by fitting a linear equation to observed data. One variable is considered to be an explanatory variable, and the other is considered to be a dependent variable. 100 | 101 | ### NetworkX 102 | NetworkX provides classes for graphs which allow multiple edges between any pair of nodes. The MultiGraph and MultiDiGraph classes allow you to add the same edge twice, possibly with different edge data. 
NetworkX can be powerful for some applications; however, many algorithms are not well defined on such graphs. 103 | 104 | ### Neural Networks Regression 105 | Regression models such as linear regression learn only the linear relationship between the features and target and therefore cannot learn a complex non-linear relationship. In order to learn the complex non-linear relationship between the features and target, we are in need of other techniques, such as neural networks. 106 | 107 | ## Boosting Algorithms: 108 | ### Gradient Boosting Machines (GBM): 109 | Gradient boosting is a machine learning technique used in regression and classification tasks, among others. It gives a prediction model in the form of an ensemble of weak prediction models, which are typically decision trees. When a decision tree is the weak learner, the resulting algorithm is called gradient-boosted trees; it usually outperforms random forests. A gradient-boosted trees model is built in a stage-wise fashion as in other boosting methods, but it generalizes the other methods by allowing optimization of an arbitrary differentiable loss function. 110 | 111 | ### XGBoost: 112 | XGBoost is a scalable and highly accurate implementation of gradient boosting that pushes the limits of computing power for boosted tree algorithms, being built largely to energize machine learning model performance and computational speed. 113 | 114 | ### LightGBM: 115 | LightGBM is a gradient boosting framework that uses tree-based learning algorithms. 116 | 117 | ### CatBoost: 118 | CatBoost is an algorithm for gradient boosting on decision trees. 119 | 120 | ### Classification and Clustering 121 | Classification examples are Logistic regression, Naive Bayes classifier, Support vector machines, and others relating to classification. Clustering examples are the k-means clustering algorithm, Fuzzy c-means clustering algorithm, Gaussian (EM) clustering algorithm, and other algorithms relating to clustering. 
122 | 123 | ## Authors 124 | ### * Tin Hang 125 | 126 | ## 🔴 Warning: This is not financial advice. Do not use this for investing or trading purposes. It is for educational purposes only. 127 | -------------------------------------------------------------------------------- /Stock_Algorithms/TruncatedSVD.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# Dimensionality reduction using truncated SVD (aka LSA)" 7 | ], 8 | "metadata": {} 9 | }, 10 | { 11 | "cell_type": "code", 12 | "source": [ 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import pandas as pd\n", 16 | "\n", 17 | "import warnings\n", 18 | "warnings.filterwarnings(\"ignore\")\n", 19 | "\n", 20 | "# fix_yahoo_finance is used to fetch data \n", 21 | "import fix_yahoo_finance as yf\n", 22 | "yf.pdr_override()" 23 | ], 24 | "outputs": [], 25 | "execution_count": 1, 26 | "metadata": { 27 | "collapsed": false, 28 | "outputHidden": false, 29 | "inputHidden": false 30 | } 31 | }, 32 | { 33 | "cell_type": "code", 34 | "source": [ 35 | "# input\n", 36 | "symbol = 'AMD'\n", 37 | "start = '2014-01-01'\n", 38 | "end = '2019-01-01'\n", 39 | "\n", 40 | "# Read data \n", 41 | "dataset = yf.download(symbol,start,end)\n", 42 | "\n", 43 | "# View Columns\n", 44 | "dataset.head()" 45 | ], 46 | "outputs": [ 47 | { 48 | "output_type": "stream", 49 | "name": "stdout", 50 | "text": [ 51 | "[*********************100%***********************] 1 of 1 downloaded\n" 52 | ] 53 | }, 54 | { 55 | "output_type": "execute_result", 56 | "execution_count": 2, 57 | "data": { 58 | "text/html": [ 59 | "
\n", 60 | "\n", 73 | "\n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | "
OpenHighLowCloseAdj CloseVolume
Date
2014-01-023.853.983.843.953.9520548400
2014-01-033.984.003.884.004.0022887200
2014-01-064.014.183.994.134.1342398300
2014-01-074.194.254.114.184.1842932100
2014-01-084.234.264.144.184.1830678700
\n", 142 | "
" 143 | ], 144 | "text/plain": [ 145 | " Open High Low Close Adj Close Volume\n", 146 | "Date \n", 147 | "2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n", 148 | "2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n", 149 | "2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n", 150 | "2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n", 151 | "2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700" 152 | ] 153 | }, 154 | "metadata": {} 155 | } 156 | ], 157 | "execution_count": 2, 158 | "metadata": { 159 | "collapsed": false, 160 | "outputHidden": false, 161 | "inputHidden": false 162 | } 163 | }, 164 | { 165 | "cell_type": "code", 166 | "source": [ 167 | "X = dataset.iloc[ : , 1:4].values\n", 168 | "Y = dataset.iloc[ : , 4].values" 169 | ], 170 | "outputs": [], 171 | "execution_count": 23, 172 | "metadata": { 173 | "collapsed": false, 174 | "outputHidden": false, 175 | "inputHidden": false 176 | } 177 | }, 178 | { 179 | "cell_type": "code", 180 | "source": [ 181 | "print(X.shape)\n", 182 | "print(Y.shape)" 183 | ], 184 | "outputs": [ 185 | { 186 | "output_type": "stream", 187 | "name": "stdout", 188 | "text": [ 189 | "(1258, 3)\n", 190 | "(1258,)\n" 191 | ] 192 | } 193 | ], 194 | "execution_count": 24, 195 | "metadata": { 196 | "collapsed": false, 197 | "outputHidden": false, 198 | "inputHidden": false 199 | } 200 | }, 201 | { 202 | "cell_type": "code", 203 | "source": [ 204 | "X = np.array(X).reshape(1258,-1)\n", 205 | "Y = np.array(Y).reshape(1258,-1)" 206 | ], 207 | "outputs": [], 208 | "execution_count": 25, 209 | "metadata": { 210 | "collapsed": false, 211 | "outputHidden": false, 212 | "inputHidden": false 213 | } 214 | }, 215 | { 216 | "cell_type": "code", 217 | "source": [ 218 | "from sklearn.decomposition import TruncatedSVD" 219 | ], 220 | "outputs": [], 221 | "execution_count": 19, 222 | "metadata": { 223 | "collapsed": false, 224 | "outputHidden": false, 225 | "inputHidden": false 226 | } 227 | }, 228 | { 229 | "cell_type": "code", 230 | "source": [ 231 | "svd = 
TruncatedSVD(n_components=1, n_iter=2, random_state=42)\n", 232 | "svd.fit(X) " 233 | ], 234 | "outputs": [ 235 | { 236 | "output_type": "execute_result", 237 | "execution_count": 27, 238 | "data": { 239 | "text/plain": [ 240 | "TruncatedSVD(algorithm='randomized', n_components=1, n_iter=2,\n", 241 | " random_state=42, tol=0.0)" 242 | ] 243 | }, 244 | "metadata": {} 245 | } 246 | ], 247 | "execution_count": 27, 248 | "metadata": { 249 | "collapsed": false, 250 | "outputHidden": false, 251 | "inputHidden": false 252 | } 253 | }, 254 | { 255 | "cell_type": "code", 256 | "source": [ 257 | "print(svd.explained_variance_ratio_)" 258 | ], 259 | "outputs": [ 260 | { 261 | "output_type": "stream", 262 | "name": "stdout", 263 | "text": [ 264 | "[ 0.99951387]\n" 265 | ] 266 | } 267 | ], 268 | "execution_count": 28, 269 | "metadata": { 270 | "collapsed": false, 271 | "outputHidden": false, 272 | "inputHidden": false 273 | } 274 | }, 275 | { 276 | "cell_type": "code", 277 | "source": [ 278 | "print(svd.singular_values_) " 279 | ], 280 | "outputs": [ 281 | { 282 | "output_type": "stream", 283 | "name": "stdout", 284 | "text": [ 285 | "[ 639.8979399]\n" 286 | ] 287 | } 288 | ], 289 | "execution_count": 29, 290 | "metadata": { 291 | "collapsed": false, 292 | "outputHidden": false, 293 | "inputHidden": false 294 | } 295 | } 296 | ], 297 | "metadata": { 298 | "kernel_info": { 299 | "name": "python3" 300 | }, 301 | "language_info": { 302 | "version": "3.5.5", 303 | "codemirror_mode": { 304 | "version": 3, 305 | "name": "ipython" 306 | }, 307 | "pygments_lexer": "ipython3", 308 | "file_extension": ".py", 309 | "nbconvert_exporter": "python", 310 | "name": "python", 311 | "mimetype": "text/x-python" 312 | }, 313 | "kernelspec": { 314 | "name": "python3", 315 | "language": "python", 316 | "display_name": "Python 3" 317 | }, 318 | "nteract": { 319 | "version": "0.12.2" 320 | } 321 | }, 322 | "nbformat": 4, 323 | "nbformat_minor": 4 324 | } 
-------------------------------------------------------------------------------- /Stock_Apps/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Stock Applications & Softwares 5 | 6 | ## Descriptions: 7 | Different types of algorithm for predictions 8 | 9 | __Input__ 10 | 1: Input the stock starting date 11 | 2: Input the stock ending date 12 | 3. Input the stock symbol 13 | 4. Choose Algorithms for Stock Prediction 14 | 15 | ## Authors 16 | ### * Tin Hang 17 | -------------------------------------------------------------------------------- /Stock_Apps/Stock_Apps.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LastAncientOne/Deep_Learning_Machine_Learning_Stock/e31e9fc689cd2cb6d74a8f2694750afd6e13bcec/Stock_Apps/Stock_Apps.PNG -------------------------------------------------------------------------------- /Stock_Apps/Stock_ML_Data_PreProcessing_Apps.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat May 25 14:21:27 2019 4 | 5 | @author: Tin 6 | """ 7 | import numpy as np 8 | import pandas as pd 9 | import datetime 10 | 11 | from sklearn.preprocessing import MinMaxScaler 12 | from sklearn.preprocessing import Binarizer 13 | from sklearn.preprocessing import StandardScaler 14 | from sklearn.model_selection import train_test_split 15 | 16 | 17 | import warnings 18 | warnings.filterwarnings("ignore") 19 | 20 | # yahoo finance used to fetch data 21 | import yfinance as yf 22 | yf.pdr_override() 23 | 24 | options = " Data Preprocessing, Exit".split(",") 25 | 26 | # Input Start Date 27 | def start_date(): 28 | date_entry = input('Enter a starting date in MM/DD/YYYY format: ') 29 | start = datetime.datetime.strptime(date_entry,'%m/%d/%Y') 30 | start = start.strftime('%Y-%m-%d') 31 | return start 32 | 33 | # Input End Date 34 | def end_date(): 35 | 
date_entry = input('Enter a ending date in MM/DD/YYYY format: ') 36 | end = datetime.datetime.strptime(date_entry,'%m/%d/%Y') 37 | end = end.strftime('%Y-%m-%d') 38 | return end 39 | 40 | # Input Symbols 41 | def input_symbol(): 42 | symbol = input("Enter symbol: ").upper() 43 | return symbol 44 | 45 | # Rescaled Dataset 46 | def Rescale_Dataset(): 47 | s = start_date() 48 | e = end_date() 49 | sym = input_symbol() 50 | df = yf.download(sym, s, e) 51 | array = df.values 52 | X = array[:,0:5] 53 | Y = array[:,5] 54 | # initialising the MinMaxScaler 55 | scaler = MinMaxScaler(feature_range=(0, 1)) 56 | # learning the statistical parameters for each of the data and transforming 57 | rescaledX = scaler.fit_transform(X) 58 | np.set_printoptions(precision=3) 59 | print('Rescaled values between 0 to 1') 60 | print(rescaledX[0:5,:]) 61 | print("") 62 | # Splitting the datasets into training sets and Test sets 63 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 64 | sc_X = StandardScaler() 65 | # Splitting the datasets into training sets and Test sets 66 | X_train = sc_X.fit_transform(X_train) 67 | X_test = sc_X.fit_transform(X_test) 68 | print("Training Dataset") 69 | print(X_train) 70 | print("") 71 | print(Y_train) 72 | print("") 73 | print("Testing Dataset") 74 | print(X_test) 75 | print("") 76 | print(Y_test) 77 | print("") 78 | ans = ['1', '2'] 79 | user_input=input(""" 80 | What would you like to do next? Enter option 1 or 2. 81 | 1. Menu 82 | 2. 
Exit 83 | Command: """) 84 | while user_input not in ans: 85 | print("Error: Please enter a a valid option 1-2") 86 | user_input=input("Command: ") 87 | if user_input=="1": 88 | menu() 89 | elif user_input=="2": 90 | exit() 91 | 92 | 93 | #***********************************************************************************************************************# 94 | # Binarize Data 95 | def Binarize_Dataset(): 96 | s = start_date() 97 | e = end_date() 98 | sym = input_symbol() 99 | df = yf.download(sym, s, e) 100 | array = df.values 101 | X = array[:,0:5] 102 | Y = array[:,5] 103 | # initialising the binarize 104 | binarizer = Binarizer(threshold = 0.0).fit(X) 105 | binaryX = binarizer.transform(X) 106 | np.set_printoptions(precision=3) 107 | print('Binarize values equal or less than 0 are marked 0 and all of those above 0 are marked 1') 108 | print(binaryX[0:5,:]) 109 | print("") 110 | # Splitting the datasets into training sets and Test sets 111 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 112 | sc_X = StandardScaler() 113 | # Splitting the datasets into training sets and Test sets 114 | X_train = sc_X.fit_transform(X_train) 115 | X_test = sc_X.fit_transform(X_test) 116 | print("Training Dataset") 117 | print(X_train) 118 | print("") 119 | print(Y_train) 120 | print("") 121 | print("Testing Dataset") 122 | print(X_test) 123 | print("") 124 | print(Y_test) 125 | print("") 126 | ans = ['1', '2'] 127 | user_input=input(""" 128 | What would you like to do next? Enter option 1 or 2. 129 | 1. Menu 130 | 2. 
Exit 131 | Command: """) 132 | while user_input not in ans: 133 | print("Error: Please enter a a valid option 1-2") 134 | user_input=input("Command: ") 135 | if user_input=="1": 136 | menu() 137 | elif user_input=="2": 138 | exit() 139 | 140 | 141 | #***********************************************************************************************************************# 142 | # Standardize Data 143 | def Standardize_Dataset(): 144 | s = start_date() 145 | e = end_date() 146 | sym = input_symbol() 147 | df = yf.download(sym, s, e) 148 | array = df.values 149 | X = array[:,0:5] 150 | Y = array[:,5] 151 | # initialising the standardize 152 | scaler = StandardScaler().fit(X) 153 | rescaledX = scaler.transform(X) 154 | np.set_printoptions(precision=3) 155 | print('Standardize values with a mean of 0 and a standard deviation of 1') 156 | print(rescaledX[0:5,:]) 157 | print("") 158 | # Splitting the datasets into training sets and Test sets 159 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 160 | sc_X = StandardScaler() 161 | # Splitting the datasets into training sets and Test sets 162 | X_train = sc_X.fit_transform(X_train) 163 | X_test = sc_X.fit_transform(X_test) 164 | print("Training Dataset") 165 | print(X_train) 166 | print("") 167 | print(Y_train) 168 | print("") 169 | print("Testing Dataset") 170 | print(X_test) 171 | print("") 172 | print(Y_test) 173 | print("") 174 | ans = ['1', '2'] 175 | user_input=input(""" 176 | What would you like to do next? Enter option 1 or 2. 177 | 1. Menu 178 | 2. 
Exit 179 | Command: """) 180 | while user_input not in ans: 181 | print("Error: Please enter a a valid option 1-2") 182 | user_input=input("Command: ") 183 | if user_input=="1": 184 | menu() 185 | elif user_input=="2": 186 | exit() 187 | 188 | 189 | 190 | 191 | #***********************************************************************************************************************# 192 | #******************************************************* Menu **********************************************************# 193 | #***********************************************************************************************************************# 194 | def menu(): 195 | ans = ['1', '2', '3', '4', '0'] 196 | print(""" 197 | 198 | MENU 199 | PREPROCESSING DATASET 200 | --------------------------- 201 | 1. Rescaled Data 202 | 2. Binarize Data 203 | 3. Standardize Data 204 | 4. Beginning Menu 205 | 0. Exit the Program 206 | """) 207 | user_input = input("Command (0-3): ") 208 | while user_input not in ans: 209 | print("Error: Please enter a valid option 0-3") 210 | user_input=input("Command: ") 211 | if user_input == '1': 212 | Rescaled_Dataset() 213 | elif user_input == '2': 214 | Binarize_Dataset() 215 | elif user_input == '3': 216 | Standardize_Dataset() 217 | elif user_input == "4": 218 | beginning() 219 | elif user_input == "0": 220 | exit() 221 | 222 | 223 | #***********************************************************************************************************************# 224 | #*************************************************** Start of Program **************************************************# 225 | #***********************************************************************************************************************# 226 | def beginning(): 227 | print() 228 | print("----------Welcome to Preprocessing Dataset--------") 229 | print(""" 230 | Please choose option 1 or 2 231 | 232 | 1. Menu 233 | 2. 
Exit Program 234 | ---------------------------------------------""") 235 | ans = ['1', '2'] 236 | user_input=input("What is your Option?: ") 237 | while user_input not in ans: 238 | print("Error: Please enter a a valid option 1-2") 239 | user_input=input("Command: ") 240 | if user_input=="1": 241 | menu() 242 | elif user_input=="2": 243 | exit() 244 | 245 | 246 | #***********************************************************************************************************************# 247 | beginning() -------------------------------------------------------------------------------- /Stock_Apps/Stock_ML_Predict_Apps.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat May 25 14:21:27 2019 4 | 5 | @author: Tin 6 | """ 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | 10 | import datetime 11 | 12 | # Machine Learning Libraries 13 | from sklearn.linear_model import LinearRegression 14 | from sklearn.linear_model import LogisticRegression 15 | from sklearn.model_selection import train_test_split 16 | from sklearn import metrics 17 | from sklearn.model_selection import cross_val_score 18 | from sklearn.svm import SVR 19 | 20 | import warnings 21 | warnings.filterwarnings("ignore") 22 | 23 | # yahoo finance used to fetch data 24 | import yfinance as yf 25 | yf.pdr_override() 26 | 27 | options = " Stock Linear Regression Prediction, Stock Logistic Regression Prediction, Support Vector Regression, Exit".split(",") 28 | 29 | # Input Start Date 30 | def start_date(): 31 | date_entry = input('Enter a starting date in MM/DD/YYYY format: ') 32 | start = datetime.datetime.strptime(date_entry,'%m/%d/%Y') 33 | start = start.strftime('%Y-%m-%d') 34 | return start 35 | 36 | # Input End Date 37 | def end_date(): 38 | date_entry = input('Enter a ending date in MM/DD/YYYY format: ') 39 | end = datetime.datetime.strptime(date_entry,'%m/%d/%Y') 40 | end = end.strftime('%Y-%m-%d') 41 | return 
end 42 | 43 | # Input Symbols 44 | def input_symbol(): 45 | symbol = input("Enter symbol: ").upper() 46 | return symbol 47 | 48 | # Logistic Regression 49 | def stock_logistic_regression(): 50 | s = start_date() 51 | e = end_date() 52 | sym = input_symbol() 53 | df = yf.download(sym, s, e) 54 | 55 | df = df.drop(['Date'], axis=1) 56 | X = df.loc[:, df.columns != 'Adj Close'] 57 | y = np.where (df['Adj Close'].shift(-1) > df['Adj Close'],1,-1) 58 | 59 | split = int(0.7*len(df)) 60 | X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:] 61 | model = LogisticRegression() 62 | model = model.fit(X_train,y_train) 63 | predicted = model.predict(X_test) 64 | print(metrics.confusion_matrix(y_test, predicted)) 65 | print(metrics.classification_report(y_test, predicted)) 66 | print(model.score(X_test,y_test)) 67 | cross_val = cross_val_score(LogisticRegression(), X, y, scoring='accuracy', cv=10) 68 | print(cross_val) 69 | print(cross_val.mean()) 70 | return 71 | 72 | # Linear Regression 73 | def stock_linear_regression(): 74 | s = start_date() 75 | e = end_date() 76 | sym = input_symbol() 77 | df = yf.download(sym, s, e) 78 | n = len(df.index) 79 | X = np.array(df['Open']).reshape(n,-1) 80 | Y = np.array(df['Adj Close']).reshape(n,-1) 81 | lr = LinearRegression() 82 | lr.fit(X, Y) 83 | lr.predict(X) 84 | 85 | plt.figure(figsize=(12,8)) 86 | plt.scatter(df['Adj Close'], lr.predict(X)) 87 | plt.plot(X, lr.predict(X), color = 'red') 88 | plt.xlabel('Prices') 89 | plt.ylabel('Predicted Prices') 90 | plt.grid() 91 | plt.title(sym + ' Prices vs Predicted Prices') 92 | plt.show() 93 | print('____________Summary:____________') 94 | print('Estimate intercept coefficient:', lr.intercept_) 95 | print('Number of coefficients:', len(lr.coef_)) 96 | print('Accuracy Score:', lr.score(X, Y)) 97 | print("") 98 | return 99 | 100 | # Support Vector Regression 101 | def stock_svr(): 102 | s = start_date() 103 | e = end_date() 104 | sym = input_symbol() 105 | df = 
yf.download(sym, s, e) 106 | dates = np.reshape(df.index,(len(df.index), 1)) # convert to 1xn dimension 107 | x = 31 108 | x = np.reshape(x,(len(x), 1)) 109 | prices = df['Adj Close'] 110 | svr_lin = SVR(kernel='linear', C=1e3) 111 | svr_poly = SVR(kernel='poly', C=1e3, degree=2) 112 | svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1) 113 | 114 | # Fit regression model 115 | svr_lin .fit(dates, prices) 116 | svr_poly.fit(dates, prices) 117 | svr_rbf.fit(dates, prices) 118 | 119 | plt.figure(figsize=(12,8)) 120 | plt.scatter(dates, prices, c='k', label='Data') 121 | plt.plot(dates, svr_lin.predict(dates), c='g', label='Linear model') 122 | plt.plot(dates, svr_rbf.predict(dates), c='r', label='RBF model') 123 | plt.plot(dates, svr_poly.predict(dates), c='b', label='Polynomial model') 124 | plt.xlabel('Date') 125 | plt.ylabel('Price') 126 | plt.title('Support Vector Regression') 127 | plt.legend() 128 | plt.show() 129 | print('____________Summary:____________') 130 | print('Linear Model:', svr_rbf.predict(x)[0]) 131 | print('RBF Model:', svr_lin.predict(x)[0]) 132 | print('Polynomial Model:', svr_poly.predict(x)[0]) 133 | print("") 134 | return 135 | 136 | 137 | def main(): 138 | run_program = True 139 | while run_program: 140 | print("__________Stock Price Prediction__________") 141 | print("Choose Options:") 142 | for i in range(1, len(options)+1): 143 | print("{} - {}".format(i, options[i-1])) 144 | choice = int(input()) 145 | 146 | if choice == 1: 147 | print("____________Linear Regression_____________") 148 | stock_linear_regression() 149 | elif choice == 2: 150 | print("____________Logistic Regression_____________") 151 | stock_logistic_regression() 152 | elif choice == 3: 153 | print("____________Support Vector Regression_____________") 154 | stock_logistic_regression() 155 | elif choice == 4: 156 | run_program = False 157 | 158 | 159 | if __name__ == "__main__": 160 | main() 161 | -------------------------------------------------------------------------------- 
/Stock_Apps/Stock_ML_Predict_Apps_Menu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Dec 20 19:44:59 2019 4 | 5 | @author: Tin 6 | """ 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | import datetime 12 | 13 | # Machine Learning Libraries 14 | from sklearn.linear_model import LinearRegression 15 | from sklearn.linear_model import LogisticRegression 16 | # from sklearn.model_selection import train_test_split 17 | from sklearn import metrics 18 | from sklearn.model_selection import cross_validate 19 | from sklearn.svm import SVR 20 | 21 | import warnings 22 | warnings.filterwarnings("ignore") 23 | 24 | # yahoo finance used to fetch data 25 | import yfinance as yf 26 | yf.pdr_override() 27 | 28 | options = " Stock Linear Regression Prediction, Stock Logistic Regression Prediction, Support Vector Regression, Exit".split(",") 29 | 30 | # Input Start Date 31 | def start_date(): 32 | date_entry = input('Enter a starting date in MM/DD/YYYY format: ') 33 | start = datetime.datetime.strptime(date_entry,'%m/%d/%Y') 34 | start = start.strftime('%Y-%m-%d') 35 | return start 36 | 37 | # Input End Date 38 | def end_date(): 39 | date_entry = input('Enter a ending date in MM/DD/YYYY format: ') 40 | end = datetime.datetime.strptime(date_entry,'%m/%d/%Y') 41 | end = end.strftime('%Y-%m-%d') 42 | return end 43 | 44 | # Input Symbols 45 | def input_symbol(): 46 | symbol = input("Enter symbol: ").upper() 47 | return symbol 48 | 49 | # Logistic Regression 50 | def stock_logistic_regression(): 51 | s = start_date() 52 | e = end_date() 53 | sym = input_symbol() 54 | df = yf.download(sym, s, e) 55 | 56 | df = df.drop(['Date'], axis=1) 57 | X = df.loc[:, df.columns != 'Adj Close'] 58 | y = np.where (df['Adj Close'].shift(-1) > df['Adj Close'],1,-1) 59 | 60 | split = int(0.7*len(df)) 61 | X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:] 62 | model 
= LogisticRegression() 63 | model = model.fit(X_train,y_train) 64 | predicted = model.predict(X_test) 65 | print(metrics.confusion_matrix(y_test, predicted)) 66 | print(metrics.classification_report(y_test, predicted)) 67 | print(model.score(X_test,y_test)) 68 | cross_val = cross_validate(LogisticRegression(), X, y, scoring='accuracy', cv=10) 69 | print('_____________Summary:_____________') 70 | print(cross_val) 71 | print(cross_val.mean()) 72 | print("") 73 | ans = ['1', '2'] 74 | user_input=input(""" 75 | What would you like to do next? Enter option 1 or 2. 76 | 1. Menu 77 | 2. Exit 78 | Command: """) 79 | while user_input not in ans: 80 | print("Error: Please enter a a valid option 1-2") 81 | user_input=input("Command: ") 82 | if user_input=="1": 83 | menu() 84 | elif user_input=="2": 85 | exit() 86 | 87 | 88 | # Linear Regression 89 | def stock_linear_regression(): 90 | s = start_date() 91 | e = end_date() 92 | sym = input_symbol() 93 | df = yf.download(sym, s, e) 94 | n = len(df.index) 95 | X = np.array(df['Open']).reshape(n,-1) 96 | Y = np.array(df['Adj Close']).reshape(n,-1) 97 | lr = LinearRegression() 98 | lr.fit(X, Y) 99 | lr.predict(X) 100 | 101 | plt.figure(figsize=(12,8)) 102 | plt.scatter(df['Adj Close'], lr.predict(X)) 103 | plt.plot(X, lr.predict(X), color = 'red') 104 | plt.xlabel('Prices') 105 | plt.ylabel('Predicted Prices') 106 | plt.grid() 107 | plt.title(sym + ' Prices vs Predicted Prices') 108 | plt.show() 109 | print('_____________Summary:_____________') 110 | print('Estimate intercept coefficient:', lr.intercept_) 111 | print('Number of coefficients:', len(lr.coef_)) 112 | print('Accuracy Score:', lr.score(X, Y)) 113 | print("") 114 | ans = ['1', '2'] 115 | user_input=input(""" 116 | What would you like to do next? Enter option 1 or 2. 117 | 1. Menu 118 | 2. 
Exit 119 | Command: """) 120 | while user_input not in ans: 121 | print("Error: Please enter a a valid option 1-2") 122 | user_input=input("Command: ") 123 | if user_input=="1": 124 | menu() 125 | elif user_input=="2": 126 | exit() 127 | 128 | 129 | # Support Vector Regression 130 | def stock_svr(): 131 | s = start_date() 132 | e = end_date() 133 | sym = input_symbol() 134 | df = yf.download(sym, s, e) 135 | dates = np.reshape(df.index,(len(df.index), 1)) # convert to 1xn dimension 136 | x = 31 137 | x = np.reshape(x,(len(x), 1)) 138 | prices = df['Adj Close'] 139 | svr_lin = SVR(kernel='linear', C=1e3) 140 | svr_poly = SVR(kernel='poly', C=1e3, degree=2) 141 | svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1) 142 | 143 | # Fit regression model 144 | svr_lin .fit(dates, prices) 145 | svr_poly.fit(dates, prices) 146 | svr_rbf.fit(dates, prices) 147 | 148 | plt.figure(figsize=(12,8)) 149 | plt.scatter(dates, prices, c='k', label='Data') 150 | plt.plot(dates, svr_lin.predict(dates), c='g', label='Linear model') 151 | plt.plot(dates, svr_rbf.predict(dates), c='r', label='RBF model') 152 | plt.plot(dates, svr_poly.predict(dates), c='b', label='Polynomial model') 153 | plt.xlabel('Date') 154 | plt.ylabel('Price') 155 | plt.title('Support Vector Regression') 156 | plt.legend() 157 | plt.show() 158 | print('_____________Summary:_____________') 159 | print('Linear Model:', svr_rbf.predict(x)[0]) 160 | print('RBF Model:', svr_lin.predict(x)[0]) 161 | print('Polynomial Model:', svr_poly.predict(x)[0]) 162 | print("") 163 | ans = ['1', '2'] 164 | user_input=input(""" 165 | What would you like to do next? Enter option 1 or 2. 166 | 1. Menu 167 | 2. 
Exit 168 | Command: """) 169 | while user_input not in ans: 170 | print("Error: Please enter a a valid option 1-2") 171 | user_input=input("Command: ") 172 | if user_input=="1": 173 | menu() 174 | elif user_input=="2": 175 | exit() 176 | 177 | 178 | #***********************************************************************************************************************# 179 | #******************************************************* Menu **********************************************************# 180 | #***********************************************************************************************************************# 181 | def menu(): 182 | ans = ['1', '2', '3', '4', '0'] 183 | print(""" 184 | 185 | MENU 186 | MACHINE LEARNING PREDICTION 187 | --------------------------- 188 | 1.Linear Regression 189 | 2.Logistic Regressions 190 | 3.Support Vector Regression 191 | 4.Beginning Menu 192 | 0.Exit the Program 193 | """) 194 | user_input = input("Command (0-3): ") 195 | while user_input not in ans: 196 | print("Error: Please enter a valid option 0-3") 197 | user_input=input("Command: ") 198 | if user_input == '1': 199 | stock_linear_regression() 200 | elif user_input == '2': 201 | stock_logistic_regression() 202 | elif user_input == '3': 203 | stock_svr() 204 | elif user_input == "4": 205 | beginning() 206 | elif user_input == "0": 207 | exit() 208 | 209 | 210 | #***********************************************************************************************************************# 211 | #*************************************************** Start of Program **************************************************# 212 | #***********************************************************************************************************************# 213 | def beginning(): 214 | print() 215 | print("----------Welcome to Machine Learning Predictions--------") 216 | print(""" 217 | Please choose option 1 or 2 218 | 219 | 1. Menu 220 | 2. 
Exit Program 221 | 222 | ---------------------------------------------""") 223 | ans = ['1', '2'] 224 | user_input=input("What is your Option?: ") 225 | while user_input not in ans: 226 | print("Error: Please enter a a valid option 1-2") 227 | user_input=input("Command: ") 228 | if user_input=="1": 229 | menu() 230 | elif user_input=="2": 231 | exit() 232 | 233 | 234 | #***********************************************************************************************************************# 235 | beginning() 236 | -------------------------------------------------------------------------------- /Stock_Apps/Stock_ML_Rescale_Data_Apps.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat May 25 14:21:27 2019 4 | 5 | @author: Tin 6 | """ 7 | import numpy as np 8 | import pandas as pd 9 | import datetime 10 | 11 | from sklearn.preprocessing import MinMaxScaler 12 | from sklearn.preprocessing import StandardScaler 13 | from sklearn.model_selection import train_test_split 14 | 15 | 16 | import warnings 17 | warnings.filterwarnings("ignore") 18 | 19 | # yahoo finance used to fetch data 20 | import yfinance as yf 21 | yf.pdr_override() 22 | 23 | options = " Data Preprocessing, Exit".split(",") 24 | 25 | # Input Start Date 26 | def start_date(): 27 | date_entry = input('Enter a starting date in MM/DD/YYYY format: ') 28 | start = datetime.datetime.strptime(date_entry,'%m/%d/%Y') 29 | start = start.strftime('%Y-%m-%d') 30 | return start 31 | 32 | # Input End Date 33 | def end_date(): 34 | date_entry = input('Enter a ending date in MM/DD/YYYY format: ') 35 | end = datetime.datetime.strptime(date_entry,'%m/%d/%Y') 36 | end = end.strftime('%Y-%m-%d') 37 | return end 38 | 39 | # Input Symbols 40 | def input_symbol(): 41 | symbol = input("Enter symbol: ").upper() 42 | return symbol 43 | 44 | def preprocessing_dataset(): 45 | s = start_date() 46 | e = end_date() 47 | sym = input_symbol() 48 | df = 
yf.download(sym, s, e) 49 | array = df.values 50 | X = array[:,0:5] 51 | Y = array[:,5] 52 | # initialising the MinMaxScaler 53 | scaler = MinMaxScaler(feature_range=(0, 1)) 54 | # learning the statistical parameters for each of the data and transforming 55 | rescaledX = scaler.fit_transform(X) 56 | np.set_printoptions(precision=3) 57 | print('Rescaled values between 0 to 1') 58 | print(rescaledX[0:5,:]) 59 | print("") 60 | # Splitting the datasets into training sets and Test sets 61 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 62 | sc_X = StandardScaler() 63 | # Splitting the datasets into training sets and Test sets 64 | X_train = sc_X.fit_transform(X_train) 65 | X_test = sc_X.fit_transform(X_test) 66 | print("Training Dataset") 67 | print(X_train) 68 | print("") 69 | print(Y_train) 70 | print("") 71 | print("Testing Dataset") 72 | print(X_test) 73 | print("") 74 | print(Y_test) 75 | return 76 | 77 | 78 | def main(): 79 | run_program = True 80 | while run_program: 81 | print("") 82 | print("__________Preprocessing Dataset__________") 83 | print("") 84 | print("Choose Options:") 85 | print("") 86 | for i in range(1, len(options)+1): 87 | print("{} - {}".format(i, options[i-1])) 88 | choice = int(input()) 89 | 90 | if choice == 1: 91 | preprocessing_dataset() 92 | elif choice == 2: 93 | run_program = False 94 | 95 | 96 | if __name__ == "__main__": 97 | main() -------------------------------------------------------------------------------- /Tensorflow_Basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# Tensorflow Basic" 7 | ], 8 | "metadata": {} 9 | }, 10 | { 11 | "cell_type": "code", 12 | "source": [ 13 | "import tensorflow as tf" 14 | ], 15 | "outputs": [], 16 | "execution_count": 14, 17 | "metadata": { 18 | "collapsed": false, 19 | "outputHidden": false, 20 | "inputHidden": false 21 | } 22 | 
}, 23 | { 24 | "cell_type": "code", 25 | "source": [ 26 | "x = tf.constant(2)\n", 27 | "y = tf.constant(4)" 28 | ], 29 | "outputs": [], 30 | "execution_count": 2, 31 | "metadata": { 32 | "collapsed": false, 33 | "outputHidden": false, 34 | "inputHidden": false 35 | } 36 | }, 37 | { 38 | "cell_type": "code", 39 | "source": [ 40 | "with tf.Session() as sess:\n", 41 | " print(\"x: %i\" % sess.run(x), \"y: %i\" % sess.run(y))\n", 42 | " print(\"Addition with constants: %i\" % sess.run(x+y))\n", 43 | " print(\"Multiplication with constants: %i\" % sess.run(x*y))" 44 | ], 45 | "outputs": [ 46 | { 47 | "output_type": "stream", 48 | "name": "stdout", 49 | "text": [ 50 | "x: 2 y: 4\n", 51 | "Addition with constants: 6\n", 52 | "Multiplication with constants: 8\n" 53 | ] 54 | } 55 | ], 56 | "execution_count": 3, 57 | "metadata": { 58 | "collapsed": false, 59 | "outputHidden": false, 60 | "inputHidden": false 61 | } 62 | }, 63 | { 64 | "cell_type": "code", 65 | "source": [ 66 | "add = tf.add(x, y)\n", 67 | "sess = tf.Session()\n", 68 | "value_of_add = sess.run(add)\n", 69 | "print(value_of_add)\n", 70 | "sess.close()" 71 | ], 72 | "outputs": [ 73 | { 74 | "output_type": "stream", 75 | "name": "stdout", 76 | "text": [ 77 | "6\n" 78 | ] 79 | } 80 | ], 81 | "execution_count": 4, 82 | "metadata": { 83 | "collapsed": false, 84 | "outputHidden": false, 85 | "inputHidden": false 86 | } 87 | }, 88 | { 89 | "cell_type": "code", 90 | "source": [ 91 | "sub = tf.subtract(x, y)\n", 92 | "sess = tf.Session()\n", 93 | "value_of_sub = sess.run(sub)\n", 94 | "print(value_of_sub)\n", 95 | "sess.close()" 96 | ], 97 | "outputs": [ 98 | { 99 | "output_type": "stream", 100 | "name": "stdout", 101 | "text": [ 102 | "-2\n" 103 | ] 104 | } 105 | ], 106 | "execution_count": 5, 107 | "metadata": { 108 | "collapsed": false, 109 | "outputHidden": false, 110 | "inputHidden": false 111 | } 112 | }, 113 | { 114 | "cell_type": "code", 115 | "source": [ 116 | "mult = tf.multiply(x, y)\n", 117 | "sess = 
tf.Session()\n", 118 | "value_of_mult = sess.run(mult)\n", 119 | "print(value_of_mult)\n", 120 | "sess.close()" 121 | ], 122 | "outputs": [ 123 | { 124 | "output_type": "stream", 125 | "name": "stdout", 126 | "text": [ 127 | "8\n" 128 | ] 129 | } 130 | ], 131 | "execution_count": 6, 132 | "metadata": { 133 | "collapsed": false, 134 | "outputHidden": false, 135 | "inputHidden": false 136 | } 137 | }, 138 | { 139 | "cell_type": "code", 140 | "source": [ 141 | "div = tf.divide(x, y)\n", 142 | "sess = tf.Session()\n", 143 | "value_of_div = sess.run(div)\n", 144 | "print(value_of_div)\n", 145 | "sess.close()" 146 | ], 147 | "outputs": [ 148 | { 149 | "output_type": "stream", 150 | "name": "stdout", 151 | "text": [ 152 | "0.5\n" 153 | ] 154 | } 155 | ], 156 | "execution_count": 7, 157 | "metadata": { 158 | "collapsed": false, 159 | "outputHidden": false, 160 | "inputHidden": false 161 | } 162 | }, 163 | { 164 | "cell_type": "code", 165 | "source": [ 166 | "# y = W.x + b\n", 167 | "x = tf.constant(-2.0, name=\"x\", dtype=tf.float32)\n", 168 | "W = tf.constant(8.0, name=\"W\", dtype=tf.float32)\n", 169 | "b = tf.constant(10.0, name=\"b\", dtype=tf.float32)\n", 170 | "\n", 171 | "y = tf.Variable(tf.add(tf.multiply(W, x), b))\n", 172 | "\n", 173 | "init = tf.global_variables_initializer()\n", 174 | "\n", 175 | "with tf.Session() as session:\n", 176 | " session.run(init)\n", 177 | " print(session.run(y)) " 178 | ], 179 | "outputs": [ 180 | { 181 | "output_type": "stream", 182 | "name": "stdout", 183 | "text": [ 184 | "-6.0\n" 185 | ] 186 | } 187 | ], 188 | "execution_count": 8, 189 | "metadata": { 190 | "collapsed": false, 191 | "outputHidden": false, 192 | "inputHidden": false 193 | } 194 | }, 195 | { 196 | "cell_type": "code", 197 | "source": [ 198 | "a = tf.constant(2.8)\n", 199 | "b = tf.constant(4.3)\n", 200 | "\n", 201 | "# Basic Math\n", 202 | "total = a + b\n", 203 | "diff = a - b\n", 204 | "quot = tf.div(a, b)\n", 205 | "prod = tf.multiply(a, b)\n", 206 | "\n", 207 | 
"# Matrix Multiplication\n", 208 | "c = tf.constant([[1,2], [3,4], [5,6]])\n", 209 | "d = tf.constant([[9,8,7], [6,5,4]])\n", 210 | "matrix_prod = tf.matmul(c, d)\n", 211 | "\n", 212 | "# Excute the Session\n", 213 | "with tf.Session() as sess:\n", 214 | " print(\"Sum: %f\" % sess.run(total))\n", 215 | " print(\"Difference: %f\" % sess.run(diff))\n", 216 | " print(\"Division: %f\" % sess.run(quot))\n", 217 | " print(\"Multiplication: %f\" % sess.run(prod))\n", 218 | " print(\"Matrix prouct: \", sess.run(matrix_prod))\n", 219 | " print(\"Round: %f\" % sess.run(tf.round(a)))\n", 220 | " print(\"Round: %f\" % sess.run(tf.round(b)))" 221 | ], 222 | "outputs": [ 223 | { 224 | "output_type": "stream", 225 | "name": "stdout", 226 | "text": [ 227 | "Sum: 7.100000\n", 228 | "Difference: -1.500000\n", 229 | "Division: 0.651163\n", 230 | "Multiplication: 12.040000\n", 231 | "Matrix prouct: [[21 18 15]\n", 232 | " [51 44 37]\n", 233 | " [81 70 59]]\n", 234 | "Round: 3.000000\n", 235 | "Round: 4.000000\n" 236 | ] 237 | } 238 | ], 239 | "execution_count": 9, 240 | "metadata": {} 241 | }, 242 | { 243 | "cell_type": "code", 244 | "source": [ 245 | "# 2-D tensor\n", 246 | "a = tf.constant([1, 2, 3, 4, 5, 6], shape=[2, 3])\n", 247 | "\n", 248 | "with tf.Session() as sess:\n", 249 | " print(\"2-D tensor:\")\n", 250 | " print(sess.run(a))" 251 | ], 252 | "outputs": [ 253 | { 254 | "output_type": "stream", 255 | "name": "stdout", 256 | "text": [ 257 | "2-D tensor:\n", 258 | "[[1 2 3]\n", 259 | " [4 5 6]]\n" 260 | ] 261 | } 262 | ], 263 | "execution_count": 10, 264 | "metadata": { 265 | "collapsed": false, 266 | "outputHidden": false, 267 | "inputHidden": false 268 | } 269 | }, 270 | { 271 | "cell_type": "code", 272 | "source": [ 273 | "# 2-D tensor\n", 274 | "b = tf.constant([7, 8, 9, 10, 11, 12], shape=[3, 2])\n", 275 | "\n", 276 | "with tf.Session() as sess:\n", 277 | " print(\"2-D tensor:\")\n", 278 | " print(sess.run(b))" 279 | ], 280 | "outputs": [ 281 | { 282 | "output_type": 
"stream", 283 | "name": "stdout", 284 | "text": [ 285 | "2-D tensor:\n", 286 | "[[ 7 8]\n", 287 | " [ 9 10]\n", 288 | " [11 12]]\n" 289 | ] 290 | } 291 | ], 292 | "execution_count": 11, 293 | "metadata": { 294 | "collapsed": false, 295 | "outputHidden": false, 296 | "inputHidden": false 297 | } 298 | }, 299 | { 300 | "cell_type": "code", 301 | "source": [ 302 | "c = tf.matmul(a, b)\n", 303 | "\n", 304 | "with tf.Session() as sess:\n", 305 | " print(\"a * b:\")\n", 306 | " print(sess.run(c))" 307 | ], 308 | "outputs": [ 309 | { 310 | "output_type": "stream", 311 | "name": "stdout", 312 | "text": [ 313 | "a * b:\n", 314 | "[[ 58 64]\n", 315 | " [139 154]]\n" 316 | ] 317 | } 318 | ], 319 | "execution_count": 12, 320 | "metadata": { 321 | "collapsed": false, 322 | "outputHidden": false, 323 | "inputHidden": false 324 | } 325 | }, 326 | { 327 | "cell_type": "code", 328 | "source": [ 329 | "# 3-D tensor\n", 330 | "import numpy as np\n", 331 | "x = tf.constant(np.arange(1, 13, dtype=np.int32),\n", 332 | " shape=[2, 2, 3])\n", 333 | "\n", 334 | "y = tf.constant(np.arange(13, 25, dtype=np.int32),\n", 335 | " shape=[2, 3, 2])\n", 336 | "\n", 337 | "z = tf.constant(np.arange(1, 13, dtype=np.int32),\n", 338 | " shape=[2, 2, 3])\n", 339 | "\n", 340 | "with tf.Session() as sess:\n", 341 | " print(\"3-D tensor:\")\n", 342 | " print('--------------------')\n", 343 | " print(sess.run(x))\n", 344 | " print('--------------------')\n", 345 | " print(sess.run(y))\n", 346 | " print('--------------------')\n", 347 | " print('3-D Multiplication:')\n", 348 | " print(sess.run(tf.matmul(x,y)))\n", 349 | " print('--------------------')\n", 350 | " print('Dot Product:')\n", 351 | " print(sess.run(tf.tensordot(x,z, 3)))" 352 | ], 353 | "outputs": [ 354 | { 355 | "output_type": "stream", 356 | "name": "stdout", 357 | "text": [ 358 | "3-D tensor:\n", 359 | "--------------------\n", 360 | "[[[ 1 2 3]\n", 361 | " [ 4 5 6]]\n", 362 | "\n", 363 | " [[ 7 8 9]\n", 364 | " [10 11 12]]]\n", 365 | 
"--------------------\n", 366 | "[[[13 14]\n", 367 | " [15 16]\n", 368 | " [17 18]]\n", 369 | "\n", 370 | " [[19 20]\n", 371 | " [21 22]\n", 372 | " [23 24]]]\n", 373 | "--------------------\n", 374 | "3-D Multiplication:\n", 375 | "[[[ 94 100]\n", 376 | " [229 244]]\n", 377 | "\n", 378 | " [[508 532]\n", 379 | " [697 730]]]\n", 380 | "--------------------\n", 381 | "Dot Product:\n", 382 | "650\n" 383 | ] 384 | } 385 | ], 386 | "execution_count": 13, 387 | "metadata": { 388 | "collapsed": false, 389 | "outputHidden": false, 390 | "inputHidden": false 391 | } 392 | } 393 | ], 394 | "metadata": { 395 | "kernel_info": { 396 | "name": "python3" 397 | }, 398 | "language_info": { 399 | "pygments_lexer": "ipython3", 400 | "nbconvert_exporter": "python", 401 | "codemirror_mode": { 402 | "version": 3, 403 | "name": "ipython" 404 | }, 405 | "version": "3.5.5", 406 | "name": "python", 407 | "file_extension": ".py", 408 | "mimetype": "text/x-python" 409 | }, 410 | "kernelspec": { 411 | "name": "python3", 412 | "language": "python", 413 | "display_name": "Python 3" 414 | }, 415 | "nteract": { 416 | "version": "0.15.0" 417 | } 418 | }, 419 | "nbformat": 4, 420 | "nbformat_minor": 4 421 | } -------------------------------------------------------------------------------- /Title.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LastAncientOne/Deep_Learning_Machine_Learning_Stock/e31e9fc689cd2cb6d74a8f2694750afd6e13bcec/Title.PNG -------------------------------------------------------------------------------- /Variance_Inflation_Factor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# Variance inflation Factor (VIF)" 7 | ], 8 | "metadata": { 9 | "nteract": { 10 | "transient": { 11 | "deleting": false 12 | } 13 | } 14 | } 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "source": [ 19 | "In 
statistics, the variance inflation factor (VIF) is the quotient of the variance in a model with multiple terms by the variance of a model with one term alone. It quantifies the severity of multicollinearity in an ordinary least squares regression analysis. It provides an index that measures how much the variance (the square of the estimate's standard deviation) of an estimated regression coefficient is increased because of collinearity. Source: https://en.wikipedia.org/wiki/Variance_inflation_factor" 20 | ], 21 | "metadata": { 22 | "nteract": { 23 | "transient": { 24 | "deleting": false 25 | } 26 | } 27 | } 28 | }, 29 | { 30 | "cell_type": "code", 31 | "source": [ 32 | "import numpy as np\n", 33 | "import matplotlib.pyplot as plt\n", 34 | "import pandas as pd\n", 35 | "\n", 36 | "import warnings\n", 37 | "warnings.filterwarnings(\"ignore\")\n", 38 | "\n", 39 | "# fetch yahoo data\n", 40 | "import yfinance as yf\n", 41 | "yf.pdr_override()" 42 | ], 43 | "outputs": [], 44 | "execution_count": 1, 45 | "metadata": { 46 | "collapsed": true, 47 | "jupyter": { 48 | "source_hidden": false, 49 | "outputs_hidden": false 50 | }, 51 | "nteract": { 52 | "transient": { 53 | "deleting": false 54 | } 55 | }, 56 | "execution": { 57 | "iopub.status.busy": "2020-08-09T21:00:54.254Z", 58 | "iopub.execute_input": "2020-08-09T21:00:54.261Z", 59 | "iopub.status.idle": "2020-08-09T21:00:55.369Z", 60 | "shell.execute_reply": "2020-08-09T21:00:55.394Z" 61 | } 62 | } 63 | }, 64 | { 65 | "cell_type": "code", 66 | "source": [ 67 | "# input\n", 68 | "symbol = 'AMD'\n", 69 | "start = '2014-01-01'\n", 70 | "end = '2018-08-27'\n", 71 | "\n", 72 | "# Read data \n", 73 | "dataset = yf.download(symbol,start,end)\n", 74 | "\n", 75 | "# Preview the first rows (all columns are kept)\n", 76 | "dataset.head()" 77 | ], 78 | "outputs": [ 79 | { 80 | "output_type": "stream", 81 | "name": "stdout", 82 | "text": [ 83 | "[*********************100%***********************] 1 of 1 completed\n" 84 | ] 85 | }, 86 | { 87 | "output_type": 
"execute_result", 88 | "execution_count": 2, 89 | "data": { 90 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Adj CloseCloseHighLowOpenVolume
Date
2014-01-023.953.953.983.843.8520548400
2014-01-034.004.004.003.883.9822887200
2014-01-064.134.134.183.994.0142398300
2014-01-074.184.184.254.114.1942932100
2014-01-084.184.184.264.144.2330678700
\n
", 91 | "text/plain": " Adj Close Close High Low Open Volume\nDate \n2014-01-02 3.95 3.95 3.98 3.84 3.85 20548400\n2014-01-03 4.00 4.00 4.00 3.88 3.98 22887200\n2014-01-06 4.13 4.13 4.18 3.99 4.01 42398300\n2014-01-07 4.18 4.18 4.25 4.11 4.19 42932100\n2014-01-08 4.18 4.18 4.26 4.14 4.23 30678700" 92 | }, 93 | "metadata": {} 94 | } 95 | ], 96 | "execution_count": 2, 97 | "metadata": { 98 | "collapsed": true, 99 | "jupyter": { 100 | "source_hidden": false, 101 | "outputs_hidden": false 102 | }, 103 | "nteract": { 104 | "transient": { 105 | "deleting": false 106 | } 107 | }, 108 | "execution": { 109 | "iopub.status.busy": "2020-08-09T21:00:55.376Z", 110 | "iopub.execute_input": "2020-08-09T21:00:55.383Z", 111 | "iopub.status.idle": "2020-08-09T21:00:56.732Z", 112 | "shell.execute_reply": "2020-08-09T21:00:56.802Z" 113 | } 114 | } 115 | }, 116 | { 117 | "cell_type": "code", 118 | "source": [ 119 | "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n", 120 | "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n", 121 | "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n", 122 | "dataset['Returns'] = dataset['Adj Close'].pct_change()\n", 123 | "dataset = dataset.dropna()" 124 | ], 125 | "outputs": [], 126 | "execution_count": 3, 127 | "metadata": { 128 | "collapsed": true, 129 | "jupyter": { 130 | "source_hidden": false, 131 | "outputs_hidden": false 132 | }, 133 | "nteract": { 134 | "transient": { 135 | "deleting": false 136 | } 137 | }, 138 | "execution": { 139 | "iopub.status.busy": "2020-08-09T21:00:56.741Z", 140 | "iopub.execute_input": "2020-08-09T21:00:56.747Z", 141 | "iopub.status.idle": "2020-08-09T21:00:56.761Z", 142 | "shell.execute_reply": "2020-08-09T21:00:56.806Z" 143 | } 144 | } 145 | }, 146 | { 147 | "cell_type": "code", 148 | "source": [ 149 | "from statsmodels.stats.outliers_influence import variance_inflation_factor" 150 | ], 151 | 
"outputs": [], 152 | "execution_count": 4, 153 | "metadata": { 154 | "collapsed": true, 155 | "jupyter": { 156 | "source_hidden": false, 157 | "outputs_hidden": false 158 | }, 159 | "nteract": { 160 | "transient": { 161 | "deleting": false 162 | } 163 | }, 164 | "execution": { 165 | "iopub.status.busy": "2020-08-09T21:00:56.774Z", 166 | "iopub.execute_input": "2020-08-09T21:00:56.780Z", 167 | "iopub.status.idle": "2020-08-09T21:00:57.253Z", 168 | "shell.execute_reply": "2020-08-09T21:00:57.355Z" 169 | } 170 | } 171 | }, 172 | { 173 | "cell_type": "code", 174 | "source": [ 175 | "X = dataset" 176 | ], 177 | "outputs": [], 178 | "execution_count": 5, 179 | "metadata": { 180 | "collapsed": true, 181 | "jupyter": { 182 | "source_hidden": false, 183 | "outputs_hidden": false 184 | }, 185 | "nteract": { 186 | "transient": { 187 | "deleting": false 188 | } 189 | }, 190 | "execution": { 191 | "iopub.status.busy": "2020-08-09T21:00:57.264Z", 192 | "iopub.execute_input": "2020-08-09T21:00:57.271Z", 193 | "iopub.status.idle": "2020-08-09T21:00:57.287Z", 194 | "shell.execute_reply": "2020-08-09T21:00:57.358Z" 195 | } 196 | } 197 | }, 198 | { 199 | "cell_type": "code", 200 | "source": [ 201 | "vif = pd.DataFrame()\n", 202 | "vif[\"VIF Factor\"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]\n", 203 | "vif[\"features\"] = X.columns" 204 | ], 205 | "outputs": [], 206 | "execution_count": 6, 207 | "metadata": { 208 | "collapsed": true, 209 | "jupyter": { 210 | "source_hidden": false, 211 | "outputs_hidden": false 212 | }, 213 | "nteract": { 214 | "transient": { 215 | "deleting": false 216 | } 217 | }, 218 | "execution": { 219 | "iopub.status.busy": "2020-08-09T21:00:57.302Z", 220 | "iopub.execute_input": "2020-08-09T21:00:57.307Z", 221 | "iopub.status.idle": "2020-08-09T21:00:57.316Z", 222 | "shell.execute_reply": "2020-08-09T21:00:57.361Z" 223 | } 224 | } 225 | }, 226 | { 227 | "cell_type": "code", 228 | "source": [ 229 | "vif" 230 | ], 231 | "outputs": [ 
232 | { 233 | "output_type": "execute_result", 234 | "execution_count": 7, 235 | "data": { 236 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
VIF Factorfeatures
0infAdj Close
1infClose
21.321329e+04High
39.797402e+03Low
48.486690e+03Open
58.249018e+00Volume
61.718758e+00Increase_Decrease
72.503856e+00Buy_Sell_on_Open
81.869328e+00Buy_Sell
91.946329e+00Returns
\n
", 237 | "text/plain": " VIF Factor features\n0 inf Adj Close\n1 inf Close\n2 1.321329e+04 High\n3 9.797402e+03 Low\n4 8.486690e+03 Open\n5 8.249018e+00 Volume\n6 1.718758e+00 Increase_Decrease\n7 2.503856e+00 Buy_Sell_on_Open\n8 1.869328e+00 Buy_Sell\n9 1.946329e+00 Returns" 238 | }, 239 | "metadata": {} 240 | } 241 | ], 242 | "execution_count": 7, 243 | "metadata": { 244 | "collapsed": true, 245 | "jupyter": { 246 | "source_hidden": false, 247 | "outputs_hidden": false 248 | }, 249 | "nteract": { 250 | "transient": { 251 | "deleting": false 252 | } 253 | }, 254 | "execution": { 255 | "iopub.status.busy": "2020-08-09T21:00:57.323Z", 256 | "iopub.execute_input": "2020-08-09T21:00:57.329Z", 257 | "iopub.status.idle": "2020-08-09T21:00:57.341Z", 258 | "shell.execute_reply": "2020-08-09T21:00:57.366Z" 259 | } 260 | } 261 | } 262 | ], 263 | "metadata": { 264 | "kernel_info": { 265 | "name": "python3" 266 | }, 267 | "language_info": { 268 | "file_extension": ".py", 269 | "name": "python", 270 | "nbconvert_exporter": "python", 271 | "version": "3.5.5", 272 | "mimetype": "text/x-python", 273 | "codemirror_mode": { 274 | "version": 3, 275 | "name": "ipython" 276 | }, 277 | "pygments_lexer": "ipython3" 278 | }, 279 | "kernelspec": { 280 | "argv": [ 281 | "C:\\Users\\Tin Hang\\Anaconda3\\envs\\py35\\python.exe", 282 | "-m", 283 | "ipykernel_launcher", 284 | "-f", 285 | "{connection_file}" 286 | ], 287 | "display_name": "Python 3", 288 | "language": "python", 289 | "name": "python3" 290 | }, 291 | "nteract": { 292 | "version": "0.24.1" 293 | } 294 | }, 295 | "nbformat": 4, 296 | "nbformat_minor": 0 297 | } --------------------------------------------------------------------------------