├── 001_Pandas.ipynb
├── 002_Numpy.ipynb
├── 003_Matrix.ipynb
├── 004_Data_PreProcessing.ipynb
├── 005_Pre_Proccessing (Part_2).ipynb
├── 006_Data_Visualization.ipynb
├── 007_Understand_Data.ipynb
├── 008_Basic_Statistics.ipynb
├── ANOVA_F_value.ipynb
├── Array_Selection_Numpy.ipynb
├── Basic_Machine_Learning_Predicts.ipynb
├── Categorical_Continuous.ipynb
├── Chi_Squared.ipynb
├── Column_Selection_Pandas.ipynb
├── DL_Title.PNG
├── Data_Cleaning_for_Machine_Learning.ipynb
├── Descriptive_Statistics.ipynb
├── Discrete_Probability_Distributions.ipynb
├── Drop_Highly_Correlated_Features.ipynb
├── Feature_Importance_Classification.ipynb
├── Feature_Importance_Continuous.ipynb
├── Features_Analysis.ipynb
├── Features_Extraction.ipynb
├── Features_Extraction_with_PCA.ipynb
├── Features_Rank.ipynb
├── Features_Scores.ipynb
├── Features_Selections.ipynb
├── Features_Selections_Stock.ipynb
├── Features_Transformation.ipynb
├── In_Sample_Out_Sample.ipynb
├── LICENSE
├── Linear_Regression_Stock.ipynb
├── Logistic_Regression_Stock.ipynb
├── Metric.ipynb
├── Nested_Cross-Validation_Part2.ipynb
├── NetworkX.ipynb
├── Poisson_Regression.ipynb
├── Principal_Component_Analysis_(PCA).ipynb
├── Principal_Component_Analysis_(PCA)_Stock.ipynb
├── Probabilities.ipynb
├── README.md
├── Scaling_and_Transformations.ipynb
├── Split_Data.ipynb
├── Stationary_Check.ipynb
├── Stationary_Check_Part_2.ipynb
├── Stock_Algorithms
│   ├── 30_Regression_Models.ipynb
│   ├── ARIMA_Models.ipynb
│   ├── AdaBoost_Classification.ipynb
│   ├── AdaBoost_Regressor.ipynb
│   ├── Addictive_Model.ipynb
│   ├── Algorithms.PNG
│   ├── Anomaly_Detection.ipynb
│   ├── Anomaly_Detection_SVM.ipynb
│   ├── Artificial_Neural_Network.ipynb
│   ├── Automatic_Relevance_Determination_Regression.ipynb
│   ├── Bagging_Classifier.ipynb
│   ├── Basic_Machine_Learning_Predicts.ipynb
│   ├── Basic_Machine_Learning_Predicts_Updates.ipynb
│   ├── Basic_Regressions.ipynb
│   ├── Bayesian_Ridge_Regression.ipynb
│   ├── Bayesian_Ridge_Regression_Part2.ipynb
│   ├── Bernoulli_Restricted_Boltzmann_Machine.ipynb
│   ├── Calibrate_Predicted_Probabilities.ipynb
│   ├── CatBoost_Algorithms.ipynb
│   ├── CatBoost_Algorithms_Part2.ipynb
│   ├── Classification_Cluster.ipynb
│   ├── Classification_Cluster_2.ipynb
│   ├── Classification_Cluster_3.ipynb
│   ├── Convolutional_Neural_Network.ipynb
│   ├── Convolutional_Neural_Networks_Keras.ipynb
│   ├── Cox_Proportional_Hazards.ipynb
│   ├── Decision_Tree_Classifier_Visualize.ipynb
│   ├── Decision_Tree_Multioutput_Regression.ipynb
│   ├── Decision_Trees_Classification.ipynb
│   ├── Decision_Trees_Classification_Explained.ipynb
│   ├── Decision_Trees_Classification_Part2.ipynb
│   ├── Decision_Trees_Classification_Part3.ipynb
│   ├── Decision_Trees_Classification_Part4.ipynb
│   ├── Decision_Trees_Classification_Part5.ipynb
│   ├── Decision_Trees_Regression.ipynb
│   ├── Decision_Trees_Regression_Part2.ipynb
│   ├── Deep_Belief_Networks.ipynb
│   ├── ElasticNet_Regression.ipynb
│   ├── Fast_Fourier_Transformations.ipynb
│   ├── Fixed_Effects_Model.ipynb
│   ├── Gaussian_Regression.ipynb
│   ├── Genetic_Algorithm.ipynb
│   ├── Genetic_Algorithm_Part2.ipynb
│   ├── Gradient_Ascent.ipynb
│   ├── Gradient_Boosting_Classification.ipynb
│   ├── Gradient_Boosting_Machine_(GBM).ipynb
│   ├── Gradient_Boosting_Regressor.ipynb
│   ├── Hierarchical_Clustering.ipynb
│   ├── Huber_Regression.ipynb
│   ├── Huber_Regression_Part2.ipynb
│   ├── Hyperparameter_Tuning.ipynb
│   ├── Implementing_Logistic_Regression.ipynb
│   ├── Isotonic_Regression.ipynb
│   ├── Isotonic_Regression_Linear_Regression.ipynb
│   ├── K_Means.ipynb
│   ├── K_Means_Clustering.ipynb
│   ├── K_Means_Clustering_Part2.ipynb
│   ├── K_Nearest_Neighbors.ipynb
│   ├── K_Nearest_Neighbors_Multioutput_Regression.ipynb
│   ├── K_Nearest_Neighbors_Part2.ipynb
│   ├── LSTM_Neural_Networks.ipynb
│   ├── LSTM_RNN.ipynb
│   ├── LSTM_RNN_Part2.ipynb
│   ├── Lasso_Regression.ipynb
│   ├── Lasso_Regression_Alpha_Levels.ipynb
│   ├── Lasso_Regression_Part2.ipynb
│   ├── Lasso_Ridge_Regression.ipynb
│   ├── Least_Angled_Regression.ipynb
│   ├── Least_Squares_Regression.ipynb
│   ├── Leave_One_Out_Cross_Validation.ipynb
│   ├── Light_GBM.ipynb
│   ├── Linear_Discriminant_Analysis.ipynb
│   ├── Linear_Discriminant_Analysis_Classification.ipynb
│   ├── Linear_Regression.ipynb
│   ├── Linear_Regression_Classification.ipynb
│   ├── Linear_Regression_Continuous.ipynb
│   ├── Linear_Regression_Multioutput_Regression.ipynb
│   ├── Linear_Regression_Predict_Future_Price.ipynb
│   ├── Linear_Regression_Prediction.ipynb
│   ├── Linear_Regression_Prediction_Part2.ipynb
│   ├── Linear_Regression_Prediction_Part3.ipynb
│   ├── Linear_Regression_Using_Linear_Algebra.ipynb
│   ├── Linear_Regression_with_Normalize_Data.ipynb
│   ├── Locally_Estimated_Scatterplot_Smoothing.ipynb
│   ├── Locally_Weighted_Scatterplot_Smoothing_LOWESS.ipynb
│   ├── Logistic_Model.ipynb
│   ├── Logistic_Regression.ipynb
│   ├── Logistic_Regression_Classification.ipynb
│   ├── Logistic_Regression_Classification_Part2.ipynb
│   ├── Logistic_Regression_Classification_Part3.ipynb
│   ├── Logistic_Regression_Classification_Part4.ipynb
│   ├── Logistic_Regression_Large_Data.ipynb
│   ├── Logistic_Regression_Part2.ipynb
│   ├── Mini-Batch_k-Means_Clustering.ipynb
│   ├── Model_Selection.ipynb
│   ├── MultiOutputRegressor.ipynb
│   ├── Multioutput_Regression_With_Cross-Validation.ipynb
│   ├── Multiple_Linear_Regression.ipynb
│   ├── Multiple_Linear_Regression_Part2.ipynb
│   ├── Multiple_Linear_Regression_with_Normalize_Data.ipynb
│   ├── Multivariate_Adaptive_Regression_Splines.ipynb
│   ├── Multivariate_Adaptive_Regression_Splines_Part2.ipynb
│   ├── Multivariate_relationships.ipynb
│   ├── Naive_Bayes_Classification.ipynb
│   ├── Naive_Bayes_Multinomial_Classification.ipynb
│   ├── Nearest_Neighbor_Classification.ipynb
│   ├── Nested_Cross-Validation.ipynb
│   ├── Nested_Cross-Validation_Part2.ipynb
│   ├── NetworkX.ipynb
│   ├── NetworkX_Part2.ipynb
│   ├── Neural_Network_ANN.ipynb
│   ├── Neural_Network_Part2.ipynb
│   ├── Neural_Networks_Classification.ipynb
│   ├── Neural_Networks_Regression.ipynb
│   ├── Non_Linear_Least_Squares_Curve_Fitting.ipynb
│   ├── Optimization_Parameters.ipynb
│   ├── Ordinal_Regression.ipynb
│   ├── Partial_Least_Squares_Regression_(PLSR).ipynb
│   ├── Passive_Aggressive_Classification.ipynb
│   ├── Passive_Aggressive_Classifier.ipynb
│   ├── Passive_Aggressive_Regression.ipynb
│   ├── Perceptron_Algorithm.ipynb
│   ├── Polynomial_Regression.ipynb
│   ├── Polynomial_Regression_Part2.ipynb
│   ├── Polynomial_Regression_Part3.ipynb
│   ├── Principal_Component_Classification.ipynb
│   ├── Principal_Component_Regression.ipynb
│   ├── PyBrain_Dataset.ipynb
│   ├── PyCaret_Stock_Prediction.ipynb
│   ├── PyCaret_Stock_Prediction_Part2.ipynb
│   ├── PyTorch_Linear_Regression.ipynb
│   ├── PyTorch_Regression.ipynb
│   ├── Pynamical_Prediction.ipynb
│   ├── Quantile_Regression.ipynb
│   ├── Quantile_Regression_Part2.ipynb
│   ├── Quasi_Poisson_Regression.ipynb
│   ├── Quasi_Poisson_Regression_Part2.ipynb
│   ├── RANSAC_Regression.ipynb
│   ├── README.md
│   ├── RNN_Tensorflow.ipynb
│   ├── Radius_Neighbors_Regressor.ipynb
│   ├── Random_Forests_Classification.ipynb
│   ├── Random_Forests_Classification_Part2.ipynb
│   ├── Random_Forests_Multioutput_Regression.ipynb
│   ├── Random_Forests_Regression.ipynb
│   ├── Regressor_Chain.ipynb
│   ├── Ridge_Regression.ipynb
│   ├── Robust_Linear_Models.ipynb
│   ├── SMOTE_Near_Miss_Algorithm.ipynb
│   ├── SVC_Predicted_Probabilities.ipynb
│   ├── Simple_Linear_Regression.ipynb
│   ├── Simple_Linear_Regression_Part2.ipynb
│   ├── Simple_Linear_Regression_with_Normalize_Data.ipynb
│   ├── Simple_Multiple_Linear_Regression.ipynb
│   ├── Stepwise_Regression_Backward.ipynb
│   ├── Stepwise_Regression_Forward.ipynb
│   ├── Stochastic_Gradient_Descent_Classification.ipynb
│   ├── Stochastic_Gradient_Descent_Regression.ipynb
│   ├── Stochastic_Gradient_Descent_Regression_Part2.ipynb
│   ├── Support_Vector_Classifiers.ipynb
│   ├── Support_Vector_Machine.ipynb
│   ├── Support_Vector_Machine_Part2.ipynb
│   ├── TensorFlow_LinearRegression2.ipynb
│   ├── TensorFlow_LinearRegressionSingle.ipynb
│   ├── TensorFlow_LinearRegression_Basic.ipynb
│   ├── Theil_Sen_Regression.ipynb
│   ├── Time_Series_Decomposition_Random_Walks.ipynb
│   ├── Time_Series_Forecasting.ipynb
│   ├── Time_Series_Forecasting_Model.ipynb
│   ├── TruncatedSVD.ipynb
│   ├── Tweedie_Regression.ipynb
│   ├── XGBoost_Algorithms.ipynb
│   ├── XGBoost_Classification.ipynb
│   ├── XGBoost_Classification_Part_2.ipynb
│   ├── XGBoost_Regression.ipynb
│   ├── XGBoost_Regressor.ipynb
│   ├── XGBoost_Regressor_Part_2.ipynb
│   ├── scikit-learn_Prediction.ipynb
│   ├── shap_prediction.ipynb
│   ├── t_SNE.ipynb
│   ├── t_SNE_Part2.ipynb
│   └── t_SNE_Part3.ipynb
├── Stock_Apps
│   ├── README.md
│   ├── Stock_Apps.PNG
│   ├── Stock_ML_Data_PreProcessing_Apps.py
│   ├── Stock_ML_Feature_Selection_Apps.py
│   ├── Stock_ML_Predict_Apps.py
│   ├── Stock_ML_Predict_Apps_Menu.py
│   └── Stock_ML_Rescale_Data_Apps.py
├── Tensorflow_Basics.ipynb
├── Title.PNG
├── Train_Test_Split.ipynb
├── Train_Validate_Test.ipynb
├── Underfitting_Overfitting_Check_Regression.ipynb
├── Understand_Data.ipynb
└── Variance_Inflation_Factor.ipynb
/004_Data_PreProcessing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# Data PreProcessing"
7 | ],
8 | "metadata": {}
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "source": [
13 | "Step 1: Importing the libraries"
14 | ],
15 | "metadata": {}
16 | },
17 | {
18 | "cell_type": "code",
19 | "source": [
20 | "import numpy as np\n",
21 | "import pandas as pd\n",
22 | "\n",
23 | "import warnings\n",
24 | "warnings.filterwarnings(\"ignore\")\n",
25 | "\n",
26 | "# fix_yahoo_finance is used to fetch data \n",
27 | "import fix_yahoo_finance as yf\n",
28 | "yf.pdr_override()"
29 | ],
30 | "outputs": [],
31 | "execution_count": 1,
32 | "metadata": {
33 | "collapsed": false,
34 | "outputHidden": false,
35 | "inputHidden": false
36 | }
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "source": [
41 | "Step 2: Importing dataset"
42 | ],
43 | "metadata": {}
44 | },
45 | {
46 | "cell_type": "code",
47 | "source": [
48 | "# input\n",
49 | "symbol = 'AMD'\n",
50 | "start = '2014-01-01'\n",
51 | "end = '2018-08-27'\n",
52 | "\n",
53 | "# Read data \n",
54 | "dataset = yf.download(symbol,start,end)\n",
55 | "\n",
56 | "# Only keep close columns \n",
57 | "dataset.head()"
58 | ],
59 | "outputs": [
60 | {
61 | "output_type": "stream",
62 | "name": "stdout",
63 | "text": [
64 | "[*********************100%***********************] 1 of 1 downloaded\n"
65 | ]
66 | },
67 | {
68 | "output_type": "execute_result",
69 | "execution_count": 2,
70 | "data": {
71 | "text/plain": [
72 | " Open High Low Close Adj Close Volume\n",
73 | "Date \n",
74 | "2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n",
75 | "2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n",
76 | "2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n",
77 | "2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n",
78 | "2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700"
79 | ],
80 | "text/html": [
81 | "
\n",
82 | "\n",
95 | "
\n",
96 | " \n",
97 | " \n",
98 | " | \n",
99 | " Open | \n",
100 | " High | \n",
101 | " Low | \n",
102 | " Close | \n",
103 | " Adj Close | \n",
104 | " Volume | \n",
105 | "
\n",
106 | " \n",
107 | " Date | \n",
108 | " | \n",
109 | " | \n",
110 | " | \n",
111 | " | \n",
112 | " | \n",
113 | " | \n",
114 | "
\n",
115 | " \n",
116 | " \n",
117 | " \n",
118 | " 2014-01-02 | \n",
119 | " 3.85 | \n",
120 | " 3.98 | \n",
121 | " 3.84 | \n",
122 | " 3.95 | \n",
123 | " 3.95 | \n",
124 | " 20548400 | \n",
125 | "
\n",
126 | " \n",
127 | " 2014-01-03 | \n",
128 | " 3.98 | \n",
129 | " 4.00 | \n",
130 | " 3.88 | \n",
131 | " 4.00 | \n",
132 | " 4.00 | \n",
133 | " 22887200 | \n",
134 | "
\n",
135 | " \n",
136 | " 2014-01-06 | \n",
137 | " 4.01 | \n",
138 | " 4.18 | \n",
139 | " 3.99 | \n",
140 | " 4.13 | \n",
141 | " 4.13 | \n",
142 | " 42398300 | \n",
143 | "
\n",
144 | " \n",
145 | " 2014-01-07 | \n",
146 | " 4.19 | \n",
147 | " 4.25 | \n",
148 | " 4.11 | \n",
149 | " 4.18 | \n",
150 | " 4.18 | \n",
151 | " 42932100 | \n",
152 | "
\n",
153 | " \n",
154 | " 2014-01-08 | \n",
155 | " 4.23 | \n",
156 | " 4.26 | \n",
157 | " 4.14 | \n",
158 | " 4.18 | \n",
159 | " 4.18 | \n",
160 | " 30678700 | \n",
161 | "
\n",
162 | " \n",
163 | "
\n",
164 | "
"
165 | ]
166 | },
167 | "metadata": {}
168 | }
169 | ],
170 | "execution_count": 2,
171 | "metadata": {
172 | "collapsed": false,
173 | "outputHidden": false,
174 | "inputHidden": false
175 | }
176 | },
177 | {
178 | "cell_type": "code",
179 | "source": [
180 | "X = dataset.iloc[ : , :-1].values\n",
181 | "Y = dataset.iloc[ : , 3].values"
182 | ],
183 | "outputs": [],
184 | "execution_count": 3,
185 | "metadata": {
186 | "collapsed": false,
187 | "outputHidden": false,
188 | "inputHidden": false
189 | }
190 | },
191 | {
192 | "cell_type": "markdown",
193 | "source": [
194 | "Step 3: Handling the missing data"
195 | ],
196 | "metadata": {}
197 | },
198 | {
199 | "cell_type": "code",
200 | "source": [
201 | "from sklearn.preprocessing import Imputer\n",
202 | "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
203 | "imputer = imputer.fit(X[ : , 1:3])\n",
204 | "X[ : , 1:3] = imputer.transform(X[ : , 1:3])"
205 | ],
206 | "outputs": [],
207 | "execution_count": 6,
208 | "metadata": {
209 | "collapsed": false,
210 | "outputHidden": false,
211 | "inputHidden": false
212 | }
213 | },
214 | {
215 | "cell_type": "markdown",
216 | "source": [
217 | "Step 4: Encoding categorical data"
218 | ],
219 | "metadata": {}
220 | },
221 | {
222 | "cell_type": "code",
223 | "source": [
224 | "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
225 | "labelencoder_X = LabelEncoder()\n",
226 | "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])"
227 | ],
228 | "outputs": [],
229 | "execution_count": 7,
230 | "metadata": {
231 | "collapsed": false,
232 | "outputHidden": false,
233 | "inputHidden": false
234 | }
235 | },
236 | {
237 | "cell_type": "code",
238 | "source": [
239 | "# Creating a dummy variable\n",
240 | "onehotencoder = OneHotEncoder(categorical_features = [0])\n",
241 | "X = onehotencoder.fit_transform(X).toarray()\n",
242 | "labelencoder_Y = LabelEncoder()\n",
243 | "Y = labelencoder_Y.fit_transform(Y)"
244 | ],
245 | "outputs": [],
246 | "execution_count": 8,
247 | "metadata": {
248 | "collapsed": false,
249 | "outputHidden": false,
250 | "inputHidden": false
251 | }
252 | },
253 | {
254 | "cell_type": "markdown",
255 | "source": [
256 | "Step 5: Splitting the datasets into training sets and Test sets"
257 | ],
258 | "metadata": {}
259 | },
260 | {
261 | "cell_type": "code",
262 | "source": [
263 | "from sklearn.cross_validation import train_test_split\n",
264 | "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)"
265 | ],
266 | "outputs": [
267 | {
268 | "output_type": "stream",
269 | "name": "stderr",
270 | "text": [
271 | "C:\\Users\\Tin Hang\\Anaconda3\\envs\\py35\\lib\\site-packages\\sklearn\\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
272 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n"
273 | ]
274 | }
275 | ],
276 | "execution_count": 9,
277 | "metadata": {
278 | "collapsed": false,
279 | "outputHidden": false,
280 | "inputHidden": false
281 | }
282 | },
283 | {
284 | "cell_type": "markdown",
285 | "source": [
286 | "Step 6: Feature Scaling"
287 | ],
288 | "metadata": {}
289 | },
290 | {
291 | "cell_type": "code",
292 | "source": [
293 | "from sklearn.preprocessing import StandardScaler\n",
294 | "sc_X = StandardScaler()\n",
295 | "X_train = sc_X.fit_transform(X_train)\n",
296 | "X_test = sc_X.fit_transform(X_test)"
297 | ],
298 | "outputs": [],
299 | "execution_count": 10,
300 | "metadata": {
301 | "collapsed": false,
302 | "outputHidden": false,
303 | "inputHidden": false
304 | }
305 | }
306 | ],
307 | "metadata": {
308 | "kernel_info": {
309 | "name": "python3"
310 | },
311 | "language_info": {
312 | "file_extension": ".py",
313 | "nbconvert_exporter": "python",
314 | "version": "3.5.5",
315 | "mimetype": "text/x-python",
316 | "pygments_lexer": "ipython3",
317 | "codemirror_mode": {
318 | "version": 3,
319 | "name": "ipython"
320 | },
321 | "name": "python"
322 | },
323 | "kernelspec": {
324 | "name": "python3",
325 | "language": "python",
326 | "display_name": "Python 3"
327 | },
328 | "nteract": {
329 | "version": "0.11.9"
330 | }
331 | },
332 | "nbformat": 4,
333 | "nbformat_minor": 4
334 | }
--------------------------------------------------------------------------------
/Basic_Machine_Learning_Predicts.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# Simple Linear Regression for stock using scikit-learn\n"
7 | ],
8 | "metadata": {}
9 | },
10 | {
11 | "cell_type": "code",
12 | "source": [
13 | "import pandas as pd\n",
14 | "import numpy as np\n",
15 | "import matplotlib.pyplot as plt\n",
16 | "import math\n",
17 | "import seaborn as sns\n",
18 | "%matplotlib inline\n",
19 | "\n",
20 | "import warnings\n",
21 | "warnings.filterwarnings(\"ignore\")\n",
22 | "\n",
23 | "import fix_yahoo_finance as yf\n",
24 | "yf.pdr_override()"
25 | ],
26 | "outputs": [],
27 | "execution_count": 1,
28 | "metadata": {
29 | "collapsed": false,
30 | "outputHidden": false,
31 | "inputHidden": false
32 | }
33 | },
34 | {
35 | "cell_type": "code",
36 | "source": [
37 | "stock = 'AAPL'\n",
38 | "start = '2016-01-01' \n",
39 | "end = '2018-01-01'\n",
40 | "data = yf.download(stock, start, end)\n",
41 | "data.head()"
42 | ],
43 | "outputs": [
44 | {
45 | "output_type": "stream",
46 | "name": "stdout",
47 | "text": [
48 | "[*********************100%***********************] 1 of 1 downloaded\n"
49 | ]
50 | },
51 | {
52 | "output_type": "execute_result",
53 | "execution_count": 2,
54 | "data": {
55 | "text/plain": " Open High Low Close Adj Close \\\nDate \n2016-01-04 102.610001 105.370003 102.000000 105.349998 100.274513 \n2016-01-05 105.750000 105.849998 102.410004 102.709999 97.761681 \n2016-01-06 100.559998 102.370003 99.870003 100.699997 95.848511 \n2016-01-07 98.680000 100.129997 96.430000 96.449997 91.803276 \n2016-01-08 98.550003 99.110001 96.760002 96.959999 92.288696 \n\n Volume \nDate \n2016-01-04 67649400 \n2016-01-05 55791000 \n2016-01-06 68457400 \n2016-01-07 81094400 \n2016-01-08 70798000 ",
56 | "text/html": "\n\n
\n \n \n | \n Open | \n High | \n Low | \n Close | \n Adj Close | \n Volume | \n
\n \n Date | \n | \n | \n | \n | \n | \n | \n
\n \n \n \n 2016-01-04 | \n 102.610001 | \n 105.370003 | \n 102.000000 | \n 105.349998 | \n 100.274513 | \n 67649400 | \n
\n \n 2016-01-05 | \n 105.750000 | \n 105.849998 | \n 102.410004 | \n 102.709999 | \n 97.761681 | \n 55791000 | \n
\n \n 2016-01-06 | \n 100.559998 | \n 102.370003 | \n 99.870003 | \n 100.699997 | \n 95.848511 | \n 68457400 | \n
\n \n 2016-01-07 | \n 98.680000 | \n 100.129997 | \n 96.430000 | \n 96.449997 | \n 91.803276 | \n 81094400 | \n
\n \n 2016-01-08 | \n 98.550003 | \n 99.110001 | \n 96.760002 | \n 96.959999 | \n 92.288696 | \n 70798000 | \n
\n \n
\n
"
57 | },
58 | "metadata": {}
59 | }
60 | ],
61 | "execution_count": 2,
62 | "metadata": {
63 | "collapsed": false,
64 | "outputHidden": false,
65 | "inputHidden": false
66 | }
67 | },
68 | {
69 | "cell_type": "code",
70 | "source": [
71 | "df = data.reset_index()\n",
72 | "df.head()"
73 | ],
74 | "outputs": [
75 | {
76 | "output_type": "execute_result",
77 | "execution_count": 3,
78 | "data": {
79 | "text/plain": " Date Open High Low Close Adj Close \\\n0 2016-01-04 102.610001 105.370003 102.000000 105.349998 100.274513 \n1 2016-01-05 105.750000 105.849998 102.410004 102.709999 97.761681 \n2 2016-01-06 100.559998 102.370003 99.870003 100.699997 95.848511 \n3 2016-01-07 98.680000 100.129997 96.430000 96.449997 91.803276 \n4 2016-01-08 98.550003 99.110001 96.760002 96.959999 92.288696 \n\n Volume \n0 67649400 \n1 55791000 \n2 68457400 \n3 81094400 \n4 70798000 ",
80 | "text/html": "\n\n
\n \n \n | \n Date | \n Open | \n High | \n Low | \n Close | \n Adj Close | \n Volume | \n
\n \n \n \n 0 | \n 2016-01-04 | \n 102.610001 | \n 105.370003 | \n 102.000000 | \n 105.349998 | \n 100.274513 | \n 67649400 | \n
\n \n 1 | \n 2016-01-05 | \n 105.750000 | \n 105.849998 | \n 102.410004 | \n 102.709999 | \n 97.761681 | \n 55791000 | \n
\n \n 2 | \n 2016-01-06 | \n 100.559998 | \n 102.370003 | \n 99.870003 | \n 100.699997 | \n 95.848511 | \n 68457400 | \n
\n \n 3 | \n 2016-01-07 | \n 98.680000 | \n 100.129997 | \n 96.430000 | \n 96.449997 | \n 91.803276 | \n 81094400 | \n
\n \n 4 | \n 2016-01-08 | \n 98.550003 | \n 99.110001 | \n 96.760002 | \n 96.959999 | \n 92.288696 | \n 70798000 | \n
\n \n
\n
"
81 | },
82 | "metadata": {}
83 | }
84 | ],
85 | "execution_count": 3,
86 | "metadata": {
87 | "collapsed": false,
88 | "outputHidden": false,
89 | "inputHidden": false
90 | }
91 | },
92 | {
93 | "cell_type": "code",
94 | "source": [
95 | "X = df.drop(['Date','Close'], axis=1, inplace=True)\n",
96 | "y = df[['Adj Close']]"
97 | ],
98 | "outputs": [],
99 | "execution_count": 4,
100 | "metadata": {
101 | "collapsed": false,
102 | "outputHidden": false,
103 | "inputHidden": false
104 | }
105 | },
106 | {
107 | "cell_type": "code",
108 | "source": [
109 | "df = df.as_matrix()"
110 | ],
111 | "outputs": [],
112 | "execution_count": 5,
113 | "metadata": {
114 | "collapsed": false,
115 | "outputHidden": false,
116 | "inputHidden": false
117 | }
118 | },
119 | {
120 | "cell_type": "code",
121 | "source": [
122 | "from sklearn.model_selection import train_test_split\n",
123 | "\n",
124 | "# Split X and y into X_\n",
125 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)"
126 | ],
127 | "outputs": [],
128 | "execution_count": 6,
129 | "metadata": {
130 | "collapsed": false,
131 | "outputHidden": false,
132 | "inputHidden": false
133 | }
134 | },
135 | {
136 | "cell_type": "code",
137 | "source": [
138 | "from sklearn.linear_model import LinearRegression\n",
139 | "\n",
140 | "regression_model = LinearRegression()\n",
141 | "regression_model.fit(X_train, y_train)"
142 | ],
143 | "outputs": [
144 | {
145 | "output_type": "execute_result",
146 | "execution_count": 7,
147 | "data": {
148 | "text/plain": "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
149 | },
150 | "metadata": {}
151 | }
152 | ],
153 | "execution_count": 7,
154 | "metadata": {
155 | "collapsed": false,
156 | "outputHidden": false,
157 | "inputHidden": false
158 | }
159 | },
160 | {
161 | "cell_type": "code",
162 | "source": [
163 | "intercept = regression_model.intercept_[0]\n",
164 | "\n",
165 | "print(\"The intercept for our model is {}\".format(intercept))"
166 | ],
167 | "outputs": [
168 | {
169 | "output_type": "stream",
170 | "name": "stdout",
171 | "text": [
172 | "The intercept for our model is -1.2047109976265347e-09\n"
173 | ]
174 | }
175 | ],
176 | "execution_count": 8,
177 | "metadata": {
178 | "collapsed": false,
179 | "outputHidden": false,
180 | "inputHidden": false
181 | }
182 | },
183 | {
184 | "cell_type": "code",
185 | "source": [
186 | "regression_model.score(X_test, y_test)"
187 | ],
188 | "outputs": [
189 | {
190 | "output_type": "execute_result",
191 | "execution_count": 9,
192 | "data": {
193 | "text/plain": "1.0"
194 | },
195 | "metadata": {}
196 | }
197 | ],
198 | "execution_count": 9,
199 | "metadata": {
200 | "collapsed": false,
201 | "outputHidden": false,
202 | "inputHidden": false
203 | }
204 | },
205 | {
206 | "cell_type": "code",
207 | "source": [
208 | "from sklearn.metrics import mean_squared_error\n",
209 | "\n",
210 | "y_predict = regression_model.predict(X_test)\n",
211 | "\n",
212 | "regression_model_mse = mean_squared_error(y_predict, y_test)\n",
213 | "\n",
214 | "regression_model_mse"
215 | ],
216 | "outputs": [
217 | {
218 | "output_type": "execute_result",
219 | "execution_count": 10,
220 | "data": {
221 | "text/plain": "2.8264629110010686e-19"
222 | },
223 | "metadata": {}
224 | }
225 | ],
226 | "execution_count": 10,
227 | "metadata": {
228 | "collapsed": false,
229 | "outputHidden": false,
230 | "inputHidden": false
231 | }
232 | },
233 | {
234 | "cell_type": "code",
235 | "source": [
236 | "math.sqrt(regression_model_mse)"
237 | ],
238 | "outputs": [
239 | {
240 | "output_type": "execute_result",
241 | "execution_count": 11,
242 | "data": {
243 | "text/plain": "5.316448919157475e-10"
244 | },
245 | "metadata": {}
246 | }
247 | ],
248 | "execution_count": 11,
249 | "metadata": {
250 | "collapsed": false,
251 | "outputHidden": false,
252 | "inputHidden": false
253 | }
254 | },
255 | {
256 | "cell_type": "code",
257 | "source": [
258 | "# input the latest Open, High, Low, Close, Volume\n",
259 | "# predicts the next day price\n",
260 | "regression_model.predict([[167.81, 171.75, 165.19, 166.48, 37232900]])"
261 | ],
262 | "outputs": [
263 | {
264 | "output_type": "execute_result",
265 | "execution_count": 12,
266 | "data": {
267 | "text/plain": "array([[166.48]])"
268 | },
269 | "metadata": {}
270 | }
271 | ],
272 | "execution_count": 12,
273 | "metadata": {
274 | "collapsed": false,
275 | "outputHidden": false,
276 | "inputHidden": false
277 | }
278 | }
279 | ],
280 | "metadata": {
281 | "kernel_info": {
282 | "name": "python3"
283 | },
284 | "kernelspec": {
285 | "name": "python3",
286 | "language": "python",
287 | "display_name": "Python 3"
288 | },
289 | "language_info": {
290 | "file_extension": ".py",
291 | "pygments_lexer": "ipython3",
292 | "version": "3.5.5",
293 | "mimetype": "text/x-python",
294 | "codemirror_mode": {
295 | "version": 3,
296 | "name": "ipython"
297 | },
298 | "name": "python",
299 | "nbconvert_exporter": "python"
300 | },
301 | "nteract": {
302 | "version": "0.28.0"
303 | }
304 | },
305 | "nbformat": 4,
306 | "nbformat_minor": 4
307 | }
--------------------------------------------------------------------------------
/DL_Title.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LastAncientOne/Deep_Learning_Machine_Learning_Stock/e31e9fc689cd2cb6d74a8f2694750afd6e13bcec/DL_Title.PNG
--------------------------------------------------------------------------------
/Data_Cleaning_for_Machine_Learning.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# Data Cleaning for Machine Learning with Python"
7 | ],
8 | "metadata": {
9 | "nteract": {
10 | "transient": {
11 | "deleting": false
12 | }
13 | }
14 | }
15 | },
16 | {
17 | "cell_type": "code",
18 | "source": [
19 | "import numpy as np\n",
20 | "import matplotlib.pyplot as plt\n",
21 | "import pandas as pd\n",
22 | "\n",
23 | "import warnings\n",
24 | "warnings.filterwarnings(\"ignore\")\n",
25 | "\n",
26 | "# fetch yahoo data\n",
27 | "import yfinance as yf\n",
28 | "yf.pdr_override()"
29 | ],
30 | "outputs": [],
31 | "execution_count": 1,
32 | "metadata": {
33 | "collapsed": true,
34 | "jupyter": {
35 | "source_hidden": false,
36 | "outputs_hidden": false
37 | },
38 | "nteract": {
39 | "transient": {
40 | "deleting": false
41 | }
42 | },
43 | "execution": {
44 | "iopub.status.busy": "2020-08-21T01:15:24.606Z",
45 | "iopub.execute_input": "2020-08-21T01:15:24.613Z",
46 | "iopub.status.idle": "2020-08-21T01:15:25.604Z",
47 | "shell.execute_reply": "2020-08-21T01:15:25.631Z"
48 | }
49 | }
50 | },
51 | {
52 | "cell_type": "code",
53 | "source": [
54 | "# input\n",
55 | "symbol = 'AMD'\n",
56 | "start = '2014-01-01'\n",
57 | "end = '2018-08-27'\n",
58 | "\n",
59 | "# Read data \n",
60 | "dataset = yf.download(symbol,start,end)\n",
61 | "\n",
62 | "# Only keep close columns \n",
63 | "dataset.head()"
64 | ],
65 | "outputs": [
66 | {
67 | "output_type": "stream",
68 | "name": "stdout",
69 | "text": [
70 | "[*********************100%***********************] 1 of 1 completed\n"
71 | ]
72 | },
73 | {
74 | "output_type": "execute_result",
75 | "execution_count": 2,
76 | "data": {
77 | "text/plain": " Adj Close Close High Low Open Volume\nDate \n2014-01-02 3.95 3.95 3.98 3.84 3.85 20548400\n2014-01-03 4.00 4.00 4.00 3.88 3.98 22887200\n2014-01-06 4.13 4.13 4.18 3.99 4.01 42398300\n2014-01-07 4.18 4.18 4.25 4.11 4.19 42932100\n2014-01-08 4.18 4.18 4.26 4.14 4.23 30678700",
78 | "text/html": "\n\n
\n \n \n | \n Adj Close | \n Close | \n High | \n Low | \n Open | \n Volume | \n
\n \n Date | \n | \n | \n | \n | \n | \n | \n
\n \n \n \n 2014-01-02 | \n 3.95 | \n 3.95 | \n 3.98 | \n 3.84 | \n 3.85 | \n 20548400 | \n
\n \n 2014-01-03 | \n 4.00 | \n 4.00 | \n 4.00 | \n 3.88 | \n 3.98 | \n 22887200 | \n
\n \n 2014-01-06 | \n 4.13 | \n 4.13 | \n 4.18 | \n 3.99 | \n 4.01 | \n 42398300 | \n
\n \n 2014-01-07 | \n 4.18 | \n 4.18 | \n 4.25 | \n 4.11 | \n 4.19 | \n 42932100 | \n
\n \n 2014-01-08 | \n 4.18 | \n 4.18 | \n 4.26 | \n 4.14 | \n 4.23 | \n 30678700 | \n
\n \n
\n
"
79 | },
80 | "metadata": {}
81 | }
82 | ],
83 | "execution_count": 2,
84 | "metadata": {
85 | "collapsed": true,
86 | "jupyter": {
87 | "source_hidden": false,
88 | "outputs_hidden": false
89 | },
90 | "nteract": {
91 | "transient": {
92 | "deleting": false
93 | }
94 | },
95 | "execution": {
96 | "iopub.status.busy": "2020-08-21T01:15:25.614Z",
97 | "iopub.execute_input": "2020-08-21T01:15:25.621Z",
98 | "iopub.status.idle": "2020-08-21T01:15:26.860Z",
99 | "shell.execute_reply": "2020-08-21T01:15:27.073Z"
100 | }
101 | }
102 | },
103 | {
104 | "cell_type": "code",
105 | "source": [
106 | "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
107 | "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
108 | "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
109 | "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
110 | "dataset = dataset.dropna()"
111 | ],
112 | "outputs": [],
113 | "execution_count": 3,
114 | "metadata": {
115 | "collapsed": true,
116 | "jupyter": {
117 | "source_hidden": false,
118 | "outputs_hidden": false
119 | },
120 | "nteract": {
121 | "transient": {
122 | "deleting": false
123 | }
124 | },
125 | "execution": {
126 | "iopub.status.busy": "2020-08-21T01:15:26.872Z",
127 | "iopub.execute_input": "2020-08-21T01:15:26.882Z",
128 | "iopub.status.idle": "2020-08-21T01:15:26.899Z",
129 | "shell.execute_reply": "2020-08-21T01:15:27.079Z"
130 | }
131 | }
132 | },
133 | {
134 | "cell_type": "code",
135 | "source": [
136 | "# summarize the number of unique values in each column\n",
137 | "print(dataset.nunique())"
138 | ],
139 | "outputs": [
140 | {
141 | "output_type": "stream",
142 | "name": "stdout",
143 | "text": [
144 | "Adj Close 657\n",
145 | "Close 657\n",
146 | "High 644\n",
147 | "Low 626\n",
148 | "Open 638\n",
149 | "Volume 1168\n",
150 | "Increase_Decrease 2\n",
151 | "Buy_Sell_on_Open 2\n",
152 | "Buy_Sell 2\n",
153 | "Returns 1078\n",
154 | "dtype: int64\n"
155 | ]
156 | }
157 | ],
158 | "execution_count": 4,
159 | "metadata": {
160 | "collapsed": true,
161 | "jupyter": {
162 | "source_hidden": false,
163 | "outputs_hidden": false
164 | },
165 | "nteract": {
166 | "transient": {
167 | "deleting": false
168 | }
169 | },
170 | "execution": {
171 | "iopub.status.busy": "2020-08-21T01:15:26.915Z",
172 | "iopub.execute_input": "2020-08-21T01:15:26.922Z",
173 | "iopub.status.idle": "2020-08-21T01:15:26.938Z",
174 | "shell.execute_reply": "2020-08-21T01:15:27.085Z"
175 | }
176 | }
177 | },
178 | {
179 | "cell_type": "code",
180 | "source": [
181 | "print(dataset.shape)"
182 | ],
183 | "outputs": [
184 | {
185 | "output_type": "stream",
186 | "name": "stdout",
187 | "text": [
188 | "(1170, 10)\n"
189 | ]
190 | }
191 | ],
192 | "execution_count": 5,
193 | "metadata": {
194 | "collapsed": true,
195 | "jupyter": {
196 | "source_hidden": false,
197 | "outputs_hidden": false
198 | },
199 | "nteract": {
200 | "transient": {
201 | "deleting": false
202 | }
203 | },
204 | "execution": {
205 | "iopub.status.busy": "2020-08-21T01:15:26.950Z",
206 | "iopub.execute_input": "2020-08-21T01:15:26.957Z",
207 | "iopub.status.idle": "2020-08-21T01:15:26.977Z",
208 | "shell.execute_reply": "2020-08-21T01:15:27.100Z"
209 | }
210 | }
211 | },
212 | {
213 | "cell_type": "code",
214 | "source": [
215 | "counts = dataset.nunique()\n",
216 | "to_del = [i for i,v in enumerate(counts) if v == 1]\n",
217 | "print(to_del)"
218 | ],
219 | "outputs": [
220 | {
221 | "output_type": "stream",
222 | "name": "stdout",
223 | "text": [
224 | "[]\n"
225 | ]
226 | }
227 | ],
228 | "execution_count": 6,
229 | "metadata": {
230 | "collapsed": true,
231 | "jupyter": {
232 | "source_hidden": false,
233 | "outputs_hidden": false
234 | },
235 | "nteract": {
236 | "transient": {
237 | "deleting": false
238 | }
239 | },
240 | "execution": {
241 | "iopub.status.busy": "2020-08-21T01:15:26.988Z",
242 | "iopub.execute_input": "2020-08-21T01:15:26.993Z",
243 | "iopub.status.idle": "2020-08-21T01:15:27.007Z",
244 | "shell.execute_reply": "2020-08-21T01:15:27.105Z"
245 | }
246 | }
247 | },
248 | {
249 | "cell_type": "code",
250 | "source": [
251 | "# drop useless columns\n",
252 | "dataset.drop(to_del, axis=1, inplace=True)\n",
253 | "print(dataset.shape)"
254 | ],
255 | "outputs": [
256 | {
257 | "output_type": "stream",
258 | "name": "stdout",
259 | "text": [
260 | "(1170, 10)\n"
261 | ]
262 | }
263 | ],
264 | "execution_count": 7,
265 | "metadata": {
266 | "collapsed": true,
267 | "jupyter": {
268 | "source_hidden": false,
269 | "outputs_hidden": false
270 | },
271 | "nteract": {
272 | "transient": {
273 | "deleting": false
274 | }
275 | },
276 | "execution": {
277 | "iopub.status.busy": "2020-08-21T01:15:27.016Z",
278 | "iopub.execute_input": "2020-08-21T01:15:27.022Z",
279 | "iopub.status.idle": "2020-08-21T01:15:27.036Z",
280 | "shell.execute_reply": "2020-08-21T01:15:27.109Z"
281 | }
282 | }
283 | }
284 | ],
285 | "metadata": {
286 | "kernel_info": {
287 | "name": "python3"
288 | },
289 | "language_info": {
290 | "mimetype": "text/x-python",
291 | "codemirror_mode": {
292 | "name": "ipython",
293 | "version": 3
294 | },
295 | "name": "python",
296 | "version": "3.5.5",
297 | "pygments_lexer": "ipython3",
298 | "nbconvert_exporter": "python",
299 | "file_extension": ".py"
300 | },
301 | "kernelspec": {
302 | "argv": [
303 | "C:\\Users\\Tin Hang\\Anaconda3\\envs\\py35\\python.exe",
304 | "-m",
305 | "ipykernel_launcher",
306 | "-f",
307 | "{connection_file}"
308 | ],
309 | "display_name": "Python 3",
310 | "language": "python",
311 | "name": "python3"
312 | },
313 | "nteract": {
314 | "version": "0.24.1"
315 | }
316 | },
317 | "nbformat": 4,
318 | "nbformat_minor": 0
319 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 LastAncientOne
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | [![Contributors][contributors-shield]][contributors-url]
3 | [![Forks][forks-shield]][forks-url]
4 | [![Stargazers][stars-shield]][stars-url]
5 | [![Issues][issues-shield]][issues-url]
6 | [![MIT License][license-shield]][license-url]
7 | [![LinkedIn][linkedin-shield]][linkedin-url]
8 |
9 |
10 |
11 |
12 |
13 | [contributors-shield]: https://img.shields.io/github/contributors/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge
14 | [contributors-url]: https://github.com/LastAncientOne/Deep-Learning-Machine-Learning-Stock/graphs/contributors
15 | [forks-shield]: https://img.shields.io/github/forks/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge
16 | [forks-url]: https://github.com/LastAncientOne/Deep-Learning-Machine-Learning-Stock/network/members
17 | [stars-shield]: https://img.shields.io/github/stars/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge
18 | [stars-url]: https://github.com/LastAncientOne/Deep-Learning-Machine-Learning-Stock/stargazers
19 | [issues-shield]: https://img.shields.io/github/issues/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge
20 | [issues-url]: https://github.com/LastAncientOne/Deep-Learning-Machine-Learning-Stock/issues
21 | [license-shield]: https://img.shields.io/github/license/LastAncientOne/Deep-Learning-Machine-Learning-Stock.svg?style=for-the-badge
22 | [license-url]: LICENSE
23 | [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
24 | [linkedin-url]: https://linkedin.com/in/tin-hang
25 |
26 |
27 |
28 |
29 | Deep Learning and Machine Learning for Stock Predictions
30 |
31 | Description: This is a comprehensive study and analysis of stocks using deep learning (DL) and machine learning (ML) techniques. Both machine learning and deep learning are types of artificial intelligence (AI). The objective is to predict stock behavior by employing various machine learning and deep learning algorithms. The focus is on experimenting with stock data to understand how and why certain methods are effective, as well as identifying reasons for their potential limitations. Different stock strategies are explored within the context of machine learning and deep learning. Technical Analysis and Fundamental Analysis are utilized to predict future stock prices using these AI techniques, encompassing both long-term and short-term predictions.
32 |
33 | Machine learning is a branch of artificial intelligence that involves the development of algorithms capable of automatically adapting and generating outputs by processing structured data. On the other hand, deep learning is a subset of machine learning that employs similar algorithms but with additional layers of complexity, enabling different interpretations of the data. The network of algorithms used in deep learning is known as artificial neural networks, which mimic the interconnectedness of neural pathways in the human brain.
34 |
35 | Deep learning and machine learning are powerful approaches that have revolutionized the AI landscape. Understanding the fundamentals of these techniques and the commonly used algorithms is essential for aspiring data scientists and AI enthusiasts. Regression, as a fundamental concept in predictive modeling, plays a crucial role in analyzing and predicting continuous variables. By harnessing the capabilities of these algorithms and techniques, we can unlock incredible potential in various domains, leading to advancements and improvements in numerous industries.
36 |
37 | ### Machine Learning Step-by-Step
38 | 1. Collecting/Gathering Data.
39 | 2. Preparing the Data - load the data and prepare it for training.
40 | 3. Choosing a Model.
41 | 4. Training the Model.
42 | 5. Evaluating the Model.
43 | 6. Parameter Tuning.
44 | 7. Make Predictions (see the sketch below).
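
A minimal sketch of these seven steps on stock data, assuming scikit-learn and yfinance are installed; the ticker, date range, feature set, and hyperparameter grid are arbitrary illustrative choices, not a canonical pipeline.

```python
import yfinance as yf
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split

# 1. Collect data (assumes an 'Adj Close' column, as in the notebooks here)
data = yf.download("AMD", start="2014-01-01", end="2018-08-27")

# 2. Prepare the data: label 1 when the next day's adjusted close is higher
data["Target"] = (data["Adj Close"].shift(-1) > data["Adj Close"]).astype(int)
data = data.dropna()
X = data[["Open", "High", "Low", "Close", "Volume"]]
y = data["Target"]
# shuffle=False keeps the series in time order, so the test set is out-of-sample
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# 3. Choose a model
model = RandomForestClassifier(random_state=0)

# 4. Train the model
model.fit(X_train, y_train)

# 5. Evaluate the model
print("Test accuracy:", accuracy_score(y_test, model.predict(X_test)))

# 6. Parameter tuning
grid = GridSearchCV(model, {"n_estimators": [50, 100], "max_depth": [3, 5]}, cv=3)
grid.fit(X_train, y_train)

# 7. Make predictions on the most recent rows
print(grid.best_estimator_.predict(X_test.tail(5)))
```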
45 |
46 | ### Deep Learning Model Step-by-Step
47 | 1. Define the Model.
48 | 2. Compile the Model.
49 | 3. Fit the Model with the training dataset.
50 | 4. Make Predictions (see the sketch below).
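
A minimal sketch of the four deep learning steps with Keras, assuming TensorFlow is installed; the synthetic data, layer sizes, and optimizer are arbitrary assumptions for illustration.

```python
import numpy as np
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Synthetic stand-ins for a prepared feature matrix and binary target
rng = np.random.RandomState(0)
X_train, y_train = rng.rand(200, 5), rng.randint(0, 2, 200)

# 1. Define the model
model = Sequential([
    Dense(32, activation="relu", input_shape=(5,)),
    Dense(1, activation="sigmoid"),
])

# 2. Compile the model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# 3. Fit the model with the training dataset
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

# 4. Make predictions (probabilities of the positive class)
probabilities = model.predict(X_train[:5])
```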
51 |
52 | Programming Languages and Tools:
53 |
54 |
55 | ### Three main types of data: Categorical, Discrete, and Continuous variables
56 | 1. Categorical variable (Qualitative): label data or distinct groups.
57 | Example: location, gender, material type, payment, highest level of education
58 | 2. Discrete variable (Class Data): numeric variables with a countable number of values between any two values.
59 | Example: customer complaints or number of flaws or defects, Children per Household, age (number of years)
60 | 3. Continuous variable (Quantitative): Numeric variables that have an infinite number of values between any two values.
61 | Example: length of a part or the date and time a payment is received, running distance, age (infinitely accurate, using an infinite number of decimal places)
62 |
63 | ### Data Use
64 | 1. 'Quantitative data' can be used with all three centre measures (mean, median, and mode) and all spread measures.
65 | 2. 'Class data' can be used with the median and mode.
66 | 3. 'Qualitative data' can only be used with the mode (a short pandas illustration follows below).
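
A short pandas illustration of these rules; the toy data below is invented for demonstration.

```python
import pandas as pd

df = pd.DataFrame({
    "payment_type": ["card", "cash", "card", "card"],  # categorical (qualitative)
    "complaints": [0, 2, 1, 0],                        # discrete (class data)
    "distance_km": [5.2, 3.8, 10.1, 7.45],             # continuous (quantitative)
})

# Quantitative: mean, median, and mode are all meaningful
print(df["distance_km"].mean(), df["distance_km"].median(), df["distance_km"].mode()[0])

# Class data: median and mode
print(df["complaints"].median(), df["complaints"].mode()[0])

# Qualitative: mode only
print(df["payment_type"].mode()[0])
```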
67 |
68 | ### Two types of problems:
69 | 1. Classification (predict label)
70 | 2. Regression (predict values)
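
A toy example of the two problem types (the numbers are invented): the classifier predicts a label, the regressor a value.

```python
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression

X = np.array([[1.0], [2.0], [3.0], [4.0]])

# Classification: predict a label (e.g. 0 = price down, 1 = price up)
print(LogisticRegression().fit(X, [0, 0, 1, 1]).predict([[2.5]]))

# Regression: predict a value (e.g. a price)
print(LinearRegression().fit(X, [1.1, 1.9, 3.2, 3.9]).predict([[2.5]]))
```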
71 |
72 | ### Bias-Variance Tradeoff
73 | #### Bias
74 | - Bias is the difference between our actual and predicted values.
75 | - Bias reflects the simplifying assumptions a model makes about the data in order to predict new data.
76 | - These assumptions make the target function easier to learn.
77 | #### Variance
78 | - Variance is the opposite of bias.
79 | - Variance is the variability of a model's prediction for a given data point; it tells us the spread of our predictions.
80 | - If a model achieves very low error on its training data but the error rises sharply when the data changes, the model has high variance (a sketch follows below).
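
A sketch of the tradeoff using scikit-learn decision trees on invented data: a depth-1 tree underfits (high bias), while an unconstrained tree memorizes the noise (high variance).

```python
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

rng = np.random.RandomState(0)
X = rng.uniform(-3, 3, size=(200, 1))
y = np.sin(X).ravel() + rng.normal(scale=0.3, size=200)  # noisy signal
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

for depth in (1, None):  # high bias vs high variance
    tree = DecisionTreeRegressor(max_depth=depth, random_state=0).fit(X_train, y_train)
    print(f"max_depth={depth}: train R2={tree.score(X_train, y_train):.2f}, "
          f"test R2={tree.score(X_test, y_test):.2f}")
```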
81 |
82 | ### Overfitting, Underfitting, and the bias-variance tradeoff
83 | Overfitting occurs when the model memorizes the noise and fits too closely to the training set. A good fit is a model that learns the training dataset and generalizes well to the held-out dataset. Underfitting occurs when the model cannot establish the dominant trend within the data, which results in training errors and poor model performance.
84 |
85 | #### Overfitting:
86 | An overfitted model fits the training data well, passing at or near each observation; however, it misses the point because random noise is captured inside the model. Such a model has low training error and high cross-validation error, low in-sample error and high out-of-sample error, and high variance.
87 | 1. High Train Accuracy
88 | 2. Low Test Accuracy
89 | #### Avoiding Overfitting:
90 | 1. Early stopping - stop the training before the model starts learning the noise in the training data (sketched below).
91 | 2. Training with more data - adding more data can increase the accuracy of the model and help the algorithm detect the signal better.
92 | 3. Data augmentation - add clean, relevant data to the training set.
93 | 4. Feature selection - use the important features in the data and remove the rest.
94 | 5. Regularization - constrain the model using regularization methods such as L1 (Lasso) regularization and dropout (sketched below).
95 | 6. Ensemble methods - combine predictions from multiple separate models, such as bagging and boosting.
96 | 7. Reduce model complexity - use a simpler model with fewer parameters.
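
A brief sketch of remedies 1 and 5 in scikit-learn, using SGDClassifier on invented data; the hyperparameter values are arbitrary.

```python
import numpy as np
from sklearn.linear_model import SGDClassifier

rng = np.random.RandomState(0)
X = rng.rand(500, 5)
y = (X[:, 0] + 0.1 * rng.randn(500) > 0.5).astype(int)

clf = SGDClassifier(
    penalty="elasticnet",     # remedy 5: mixed L1/L2 regularization
    alpha=1e-3,               # regularization strength
    early_stopping=True,      # remedy 1: stop before the model learns the noise
    validation_fraction=0.1,  # held-out split that drives early stopping
    n_iter_no_change=5,
    random_state=0,
).fit(X, y)
print("stopped after", clf.n_iter_, "epochs")
```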
97 | #### Good fit:
98 | 1. High Train Accuracy
99 | 2. High Test Accuracy
100 | #### Underfitting:
101 | An underfitted model does not capture the underlying logic of the data. Therefore, the model has weak predictive power and low accuracy. Such a model has a large training-set error, a large in-sample error, and high bias.
102 | 1. Low Train Accuracy
103 | 2. Low Test Accuracy
104 | #### Avoiding Underfitting:
105 | 1. Decrease regularization - regularization methods (L1/Lasso regularization, dropout, etc.) reduce variance by penalizing large coefficients; too much of it keeps the model from fitting, so dial it back.
106 | 2. Increase the duration of training - stopping the training too early can cause an underfit model.
107 | 3. Feature selection - if not enough predictive features are present, adding features with greater importance will improve the model.
108 | 4. Increase the number of features - perform feature engineering (a sketch follows below).
109 | 5. Remove noise from the data.
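
A sketch of remedy 4 on invented data: PolynomialFeatures lets a linear model fit a curve it would otherwise underfit.

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

rng = np.random.RandomState(0)
X = rng.uniform(-3, 3, size=(200, 1))
y = X.ravel() ** 2 + rng.normal(scale=0.5, size=200)  # quadratic signal

linear = LinearRegression().fit(X, y)  # underfits: a line cannot follow a parabola
poly = make_pipeline(PolynomialFeatures(degree=2), LinearRegression()).fit(X, y)
print(f"linear R2={linear.score(X, y):.2f}, poly R2={poly.score(X, y):.2f}")
```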
110 |
111 |
112 | ## Python Reviews
113 | Steps 1 through 8 are a review of Python.
114 | After step 8, everything relates to data analysis, data engineering, data science, machine learning, and deep learning.
115 | Here is the link to the Python tutorial:
116 | [Python Tutorial for Stock Analysis](https://github.com/LastAncientOne/SimpleStockAnalysisPython)
117 |
118 |
119 | ## List of Machine Learning Algorithms for Stock Trading
120 | ### Most Common Regression Algorithms
121 | 1. Linear Regression Model
122 | 2. Logistic Regression
123 | 3. Lasso Regression
124 | 4. Support Vector Machines
125 | 5. Polynomial Regression
126 | 6. Stepwise Regression
127 | 7. Ridge Regression
128 | 8. Multivariate Regression Algorithm
129 | 9. Multiple Regression Algorithm
130 | 10. K Means Clustering Algorithm
131 | 11. Naïve Bayes Classifier Algorithm
132 | 12. Random Forests
133 | 13. Decision Trees
134 | 14. Nearest Neighbours
135 | 15. Bayesian Ridge Regression
136 | 16. ElasticNet Regression
137 | 17. Reinforcement Learning
138 | 18. Artificial Intelligence
139 | 19. MultiModal Network
140 | 20. Biologic Intelligence
141 |
142 | ### Different Types of Machine Learning Algorithms and Models
143 | Algorithms are processes and sets of instructions used to solve a class of problems. Additionally, algorithms perform computations such as calculations, data processing, automated reasoning, and other tasks. A machine learning algorithm is a method that enables systems to learn and improve automatically from experience, without being explicitly programmed.
144 |
145 | # Prerequisites
146 | Python 3.5+
147 | Jupyter Notebook Python 3
148 | Windows 7 or Windows 10
149 |
150 | ### Download Software
151 | https://www.python.org/
152 |
153 | Programming Language:
154 |
155 |
156 | Tools:
157 |
158 |
159 |
160 |
161 | ## Authors
162 | ### Tin Hang
163 |
164 | ## Disclaimer
165 | 🔻 Do not use this code for investing or trading in the stock market. However, if you are interested in the stock market, you should read :books: books related to the stock market, investment, or finance. If you are into quant or machine learning, read books about 📘 machine trading, algorithmic trading, and quantitative trading. You should also read 📗 about Machine Learning and Deep Learning to understand the concepts, theory, and mathematics, and read academic papers and do online research about machine learning and deep learning on :computer:
166 |
167 | ### Certain portions of the code may break as specific library packages are updated or become obsolete. Adjustments will be necessary depending on the Python libraries employed; it may be necessary to upgrade or downgrade certain libraries accordingly.
168 |
169 | ## 🔴 Warning: This is not financial advice; it should not be relied upon for investment or trading decisions, as it is for educational purposes only.
170 |
--------------------------------------------------------------------------------
/Stock_Algorithms/30_Regression_Models.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# 30 Regression Models"
7 | ],
8 | "metadata": {
9 | "nteract": {
10 | "transient": {
11 | "deleting": false
12 | }
13 | }
14 | }
15 | },
16 | {
17 | "cell_type": "code",
18 | "source": [
19 | "from lazypredict.Supervised import LazyRegressor\n",
20 | "from pandas.plotting import scatter_matrix# Scikit-learn packages\n",
21 | "from sklearn.linear_model import LinearRegression\n",
22 | "from sklearn.tree import DecisionTreeRegressor\n",
23 | "from sklearn.ensemble import ExtraTreesRegressor\n",
24 | "from sklearn import metrics\n",
25 | "from sklearn.metrics import mean_squared_error# Hide warnings\n",
26 | "from sklearn.model_selection import train_test_split \n",
27 | "\n",
28 | "import warnings\n",
29 | "warnings.filterwarnings(\"ignore\")\n",
30 | "\n",
31 | "import yfinance as yf\n",
32 | "yf.pdr_override()"
33 | ],
34 | "outputs": [
35 | {
36 | "output_type": "stream",
37 | "name": "stderr",
38 | "text": [
39 | "C:\\Users\\Tin Hang\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:143: FutureWarning: The sklearn.utils.testing module is deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.utils. Anything that cannot be imported from sklearn.utils is now part of the private API.\n",
40 | " warnings.warn(message, FutureWarning)\n"
41 | ]
42 | }
43 | ],
44 | "execution_count": 1,
45 | "metadata": {
46 | "collapsed": true,
47 | "jupyter": {
48 | "source_hidden": false,
49 | "outputs_hidden": false
50 | },
51 | "nteract": {
52 | "transient": {
53 | "deleting": false
54 | }
55 | },
56 | "execution": {
57 | "shell.execute_reply": "2021-04-28T00:38:36.736Z",
58 | "iopub.status.busy": "2021-04-28T00:38:34.815Z",
59 | "iopub.execute_input": "2021-04-28T00:38:34.824Z",
60 | "iopub.status.idle": "2021-04-28T00:38:36.720Z"
61 | }
62 | }
63 | },
64 | {
65 | "cell_type": "code",
66 | "source": [
67 | "# input\n",
68 | "symbol = 'AMD'\n",
69 | "start = '2014-01-01'\n",
70 | "end = '2018-08-27'\n",
71 | "\n",
72 | "# Read data \n",
73 | "dataset = yf.download(symbol,start,end)\n",
74 | "\n",
75 | "# Only keep close columns \n",
76 | "dataset.head()"
77 | ],
78 | "outputs": [
79 | {
80 | "output_type": "stream",
81 | "name": "stdout",
82 | "text": [
83 | "[*********************100%***********************] 1 of 1 completed\n"
84 | ]
85 | },
86 | {
87 | "output_type": "execute_result",
88 | "execution_count": 2,
89 | "data": {
90 | "text/plain": " Open High Low Close Adj Close Volume\nDate \n2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700",
91 | "text/html": "\n\n
\n \n \n | \n Open | \n High | \n Low | \n Close | \n Adj Close | \n Volume | \n
\n \n Date | \n | \n | \n | \n | \n | \n | \n
\n \n \n \n 2014-01-02 | \n 3.85 | \n 3.98 | \n 3.84 | \n 3.95 | \n 3.95 | \n 20548400 | \n
\n \n 2014-01-03 | \n 3.98 | \n 4.00 | \n 3.88 | \n 4.00 | \n 4.00 | \n 22887200 | \n
\n \n 2014-01-06 | \n 4.01 | \n 4.18 | \n 3.99 | \n 4.13 | \n 4.13 | \n 42398300 | \n
\n \n 2014-01-07 | \n 4.19 | \n 4.25 | \n 4.11 | \n 4.18 | \n 4.18 | \n 42932100 | \n
\n \n 2014-01-08 | \n 4.23 | \n 4.26 | \n 4.14 | \n 4.18 | \n 4.18 | \n 30678700 | \n
\n \n
\n
"
92 | },
93 | "metadata": {}
94 | }
95 | ],
96 | "execution_count": 2,
97 | "metadata": {
98 | "collapsed": true,
99 | "jupyter": {
100 | "source_hidden": false,
101 | "outputs_hidden": false
102 | },
103 | "nteract": {
104 | "transient": {
105 | "deleting": false
106 | }
107 | },
108 | "execution": {
109 | "iopub.status.busy": "2021-04-28T00:38:36.725Z",
110 | "iopub.execute_input": "2021-04-28T00:38:36.729Z",
111 | "iopub.status.idle": "2021-04-28T00:38:37.064Z",
112 | "shell.execute_reply": "2021-04-28T00:38:37.060Z"
113 | }
114 | }
115 | },
116 | {
117 | "cell_type": "code",
118 | "source": [
119 | "# Creating train test split\n",
120 | "X = dataset.drop(columns=['Adj Close'])\n",
121 | "y = dataset['Adj Close']\n",
122 | "\n",
123 | "offset = int(X.shape[0] * 0.9)\n",
124 | "\n",
125 | "X_train, y_train = X[:offset], y[:offset]\n",
126 | "X_test, y_test = X[offset:], y[offset:]"
127 | ],
128 | "outputs": [],
129 | "execution_count": 3,
130 | "metadata": {
131 | "collapsed": true,
132 | "jupyter": {
133 | "source_hidden": false,
134 | "outputs_hidden": false
135 | },
136 | "nteract": {
137 | "transient": {
138 | "deleting": false
139 | }
140 | },
141 | "execution": {
142 | "iopub.status.busy": "2021-04-28T00:38:37.069Z",
143 | "iopub.execute_input": "2021-04-28T00:38:37.071Z",
144 | "iopub.status.idle": "2021-04-28T00:38:37.076Z",
145 | "shell.execute_reply": "2021-04-28T00:38:37.092Z"
146 | }
147 | }
148 | },
149 | {
150 | "cell_type": "code",
151 | "source": [
152 | "reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None)\n",
153 | "models, predictions = reg.fit(X_train, X_test, y_train, y_test)\n",
154 | "print(models)"
155 | ],
156 | "outputs": [
157 | {
158 | "output_type": "stream",
159 | "name": "stderr",
160 | "text": [
161 | "100%|██████████| 42/42 [00:02<00:00, 17.49it/s]\n"
162 | ]
163 | },
164 | {
165 | "output_type": "stream",
166 | "name": "stdout",
167 | "text": [
168 | " Adjusted R-Squared R-Squared RMSE Time Taken\n",
169 | "Model \n",
170 | "RANSACRegressor 1.00 1.00 0.00 0.01\n",
171 | "Lars 1.00 1.00 0.00 0.01\n",
172 | "HuberRegressor 1.00 1.00 0.00 0.05\n",
173 | "LassoLarsCV 1.00 1.00 0.00 0.01\n",
174 | "LassoLarsIC 1.00 1.00 0.00 0.01\n",
175 | "LinearRegression 1.00 1.00 0.00 0.01\n",
176 | "TransformedTargetRegressor 1.00 1.00 0.00 0.01\n",
177 | "LarsCV 1.00 1.00 0.00 0.01\n",
178 | "OrthogonalMatchingPursuit 1.00 1.00 0.00 0.01\n",
179 | "OrthogonalMatchingPursuitCV 1.00 1.00 0.00 0.01\n",
180 | "BayesianRidge 1.00 1.00 0.00 0.01\n",
181 | "LinearSVR 1.00 1.00 0.00 0.02\n",
182 | "RidgeCV 1.00 1.00 0.03 0.01\n",
183 | "Ridge 1.00 1.00 0.10 0.01\n",
184 | "PassiveAggressiveRegressor 1.00 1.00 0.11 0.01\n",
185 | "LassoCV 1.00 1.00 0.11 0.05\n",
186 | "ElasticNetCV 1.00 1.00 0.17 0.05\n",
187 | "SGDRegressor 1.00 1.00 0.21 0.01\n",
188 | "MLPRegressor 1.00 1.00 0.21 0.79\n",
189 | "GeneralizedLinearRegressor 0.70 0.71 1.87 0.01\n",
190 | "TweedieRegressor 0.70 0.71 1.87 0.01\n",
191 | "ElasticNet 0.67 0.68 1.97 0.01\n",
192 | "Lasso 0.63 0.64 2.09 0.01\n",
193 | "GradientBoostingRegressor 0.62 0.64 2.11 0.10\n",
194 | "XGBRegressor 0.61 0.63 2.13 0.06\n",
195 | "ExtraTreesRegressor 0.59 0.61 2.19 0.15\n",
196 | "DecisionTreeRegressor 0.57 0.59 2.23 0.01\n",
197 | "BaggingRegressor 0.57 0.59 2.23 0.03\n",
198 | "RandomForestRegressor 0.57 0.59 2.24 0.21\n",
199 | "ExtraTreeRegressor 0.46 0.49 2.50 0.01\n",
200 | "KNeighborsRegressor 0.46 0.48 2.52 0.01\n",
201 | "LGBMRegressor 0.45 0.47 2.54 0.05\n",
202 | "HistGradientBoostingRegressor 0.45 0.47 2.54 0.42\n",
203 | "AdaBoostRegressor 0.26 0.29 2.94 0.03\n",
204 | "NuSVR -0.24 -0.19 3.81 0.06\n",
205 | "SVR -0.28 -0.22 3.86 0.02\n",
206 | "GammaRegressor -0.35 -0.29 3.97 0.01\n",
207 | "GaussianProcessRegressor -1.86 -1.74 5.78 0.07\n",
208 | "KernelRidge -2.29 -2.15 6.20 0.04\n",
209 | "PoissonRegressor -3.20 -3.02 7.01 0.01\n",
210 | "DummyRegressor -5.70 -5.42 8.85 0.01\n",
211 | "LassoLars -5.70 -5.42 8.85 0.01\n"
212 | ]
213 | }
214 | ],
215 | "execution_count": 4,
216 | "metadata": {
217 | "collapsed": true,
218 | "jupyter": {
219 | "source_hidden": false,
220 | "outputs_hidden": false
221 | },
222 | "nteract": {
223 | "transient": {
224 | "deleting": false
225 | }
226 | },
227 | "execution": {
228 | "iopub.status.busy": "2021-04-28T00:38:37.080Z",
229 | "iopub.execute_input": "2021-04-28T00:38:37.084Z",
230 | "iopub.status.idle": "2021-04-28T00:38:39.506Z",
231 | "shell.execute_reply": "2021-04-28T00:38:39.501Z"
232 | }
233 | }
234 | }
235 | ],
236 | "metadata": {
237 | "kernel_info": {
238 | "name": "python3"
239 | },
240 | "language_info": {
241 | "name": "python",
242 | "version": "3.6.12",
243 | "mimetype": "text/x-python",
244 | "codemirror_mode": {
245 | "name": "ipython",
246 | "version": 3
247 | },
248 | "pygments_lexer": "ipython3",
249 | "nbconvert_exporter": "python",
250 | "file_extension": ".py"
251 | },
252 | "kernelspec": {
253 | "argv": [
254 | "C:/Users/Tin Hang/Anaconda3\\python.exe",
255 | "-m",
256 | "ipykernel_launcher",
257 | "-f",
258 | "{connection_file}"
259 | ],
260 | "display_name": "Python 3",
261 | "language": "python",
262 | "name": "python3"
263 | },
264 | "nteract": {
265 | "version": "0.28.0"
266 | }
267 | },
268 | "nbformat": 4,
269 | "nbformat_minor": 0
270 | }
--------------------------------------------------------------------------------
/Stock_Algorithms/Algorithms.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LastAncientOne/Deep_Learning_Machine_Learning_Stock/e31e9fc689cd2cb6d74a8f2694750afd6e13bcec/Stock_Algorithms/Algorithms.PNG
--------------------------------------------------------------------------------
/Stock_Algorithms/Genetic_Algorithm.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# Genetic Algorithm"
7 | ],
8 | "metadata": {
9 | "nteract": {
10 | "transient": {
11 | "deleting": false
12 | }
13 | }
14 | }
15 | },
16 | {
17 | "cell_type": "code",
18 | "source": [
19 | "import numpy as np\n",
20 | "import matplotlib.pyplot as plt\n",
21 | "import pandas as pd\n",
22 | "\n",
23 | "# yahoo finance is used to fetch data \n",
24 | "import yfinance as yf\n",
25 | "yf.pdr_override()"
26 | ],
27 | "outputs": [],
28 | "execution_count": 1,
29 | "metadata": {
30 | "collapsed": true,
31 | "jupyter": {
32 | "source_hidden": false,
33 | "outputs_hidden": false
34 | },
35 | "nteract": {
36 | "transient": {
37 | "deleting": false
38 | }
39 | },
40 | "execution": {
41 | "iopub.status.busy": "2023-10-31T22:01:03.828Z",
42 | "iopub.execute_input": "2023-10-31T22:01:03.832Z",
43 | "shell.execute_reply": "2023-10-31T22:01:04.394Z",
44 | "iopub.status.idle": "2023-10-31T22:01:04.399Z"
45 | }
46 | }
47 | },
48 | {
49 | "cell_type": "code",
50 | "source": [
51 | "# input\n",
52 | "symbol = 'AMD'\n",
53 | "start = '2014-01-01'\n",
54 | "end = '2019-01-01'\n",
55 | "\n",
56 | "# Read data \n",
57 | "dataset = yf.download(symbol,start,end)"
58 | ],
59 | "outputs": [
60 | {
61 | "output_type": "stream",
62 | "name": "stdout",
63 | "text": [
64 | "[*********************100%***********************] 1 of 1 completed\n"
65 | ]
66 | }
67 | ],
68 | "execution_count": 2,
69 | "metadata": {
70 | "collapsed": true,
71 | "jupyter": {
72 | "source_hidden": false,
73 | "outputs_hidden": false
74 | },
75 | "nteract": {
76 | "transient": {
77 | "deleting": false
78 | }
79 | },
80 | "execution": {
81 | "iopub.status.busy": "2023-10-31T22:01:04.404Z",
82 | "iopub.execute_input": "2023-10-31T22:01:04.406Z",
83 | "shell.execute_reply": "2023-10-31T22:01:04.798Z",
84 | "iopub.status.idle": "2023-10-31T22:01:04.805Z"
85 | }
86 | }
87 | },
88 | {
89 | "cell_type": "code",
90 | "source": [
91 | "dataset['Open_Close'] = (dataset['Open'] - dataset['Adj Close'])/dataset['Open']\n",
92 | "dataset['High_Low'] = (dataset['High'] - dataset['Low'])/dataset['Low']\n",
93 | "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
94 | "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
95 | "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
96 | "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
97 | "dataset = dataset.dropna()\n",
99 | "\n",
100 | "# View Columns\n",
101 | "dataset.head()"
102 | ],
103 | "outputs": [
104 | {
105 | "output_type": "execute_result",
106 | "execution_count": 3,
107 | "data": {
108 | "text/plain": " Open High Low Close Adj Close Volume Open_Close \\\nDate \n2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200 -0.005025 \n2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300 -0.029925 \n2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100 0.002387 \n2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700 0.011820 \n2014-01-09 4.20 4.23 4.05 4.09 4.09 30667600 0.026190 \n\n High_Low Increase_Decrease Buy_Sell_on_Open Buy_Sell Returns \nDate \n2014-01-03 0.030928 1 1 1 0.012658 \n2014-01-06 0.047619 1 1 1 0.032500 \n2014-01-07 0.034063 0 1 0 0.012106 \n2014-01-08 0.028986 0 0 0 0.000000 \n2014-01-09 0.044444 0 0 1 -0.021531 ",
109 |        "text/html": ""
110 | },
111 | "metadata": {}
112 | }
113 | ],
114 | "execution_count": 3,
115 | "metadata": {
116 | "collapsed": true,
117 | "jupyter": {
118 | "source_hidden": false,
119 | "outputs_hidden": false
120 | },
121 | "nteract": {
122 | "transient": {
123 | "deleting": false
124 | }
125 | },
126 | "execution": {
127 | "iopub.status.busy": "2023-10-31T22:01:04.810Z",
128 | "iopub.execute_input": "2023-10-31T22:01:04.815Z",
129 | "iopub.status.idle": "2023-10-31T22:01:04.835Z",
130 | "shell.execute_reply": "2023-10-31T22:01:04.859Z"
131 | }
132 | }
133 | },
134 | {
135 | "cell_type": "code",
136 | "source": [
137 | "# Define a placeholder for the fitness function (you should implement this)\n",
138 | "def fitness_function(individual):\n",
139 | " # Calculate the fitness of the individual\n",
140 | " # You should define your fitness function based on your problem\n",
141 | " return 0 # Replace this with your actual fitness calculation\n",
142 | "\n",
143 | "def genetic_algorithm(population_size, num_generations, mutation_rate):\n",
144 | " population = np.random.randint(2, size=(population_size, 4)) # Initialize the population\n",
145 | "\n",
146 | " for generation in range(num_generations):\n",
147 | " fitness_scores = [fitness_function(individual) for individual in population]\n",
148 | " best_individual = population[np.argmax(fitness_scores)]\n",
149 | " best_fitness = max(fitness_scores)\n",
150 | "\n",
151 | " if generation % 10 == 0:\n",
152 | " print(f\"Generation {generation}: Best Fitness = {best_fitness}\")\n",
153 | "\n",
154 | " # Create a new population using mutation and crossover\n",
155 | " new_population = []\n",
156 | "\n",
157 | " for _ in range(population_size):\n",
158 | " parent1 = population[np.random.choice(range(population_size))]\n",
159 | " parent2 = population[np.random.choice(range(population_size))]\n",
160 | " crossover_point = np.random.randint(4)\n",
161 | " child = np.zeros(4) # Initialize child as an array of zeros\n",
162 | " child[:crossover_point] = parent1[:crossover_point]\n",
163 | " child[crossover_point:] = parent2[crossover_point:]\n",
164 | " # Apply mutation\n",
165 | " mutation_mask = (np.random.rand(4) < mutation_rate).astype(int)\n",
166 | " child = (child + mutation_mask) % 2\n",
167 | " new_population.append(child)\n",
168 | "\n",
169 | " population = np.array(new_population)\n",
170 | "\n",
171 | " return best_individual, best_fitness"
172 | ],
173 | "outputs": [],
174 | "execution_count": 4,
175 | "metadata": {
176 | "collapsed": true,
177 | "jupyter": {
178 | "source_hidden": false,
179 | "outputs_hidden": false
180 | },
181 | "nteract": {
182 | "transient": {
183 | "deleting": false
184 | }
185 | },
186 | "execution": {
187 | "iopub.status.busy": "2023-10-31T22:01:04.840Z",
188 | "iopub.execute_input": "2023-10-31T22:01:04.842Z",
189 | "iopub.status.idle": "2023-10-31T22:01:04.848Z",
190 | "shell.execute_reply": "2023-10-31T22:01:04.863Z"
191 | }
192 | }
193 | },
194 | {
195 | "cell_type": "code",
196 | "source": [
197 | "# Set genetic algorithm parameters\n",
198 | "population_size = 100\n",
199 | "num_generations = 100\n",
200 | "mutation_rate = 0.01\n",
201 | "\n",
202 | "# Run the genetic algorithm\n",
203 | "best_individual, best_fitness = genetic_algorithm(population_size, num_generations, mutation_rate)\n",
204 | "\n",
205 | "print(\"Best Individual:\", best_individual)\n",
206 | "print(\"Best Fitness:\", best_fitness)"
207 | ],
208 | "outputs": [
209 | {
210 | "output_type": "stream",
211 | "name": "stdout",
212 | "text": [
213 | "Generation 0: Best Fitness = 0\n",
214 | "Generation 10: Best Fitness = 0\n",
215 | "Generation 20: Best Fitness = 0\n",
216 | "Generation 30: Best Fitness = 0\n",
217 | "Generation 40: Best Fitness = 0\n",
218 | "Generation 50: Best Fitness = 0\n",
219 | "Generation 60: Best Fitness = 0\n",
220 | "Generation 70: Best Fitness = 0\n",
221 | "Generation 80: Best Fitness = 0\n",
222 | "Generation 90: Best Fitness = 0\n",
223 | "Best Individual: [1. 1. 1. 1.]\n",
224 | "Best Fitness: 0\n"
225 | ]
226 | }
227 | ],
228 | "execution_count": 5,
229 | "metadata": {
230 | "collapsed": true,
231 | "jupyter": {
232 | "source_hidden": false,
233 | "outputs_hidden": false
234 | },
235 | "nteract": {
236 | "transient": {
237 | "deleting": false
238 | }
239 | },
240 | "execution": {
241 | "iopub.status.busy": "2023-10-31T22:01:04.852Z",
242 | "iopub.execute_input": "2023-10-31T22:01:04.855Z",
243 | "iopub.status.idle": "2023-10-31T22:01:05.294Z",
244 | "shell.execute_reply": "2023-10-31T22:01:05.301Z"
245 | }
246 | }
247 | }
248 | ],
249 | "metadata": {
250 | "kernel_info": {
251 | "name": "python3"
252 | },
253 | "language_info": {
254 | "name": "python",
255 | "version": "3.7.6",
256 | "mimetype": "text/x-python",
257 | "codemirror_mode": {
258 | "name": "ipython",
259 | "version": 3
260 | },
261 | "pygments_lexer": "ipython3",
262 | "nbconvert_exporter": "python",
263 | "file_extension": ".py"
264 | },
265 | "kernelspec": {
266 | "argv": [
267 | "C:/Users/Tin Hang/anaconda3\\python.exe",
268 | "-m",
269 | "ipykernel_launcher",
270 | "-f",
271 | "{connection_file}"
272 | ],
273 | "display_name": "Python 3",
274 | "language": "python",
275 | "name": "python3"
276 | },
277 | "nteract": {
278 | "version": "0.28.0"
279 | }
280 | },
281 | "nbformat": 4,
282 | "nbformat_minor": 0
283 | }
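
The placeholder fitness function above always returns 0, which is why every generation logs "Best Fitness = 0" and the returned individual is arbitrary. A minimal sketch of one possible objective, assuming (this choice is not in the original notebook) that each of the 4 bits toggles one engineered feature and fitness is the mean absolute correlation of the selected features with next-day returns:

# Hedged sketch: a concrete fitness function for the 4-bit individuals.
# Assumption (not from the original notebook): bit i selects FEATURES[i],
# and fitness is the mean |correlation| with next-day returns.
FEATURES = ['Open_Close', 'High_Low', 'Increase_Decrease', 'Buy_Sell_on_Open']

def make_fitness(dataset):
    target = dataset['Returns'].shift(-1).dropna()   # next-day return
    frame = dataset[FEATURES].iloc[:-1]              # align rows with target
    def fitness_function(individual):
        selected = [f for f, bit in zip(FEATURES, individual) if bit == 1]
        if not selected:
            return 0.0  # an empty feature set scores zero
        return float(frame[selected].corrwith(target).abs().mean())
    return fitness_function

# Usage: assign fitness_function = make_fitness(dataset) before running the GA,
# in place of the placeholder that returns 0.
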
--------------------------------------------------------------------------------
/Stock_Algorithms/Gradient_Boosting_Regressor.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# Gradient Boosting Regressor"
7 | ],
8 | "metadata": {}
9 | },
10 | {
11 | "cell_type": "code",
12 | "source": [
13 | "import numpy as np\n",
14 | "import matplotlib.pyplot as plt\n",
15 | "import pandas as pd\n",
16 | "\n",
17 | "import warnings\n",
18 | "warnings.filterwarnings(\"ignore\")\n",
19 | "\n",
    20 |     "# fix_yahoo_finance is used to fetch data (the package was later renamed to yfinance)\n",
21 | "import fix_yahoo_finance as yf\n",
22 | "yf.pdr_override()"
23 | ],
24 | "outputs": [],
25 | "execution_count": null,
26 | "metadata": {
27 | "collapsed": false,
28 | "outputHidden": false,
29 | "inputHidden": false
30 | }
31 | },
32 | {
33 | "cell_type": "code",
34 | "source": [
35 | "# input\n",
36 | "symbol = 'AMD'\n",
37 | "start = '2007-01-01'\n",
38 | "end = '2018-11-16'\n",
39 | "\n",
40 | "# Read data \n",
41 | "dataset = yf.download(symbol,start,end)\n",
42 | "\n",
43 | "# View Columns\n",
44 | "dataset.head()"
45 | ],
46 | "outputs": [],
47 | "execution_count": null,
48 | "metadata": {
49 | "collapsed": false,
50 | "outputHidden": false,
51 | "inputHidden": false
52 | }
53 | },
54 | {
55 | "cell_type": "code",
56 | "source": [
57 | "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
58 | "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
59 | "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
60 | "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
61 | "dataset = dataset.dropna()\n",
62 | "dataset.head()"
63 | ],
64 | "outputs": [],
65 | "execution_count": null,
66 | "metadata": {
67 | "collapsed": false,
68 | "outputHidden": false,
69 | "inputHidden": false
70 | }
71 | },
72 | {
73 | "cell_type": "code",
74 | "source": [
75 | "X = dataset[['Open', 'High', 'Low', 'Volume']].values\n",
76 | "y = dataset['Adj Close'].values"
77 | ],
78 | "outputs": [],
79 | "execution_count": null,
80 | "metadata": {
81 | "collapsed": false,
82 | "outputHidden": false,
83 | "inputHidden": false
84 | }
85 | },
86 | {
87 | "cell_type": "code",
88 | "source": [
89 | "# from sklearn.cross_validation import train_test_split\n",
90 | "from sklearn.model_selection import train_test_split\n",
    91 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/4, random_state=0)"
92 | ],
93 | "outputs": [],
94 | "execution_count": null,
95 | "metadata": {
96 | "collapsed": false,
97 | "outputHidden": false,
98 | "inputHidden": false
99 | }
100 | },
101 | {
102 | "cell_type": "code",
103 | "source": [
104 | "from sklearn.ensemble import GradientBoostingRegressor\n",
105 | "\n",
106 | "gb = GradientBoostingRegressor(max_depth=4, \n",
107 | " n_estimators=200,\n",
108 | " random_state=2)"
109 | ],
110 | "outputs": [],
111 | "execution_count": null,
112 | "metadata": {
113 | "collapsed": false,
114 | "outputHidden": false,
115 | "inputHidden": false
116 | }
117 | },
118 | {
119 | "cell_type": "code",
120 | "source": [
121 | "# Fit gb to the training set\n",
122 | "gb.fit(X_train, y_train)\n",
123 | "\n",
124 | "# Predict test set labels\n",
125 | "y_pred = gb.predict(X_test)"
126 | ],
127 | "outputs": [],
128 | "execution_count": null,
129 | "metadata": {
130 | "collapsed": false,
131 | "outputHidden": false,
132 | "inputHidden": false
133 | }
134 | },
135 | {
136 | "cell_type": "code",
137 | "source": [
138 | "from sklearn.metrics import mean_squared_error as MSE\n",
139 | "\n",
140 | "# Compute MSE\n",
141 | "mse_test = MSE(y_test, y_pred)\n",
142 | "\n",
143 | "# Compute RMSE\n",
144 | "rmse_test = mse_test**(1/2)\n",
145 | "\n",
146 | "# Print RMSE\n",
147 | "print('Test set RMSE of gb: {:.3f}'.format(rmse_test))"
148 | ],
149 | "outputs": [],
150 | "execution_count": null,
151 | "metadata": {
152 | "collapsed": false,
153 | "outputHidden": false,
154 | "inputHidden": false
155 | }
156 | }
157 | ],
158 | "metadata": {
159 | "kernel_info": {
160 | "name": "python3"
161 | },
162 | "language_info": {
163 | "pygments_lexer": "ipython3",
164 | "version": "3.5.5",
165 | "codemirror_mode": {
166 | "version": 3,
167 | "name": "ipython"
168 | },
169 | "nbconvert_exporter": "python",
170 | "name": "python",
171 | "mimetype": "text/x-python",
172 | "file_extension": ".py"
173 | },
174 | "kernelspec": {
175 | "name": "python3",
176 | "language": "python",
177 | "display_name": "Python 3"
178 | },
179 | "nteract": {
180 | "version": "0.12.2"
181 | }
182 | },
183 | "nbformat": 4,
184 | "nbformat_minor": 4
185 | }
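
Once gb is fitted, its feature_importances_ attribute shows how much each of the four inputs contributes to the boosted ensemble, which is a quick sanity check on the model. A short sketch, assuming the Open/High/Low/Volume matrix built above:

# Hedged sketch: inspect the fitted ensemble's feature importances.
import pandas as pd

feature_names = ['Open', 'High', 'Low', 'Volume']
importances = pd.Series(gb.feature_importances_, index=feature_names)
print(importances.sort_values(ascending=False))
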
--------------------------------------------------------------------------------
/Stock_Algorithms/Hyperparameter_Tuning.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# Hyperparameter Tuning"
7 | ],
8 | "metadata": {}
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "source": [
    13 |     "In machine learning, a hyperparameter is a parameter whose value is set before the learning process begins. By contrast, the values of other parameters are derived via training. Different model training algorithms require different hyperparameters; some simple algorithms require none. (Wikipedia)\n",
    14 |     "\n",
    15 |     "\n",
    16 |     "Hyperparameter tuning optimizes a model against a single evaluation measure, called the \"hyperparameter metric.\"\n",
    17 |     "\n",
    18 |     "A model hyperparameter is a configuration value that is external to the model and cannot be estimated from the data.\n"
19 | ],
20 | "metadata": {}
21 | },
22 | {
23 | "cell_type": "code",
24 | "source": [
25 | "import numpy as np\n",
26 | "import pandas as pd\n",
27 | "import matplotlib.pyplot as plt\n",
28 | "\n",
29 | "import warnings\n",
30 | "warnings.filterwarnings(\"ignore\")\n",
31 | "\n",
32 | "# yahoo finance is used to fetch data \n",
33 | "import yfinance as yf\n",
34 | "yf.pdr_override()"
35 | ],
36 | "outputs": [],
37 | "execution_count": 1,
38 | "metadata": {
39 | "collapsed": false,
40 | "outputHidden": false,
41 | "inputHidden": false
42 | }
43 | },
44 | {
45 | "cell_type": "code",
46 | "source": [
47 | "# input\n",
48 | "symbol = 'AMD'\n",
49 | "start = '2014-01-01'\n",
50 | "end = '2018-08-27'\n",
51 | "\n",
52 | "# Read data \n",
53 | "dataset = yf.download(symbol,start,end)\n",
54 | "\n",
55 | "# View Columns\n",
56 | "dataset.head()"
57 | ],
58 | "outputs": [
59 | {
60 | "output_type": "stream",
61 | "name": "stdout",
62 | "text": [
63 | "[*********************100%***********************] 1 of 1 downloaded\n"
64 | ]
65 | },
66 | {
67 | "output_type": "execute_result",
68 | "execution_count": 2,
69 | "data": {
71 | "text/plain": " Open High Low Close Adj Close Volume\nDate \n2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700"
72 | },
73 | "metadata": {}
74 | }
75 | ],
76 | "execution_count": 2,
77 | "metadata": {
78 | "collapsed": false,
79 | "outputHidden": false,
80 | "inputHidden": false
81 | }
82 | },
83 | {
84 | "cell_type": "code",
85 | "source": [
86 | "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
87 | "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
88 | "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
89 | "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
90 | "dataset = dataset.dropna()\n",
91 | "dataset.head()"
92 | ],
93 | "outputs": [
94 | {
95 | "output_type": "execute_result",
96 | "execution_count": 3,
97 | "data": {
99 | "text/plain": " Open High Low Close Adj Close Volume Increase_Decrease \\\nDate \n2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200 1 \n2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300 1 \n2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100 0 \n2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700 0 \n2014-01-09 4.20 4.23 4.05 4.09 4.09 30667600 0 \n\n Buy_Sell_on_Open Buy_Sell Returns \nDate \n2014-01-03 1 1 0.012658 \n2014-01-06 1 1 0.032500 \n2014-01-07 1 0 0.012107 \n2014-01-08 0 0 0.000000 \n2014-01-09 0 1 -0.021531 "
100 | },
101 | "metadata": {}
102 | }
103 | ],
104 | "execution_count": 3,
105 | "metadata": {
106 | "collapsed": false,
107 | "outputHidden": false,
108 | "inputHidden": false
109 | }
110 | },
111 | {
112 | "cell_type": "code",
113 | "source": [
114 | "X = np.array(dataset['Open']).reshape(-1, 1)\n",
115 | "y = np.array(dataset['Buy_Sell']).reshape(-1, 1)"
116 | ],
117 | "outputs": [],
118 | "execution_count": 4,
119 | "metadata": {
120 | "collapsed": false,
121 | "outputHidden": false,
122 | "inputHidden": false
123 | }
124 | },
125 | {
126 | "cell_type": "code",
127 | "source": [
128 | "# Create logistic regression\n",
129 | "from sklearn import linear_model\n",
130 | "\n",
131 | "logistic = linear_model.LogisticRegression()"
132 | ],
133 | "outputs": [],
134 | "execution_count": 5,
135 | "metadata": {
136 | "collapsed": false,
137 | "outputHidden": false,
138 | "inputHidden": false
139 | }
140 | },
155 | {
156 | "cell_type": "code",
157 | "source": [
158 | "# Create regularization penalty space\n",
159 | "penalty = ['l1', 'l2']\n",
160 | "\n",
161 | "# Create regularization hyperparameter space\n",
162 | "C = np.logspace(0, 4, 10)\n",
163 | "\n",
164 | "# Create hyperparameter options\n",
165 | "hyperparameters = dict(C=C, penalty=penalty)"
166 | ],
167 | "outputs": [],
168 | "execution_count": 7,
169 | "metadata": {
170 | "collapsed": false,
171 | "outputHidden": false,
172 | "inputHidden": false
173 | }
174 | },
175 | {
176 | "cell_type": "code",
177 | "source": [
178 | "# Create grid search using 5-fold cross validation\n",
179 | "from sklearn.model_selection import GridSearchCV\n",
180 | "\n",
181 | "clf = GridSearchCV(logistic, hyperparameters, cv=5, verbose=0)"
182 | ],
183 | "outputs": [],
184 | "execution_count": 8,
185 | "metadata": {
186 | "collapsed": false,
187 | "outputHidden": false,
188 | "inputHidden": false
189 | }
190 | },
191 | {
192 | "cell_type": "code",
193 | "source": [
194 | "# Fit grid search\n",
195 | "best_model = clf.fit(X, y)"
196 | ],
197 | "outputs": [],
198 | "execution_count": 9,
199 | "metadata": {
200 | "collapsed": false,
201 | "outputHidden": false,
202 | "inputHidden": false
203 | }
204 | },
205 | {
206 | "cell_type": "code",
207 | "source": [
208 | "# View best hyperparameters\n",
209 | "print('Best Penalty:', best_model.best_estimator_.get_params()['penalty'])\n",
210 | "print('Best C:', best_model.best_estimator_.get_params()['C'])"
211 | ],
212 | "outputs": [
213 | {
214 | "output_type": "stream",
215 | "name": "stdout",
216 | "text": [
217 | "Best Penalty: l1\n",
218 | "Best C: 1.0\n"
219 | ]
220 | }
221 | ],
222 | "execution_count": 10,
223 | "metadata": {
224 | "collapsed": false,
225 | "outputHidden": false,
226 | "inputHidden": false
227 | }
228 | },
229 | {
230 | "cell_type": "code",
231 | "source": [
232 | "# Predict target vector\n",
233 | "best_model.predict(X)"
234 | ],
235 | "outputs": [
236 | {
237 | "output_type": "execute_result",
238 | "execution_count": 11,
239 | "data": {
240 | "text/plain": "array([0, 0, 0, ..., 1, 1, 1])"
241 | },
242 | "metadata": {}
243 | }
244 | ],
245 | "execution_count": 11,
246 | "metadata": {
247 | "collapsed": false,
248 | "outputHidden": false,
249 | "inputHidden": false
250 | }
251 | }
252 | ],
253 | "metadata": {
254 | "kernel_info": {
255 | "name": "python3"
256 | },
257 | "language_info": {
258 | "codemirror_mode": {
259 | "version": 3,
260 | "name": "ipython"
261 | },
262 | "name": "python",
263 | "file_extension": ".py",
264 | "mimetype": "text/x-python",
265 | "nbconvert_exporter": "python",
266 | "version": "3.5.5",
267 | "pygments_lexer": "ipython3"
268 | },
269 | "kernelspec": {
270 | "name": "python3",
271 | "language": "python",
272 | "display_name": "Python 3"
273 | },
274 | "nteract": {
275 | "version": "0.28.0"
276 | }
277 | },
278 | "nbformat": 4,
279 | "nbformat_minor": 4
280 | }
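
Grid search enumerates every (C, penalty) combination, 20 candidate fits per fold here. When the search space grows, a common alternative is randomized search, which samples a fixed number of candidate settings. A sketch under the same setup (the continuous C distribution is an illustrative choice, not from the notebook):

# Hedged sketch: randomized hyperparameter search over the same space.
from scipy.stats import uniform
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV

logistic = LogisticRegression(solver='liblinear')  # a solver that supports l1
distributions = dict(C=uniform(loc=0, scale=4), penalty=['l1', 'l2'])
clf = RandomizedSearchCV(logistic, distributions, n_iter=20, cv=5, random_state=0)
# best_model = clf.fit(X, y)  # X, y as defined in the notebook above
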
--------------------------------------------------------------------------------
/Stock_Algorithms/K_Means_Clustering_Part2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# K Means Clustering Part 2"
7 | ],
8 | "metadata": {
9 | "nteract": {
10 | "transient": {
11 | "deleting": false
12 | }
13 | }
14 | }
15 | },
16 | {
17 | "cell_type": "code",
18 | "source": [
19 | "import pandas as pd\n",
20 | "import numpy as np\n",
21 | "import pylab as pl\n",
22 | "import datetime as dt\n",
23 | "from math import sqrt\n",
24 | "\n",
25 | "import warnings\n",
26 | "warnings.filterwarnings(\"ignore\")\n",
27 | "\n",
28 | "# yahoo finance used to fetch data \n",
29 | "import yfinance as yf\n",
30 | "yf.pdr_override()\n",
31 | "\n",
32 | "from sklearn.cluster import KMeans"
33 | ],
34 | "outputs": [],
35 | "execution_count": 1,
36 | "metadata": {
37 | "collapsed": true,
38 | "jupyter": {
39 | "source_hidden": false,
40 | "outputs_hidden": false
41 | },
42 | "nteract": {
43 | "transient": {
44 | "deleting": false
45 | }
46 | },
47 | "execution": {
48 | "iopub.status.busy": "2022-04-07T00:41:22.144Z",
49 | "iopub.execute_input": "2022-04-07T00:41:22.149Z",
50 | "iopub.status.idle": "2022-04-07T00:41:23.358Z",
51 | "shell.execute_reply": "2022-04-07T00:41:23.351Z"
52 | }
53 | }
54 | },
55 | {
56 | "cell_type": "code",
57 | "source": [
    58 |     "from yahoo_fin import stock_info as si  # import needed for tickers_dow(); missing in the original cell\n",
    58 |     "\n",
    58 |     "stocks = si.tickers_dow()\n",
    59 |     "stocks"
60 | ],
    61 |    "outputs": [],
74 | "execution_count": 2,
75 | "metadata": {
76 | "collapsed": true,
77 | "jupyter": {
78 | "source_hidden": false,
79 | "outputs_hidden": false
80 | },
81 | "nteract": {
82 | "transient": {
83 | "deleting": false
84 | }
85 | },
86 | "execution": {
87 | "iopub.status.busy": "2022-04-07T00:41:23.366Z",
88 | "iopub.execute_input": "2022-04-07T00:41:23.371Z",
89 | "iopub.status.idle": "2022-04-07T00:37:28.222Z",
90 | "shell.execute_reply": "2022-04-07T00:37:28.230Z"
91 | }
92 | }
93 | },
94 | {
95 | "cell_type": "code",
96 | "source": [
97 | "start = dt.datetime(2020, 1, 1)\n",
98 | "now = dt.datetime.now()\n"
99 | ],
100 | "outputs": [],
101 | "execution_count": null,
102 | "metadata": {
103 | "collapsed": true,
104 | "jupyter": {
105 | "source_hidden": false,
106 | "outputs_hidden": false
107 | },
108 | "nteract": {
109 | "transient": {
110 | "deleting": false
111 | }
112 | },
113 | "execution": {
114 | "iopub.status.busy": "2022-04-07T00:37:30.620Z",
115 | "iopub.execute_input": "2022-04-07T00:37:30.623Z",
116 | "shell.execute_reply": "2022-04-07T00:37:30.635Z",
117 | "iopub.status.idle": "2022-04-07T00:37:30.628Z"
118 | }
119 | }
120 | },
121 | {
122 | "cell_type": "code",
123 | "source": [
124 | "df = yf.download(stocks, start, now)['Adj Close']\n",
125 | "df.head()"
126 | ],
127 | "outputs": [],
128 | "execution_count": null,
129 | "metadata": {
130 | "collapsed": true,
131 | "jupyter": {
132 | "source_hidden": false,
133 | "outputs_hidden": false
134 | },
135 | "nteract": {
136 | "transient": {
137 | "deleting": false
138 | }
139 | },
140 | "execution": {
141 | "iopub.status.busy": "2022-04-07T00:37:47.765Z",
142 | "iopub.execute_input": "2022-04-07T00:37:47.768Z",
143 | "iopub.status.idle": "2022-04-07T00:37:50.505Z",
144 | "shell.execute_reply": "2022-04-07T00:37:50.553Z"
145 | }
146 | }
147 | },
148 | {
149 | "cell_type": "code",
150 | "source": [
151 | "returns = df.pct_change().mean() * 252\n",
    152 |     "variance = df.pct_change().std() * sqrt(252)  # annualized standard deviation (volatility)\n",
153 | "returns.columns = [\"Returns\"]\n",
154 | "variance.columns = [\"Variance\"]"
155 | ],
156 | "outputs": [],
157 | "execution_count": null,
158 | "metadata": {
159 | "collapsed": true,
160 | "jupyter": {
161 | "source_hidden": false,
162 | "outputs_hidden": false
163 | },
164 | "nteract": {
165 | "transient": {
166 | "deleting": false
167 | }
168 | },
169 | "execution": {
170 | "iopub.status.busy": "2022-04-07T00:38:41.490Z",
171 | "iopub.execute_input": "2022-04-07T00:38:41.493Z",
172 | "iopub.status.idle": "2022-04-07T00:38:41.500Z",
173 | "shell.execute_reply": "2022-04-07T00:38:41.516Z"
174 | }
175 | }
176 | },
177 | {
178 | "cell_type": "code",
179 | "source": [
180 | "ret_var = pd.concat([returns, variance], axis = 1).dropna()\n",
181 | "ret_var.columns = [\"Returns\", \"Variance\"]"
182 | ],
183 | "outputs": [],
184 | "execution_count": null,
185 | "metadata": {
186 | "collapsed": true,
187 | "jupyter": {
188 | "source_hidden": false,
189 | "outputs_hidden": false
190 | },
191 | "nteract": {
192 | "transient": {
193 | "deleting": false
194 | }
195 | },
196 | "execution": {
197 | "iopub.status.busy": "2022-04-07T00:38:55.905Z",
198 | "iopub.execute_input": "2022-04-07T00:38:55.908Z",
199 | "shell.execute_reply": "2022-04-07T00:38:55.919Z",
200 | "iopub.status.idle": "2022-04-07T00:38:55.926Z"
201 | }
202 | }
203 | },
204 | {
205 | "cell_type": "code",
206 | "source": [
207 | "X = ret_var.values\n",
208 | "sse = []\n",
209 | "\n",
210 | "for k in range(2,15):\n",
211 | " \n",
212 | " kmeans = KMeans(n_clusters = k)\n",
213 | " kmeans.fit(X)\n",
214 | " \n",
215 | " sse.append(kmeans.inertia_) #SSE for each n_clusters\n",
216 | "pl.plot(range(2,15), sse)\n",
217 | "pl.title(\"Elbow Curve\")\n",
219 | "pl.show()"
220 | ],
221 | "outputs": [],
222 | "execution_count": null,
223 | "metadata": {
224 | "collapsed": true,
225 | "jupyter": {
226 | "source_hidden": false,
227 | "outputs_hidden": false
228 | },
229 | "nteract": {
230 | "transient": {
231 | "deleting": false
232 | }
233 | },
234 | "execution": {
235 | "iopub.status.busy": "2022-04-07T00:39:58.118Z",
236 | "iopub.execute_input": "2022-04-07T00:39:58.122Z",
237 | "shell.execute_reply": "2022-04-07T00:39:58.523Z",
238 | "iopub.status.idle": "2022-04-07T00:39:58.515Z"
239 | }
240 | }
241 | },
242 | {
243 | "cell_type": "code",
244 | "source": [
245 | "kmeans = KMeans(n_clusters = 5).fit(X)\n",
246 | "centroids = kmeans.cluster_centers_\n",
247 | "pl.scatter(X[:,0],X[:,1], c = kmeans.labels_, cmap =\"rainbow\")\n",
248 | "pl.show()"
249 | ],
250 | "outputs": [],
251 | "execution_count": null,
252 | "metadata": {
253 | "collapsed": true,
254 | "jupyter": {
255 | "source_hidden": false,
256 | "outputs_hidden": false
257 | },
258 | "nteract": {
259 | "transient": {
260 | "deleting": false
261 | }
262 | },
263 | "execution": {
264 | "iopub.status.busy": "2022-04-07T00:40:02.008Z",
265 | "iopub.execute_input": "2022-04-07T00:40:02.013Z",
266 | "shell.execute_reply": "2022-04-07T00:40:02.101Z",
267 | "iopub.status.idle": "2022-04-07T00:40:02.108Z"
268 | }
269 | }
270 | },
271 | {
272 | "cell_type": "code",
273 | "source": [
274 | "X = ret_var.values\n",
275 | "kmeans =KMeans(n_clusters = 5).fit(X)\n",
276 | "centroids = kmeans.cluster_centers_\n",
277 | "pl.scatter(X[:,0],X[:,1], c = kmeans.labels_, cmap =\"rainbow\")\n",
278 | "pl.show()"
279 | ],
280 | "outputs": [],
281 | "execution_count": null,
282 | "metadata": {
283 | "collapsed": true,
284 | "jupyter": {
285 | "source_hidden": false,
286 | "outputs_hidden": false
287 | },
288 | "nteract": {
289 | "transient": {
290 | "deleting": false
291 | }
292 | },
293 | "execution": {
294 | "iopub.status.busy": "2022-04-07T00:40:12.616Z",
295 | "iopub.execute_input": "2022-04-07T00:40:12.620Z",
296 | "shell.execute_reply": "2022-04-07T00:40:12.703Z",
297 | "iopub.status.idle": "2022-04-07T00:40:12.709Z"
298 | }
299 | }
300 | },
301 | {
302 | "cell_type": "code",
303 | "source": [
304 | "Companies = pd.DataFrame(ret_var.index)\n",
305 | "cluster_labels = pd.DataFrame(kmeans.labels_)\n",
306 | "df = pd.concat([Companies, cluster_labels],axis = 1)\n",
307 | "df.columns = ['Stock', 'Cluster Labels']\n",
308 | "df.set_index('Stock')"
309 | ],
310 | "outputs": [],
311 | "execution_count": null,
312 | "metadata": {
313 | "collapsed": true,
314 | "jupyter": {
315 | "source_hidden": false,
316 | "outputs_hidden": false
317 | },
318 | "nteract": {
319 | "transient": {
320 | "deleting": false
321 | }
322 | },
323 | "execution": {
324 | "iopub.status.busy": "2022-04-07T00:41:01.920Z",
325 | "iopub.execute_input": "2022-04-07T00:41:01.925Z",
326 | "iopub.status.idle": "2022-04-07T00:41:01.936Z",
327 | "shell.execute_reply": "2022-04-07T00:41:01.950Z"
328 | }
329 | }
330 | },
331 | {
332 | "cell_type": "code",
333 | "source": [
334 | "df"
335 | ],
336 | "outputs": [],
337 | "execution_count": null,
338 | "metadata": {
339 | "collapsed": true,
340 | "jupyter": {
341 | "source_hidden": false,
342 | "outputs_hidden": false
343 | },
344 | "nteract": {
345 | "transient": {
346 | "deleting": false
347 | }
348 | },
349 | "execution": {
350 | "iopub.status.busy": "2022-04-07T00:41:12.197Z",
351 | "iopub.execute_input": "2022-04-07T00:41:12.202Z",
352 | "iopub.status.idle": "2022-04-07T00:41:12.212Z",
353 | "shell.execute_reply": "2022-04-07T00:41:12.226Z"
354 | }
355 | }
356 | }
357 | ],
358 | "metadata": {
359 | "kernel_info": {
360 | "name": "python3"
361 | },
362 | "language_info": {
363 | "name": "python",
364 | "version": "3.6.13",
365 | "mimetype": "text/x-python",
366 | "codemirror_mode": {
367 | "name": "ipython",
368 | "version": 3
369 | },
370 | "pygments_lexer": "ipython3",
371 | "nbconvert_exporter": "python",
372 | "file_extension": ".py"
373 | },
374 | "kernelspec": {
375 | "argv": [
376 | "C:/Users/Tin Hang/Anaconda3\\python.exe",
377 | "-m",
378 | "ipykernel_launcher",
379 | "-f",
380 | "{connection_file}"
381 | ],
382 | "display_name": "Python 3",
383 | "language": "python",
384 | "name": "python3"
385 | },
386 | "nteract": {
387 | "version": "0.28.0"
388 | }
389 | },
390 | "nbformat": 4,
391 | "nbformat_minor": 0
392 | }
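
The elbow curve gives a visual guess at k; silhouette scores give a numeric cross-check (values closer to 1 mean tighter, better-separated clusters). A sketch using the same X = ret_var.values built above:

# Hedged sketch: silhouette analysis as a complement to the elbow curve.
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

for k in range(2, 15):
    labels = KMeans(n_clusters=k).fit_predict(X)
    print(k, round(silhouette_score(X, labels), 3))
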
--------------------------------------------------------------------------------
/Stock_Algorithms/Multiple_Linear_Regression_with_Normalize_Data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# Multiple Linear Regression with Normalize Data"
7 | ],
8 | "metadata": {}
9 | },
10 | {
11 | "cell_type": "code",
12 | "source": [
13 | "# Importing the libraries\n",
14 | "import pandas as pd\n",
15 | "import numpy as np\n",
16 | "import matplotlib.pyplot as plt\n",
17 | "\n",
18 | "import warnings\n",
19 | "warnings.filterwarnings(\"ignore\")\n",
20 | "\n",
    21 |     "# fix_yahoo_finance is used to fetch data (the package was later renamed to yfinance)\n",
22 | "import fix_yahoo_finance as yf\n",
23 | "yf.pdr_override()"
24 | ],
25 | "outputs": [],
26 | "execution_count": 1,
27 | "metadata": {
28 | "collapsed": false,
29 | "outputHidden": false,
30 | "inputHidden": false
31 | }
32 | },
33 | {
34 | "cell_type": "code",
35 | "source": [
36 | "# input\n",
37 | "symbol = 'AMD'\n",
38 | "start = '2014-01-01'\n",
39 | "end = '2018-08-27'\n",
40 | "\n",
41 | "# Read data \n",
42 | "dataset = yf.download(symbol,start,end)\n",
43 | "\n",
44 | "# View columns \n",
45 | "dataset.head()"
46 | ],
47 | "outputs": [
48 | {
49 | "output_type": "stream",
50 | "name": "stdout",
51 | "text": [
52 | "[*********************100%***********************] 1 of 1 downloaded\n"
53 | ]
54 | },
55 | {
56 | "output_type": "execute_result",
57 | "execution_count": 2,
58 | "data": {
59 | "text/plain": [
60 | " Open High Low Close Adj Close Volume\n",
61 | "Date \n",
62 | "2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n",
63 | "2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n",
64 | "2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n",
65 | "2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n",
66 | "2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700"
    67 |        ]
154 | },
155 | "metadata": {}
156 | }
157 | ],
158 | "execution_count": 2,
159 | "metadata": {
160 | "collapsed": false,
161 | "outputHidden": false,
162 | "inputHidden": false
163 | }
164 | },
165 | {
166 | "cell_type": "code",
167 | "source": [
168 | "X = dataset.iloc[ : , 0:4].values\n",
169 | "Y = np.asanyarray(dataset[['Adj Close']])"
170 | ],
171 | "outputs": [],
172 | "execution_count": 3,
173 | "metadata": {
174 | "collapsed": false,
175 | "outputHidden": false,
176 | "inputHidden": false
177 | }
178 | },
179 | {
180 | "cell_type": "code",
181 | "source": [
182 | "from sklearn import preprocessing\n",
183 | "\n",
184 | "# normalize the data attributes\n",
185 | "normalized_X = preprocessing.normalize(X)"
186 | ],
187 | "outputs": [],
188 | "execution_count": 4,
189 | "metadata": {
190 | "collapsed": false,
191 | "outputHidden": false,
192 | "inputHidden": false
193 | }
194 | },
195 | {
196 | "cell_type": "code",
197 | "source": [
198 | "X = normalized_X[: , 1:]"
199 | ],
200 | "outputs": [],
201 | "execution_count": 5,
202 | "metadata": {
203 | "collapsed": false,
204 | "outputHidden": false,
205 | "inputHidden": false
206 | }
207 | },
208 | {
209 | "cell_type": "code",
210 | "source": [
211 | "# Splitting the dataset into the Training set and Test set\n",
212 | "from sklearn.model_selection import train_test_split\n",
213 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)"
214 | ],
215 | "outputs": [],
216 | "execution_count": 6,
217 | "metadata": {
218 | "collapsed": false,
219 | "outputHidden": false,
220 | "inputHidden": false
221 | }
222 | },
223 | {
224 | "cell_type": "code",
225 | "source": [
226 | "from sklearn.linear_model import LinearRegression\n",
227 | "regressor = LinearRegression()\n",
228 | "regressor.fit(X_train, Y_train)"
229 | ],
230 | "outputs": [
231 | {
232 | "output_type": "execute_result",
233 | "execution_count": 7,
234 | "data": {
235 | "text/plain": [
236 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
237 | " normalize=False)"
238 | ]
239 | },
240 | "metadata": {}
241 | }
242 | ],
243 | "execution_count": 7,
244 | "metadata": {
245 | "collapsed": false,
246 | "outputHidden": false,
247 | "inputHidden": false
248 | }
249 | },
250 | {
251 | "cell_type": "code",
252 | "source": [
253 | "y_pred = regressor.predict(X_test)"
254 | ],
255 | "outputs": [],
256 | "execution_count": 8,
257 | "metadata": {
258 | "collapsed": false,
259 | "outputHidden": false,
260 | "inputHidden": false
261 | }
262 | },
263 | {
264 | "cell_type": "code",
265 | "source": [
266 | "from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, r2_score\n",
267 | "ex_var_score = explained_variance_score(Y_test, y_pred)\n",
268 | "m_absolute_error = mean_absolute_error(Y_test, y_pred)\n",
269 | "m_squared_error = mean_squared_error(Y_test, y_pred)\n",
270 | "r_2_score = r2_score(Y_test, y_pred)\n",
271 | "\n",
272 | "print(\"Explained Variance Score: \"+str(ex_var_score))\n",
273 | "print(\"Mean Absolute Error \"+str(m_absolute_error))\n",
274 | "print(\"Mean Squared Error \"+str(m_squared_error))\n",
275 | "print(\"R Squared Error \"+str(r_2_score))"
276 | ],
277 | "outputs": [
278 | {
279 | "output_type": "stream",
280 | "name": "stdout",
281 | "text": [
282 | "Explained Variance Score: 0.0145762414645\n",
283 | "Mean Absolute Error 4.3559157043\n",
284 | "Mean Squared Error 22.546676437\n",
285 | "R Squared Error 0.0145752513278\n"
286 | ]
287 | }
288 | ],
289 | "execution_count": 9,
290 | "metadata": {
291 | "collapsed": false,
292 | "outputHidden": false,
293 | "inputHidden": false
294 | }
295 | },
296 | {
297 | "cell_type": "code",
298 | "source": [
299 | "print ('Coefficients: ', regressor.coef_)\n",
300 | "print(\"Residual sum of squares: %.2f\"\n",
301 | " % np.mean((y_pred - Y_test) ** 2))\n",
302 | "\n",
303 | "# Explained variance score: 1 is perfect prediction\n",
    304 |     "print('Variance score: %.2f' % regressor.score(X_test, Y_test))"
305 | ],
306 | "outputs": [
307 | {
308 | "output_type": "stream",
309 | "name": "stdout",
310 | "text": [
311 | "Coefficients: [[-79.79361894 -53.18582378 15.74315198]]\n",
312 | "Residual sum of squares: 22.55\n",
    313 |      "Variance score: 0.01\n"
314 | ]
315 | }
316 | ],
317 | "execution_count": 10,
318 | "metadata": {
319 | "collapsed": false,
320 | "outputHidden": false,
321 | "inputHidden": false
322 | }
323 | },
324 | {
325 | "cell_type": "code",
326 | "source": [
    327 |     "print('Multiple Linear Score:', regressor.score(X_test, Y_test))"
328 | ],
329 | "outputs": [
330 | {
331 | "output_type": "stream",
332 | "name": "stdout",
333 | "text": [
334 | "Multiple Linear Score: 0.0145752513278\n"
335 | ]
336 | }
337 | ],
338 | "execution_count": 12,
339 | "metadata": {
340 | "collapsed": false,
341 | "outputHidden": false,
342 | "inputHidden": false
343 | }
344 | }
345 | ],
346 | "metadata": {
347 | "kernel_info": {
348 | "name": "python3"
349 | },
350 | "language_info": {
351 | "codemirror_mode": {
352 | "name": "ipython",
353 | "version": 3
354 | },
355 | "name": "python",
356 | "nbconvert_exporter": "python",
357 | "file_extension": ".py",
358 | "pygments_lexer": "ipython3",
359 | "version": "3.5.5",
360 | "mimetype": "text/x-python"
361 | },
362 | "kernelspec": {
363 | "name": "python3",
364 | "language": "python",
365 | "display_name": "Python 3"
366 | },
367 | "nteract": {
368 | "version": "0.14.5"
369 | }
370 | },
371 | "nbformat": 4,
372 | "nbformat_minor": 0
373 | }
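
One caveat on the preprocessing above: sklearn's preprocessing.normalize rescales each row (sample) to unit norm, which discards the absolute price level and goes a long way toward explaining the near-zero R-squared on the test set. Column-wise standardization is the usual alternative; a sketch (StandardScaler is a suggested substitution, not what the notebook used):

# Hedged sketch: column-wise feature scaling instead of row-wise normalize().
# preprocessing.normalize() above scales each *sample* to unit norm; for
# feature scaling, StandardScaler standardizes each *column* instead.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(dataset.iloc[:, 0:4].values)  # per-column z-scores
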
--------------------------------------------------------------------------------
/Stock_Algorithms/Principal_Component_Regression.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# Principal Component Regression (PCR)"
7 | ],
8 | "metadata": {}
9 | },
10 | {
11 | "cell_type": "code",
12 | "source": [
13 | "import numpy as np\n",
14 | "import matplotlib.pyplot as plt\n",
15 | "import pandas as pd\n",
16 | "\n",
17 | "import warnings\n",
18 | "warnings.filterwarnings(\"ignore\")\n",
19 | "\n",
    20 |     "# fix_yahoo_finance is used to fetch data (the package was later renamed to yfinance)\n",
21 | "import fix_yahoo_finance as yf\n",
22 | "yf.pdr_override()"
23 | ],
24 | "outputs": [],
25 | "execution_count": 1,
26 | "metadata": {
27 | "collapsed": false,
28 | "outputHidden": false,
29 | "inputHidden": false
30 | }
31 | },
32 | {
33 | "cell_type": "code",
34 | "source": [
35 | "# input\n",
36 | "symbol = 'AMD'\n",
37 | "start = '2014-01-01'\n",
38 | "end = '2019-01-01'\n",
39 | "\n",
40 | "# Read data \n",
41 | "dataset = yf.download(symbol,start,end)\n",
42 | "\n",
43 | "# View Columns\n",
44 | "dataset.head()"
45 | ],
46 | "outputs": [
47 | {
48 | "output_type": "stream",
49 | "name": "stdout",
50 | "text": [
51 | "[*********************100%***********************] 1 of 1 downloaded\n"
52 | ]
53 | },
54 | {
55 | "output_type": "execute_result",
56 | "execution_count": 2,
57 | "data": {
144 | "text/plain": [
145 | " Open High Low Close Adj Close Volume\n",
146 | "Date \n",
147 | "2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n",
148 | "2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n",
149 | "2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n",
150 | "2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n",
151 | "2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700"
152 | ]
153 | },
154 | "metadata": {}
155 | }
156 | ],
157 | "execution_count": 2,
158 | "metadata": {
159 | "collapsed": false,
160 | "outputHidden": false,
161 | "inputHidden": false
162 | }
163 | },
164 | {
165 | "cell_type": "code",
166 | "source": [
167 | "X = dataset.iloc[ : , 1].values\n",
168 | "Y = dataset.iloc[ : , 4].values"
169 | ],
170 | "outputs": [],
171 | "execution_count": 3,
172 | "metadata": {
173 | "collapsed": false,
174 | "outputHidden": false,
175 | "inputHidden": false
176 | }
177 | },
178 | {
179 | "cell_type": "code",
180 | "source": [
181 | "print(X.shape)\n",
182 | "print(Y.shape)"
183 | ],
184 | "outputs": [
185 | {
186 | "output_type": "stream",
187 | "name": "stdout",
188 | "text": [
189 | "(1258,)\n",
190 | "(1258,)\n"
191 | ]
192 | }
193 | ],
194 | "execution_count": 4,
195 | "metadata": {
196 | "collapsed": false,
197 | "outputHidden": false,
198 | "inputHidden": false
199 | }
200 | },
201 | {
202 | "cell_type": "code",
203 | "source": [
204 | "X = np.array(X).reshape(1258,-1)\n",
205 | "Y = np.array(Y).reshape(1258,-1)"
206 | ],
207 | "outputs": [],
208 | "execution_count": 5,
209 | "metadata": {
210 | "collapsed": false,
211 | "outputHidden": false,
212 | "inputHidden": false
213 | }
214 | },
215 | {
216 | "cell_type": "code",
217 | "source": [
218 | "from sklearn.decomposition import PCA"
219 | ],
220 | "outputs": [],
221 | "execution_count": 6,
222 | "metadata": {
223 | "collapsed": false,
224 | "outputHidden": false,
225 | "inputHidden": false
226 | }
227 | },
228 | {
229 | "cell_type": "code",
230 | "source": [
231 | "pca = PCA(n_components=1)\n",
232 | "pca.fit(X)"
233 | ],
234 | "outputs": [
235 | {
236 | "output_type": "execute_result",
237 | "execution_count": 7,
238 | "data": {
239 | "text/plain": [
240 | "PCA(copy=True, iterated_power='auto', n_components=1, random_state=None,\n",
241 | " svd_solver='auto', tol=0.0, whiten=False)"
242 | ]
243 | },
244 | "metadata": {}
245 | }
246 | ],
247 | "execution_count": 7,
248 | "metadata": {
249 | "collapsed": false,
250 | "outputHidden": false,
251 | "inputHidden": false
252 | }
253 | },
254 | {
255 | "cell_type": "code",
256 | "source": [
257 | "print(pca.explained_variance_ratio_)"
258 | ],
259 | "outputs": [
260 | {
261 | "output_type": "stream",
262 | "name": "stdout",
263 | "text": [
264 | "[ 1.]\n"
265 | ]
266 | }
267 | ],
268 | "execution_count": 8,
269 | "metadata": {
270 | "collapsed": false,
271 | "outputHidden": false,
272 | "inputHidden": false
273 | }
274 | },
275 | {
276 | "cell_type": "code",
277 | "source": [
278 | "print(pca.singular_values_)"
279 | ],
280 | "outputs": [
281 | {
282 | "output_type": "stream",
283 | "name": "stdout",
284 | "text": [
285 | "[ 236.05044323]\n"
286 | ]
287 | }
288 | ],
289 | "execution_count": 9,
290 | "metadata": {
291 | "collapsed": false,
292 | "outputHidden": false,
293 | "inputHidden": false
294 | }
295 | },
296 | {
297 | "cell_type": "code",
298 | "source": [
299 | "pca = PCA(n_components=1, svd_solver='full')\n",
300 | "pca.fit(X)"
301 | ],
302 | "outputs": [
303 | {
304 | "output_type": "execute_result",
305 | "execution_count": 10,
306 | "data": {
307 | "text/plain": [
308 | "PCA(copy=True, iterated_power='auto', n_components=1, random_state=None,\n",
309 | " svd_solver='full', tol=0.0, whiten=False)"
310 | ]
311 | },
312 | "metadata": {}
313 | }
314 | ],
315 | "execution_count": 10,
316 | "metadata": {
317 | "collapsed": false,
318 | "outputHidden": false,
319 | "inputHidden": false
320 | }
321 | },
322 | {
323 | "cell_type": "code",
324 | "source": [
325 | "print(pca.explained_variance_ratio_)"
326 | ],
327 | "outputs": [
328 | {
329 | "output_type": "stream",
330 | "name": "stdout",
331 | "text": [
332 | "[ 1.]\n"
333 | ]
334 | }
335 | ],
336 | "execution_count": 11,
337 | "metadata": {
338 | "collapsed": false,
339 | "outputHidden": false,
340 | "inputHidden": false
341 | }
342 | },
343 | {
344 | "cell_type": "code",
345 | "source": [
346 | "print(pca.singular_values_)"
347 | ],
348 | "outputs": [
349 | {
350 | "output_type": "stream",
351 | "name": "stdout",
352 | "text": [
353 | "[ 236.05044323]\n"
354 | ]
355 | }
356 | ],
357 | "execution_count": 12,
358 | "metadata": {
359 | "collapsed": false,
360 | "outputHidden": false,
361 | "inputHidden": false
362 | }
363 | },
364 | {
365 | "cell_type": "code",
366 | "source": [
367 | "pca.score(X, y=None)"
368 | ],
369 | "outputs": [
370 | {
371 | "output_type": "execute_result",
372 | "execution_count": 13,
373 | "data": {
374 | "text/plain": [
375 | "-3.3143449973330785"
376 | ]
377 | },
378 | "metadata": {}
379 | }
380 | ],
381 | "execution_count": 13,
382 | "metadata": {
383 | "collapsed": false,
384 | "outputHidden": false,
385 | "inputHidden": false
386 | }
387 | }
388 | ],
389 | "metadata": {
390 | "kernel_info": {
391 | "name": "python3"
392 | },
393 | "language_info": {
394 | "name": "python",
395 | "codemirror_mode": {
396 | "name": "ipython",
397 | "version": 3
398 | },
399 | "version": "3.5.5",
400 | "file_extension": ".py",
401 | "nbconvert_exporter": "python",
402 | "mimetype": "text/x-python",
403 | "pygments_lexer": "ipython3"
404 | },
405 | "kernelspec": {
406 | "name": "python3",
407 | "language": "python",
408 | "display_name": "Python 3"
409 | },
410 | "nteract": {
411 | "version": "0.15.0"
412 | }
413 | },
414 | "nbformat": 4,
415 | "nbformat_minor": 4
416 | }
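
The cells above fit PCA on a single column, so the one component trivially explains all the variance, and the regression step of principal component regression never happens. A sketch of the full PCR pipeline, assuming the four price columns as predictors and Adj Close as the target (an illustrative choice, not from the notebook):

# Hedged sketch: PCA followed by linear regression, i.e. actual PCR.
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

X_full = dataset.iloc[:, 0:4].values
y_full = dataset['Adj Close'].values

pcr = make_pipeline(PCA(n_components=2), LinearRegression())
pcr.fit(X_full, y_full)
print(pcr.score(X_full, y_full))  # in-sample R^2
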
--------------------------------------------------------------------------------
/Stock_Algorithms/Quasi_Poisson_Regression.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# Quasi-Poisson Regression"
7 | ],
8 | "metadata": {}
9 | },
10 | {
11 | "cell_type": "code",
12 | "source": [
13 | "import numpy as np\n",
14 | "import matplotlib.pyplot as plt\n",
15 | "import pandas as pd\n",
16 | "\n",
17 | "import warnings\n",
18 | "warnings.filterwarnings(\"ignore\")\n",
19 | "\n",
20 | "# yahoo finance is used to fetch data \n",
21 | "import yfinance as yf\n",
22 | "yf.pdr_override()"
23 | ],
24 | "outputs": [],
25 | "execution_count": 1,
26 | "metadata": {
27 | "collapsed": false,
28 | "outputHidden": false,
29 | "inputHidden": false,
30 | "execution": {
31 | "iopub.status.busy": "2021-04-24T04:48:24.336Z",
32 | "iopub.execute_input": "2021-04-24T04:48:24.341Z",
33 | "iopub.status.idle": "2021-04-24T04:48:25.077Z",
34 | "shell.execute_reply": "2021-04-24T04:48:25.095Z"
35 | }
36 | }
37 | },
38 | {
39 | "cell_type": "code",
40 | "source": [
41 | "# input\n",
42 | "symbol = 'AMD'\n",
43 | "start = '2014-01-01'\n",
44 | "end = '2018-08-27'\n",
45 | "\n",
46 | "# Read data \n",
47 | "dataset = yf.download(symbol,start,end)\n",
48 | "\n",
49 | "# View Columns\n",
50 | "dataset.head()"
51 | ],
52 | "outputs": [
53 | {
54 | "output_type": "stream",
55 | "name": "stdout",
56 | "text": [
57 | "[*********************100%***********************] 1 of 1 completed\n"
58 | ]
59 | },
60 | {
61 | "output_type": "execute_result",
62 | "execution_count": 2,
63 | "data": {
64 | "text/plain": " Open High Low Close Adj Close Volume\nDate \n2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700",
    65 |      "text/html": ""
66 | },
67 | "metadata": {}
68 | }
69 | ],
70 | "execution_count": 2,
71 | "metadata": {
72 | "collapsed": false,
73 | "outputHidden": false,
74 | "inputHidden": false,
75 | "execution": {
76 | "iopub.status.busy": "2021-04-24T04:48:25.082Z",
77 | "iopub.execute_input": "2021-04-24T04:48:25.086Z",
78 | "iopub.status.idle": "2021-04-24T04:48:25.845Z",
79 | "shell.execute_reply": "2021-04-24T04:48:25.905Z"
80 | }
81 | }
82 | },
83 | {
84 | "cell_type": "code",
85 | "source": [
86 | "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
87 | "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
88 | "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
89 | "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
90 | "dataset = dataset.dropna()"
91 | ],
92 | "outputs": [],
93 | "execution_count": 3,
94 | "metadata": {
95 | "collapsed": true,
96 | "jupyter": {
97 | "source_hidden": false,
98 | "outputs_hidden": false
99 | },
100 | "nteract": {
101 | "transient": {
102 | "deleting": false
103 | }
104 | },
105 | "execution": {
106 | "iopub.status.busy": "2021-04-24T04:48:25.851Z",
107 | "iopub.execute_input": "2021-04-24T04:48:25.854Z",
108 | "shell.execute_reply": "2021-04-24T04:48:25.909Z",
109 | "iopub.status.idle": "2021-04-24T04:48:25.919Z"
110 | }
111 | }
112 | },
113 | {
114 | "cell_type": "code",
115 | "source": [
116 | "from statsmodels.genmod.generalized_estimating_equations import GEE\n",
117 | "from statsmodels.genmod.cov_struct import (Exchangeable,\n",
118 | " Independence,Autoregressive)\n",
119 | "from statsmodels.genmod.families import Poisson"
120 | ],
121 | "outputs": [],
122 | "execution_count": 5,
123 | "metadata": {
124 | "collapsed": true,
125 | "jupyter": {
126 | "source_hidden": false,
127 | "outputs_hidden": false
128 | },
129 | "nteract": {
130 | "transient": {
131 | "deleting": false
132 | }
133 | },
134 | "execution": {
135 | "iopub.status.busy": "2021-04-24T04:48:25.942Z",
136 | "iopub.execute_input": "2021-04-24T04:48:25.945Z",
137 | "iopub.status.idle": "2021-04-24T04:48:26.382Z",
138 | "shell.execute_reply": "2021-04-24T04:48:26.394Z"
139 | }
140 | }
141 | },
142 | {
143 | "cell_type": "code",
144 | "source": [
145 | "fam = Poisson()\n",
146 | "ind = Independence()\n",
147 | "model1 = GEE.from_formula(\"Increase_Decrease ~ Returns + Buy_Sell_on_Open + Open\", 'Buy_Sell', dataset, cov_struct=ind, family=fam)\n",
148 | "result1 = model1.fit()\n",
149 | "print(result1.summary())"
150 | ],
151 | "outputs": [
152 | {
153 | "output_type": "stream",
154 | "name": "stdout",
155 | "text": [
156 | " GEE Regression Results \n",
157 | "===================================================================================\n",
158 | "Dep. Variable: Increase_Decrease No. Observations: 1170\n",
159 | "Model: GEE No. clusters: 2\n",
160 | "Method: Generalized Min. cluster size: 584\n",
161 | " Estimating Equations Max. cluster size: 586\n",
162 | "Family: Poisson Mean cluster size: 585.0\n",
163 | "Dependence structure: Independence Num. iterations: 2\n",
164 | "Date: Fri, 23 Apr 2021 Scale: 1.000\n",
165 | "Covariance type: robust Time: 21:51:04\n",
166 | "====================================================================================\n",
167 | " coef std err z P>|z| [0.025 0.975]\n",
168 | "------------------------------------------------------------------------------------\n",
169 | "Intercept -0.7826 0.017 -45.953 0.000 -0.816 -0.749\n",
170 | "Returns 0.9742 1.267 0.769 0.442 -1.508 3.457\n",
171 | "Buy_Sell_on_Open -0.0671 0.172 -0.390 0.696 -0.404 0.270\n",
172 | "Open 0.0036 0.003 1.180 0.238 -0.002 0.010\n",
173 | "==============================================================================\n",
174 | "Skew: 0.1802 Kurtosis: -1.9614\n",
175 | "Centered skew: 0.1789 Centered kurtosis: -1.9459\n",
176 | "==============================================================================\n"
177 | ]
178 | }
179 | ],
180 | "execution_count": 11,
181 | "metadata": {
182 | "collapsed": true,
183 | "jupyter": {
184 | "source_hidden": false,
185 | "outputs_hidden": false
186 | },
187 | "nteract": {
188 | "transient": {
189 | "deleting": false
190 | }
191 | },
192 | "execution": {
193 | "iopub.status.busy": "2021-04-24T04:51:04.044Z",
194 | "iopub.execute_input": "2021-04-24T04:51:04.048Z",
195 | "iopub.status.idle": "2021-04-24T04:51:04.105Z",
196 | "shell.execute_reply": "2021-04-24T04:51:04.108Z"
197 | }
198 | }
199 | }
200 | ],
201 | "metadata": {
202 | "kernel_info": {
203 | "name": "python3"
204 | },
205 | "language_info": {
206 | "name": "python",
207 | "version": "3.6.12",
208 | "mimetype": "text/x-python",
209 | "codemirror_mode": {
210 | "name": "ipython",
211 | "version": 3
212 | },
213 | "pygments_lexer": "ipython3",
214 | "nbconvert_exporter": "python",
215 | "file_extension": ".py"
216 | },
217 | "kernelspec": {
218 | "name": "python3",
219 | "language": "python",
220 | "display_name": "Python 3"
221 | },
222 | "nteract": {
223 | "version": "0.28.0"
224 | }
225 | },
226 | "nbformat": 4,
227 | "nbformat_minor": 4
228 | }
--------------------------------------------------------------------------------
/Stock_Algorithms/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ### Description:
4 | #### Machine learning algorithms are programs that use math and logic to adjust themselves to perform better as they are exposed to more data. The “learning” part of machine learning means that these programs change how they process data over time, much as humans change how they process data by learning.
5 |
6 | ### List of Commonly Used Algorithms:
7 | #### Linear Regression: A simple yet powerful algorithm that models the relationship between a dependent variable and one or more independent variables by fitting a linear equation to the data.
8 | #### Logistic Regression: Widely used for binary classification problems, logistic regression estimates the probability of an event occurring based on a set of input features.
9 | #### Decision Trees: These algorithms use a hierarchical structure of nodes and branches to make decisions by splitting the data based on different features. They are interpretable and often form the building blocks of ensemble methods.
10 | #### Random Forest: An ensemble learning method that combines multiple decision trees to make predictions. It improves upon decision trees by reducing overfitting and increasing accuracy.
11 | #### Support Vector Machines (SVM): SVMs aim to find the optimal hyperplane that separates data points into different classes. They work well for both linearly separable and non-linearly separable data.
12 | #### K-Nearest Neighbors (KNN): KNN is a non-parametric algorithm that classifies data based on the majority vote of its nearest neighbors in the feature space.
13 | #### Naive Bayes: A probabilistic algorithm that uses Bayes' theorem to make predictions. Despite its simplicity, it performs remarkably well in text classification and spam filtering. (A minimal scikit-learn sketch comparing several of these classifiers follows this list.)
14 |
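As a quick illustration (a sketch only, not one of this repository's notebooks), several of the classifiers above can be fitted and compared with a few lines of scikit-learn on synthetic data:

```python
# Compare three classifiers from the list above on synthetic data.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

for model in (LogisticRegression(max_iter=1000),
              KNeighborsClassifier(n_neighbors=5),
              GaussianNB()):
    model.fit(X_train, y_train)
    print(type(model).__name__, model.score(X_test, y_test))
```
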
15 | ### Regression in Machine Learning:
16 | #### Regression is a key concept in machine learning used for predicting continuous or numerical values. It analyzes the relationship between independent variables (features) and a dependent variable (target) to build a predictive model.
17 | ### Common regression techniques include:
18 | #### Linear Regression: The most basic form of regression, it fits a linear equation to the data by minimizing the sum of squared residuals.
19 | #### Polynomial Regression: Extends linear regression by introducing polynomial terms to capture non-linear relationships between variables.
20 | #### Ridge Regression: Adds a regularization term to linear regression to prevent overfitting by penalizing large coefficient values.
21 | #### Lasso Regression: Similar to ridge regression, but it penalizes the absolute value of the coefficients instead of their squares, leading to sparse solutions (illustrated in the sketch after this list).
22 | #### Support Vector Regression (SVR): An extension of SVMs for regression tasks. SVR finds a hyperplane that maximizes the margin of support vectors while minimizing the error on the training data.
23 |
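The ridge-versus-lasso contrast above can be seen directly on synthetic data with only two informative features; the lasso coefficients for the uninformative features should come out at (or near) zero. A minimal sketch:

```python
# Ridge shrinks all coefficients smoothly; Lasso can zero some out entirely.
import numpy as np
from sklearn.linear_model import Ridge, Lasso

rng = np.random.RandomState(0)
X = rng.randn(200, 8)
y = 3.0 * X[:, 0] - 2.0 * X[:, 1] + rng.randn(200) * 0.5  # only 2 informative features

ridge = Ridge(alpha=1.0).fit(X, y)
lasso = Lasso(alpha=0.1).fit(X, y)
print("Ridge coefficients:", np.round(ridge.coef_, 2))
print("Lasso coefficients:", np.round(lasso.coef_, 2))  # most should be ~0
```
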
24 | # :large_blue_diamond: List of Algorithms :large_blue_diamond:
25 | :arrow_forward: AdaBoost Classification
26 | :arrow_forward: AdaBoost Regressor
27 | :arrow_forward: Anomaly Detection
28 | :arrow_forward: Apriori Algorithm
29 | :arrow_forward: Artificial Neural Network
30 | :arrow_forward: Bagging Classifier
31 | :arrow_forward: Bayesian Ridge Regression
32 | :arrow_forward: Bernoulli Restricted Boltzmann Machine
33 | :arrow_forward: CatBoost Algorithms
34 | :arrow_forward: Classification and Clustering
35 | :arrow_forward: Clustering Algorithms
36 | :arrow_forward: CART (Classification and Regression Trees)
37 | :arrow_forward: Decision Tree Classification
38 | :arrow_forward: Decision Tree Regression
39 | :arrow_forward: Dimensionality Reduction Algorithms
40 | :arrow_forward: Ensemble Learning Algorithms
41 | :arrow_forward: Explanatory Algorithms
42 | :arrow_forward: Gradient Boosting Classification
43 | :arrow_forward: Generative Adversarial Networks (GANs)
44 | :arrow_forward: K-Means Clustering Algorithm
45 | :arrow_forward: K-Nearest Neighbors Algorithm
46 | :arrow_forward: Logistic Regression
47 | :arrow_forward: Linear Regression
48 | :arrow_forward: NetworkX
49 | :arrow_forward: Neural Networks Regression
50 | :arrow_forward: Quantile Regression
51 | :arrow_forward: Partial Least Squares Regression (PLSR)
52 | :arrow_forward: Polynomial Regression
53 | :arrow_forward: Principal Component Classification
54 | :arrow_forward: Principal Component Regression
55 | :arrow_forward: Random Forest Classification
56 | :arrow_forward: Random Forest Regression
57 | :arrow_forward: RNN Tensorflow
58 | :arrow_forward: Ridge Regression
59 | :arrow_forward: Similarity Algorithms
60 | :arrow_forward: Support Vector Machines (SVM)
61 | :arrow_forward: Tensorflow
62 | :arrow_forward: Time Series
63 | :arrow_forward: XGBoost
64 |
65 | ### AdaBoost
66 | AdaBoost is short for Adaptive Boosting and is a statistical classification meta-algorithm created by Yoav Freund and Robert Schapire in 1995. The meta-estimator begins by fitting a classifier on the original dataset and then fits additional copies of the classifier on the same dataset, but with the weights of incorrectly classified instances adjusted so that subsequent classifiers focus more on difficult cases.
67 |
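A minimal AdaBoostClassifier sketch on synthetic data (illustrative only; by default scikit-learn boosts depth-1 decision trees, which matches the reweighting scheme described above):

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier

X, y = make_classification(n_samples=500, random_state=0)
clf = AdaBoostClassifier(n_estimators=50, random_state=0).fit(X, y)
print("Training accuracy:", clf.score(X, y))
```
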
68 | ### Anomaly Detection
69 | Anomaly detection identifies data points that do not fit the normal patterns in the data. It is used to find rare items, events, or observations that deviate significantly from the majority of the data and do not conform to a well-defined notion of normal behaviour.
70 |
71 | ### Artificial Neural Network
72 | Artificial neural networks (ANNs) consist of input, hidden, and output layers with connected neurons (nodes) to simulate the human brain. Each node, or artificial neuron, is connected to others and has an associated weight and threshold. If the output of an individual node is above the specified threshold value, that node is activated and sends data to the next layer of the network; otherwise, no data is passed along.
73 |
74 | ### Bagging classifier
75 | The Bagging classifier is an ensemble meta-estimator that fits base classifiers each on random subsets of the original dataset and then aggregates their individual predictions (either by voting or by averaging) to form a final prediction.
76 |
77 | ### Bayesian Ridge Regression
78 | Bayesian Ridge Regression is similar to linear regression, but the statistical analysis is undertaken within the context of Bayesian inference. Formulating linear regression using probability distributions rather than point estimates gives it a natural mechanism to cope with insufficient or poorly distributed data.
79 |
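As a sketch of this probabilistic view, scikit-learn's BayesianRidge can return a standard deviation alongside each point prediction (synthetic data, illustrative only):

```python
import numpy as np
from sklearn.linear_model import BayesianRidge

rng = np.random.RandomState(0)
X = rng.randn(100, 3)
y = X @ np.array([1.5, -2.0, 0.5]) + rng.randn(100) * 0.3

model = BayesianRidge().fit(X, y)
y_mean, y_std = model.predict(X[:5], return_std=True)  # prediction plus uncertainty
print(y_mean)
print(y_std)
```
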
80 | ### Bernoulli Restricted Boltzmann Machine
81 | Bernoulli Restricted Boltzmann Machine (RBM) is a generative stochastic artificial neural network that can learn a probability distribution over its set of inputs.
82 |
83 | ### Decision Tree
84 | The Decision Tree algorithm is a supervised machine learning technique used for both classification and regression. A decision tree uses splitting criteria to decide how to split a node into two or more sub-nodes. The creation of sub-nodes increases the homogeneity of the resultant sub-nodes; in other words, the purity of the nodes increases with respect to the target variable.
85 |
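A small illustrative tree on the iris dataset; export_text prints the learned splits, which is what makes trees interpretable:

```python
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, export_text

X, y = load_iris(return_X_y=True)
tree = DecisionTreeClassifier(max_depth=2, random_state=0).fit(X, y)
print(export_text(tree))  # the split thresholds chosen at each node
```
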
86 | ### Gradient Boosting Algorithm
87 | Gradient Boosting is a machine learning technique used in regression and classification. Gradient boosting builds simple (weak) prediction models sequentially, where each model tries to predict the error left over by the previous model.
88 |
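The residual-fitting idea can be shown by hand with two small trees (a sketch only; in practice one would use GradientBoostingRegressor or one of the boosting libraries listed later):

```python
import numpy as np
from sklearn.tree import DecisionTreeRegressor

rng = np.random.RandomState(0)
X = rng.rand(300, 1)
y = np.sin(6 * X[:, 0]) + rng.randn(300) * 0.1

stage1 = DecisionTreeRegressor(max_depth=2).fit(X, y)
residual = y - stage1.predict(X)                  # error left over by stage 1
stage2 = DecisionTreeRegressor(max_depth=2).fit(X, residual)

pred = stage1.predict(X) + stage2.predict(X)
print("Stage-1 MSE:", np.mean((y - stage1.predict(X)) ** 2))
print("Two-stage MSE:", np.mean((y - pred) ** 2))  # should be lower
```
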
89 | ### K-Means Clustering Algorithm
90 | K-Means clustering is an unsupervised machine learning algorithm that partitions unlabeled data into k clusters, assigning each observation to the cluster with the nearest mean.
91 |
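A minimal KMeans sketch on two synthetic blobs (illustrative only):

```python
import numpy as np
from sklearn.cluster import KMeans

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(100, 2), rng.randn(100, 2) + [5, 5]])  # two blobs
km = KMeans(n_clusters=2, n_init=10, random_state=0).fit(X)
print("Centers:", km.cluster_centers_)     # should sit near (0, 0) and (5, 5)
print("First 10 labels:", km.labels_[:10])
```
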
92 | ### K-Nearest Neighbors Algorithm
93 | K-Nearest Neighbors (KNN or k-NN) is a non-parametric, supervised learning classifier that uses proximity to make classifications or predictions about the grouping of an individual data point.
94 |
95 | ### Logistic Regression
96 | Logistic Regression is used to estimate the probability of an event occurring, such as voted or didn't vote, based on a given dataset of independent variables. Since the outcome is a probability, the dependent variable is bounded between 0 and 1.
97 |
98 | ### Linear Regression
99 | Linear Regression is used to model the relationship between two variables by fitting a linear equation to observed data. One variable is considered to be an explanatory variable, and the other is considered to be a dependent variable.
100 |
101 | ### NetworkX
102 | NetworkX provides classes for graphs which allow multiple edges between any pair of nodes. The MultiGraph and MultiDiGraph classes allow you to add the same edge twice, possibly with different edge data. NetworkX can be powerful for some applications; however, many algorithms are not well defined on such graphs.
103 |
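A short sketch of the parallel-edge behaviour described above:

```python
import networkx as nx

G = nx.MultiGraph()
G.add_edge("A", "B", weight=1.0)
G.add_edge("A", "B", weight=2.5)    # same edge again, kept as a parallel edge
print(G.number_of_edges("A", "B"))  # 2
print(G.get_edge_data("A", "B"))    # {0: {'weight': 1.0}, 1: {'weight': 2.5}}
```
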
104 | ### Neural Networks Regression
105 | Plain linear regression can only learn the linear relationship between the features and the target, so it cannot capture complex non-linear relationships. Neural network regression is used to learn those complex non-linear relationships between the features and the target.
106 |
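An illustrative contrast on a sine curve, using scikit-learn's MLPRegressor as the neural network (a sketch only):

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor

rng = np.random.RandomState(0)
X = rng.uniform(-3, 3, size=(400, 1))
y = np.sin(X[:, 0]) + rng.randn(400) * 0.1   # non-linear target

print("Linear R^2:", LinearRegression().fit(X, y).score(X, y))  # poor fit
print("MLP R^2:", MLPRegressor(hidden_layer_sizes=(50,), max_iter=2000,
                               random_state=0).fit(X, y).score(X, y))
```
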
107 | ## Boosting Algorithms:
108 | ### Gradient Boosting Machines (GBM):
109 | Gradient boosting is a machine learning technique used in regression and classification tasks, among others. It gives a prediction model in the form of an ensemble of weak prediction models, which are typically decision trees. When a decision tree is the weak learner, the resulting algorithm is called gradient-boosted trees; it usually outperforms random forests. A gradient-boosted trees model is built in a stage-wise fashion as in other boosting methods, but it generalizes the other methods by allowing optimization of an arbitrary differentiable loss function.
110 |
111 | ### XGBoost:
112 | XGBoost is a scalable and highly accurate implementation of gradient boosting that pushes the limits of computing power for boosted tree algorithms, built largely to improve machine learning model performance and computational speed.
113 |
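A hedged sketch, assuming the optional xgboost package is installed (pip install xgboost); XGBRegressor follows the familiar scikit-learn estimator API:

```python
from sklearn.datasets import make_regression
from xgboost import XGBRegressor

X, y = make_regression(n_samples=500, n_features=10, random_state=0)
model = XGBRegressor(n_estimators=200, max_depth=3, learning_rate=0.1)
model.fit(X, y)
print("Training R^2:", model.score(X, y))
```
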
114 | ### LightGBM:
115 | LightGBM is a gradient boosting framework that uses tree-based learning algorithms.
116 |
117 | ### Catboost:
118 | CatBoost is an algorithm for gradient boosting on decision trees.
119 |
120 | ### Classification and Clustering
121 | Classification examples include logistic regression, the Naive Bayes classifier, support vector machines, and other classification algorithms. Clustering examples include the k-means clustering algorithm, the fuzzy c-means clustering algorithm, the Gaussian (EM) clustering algorithm, and other clustering algorithms.
122 |
123 | ## Authors
124 | ### * Tin Hang
125 |
126 | ## 🔴 Warning: This is not financial advice. Do not use this for investing or trading purposes. It is for educational purposes only.
127 |
--------------------------------------------------------------------------------
/Stock_Algorithms/TruncatedSVD.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# Dimensionality reduction using truncated SVD (aka LSA)"
7 | ],
8 | "metadata": {}
9 | },
10 | {
11 | "cell_type": "code",
12 | "source": [
13 | "import numpy as np\n",
14 | "import matplotlib.pyplot as plt\n",
15 | "import pandas as pd\n",
16 | "\n",
17 | "import warnings\n",
18 | "warnings.filterwarnings(\"ignore\")\n",
19 | "\n",
20 | "# fix_yahoo_finance is used to fetch data \n",
21 | "import fix_yahoo_finance as yf\n",
22 | "yf.pdr_override()"
23 | ],
24 | "outputs": [],
25 | "execution_count": 1,
26 | "metadata": {
27 | "collapsed": false,
28 | "outputHidden": false,
29 | "inputHidden": false
30 | }
31 | },
32 | {
33 | "cell_type": "code",
34 | "source": [
35 | "# input\n",
36 | "symbol = 'AMD'\n",
37 | "start = '2014-01-01'\n",
38 | "end = '2019-01-01'\n",
39 | "\n",
40 | "# Read data \n",
41 | "dataset = yf.download(symbol,start,end)\n",
42 | "\n",
43 | "# View Columns\n",
44 | "dataset.head()"
45 | ],
46 | "outputs": [
47 | {
48 | "output_type": "stream",
49 | "name": "stdout",
50 | "text": [
51 | "[*********************100%***********************] 1 of 1 downloaded\n"
52 | ]
53 | },
54 | {
55 | "output_type": "execute_result",
56 | "execution_count": 2,
57 | "data": {
58 | "text/html": [
59 | "\n",
60 | "\n",
73 | "
\n",
74 | " \n",
75 | " \n",
76 | " | \n",
77 | " Open | \n",
78 | " High | \n",
79 | " Low | \n",
80 | " Close | \n",
81 | " Adj Close | \n",
82 | " Volume | \n",
83 | "
\n",
84 | " \n",
85 | " Date | \n",
86 | " | \n",
87 | " | \n",
88 | " | \n",
89 | " | \n",
90 | " | \n",
91 | " | \n",
92 | "
\n",
93 | " \n",
94 | " \n",
95 | " \n",
96 | " 2014-01-02 | \n",
97 | " 3.85 | \n",
98 | " 3.98 | \n",
99 | " 3.84 | \n",
100 | " 3.95 | \n",
101 | " 3.95 | \n",
102 | " 20548400 | \n",
103 | "
\n",
104 | " \n",
105 | " 2014-01-03 | \n",
106 | " 3.98 | \n",
107 | " 4.00 | \n",
108 | " 3.88 | \n",
109 | " 4.00 | \n",
110 | " 4.00 | \n",
111 | " 22887200 | \n",
112 | "
\n",
113 | " \n",
114 | " 2014-01-06 | \n",
115 | " 4.01 | \n",
116 | " 4.18 | \n",
117 | " 3.99 | \n",
118 | " 4.13 | \n",
119 | " 4.13 | \n",
120 | " 42398300 | \n",
121 | "
\n",
122 | " \n",
123 | " 2014-01-07 | \n",
124 | " 4.19 | \n",
125 | " 4.25 | \n",
126 | " 4.11 | \n",
127 | " 4.18 | \n",
128 | " 4.18 | \n",
129 | " 42932100 | \n",
130 | "
\n",
131 | " \n",
132 | " 2014-01-08 | \n",
133 | " 4.23 | \n",
134 | " 4.26 | \n",
135 | " 4.14 | \n",
136 | " 4.18 | \n",
137 | " 4.18 | \n",
138 | " 30678700 | \n",
139 | "
\n",
140 | " \n",
141 | "
\n",
142 | "
"
143 | ],
144 | "text/plain": [
145 | " Open High Low Close Adj Close Volume\n",
146 | "Date \n",
147 | "2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n",
148 | "2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n",
149 | "2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n",
150 | "2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n",
151 | "2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700"
152 | ]
153 | },
154 | "metadata": {}
155 | }
156 | ],
157 | "execution_count": 2,
158 | "metadata": {
159 | "collapsed": false,
160 | "outputHidden": false,
161 | "inputHidden": false
162 | }
163 | },
164 | {
165 | "cell_type": "code",
166 | "source": [
167 | "X = dataset.iloc[ : , 1:4].values\n",
168 | "Y = dataset.iloc[ : , 4].values"
169 | ],
170 | "outputs": [],
171 | "execution_count": 23,
172 | "metadata": {
173 | "collapsed": false,
174 | "outputHidden": false,
175 | "inputHidden": false
176 | }
177 | },
178 | {
179 | "cell_type": "code",
180 | "source": [
181 | "print(X.shape)\n",
182 | "print(Y.shape)"
183 | ],
184 | "outputs": [
185 | {
186 | "output_type": "stream",
187 | "name": "stdout",
188 | "text": [
189 | "(1258, 3)\n",
190 | "(1258,)\n"
191 | ]
192 | }
193 | ],
194 | "execution_count": 24,
195 | "metadata": {
196 | "collapsed": false,
197 | "outputHidden": false,
198 | "inputHidden": false
199 | }
200 | },
201 | {
202 | "cell_type": "code",
203 | "source": [
204 | "X = np.array(X).reshape(1258,-1)\n",
205 | "Y = np.array(Y).reshape(1258,-1)"
206 | ],
207 | "outputs": [],
208 | "execution_count": 25,
209 | "metadata": {
210 | "collapsed": false,
211 | "outputHidden": false,
212 | "inputHidden": false
213 | }
214 | },
215 | {
216 | "cell_type": "code",
217 | "source": [
218 | "from sklearn.decomposition import TruncatedSVD"
219 | ],
220 | "outputs": [],
221 | "execution_count": 19,
222 | "metadata": {
223 | "collapsed": false,
224 | "outputHidden": false,
225 | "inputHidden": false
226 | }
227 | },
228 | {
229 | "cell_type": "code",
230 | "source": [
231 | "svd = TruncatedSVD(n_components=1, n_iter=2, random_state=42)\n",
232 | "svd.fit(X) "
233 | ],
234 | "outputs": [
235 | {
236 | "output_type": "execute_result",
237 | "execution_count": 27,
238 | "data": {
239 | "text/plain": [
240 | "TruncatedSVD(algorithm='randomized', n_components=1, n_iter=2,\n",
241 | " random_state=42, tol=0.0)"
242 | ]
243 | },
244 | "metadata": {}
245 | }
246 | ],
247 | "execution_count": 27,
248 | "metadata": {
249 | "collapsed": false,
250 | "outputHidden": false,
251 | "inputHidden": false
252 | }
253 | },
254 | {
255 | "cell_type": "code",
256 | "source": [
257 | "print(svd.explained_variance_ratio_)"
258 | ],
259 | "outputs": [
260 | {
261 | "output_type": "stream",
262 | "name": "stdout",
263 | "text": [
264 | "[ 0.99951387]\n"
265 | ]
266 | }
267 | ],
268 | "execution_count": 28,
269 | "metadata": {
270 | "collapsed": false,
271 | "outputHidden": false,
272 | "inputHidden": false
273 | }
274 | },
275 | {
276 | "cell_type": "code",
277 | "source": [
278 | "print(svd.singular_values_) "
279 | ],
280 | "outputs": [
281 | {
282 | "output_type": "stream",
283 | "name": "stdout",
284 | "text": [
285 | "[ 639.8979399]\n"
286 | ]
287 | }
288 | ],
289 | "execution_count": 29,
290 | "metadata": {
291 | "collapsed": false,
292 | "outputHidden": false,
293 | "inputHidden": false
294 | }
295 | }
296 | ],
297 | "metadata": {
298 | "kernel_info": {
299 | "name": "python3"
300 | },
301 | "language_info": {
302 | "version": "3.5.5",
303 | "codemirror_mode": {
304 | "version": 3,
305 | "name": "ipython"
306 | },
307 | "pygments_lexer": "ipython3",
308 | "file_extension": ".py",
309 | "nbconvert_exporter": "python",
310 | "name": "python",
311 | "mimetype": "text/x-python"
312 | },
313 | "kernelspec": {
314 | "name": "python3",
315 | "language": "python",
316 | "display_name": "Python 3"
317 | },
318 | "nteract": {
319 | "version": "0.12.2"
320 | }
321 | },
322 | "nbformat": 4,
323 | "nbformat_minor": 4
324 | }
--------------------------------------------------------------------------------
/Stock_Apps/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # Stock Applications & Software
5 |
6 | ## Descriptions:
7 | Different types of algorithms for prediction
8 |
9 | __Input__
10 | 1. Input the stock starting date
11 | 2. Input the stock ending date
12 | 3. Input the stock symbol
13 | 4. Choose Algorithms for Stock Prediction
14 |
15 | ## Authors
16 | ### * Tin Hang
17 |
--------------------------------------------------------------------------------
/Stock_Apps/Stock_Apps.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LastAncientOne/Deep_Learning_Machine_Learning_Stock/e31e9fc689cd2cb6d74a8f2694750afd6e13bcec/Stock_Apps/Stock_Apps.PNG
--------------------------------------------------------------------------------
/Stock_Apps/Stock_ML_Data_PreProcessing_Apps.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat May 25 14:21:27 2019
4 |
5 | @author: Tin
6 | """
7 | import numpy as np
8 | import pandas as pd
9 | import datetime
10 |
11 | from sklearn.preprocessing import MinMaxScaler
12 | from sklearn.preprocessing import Binarizer
13 | from sklearn.preprocessing import StandardScaler
14 | from sklearn.model_selection import train_test_split
15 |
16 |
17 | import warnings
18 | warnings.filterwarnings("ignore")
19 |
20 | # yahoo finance used to fetch data
21 | import yfinance as yf
22 | yf.pdr_override()
23 |
24 | options = " Data Preprocessing, Exit".split(",")
25 |
26 | # Input Start Date
27 | def start_date():
28 | date_entry = input('Enter a starting date in MM/DD/YYYY format: ')
29 | start = datetime.datetime.strptime(date_entry,'%m/%d/%Y')
30 | start = start.strftime('%Y-%m-%d')
31 | return start
32 |
33 | # Input End Date
34 | def end_date():
35 |     date_entry = input('Enter an ending date in MM/DD/YYYY format: ')
36 | end = datetime.datetime.strptime(date_entry,'%m/%d/%Y')
37 | end = end.strftime('%Y-%m-%d')
38 | return end
39 |
40 | # Input Symbols
41 | def input_symbol():
42 | symbol = input("Enter symbol: ").upper()
43 | return symbol
44 |
45 | # Rescaled Dataset
46 | def Rescale_Dataset():
47 | s = start_date()
48 | e = end_date()
49 | sym = input_symbol()
50 | df = yf.download(sym, s, e)
51 | array = df.values
52 | X = array[:,0:5]
53 | Y = array[:,5]
54 | # initialising the MinMaxScaler
55 | scaler = MinMaxScaler(feature_range=(0, 1))
56 | # learning the statistical parameters for each of the data and transforming
57 | rescaledX = scaler.fit_transform(X)
58 | np.set_printoptions(precision=3)
59 |     print('Rescaled values between 0 and 1')
60 | print(rescaledX[0:5,:])
61 | print("")
62 | # Splitting the datasets into training sets and Test sets
63 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
64 | sc_X = StandardScaler()
65 | # Splitting the datasets into training sets and Test sets
66 | X_train = sc_X.fit_transform(X_train)
67 |     X_test = sc_X.transform(X_test)  # transform only; the scaler is fitted on the training set
68 | print("Training Dataset")
69 | print(X_train)
70 | print("")
71 | print(Y_train)
72 | print("")
73 | print("Testing Dataset")
74 | print(X_test)
75 | print("")
76 | print(Y_test)
77 | print("")
78 | ans = ['1', '2']
79 | user_input=input("""
80 | What would you like to do next? Enter option 1 or 2.
81 | 1. Menu
82 | 2. Exit
83 | Command: """)
84 | while user_input not in ans:
85 | print("Error: Please enter a a valid option 1-2")
86 | user_input=input("Command: ")
87 | if user_input=="1":
88 | menu()
89 | elif user_input=="2":
90 | exit()
91 |
92 |
93 | #***********************************************************************************************************************#
94 | # Binarize Data
95 | def Binarize_Dataset():
96 | s = start_date()
97 | e = end_date()
98 | sym = input_symbol()
99 | df = yf.download(sym, s, e)
100 | array = df.values
101 | X = array[:,0:5]
102 | Y = array[:,5]
103 | # initialising the binarize
104 | binarizer = Binarizer(threshold = 0.0).fit(X)
105 | binaryX = binarizer.transform(X)
106 | np.set_printoptions(precision=3)
107 |     print('Binarized values: those equal to or less than 0 are marked 0 and those above 0 are marked 1')
108 | print(binaryX[0:5,:])
109 | print("")
110 | # Splitting the datasets into training sets and Test sets
111 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
112 | sc_X = StandardScaler()
113 | # Splitting the datasets into training sets and Test sets
114 | X_train = sc_X.fit_transform(X_train)
115 |     X_test = sc_X.transform(X_test)  # transform only; the scaler is fitted on the training set
116 | print("Training Dataset")
117 | print(X_train)
118 | print("")
119 | print(Y_train)
120 | print("")
121 | print("Testing Dataset")
122 | print(X_test)
123 | print("")
124 | print(Y_test)
125 | print("")
126 | ans = ['1', '2']
127 | user_input=input("""
128 | What would you like to do next? Enter option 1 or 2.
129 | 1. Menu
130 | 2. Exit
131 | Command: """)
132 | while user_input not in ans:
133 | print("Error: Please enter a a valid option 1-2")
134 | user_input=input("Command: ")
135 | if user_input=="1":
136 | menu()
137 | elif user_input=="2":
138 | exit()
139 |
140 |
141 | #***********************************************************************************************************************#
142 | # Standardize Data
143 | def Standardize_Dataset():
144 | s = start_date()
145 | e = end_date()
146 | sym = input_symbol()
147 | df = yf.download(sym, s, e)
148 | array = df.values
149 | X = array[:,0:5]
150 | Y = array[:,5]
151 | # initialising the standardize
152 | scaler = StandardScaler().fit(X)
153 | rescaledX = scaler.transform(X)
154 | np.set_printoptions(precision=3)
155 |     print('Standardized values with a mean of 0 and a standard deviation of 1')
156 | print(rescaledX[0:5,:])
157 | print("")
158 | # Splitting the datasets into training sets and Test sets
159 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
160 | sc_X = StandardScaler()
161 | # Splitting the datasets into training sets and Test sets
162 | X_train = sc_X.fit_transform(X_train)
163 |     X_test = sc_X.transform(X_test)  # transform only; the scaler is fitted on the training set
164 | print("Training Dataset")
165 | print(X_train)
166 | print("")
167 | print(Y_train)
168 | print("")
169 | print("Testing Dataset")
170 | print(X_test)
171 | print("")
172 | print(Y_test)
173 | print("")
174 | ans = ['1', '2']
175 | user_input=input("""
176 | What would you like to do next? Enter option 1 or 2.
177 | 1. Menu
178 | 2. Exit
179 | Command: """)
180 | while user_input not in ans:
181 | print("Error: Please enter a a valid option 1-2")
182 | user_input=input("Command: ")
183 | if user_input=="1":
184 | menu()
185 | elif user_input=="2":
186 | exit()
187 |
188 |
189 |
190 |
191 | #***********************************************************************************************************************#
192 | #******************************************************* Menu **********************************************************#
193 | #***********************************************************************************************************************#
194 | def menu():
195 | ans = ['1', '2', '3', '4', '0']
196 | print("""
197 |
198 | MENU
199 | PREPROCESSING DATASET
200 | ---------------------------
201 | 1. Rescaled Data
202 | 2. Binarize Data
203 | 3. Standardize Data
204 | 4. Beginning Menu
205 | 0. Exit the Program
206 | """)
207 | user_input = input("Command (0-3): ")
208 | while user_input not in ans:
209 | print("Error: Please enter a valid option 0-3")
210 | user_input=input("Command: ")
211 | if user_input == '1':
212 |         Rescale_Dataset()
213 | elif user_input == '2':
214 | Binarize_Dataset()
215 | elif user_input == '3':
216 | Standardize_Dataset()
217 | elif user_input == "4":
218 | beginning()
219 | elif user_input == "0":
220 | exit()
221 |
222 |
223 | #***********************************************************************************************************************#
224 | #*************************************************** Start of Program **************************************************#
225 | #***********************************************************************************************************************#
226 | def beginning():
227 | print()
228 | print("----------Welcome to Preprocessing Dataset--------")
229 | print("""
230 | Please choose option 1 or 2
231 |
232 | 1. Menu
233 | 2. Exit Program
234 | ---------------------------------------------""")
235 | ans = ['1', '2']
236 | user_input=input("What is your Option?: ")
237 | while user_input not in ans:
238 | print("Error: Please enter a a valid option 1-2")
239 | user_input=input("Command: ")
240 | if user_input=="1":
241 | menu()
242 | elif user_input=="2":
243 | exit()
244 |
245 |
246 | #***********************************************************************************************************************#
247 | beginning()
--------------------------------------------------------------------------------
/Stock_Apps/Stock_ML_Predict_Apps.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat May 25 14:21:27 2019
4 |
5 | @author: Tin
6 | """
7 | import numpy as np
8 | import matplotlib.pyplot as plt
9 |
10 | import datetime
11 |
12 | # Machine Learning Libraries
13 | from sklearn.linear_model import LinearRegression
14 | from sklearn.linear_model import LogisticRegression
15 | from sklearn.model_selection import train_test_split
16 | from sklearn import metrics
17 | from sklearn.model_selection import cross_val_score
18 | from sklearn.svm import SVR
19 |
20 | import warnings
21 | warnings.filterwarnings("ignore")
22 |
23 | # yahoo finance used to fetch data
24 | import yfinance as yf
25 | yf.pdr_override()
26 |
27 | options = " Stock Linear Regression Prediction, Stock Logistic Regression Prediction, Support Vector Regression, Exit".split(",")
28 |
29 | # Input Start Date
30 | def start_date():
31 | date_entry = input('Enter a starting date in MM/DD/YYYY format: ')
32 | start = datetime.datetime.strptime(date_entry,'%m/%d/%Y')
33 | start = start.strftime('%Y-%m-%d')
34 | return start
35 |
36 | # Input End Date
37 | def end_date():
38 |     date_entry = input('Enter an ending date in MM/DD/YYYY format: ')
39 | end = datetime.datetime.strptime(date_entry,'%m/%d/%Y')
40 | end = end.strftime('%Y-%m-%d')
41 | return end
42 |
43 | # Input Symbols
44 | def input_symbol():
45 | symbol = input("Enter symbol: ").upper()
46 | return symbol
47 |
48 | # Logistic Regression
49 | def stock_logistic_regression():
50 | s = start_date()
51 | e = end_date()
52 | sym = input_symbol()
53 | df = yf.download(sym, s, e)
54 |
55 |     df = df.reset_index(drop=True)  # 'Date' is the index after yf.download, not a column
56 | X = df.loc[:, df.columns != 'Adj Close']
57 | y = np.where (df['Adj Close'].shift(-1) > df['Adj Close'],1,-1)
58 |
59 | split = int(0.7*len(df))
60 | X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:]
61 | model = LogisticRegression()
62 | model = model.fit(X_train,y_train)
63 | predicted = model.predict(X_test)
64 | print(metrics.confusion_matrix(y_test, predicted))
65 | print(metrics.classification_report(y_test, predicted))
66 | print(model.score(X_test,y_test))
67 | cross_val = cross_val_score(LogisticRegression(), X, y, scoring='accuracy', cv=10)
68 | print(cross_val)
69 | print(cross_val.mean())
70 | return
71 |
72 | # Linear Regression
73 | def stock_linear_regression():
74 | s = start_date()
75 | e = end_date()
76 | sym = input_symbol()
77 | df = yf.download(sym, s, e)
78 | n = len(df.index)
79 | X = np.array(df['Open']).reshape(n,-1)
80 | Y = np.array(df['Adj Close']).reshape(n,-1)
81 | lr = LinearRegression()
82 | lr.fit(X, Y)
83 | lr.predict(X)
84 |
85 | plt.figure(figsize=(12,8))
86 | plt.scatter(df['Adj Close'], lr.predict(X))
87 | plt.plot(X, lr.predict(X), color = 'red')
88 | plt.xlabel('Prices')
89 | plt.ylabel('Predicted Prices')
90 | plt.grid()
91 | plt.title(sym + ' Prices vs Predicted Prices')
92 | plt.show()
93 | print('____________Summary:____________')
94 | print('Estimate intercept coefficient:', lr.intercept_)
95 | print('Number of coefficients:', len(lr.coef_))
96 |     print('R^2 Score:', lr.score(X, Y))
97 | print("")
98 | return
99 |
100 | # Support Vector Regression
101 | def stock_svr():
102 | s = start_date()
103 | e = end_date()
104 | sym = input_symbol()
105 | df = yf.download(sym, s, e)
106 |     dates = np.arange(len(df.index)).reshape(-1, 1)  # integer day indices; SVR cannot fit raw datetimes
107 |     x = np.array([[31]])  # day index to predict
109 | prices = df['Adj Close']
110 | svr_lin = SVR(kernel='linear', C=1e3)
111 | svr_poly = SVR(kernel='poly', C=1e3, degree=2)
112 | svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
113 |
114 | # Fit regression model
115 |     svr_lin.fit(dates, prices)
116 | svr_poly.fit(dates, prices)
117 | svr_rbf.fit(dates, prices)
118 |
119 | plt.figure(figsize=(12,8))
120 | plt.scatter(dates, prices, c='k', label='Data')
121 | plt.plot(dates, svr_lin.predict(dates), c='g', label='Linear model')
122 | plt.plot(dates, svr_rbf.predict(dates), c='r', label='RBF model')
123 | plt.plot(dates, svr_poly.predict(dates), c='b', label='Polynomial model')
124 | plt.xlabel('Date')
125 | plt.ylabel('Price')
126 | plt.title('Support Vector Regression')
127 | plt.legend()
128 | plt.show()
129 | print('____________Summary:____________')
130 |     print('Linear Model:', svr_lin.predict(x)[0])
131 |     print('RBF Model:', svr_rbf.predict(x)[0])
132 |     print('Polynomial Model:', svr_poly.predict(x)[0])
133 | print("")
134 | return
135 |
136 |
137 | def main():
138 | run_program = True
139 | while run_program:
140 | print("__________Stock Price Prediction__________")
141 | print("Choose Options:")
142 | for i in range(1, len(options)+1):
143 | print("{} - {}".format(i, options[i-1]))
144 | choice = int(input())
145 |
146 | if choice == 1:
147 | print("____________Linear Regression_____________")
148 | stock_linear_regression()
149 | elif choice == 2:
150 | print("____________Logistic Regression_____________")
151 | stock_logistic_regression()
152 | elif choice == 3:
153 | print("____________Support Vector Regression_____________")
154 |             stock_svr()
155 | elif choice == 4:
156 | run_program = False
157 |
158 |
159 | if __name__ == "__main__":
160 | main()
161 |
--------------------------------------------------------------------------------
/Stock_Apps/Stock_ML_Predict_Apps_Menu.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Dec 20 19:44:59 2019
4 |
5 | @author: Tin
6 | """
7 |
8 | import numpy as np
9 | import matplotlib.pyplot as plt
10 |
11 | import datetime
12 |
13 | # Machine Learning Libraries
14 | from sklearn.linear_model import LinearRegression
15 | from sklearn.linear_model import LogisticRegression
16 | # from sklearn.model_selection import train_test_split
17 | from sklearn import metrics
18 | from sklearn.model_selection import cross_validate
19 | from sklearn.svm import SVR
20 |
21 | import warnings
22 | warnings.filterwarnings("ignore")
23 |
24 | # yahoo finance used to fetch data
25 | import yfinance as yf
26 | yf.pdr_override()
27 |
28 | options = " Stock Linear Regression Prediction, Stock Logistic Regression Prediction, Support Vector Regression, Exit".split(",")
29 |
30 | # Input Start Date
31 | def start_date():
32 | date_entry = input('Enter a starting date in MM/DD/YYYY format: ')
33 | start = datetime.datetime.strptime(date_entry,'%m/%d/%Y')
34 | start = start.strftime('%Y-%m-%d')
35 | return start
36 |
37 | # Input End Date
38 | def end_date():
39 |     date_entry = input('Enter an ending date in MM/DD/YYYY format: ')
40 | end = datetime.datetime.strptime(date_entry,'%m/%d/%Y')
41 | end = end.strftime('%Y-%m-%d')
42 | return end
43 |
44 | # Input Symbols
45 | def input_symbol():
46 | symbol = input("Enter symbol: ").upper()
47 | return symbol
48 |
49 | # Logistic Regression
50 | def stock_logistic_regression():
51 | s = start_date()
52 | e = end_date()
53 | sym = input_symbol()
54 | df = yf.download(sym, s, e)
55 |
56 |     df = df.reset_index(drop=True)  # 'Date' is the index after yf.download, not a column
57 | X = df.loc[:, df.columns != 'Adj Close']
58 | y = np.where (df['Adj Close'].shift(-1) > df['Adj Close'],1,-1)
59 |
60 | split = int(0.7*len(df))
61 | X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:]
62 | model = LogisticRegression()
63 | model = model.fit(X_train,y_train)
64 | predicted = model.predict(X_test)
65 | print(metrics.confusion_matrix(y_test, predicted))
66 | print(metrics.classification_report(y_test, predicted))
67 | print(model.score(X_test,y_test))
68 |     cross_val = cross_validate(LogisticRegression(), X, y, scoring='accuracy', cv=10)
69 |     print('_____________Summary:_____________')
70 |     print(cross_val['test_score'])  # cross_validate returns a dict; scores live under 'test_score'
71 |     print(cross_val['test_score'].mean())
72 | print("")
73 | ans = ['1', '2']
74 | user_input=input("""
75 | What would you like to do next? Enter option 1 or 2.
76 | 1. Menu
77 | 2. Exit
78 | Command: """)
79 | while user_input not in ans:
80 | print("Error: Please enter a a valid option 1-2")
81 | user_input=input("Command: ")
82 | if user_input=="1":
83 | menu()
84 | elif user_input=="2":
85 | exit()
86 |
87 |
88 | # Linear Regression
89 | def stock_linear_regression():
90 | s = start_date()
91 | e = end_date()
92 | sym = input_symbol()
93 | df = yf.download(sym, s, e)
94 | n = len(df.index)
95 | X = np.array(df['Open']).reshape(n,-1)
96 | Y = np.array(df['Adj Close']).reshape(n,-1)
97 | lr = LinearRegression()
98 | lr.fit(X, Y)
99 | lr.predict(X)
100 |
101 | plt.figure(figsize=(12,8))
102 | plt.scatter(df['Adj Close'], lr.predict(X))
103 | plt.plot(X, lr.predict(X), color = 'red')
104 | plt.xlabel('Prices')
105 | plt.ylabel('Predicted Prices')
106 | plt.grid()
107 | plt.title(sym + ' Prices vs Predicted Prices')
108 | plt.show()
109 | print('_____________Summary:_____________')
110 | print('Estimate intercept coefficient:', lr.intercept_)
111 | print('Number of coefficients:', len(lr.coef_))
112 |     print('R^2 Score:', lr.score(X, Y))
113 | print("")
114 | ans = ['1', '2']
115 | user_input=input("""
116 | What would you like to do next? Enter option 1 or 2.
117 | 1. Menu
118 | 2. Exit
119 | Command: """)
120 | while user_input not in ans:
121 | print("Error: Please enter a a valid option 1-2")
122 | user_input=input("Command: ")
123 | if user_input=="1":
124 | menu()
125 | elif user_input=="2":
126 | exit()
127 |
128 |
129 | # Support Vector Regression
130 | def stock_svr():
131 | s = start_date()
132 | e = end_date()
133 | sym = input_symbol()
134 | df = yf.download(sym, s, e)
135 |     dates = np.arange(len(df.index)).reshape(-1, 1)  # integer day indices; SVR cannot fit raw datetimes
136 |     x = np.array([[31]])  # day index to predict
138 | prices = df['Adj Close']
139 | svr_lin = SVR(kernel='linear', C=1e3)
140 | svr_poly = SVR(kernel='poly', C=1e3, degree=2)
141 | svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
142 |
143 | # Fit regression model
144 |     svr_lin.fit(dates, prices)
145 | svr_poly.fit(dates, prices)
146 | svr_rbf.fit(dates, prices)
147 |
148 | plt.figure(figsize=(12,8))
149 | plt.scatter(dates, prices, c='k', label='Data')
150 | plt.plot(dates, svr_lin.predict(dates), c='g', label='Linear model')
151 | plt.plot(dates, svr_rbf.predict(dates), c='r', label='RBF model')
152 | plt.plot(dates, svr_poly.predict(dates), c='b', label='Polynomial model')
153 | plt.xlabel('Date')
154 | plt.ylabel('Price')
155 | plt.title('Support Vector Regression')
156 | plt.legend()
157 | plt.show()
158 | print('_____________Summary:_____________')
159 |     print('Linear Model:', svr_lin.predict(x)[0])
160 |     print('RBF Model:', svr_rbf.predict(x)[0])
161 |     print('Polynomial Model:', svr_poly.predict(x)[0])
162 | print("")
163 | ans = ['1', '2']
164 | user_input=input("""
165 | What would you like to do next? Enter option 1 or 2.
166 | 1. Menu
167 | 2. Exit
168 | Command: """)
169 | while user_input not in ans:
170 | print("Error: Please enter a a valid option 1-2")
171 | user_input=input("Command: ")
172 | if user_input=="1":
173 | menu()
174 | elif user_input=="2":
175 | exit()
176 |
177 |
178 | #***********************************************************************************************************************#
179 | #******************************************************* Menu **********************************************************#
180 | #***********************************************************************************************************************#
181 | def menu():
182 | ans = ['1', '2', '3', '4', '0']
183 | print("""
184 |
185 | MENU
186 | MACHINE LEARNING PREDICTION
187 | ---------------------------
188 |     1. Linear Regression
189 |     2. Logistic Regression
190 |     3. Support Vector Regression
191 |     4. Beginning Menu
192 |     0. Exit the Program
193 | """)
194 | user_input = input("Command (0-3): ")
195 | while user_input not in ans:
196 | print("Error: Please enter a valid option 0-3")
197 | user_input=input("Command: ")
198 | if user_input == '1':
199 | stock_linear_regression()
200 | elif user_input == '2':
201 | stock_logistic_regression()
202 | elif user_input == '3':
203 | stock_svr()
204 | elif user_input == "4":
205 | beginning()
206 | elif user_input == "0":
207 | exit()
208 |
209 |
210 | #***********************************************************************************************************************#
211 | #*************************************************** Start of Program **************************************************#
212 | #***********************************************************************************************************************#
213 | def beginning():
214 | print()
215 | print("----------Welcome to Machine Learning Predictions--------")
216 | print("""
217 | Please choose option 1 or 2
218 |
219 | 1. Menu
220 | 2. Exit Program
221 |
222 | ---------------------------------------------""")
223 | ans = ['1', '2']
224 | user_input=input("What is your Option?: ")
225 | while user_input not in ans:
226 | print("Error: Please enter a a valid option 1-2")
227 | user_input=input("Command: ")
228 | if user_input=="1":
229 | menu()
230 | elif user_input=="2":
231 | exit()
232 |
233 |
234 | #***********************************************************************************************************************#
235 | beginning()
236 |
--------------------------------------------------------------------------------
/Stock_Apps/Stock_ML_Rescale_Data_Apps.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat May 25 14:21:27 2019
4 |
5 | @author: Tin
6 | """
7 | import numpy as np
8 | import pandas as pd
9 | import datetime
10 |
11 | from sklearn.preprocessing import MinMaxScaler
12 | from sklearn.preprocessing import StandardScaler
13 | from sklearn.model_selection import train_test_split
14 |
15 |
16 | import warnings
17 | warnings.filterwarnings("ignore")
18 |
19 | # yahoo finance used to fetch data
20 | import yfinance as yf
21 | yf.pdr_override()
22 |
23 | options = " Data Preprocessing, Exit".split(",")
24 |
25 | # Input Start Date
26 | def start_date():
27 | date_entry = input('Enter a starting date in MM/DD/YYYY format: ')
28 | start = datetime.datetime.strptime(date_entry,'%m/%d/%Y')
29 | start = start.strftime('%Y-%m-%d')
30 | return start
31 |
32 | # Input End Date
33 | def end_date():
34 |     date_entry = input('Enter an ending date in MM/DD/YYYY format: ')
35 | end = datetime.datetime.strptime(date_entry,'%m/%d/%Y')
36 | end = end.strftime('%Y-%m-%d')
37 | return end
38 |
39 | # Input Symbols
40 | def input_symbol():
41 | symbol = input("Enter symbol: ").upper()
42 | return symbol
43 |
44 | def preprocessing_dataset():
45 | s = start_date()
46 | e = end_date()
47 | sym = input_symbol()
48 | df = yf.download(sym, s, e)
49 | array = df.values
50 | X = array[:,0:5]
51 | Y = array[:,5]
52 | # initialising the MinMaxScaler
53 | scaler = MinMaxScaler(feature_range=(0, 1))
54 | # learning the statistical parameters for each of the data and transforming
55 | rescaledX = scaler.fit_transform(X)
56 | np.set_printoptions(precision=3)
57 |     print('Rescaled values between 0 and 1')
58 | print(rescaledX[0:5,:])
59 | print("")
60 | # Splitting the datasets into training sets and Test sets
61 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
62 | sc_X = StandardScaler()
63 | # Splitting the datasets into training sets and Test sets
64 | X_train = sc_X.fit_transform(X_train)
65 |     X_test = sc_X.transform(X_test)  # transform only; the scaler is fitted on the training set
66 | print("Training Dataset")
67 | print(X_train)
68 | print("")
69 | print(Y_train)
70 | print("")
71 | print("Testing Dataset")
72 | print(X_test)
73 | print("")
74 | print(Y_test)
75 | return
76 |
77 |
78 | def main():
79 | run_program = True
80 | while run_program:
81 | print("")
82 | print("__________Preprocessing Dataset__________")
83 | print("")
84 | print("Choose Options:")
85 | print("")
86 | for i in range(1, len(options)+1):
87 | print("{} - {}".format(i, options[i-1]))
88 | choice = int(input())
89 |
90 | if choice == 1:
91 | preprocessing_dataset()
92 | elif choice == 2:
93 | run_program = False
94 |
95 |
96 | if __name__ == "__main__":
97 | main()
--------------------------------------------------------------------------------
/Tensorflow_Basics.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# Tensorflow Basic"
7 | ],
8 | "metadata": {}
9 | },
10 | {
11 | "cell_type": "code",
12 | "source": [
13 | "import tensorflow as tf"
14 | ],
15 | "outputs": [],
16 | "execution_count": 14,
17 | "metadata": {
18 | "collapsed": false,
19 | "outputHidden": false,
20 | "inputHidden": false
21 | }
22 | },
23 | {
24 | "cell_type": "code",
25 | "source": [
26 | "x = tf.constant(2)\n",
27 | "y = tf.constant(4)"
28 | ],
29 | "outputs": [],
30 | "execution_count": 2,
31 | "metadata": {
32 | "collapsed": false,
33 | "outputHidden": false,
34 | "inputHidden": false
35 | }
36 | },
37 | {
38 | "cell_type": "code",
39 | "source": [
40 | "with tf.Session() as sess:\n",
41 | " print(\"x: %i\" % sess.run(x), \"y: %i\" % sess.run(y))\n",
42 | " print(\"Addition with constants: %i\" % sess.run(x+y))\n",
43 | " print(\"Multiplication with constants: %i\" % sess.run(x*y))"
44 | ],
45 | "outputs": [
46 | {
47 | "output_type": "stream",
48 | "name": "stdout",
49 | "text": [
50 | "x: 2 y: 4\n",
51 | "Addition with constants: 6\n",
52 | "Multiplication with constants: 8\n"
53 | ]
54 | }
55 | ],
56 | "execution_count": 3,
57 | "metadata": {
58 | "collapsed": false,
59 | "outputHidden": false,
60 | "inputHidden": false
61 | }
62 | },
63 | {
64 | "cell_type": "code",
65 | "source": [
66 | "add = tf.add(x, y)\n",
67 | "sess = tf.Session()\n",
68 | "value_of_add = sess.run(add)\n",
69 | "print(value_of_add)\n",
70 | "sess.close()"
71 | ],
72 | "outputs": [
73 | {
74 | "output_type": "stream",
75 | "name": "stdout",
76 | "text": [
77 | "6\n"
78 | ]
79 | }
80 | ],
81 | "execution_count": 4,
82 | "metadata": {
83 | "collapsed": false,
84 | "outputHidden": false,
85 | "inputHidden": false
86 | }
87 | },
88 | {
89 | "cell_type": "code",
90 | "source": [
91 | "sub = tf.subtract(x, y)\n",
92 | "sess = tf.Session()\n",
93 | "value_of_sub = sess.run(sub)\n",
94 | "print(value_of_sub)\n",
95 | "sess.close()"
96 | ],
97 | "outputs": [
98 | {
99 | "output_type": "stream",
100 | "name": "stdout",
101 | "text": [
102 | "-2\n"
103 | ]
104 | }
105 | ],
106 | "execution_count": 5,
107 | "metadata": {
108 | "collapsed": false,
109 | "outputHidden": false,
110 | "inputHidden": false
111 | }
112 | },
113 | {
114 | "cell_type": "code",
115 | "source": [
116 | "mult = tf.multiply(x, y)\n",
117 | "sess = tf.Session()\n",
118 | "value_of_mult = sess.run(mult)\n",
119 | "print(value_of_mult)\n",
120 | "sess.close()"
121 | ],
122 | "outputs": [
123 | {
124 | "output_type": "stream",
125 | "name": "stdout",
126 | "text": [
127 | "8\n"
128 | ]
129 | }
130 | ],
131 | "execution_count": 6,
132 | "metadata": {
133 | "collapsed": false,
134 | "outputHidden": false,
135 | "inputHidden": false
136 | }
137 | },
138 | {
139 | "cell_type": "code",
140 | "source": [
141 | "div = tf.divide(x, y)\n",
142 | "sess = tf.Session()\n",
143 | "value_of_div = sess.run(div)\n",
144 | "print(value_of_div)\n",
145 | "sess.close()"
146 | ],
147 | "outputs": [
148 | {
149 | "output_type": "stream",
150 | "name": "stdout",
151 | "text": [
152 | "0.5\n"
153 | ]
154 | }
155 | ],
156 | "execution_count": 7,
157 | "metadata": {
158 | "collapsed": false,
159 | "outputHidden": false,
160 | "inputHidden": false
161 | }
162 | },
163 | {
164 | "cell_type": "code",
165 | "source": [
166 | "# y = W.x + b\n",
167 | "x = tf.constant(-2.0, name=\"x\", dtype=tf.float32)\n",
168 | "W = tf.constant(8.0, name=\"W\", dtype=tf.float32)\n",
169 | "b = tf.constant(10.0, name=\"b\", dtype=tf.float32)\n",
170 | "\n",
171 | "y = tf.Variable(tf.add(tf.multiply(W, x), b))\n",
172 | "\n",
173 | "init = tf.global_variables_initializer()\n",
174 | "\n",
175 | "with tf.Session() as session:\n",
176 | " session.run(init)\n",
177 | " print(session.run(y)) "
178 | ],
179 | "outputs": [
180 | {
181 | "output_type": "stream",
182 | "name": "stdout",
183 | "text": [
184 | "-6.0\n"
185 | ]
186 | }
187 | ],
188 | "execution_count": 8,
189 | "metadata": {
190 | "collapsed": false,
191 | "outputHidden": false,
192 | "inputHidden": false
193 | }
194 | },
195 | {
196 | "cell_type": "code",
197 | "source": [
198 | "a = tf.constant(2.8)\n",
199 | "b = tf.constant(4.3)\n",
200 | "\n",
201 | "# Basic Math\n",
202 | "total = a + b\n",
203 | "diff = a - b\n",
204 | "quot = tf.div(a, b)\n",
205 | "prod = tf.multiply(a, b)\n",
206 | "\n",
207 | "# Matrix Multiplication\n",
208 | "c = tf.constant([[1,2], [3,4], [5,6]])\n",
209 | "d = tf.constant([[9,8,7], [6,5,4]])\n",
210 | "matrix_prod = tf.matmul(c, d)\n",
211 | "\n",
212 | "# Excute the Session\n",
213 | "with tf.Session() as sess:\n",
214 | " print(\"Sum: %f\" % sess.run(total))\n",
215 | " print(\"Difference: %f\" % sess.run(diff))\n",
216 | " print(\"Division: %f\" % sess.run(quot))\n",
217 | " print(\"Multiplication: %f\" % sess.run(prod))\n",
218 | " print(\"Matrix prouct: \", sess.run(matrix_prod))\n",
219 | " print(\"Round: %f\" % sess.run(tf.round(a)))\n",
220 | " print(\"Round: %f\" % sess.run(tf.round(b)))"
221 | ],
222 | "outputs": [
223 | {
224 | "output_type": "stream",
225 | "name": "stdout",
226 | "text": [
227 | "Sum: 7.100000\n",
228 | "Difference: -1.500000\n",
229 | "Division: 0.651163\n",
230 | "Multiplication: 12.040000\n",
231 | "Matrix prouct: [[21 18 15]\n",
232 | " [51 44 37]\n",
233 | " [81 70 59]]\n",
234 | "Round: 3.000000\n",
235 | "Round: 4.000000\n"
236 | ]
237 | }
238 | ],
239 | "execution_count": 9,
240 | "metadata": {}
241 | },
242 | {
243 | "cell_type": "code",
244 | "source": [
245 | "# 2-D tensor\n",
246 | "a = tf.constant([1, 2, 3, 4, 5, 6], shape=[2, 3])\n",
247 | "\n",
248 | "with tf.Session() as sess:\n",
249 | " print(\"2-D tensor:\")\n",
250 | " print(sess.run(a))"
251 | ],
252 | "outputs": [
253 | {
254 | "output_type": "stream",
255 | "name": "stdout",
256 | "text": [
257 | "2-D tensor:\n",
258 | "[[1 2 3]\n",
259 | " [4 5 6]]\n"
260 | ]
261 | }
262 | ],
263 | "execution_count": 10,
264 | "metadata": {
265 | "collapsed": false,
266 | "outputHidden": false,
267 | "inputHidden": false
268 | }
269 | },
270 | {
271 | "cell_type": "code",
272 | "source": [
273 | "# 2-D tensor\n",
274 | "b = tf.constant([7, 8, 9, 10, 11, 12], shape=[3, 2])\n",
275 | "\n",
276 | "with tf.Session() as sess:\n",
277 | " print(\"2-D tensor:\")\n",
278 | " print(sess.run(b))"
279 | ],
280 | "outputs": [
281 | {
282 | "output_type": "stream",
283 | "name": "stdout",
284 | "text": [
285 | "2-D tensor:\n",
286 | "[[ 7 8]\n",
287 | " [ 9 10]\n",
288 | " [11 12]]\n"
289 | ]
290 | }
291 | ],
292 | "execution_count": 11,
293 | "metadata": {
294 | "collapsed": false,
295 | "outputHidden": false,
296 | "inputHidden": false
297 | }
298 | },
299 | {
300 | "cell_type": "code",
301 | "source": [
302 | "c = tf.matmul(a, b)\n",
303 | "\n",
304 | "with tf.Session() as sess:\n",
305 | " print(\"a * b:\")\n",
306 | " print(sess.run(c))"
307 | ],
308 | "outputs": [
309 | {
310 | "output_type": "stream",
311 | "name": "stdout",
312 | "text": [
313 | "a * b:\n",
314 | "[[ 58 64]\n",
315 | " [139 154]]\n"
316 | ]
317 | }
318 | ],
319 | "execution_count": 12,
320 | "metadata": {
321 | "collapsed": false,
322 | "outputHidden": false,
323 | "inputHidden": false
324 | }
325 | },
326 | {
327 | "cell_type": "code",
328 | "source": [
329 | "# 3-D tensor\n",
330 | "import numpy as np\n",
331 | "x = tf.constant(np.arange(1, 13, dtype=np.int32),\n",
332 | " shape=[2, 2, 3])\n",
333 | "\n",
334 | "y = tf.constant(np.arange(13, 25, dtype=np.int32),\n",
335 | " shape=[2, 3, 2])\n",
336 | "\n",
337 | "z = tf.constant(np.arange(1, 13, dtype=np.int32),\n",
338 | " shape=[2, 2, 3])\n",
339 | "\n",
340 | "with tf.Session() as sess:\n",
341 | " print(\"3-D tensor:\")\n",
342 | " print('--------------------')\n",
343 | " print(sess.run(x))\n",
344 | " print('--------------------')\n",
345 | " print(sess.run(y))\n",
346 | " print('--------------------')\n",
347 | " print('3-D Multiplication:')\n",
348 | " print(sess.run(tf.matmul(x,y)))\n",
349 | " print('--------------------')\n",
350 | " print('Dot Product:')\n",
351 | " print(sess.run(tf.tensordot(x,z, 3)))"
352 | ],
353 | "outputs": [
354 | {
355 | "output_type": "stream",
356 | "name": "stdout",
357 | "text": [
358 | "3-D tensor:\n",
359 | "--------------------\n",
360 | "[[[ 1 2 3]\n",
361 | " [ 4 5 6]]\n",
362 | "\n",
363 | " [[ 7 8 9]\n",
364 | " [10 11 12]]]\n",
365 | "--------------------\n",
366 | "[[[13 14]\n",
367 | " [15 16]\n",
368 | " [17 18]]\n",
369 | "\n",
370 | " [[19 20]\n",
371 | " [21 22]\n",
372 | " [23 24]]]\n",
373 | "--------------------\n",
374 | "3-D Multiplication:\n",
375 | "[[[ 94 100]\n",
376 | " [229 244]]\n",
377 | "\n",
378 | " [[508 532]\n",
379 | " [697 730]]]\n",
380 | "--------------------\n",
381 | "Dot Product:\n",
382 | "650\n"
383 | ]
384 | }
385 | ],
386 | "execution_count": 13,
387 | "metadata": {
388 | "collapsed": false,
389 | "outputHidden": false,
390 | "inputHidden": false
391 | }
392 | }
393 | ],
394 | "metadata": {
395 | "kernel_info": {
396 | "name": "python3"
397 | },
398 | "language_info": {
399 | "pygments_lexer": "ipython3",
400 | "nbconvert_exporter": "python",
401 | "codemirror_mode": {
402 | "version": 3,
403 | "name": "ipython"
404 | },
405 | "version": "3.5.5",
406 | "name": "python",
407 | "file_extension": ".py",
408 | "mimetype": "text/x-python"
409 | },
410 | "kernelspec": {
411 | "name": "python3",
412 | "language": "python",
413 | "display_name": "Python 3"
414 | },
415 | "nteract": {
416 | "version": "0.15.0"
417 | }
418 | },
419 | "nbformat": 4,
420 | "nbformat_minor": 4
421 | }
--------------------------------------------------------------------------------
/Title.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LastAncientOne/Deep_Learning_Machine_Learning_Stock/e31e9fc689cd2cb6d74a8f2694750afd6e13bcec/Title.PNG
--------------------------------------------------------------------------------
/Variance_Inflation_Factor.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# Variance inflation Factor (VIF)"
7 | ],
8 | "metadata": {
9 | "nteract": {
10 | "transient": {
11 | "deleting": false
12 | }
13 | }
14 | }
15 | },
16 | {
17 | "cell_type": "markdown",
18 | "source": [
19 | "In statistics, the variance inflation factor (VIF) is the quotient of the variance in a model with multiple terms by the variance of a model with one term alone. It quantifies the severity of multicollinearity in an ordinary least squares regression analysis. It provides an index that measures how much the variance (the square of the estimate's standard deviation) of an estimated regression coefficient is increased because of collinearity. https://en.wikipedia.org/wiki/Variance_inflation_factor"
20 | ],
21 | "metadata": {
22 | "nteract": {
23 | "transient": {
24 | "deleting": false
25 | }
26 | }
27 | }
28 | },
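{
"cell_type": "markdown",
"source": [
"As a quick sketch (not part of the original notebook, using a small made-up design matrix), the cell below checks the definition above: it computes $VIF_i = 1/(1 - R_i^2)$ by hand, using the uncentered $R_i^2$ from a no-intercept regression of column $i$ on the other columns, and compares the result against `statsmodels`' `variance_inflation_factor`."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"from statsmodels.stats.outliers_influence import variance_inflation_factor\n",
"\n",
"rng = np.random.RandomState(0)\n",
"A = rng.normal(size=(100, 3))\n",
"A[:, 2] = A[:, 0] + 0.1 * rng.normal(size=100)  # make column 2 nearly collinear with column 0\n",
"\n",
"for i in range(A.shape[1]):\n",
"    others = np.delete(A, i, axis=1)\n",
"    beta = np.linalg.lstsq(others, A[:, i], rcond=None)[0]\n",
"    resid = A[:, i] - others @ beta\n",
"    r2 = 1.0 - (resid @ resid) / (A[:, i] @ A[:, i])  # uncentered R^2 (no intercept), as statsmodels uses\n",
"    print(i, 1.0 / (1.0 - r2), variance_inflation_factor(A, i))"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},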
29 | {
30 | "cell_type": "code",
31 | "source": [
32 | "import numpy as np\n",
33 | "import matplotlib.pyplot as plt\n",
34 | "import pandas as pd\n",
35 | "\n",
36 | "import warnings\n",
37 | "warnings.filterwarnings(\"ignore\")\n",
38 | "\n",
39 | "# fetch yahoo data\n",
40 | "import yfinance as yf\n",
41 | "yf.pdr_override()"
42 | ],
43 | "outputs": [],
44 | "execution_count": 1,
45 | "metadata": {
46 | "collapsed": true,
47 | "jupyter": {
48 | "source_hidden": false,
49 | "outputs_hidden": false
50 | },
51 | "nteract": {
52 | "transient": {
53 | "deleting": false
54 | }
55 | },
56 | "execution": {
57 | "iopub.status.busy": "2020-08-09T21:00:54.254Z",
58 | "iopub.execute_input": "2020-08-09T21:00:54.261Z",
59 | "iopub.status.idle": "2020-08-09T21:00:55.369Z",
60 | "shell.execute_reply": "2020-08-09T21:00:55.394Z"
61 | }
62 | }
63 | },
64 | {
65 | "cell_type": "code",
66 | "source": [
67 | "# input\n",
68 | "symbol = 'AMD'\n",
69 | "start = '2014-01-01'\n",
70 | "end = '2018-08-27'\n",
71 | "\n",
72 | "# Read data \n",
73 | "dataset = yf.download(symbol,start,end)\n",
74 | "\n",
75 | "# Only keep close columns \n",
76 | "dataset.head()"
77 | ],
78 | "outputs": [
79 | {
80 | "output_type": "stream",
81 | "name": "stdout",
82 | "text": [
83 | "[*********************100%***********************] 1 of 1 completed\n"
84 | ]
85 | },
86 | {
87 | "output_type": "execute_result",
88 | "execution_count": 2,
89 | "data": {
90 | "text/html": "\n\n
\n \n \n | \n Adj Close | \n Close | \n High | \n Low | \n Open | \n Volume | \n
\n \n Date | \n | \n | \n | \n | \n | \n | \n
\n \n \n \n 2014-01-02 | \n 3.95 | \n 3.95 | \n 3.98 | \n 3.84 | \n 3.85 | \n 20548400 | \n
\n \n 2014-01-03 | \n 4.00 | \n 4.00 | \n 4.00 | \n 3.88 | \n 3.98 | \n 22887200 | \n
\n \n 2014-01-06 | \n 4.13 | \n 4.13 | \n 4.18 | \n 3.99 | \n 4.01 | \n 42398300 | \n
\n \n 2014-01-07 | \n 4.18 | \n 4.18 | \n 4.25 | \n 4.11 | \n 4.19 | \n 42932100 | \n
\n \n 2014-01-08 | \n 4.18 | \n 4.18 | \n 4.26 | \n 4.14 | \n 4.23 | \n 30678700 | \n
\n \n
\n
",
91 | "text/plain": " Adj Close Close High Low Open Volume\nDate \n2014-01-02 3.95 3.95 3.98 3.84 3.85 20548400\n2014-01-03 4.00 4.00 4.00 3.88 3.98 22887200\n2014-01-06 4.13 4.13 4.18 3.99 4.01 42398300\n2014-01-07 4.18 4.18 4.25 4.11 4.19 42932100\n2014-01-08 4.18 4.18 4.26 4.14 4.23 30678700"
92 | },
93 | "metadata": {}
94 | }
95 | ],
96 | "execution_count": 2,
97 | "metadata": {
98 | "collapsed": true,
99 | "jupyter": {
100 | "source_hidden": false,
101 | "outputs_hidden": false
102 | },
103 | "nteract": {
104 | "transient": {
105 | "deleting": false
106 | }
107 | },
108 | "execution": {
109 | "iopub.status.busy": "2020-08-09T21:00:55.376Z",
110 | "iopub.execute_input": "2020-08-09T21:00:55.383Z",
111 | "iopub.status.idle": "2020-08-09T21:00:56.732Z",
112 | "shell.execute_reply": "2020-08-09T21:00:56.802Z"
113 | }
114 | }
115 | },
116 | {
117 | "cell_type": "code",
118 | "source": [
119 | "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
120 | "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
121 | "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
122 | "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
123 | "dataset = dataset.dropna()"
124 | ],
125 | "outputs": [],
126 | "execution_count": 3,
127 | "metadata": {
128 | "collapsed": true,
129 | "jupyter": {
130 | "source_hidden": false,
131 | "outputs_hidden": false
132 | },
133 | "nteract": {
134 | "transient": {
135 | "deleting": false
136 | }
137 | },
138 | "execution": {
139 | "iopub.status.busy": "2020-08-09T21:00:56.741Z",
140 | "iopub.execute_input": "2020-08-09T21:00:56.747Z",
141 | "iopub.status.idle": "2020-08-09T21:00:56.761Z",
142 | "shell.execute_reply": "2020-08-09T21:00:56.806Z"
143 | }
144 | }
145 | },
146 | {
147 | "cell_type": "code",
148 | "source": [
149 | "from statsmodels.stats.outliers_influence import variance_inflation_factor"
150 | ],
151 | "outputs": [],
152 | "execution_count": 4,
153 | "metadata": {
154 | "collapsed": true,
155 | "jupyter": {
156 | "source_hidden": false,
157 | "outputs_hidden": false
158 | },
159 | "nteract": {
160 | "transient": {
161 | "deleting": false
162 | }
163 | },
164 | "execution": {
165 | "iopub.status.busy": "2020-08-09T21:00:56.774Z",
166 | "iopub.execute_input": "2020-08-09T21:00:56.780Z",
167 | "iopub.status.idle": "2020-08-09T21:00:57.253Z",
168 | "shell.execute_reply": "2020-08-09T21:00:57.355Z"
169 | }
170 | }
171 | },
172 | {
173 | "cell_type": "code",
174 | "source": [
175 | "X = dataset"
176 | ],
177 | "outputs": [],
178 | "execution_count": 5,
179 | "metadata": {
180 | "collapsed": true,
181 | "jupyter": {
182 | "source_hidden": false,
183 | "outputs_hidden": false
184 | },
185 | "nteract": {
186 | "transient": {
187 | "deleting": false
188 | }
189 | },
190 | "execution": {
191 | "iopub.status.busy": "2020-08-09T21:00:57.264Z",
192 | "iopub.execute_input": "2020-08-09T21:00:57.271Z",
193 | "iopub.status.idle": "2020-08-09T21:00:57.287Z",
194 | "shell.execute_reply": "2020-08-09T21:00:57.358Z"
195 | }
196 | }
197 | },
198 | {
199 | "cell_type": "code",
200 | "source": [
201 | "vif = pd.DataFrame()\n",
202 | "vif[\"VIF Factor\"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]\n",
203 | "vif[\"features\"] = X.columns"
204 | ],
205 | "outputs": [],
206 | "execution_count": 6,
207 | "metadata": {
208 | "collapsed": true,
209 | "jupyter": {
210 | "source_hidden": false,
211 | "outputs_hidden": false
212 | },
213 | "nteract": {
214 | "transient": {
215 | "deleting": false
216 | }
217 | },
218 | "execution": {
219 | "iopub.status.busy": "2020-08-09T21:00:57.302Z",
220 | "iopub.execute_input": "2020-08-09T21:00:57.307Z",
221 | "iopub.status.idle": "2020-08-09T21:00:57.316Z",
222 | "shell.execute_reply": "2020-08-09T21:00:57.361Z"
223 | }
224 | }
225 | },
226 | {
227 | "cell_type": "code",
228 | "source": [
229 | "vif"
230 | ],
231 | "outputs": [
232 | {
233 | "output_type": "execute_result",
234 | "execution_count": 7,
235 | "data": {
236 | "text/html": "\n\n
\n \n \n | \n VIF Factor | \n features | \n
\n \n \n \n 0 | \n inf | \n Adj Close | \n
\n \n 1 | \n inf | \n Close | \n
\n \n 2 | \n 1.321329e+04 | \n High | \n
\n \n 3 | \n 9.797402e+03 | \n Low | \n
\n \n 4 | \n 8.486690e+03 | \n Open | \n
\n \n 5 | \n 8.249018e+00 | \n Volume | \n
\n \n 6 | \n 1.718758e+00 | \n Increase_Decrease | \n
\n \n 7 | \n 2.503856e+00 | \n Buy_Sell_on_Open | \n
\n \n 8 | \n 1.869328e+00 | \n Buy_Sell | \n
\n \n 9 | \n 1.946329e+00 | \n Returns | \n
\n \n
\n
",
237 | "text/plain": " VIF Factor features\n0 inf Adj Close\n1 inf Close\n2 1.321329e+04 High\n3 9.797402e+03 Low\n4 8.486690e+03 Open\n5 8.249018e+00 Volume\n6 1.718758e+00 Increase_Decrease\n7 2.503856e+00 Buy_Sell_on_Open\n8 1.869328e+00 Buy_Sell\n9 1.946329e+00 Returns"
238 | },
239 | "metadata": {}
240 | }
241 | ],
242 | "execution_count": 7,
243 | "metadata": {
244 | "collapsed": true,
245 | "jupyter": {
246 | "source_hidden": false,
247 | "outputs_hidden": false
248 | },
249 | "nteract": {
250 | "transient": {
251 | "deleting": false
252 | }
253 | },
254 | "execution": {
255 | "iopub.status.busy": "2020-08-09T21:00:57.323Z",
256 | "iopub.execute_input": "2020-08-09T21:00:57.329Z",
257 | "iopub.status.idle": "2020-08-09T21:00:57.341Z",
258 | "shell.execute_reply": "2020-08-09T21:00:57.366Z"
259 | }
260 | }
261 | }
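,
{
"cell_type": "markdown",
"source": [
"`Adj Close` and `Close` are identical in this data, so their VIFs are infinite, and `High`, `Low`, and `Open` track one another closely, which is why their VIFs run into the thousands. A common follow-up, sketched below on the `X` from above (the threshold of 10 is a conventional rule of thumb, not from the original notebook), is to drop the worst offender repeatedly until every remaining feature's VIF falls under the threshold."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"def drop_high_vif(df, threshold=10.0):\n",
"    # repeatedly drop the column with the largest VIF until all fall below threshold\n",
"    cols = list(df.columns)\n",
"    while len(cols) > 1:\n",
"        vifs = [variance_inflation_factor(df[cols].values, i) for i in range(len(cols))]\n",
"        worst = int(np.argmax(vifs))\n",
"        if vifs[worst] < threshold:\n",
"            break\n",
"        cols.pop(worst)\n",
"    return df[cols]\n",
"\n",
"drop_high_vif(X).columns"
],
"outputs": [],
"execution_count": null,
"metadata": {}
}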
262 | ],
263 | "metadata": {
264 | "kernel_info": {
265 | "name": "python3"
266 | },
267 | "language_info": {
268 | "file_extension": ".py",
269 | "name": "python",
270 | "nbconvert_exporter": "python",
271 | "version": "3.5.5",
272 | "mimetype": "text/x-python",
273 | "codemirror_mode": {
274 | "version": 3,
275 | "name": "ipython"
276 | },
277 | "pygments_lexer": "ipython3"
278 | },
279 | "kernelspec": {
280 | "argv": [
281 | "C:\\Users\\Tin Hang\\Anaconda3\\envs\\py35\\python.exe",
282 | "-m",
283 | "ipykernel_launcher",
284 | "-f",
285 | "{connection_file}"
286 | ],
287 | "display_name": "Python 3",
288 | "language": "python",
289 | "name": "python3"
290 | },
291 | "nteract": {
292 | "version": "0.24.1"
293 | }
294 | },
295 | "nbformat": 4,
296 | "nbformat_minor": 0
297 | }
--------------------------------------------------------------------------------