├── .gitignore
├── LICENSE
├── MPT_portfolio_optimisation_input_target_return_targetvariance.py
├── README.md
├── Time_Series_Stationarity_Tests1.py
├── lstm_stockprice_forecast.py
├── portfolio_allocation.py
└── portfolio_getyahoodata.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Hexal

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/MPT_portfolio_optimisation_input_target_return_targetvariance.py:
--------------------------------------------------------------------------------
import math

import numpy


def calc_min_variance_portfolio(return_vector, stddev_vector, correlation_matrix, target_return):
    """
    Given return, standard deviation, and correlation data on multiple assets and a
    target portfolio return, calculate the minimum variance portfolio that achieves
    the target_return, if possible.

    return_vector: column vector of expected returns
    stddev_vector: column vector of standard deviations of returns
    correlation_matrix: correlation matrix
    target_return: target portfolio return
    returns: (portfolio allocations, portfolio standard deviation)

    Short positions are indicated as negative allocations.
    """
    MU = return_vector
    R = correlation_matrix
    m = target_return
    # Build the covariance matrix from the correlation matrix and std deviations.
    # numpy.matrix is kept for the original's * and .I operators, though numpy
    # recommends plain arrays in new code.
    S = numpy.matrix(numpy.diagflat(stddev_vector))
    COV = S * R * S
    ONE = numpy.matrix((1,) * COV.shape[0]).T
    A = ONE.T * COV.I * ONE
    a = float(A)
    B = MU.T * COV.I * ONE
    b = float(B)
    C = MU.T * COV.I * MU
    c = float(C)
    LAMBDA = (a * m - b) / (a * c - (b * b))
    GAMMA = (c - b * m) / ((a * c) - (b * b))
    WSTAR = COV.I * ((LAMBDA * MU) + (GAMMA * ONE))
    STDDEV = math.sqrt(float(WSTAR.T * COV * WSTAR))
    return WSTAR, STDDEV
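
# The closed form above is the standard Lagrangian solution to the
# minimum-variance frontier problem: minimise w' COV w subject to
# w' MU = m and w' 1 = 1. With the scalars
#   a = 1' COV^-1 1,  b = MU' COV^-1 1,  c = MU' COV^-1 MU
# the multipliers are
#   lambda = (a*m - b) / (a*c - b^2),  gamma = (c - b*m) / (a*c - b^2)
# and the optimal weights are
#   w* = COV^-1 (lambda * MU + gamma * 1)
# which is exactly what the function computes.
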
def calc_max_return_portfolio(return_vector, stddev_vector, correlation_matrix, target_stddev):
    """
    Given return, standard deviation, and correlation data on multiple assets and a
    target portfolio risk level, calculate the maximum return portfolio whose risk
    does not exceed the target, if possible.

    return_vector: column vector of expected returns
    stddev_vector: column vector of standard deviations of returns
    correlation_matrix: correlation matrix
    target_stddev: target portfolio standard deviation (the filename says
        "variance", but the quantity compared below is a standard deviation)
    returns: (portfolio allocations, portfolio standard deviation, portfolio return)

    Short positions are indicated as negative allocations.
    """
    last_return = None
    last_allocation = None
    last_stddev = None
    target_return = float(min(return_vector))
    # Walk up the frontier in small return increments until the risk budget
    # is exceeded, then return the last feasible portfolio.
    while target_return <= float(max(return_vector)):
        this_allocation, this_stddev = calc_min_variance_portfolio(
            return_vector, stddev_vector, correlation_matrix, target_return)
        if this_stddev > target_stddev:
            return (last_allocation, last_stddev, last_return)
        last_allocation = this_allocation
        last_stddev = this_stddev
        last_return = target_return
        target_return += .0005  # TODO: linear search, not ideal (try Newton-Raphson or bisection instead)
    return (None, None, None)


if __name__ == "__main__":
    return_vector = numpy.matrix((0.05, 0.07, 0.15, 0.27)).T
    stddev_vector = numpy.matrix((0.07, 0.12, 0.30, 0.60)).T
    correlation_matrix = numpy.matrix(((1.0, 0.8, 0.5, 0.4),
                                       (0.8, 1.0, 0.7, 0.5),
                                       (0.5, 0.7, 1.0, 0.8),
                                       (0.4, 0.5, 0.8, 1.0)))

    target_return = .125
    allocations, stddev = calc_min_variance_portfolio(return_vector, stddev_vector,
                                                      correlation_matrix, target_return)
    print("scenario 1 - optimize portfolio for target return")
    print("target return: %.2f%%" % (target_return * 100.0))
    print("min variance portfolio:")
    print(allocations)
    print("portfolio std deviation: %.2f%%" % (stddev * 100.0))

    print("-" * 40)

    target_stddev = .15
    allocations, stddev, rtn = calc_max_return_portfolio(return_vector, stddev_vector,
                                                         correlation_matrix, target_stddev)
    print("scenario 2 - optimize portfolio for target risk")
    print("target std deviation: %.2f%%" % (target_stddev * 100.0))
    print("max return portfolio:")
    print(allocations)
    print("portfolio std deviation: %.2f%%" % (stddev * 100.0))
    print("portfolio return: %.2f%%" % (rtn * 100.0))

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Python Quantitative Finance Research

Python-based quantitative finance models, tools, and algorithmic decision making.

A repository of Python code for:

- creating econometric market models and hypotheses
- modelling, testing and analysing market hypotheses
- identifying risk-weighted market opportunities
- identifying and applying statistically robust strategies
- backtesting trading strategies and analysing performance
- portfolio allocation and optimisation
- applying risk and performance management metrics

Portfolio Allocation - fetch Yahoo Finance data, then sort, analyse and allocate a portfolio by quadratic optimisation.

Portfolio Optimisation - optimise for a target return or a target risk level.

Time Series Analysis - stationarity and mean-reversion tests.

LSTM stock price forecasting and prediction.

--------------------------------------------------------------------------------
/Time_Series_Stationarity_Tests1.py:
--------------------------------------------------------------------------------
## One of the key trading concepts in the quantitative toolbox is that of mean reversion.
## This refers to a time series that displays a tendency to revert to its historical mean value.
## Mathematically, such a (continuous) time series is referred to as an Ornstein-Uhlenbeck process.
## This is in contrast to a random walk (Brownian motion), which has no "memory" of where it has
## been at each particular instant of time. The mean-reverting property of a time series can be
## exploited in order to produce profitable trading strategies.

## We must carry out statistical tests to identify mean reversion.
## The first step is to test for stationarity.

## Testing for Mean Reversion
## A continuous mean-reverting time series can be represented
## by an Ornstein-Uhlenbeck stochastic differential equation:

##   dx_t = θ(μ − x_t) dt + σ dW_t

## x_t is the price of the asset under investigation at time t,
## θ is the rate of reversion to the mean,
## μ is the mean value of the process,
## σ is the volatility of the process,
## W_t is a Wiener process (Brownian motion).

## In a discrete setting the equation states that the change of the price series
## in the next time period is proportional to the difference between the mean
## price and the current price, with the addition of Gaussian noise.

## Test for stationarity STEP 1 - Augmented Dickey-Fuller Test

## We test for the presence of a unit root in an autoregressive time series sample.
## Intuitively, if a price series possesses mean reversion, then the change in the
## next period will be proportional to the difference between the current price
## and the mean. A linear lag model of order p is used for the time series:

##   Δy_t = α + βt + γ y_{t−1} + δ_1 Δy_{t−1} + ⋯ + δ_{p−1} Δy_{t−p+1} + ε_t

## where α is a constant,
## β is the coefficient of a temporal trend,
## and Δy_t = y(t) − y(t−1).

## The ADF test checks the null hypothesis that γ = 0,
## i.e. that the process is a random walk and thus non mean reverting.

## If the hypothesis that γ = 0 can be rejected, then the next movement of the
## price series is proportional to the current price, and thus it is unlikely
## to be a random walk.

## Data Series:
## Google price series from 2000-01-01 to 2013-01-01

## ADF TEST ##

# Import the Time Series library
import statsmodels.tsa.stattools as ts

# Import Datetime and the Pandas DataReader.
# pandas.io.data was removed from pandas; DataReader now lives in the
# separate pandas-datareader package (pip install pandas-datareader)
from datetime import datetime
from pandas_datareader.data import DataReader

# Download the Google OHLCV data from 1/1/2000 to 1/1/2013
goog = DataReader("GOOG", "yahoo", datetime(2000, 1, 1), datetime(2013, 1, 1))

# Output the results of the Augmented Dickey-Fuller test for Google
# with a lag order value of 1
print(ts.adfuller(goog['Adj Close'], 1))

# Here is the output of the Augmented Dickey-Fuller test for Google over the period.
# The first value is the calculated test statistic, while the second value is the
# p-value. The third is the number of lags used and the fourth is the number of
# observations in the sample. The fifth value, the dictionary, contains the critical
# values of the test statistic at the 1, 5 and 10 percent levels respectively.

'''(-2.1900105430326064,
 0.20989101040060731,
 0,
 2106,
 {'1%': -3.4334588739173006,
  '10%': -2.5675011176676956,
  '5%': -2.8629133710702983},
 15436.871010333041)
'''

## Since the calculated value of the test statistic is larger (less negative) than
## any of the critical values at the 1, 5 or 10 percent levels, we cannot reject
## the null hypothesis and thus we are unlikely to have found a mean reverting
## time series.
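
## A small helper (not in the original script) that applies the decision rule
## above programmatically: reject the unit-root null only when the test
## statistic is more negative than the chosen critical value.
def adf_is_mean_reverting(series, significance='5%'):
    stat, pvalue, usedlag, nobs, crit, icbest = ts.adfuller(series, 1)
    return stat < crit[significance]

## e.g. adf_is_mean_reverting(goog['Adj Close']) returns False for this sample,
## since -2.19 is not below the 5% critical value of -2.86.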

## An alternative means of identifying a mean reverting time series is provided
## by the concept of stationarity.

## Testing for Stationarity STEP 2 - HURST EXPONENT

## A time series (or stochastic process) is defined to be strongly stationary if
## its joint probability distribution is invariant under translations in time or space.
## In particular, and of key importance for traders, the mean and variance of the
## process do not change over time or space, and neither follows a trend.

## A critical feature of a stationary price series is that the prices within the series
## diffuse from their initial value at a rate slower than that of a Geometric Brownian
## Motion. By measuring the rate of this diffusive behaviour we can identify the nature
## of the time series.

## We will use the Hurst Exponent H, which characterises this behaviour:
## H < 0.5 indicates a mean reverting series, H = 0.5 a geometric random walk,
## and H > 0.5 a trending series.

from numpy import asarray, cumsum, log, polyfit, sqrt, std, subtract
from numpy.random import randn

def hurst(ts):
    """Returns the Hurst Exponent of the time series vector ts"""
    # Work on a plain array; passing a pandas Series directly would trigger
    # index-aligned (and therefore meaningless) subtraction below
    ts = asarray(ts)

    # Create the range of lag values
    lags = range(2, 100)

    # Calculate the array of the variances of the lagged differences
    tau = [sqrt(std(subtract(ts[lag:], ts[:-lag]))) for lag in lags]

    # Use a linear fit to estimate the Hurst Exponent
    poly = polyfit(log(lags), log(tau), 1)

    # Return the Hurst exponent from the polyfit output
    return poly[0] * 2.0

# Create a Geometric Brownian Motion, Mean-Reverting and Trending series
gbm = log(cumsum(randn(100000)) + 1000)
mr = log(randn(100000) + 1000)
tr = log(cumsum(randn(100000) + 1) + 1000)

# Output the Hurst Exponent for each of the above series
# and the price of Google (the Adjusted Close price) for
# the ADF test given above
print("Hurst(GBM): %s" % hurst(gbm))
print("Hurst(MR): %s" % hurst(mr))
print("Hurst(TR): %s" % hurst(tr))

# Assuming you have run the above code to obtain 'goog'!
print("Hurst(GOOG): %s" % hurst(goog['Adj Close']))

# The output from the Hurst Exponent code is given below:

'''Hurst(GBM): 0.500606209426
Hurst(MR): 0.000313348900533
Hurst(TR): 0.947502376783
Hurst(GOOG): 0.507880122614
'''

'''From this output we can see that the Geometric Brownian Motion possesses a
Hurst Exponent H that is almost exactly 0.5. The mean reverting series has H
almost equal to zero, while the trending series has H close to 1.

Interestingly, Google also has H nearly equal to 0.5, indicating that it is
extremely close to a geometric random walk (at least for the sample period
we're making use of).

While we now have a means of characterising the nature of a price time series,
we have yet to discuss how statistically significant this value of H is.
We need to be able to determine if we can reject the null hypothesis that
H = 0.5 to ascertain mean reverting or trending behaviour.
'''
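
## One simple way to gauge that significance is a Monte Carlo null band.
## A minimal sketch (not in the original script; the function name and the
## 200-path default are choices of this edit): simulate many random walks of
## the same length as the GOOG sample (2106 observations, per the ADF output
## above), compute H for each, and take the empirical 2.5%/97.5% quantiles.
def hurst_null_band(n_paths=200, length=2106):
    hs = sorted(hurst(log(cumsum(randn(length)) + 1000)) for _ in range(n_paths))
    return hs[int(0.025 * n_paths)], hs[int(0.975 * n_paths)]

## If Hurst(GOOG) falls inside hurst_null_band(), we cannot reject H = 0.5.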
--------------------------------------------------------------------------------
/lstm_stockprice_forecast.py:
--------------------------------------------------------------------------------
## Overview

## We use an LSTM neural network to predict the closing price of the S&P 500
## using a dataset of past prices.

## Dependencies - keras and tensorflow

import time
import warnings

import numpy as np
from numpy import newaxis
# keras.layers.core / keras.layers.recurrent are the old module paths;
# modern Keras exposes these layers directly from keras.layers
from keras.layers import Activation, Dense, Dropout, LSTM
from keras.models import Sequential
import matplotlib.pyplot as plt


warnings.filterwarnings("ignore")

def plot_results_multiple(predicted_data, true_data, prediction_len):
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    # Pad the list of predictions to shift each sequence to its correct start
    for i, data in enumerate(predicted_data):
        padding = [None for p in range(i * prediction_len)]
        plt.plot(padding + data, label='Prediction')
    plt.legend()
    plt.show()

def load_data(filename, seq_len, normalise_window):
    # One closing price per line; drop empty lines (e.g. a trailing newline)
    with open(filename, 'r') as f:
        data = [line for line in f.read().split('\n') if line]

    sequence_length = seq_len + 1
    result = []
    for index in range(len(data) - sequence_length):
        result.append(data[index: index + sequence_length])

    if normalise_window:
        result = normalise_windows(result)

    result = np.array(result, dtype=float)

    # 90/10 train/test split; shuffle only the training windows
    row = int(round(0.9 * result.shape[0]))
    train = result[:row, :]
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[row:, :-1]
    y_test = result[row:, -1]

    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    return [x_train, y_train, x_test, y_test]

def normalise_windows(window_data):
    # Normalise each window relative to its first value: p_i / p_0 - 1
    normalised_data = []
    for window in window_data:
        normalised_window = [((float(p) / float(window[0])) - 1) for p in window]
        normalised_data.append(normalised_window)
    return normalised_data

def build_model(layers):
    # layers = [input features, units in LSTM 1, units in LSTM 2, output size]
    model = Sequential()

    model.add(LSTM(
        layers[1],
        input_shape=(None, layers[0]),
        return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(
        layers[2],
        return_sequences=False))
    model.add(Dropout(0.2))

    model.add(Dense(layers[3]))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print("Compilation Time : ", time.time() - start)
    return model

def predict_point_by_point(model, data):
    # Predict each timestep given the last sequence of true data,
    # in effect only predicting 1 step ahead each time
    predicted = model.predict(data)
    predicted = np.reshape(predicted, (predicted.size,))
    return predicted

def predict_sequence_full(model, data, window_size):
    # Shift the window by 1 new prediction each time,
    # re-run predictions on the new window
    curr_frame = data[0]
    predicted = []
    for i in range(len(data)):
        predicted.append(model.predict(curr_frame[newaxis, :, :])[0, 0])
        curr_frame = curr_frame[1:]
        curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1], axis=0)
    return predicted

def predict_sequences_multiple(model, data, window_size, prediction_len):
    # Predict a sequence of prediction_len steps before shifting the
    # prediction run forward by prediction_len steps
    prediction_seqs = []
    for i in range(len(data) // prediction_len):
        curr_frame = data[i * prediction_len]
        predicted = []
        for j in range(prediction_len):
            predicted.append(model.predict(curr_frame[newaxis, :, :])[0, 0])
            curr_frame = curr_frame[1:]
            curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1], axis=0)
        prediction_seqs.append(predicted)
    return prediction_seqs
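
## A minimal driver sketch (not part of the original module). The filename,
## sequence length, batch size and epoch count are assumptions -- substitute
## your own CSV of closing prices, one price per line.
if __name__ == '__main__':
    seq_len = 50
    X_train, y_train, X_test, y_test = load_data('sp500.csv', seq_len, True)
    model = build_model([1, 50, 100, 1])
    model.fit(X_train, y_train, batch_size=512, epochs=1, validation_split=0.05)
    predictions = predict_sequences_multiple(model, X_test, seq_len, 50)
    plot_results_multiple(predictions, y_test, 50)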
--------------------------------------------------------------------------------
/portfolio_allocation.py:
--------------------------------------------------------------------------------
### Portfolio Allocation

# Finds an optimal allocation of stocks in a portfolio,
# satisfying a minimum expected return.
# The problem is posed as a Quadratic Program, and solved
# using the cvxopt library.
# Uses actual past stock data, obtained using the
# portfolio_getyahoodata module in this repository.

from cvxopt import matrix, solvers
import numpy

import portfolio_getyahoodata as stocks

# solves the QP, where x is the allocation of the portfolio:
#   minimize    (1/2) x'Px + q'x
#   subject to  Gx <= h
#               Ax == b
#
# Input:  n - # of assets
#         avg_ret - nx1 matrix of average returns
#         covs - nxn matrix of return covariance
#         r_min - the minimum expected return that you'd
#                 like to achieve
# Output: sol - cvxopt solution object
def optimize_portfolio(n, avg_ret, covs, r_min):
    P = covs
    q = matrix(numpy.zeros((n, 1)), tc='d')
    # inequality constraints Gx <= h
    # captures the constraints (avg_ret'x >= r_min) and (x >= 0)
    G = matrix(numpy.concatenate((
        -numpy.transpose(numpy.array(avg_ret)),
        -numpy.identity(n)), 0))
    h = matrix(numpy.concatenate((
        -numpy.ones((1, 1)) * r_min,
        numpy.zeros((n, 1))), 0))
    # equality constraint Ax = b; captures the constraint sum(x) == 1
    A = matrix(1.0, (1, n))
    b = matrix(1.0)
    sol = solvers.qp(P, q, G, h, A, b)
    return sol

### setup the parameters
symbols = ['GOOG', 'AIMC', 'CE', 'BH', 'AHGP', 'AB', 'HLS', 'BKH', 'LUV']
# pull data from this date range
start = '1/1/2010'
end = '1/1/2016'
n = len(symbols)
# average yearly return for each stock
avg_ret = matrix([stocks.avg_return(s, start, end, 'y') for s in symbols])
# covariance of asset returns
covs = matrix(numpy.array(stocks.cov_matrix(symbols, start, end, 'y')))
# minimum expected return threshold
r_min = 0.10

### solve
solution = optimize_portfolio(n, avg_ret, covs, r_min)

print(solution['x'])
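
# A small readout sketch (not in the original script): check that the solver
# converged, then pair each weight with its ticker for readability.
if solution['status'] == 'optimal':
    for symbol, weight in zip(symbols, list(solution['x'])):
        print("%6s : %.4f" % (symbol, weight))
else:
    print("QP did not converge: %s" % solution['status'])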
--------------------------------------------------------------------------------
/portfolio_getyahoodata.py:
--------------------------------------------------------------------------------
# A Python module for retrieving stock data using the
# yahoo finance API

# requires the following installations:
# pip install cvxopt
# pip install numpy
# pip install scipy
# pip install pandas
# pip install patsy
# pip install statsmodels

import csv
import io
import urllib.request  # urllib2 in the original Python 2 version
from collections import defaultdict

import numpy
from statsmodels.stats.correlation_tools import cov_nearest

# Retrieves the stock quote for the given symbol
# from Yahoo Finance as a float.
# Input:  symbol - stock symbol as a string
# Output: price - latest trade price from yahoo finance
def get_stock_quote(symbol):
    BASE_URL = 'http://download.finance.yahoo.com/d/quotes.csv?s='
    ID = symbol
    close_prop = '&f=l1'
    SUFFIX = '&e=.csv'
    url = "%s%s%s%s" % (BASE_URL, ID, close_prop, SUFFIX)
    price = float(urllib.request.urlopen(url).read().strip())
    return price

# Downloads the stock history for the given symbol,
# for the given date range, as a csv file.
# Input: symbol - stock symbol as a string
#        start - start date in the form 'mm/dd/yyyy'
#        end - end date in the form 'mm/dd/yyyy'
#        outfile - output filename, e.g. 'out.csv'
#        interval - trading interval; either d, w, m (daily, weekly, monthly)
def csv_quote_history(symbol, start, end, outfile, interval='d'):
    response = _quote_history(symbol, start, end, interval)
    with open(outfile, 'w', newline='') as f:
        csv_reader = csv.reader(io.TextIOWrapper(response))
        csv_writer = csv.writer(f)
        for row in csv_reader:
            csv_writer.writerow(row)

# Gives the stock history for the given symbol,
# for the given date range, as a dictionary.
# Output: keys: ['High', 'Adj Close', 'Volume', 'Low', 'Date', 'Close', 'Open']
#         values: list
def quote_history_dict(symbol, start, end, interval='m'):
    history = defaultdict(list)
    response = _quote_history(symbol, start, end, interval)
    dreader = csv.DictReader(io.TextIOWrapper(response))
    for row in dreader:
        for key in row:
            # prepend, so the history runs oldest-first
            history[key].insert(0, row[key])
    return history

def _quote_history(symbol, start, end, interval):
    BASE_URL = 'http://ichart.yahoo.com/table.csv?s='
    ID = symbol
    sm, sd, sy = start.split('/')
    em, ed, ey = end.split('/')
    # Yahoo's chart API counted months from 0, hence the (month - 1)s
    url = "%s%s&a=%d&b=%d&c=%d&d=%d&e=%d&f=%d&g=%s" % (
        BASE_URL, ID, (int(sm) - 1), int(sd), int(sy),
        (int(em) - 1), int(ed), int(ey), interval)
    response = urllib.request.urlopen(url)
    return response

def get_prices(symbol, start, end, interval='m'):
    history = quote_history_dict(symbol, start, end, interval)
    prices = [round(float(x), 2) for x in history['Close']]
    prices[0] = round(float(history['Open'][0]), 2)
    return prices

def get_returns(symbol, start, end, interval='m'):
    prices = get_prices(symbol, start, end, interval)
    returns = [(y / x) - 1 for x, y in zip(prices[0:-1], prices[1:])]
    return returns

def get_yr_returns(symbol, start, end):
    prices = get_prices(symbol, start, end, 'm')
    prices.insert(0, prices[0])
    # year-on-year returns taken from every 12th monthly price
    returns = [(y / x) - 1 for x, y in zip(prices[0::12][:-1], prices[12::12])]
    return returns

def avg_return(symbol, start, end, interval='m'):
    if interval == 'y':
        return numpy.mean(get_yr_returns(symbol, start, end))
    else:
        return numpy.mean(get_returns(symbol, start, end, interval))

def cov_matrix(symbols, start, end, interval='m'):
    if interval == 'y':
        data = [numpy.array(get_yr_returns(s, start, end)) for s in symbols]
    else:
        data = [numpy.array(get_returns(s, start, end, interval)) for s in symbols]
    x = numpy.array(data)
    # project the sample covariance onto the nearest positive semidefinite matrix
    return cov_nearest(numpy.cov(x))
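
# A minimal usage sketch (not in the original module). NB: these legacy Yahoo
# CSV endpoints were retired in 2017, so this illustrates the intended API
# rather than something guaranteed to run today.
if __name__ == '__main__':
    print(get_stock_quote('GOOG'))
    print(avg_return('GOOG', '1/1/2010', '1/1/2016', 'y'))
    print(cov_matrix(['GOOG', 'LUV'], '1/1/2010', '1/1/2016', 'y'))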
--------------------------------------------------------------------------------