├── .gitignore
├── LICENSE
├── MPT_portfolio_optimisation_input_target_return_targetvariance.py
├── README.md
├── Time_Series_Stationarity_Tests1.py
├── lstm_stockprice_forecast.py
├── portfolio_allocation.py
└── portfolio_getyahoodata.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Hexal

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/MPT_portfolio_optimisation_input_target_return_targetvariance.py:
--------------------------------------------------------------------------------
import math

import numpy


def calc_min_variance_portfolio(return_vector, stddev_vector, correlation_matrix, target_return):
    """
    Given return, standard deviation, and correlation data on multiple assets and a
    target portfolio return, calculate the minimum variance portfolio that achieves
    the target_return, if possible.

    return_vector: column vector of expected returns
    stddev_vector: column vector of standard deviations of returns
    correlation_matrix: correlation matrix
    target_return: target portfolio return
    returns: (portfolio allocations, portfolio standard deviation)

    Short positions are indicated as negative allocations.
    """
    MU = return_vector
    R = correlation_matrix
    m = target_return
    # Build the covariance matrix from the correlation matrix and std deviations.
    # numpy.matrix is kept for the original's * and .I operators, though numpy
    # recommends plain arrays in new code.
    S = numpy.matrix(numpy.diagflat(stddev_vector))
    COV = S * R * S
    ONE = numpy.matrix((1,) * COV.shape[0]).T
    A = ONE.T * COV.I * ONE
    a = float(A)
    B = MU.T * COV.I * ONE
    b = float(B)
    C = MU.T * COV.I * MU
    c = float(C)
    LAMBDA = (a * m - b) / (a * c - (b * b))
    GAMMA = (c - b * m) / ((a * c) - (b * b))
    WSTAR = COV.I * ((LAMBDA * MU) + (GAMMA * ONE))
    STDDEV = math.sqrt(float(WSTAR.T * COV * WSTAR))
    return WSTAR, STDDEV
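
# The closed form above is the standard Lagrangian solution to the
# minimum-variance frontier problem: minimise w' COV w subject to
# w' MU = m and w' 1 = 1. With the scalars
#   a = 1' COV^-1 1,  b = MU' COV^-1 1,  c = MU' COV^-1 MU
# the multipliers are
#   lambda = (a*m - b) / (a*c - b^2),  gamma = (c - b*m) / (a*c - b^2)
# and the optimal weights are
#   w* = COV^-1 (lambda * MU + gamma * 1)
# which is exactly what the function computes.
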
def calc_max_return_portfolio(return_vector, stddev_vector, correlation_matrix, target_stddev):
    """
    Given return, standard deviation, and correlation data on multiple assets and a
    target portfolio risk level, calculate the maximum return portfolio whose risk
    does not exceed the target, if possible.

    return_vector: column vector of expected returns
    stddev_vector: column vector of standard deviations of returns
    correlation_matrix: correlation matrix
    target_stddev: target portfolio standard deviation (the filename says
        "variance", but the quantity compared below is a standard deviation)
    returns: (portfolio allocations, portfolio standard deviation, portfolio return)

    Short positions are indicated as negative allocations.
    """
    last_return = None
    last_allocation = None
    last_stddev = None
    target_return = float(min(return_vector))
    # Walk up the frontier in small return increments until the risk budget
    # is exceeded, then return the last feasible portfolio.
    while target_return <= float(max(return_vector)):
        this_allocation, this_stddev = calc_min_variance_portfolio(
            return_vector, stddev_vector, correlation_matrix, target_return)
        if this_stddev > target_stddev:
            return (last_allocation, last_stddev, last_return)
        last_allocation = this_allocation
        last_stddev = this_stddev
        last_return = target_return
        target_return += .0005  # TODO: linear search, not ideal (try Newton-Raphson or bisection instead)
    return (None, None, None)


if __name__ == "__main__":
    return_vector = numpy.matrix((0.05, 0.07, 0.15, 0.27)).T
    stddev_vector = numpy.matrix((0.07, 0.12, 0.30, 0.60)).T
    correlation_matrix = numpy.matrix(((1.0, 0.8, 0.5, 0.4),
                                       (0.8, 1.0, 0.7, 0.5),
                                       (0.5, 0.7, 1.0, 0.8),
                                       (0.4, 0.5, 0.8, 1.0)))

    target_return = .125
    allocations, stddev = calc_min_variance_portfolio(return_vector, stddev_vector,
                                                      correlation_matrix, target_return)
    print("scenario 1 - optimize portfolio for target return")
    print("target return: %.2f%%" % (target_return * 100.0))
    print("min variance portfolio:")
    print(allocations)
    print("portfolio std deviation: %.2f%%" % (stddev * 100.0))

    print("-" * 40)

    target_stddev = .15
    allocations, stddev, rtn = calc_max_return_portfolio(return_vector, stddev_vector,
                                                         correlation_matrix, target_stddev)
    print("scenario 2 - optimize portfolio for target risk")
    print("target std deviation: %.2f%%" % (target_stddev * 100.0))
    print("max return portfolio:")
    print(allocations)
    print("portfolio std deviation: %.2f%%" % (stddev * 100.0))
    print("portfolio return: %.2f%%" % (rtn * 100.0))

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Python Quantitative Finance Research

Python-based quantitative finance models, tools, and algorithmic decision making.

A repository of Python code for:

- creating econometric market models and hypotheses
- modelling, testing and analysing market hypotheses
- identifying risk-weighted market opportunities
- identifying and applying statistically robust strategies
- backtesting trading strategies and analysing performance
- portfolio allocation and optimisation
- applying risk and performance management metrics

Portfolio Allocation - fetch Yahoo Finance data, then sort, analyse and allocate a portfolio by quadratic optimisation.

Portfolio Optimisation - optimise for a target return or a target risk level.

Time Series Analysis - stationarity and mean-reversion tests.

LSTM stock price forecasting and prediction.

--------------------------------------------------------------------------------
/Time_Series_Stationarity_Tests1.py:
--------------------------------------------------------------------------------
## One of the key trading concepts in the quantitative toolbox is that of mean reversion.
## This refers to a time series that displays a tendency to revert to its historical mean value.
## Mathematically, such a (continuous) time series is referred to as an Ornstein-Uhlenbeck process.
## This is in contrast to a random walk (Brownian motion), which has no "memory" of where it has
## been at each particular instant of time. The mean-reverting property of a time series can be
## exploited in order to produce profitable trading strategies.

## We must carry out statistical tests to identify mean reversion.
## The first step is to test for stationarity.

## Testing for Mean Reversion
## A continuous mean-reverting time series can be represented
## by an Ornstein-Uhlenbeck stochastic differential equation:

##   dx_t = θ(μ − x_t) dt + σ dW_t

## x_t is the price of the asset under investigation at time t,
## θ is the rate of reversion to the mean,
## μ is the mean value of the process,
## σ is the volatility of the process,
## W_t is a Wiener process (Brownian motion).

## In a discrete setting the equation states that the change of the price series
## in the next time period is proportional to the difference between the mean
## price and the current price, with the addition of Gaussian noise.

## Test for stationarity STEP 1 - Augmented Dickey-Fuller Test

## We test for the presence of a unit root in an autoregressive time series sample.
## Intuitively, if a price series possesses mean reversion, then the change in the
## next period will be proportional to the difference between the current price
## and the mean. A linear lag model of order p is used for the time series:

##   Δy_t = α + βt + γ y_{t−1} + δ_1 Δy_{t−1} + ⋯ + δ_{p−1} Δy_{t−p+1} + ε_t

## where α is a constant,
## β is the coefficient of a temporal trend,
## and Δy_t = y(t) − y(t−1).

## The ADF test checks the null hypothesis that γ = 0,
## i.e. that the process is a random walk and thus non mean reverting.

## If the hypothesis that γ = 0 can be rejected, then the next movement of the
## price series is proportional to the current price, and thus it is unlikely
## to be a random walk.

## Data Series:
## Google price series from 2000-01-01 to 2013-01-01

## ADF TEST ##

# Import the Time Series library
import statsmodels.tsa.stattools as ts

# Import Datetime and the Pandas DataReader.
# pandas.io.data was removed from pandas; DataReader now lives in the
# separate pandas-datareader package (pip install pandas-datareader)
from datetime import datetime
from pandas_datareader.data import DataReader

# Download the Google OHLCV data from 1/1/2000 to 1/1/2013
goog = DataReader("GOOG", "yahoo", datetime(2000, 1, 1), datetime(2013, 1, 1))

# Output the results of the Augmented Dickey-Fuller test for Google
# with a lag order value of 1
print(ts.adfuller(goog['Adj Close'], 1))

# Here is the output of the Augmented Dickey-Fuller test for Google over the period.
# The first value is the calculated test statistic, while the second value is the
# p-value. The third is the number of lags used and the fourth is the number of
# observations in the sample. The fifth value, the dictionary, contains the critical
# values of the test statistic at the 1, 5 and 10 percent levels respectively.

'''(-2.1900105430326064,
 0.20989101040060731,
 0,
 2106,
 {'1%': -3.4334588739173006,
  '10%': -2.5675011176676956,
  '5%': -2.8629133710702983},
 15436.871010333041)
'''

## Since the calculated value of the test statistic is larger (less negative) than
## any of the critical values at the 1, 5 or 10 percent levels, we cannot reject
## the null hypothesis and thus we are unlikely to have found a mean reverting
## time series.
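
## A small helper (not in the original script) that applies the decision rule
## above programmatically: reject the unit-root null only when the test
## statistic is more negative than the chosen critical value.
def adf_is_mean_reverting(series, significance='5%'):
    stat, pvalue, usedlag, nobs, crit, icbest = ts.adfuller(series, 1)
    return stat < crit[significance]

## e.g. adf_is_mean_reverting(goog['Adj Close']) returns False for this sample,
## since -2.19 is not below the 5% critical value of -2.86.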

## An alternative means of identifying a mean reverting time series is provided
## by the concept of stationarity.

## Testing for Stationarity STEP 2 - HURST EXPONENT

## A time series (or stochastic process) is defined to be strongly stationary if
## its joint probability distribution is invariant under translations in time or space.
## In particular, and of key importance for traders, the mean and variance of the
## process do not change over time or space, and neither follows a trend.

## A critical feature of a stationary price series is that the prices within the series
## diffuse from their initial value at a rate slower than that of a Geometric Brownian
## Motion. By measuring the rate of this diffusive behaviour we can identify the nature
## of the time series.

## We will use the Hurst Exponent H, which characterises this behaviour:
## H < 0.5 indicates a mean reverting series, H = 0.5 a geometric random walk,
## and H > 0.5 a trending series.

from numpy import asarray, cumsum, log, polyfit, sqrt, std, subtract
from numpy.random import randn

def hurst(ts):
    """Returns the Hurst Exponent of the time series vector ts"""
    # Work on a plain array; passing a pandas Series directly would trigger
    # index-aligned (and therefore meaningless) subtraction below
    ts = asarray(ts)

    # Create the range of lag values
    lags = range(2, 100)

    # Calculate the array of the variances of the lagged differences
    tau = [sqrt(std(subtract(ts[lag:], ts[:-lag]))) for lag in lags]

    # Use a linear fit to estimate the Hurst Exponent
    poly = polyfit(log(lags), log(tau), 1)

    # Return the Hurst exponent from the polyfit output
    return poly[0] * 2.0

# Create a Geometric Brownian Motion, Mean-Reverting and Trending series
gbm = log(cumsum(randn(100000)) + 1000)
mr = log(randn(100000) + 1000)
tr = log(cumsum(randn(100000) + 1) + 1000)

# Output the Hurst Exponent for each of the above series
# and the price of Google (the Adjusted Close price) for
# the ADF test given above
print("Hurst(GBM): %s" % hurst(gbm))
print("Hurst(MR): %s" % hurst(mr))
print("Hurst(TR): %s" % hurst(tr))

# Assuming you have run the above code to obtain 'goog'!
print("Hurst(GOOG): %s" % hurst(goog['Adj Close']))

# The output from the Hurst Exponent code is given below:

'''Hurst(GBM): 0.500606209426
Hurst(MR): 0.000313348900533
Hurst(TR): 0.947502376783
Hurst(GOOG): 0.507880122614
'''

'''From this output we can see that the Geometric Brownian Motion possesses a
Hurst Exponent H that is almost exactly 0.5. The mean reverting series has H
almost equal to zero, while the trending series has H close to 1.

Interestingly, Google also has H nearly equal to 0.5, indicating that it is
extremely close to a geometric random walk (at least for the sample period
we're making use of).

While we now have a means of characterising the nature of a price time series,
we have yet to discuss how statistically significant this value of H is.
We need to be able to determine if we can reject the null hypothesis that
H = 0.5 to ascertain mean reverting or trending behaviour.
'''
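
## One simple way to gauge that significance is a Monte Carlo null band.
## A minimal sketch (not in the original script; the function name and the
## 200-path default are choices of this edit): simulate many random walks of
## the same length as the GOOG sample (2106 observations, per the ADF output
## above), compute H for each, and take the empirical 2.5%/97.5% quantiles.
def hurst_null_band(n_paths=200, length=2106):
    hs = sorted(hurst(log(cumsum(randn(length)) + 1000)) for _ in range(n_paths))
    return hs[int(0.025 * n_paths)], hs[int(0.975 * n_paths)]

## If Hurst(GOOG) falls inside hurst_null_band(), we cannot reject H = 0.5.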
--------------------------------------------------------------------------------
/lstm_stockprice_forecast.py:
--------------------------------------------------------------------------------
## Overview

## We use an LSTM neural network to predict the closing price of the S&P 500
## using a dataset of past prices.

## Dependencies - keras and tensorflow

import time
import warnings

import numpy as np
from numpy import newaxis
# keras.layers.core / keras.layers.recurrent are the old module paths;
# modern Keras exposes these layers directly from keras.layers
from keras.layers import Activation, Dense, Dropout, LSTM
from keras.models import Sequential
import matplotlib.pyplot as plt


warnings.filterwarnings("ignore")

def plot_results_multiple(predicted_data, true_data, prediction_len):
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    # Pad the list of predictions to shift each sequence to its correct start
    for i, data in enumerate(predicted_data):
        padding = [None for p in range(i * prediction_len)]
        plt.plot(padding + data, label='Prediction')
    plt.legend()
    plt.show()

def load_data(filename, seq_len, normalise_window):
    # One closing price per line; drop empty lines (e.g. a trailing newline)
    with open(filename, 'r') as f:
        data = [line for line in f.read().split('\n') if line]

    sequence_length = seq_len + 1
    result = []
    for index in range(len(data) - sequence_length):
        result.append(data[index: index + sequence_length])

    if normalise_window:
        result = normalise_windows(result)

    result = np.array(result, dtype=float)

    # 90/10 train/test split; shuffle only the training windows
    row = int(round(0.9 * result.shape[0]))
    train = result[:row, :]
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[row:, :-1]
    y_test = result[row:, -1]

    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    return [x_train, y_train, x_test, y_test]

def normalise_windows(window_data):
    # Normalise each window relative to its first value: p_i / p_0 - 1
    normalised_data = []
    for window in window_data:
        normalised_window = [((float(p) / float(window[0])) - 1) for p in window]
        normalised_data.append(normalised_window)
    return normalised_data

def build_model(layers):
    # layers = [input features, units in LSTM 1, units in LSTM 2, output size]
    model = Sequential()

    model.add(LSTM(
        layers[1],
        input_shape=(None, layers[0]),
        return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(
        layers[2],
        return_sequences=False))
    model.add(Dropout(0.2))

    model.add(Dense(layers[3]))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print("Compilation Time : ", time.time() - start)
    return model

def predict_point_by_point(model, data):
    # Predict each timestep given the last sequence of true data,
    # in effect only predicting 1 step ahead each time
    predicted = model.predict(data)
    predicted = np.reshape(predicted, (predicted.size,))
    return predicted

def predict_sequence_full(model, data, window_size):
    # Shift the window by 1 new prediction each time,
    # re-run predictions on the new window
    curr_frame = data[0]
    predicted = []
    for i in range(len(data)):
        predicted.append(model.predict(curr_frame[newaxis, :, :])[0, 0])
        curr_frame = curr_frame[1:]
        curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1], axis=0)
    return predicted

def predict_sequences_multiple(model, data, window_size, prediction_len):
    # Predict a sequence of prediction_len steps before shifting the
    # prediction run forward by prediction_len steps
    prediction_seqs = []
    for i in range(len(data) // prediction_len):
        curr_frame = data[i * prediction_len]
        predicted = []
        for j in range(prediction_len):
            predicted.append(model.predict(curr_frame[newaxis, :, :])[0, 0])
            curr_frame = curr_frame[1:]
            curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1], axis=0)
        prediction_seqs.append(predicted)
    return prediction_seqs
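
## A minimal driver sketch (not part of the original module). The filename,
## sequence length, batch size and epoch count are assumptions -- substitute
## your own CSV of closing prices, one price per line.
if __name__ == '__main__':
    seq_len = 50
    X_train, y_train, X_test, y_test = load_data('sp500.csv', seq_len, True)
    model = build_model([1, 50, 100, 1])
    model.fit(X_train, y_train, batch_size=512, epochs=1, validation_split=0.05)
    predictions = predict_sequences_multiple(model, X_test, seq_len, 50)
    plot_results_multiple(predictions, y_test, 50)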
--------------------------------------------------------------------------------
/portfolio_allocation.py:
--------------------------------------------------------------------------------
### Portfolio Allocation

# Finds an optimal allocation of stocks in a portfolio,
# satisfying a minimum expected return.
# The problem is posed as a Quadratic Program, and solved
# using the cvxopt library.
# Uses actual past stock data, obtained using the
# portfolio_getyahoodata module in this repository.

from cvxopt import matrix, solvers
import numpy

import portfolio_getyahoodata as stocks

# solves the QP, where x is the allocation of the portfolio:
#   minimize    (1/2) x'Px + q'x
#   subject to  Gx <= h
#               Ax == b
#
# Input:  n - # of assets
#         avg_ret - nx1 matrix of average returns
#         covs - nxn matrix of return covariance
#         r_min - the minimum expected return that you'd
#                 like to achieve
# Output: sol - cvxopt solution object
def optimize_portfolio(n, avg_ret, covs, r_min):
    P = covs
    q = matrix(numpy.zeros((n, 1)), tc='d')
    # inequality constraints Gx <= h
    # captures the constraints (avg_ret'x >= r_min) and (x >= 0)
    G = matrix(numpy.concatenate((
        -numpy.transpose(numpy.array(avg_ret)),
        -numpy.identity(n)), 0))
    h = matrix(numpy.concatenate((
        -numpy.ones((1, 1)) * r_min,
        numpy.zeros((n, 1))), 0))
    # equality constraint Ax = b; captures the constraint sum(x) == 1
    A = matrix(1.0, (1, n))
    b = matrix(1.0)
    sol = solvers.qp(P, q, G, h, A, b)
    return sol

### setup the parameters
symbols = ['GOOG', 'AIMC', 'CE', 'BH', 'AHGP', 'AB', 'HLS', 'BKH', 'LUV']
# pull data from this date range
start = '1/1/2010'
end = '1/1/2016'
n = len(symbols)
# average yearly return for each stock
avg_ret = matrix([stocks.avg_return(s, start, end, 'y') for s in symbols])
# covariance of asset returns
covs = matrix(numpy.array(stocks.cov_matrix(symbols, start, end, 'y')))
# minimum expected return threshold
r_min = 0.10

### solve
solution = optimize_portfolio(n, avg_ret, covs, r_min)

print(solution['x'])
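
# A small readout sketch (not in the original script): check that the solver
# converged, then pair each weight with its ticker for readability.
if solution['status'] == 'optimal':
    for symbol, weight in zip(symbols, list(solution['x'])):
        print("%6s : %.4f" % (symbol, weight))
else:
    print("QP did not converge: %s" % solution['status'])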
--------------------------------------------------------------------------------
/portfolio_getyahoodata.py:
--------------------------------------------------------------------------------
# A Python module for retrieving stock data using the
# yahoo finance API

# requires the following installations:
# pip install cvxopt
# pip install numpy
# pip install scipy
# pip install pandas
# pip install patsy
# pip install statsmodels

import csv
import io
import urllib.request  # urllib2 in the original Python 2 version
from collections import defaultdict

import numpy
from statsmodels.stats.correlation_tools import cov_nearest

# Retrieves the stock quote for the given symbol
# from Yahoo Finance as a float.
# Input:  symbol - stock symbol as a string
# Output: price - latest trade price from yahoo finance
def get_stock_quote(symbol):
    BASE_URL = 'http://download.finance.yahoo.com/d/quotes.csv?s='
    ID = symbol
    close_prop = '&f=l1'
    SUFFIX = '&e=.csv'
    url = "%s%s%s%s" % (BASE_URL, ID, close_prop, SUFFIX)
    price = float(urllib.request.urlopen(url).read().strip())
    return price

# Downloads the stock history for the given symbol,
# for the given date range, as a csv file.
# Input: symbol - stock symbol as a string
#        start - start date in the form 'mm/dd/yyyy'
#        end - end date in the form 'mm/dd/yyyy'
#        outfile - output filename, e.g. 'out.csv'
#        interval - trading interval; either d, w, m (daily, weekly, monthly)
def csv_quote_history(symbol, start, end, outfile, interval='d'):
    response = _quote_history(symbol, start, end, interval)
    with open(outfile, 'w', newline='') as f:
        csv_reader = csv.reader(io.TextIOWrapper(response))
        csv_writer = csv.writer(f)
        for row in csv_reader:
            csv_writer.writerow(row)

# Gives the stock history for the given symbol,
# for the given date range, as a dictionary.
# Output: keys: ['High', 'Adj Close', 'Volume', 'Low', 'Date', 'Close', 'Open']
#         values: list
def quote_history_dict(symbol, start, end, interval='m'):
    history = defaultdict(list)
    response = _quote_history(symbol, start, end, interval)
    dreader = csv.DictReader(io.TextIOWrapper(response))
    for row in dreader:
        for key in row:
            # prepend, so the history runs oldest-first
            history[key].insert(0, row[key])
    return history

def _quote_history(symbol, start, end, interval):
    BASE_URL = 'http://ichart.yahoo.com/table.csv?s='
    ID = symbol
    sm, sd, sy = start.split('/')
    em, ed, ey = end.split('/')
    # Yahoo's chart API counted months from 0, hence the (month - 1)s
    url = "%s%s&a=%d&b=%d&c=%d&d=%d&e=%d&f=%d&g=%s" % (
        BASE_URL, ID, (int(sm) - 1), int(sd), int(sy),
        (int(em) - 1), int(ed), int(ey), interval)
    response = urllib.request.urlopen(url)
    return response

def get_prices(symbol, start, end, interval='m'):
    history = quote_history_dict(symbol, start, end, interval)
    prices = [round(float(x), 2) for x in history['Close']]
    prices[0] = round(float(history['Open'][0]), 2)
    return prices

def get_returns(symbol, start, end, interval='m'):
    prices = get_prices(symbol, start, end, interval)
    returns = [(y / x) - 1 for x, y in zip(prices[0:-1], prices[1:])]
    return returns

def get_yr_returns(symbol, start, end):
    prices = get_prices(symbol, start, end, 'm')
    prices.insert(0, prices[0])
    # year-on-year returns taken from every 12th monthly price
    returns = [(y / x) - 1 for x, y in zip(prices[0::12][:-1], prices[12::12])]
    return returns

def avg_return(symbol, start, end, interval='m'):
    if interval == 'y':
        return numpy.mean(get_yr_returns(symbol, start, end))
    else:
        return numpy.mean(get_returns(symbol, start, end, interval))

def cov_matrix(symbols, start, end, interval='m'):
    if interval == 'y':
        data = [numpy.array(get_yr_returns(s, start, end)) for s in symbols]
    else:
        data = [numpy.array(get_returns(s, start, end, interval)) for s in symbols]
    x = numpy.array(data)
    # project the sample covariance onto the nearest positive semidefinite matrix
    return cov_nearest(numpy.cov(x))
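
# A minimal usage sketch (not in the original module). NB: these legacy Yahoo
# CSV endpoints were retired in 2017, so this illustrates the intended API
# rather than something guaranteed to run today.
if __name__ == '__main__':
    print(get_stock_quote('GOOG'))
    print(avg_return('GOOG', '1/1/2010', '1/1/2016', 'y'))
    print(cov_matrix(['GOOG', 'LUV'], '1/1/2010', '1/1/2016', 'y'))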
--------------------------------------------------------------------------------