├── LICENSE.md ├── README.md ├── arima.py ├── backtesting.py ├── capm.py ├── cointegration.py ├── jumps.py ├── option.py ├── portfolio.py └── stock.py /LICENSE.md: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | 3 | *** 4 | 5 | ### Copyright (c) 2014 James Brofos and Ajay Kannan 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions:

13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software.

16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Python Financial Tools 2 | ====================== 3 | 4 | An open-source Python implementation of popular tools and techniques in financial asset management. The purpose of this repository is to provide high-quality financial data analytics software. The software relies on Yahoo Finance! to download the most recent trends in asset performance. 5 | 6 | ## Features 7 | - Instantiate stock objects easily by specifying their ticker symbol 8 | - Calculate the value-at-risk on a stock or portfolio to protect yourself against unexpected losses 9 | - Discover arbitrage opportunities through statistical analysis of the CAPM model 10 | - Optimize the allocation of your portfolio assets through quadratic programming 11 | - Visualize the performance of your stocks using Python's graphics libraries 12 | 13 | ``` 14 | If you like this project and want to cite this toolbox, please use the following citation: 15 | 16 | Brofos, James A., and Ajay Kannan. Python Financial Tools. Computer software. 17 | Vers. 0.1. GitHub, Inc., 08 Feb. 2014. Web. [DD] [MM]. [YYYY]. 
# ```
#
# ## Demo
#
#
#
# ## Dependencies
#
# Python Financial Tools relies on the libraries listed below:
#
# * [Numpy](http://www.numpy.org/) Standard numerical computations with vectors
# * [Scipy](http://www.scipy.org/) Used for calculations involving statistical distributions
# * [Matplotlib](http://matplotlib.org/) Visualizations of historical prices and returns
# * [CVXOPT](http://cvxopt.org/) Optimization toolbox for portfolio asset allocation
#
# [![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/JamesBrofos/python-financial-tools/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
#
# --------------------------------------------------------------------------------
# /arima.py:
# --------------------------------------------------------------------------------
import numpy as np
from scipy import stats
from pprint import pprint


class AR(object):
    """Autoregressive process AR(p) fitted by ordinary least squares.

    The model regresses each observation on a constant and its ``p``
    previous values:

        y_t = c + phi_1 * y_{t-1} + ... + phi_p * y_{t-p} + e_t

    After construction, ``self.regression`` holds the estimated process
    mean ``"mu"`` and the autoregressive coefficients ``"phi"``.
    """

    def __init__(self, time_series):
        """Fit the model to ``time_series`` (1-D or an (n, 1) column)."""
        self.time_series = time_series
        self.p = self.calculate_times_series_order()
        self.regression = self.time_series_regression()

    def time_series_regression(self):
        """Estimate the AR(p) parameters by least squares.

        Returns:
            dict with
              "mu":  float, the implied process mean c / (1 - sum(phi));
              "phi": 1-D array of length p, phi[j - 1] is the coefficient
                     of lag j.

        Raises:
            ValueError: if the series has no observation beyond the first
                p values, so that no regression row can be formed.
        """
        # Work on a flat float vector so that both a plain sequence and an
        # (n, 1) column (as built by the demo below) are accepted.
        series = np.asarray(self.time_series, dtype=float).ravel()
        p = self.p
        n = len(series) - p
        if n < 1:
            raise ValueError("The time series is too short for an AR(%d) regression." % p)

        # Design matrix: a column of ones followed by one column per lag.
        # Row i predicts series[i + p], so lag j must read series[i + p - j].
        # (The previous index, i - p + j, only agreed with this for p == 1
        # and wrapped around to the end of the series for larger orders.)
        X = np.ones((n, p + 1))
        for lag in range(1, p + 1):
            X[:, lag] = series[p - lag:p - lag + n]
        targets = series[p:]

        coefficients = np.linalg.lstsq(X, targets)[0]
        phi = coefficients[1:]
        # For a stationary AR process the intercept equals mu * (1 - sum(phi)).
        mu = coefficients[0] / (1 - np.sum(phi))

        return {"mu": float(mu), "phi": phi}

    def calculate_times_series_order(self):
        """Return the order p of the process.

        For the moment, naively assume that the order of the time series
        is one. Later, it would be advantageous to implement an automated
        method for selecting the order.
        """
        return 1

    def autocorrelation_function(self):
        """Placeholder: not implemented yet."""
        pass

    def partial_autocorrelation_function(self):
        """Placeholder: not implemented yet."""
        pass

    def calculate_ljung_box_statistic(self):
        """Placeholder: not implemented yet."""
        pass


if __name__ == "__main__":
    # Demo: simulate an AR(1) process with phi = 0.5 and recover the
    # parameters from the simulated series.
    phi = .5
    n = 1000
    time_series = np.zeros((n, 1))
    for i in range(1, n):
        time_series[i] = phi * time_series[i - 1] + np.random.normal()

    ar = AR(time_series)
    pprint(ar.regression)

# --------------------------------------------------------------------------------
# /backtesting.py:
# --------------------------------------------------------------------------------
from portfolio import Portfolio
from stock import Stock
from pprint import pprint
from dateutil import rrule, parser


class Backtesting(object):
    """Replay trading strategies over the assets of a portfolio, day by day.

    A strategy is a callable ``strategy(date, asset, position)`` returning a
    decision string (the example below uses "Buy", "Sell" and "Hold").
    """

    def __init__(self, portfolio, strategies, time_interval=None, transaction_cost=0.0):
        """
        Args:
            portfolio: object exposing ``n`` (number of assets) and ``assets``
                (sequence of stock objects with ``profile``, ``ticker`` and
                ``date_range`` attributes).
            strategies: a single strategy callable or a list of them. It is
                assumed that there is one strategy per asset in the
                portfolio, or else only a single strategy.
            time_interval: optional {"start": ..., "end": ...} dictionary of
                date strings; defaults to the date range of the first asset.
            transaction_cost: accepted for future use; currently ignored.
        """
        self.portfolio = portfolio
        self.strategies = strategies if isinstance(strategies, list) else [strategies]

        dates = time_interval if time_interval is not None else self.portfolio.assets[0].date_range
        # Every calendar day in the interval (inclusive), formatted like the
        # keys of a stock profile.
        calendar = rrule.rrule(rrule.DAILY,
                               dtstart=parser.parse(dates["start"]),
                               until=parser.parse(dates["end"]))
        self.time_interval = [day.strftime("%Y-%m-%d") for day in calendar]

        self.results = self.test_strategies_in_time_interval()

    def __str__(self):
        print_string = ""
        return print_string

    def test_strategies_in_time_interval(self):
        """Run every strategy over the time interval.

        Returns:
            dict mapping each asset ticker to a {date: decision} dictionary
            for the days on which the asset has price data, or None when the
            number of strategies does not match the number of assets.
            (Previously the collected results were never returned, so
            ``self.results`` was always None.)
        """
        results = {}
        n_assets = self.portfolio.n

        # Every asset starts out with the position marker "Buy".
        position = ["Buy"] * n_assets

        if n_assets > 1 and n_assets != len(self.strategies):
            print("The number of strategies must equal the number of assets in the portfolio.")
            return None

        for date in self.time_interval:
            for i in range(n_assets):
                asset = self.portfolio.assets[i]
                decide = self.strategies[0] if n_assets == 1 else self.strategies[i]

                # Calendar days without a quote (weekends, holidays) are skipped.
                if date in asset.profile:
                    position[i] = decide(date, asset, position[i])
                    print(position[i])
                    results.setdefault(asset.ticker, {})[date] = position[i]

        return results


def strategy(date, asset, position):
    """Example threshold strategy on the closing price.

    For this simple trading strategy, we choose to buy stock of WNC when the
    price is below $9.50, and to sell when the price rises above $10.50.
    Because we have the benefit of looking backwards, this strategy should be
    very profitable over the specified time interval.

    Returns "Hold" whenever the decision equals the current position.
    """
    close = float(asset.profile[date]["Close"])
    if close < 9.5:
        decision = "Buy"
    elif close > 10.5:
        decision = "Sell"
    else:
        decision = "Hold"

    return decision if decision != position else "Hold"


if __name__ == "__main__":
    portfolio = Portfolio([{"ticker": "WNC", "date_range": {"start": "2013-02-13", "end": "2013-08-13"}}])
    if False:
        portfolio.assets[0].display_price()

    backtest = Backtesting(portfolio, strategy)

    # print(portfolio)
    # print(backtest)

# --------------------------------------------------------------------------------
# /capm.py:
# --------------------------------------------------------------------------------
# capm.py: A Python class representing the CAPM model given
# the risk free rate and market returns. The object
# can be instantiated given Stock objects representing
# the risk free and market data or their respective
# ticker symbols.
#
# Calling asset_regression with a stock object or asset
# dictionary as a parameter will calculate and store the CAPM
# model's alpha, beta, and associated confidence intervals.
#
# The following is an example usage of the CAPM class to
# calculate the alpha and beta of the stock given an asset
# dictionary.
#     date_range = {"start" : "2012-01-03", "end" : "2013-01-08"}
#     tickers = ("^IRX","^GSPC","GOOG")
#     capm = CAPM({"ticker" : tickers[0],"date_range" : date_range},
#                 {"ticker" : tickers[1],"date_range" : date_range})
#     capm.asset_regression({"ticker" : tickers[2],"date_range" : date_range})

import numpy as np
from scipy.stats import norm
from stock import Stock


def _as_stock(spec):
    """Return a Stock for ``spec``: a Stock instance, an asset dictionary
    ({"ticker": ..., "date_range": ...}) or a bare ticker symbol."""
    if isinstance(spec, Stock):
        return spec
    if isinstance(spec, dict):
        return Stock(spec["ticker"], spec["date_range"])
    return Stock(spec)


class CAPM(object):
    """Capital Asset Pricing Model regression of an asset on the market."""

    def __init__(self, risk_free, market, alpha=.05):
        """
        Args:
            risk_free: risk-free asset (Stock, asset dictionary or ticker).
            market: market index (Stock, asset dictionary or ticker).
            alpha: significance level of the two-sided confidence intervals.
        """
        self.risk_free = _as_stock(risk_free)
        self.market = _as_stock(market)

        # Filled in by asset_regression().
        self.alpha, self.beta = {}, {}
        self.critical_value = norm.ppf(1 - alpha / 2.0)

    def __str__(self):
        if not (self.alpha and self.beta):
            return "The alpha and beta coefficients were not initialized. Please call the asset regression method before continuing."

        alpha = self.alpha
        beta = self.beta

        print_string = "Capital Asset Pricing Model:\n\tCritical value: %.2f\n" % self.critical_value
        print_string += "\t\t\tValue\t\tLower Bound\tUpper Bound\n"
        print_string += "\talpha:\t\t%.4f\t\t%.4f\t\t%.4f\n" % (alpha["value"],
                                                            alpha["confidence_interval"][0],
                                                            alpha["confidence_interval"][1])
        print_string += "\tbeta:\t\t%.4f\t\t%.4f\t\t%.4f\n\n" % (beta["value"],
                                                             beta["confidence_interval"][0],
                                                             beta["confidence_interval"][1])

        # A significantly positive alpha means the asset earns more than the
        # CAPM predicts, i.e. its price is too low (underpriced); a
        # significantly negative alpha means the opposite.
        if alpha["confidence_interval"][0] > 0:
            print_string += "\tThe CAPM reports that the security is underpriced. Asset returns are too large on average."
        elif alpha["confidence_interval"][1] < 0:
            print_string += "\tThe CAPM reports that the security is overpriced. Asset returns are too small on average."
        else:
            print_string += "\tThe CAPM reports that the security is appropriately priced."

        return print_string

    def asset_regression(self, asset_data):
        """Estimate alpha and beta of an asset and store them on the object.

        Args:
            asset_data: the asset (Stock, asset dictionary or ticker).

        Side effects:
            Sets ``self.alpha`` and ``self.beta`` to dictionaries with the
            keys "value" (float) and "confidence_interval" (array of the
            lower and upper bound).

        Note: the return series of the asset, the market and the risk-free
        asset are subtracted element-wise, so they must have equal length.
        """
        asset = _as_stock(asset_data)
        risk_free_returns = self.risk_free.statistics["returns"]
        market_premium = np.atleast_2d(self.market.statistics["returns"] - risk_free_returns).T
        asset_premium = np.atleast_2d(asset.statistics["returns"] - risk_free_returns).T

        n_observations = market_premium.shape[0]
        constant = np.ones((n_observations, 1))
        covariates = np.concatenate((constant, market_premium), axis=1)

        # Solve the capital asset pricing model in the least-squares sense. In
        # particular, we solve the following linear model for parameters
        # theta_0 and theta_1:
        #   R_{j,t} - mu_{f,t} = theta_0 + theta_1 * (R_{M,t} - mu_{f,t}) + e_{j,t}
        # Where R_{j,t} is the return of the jth asset, mu_{f,t} is the
        # risk-free rate, R_{M,t} is the market return, and e_{j,t} represents
        # an error term. Refer to page 435 in the Statistics and Data Analysis
        # for Financial Engineering.
        theta = np.linalg.lstsq(covariates, asset_premium)[0]
        residuals = asset_premium - np.dot(covariates, theta)

        # The rank of the covariates matrix is presumably two, and it is for
        # that reason that we subtract two in the denominator.
        s_squared = np.sum(residuals * residuals) / (n_observations - 2)

        # Only the diagonal of the coefficient covariance matrix holds
        # variances; taking the square root of the whole matrix would produce
        # NaNs for negative off-diagonal covariances.
        coefficient_covariance = s_squared * np.linalg.inv(np.dot(covariates.T, covariates))
        standard_errors = np.sqrt(np.diag(coefficient_covariance))
        half_widths = self.critical_value * np.array([-1, 1])

        alpha, beta = {}, {}
        alpha["value"] = float(theta[0, 0])
        alpha["confidence_interval"] = alpha["value"] + standard_errors[0] * half_widths

        beta["value"] = float(theta[1, 0])
        beta["confidence_interval"] = beta["value"] + standard_errors[1] * half_widths

        self.alpha = alpha
        self.beta = beta


if __name__ == "__main__":
    date_range = {"start": "2012-01-03", "end": "2013-01-08"}
    tickers = ("^IRX", "^GSPC", "GOOG")
    capm = CAPM({"ticker": tickers[0], "date_range": date_range},
                {"ticker": tickers[1], "date_range": date_range})
    capm.asset_regression({"ticker": tickers[2], "date_range": date_range})

    print(capm)

# --------------------------------------------------------------------------------
# /cointegration.py:
# --------------------------------------------------------------------------------
import numpy as np
from stock import Stock


class CointegratedAssets(object):
    """The "CointegratedAssets" class implements the Engle-Granger approach
    to cointegrated time series.

    Only the first step (the cointegrating regression) is implemented; the
    second step and the test itself are still placeholders.
    """

    def __init__(self, assets):
        """
        Args:
            assets: sequence of stock objects providing
                ``asset_closing_prices()``. The first asset is the dependent
                series, the remaining ones are the regressors. All price
                series are assumed to have the same length.
        """
        # One column of closing prices per asset.
        self.price_series = np.atleast_2d([asset.asset_closing_prices() for asset in assets]).T
        self.dependent = self.price_series[:, 0].T
        self.independent = self.price_series[:, 1:]

        self.engle_granger = {}
        self.engle_granger["step_one"] = self.engle_granger_step_one()

        cointegrated = self.engle_granger_cointegration_test()
        if cointegrated is None:
            # The test is a stub returning None; do not report a conclusion
            # that was never computed.
            print("The Engle-Granger cointegration test is not implemented yet; no conclusion can be reported.")
        elif cointegrated:
            print("The Engle-Granger test reports that cointegration exists between the time-series.")
        else:
            print("The Engle-Granger test reports that cointegration does not exist between the time-series.")

    def __str__(self):
        return "Cointegration assets"

    def engle_granger_step_one(self):
        """Regress the dependent price series on a constant and the others.

        Returns:
            dict with "theta" (intercept followed by one slope per regressor)
            and "residuals" (dependent series minus the fitted values).
        """
        constant = np.ones((self.independent.shape[0], 1))
        covariates = np.concatenate((constant, self.independent), axis=1)

        theta = np.linalg.lstsq(covariates, self.dependent)[0]
        # Residuals of the regression are measured against the dependent
        # series, not against the regressors.
        residuals = self.dependent - np.dot(covariates, theta)

        return {"theta": theta, "residuals": residuals}

    def engle_granger_step_two(self):
        """Placeholder: not implemented yet."""
        pass

    def engle_granger_cointegration_test(self):
        """Placeholder: not implemented yet (returns None)."""
        pass


if __name__ == "__main__":
    assets = [Stock("MSFT"), Stock("GOOG")]
    ca = CointegratedAssets(assets)

# --------------------------------------------------------------------------------
# /jumps.py:
# --------------------------------------------------------------------------------
import numpy as np
from stock import Stock
from scipy.special import gamma
from scipy import stats


class JumpStatistics(object):
    """Base class of jump test statistics computed on a stock object."""

    def __init__(self, stock):
        self.stock = stock


class BarndorffNielsen(JumpStatistics):
    """Barndorff-Nielsen test statistic for "jumps" in stock price data.

    An implementation of the Barndorff-Nielsen test statistic used for
    detecting "jumps" (or "surprises") in stock price data. The mathematics
    for this test statistic can be found at the following two resources:

       Michael William Schwert. 2008. "Problems in the Application of Jump
       Detection Tests to Stock Price Data". Duke University.

       "Some Like it Smooth, and Some Like it Rough: Untangling Continuous
       and Jump Components in Measuring, Modeling, and Forecasting Asset
       Return Volatility". Torben G. Andersen, Tim Bollerslev and Francis X.
       Diebold. September 2003.

    The following is an example of how to apply the Barndorff-Nielsen
    statistic to detect surprises in Microsoft stock data:
       if True:
           # Observe a trend in Microsoft stock prices where a jump occurs.
           stock = Stock("MSFT",{"start" : "2013-02-14","end" : "2014-02-14"})
       else:
           # Otherwise, view a sequence of stock prices where no jump was detected.
           stock = Stock("MSFT",{"start" : "2013-03-01","end" : "2013-04-01"})
       stock.display_price()
       bn = BarndorffNielsen(stock)
       bn.barndorff_nielsen_test()

    At least three log returns are required; the finite-sample corrections
    n / (n - 1) and n / (n - 2) divide by zero otherwise.
    """

    def __init__(self, stock):
        super(BarndorffNielsen, self).__init__(stock)
        self.n = len(self.stock.statistics["log_returns"])
        self.realized_variance = self.calculate_realized_variance()
        self.bipower_variance = self.calculate_bipower_variance()

        # Share of the realized variance that is not explained by the
        # (jump-robust) bipower variance.
        self.relative_jump = float(self.realized_variance - self.bipower_variance) / self.realized_variance
        self.tripower_quarticity = self.calculate_tripower_quarticity()

        self.statistic = self.barndorff_nielsen_statistic()

    def calculate_realized_variance(self):
        """Return the sum of squared log returns."""
        log_returns = self.stock.statistics["log_returns"]
        return np.sum(np.power(log_returns, 2))

    def calculate_bipower_variance(self):
        """Return (pi / 2) * n / (n - 1) * sum(|r_t| * |r_{t-1}|)."""
        n = self.n
        log_returns = np.absolute(self.stock.statistics["log_returns"])

        return (np.pi / 2.0) * (float(n) / (n - 1.0)) * np.sum(log_returns[1:] * log_returns[:-1])

    def calculate_tripower_quarticity(self):
        """Return the tripower quarticity of the log returns."""
        n = self.n

        # Notice that the absolute value of the log returns is calculated in
        # this step. This is to prevent numerical nan's from being produced
        # by the fractional powers. This also seems to be consistent with the
        # notation specified by Michael Schwert and Torben G. Andersen et al.
        log_returns = np.absolute(self.stock.statistics["log_returns"])
        # mu_{4/3}^{-3}, with mu_{4/3} = 2^{2/3} * Gamma(7/6) / Gamma(1/2).
        mu = np.power(np.power(2.0, 2.0 / 3) * gamma(7.0 / 6.0) * np.power(gamma(1.0 / 2.0), -1), -3)

        tripower = np.sum(np.power(log_returns[2:], 4.0 / 3) *
                          np.power(log_returns[1:-1], 4.0 / 3) *
                          np.power(log_returns[:-2], 4.0 / 3))
        return n * mu * (float(n) / (n - 2.0)) * tripower

    def barndorff_nielsen_statistic(self):
        """Return the relative jump scaled by its estimated standard deviation."""
        n = self.n
        pi = np.pi
        relative_jump = self.relative_jump
        tripower = self.tripower_quarticity
        bipower = self.bipower_variance

        return relative_jump / np.sqrt(((pi / 2) ** 2 + pi - 5) * (1.0 / n) * max(1, tripower / (bipower ** 2)))

    def barndorff_nielsen_test(self, alpha=.01):
        """Print the outcome of the one-sided test at significance ``alpha``.

        Returns:
            True when the statistic exceeds the standard normal
            (1 - alpha)-quantile, i.e. a jump is reported; False otherwise.
        """
        quantile = stats.norm.ppf(1 - alpha)
        jump_detected = bool(self.statistic > quantile)

        print_string = ""
        if jump_detected:
            print_string += "\tThe Barndorff-Nielsen Test reports that there was a jump in asset price.\n"
        else:
            print_string += "\tThe Barndorff-Nielsen Test reports that there was not a jump in asset price.\n"

        print_string += "\tThe significance level of the test: %.2f\n" % alpha
        print(self.stock)
        print(print_string)
        return jump_detected


if __name__ == "__main__":
    if True:
        # Observe a trend in Microsoft stock prices where a jump occurs.
        stock = Stock("MSFT", {"start": "2013-02-14", "end": "2014-02-14"})
    else:
        # Otherwise, view a sequence of stock prices where no jump was detected.
        stock = Stock("MSFT", {"start": "2013-03-01", "end": "2013-04-01"})
    stock.display_price()
    bn = BarndorffNielsen(stock)
    bn.barndorff_nielsen_test()

# --------------------------------------------------------------------------------
# /option.py:
# --------------------------------------------------------------------------------

import numpy as np
from scipy import stats


class Option(object):
    """Parameters of an option contract.

    Args:
        strike_price: exercise price of the option.
        tau: time to expiration.
        risk_free: risk-free interest rate.
        deviation: volatility (standard deviation) of the underlying.
        stock_price: current price of the underlying.

    ``tau``, ``risk_free`` and ``deviation`` must be expressed over the same
    time unit. The defaults reproduce the values that used to be hard-coded,
    so ``Option()`` behaves as before.
    """

    def __init__(self, strike_price=50.0, tau=.5, risk_free=.03, deviation=.45, stock_price=55.0):
        self.strike_price = float(strike_price)
        self.tau = tau
        self.risk_free = risk_free
        self.deviation = deviation
        self.stock_price = float(stock_price)


class EuropeanCall(Option):
    """European call option valued with the Black-Scholes formula."""

    def evaluate_black_scholes(self):
        """Return S * N(d_1) - X * exp(-r * tau) * N(d_2)."""
        S = self.stock_price
        X = self.strike_price
        r = self.risk_free
        sigma = self.deviation
        tau = self.tau

        d_1 = (np.log(S / X) + (r + (sigma ** 2) / 2) * tau) / (sigma * np.sqrt(tau))
        d_2 = d_1 - sigma * np.sqrt(tau)

        return S * stats.norm.cdf(d_1) - X * np.exp(-r * tau) * stats.norm.cdf(d_2)


if __name__ == "__main__":
    european_call = EuropeanCall()
    print(european_call.evaluate_black_scholes())

# --------------------------------------------------------------------------------
# /portfolio.py:
# --------------------------------------------------------------------------------
# portfolio.py This class represents a portfolio of stocks. It supports optimization
# of assets via a quadratic program.
#
# The following is an example of how the portfolio class may be used to represent a
# portfolio of assets representing major technology companies:
#     portfolio = Portfolio(["MSFT","GOOG","IBM"])
#     print("The value at risk: %.2f" % portfolio.calculate_parametric_risk(.05,position = 1000))
#     print("The expected shortfall: %.2f" % portfolio.calculate_parametric_risk(.05,True,1000))

import numpy as np
from stock import Stock
from cvxopt import matrix
from cvxopt.blas import dot
from cvxopt import solvers
from scipy import stats

from pprint import pprint

solvers.options["show_progress"] = False


def _as_stock(spec):
    """Return a Stock for ``spec``: a Stock instance, an asset dictionary
    ({"ticker": ..., "date_range": ...}) or a bare ticker symbol."""
    if isinstance(spec, Stock):
        return spec
    if isinstance(spec, dict):
        return Stock(spec["ticker"], spec["date_range"])
    return Stock(spec)


def _column_vector(weights):
    """Copy an indexable weight vector into an (n, 1) numpy array."""
    return np.array([[weights[i]] for i in range(len(weights))])


class Portfolio(object):
    """A portfolio of stocks whose weights are chosen by quadratic programming."""

    def __init__(self, assets, risk_free=None, position=None):
        """
        Args:
            assets: list of assets, each a ticker symbol, an asset dictionary
                {"ticker": ..., "date_range": ...} or a Stock.
            risk_free: risk-free asset in the same forms; defaults to the
                ticker "^IRX".
            position: the dollar amount invested into this particular
                portfolio. The position can be allocated so that it
                corresponds to the portfolio with the maximum Sharpe ratio,
                or to the portfolio with the minimum risk.
        """
        self.position = position

        self.assets = [_as_stock(stock) for stock in assets]
        self.risk_free = _as_stock(risk_free) if risk_free is not None else Stock("^IRX")

        self.n = len(self.assets)
        self.statistics = self.calculate_statistics()
        self.optimization = self.optimize_portfolio()
        self.returns = self.calculate_portfolio_returns()

    def __str__(self):
        weights = self.optimization["max_sharpe_weights"]

        parts = ["Assets in portfolio: [" + " ".join([asset.ticker for asset in self.assets]) + "]\n\n"]
        for asset in self.assets:
            parts.append(asset.__str__() + "\n\n")
        parts.append("The weights for each asset in the portfolio:\n")
        for i in range(self.n):
            parts.append("\t" + self.assets[i].ticker + "\t: " + str(weights[i][0]) + "\n")
        parts.append("\nExpected return: %.4f" % self.returns)

        return "".join(parts)

    def calculate_portfolio_returns(self):
        """Return the expected return of the maximum-Sharpe portfolio."""
        weights = self.optimization["max_sharpe_weights"]
        returns = 0.0
        for i in range(self.n):
            returns += self.assets[i].statistics["expected_return"] * weights[i][0]
        return returns

    def calculate_statistics(self):
        """Collect expected returns, covariance and standard deviations.

        All assets are assumed to have return series of the same length.
        """
        statistics = {}
        # One column of returns per asset.
        returns = np.column_stack([asset.statistics["returns"] for asset in self.assets])

        statistics["expected_asset_returns"] = np.array([asset.statistics["expected_return"] for asset in self.assets])
        statistics["covariance"] = np.cov(returns, rowvar=0)

        # Due to the behavior of the numpy "diag" function, scalar inputs will
        # fail and produce an error. This instance occurs when there is only a
        # single asset in the portfolio. In this case, simply exclude the call
        # to "diag" and calculate the standard deviation as the square root of
        # a scalar covariance "matrix".
        if statistics["covariance"].shape == ():
            statistics["standard_deviation"] = np.sqrt(statistics["covariance"])
        else:
            statistics["standard_deviation"] = np.sqrt(np.diag(statistics["covariance"]))
        return statistics

    def calculate_parametric_risk(self, alpha, expected_shortfall=False, position=None):
        """Value-at-risk or expected shortfall under normally distributed returns.

        Args:
            alpha: tail probability, e.g. .05.
            expected_shortfall: when True return the expected shortfall,
                otherwise the value-at-risk.
            position: dollar amount invested; defaults to ``self.position``.

        Returns:
            The risk measure as a float, or ``np.nan`` (after printing a
            message) when no position is available.
        """
        if position is None:
            position = self.position
        if position is None:
            print("Either specify a position for the portfolio object or provide one as an input parameter.")
            return np.nan

        mu = self.statistics["expected_asset_returns"]
        S = np.atleast_2d(self.statistics["covariance"])
        w = self.optimization["max_sharpe_weights"]
        # Reduce the 1x1 matrix products to plain floats.
        portfolio_mu = float(np.ravel(np.dot(mu, w))[0])
        portfolio_sigma = float(np.sqrt(np.ravel(np.dot(np.dot(w.T, S), w))[0]))

        quantile = stats.norm.ppf(alpha)

        if expected_shortfall:
            risk = position * (-portfolio_mu + portfolio_sigma * (stats.norm.pdf(quantile) / alpha))
        else:
            risk = -position * (portfolio_mu + quantile * portfolio_sigma)

        return risk

    def optimize_kelly_criterion(self):
        """Return {"weights": array} of Kelly-optimal portfolio weights.

        This code attempts to reproduce the optimization routine proposed by
        Vasily Nekrasov using the Kelly criterion. In particular, this code
        uses as reference the following work:

           Nekrasov, Vasily. 2013. "Kelly Criterion for Multivariate
           Portfolios: A Model-Free Approach".
        """
        kelly_optimization = {}

        n = self.n
        r = self.risk_free.statistics["expected_daily_return"]
        S = matrix(1.0 / ((1 + r) ** 2) * self.statistics["covariance"])
        r_assets = matrix([asset.statistics["expected_daily_return"] for asset in self.assets])

        q = matrix(1.0 / (1 + r) * (r_assets - r))
        G, h, A, b = self.optimization_constraint_matrices()

        # Notice that the "linear" term in the quadratic optimization
        # formulation is made negative. This is because Nekrasov maximizes the
        # function, whereas CVXOPT is forced to minimize. By making the linear
        # term negative, we arrive at an equivalent formulation.
        portfolio_weights = solvers.qp(S, -q, G, h, A, b)["x"]
        kelly_optimization["weights"] = np.array([portfolio_weights[i] for i in range(n)])
        return kelly_optimization

    def optimize_portfolio(self):
        """Trace the risk/return trade-off and pick two portfolios from it.

        Solves one quadratic program per risk-aversion value and returns a
        dict with
          "returns", "risk": lists with one entry per solved portfolio;
          "max_sharpe_weights": (n, 1) array, portfolio with the largest
              Sharpe ratio;
          "min_variance_weights": (n, 1) array, portfolio with the smallest
              risk.
        """
        optimization = {}

        # Keep the covariance two-dimensional even for a single asset, where
        # np.cov returns a zero-dimensional array.
        covariance = np.atleast_2d(self.statistics["covariance"])
        sigma = matrix(covariance)
        # Quadratic term handed to the solver, doubled as before.
        S = matrix(2 * covariance)
        expected_returns = matrix(self.statistics["expected_asset_returns"])
        G, h, A, b = self.optimization_constraint_matrices()

        # Risk-aversion parameters spread logarithmically from 10^-1 upwards.
        mu_array = [10 ** (5.0 * t / 100 - 1.0) for t in range(100)]

        portfolio_weights = [solvers.qp(mu * S, -expected_returns, G, h, A, b)["x"] for mu in mu_array]
        returns = [dot(expected_returns, w) for w in portfolio_weights]
        # The standard deviation of a portfolio is sqrt(w' * Sigma * w); it is
        # computed from the covariance itself, not from the doubled matrix
        # passed to the solver (which inflated every risk by sqrt(2)).
        risk = [np.sqrt(dot(w, sigma * w)) for w in portfolio_weights]

        # Calculate the portfolio with the greatest "reward-to-risk" ratio,
        # which is Sharpe's ratio. Notice that it is not necessary to specify
        # the risk free rate in the calculation of Sharpe's ratio, as without
        # loss of generality it may be assumed to be zero. However, since the
        # risk free asset defaults to a Treasury bill, we take no action
        # regarding this observation.
        mu_free = self.risk_free.statistics["expected_return"]
        sharpe_ratio = (np.array(returns) - mu_free) / np.array(risk)
        # argmax / argmin return the first optimum, like the former
        # "first entry of the boolean mask" selection.
        max_sharpe_index = int(np.argmax(sharpe_ratio))
        min_variance_index = int(np.argmin(risk))

        optimization["returns"] = returns
        optimization["risk"] = risk
        optimization["max_sharpe_weights"] = _column_vector(portfolio_weights[max_sharpe_index])
        optimization["min_variance_weights"] = _column_vector(portfolio_weights[min_variance_index])

        return optimization

    def optimization_constraint_matrices(self):
        """Return (G, h, A, b) encoding "weights >= 0" and "weights sum to 1"."""
        n = self.n
        # G is minus the identity matrix: -w <= 0.
        G = matrix(0.0, (n, n))
        G[::n + 1] = -1.0
        h = matrix(0.0, (n, 1))
        # A * w = b: the weights sum to one.
        A = matrix(1.0, (1, n))
        b = matrix(1.0)

        return G, h, A, b

# --------------------------------------------------------------------------------
# /stock.py:
# --------------------------------------------------------------------------------
# Stock.py: A Python class representing a stock as downloaded from Yahoo
# Finance! A stock object is instantiated by specifying a ticker
# and, optionally, a start and end date in the format "YYYY-MM-DD".
#
# The stock object is completely specified by its ticker and a pair of
# dates across which it aggregates financial data. The stock class then
# computes the returns, the expected return, and the gross return based on
# daily price information.
#
# The stock class supports operations to calculate the value-at-risk, and
# utility functions to graph the daily prices.
#
# The following is an example usage of the stock class to download
# historical price data for Google (GOOG) over a specified period:
#   date_range = {"start" : "2012-01-03", "end" : "2013-01-08"}
#   ticker = "GOOG"
#   stock = Stock(ticker,date_range)
#   stock.display_price()
#   print(stock)

import datetime

import numpy as np
from scipy import stats

try:  # Python 3
    from urllib.request import Request, urlopen
    from urllib.parse import urlencode
except ImportError:  # Python 2
    from urllib2 import Request, urlopen
    from urllib import urlencode

try:
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
except ImportError:
    # Plotting is optional: everything except display_price() works
    # without matplotlib installed.
    plt = None
    mdates = None

# Format of the "start"/"end" strings and of the keys of Stock.profile.
_DATE_FORMAT = "%Y-%m-%d"


class Stock(object):
    """Daily price history and return statistics for a single ticker.

    Attributes set by the constructor:
        ticker:     the ticker symbol passed in.
        position:   the position size passed in, or None.
        date_range: dict with "start" and "end" keys holding
                    "YYYY-MM-DD" strings.
        profile:    dict mapping "YYYY-MM-DD" to a dict of that day's
                    fields (see yahoo_download_daily); empty if the
                    download failed.
        statistics: dict produced by calculate_statistics(); empty if the
                    download or the calculation failed.
    """

    def __init__(self, ticker, date_range=None, position=None):
        """Download the price history for `ticker` and compute statistics.

        ticker:     ticker symbol to query.
        date_range: optional {"start": "YYYY-MM-DD", "end": "YYYY-MM-DD"}.
                    Defaults to the 365 days ending today.
        position:   optional position size used as the default by
                    calculate_parametric_risk().

        Construction is best-effort: if the download or the statistics
        calculation fails, a message is printed and `profile` and/or
        `statistics` are left as empty dicts.
        """
        self.ticker = ticker
        self.position = position

        if date_range is not None:
            self.date_range = date_range
        else:
            # If there was no specified time interval, presume that the
            # user intends to download historical price data from the
            # past year. Notice that the end of the time interval is
            # today, while the start is one year in the past.
            today = datetime.datetime.now()
            one_year_ago = today - datetime.timedelta(days=365)
            self.date_range = {
                "start": one_year_ago.strftime(_DATE_FORMAT),
                "end": today.strftime(_DATE_FORMAT),
            }

        # Give both attributes a defined value up front so that a failed
        # download leaves a usable (empty) object rather than one with
        # missing attributes.
        self.profile = {}
        self.statistics = {}
        try:
            self.profile = self.yahoo_download_daily()
            self.statistics = self.calculate_statistics()
        except Exception:
            # `Exception` rather than a bare except, so KeyboardInterrupt
            # and SystemExit still propagate.
            print("Invalid ticker symbol specified or else there was not an internet connection available.")

    def __str__(self):
        """Return a summary: ticker, date range, latest day and expected return."""
        print_string = "Ticker: " + self.ticker + "\n"
        print_string += "Time series: From " + self.date_range["start"] + " to " + self.date_range["end"] + "\n\n"

        if not self.profile:
            # Nothing was downloaded, so there is no latest day to report.
            return print_string + "No price data available."

        print_string += "Current performance:\n"
        print_string += "Date\t\tOpen\tHigh\tLow\tClose\tVolume\t\tAdjusted Close\n"

        # ISO "YYYY-MM-DD" strings sort chronologically, so the last key
        # is the most recent trading day.
        current_date = sorted(self.profile)[-1]
        current_performance = self.profile[current_date]

        print_string += "%s\t%.2f\t%.2f\t%.2f\t%.2f\t%7e\t%.2f\n\n" % (
            current_date,
            float(current_performance["Open"]),
            float(current_performance["High"]),
            float(current_performance["Low"]),
            float(current_performance["Close"]),
            int(current_performance["Volume"]),
            float(current_performance["Adj Close"]),
        )
        print_string += "Expected return: %.4f" % self.statistics["expected_return"]
        return print_string

    def calculate_statistics(self):
        """Compute return statistics from the closing prices in `profile`.

        Returns a dict with:
            "returns":               array of daily returns R_t.
            "log_returns":           array of log(1 + R_t).
            "expected_daily_return": mean of the daily returns.
            "expected_return":       mean daily return times the number
                                     of daily returns.
        """
        # The prices must be in chronological order for consecutive
        # ratios to be daily returns, so use the date-sorted accessor
        # instead of iterating the dictionary directly.
        closing_prices = self.asset_closing_prices(array=True)

        # Occasionally, values of zero are obtained as an asset price. In all likelihood, this
        # value is rubbish and cannot be trusted, as it implies that the asset has no value.
        # In these cases, we replace the reported asset price by the mean of all asset prices.
        closing_prices[closing_prices == 0] = np.mean(closing_prices)

        # Calculate the daily returns on the stock. This calculation is
        # defined by the formula:
        #   R_t = (P_t / P_{t - 1}) - 1
        # Refer to page five of Statistics and Data Analysis for Financial
        # Engineering. For the expected return, we simply take the mean value of
        # the calculated daily returns.
        returns = closing_prices[1:] / closing_prices[:-1] - 1

        statistics = {}
        statistics["returns"] = returns
        statistics["log_returns"] = np.log(returns + 1)

        # Multiply the average daily return by the length of the time series in order to
        # obtain the expected return over the entire period.
        statistics["expected_daily_return"] = np.mean(returns)
        statistics["expected_return"] = statistics["expected_daily_return"] * len(returns)

        return statistics

    def calculate_parametric_risk(self, alpha, position=None):
        """Return the parametric value-at-risk under a fitted t-distribution.

        alpha:    quantile level passed to the t-distribution's inverse CDF.
        position: size of the position; falls back to `self.position`.

        Returns -position * (mu + q_alpha * lambda), or NaN (after
        printing a message) when no position is available.
        """
        if position is None:
            position = self.position
        if position is None:
            print("Either specify a position for the stock object or provide one as an input parameter.")
            return np.nan

        returns = self.statistics["returns"]

        # Fit a t-distribution to the daily returns data using the
        # method of maximum likelihood estimation.
        tdof, tloc, tscale = stats.t.fit(returns)
        quantile = stats.t.ppf(alpha, tdof, tloc, tscale)

        # Assuming that returns are i.i.d. with a t-distribution, it
        # can be shown that value-at-risk is calculated as:
        #   VaR_t(alpha) = -S * {mu + q_{alpha}(nu) * lambda}
        # In this formula, S refers to the size of the position. The
        # parameters mu, lambda, and nu are the estimated mean,
        # scale, and degrees of freedom of the sample returns. The
        # parameter q_{alpha}(nu) is the alpha-quantile of a
        # t-distribution with nu degrees of freedom. Refer to page
        # 510 in Statistics and Data Analysis for Financial
        # Engineering.
        value_at_risk = -position * (tloc + quantile * tscale)
        return value_at_risk

    def asset_closing_prices(self, array=False):
        """Return the closing prices ordered by date.

        array: when true return a numpy array, otherwise a list of floats.
        """
        closing_prices = [float(self.profile[day]["Close"]) for day in sorted(self.profile)]
        return np.array(closing_prices) if array else closing_prices

    def display_price(self):
        """Plot the closing prices against their dates and show the figure.

        Raises ImportError when matplotlib is not installed.
        """
        if plt is None:
            raise ImportError("matplotlib is required to display prices.")

        # Convert the date strings with strptime + date2num; the
        # strpdate2num helper is absent from recent Matplotlib releases.
        date_numbers = [
            mdates.date2num(datetime.datetime.strptime(day, _DATE_FORMAT))
            for day in sorted(self.profile)
        ]
        # plot() followed by xaxis_date() draws the same figure that
        # plot_date() produced, without depending on that helper.
        plt.plot(date_numbers, self.asset_closing_prices(), "k-o")
        plt.gca().xaxis_date()
        plt.title(self.ticker + " Closing Prices")
        plt.ylabel("Daily Prices")
        plt.xlabel("Historical Dates")
        plt.grid(True)
        plt.show()

    def yahoo_download_daily(self):
        """Download daily price data for `ticker` over `date_range`.

        Returns a dict keyed by the date string of each CSV row:
            'YYYY-MM-DD': {'Adj Close': 'float',
                           'Close': 'float',
                           'High': 'float',
                           'Low': 'float',
                           'Open': 'float',
                           'Volume': 'int'
                          }
        The inner keys are taken from the CSV header row and every value
        is kept as the string found in the response.

        Any network or parsing error propagates to the caller.
        """
        # Stocks are defined over a range of time, with a beginning and an end
        # date. We use these dates to query yahoo Finance! for the relevant
        # historical price data.
        start_date = self.date_range["start"]
        end_date = self.date_range["end"]

        # Encode the query parameters to be used in the GET request to yahoo
        # Finance! Note that the month parameters ("a" and "d") are sent
        # zero-based, i.e. as the calendar month minus one.
        parameters = urlencode({
            "s": self.ticker,
            "a": int(start_date[5:7]) - 1,
            "b": int(start_date[8:10]),
            "c": int(start_date[0:4]),
            "d": int(end_date[5:7]) - 1,
            "e": int(end_date[8:10]),
            "f": int(end_date[0:4]),
            "g": "d",
            "ignore": ".csv",
        })
        # NOTE(review): this ichart.yahoo.com CSV endpoint may no longer be
        # served by Yahoo -- confirm it still responds before relying on it.
        url = "http://ichart.yahoo.com/table.csv?%s" % parameters

        response = urlopen(Request(url))
        try:
            content = str(response.read().decode("utf-8").strip())
        finally:
            # Always release the connection, even if reading fails.
            response.close()

        return self._parse_daily_csv(content)

    @staticmethod
    def _parse_daily_csv(content):
        """Turn CSV text (header row + one row per day) into the profile dict."""
        daily_data = content.splitlines()
        keys = daily_data[0].split(",")

        # For every day, create an entry in a dictionary of dates with the trading
        # volume, the closing price, the opening price, the high and the low price,
        # and the adjusted closing price. Column 0 is the date; columns 1-6
        # are stored under the names given by the header row.
        historical_data = {}
        for day in daily_data[1:]:
            day_data = day.split(",")
            historical_data[day_data[0]] = {keys[i]: day_data[i] for i in range(1, 7)}
        return historical_data