├── .gitignore ├── requirements.txt ├── README.md ├── .github └── workflows │ └── python-app.yml ├── models.py ├── metrics.py ├── multivariate_models.py ├── data.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .vscode/settings.json 3 | *.csv 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | numpy 3 | scipy 4 | arch 5 | gdown 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Modeling Tails of Distributions by Importance Sampling: Application in Risk-Management 2 | 3 | This repository contains code for the project "Modeling Tails of Distributions by Importance Sampling: Application in Risk-Management". The aim of the project is to estimate Value at Risk (VaR) and Expected Shortfall (ES) risk measurements of a financial portfolio by the Importance Sampling technique. 
# =============================================================================
# /.github/workflows/python-app.yml
# (non-Python file from the repository dump, preserved verbatim as a comment)
# =============================================================================
#   # This workflow will install Python dependencies, run tests and lint with a single version of Python
#   # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
#
#   name: Python application
#
#   on:
#     push:
#     pull_request:
#
#   permissions:
#     contents: read
#
#   jobs:
#     build:
#
#       runs-on: ubuntu-latest
#
#       steps:
#       - uses: actions/checkout@v3
#       - name: Set up Python 3.10
#         uses: actions/setup-python@v3
#         with:
#           python-version: "3.10"
#       - name: Install dependencies
#         run: |
#           python -m pip install --upgrade pip
#           pip install flake8 pytest
#           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
#       - name: Lint with flake8
#         run: |
#           # stop the build if there are Python syntax errors or undefined names
#           flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
#           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
#           flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
#       - name: Test with pytest
#         run: |
#           pytest test.py

import numpy as np
import pandas as pd
from scipy.special import xlogy
from scipy.stats import chi2, norm

# `arch` and `gdown` are only needed by GARCH11 and by the download helpers.
# Importing them defensively keeps every other model/metric usable (and
# testable) on machines where they are not installed.
try:
    from arch import arch_model
except ImportError:
    arch_model = None

try:
    import gdown
except ImportError:
    gdown = None


# =============================================================================
# /models.py
# =============================================================================

class RiskMetrics:
    """VaR forecaster based on an exponentially weighted moving variance.

    Reference: Longerstaey, Jacques, and Martin Spencer. "RiskMetrics(TM) --
    Technical Document." Morgan Guaranty Trust Company of New York: New York
    51 (1996): 54.

    Args:
        alpha: level used by ``forecast``, which returns the ``1 - alpha``
            quantile of a zero-mean normal distribution.
        lambd: decay factor of the variance recursion (default 0.94).
        window_size: number of most recent observations fed to the recursion
            (default 74).
    """

    def __init__(self, alpha, lambd=0.94, window_size=74):
        self.alpha = alpha
        self.lambd = lambd
        self.window_size = window_size

    def forecast(self, feat):
        """Return the ``1 - alpha`` normal quantile for the next observation.

        The variance starts at zero and is updated over the last
        ``window_size`` values of ``feat`` as
        ``sigma2 = lambd * sigma2 + (1 - lambd) * r**2``.
        """
        window = np.asarray(feat, dtype=float)[-self.window_size:]
        sigma2 = 0.0
        for r in window:
            sigma2 = self.lambd * sigma2 + (1 - self.lambd) * r**2
        return norm.ppf(1 - self.alpha, scale=sigma2**0.5)


class HistoricalSimulation:
    """VaR forecaster that uses the empirical quantile of recent observations.

    Args:
        alpha: level; ``forecast`` returns the empirical ``1 - alpha`` quantile.
        window_size: number of most recent observations used.
    """

    def __init__(self, alpha, window_size):
        self.alpha = alpha
        self.window_size = window_size

    def forecast(self, feat):
        """Return the ``1 - alpha`` quantile of the last ``window_size`` values."""
        window = np.asarray(feat, dtype=float)[-self.window_size:]
        return np.quantile(window, q=1 - self.alpha)


class GARCH11:
    """VaR forecaster built on a GARCH(1, 1) one-step-ahead variance forecast.

    Args:
        alpha: level; ``forecast`` returns the ``1 - alpha`` quantile of a
            zero-mean normal with the forecast variance.
        window_size: number of most recent observations the model is fitted on.
    """

    def __init__(self, alpha, window_size):
        self.alpha = alpha
        self.window_size = window_size

    def forecast(self, feat):
        """Fit the model on the last ``window_size`` values and return the quantile.

        Raises:
            ImportError: if the ``arch`` package is not installed.
        """
        if arch_model is None:
            raise ImportError("GARCH11 requires the 'arch' package")
        model = arch_model(feat[-self.window_size:], p=1, q=1, rescale=False)
        res = model.fit(disp='off')
        sigma2 = res.forecast(horizon=1, reindex=False).variance.values[0, 0]
        return norm.ppf(1 - self.alpha, scale=sigma2**0.5)


# =============================================================================
# /metrics.py
# =============================================================================

def _as_float_arrays(var, target):
    """Convert both inputs to float ndarrays (accepts lists, Series, scalars)."""
    return np.asarray(var, dtype=float), np.asarray(target, dtype=float)


def _ratio(num, den):
    """Return ``num / den``, or 0.0 when the denominator is zero."""
    return num / den if den else 0.0


def pof_test(var, target, alpha=0.99):
    """Return the p-value of the proportion-of-failures likelihood-ratio test.

    An exception is an observation with ``target < var``. The statistic
    compares the observed exception rate with the expected rate ``1 - alpha``
    and is referred to a chi-square distribution with one degree of freedom.

    The likelihoods are evaluated in log space; raw powers such as
    ``alpha ** (t - m)`` underflow to zero on long samples and turn the
    statistic into ``nan``.

    Args:
        var: VaR forecasts (array-like or scalar).
        target: realised values (array-like).
        alpha: level of the forecasts; the expected exception rate is
            ``1 - alpha``.

    Returns:
        The p-value, or ``nan`` when there are no observations.
    """
    var, target = _as_float_arrays(var, target)
    exception = np.atleast_1d(target < var)
    t = exception.size
    if t == 0:
        return float('nan')
    m = int(exception.sum())
    rate = m / t
    # xlogy(x, y) = x * log(y) with the convention 0 * log(0) = 0.
    log_nom = xlogy(m, 1 - alpha) + xlogy(t - m, alpha)
    log_den = xlogy(t - m, 1 - rate) + xlogy(m, rate)
    # Clamp tiny negative values caused by rounding; the statistic is >= 0.
    lr_pof = max(-2.0 * (log_nom - log_den), 0.0)
    return chi2.sf(lr_pof, df=1)


def if_test(var, target):
    """Return the p-value of the independence-of-failures likelihood-ratio test.

    Consecutive exception indicators (``target < var``) are counted as
    transitions ``n00, n01, n10, n11``; the statistic compares a single
    exception probability with probabilities conditional on the previous
    state and is referred to a chi-square distribution with one degree of
    freedom.

    A transition row without observations (for example, no exceptions at
    all) contributes nothing to the likelihood instead of producing ``nan``
    through a 0/0 division.

    Args:
        var: VaR forecasts (array-like or scalar).
        target: realised values (array-like).

    Returns:
        The p-value, or ``nan`` when fewer than two observations are given
        (there is no transition to test).
    """
    var, target = _as_float_arrays(var, target)
    exception = np.atleast_1d(target < var)
    if exception.size < 2:
        return float('nan')
    prev, curr = exception[:-1], exception[1:]
    n00 = int(np.sum(~prev & ~curr))
    n01 = int(np.sum(~prev & curr))
    n10 = int(np.sum(prev & ~curr))
    n11 = int(np.sum(prev & curr))
    pi = _ratio(n01 + n11, n00 + n01 + n10 + n11)
    pi0 = _ratio(n01, n00 + n01)
    pi1 = _ratio(n11, n10 + n11)
    log_nom = xlogy(n00 + n10, 1 - pi) + xlogy(n01 + n11, pi)
    log_den = (
        xlogy(n00, 1 - pi0) + xlogy(n01, pi0)
        + xlogy(n10, 1 - pi1) + xlogy(n11, pi1)
    )
    lr_if = max(-2.0 * (log_nom - log_den), 0.0)
    return chi2.sf(lr_if, df=1)


def quantile_loss(var, target, alpha=0.99):
    """Return the mean asymmetric absolute error of the VaR forecasts.

    Each absolute error ``|var - target|`` is weighted by ``2 * alpha`` when
    ``target < var`` and by ``2 * (1 - alpha)`` otherwise.

    Args:
        var: VaR forecasts (array-like or scalar).
        target: realised values (array-like or scalar).
        alpha: level of the forecasts.
    """
    var, target = _as_float_arrays(var, target)
    abs_err = np.abs(var - target)
    weight = np.where(target < var, alpha, 1 - alpha)
    return np.mean(abs_err * 2 * weight)


# =============================================================================
# /multivariate_models.py
# =============================================================================

def multivariate_var(tickers,  # list of tickers
                     weights,  # list of weights
                     from_date,
                     to_date,
                     initial_investment,  # value of initial investment (integer or float)
                     alpha,  # alpha, where 1 - alpha = confidence level
                     n  # number of days for n-days VaR calculation
                     ):
    """Return the n-day variance-covariance VaR of a weighted stock portfolio.

    Downloads the stock table to ``stocks.csv``, restricts it to
    ``[from_date, to_date]``, and fits a normal distribution to the value of
    the investment after one day. The one-day VaR is
    ``initial_investment - norm.ppf(alpha, mean, stdev)``; it is scaled by
    ``sqrt(n)`` and rounded to two decimals.

    Raises:
        ImportError: if the ``gdown`` package is not installed.
    """
    if gdown is None:
        raise ImportError("multivariate_var requires the 'gdown' package")
    # converting weights to array
    weights = np.array(weights)
    # import returns data
    url = 'https://drive.google.com/file/d/1lLQV4oc30mo1_m39p4JXlpd1gV6pLw6A/view?usp=sharing'
    gdown.download(url, 'stocks.csv', fuzzy=True)
    data = pd.read_csv('stocks.csv', index_col=0)[tickers]
    data.index = pd.to_datetime(data.index)
    from_mask = data.index >= pd.to_datetime(from_date)
    to_mask = data.index <= pd.to_datetime(to_date)
    data = data[from_mask & to_mask]
    returns = data.pct_change()

    # generate covariance matrix
    cov_matrix = returns.cov()

    # calculate mean and standard deviation
    port_mean = returns.mean().dot(weights)
    mean_investment = (1 + port_mean) * initial_investment
    port_stdev = np.sqrt(weights.T.dot(cov_matrix).dot(weights))
    stdev_investment = initial_investment * port_stdev

    # determine confidence level cutoff from the normal distribution
    cutoff = norm.ppf(alpha, mean_investment, stdev_investment)

    # calculate daily VaR
    VaR = initial_investment - cutoff

    # calculate n-days VaR
    VaR_n_days = np.round(VaR * np.sqrt(n), 2)

    return VaR_n_days


# =============================================================================
# /data.py
# =============================================================================

class Dataloader:
    """Iterate over ``(feature window, target)`` pairs of a series.

    For every target position ``i`` in
    ``range(first_pred, len(series), step_size)`` the feature window covers
    positions ``i - horizon - window_size + 1 .. i - horizon`` (inclusive).

    Args:
        series: the observations to slice.
        window_size: number of observations in each feature window.
        step_size: distance between consecutive target positions.
        horizon: gap between the last feature position and the target.
        first_pred: position of the first target.

    Raises:
        AssertionError: if ``first_pred`` is not greater than ``window_size``.
        ValueError: if the first feature window would start before the
            beginning of the series.
    """

    def __init__(
        self,
        series: pd.Series,
        window_size: int,
        step_size: int,
        horizon: int,
        first_pred: int
    ):
        self.series = series
        self.window_size = window_size
        self.step_size = step_size
        self.horizon = horizon
        self.first_pred = first_pred
        # Kept as an assertion so existing callers see the same exception type.
        assert self.first_pred > self.window_size
        first_start = first_pred - horizon - window_size + 1
        if first_start < 0:
            # A negative start would make ``iloc`` wrap around and silently
            # pull observations from the END of the series into the window.
            raise ValueError(
                'first_pred - horizon - window_size + 1 must be >= 0, '
                f'got {first_start}'
            )
        targets = range(first_pred, series.shape[0], step_size)
        self.feat_idx = [
            range(i - horizon - window_size + 1, i - horizon + 1)
            for i in targets
        ]
        self.target_idx = list(targets)

    def __len__(self):
        return len(self.feat_idx)

    def __iter__(self):
        # The loader is its own iterator; starting a new loop rewinds it.
        self.iter = 0
        return self

    def __next__(self):
        if self.iter >= len(self.feat_idx):
            raise StopIteration
        feat = self.series.iloc[self.feat_idx[self.iter]]
        target = self.series.iloc[self.target_idx[self.iter]]
        self.iter += 1
        return feat, target


def _get_returns(data, assets, weights, from_date, to_date):
    """Return simple returns of a weighted portfolio within a date range.

    The selected ``assets`` columns are multiplied by ``weights`` and summed
    row-wise; returns are ``value / previous value - 1`` and are computed
    before the ``[from_date, to_date]`` filter is applied.

    The caller's ``data`` is left untouched: the datetime index is set on a
    copy of the selected columns.
    """
    prices = data[assets].copy()
    prices.index = pd.to_datetime(data.index)
    portfolio = (prices * weights).sum(axis=1)
    from_mask = portfolio.index >= pd.to_datetime(from_date)
    to_mask = portfolio.index <= pd.to_datetime(to_date)
    return (portfolio / portfolio.shift() - 1)[from_mask & to_mask]
# =============================================================================
# /data.py (continued): dataset download helpers
# =============================================================================

_STOCKS_URL = 'https://drive.google.com/file/d/1lLQV4oc30mo1_m39p4JXlpd1gV6pLw6A/view?usp=sharing'
_COMMODITIES_URL = 'https://drive.google.com/file/d/1GFq1jcV00BjFEa7hmZSO1xD7K4j4gv3O/view?usp=sharing'
_CRYPTOCURRENCIES_URL = 'https://drive.google.com/file/d/1mPP5Vb57Jc2mYPeLYZPgAJM8ogjiguSO/view?usp=sharing'


def _download_returns(url, filename, assets, weights, from_date, to_date):
    """Download a table to ``filename`` and return the portfolio returns.

    Shared implementation of the three public loaders below: fetch the file
    with ``gdown``, read it with the first column as index, and delegate to
    ``_get_returns``.
    """
    gdown.download(url, filename, fuzzy=True)
    data = pd.read_csv(filename, index_col=0)
    return _get_returns(data, assets, weights, from_date, to_date)


def stocks_returns(assets, weights, from_date, to_date):
    """Return weighted portfolio returns from the stocks table (``stocks.csv``)."""
    return _download_returns(
        _STOCKS_URL, 'stocks.csv', assets, weights, from_date, to_date
    )


def commodities_returns(assets, weights, from_date, to_date):
    """Return weighted portfolio returns from the commodities table (``commodities.csv``)."""
    return _download_returns(
        _COMMODITIES_URL, 'commodities.csv', assets, weights, from_date, to_date
    )


def cryptocurrencies_returns(assets, weights, from_date, to_date):
    """Return weighted portfolio returns from the cryptocurrencies table (``cryptocurrencies.csv``)."""
    return _download_returns(
        _CRYPTOCURRENCIES_URL, 'cryptocurrencies.csv',
        assets, weights, from_date, to_date
    )


# =============================================================================
# /test.py
# =============================================================================

import numpy as np
import pandas as pd
from data import (
    stocks_returns,
    commodities_returns,
    cryptocurrencies_returns,
    Dataloader
)
from metrics import pof_test, if_test, quantile_loss
from models import HistoricalSimulation, RiskMetrics


def _expected(values, dates):
    """Build the reference return series used by the data tests."""
    return pd.Series(data=values, index=pd.to_datetime(dates))


def _backtest(model, loader):
    """Run ``model.forecast`` over every window; return (forecasts, targets)."""
    forecasts, realised = [], []
    for feat, target in loader:
        forecasts.append(model.forecast(feat))
        realised.append(target)
    return np.array(forecasts), np.array(realised)


class TestData:
    """Checks of the downloaded return series (these tests need network access)."""

    def test_stocks_returns(self):
        dates = ['09/02/2022', '09/06/2022', '09/07/2022']

        returns = stocks_returns(['AAPL'], [1.], from_date='09/02/2022', to_date='09/07/2022')
        test_returns = _expected([-0.0136, -0.0082, 0.0093], dates)
        assert np.allclose(returns, test_returns, atol=0.0001)

        returns = stocks_returns(['AAPL', 'GOOGL'], [0.3, 0.7], from_date='09/02/2022', to_date='09/07/2022')
        test_returns = _expected([-0.0158, -0.0091, 0.0188], dates)
        assert np.allclose(returns, test_returns, atol=0.0001)

        assets = ['AAPL', 'AMD', 'AMZN', 'GOOGL', 'INTC', 'META', 'MSFT', 'MU', 'NVDA', 'TSLA']
        returns = stocks_returns(assets, np.ones(10), from_date='09/02/2022', to_date='09/07/2022')
        test_returns = _expected([-0.0193, -0.0068, 0.0196], dates)
        assert np.allclose(returns, test_returns, atol=0.0001)

    def test_commodities_returns(self):
        assets = [
            'Brent Oil', 'Crude Oil WTI', 'Natural Gas',
            'Heating Oil', 'Gold', 'Silver', 'Copper',
            'US Coffee C', 'US Corn'
        ]
        returns = commodities_returns(assets, np.ones(9), from_date='09/02/2022', to_date='09/07/2022')
        test_returns = _expected(
            [0.0075, 0.001, -0.0021],
            ['2022-09-02', '2022-09-06', '2022-09-07'],
        )
        assert np.allclose(returns, test_returns, atol=0.0001)

    def test_cryptocurrencies_returns(self):
        assets = ['ADA', 'BNB', 'BTC', 'BUSD', 'DOGE', 'ETH', 'USDC', 'USDT', 'XRP']
        returns = cryptocurrencies_returns(assets, np.ones(9), from_date='09/02/2022', to_date='09/07/2022')
        test_returns = _expected(
            [-0.0076, -0.0072, 0.0081, -0.0063, -0.0481, 0.026],
            ['2022-09-02', '2022-09-03', '2022-09-04', '2022-09-05', '2022-09-06', '2022-09-07'],
        )
        assert np.allclose(returns, test_returns, atol=0.0001)


class TestMetrics:
    """Checks of the backtesting metrics on seeded random data."""

    def test_quantile_loss(self):
        np.random.seed(0)
        target = np.random.randn(10)
        var = np.ones(10)
        assert np.isclose(quantile_loss(var, target, alpha=0.9), 1.0461, atol=0.0001)
        assert np.isclose(quantile_loss(var, target, alpha=0.1), 0.6269, atol=0.0001)

    def test_pof_test(self):
        np.random.seed(0)
        target = np.random.randn(10)
        var = -np.ones(10) * 2
        assert np.isclose(pof_test(var, target, alpha=0.95), 0.3111, atol=0.0001)

    def test_if_test(self):
        np.random.seed(0)
        target = np.random.randn(1000)
        var = -np.ones(1000) * 2
        assert np.isclose(if_test(var, target), 0.2770, atol=0.0001)


class TestModels:
    """Backtests of the VaR models on a two-stock portfolio (needs network access)."""

    @classmethod
    def setup_class(cls):
        # Declared as a classmethod: pytest invokes this hook once with the
        # class, and the loader is shared by every test below.
        returns = stocks_returns(
            ['AAPL', 'GOOGL'], [0.3, 0.7],
            from_date='09/02/2020', to_date='09/02/2022',
        )
        cls.loader = Dataloader(
            series=np.log(returns + 1),
            window_size=125,  # a half of trading year
            step_size=1,
            horizon=1,
            first_pred=125 + 1,
        )

    def test_historical_simulation(self):
        alpha = 0.95
        var, target = _backtest(HistoricalSimulation(alpha, window_size=125), self.loader)
        assert np.isclose(quantile_loss(var, target, alpha), 0.0038, atol=0.0001)
        assert if_test(var, target) > 0.05

    def test_riskmetrics(self):
        alpha = 0.95
        var, target = _backtest(RiskMetrics(alpha), self.loader)
        assert np.isclose(quantile_loss(var, target, alpha), 0.0036, atol=0.0001)
        assert pof_test(var, target, alpha) > 0.05