├── .gitignore
├── requirements.txt
├── README.md
├── .github
    └── workflows
    │   └── python-app.yml
├── models.py
├── metrics.py
├── multivariate_models.py
├── data.py
└── test.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .vscode/settings.json
3 | *.csv
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas
2 | numpy
3 | scipy
4 | arch
5 | gdown
6 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Modeling Tails of Distributions by Importance Sampling: Application in Risk-Management
2 | 
3 | This repository contains the code for the project "Modeling Tails of Distributions by Importance Sampling: Application in Risk-Management". The aim of the project is to estimate two risk measures of a financial portfolio, Value at Risk (VaR) and Expected Shortfall (ES), using the importance sampling technique.
4 | 


--------------------------------------------------------------------------------
/.github/workflows/python-app.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: Python application
 5 | 
 6 | on:
 7 |   push:
 8 |   pull_request:
 9 | 
10 | permissions:
11 |   contents: read
12 | 
13 | jobs:
14 |   build:
15 | 
16 |     runs-on: ubuntu-latest
17 | 
18 |     steps:
19 |     - uses: actions/checkout@v3
20 |     - name: Set up Python 3.10
21 |       uses: actions/setup-python@v3
22 |       with:
23 |         python-version: "3.10"
24 |     - name: Install dependencies
25 |       run: |
26 |         python -m pip install --upgrade pip
27 |         pip install flake8 pytest
28 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
29 |     - name: Lint with flake8
30 |       run: |
31 |         # stop the build if there are Python syntax errors or undefined names
32 |         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
33 |         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
34 |         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
35 |     - name: Test with pytest
36 |       run: |
37 |         pytest test.py
38 | 


--------------------------------------------------------------------------------
/models.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | from arch import arch_model
 4 | from scipy.stats import norm
 5 | 
 6 | 
 7 | class RiskMetrics:
 8 |     '''
 9 |     Longerstaey, Jacques, and Martin Spencer. "Riskmetricstm—technical
10 |     document." Morgan Guaranty Trust Company of New York: New York 51
11 |     (1996): 54.
12 |     '''
13 |     def __init__(self, alpha):
14 |         self.alpha = alpha
15 |         self.lambd = 0.94
16 |         self.window_size = 74
17 | 
18 |     def forecast(self, feat):
19 |         sigma2 = 0
20 |         for r in feat[-self.window_size:]:
21 |             sigma2 = self.lambd * sigma2 + (1 - self.lambd) * r**2
22 |         return norm.ppf(1 - self.alpha, scale=sigma2**0.5)
23 | 
24 | 
class HistoricalSimulation:
    '''
    Empirical-quantile forecaster: the forecast is the (1 - alpha) sample
    quantile of the most recent window_size observations.
    '''
    def __init__(self, alpha, window_size):
        self.window_size = window_size
        self.alpha = alpha

    def forecast(self, feat):
        '''Return the (1 - alpha) quantile of the trailing window of feat.'''
        recent = feat[-self.window_size:]
        level = 1 - self.alpha
        return np.quantile(recent, q=level)
32 | 
33 | 
class GARCH11:
    '''
    Forecaster that fits arch_model(p=1, q=1) on a trailing window and maps
    the one-step-ahead variance forecast to a zero-mean normal quantile.
    '''
    def __init__(self, alpha, window_size):
        self.window_size = window_size
        self.alpha = alpha

    def forecast(self, feat):
        '''
        Fit the model on the last window_size entries of feat and return
        the (1 - alpha) quantile of N(0, forecast variance).
        '''
        window = feat[-self.window_size:]
        # rescale=False keeps the series in its original units.
        fitted = arch_model(window, p=1, q=1, rescale=False).fit(disp='off')
        variance_forecast = fitted.forecast(horizon=1, reindex=False).variance
        next_variance = variance_forecast.values[0, 0]
        # NOTE(review): only the variance forecast is used; any mean the
        # fitted model estimates is not added to the quantile - confirm
        # this is intended.
        return norm.ppf(1 - self.alpha, scale=next_variance**0.5)
44 | 


--------------------------------------------------------------------------------
/metrics.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | from scipy.stats import chi2
 4 | 
 5 | 
 6 | def pof_test(var, target, alpha=0.99):
 7 |     exception = target < var
 8 |     t = len(target)
 9 |     m = exception.sum()
10 |     nom = (1 - alpha)**m * alpha**(t-m)
11 |     den = (1 - m/t)**(t - m) * (m / t)**m
12 |     lr_pof = -2 * np.log(nom / den)
13 |     pvalue = 1 - chi2.cdf(lr_pof, df=1)
14 |     return pvalue
15 | 
16 | 
def if_test(var, target):
    """Return the p-value of the exception-independence likelihood-ratio test.

    An exception is an observation with ``target < var``. Consecutive
    observations are tallied into the four transition counts ``n00``,
    ``n01``, ``n10`` and ``n11`` (previous state -> current state). The
    likelihood under a single exception probability is compared with the
    likelihood under state-dependent probabilities, and the ratio is
    referred to a chi-squared distribution with one degree of freedom.

    Args:
        var: forecasts, array-like (or scalar) comparable with ``target``.
        target: realised values, array-like.

    Returns:
        The p-value of the test.

    Raises:
        ValueError: if fewer than two observations are supplied, because
            no transition can be formed.
    """
    # Local import keeps this block self-contained: xlogy(x, y) is
    # x * log(y) with the convention 0 * log(0) == 0.
    from scipy.special import xlogy

    exception = np.asarray(target) < np.asarray(var)
    if exception.size < 2:
        raise ValueError('if_test needs at least two observations')

    previous, current = exception[:-1], exception[1:]
    n00 = int((~previous & ~current).sum())
    n01 = int((~previous & current).sum())
    n10 = int((previous & ~current).sum())
    n11 = int((previous & current).sum())

    pi = (n01 + n11) / (n00 + n01 + n10 + n11)
    # A state that is never left contributes an empty product (factor 1)
    # to the likelihood, so its probability is irrelevant; 0.0 avoids the
    # 0 / 0 that used to turn the whole result into NaN.
    pi0 = n01 / (n00 + n01) if n00 + n01 else 0.0
    pi1 = n11 / (n10 + n11) if n10 + n11 else 0.0

    # Log-space evaluation avoids underflow of the raw likelihood products.
    log_nom = xlogy(n00 + n10, 1 - pi) + xlogy(n01 + n11, pi)
    log_den = (xlogy(n00, 1 - pi0) + xlogy(n01, pi0)
               + xlogy(n10, 1 - pi1) + xlogy(n11, pi1))
    # Guard against tiny negative values caused by rounding.
    lr_if = max(-2 * (log_nom - log_den), 0.0)
    return chi2.sf(lr_if, df=1)
33 | 
34 | 
def quantile_loss(var, target, alpha=0.99):
    """Return the mean asymmetric absolute loss of ``var`` against ``target``.

    Each absolute error ``|var - target|`` is multiplied by ``2 * alpha``
    where ``target < var`` and by ``2 * (1 - alpha)`` elsewhere, then the
    weighted errors are averaged.

    Args:
        var: forecasts (array-like supporting arithmetic with ``target``).
        target: realised values.
        alpha: confidence level controlling the asymmetry of the weights.

    Returns:
        The mean weighted loss.
    """
    below = target < var
    side_weight = np.where(below, alpha, 1 - alpha)
    # Building a new array (instead of assigning the weighted values back
    # into the absolute-error array) keeps integer inputs from truncating
    # the fractional losses to whole numbers.
    qloss = np.abs(var - target) * 2 * side_weight
    return qloss.mean()
40 | 


--------------------------------------------------------------------------------
/multivariate_models.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import gdown
 3 | import numpy as np
 4 | from scipy.stats import norm
 5 | 
 6 | def multivariate_var(tickers, # list of tickers
 7 |                      weights, # list of weights
 8 |                      from_date,
 9 |                      to_date,
10 |                      initial_investment, # value of initial investment (integer or float)
11 |                      alpha, # alpha, where 1 - alpha = confidence level
12 |                      n # number of days for n-days VaR calculation
13 | ):
14 |     # converting weights to array
15 |     weights = np.array(weights)
16 |     # import returns data
17 |     url = 'https://drive.google.com/file/d/1lLQV4oc30mo1_m39p4JXlpd1gV6pLw6A/view?usp=sharing'
18 |     gdown.download(url, 'stocks.csv', fuzzy=True)
19 |     data = pd.read_csv('stocks.csv', index_col=0)[tickers]
20 |     data.index = pd.to_datetime(data.index)
21 |     from_mask = data.index >= pd.to_datetime(from_date)
22 |     to_mask = data.index <= pd.to_datetime(to_date)
23 |     data = data[from_mask & to_mask]
24 |     returns = data.pct_change()
25 | 
26 |     # generate covariance matrix
27 |     cov_matrix = returns.cov()
28 | 
29 |     # calculate mean and standard deviation
30 |     port_mean = returns.mean().dot(weights)
31 |     mean_investment = (1 + port_mean) * initial_investment
32 |     port_stdev = np.sqrt(weights.T.dot(cov_matrix).dot(weights))
33 |     stdev_investment = initial_investment * port_stdev
34 | 
35 |     # determine confidence level cutoff from the normal distribution
36 |     cutoff = norm.ppf(alpha, mean_investment, stdev_investment)
37 | 
38 |     # calculate daily VaR
39 |     VaR = initial_investment - cutoff
40 | 
41 |     # calculate n-days VaR
42 |     VaR_n_days = np.round(VaR * np.sqrt(n), 2)
43 | 
44 |     return VaR_n_days
45 | 


--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import gdown
 4 | 
 5 | class Dataloader:
 6 |     def __init__(
 7 |             self,
 8 |             series: pd.Series,
 9 |             window_size: int,
10 |             step_size: int,
11 |             horizon: int,
12 |             first_pred: int
13 |     ):
14 |         self.series = series
15 |         self.window_size = window_size
16 |         self.step_size = step_size
17 |         self.horizon = horizon
18 |         self.first_pred = first_pred
19 |         assert self.first_pred > self.window_size
20 |         feat_idx = []
21 |         target_idx = []
22 |         for i in range(self.first_pred, self.series.shape[0], self.step_size):
23 |             feat_idx.append(range(i-self.horizon-self.window_size+1, i-self.horizon+1))
24 |             target_idx.append(i)
25 |         self.feat_idx = feat_idx
26 |         self.target_idx = target_idx
27 | 
28 |     def __len__(self):
29 |         return len(self.feat_idx)
30 | 
31 |     def __iter__(self):
32 |         self.iter = 0
33 |         return self
34 | 
35 |     def __next__(self):
36 |         if self.iter < len(self.feat_idx):
37 |             feat = self.series.iloc[self.feat_idx[self.iter]]
38 |             target = self.series.iloc[self.target_idx[self.iter]]
39 |             self.iter += 1
40 |             return feat, target
41 |         else:
42 |             raise StopIteration
43 | 
44 | 
45 | def _get_returns(data, assets, weights, from_date, to_date):
46 |     data.index = pd.to_datetime(data.index)
47 |     portfolio = (data[assets] * weights).sum(axis=1)
48 |     from_mask = portfolio.index >= pd.to_datetime(from_date)
49 |     to_mask = portfolio.index <= pd.to_datetime(to_date)
50 |     return (portfolio / portfolio.shift() - 1)[from_mask & to_mask]
51 | 
52 | 
def _download_returns(url, filename, assets, weights, from_date, to_date):
    """Download a price table to ``filename`` and return portfolio returns."""
    gdown.download(url, filename, fuzzy=True)
    data = pd.read_csv(filename, index_col=0)
    return _get_returns(data, assets, weights, from_date, to_date)


def stocks_returns(assets, weights, from_date, to_date):
    """Return weighted portfolio returns from the stocks price table.

    The table is downloaded to ``stocks.csv`` on every call; ``assets`` are
    column labels, ``weights`` their multipliers, and the result is limited
    to dates between ``from_date`` and ``to_date`` (both inclusive).
    """
    url = 'https://drive.google.com/file/d/1lLQV4oc30mo1_m39p4JXlpd1gV6pLw6A/view?usp=sharing'
    return _download_returns(url, 'stocks.csv', assets, weights, from_date, to_date)


def commodities_returns(assets, weights, from_date, to_date):
    """Return weighted portfolio returns from the commodities price table.

    The table is downloaded to ``commodities.csv`` on every call;
    ``assets`` are column labels, ``weights`` their multipliers, and the
    result is limited to dates between ``from_date`` and ``to_date`` (both
    inclusive).
    """
    url = 'https://drive.google.com/file/d/1GFq1jcV00BjFEa7hmZSO1xD7K4j4gv3O/view?usp=sharing'
    return _download_returns(url, 'commodities.csv', assets, weights, from_date, to_date)


def cryptocurrencies_returns(assets, weights, from_date, to_date):
    """Return weighted portfolio returns from the cryptocurrencies price table.

    The table is downloaded to ``cryptocurrencies.csv`` on every call;
    ``assets`` are column labels, ``weights`` their multipliers, and the
    result is limited to dates between ``from_date`` and ``to_date`` (both
    inclusive).
    """
    url = 'https://drive.google.com/file/d/1mPP5Vb57Jc2mYPeLYZPgAJM8ogjiguSO/view?usp=sharing'
    return _download_returns(url, 'cryptocurrencies.csv', assets, weights, from_date, to_date)
72 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | from data import (
  4 |     stocks_returns, 
  5 |     commodities_returns, 
  6 |     cryptocurrencies_returns, 
  7 |     Dataloader
  8 | )
  9 | from metrics import pof_test, if_test, quantile_loss
 10 | from models import HistoricalSimulation, RiskMetrics
 11 | 
 12 | class TestData:
 13 |     def test_stocks_returns(self):
 14 |         assets = ['AAPL']
 15 |         weights = [1.]
 16 |         returns = stocks_returns(assets, weights, from_date='09/02/2022', to_date='09/07/2022')
 17 |         test_returns = pd.Series(
 18 |             data=[-0.0136, -0.0082, 0.0093], 
 19 |             index=pd.to_datetime(['09/02/2022', '09/06/2022', '09/07/2022']),
 20 |         )
 21 |         assert np.allclose(returns, test_returns, atol=0.0001)
 22 |     
 23 |         assets = ['AAPL', 'GOOGL']
 24 |         weights = [0.3, 0.7]
 25 |         returns = stocks_returns(assets, weights, from_date='09/02/2022', to_date='09/07/2022')
 26 |         test_returns = pd.Series(
 27 |             data=[-0.0158, -0.0091, 0.0188], 
 28 |             index=pd.to_datetime(['09/02/2022', '09/06/2022', '09/07/2022']),
 29 |         )
 30 |         assert np.allclose(returns, test_returns, atol=0.0001)
 31 | 
 32 |         assets = ['AAPL', 'AMD', 'AMZN', 'GOOGL', 'INTC', 'META', 'MSFT', 'MU', 'NVDA', 'TSLA']
 33 |         weights = np.ones(10)
 34 |         returns = stocks_returns(assets, weights, from_date='09/02/2022', to_date='09/07/2022')
 35 |         test_returns = pd.Series(
 36 |             data=[-0.0193, -0.0068,  0.0196],
 37 |             index=pd.to_datetime(['09/02/2022', '09/06/2022', '09/07/2022']),
 38 |         )
 39 |         assert np.allclose(returns, test_returns, atol=0.0001)
 40 | 
 41 |     def test_commodities_returns(self):
 42 |         assets = [
 43 |             'Brent Oil', 'Crude Oil WTI', 'Natural Gas',
 44 |             'Heating Oil', 'Gold', 'Silver', 'Copper', 
 45 |             'US Coffee C', 'US Corn'
 46 |         ]
 47 |         weights = np.ones(9)
 48 |         returns = commodities_returns(assets, weights, from_date='09/02/2022', to_date='09/07/2022')
 49 |         test_returns = pd.Series(
 50 |             data=[ 0.0075,  0.001 , -0.0021],
 51 |             index=pd.to_datetime(['2022-09-02', '2022-09-06', '2022-09-07']),
 52 |         )
 53 |         assert np.allclose(returns, test_returns, atol=0.0001)
 54 | 
 55 |     def test_cryptocurrencies_returns(self):
 56 |         assets = ['ADA', 'BNB', 'BTC', 'BUSD', 'DOGE', 'ETH', 'USDC', 'USDT', 'XRP']
 57 |         weights = np.ones(9)
 58 |         returns = cryptocurrencies_returns(assets, weights, from_date='09/02/2022', to_date='09/07/2022')
 59 |         test_returns = pd.Series(
 60 |             data=[-0.0076, -0.0072,  0.0081, -0.0063, -0.0481,  0.026 ],
 61 |             index=pd.to_datetime(['2022-09-02', '2022-09-03', '2022-09-04', '2022-09-05', '2022-09-06', '2022-09-07']),
 62 |         )
 63 |         assert np.allclose(returns, test_returns, atol=0.0001)
 64 | 
 65 | 
 66 | class TestMetrics:
 67 |     def test_quantile_loss(self):
 68 |         np.random.seed(0)
 69 |         target = np.random.randn(10)
 70 |         var = np.ones(10)
 71 |         assert np.isclose(quantile_loss(var, target, alpha=0.9), 1.0461, atol=0.0001)
 72 |         assert np.isclose(quantile_loss(var, target, alpha=0.1), 0.6269, atol=0.0001)
 73 | 
 74 |     def test_pof_test(self):
 75 |         np.random.seed(0)
 76 |         target = np.random.randn(10)
 77 |         var = -np.ones(10) * 2
 78 |         assert np.isclose(pof_test(var, target, alpha=0.95), 0.3111, atol=0.0001)
 79 | 
 80 |     def test_if_test(self):
 81 |         np.random.seed(0)
 82 |         target = np.random.randn(1000)
 83 |         var = -np.ones(1000) * 2
 84 |         assert np.isclose(if_test(var, target), 0.2770, atol=0.0001)
 85 | 
 86 | 
 87 | class TestModels:
 88 |     def setup_class(self):
 89 |         assets = ['AAPL', 'GOOGL']
 90 |         weights = [0.3, 0.7]
 91 |         returns = stocks_returns(assets, weights, from_date='09/02/2020', to_date='09/02/2022')
 92 |         logreturns = np.log(returns + 1)
 93 |         self.loader =  Dataloader(
 94 |             series=logreturns,
 95 |             window_size=125, # a half of trading year
 96 |             step_size=1,
 97 |             horizon=1,
 98 |             first_pred=125+1
 99 |         )
100 | 
101 |     def test_historical_simulation(self):
102 |         alpha = 0.95
103 |         hs = HistoricalSimulation(alpha, window_size=125)
104 |         var = []
105 |         target = []
106 |         for feat, _target in self.loader:
107 |             var.append(hs.forecast(feat))
108 |             target.append(_target)
109 |         var = np.array(var)
110 |         target = np.array(target)
111 |         assert np.isclose(quantile_loss(var, target, alpha), 0.0038, atol=0.0001)
112 |         assert if_test(var, target) > 0.05
113 |     
114 |     def test_riskmetrics(self):
115 |         alpha = 0.95
116 |         rm = RiskMetrics(alpha)
117 |         var = []
118 |         target = []
119 |         for feat, _target in self.loader:
120 |             var.append(rm.forecast(feat))
121 |             target.append(_target)
122 |         var = np.array(var)
123 |         target = np.array(target)
124 |         assert np.isclose(quantile_loss(var, target, alpha), 0.0036, atol=0.0001)
125 |         assert pof_test(var, target, alpha) > 0.05
126 | 


--------------------------------------------------------------------------------