├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── bisection_alpha ├── README.md ├── SWCalibrate.py ├── SWExtrapolate.py ├── SWHeart.py └── bisection_alpha.py ├── black_sholes ├── README.md └── simulate_black_scholes.py ├── correlated_brownian_motion ├── CorBM.py ├── LICENSE └── README.md ├── dothan_one_factor ├── Dothan_one_factor.py └── README.MD ├── hull_white_one_factor ├── LICENSE ├── README.md └── simulate_Hull_White_One_Factor.py ├── images ├── OSM_logo.jpeg ├── Open-source modelling-logos_transparent.png └── README.md ├── nelson_siegel_svansson ├── README.md ├── main.py └── nelsonsiegelsvensson.py ├── singular_spectrum_analysis ├── LICENSE ├── README.md ├── SSA_Example.ipynb ├── SSA_Example.pdf └── ssaBasic.py ├── smith_wilson ├── LICENSE ├── README.md ├── SWCalibrate.py ├── SWExtrapolate.py ├── SWHeart.py ├── Smith&Wilson_example.xlsx └── main.py ├── stationary_bootstrap ├── LICENSE ├── README.md ├── Stationary Bootstrap Italian Swap Example.ipynb ├── stationary_bootstrap.py ├── stationary_bootstrap_calibrate.py └── tests.py ├── stationary_bootstrap_calibration ├── LICENSE ├── README.md ├── stationary_bootstrap_calibrate.py └── tests_calibration.py ├── vasicek_one_factor ├── LICENSE ├── README.md ├── Vasicek_one_factor.py └── maximum_likelihood_validation.py └── vasicek_two_factor ├── Calibration.py ├── LICENSE ├── Pricing.py ├── README.md ├── Vasicek.py └── main.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 
58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | gregor@osmodelling.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series of 86 | actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or permanent 93 | ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within the 113 | community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.1, available at 119 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 120 | 121 | Community Impact Guidelines were inspired by 122 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 123 | 124 | For answers to common questions about this code of conduct, see the FAQ at 125 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 126 | [https://www.contributor-covenant.org/translations][translations]. 
127 | 128 | [homepage]: https://www.contributor-covenant.org 129 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 130 | [Mozilla CoC]: https://github.com/mozilla/diversity 131 | [FAQ]: https://www.contributor-covenant.org/faq 132 | [translations]: https://www.contributor-covenant.org/translations 133 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Actuarial Algorithms 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | Logo 5 | 6 | 7 |
8 | 9 |

10 | 11 | 🐍 Actuarial models in Python 🐍 12 | 13 |

14 | 15 |
16 | 17 |

18 | Collection of useful models that actuaries can use to speed up their tasks. 19 |

20 | 21 | 22 | ## Algorithms available 23 | 24 | | Algorithm | Source | Description | 25 | | -------------------------| ----------------------------------- | ----------------------------------------------------------------------------- | 26 | | [Smith_Wilson] | [Technical-documentation] | Interpolation and extrapolation of missing interest rates. | 27 | | [Stationary_boot_calib] | [Whitepaper-2004] | Automatic calibration of the stationary bootstrap algorithm. | 28 | | [Stationary_bootstrap] | [Politis-Romano-1994] | Resampling procedure for weakly dependent stationary observations. | 29 | | [Calibration_of_alpha] | [Technical-documentation] | Calibration of the Smith & Wilson alpha parameter. | 30 | | [Correlated Brownian] | [Wiki Brownian motion] | Simple function to generate correlated Brownian motion in multiple dimensions.| 31 | | [Nel_Si_Svansson] | [BIS whitepaper] | Nelson-Siegel-Svensson model for approximating the yield curve. | 32 | | [Black_Scholes] | [Wiki Black&Sholes] | Black-Scholes model for pricing option contracts. | 33 | | [Vasicek one factor] | [Wiki Vasicek] | Vasicek model for modelling the evolution of interest rates. | 34 | | [Vasicek two factor] | [Wiki Vasicek] | Vasicek model for modelling the evolution of a pair of interest rates. | 35 | | [1F Hull White] | [Wiki Hull White] | One-factor Hull-White model of short rates. | 36 | | [Dothan one factor] | [Quant Exchange] | One-factor Dothan model of short rates. | 37 | | [Singular Spectrum analysis] | [Paper SSA] | Non-parametric technique used for time series analysis and forecasting. | 38 | 39 | 40 | [Singular Spectrum analysis]:https://github.com/open-source-modelling/singular_spectrum_analysis 41 | [Paper SSA]:https://papers.ssrn.com/sol3/papers.cfm?abstract_id=5136637 42 | [Quant Exchange]:https://quant.stackexchange.com/questions/16017/for-the-dothan-model-eqbt-infty 43 | [Dothan one factor]:https://github.com/open-source-modelling/insurance_python/tree/main/dothan_one_factor 44 | [Wiki Hull White]:https://en.wikipedia.org/wiki/Hull%E2%80%93White_model 45 | [1F Hull White]:https://github.com/open-source-modelling/insurance_python/tree/main/hull_white_one_factor 46 | [Smith_Wilson]: https://github.com/open-source-modelling/insurance_python/tree/main/smith_wilson 47 | [Technical-documentation]: https://www.eiopa.europa.eu/sites/default/files/risk_free_interest_rate/12092019-technical_documentation.pdf 48 | [Stationary_boot_calib]: https://github.com/open-source-modelling/insurance_python/tree/main/stationary_bootstrap_calibration 49 | [Whitepaper-2004]: http://public.econ.duke.edu/~ap172/Politis_White_2004.pdf 50 | [Stationary_bootstrap]: https://github.com/open-source-modelling/insurance_python/tree/main/stationary_bootstrap 51 | [Politis-Romano-1994]: https://www.jstor.org/stable/2290993 52 | [Calibration_of_alpha]: https://github.com/open-source-modelling/insurance_python/tree/main/bisection_alpha 53 | [Correlated Brownian]: https://github.com/open-source-modelling/insurance_python/tree/main/correlated_brownian_motion 54 | [Wiki Brownian motion]: https://en.wikipedia.org/wiki/Brownian_motion 55 | [Nel_Si_Svansson]: https://github.com/open-source-modelling/insurance_python/tree/main/nelson_siegel_svansson 56 | [BIS whitepaper]: https://www.bis.org/publ/bppdf/bispap25l.pdf 57 | [Black_Scholes]: https://github.com/open-source-modelling/insurance_python/tree/main/black_sholes 58 | [Wiki Black&Sholes]: https://en.wikipedia.org/wiki/Black%E2%80%93Scholes_model 59 | [Vasicek one factor]: https://github.com/open-source-modelling/insurance_python/tree/main/vasicek_one_factor 60 | [Wiki Vasicek]: https://en.wikipedia.org/wiki/Vasicek_model 61 | [Vasicek two factor]: https://github.com/open-source-modelling/insurance_python/tree/main/vasicek_two_factor 62 | 63 | ## Algorithms planned 64 | 65 | | Algorithm | Source | Description | 66 | | ---------------------- | ----------------------------------- | ---------------------------------------------------------------------- | 67 | | Matrix on fraction | TBD | Heuristics for calculating fractional powers of transition matrices | 68 | | G2++ with piec cons vol| TBD | Calibration of a G2++ model with piecewise constant volatility | 69 | | Lee-Carter model | TBD | Simple stochastic mortality model | 70 | | Metropolis-Hastings | TBD | Sampling from probability distributions | 71 | 72 | New suggestions for algorithms are welcome. 73 | 74 | If anybody is interested in publishing an algorithm they have implemented, or in helping with the project, contact us and we will make it happen. 75 | 76 | Queries and suggestions: gregor@osmodelling.com 77 | -------------------------------------------------------------------------------- /bisection_alpha/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 🐍 Bisection method that finds the optimal parameter α for the Smith & Wilson algorithm 🐍 4 | 5 |

6 | 7 | This repository contains an implementation of a simple bisection method that finds the optimal parameter α for the Smith & Wilson algorithm, often used in insurance to interpolate/extrapolate rates or yields. 8 | 9 | The implementation is based on the [Technical documentation of the Methodology to derive EIOPA's risk-free interest rate term structure](https://www.eiopa.europa.eu/document/download/df541a50-a9e7-458b-86ae-6ad16c2d6a29_en?filename=16-09-2022%20Technical%20documentation) and the [Wiki on the Bisection method](https://en.wikipedia.org/wiki/Bisection_method). 10 | 11 | ## Problem 12 | Before using the Smith & Wilson algorithm, the user needs to provide the convergence speed parameter α. This parameter needs to be calibrated primarily so that the extrapolated result matches the desired long-term behaviour. 13 | 14 | ## Solution 15 | By transforming the minimization problem at the point of convergence into the problem of finding a root of the shifted function g(α) - τ, this repository implements a simple bisection algorithm to find the optimal α. 16 | 17 | ### Input 18 | - The minimum allowed value of the convergence speed parameter α. 19 | - The maximum allowed value of the convergence speed parameter α. 20 | - Maturities of bonds observed on the market, provided as input. 21 | - Zero-coupon rates, for which the user wishes to calibrate the algorithm. Each rate belongs to an observable zero-coupon bond with a known maturity. 22 | - The ultimate forward rate towards which the user wishes the resulting curve to converge. 23 | - Allowed difference between the given ultimate forward rate and the resulting curve. 24 | - The numeric precision of the calculation. The higher the precision, the more accurate the estimate of the root. 25 | - The maximum number of iterations allowed. This is to prevent an infinite loop in case the method does not converge to a solution. 26 | 27 | ### Output 28 | - Optimal value of the parameter α (if the bisection method converged). 29 | 30 | Note that to be consistent with EIOPA's recommendations, the lower bound of the interval should be set to 0.05. 31 | 32 | ## Getting started 33 | ```python 34 | import numpy as np 35 | from SWCalibrate import SWCalibrate as SWCalibrate 36 | from SWExtrapolate import SWExtrapolate as SWExtrapolate 37 | from bisection_alpha import Galfa as Galfa 38 | from bisection_alpha import BisectionAlpha as BisectionAlpha 39 | 40 | # Maturities of bonds observed on the market 41 | M_Obs = np.transpose(np.array([1, 2, 4, 5, 6, 7])) 42 | 43 | # Yields observed on the market 44 | r_Obs = np.transpose(np.array([0.01, 0.02, 0.03, 0.032, 0.035, 0.04])) 45 | 46 | # Ultimate forward rate 47 | ufr = 0.04 48 | 49 | # Numeric precision of the optimisation 50 | Precision = 0.0000000001 51 | 52 | # Targeted distance between the extrapolated curve and the ultimate forward rate at the convergence point 53 | Tau = 0.0001 # 1 basis point 54 | 55 | # Examples of a call to Galfa and BisectionAlpha 56 | print("Example in the documentation for Galfa: "+ str(Galfa(M_Obs, r_Obs, ufr, 0.15, Tau))) 57 | print("Example in the documentation for BisectionAlpha: "+ str(BisectionAlpha(0.05, 0.5, M_Obs, r_Obs, ufr, Tau, Precision, 1000))) 58 | ``` 59 | Note that this implementation uses the functions `SWCalibrate` and `SWExtrapolate` from the [Smith & Wilson implementation](https://github.com/open-source-modelling/insurance_python/tree/main/smith_wilson). They are duplicated in this repository for completeness.
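For reference, the function whose root the bisection method finds is the gap computed in `Galfa` in `bisection_alpha.py` (EIOPA technical documentation, paragraphs 155-158). With the convergence point $T = \max(\max(M^{obs}) + 40,\ 60)$, `Galfa` returns

$$g(\alpha) - \tau = \frac{\alpha}{\left|1 - \kappa\, e^{\alpha T}\right|} - \tau, \qquad \kappa = \frac{1 + \alpha\, (M^{obs})^{\top} Q\, b}{\sinh\!\big(\alpha\, (M^{obs})^{\top}\big)\, Q\, b},$$

where $Q$ and $b$ are the matrix and calibration vector produced by the Smith & Wilson calibration. `BisectionAlpha` simply searches for the α at which this gap changes sign.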
If there are any inconsistencies or suggestions, raise an issue or contact us directly. 60 | 61 | -------------------------------------------------------------------------------- /bisection_alpha/SWCalibrate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def SWCalibrate(r: np.ndarray, M: np.ndarray, ufr: float, alpha: float)-> np.ndarray: 4 | """ 5 | Calculate the calibration vector using the Smith-Wilson algorithm. 6 | 7 | Calculates the calibration vector `b` used for interpolation and extrapolation of rates. 8 | 9 | Arguments: 10 | r: n x 1 ndarray of rates for which you wish to calibrate the algorithm. Each rate belongs to an observable zero-coupon bond with a known maturity. Example: r = np.array([[0.0024], [0.0034]]) 11 | M: n x 1 ndarray of maturities of bonds that have rates provided in the input `r`. Example: M = np.array([[1], [3]]) 12 | ufr: Floating number representing the ultimate forward rate. Example: ufr = 0.042 13 | alpha: Floating number representing the convergence speed parameter alpha. Example: alpha = 0.05 14 | 15 | Returns: 16 | n x 1 ndarray representing the calibration vector needed for interpolation and extrapolation. Example: b = np.array([[14], [-21]]) 17 | 18 | For more information, refer to the documentation at: 19 | https://www.eiopa.europa.eu/sites/default/files/risk_free_interest_rate/12092019-technical_documentation.pdf 20 | """ 21 | 22 | from SWHeart import SWHeart as SWHeart 23 | 24 | 25 | C = np.identity(M.size) 26 | p = (1+r) **(-M) # Transform rates to implied market prices of a ZCB bond 27 | d = np.exp(-np.log(1+ufr) * M) # Calculate vector d described in paragraph 138 28 | Q = np.diag(d) @ C # Matrix Q described in paragraph 139 29 | q = C.transpose() @ d # Vector q described in paragraph 139 30 | H = SWHeart(M, M, alpha) # Heart of the Wilson function from paragraph 132 31 | 32 | return np.linalg.inv(Q.transpose() @ H @ Q) @ (p-q) # Calibration vector b from paragraph 149 33 | -------------------------------------------------------------------------------- /bisection_alpha/SWExtrapolate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def SWExtrapolate(M_Target: np.ndarray, M_Obs: np.ndarray, b: np.ndarray, ufr: float, alpha:float)->np.ndarray: 4 | """ 5 | Interpolate or extrapolate rates for targeted maturities using the Smith-Wilson algorithm. 6 | 7 | Calculates the rates for maturities specified in `M_Target` using the calibration vector `b` obtained 8 | from observed bond maturities in `M_Obs`. 9 | 10 | Arguments: 11 | M_Target: k x 1 ndarray representing each targeted bond maturity of interest. Example: M_Target = np.array([[1], [2], [3], [5]]) 12 | M_Obs: n x 1 ndarray representing the observed bond maturities used for calibrating the calibration vector `b`. Example: M_Obs = np.array([[1], [3]]) 13 | b: n x 1 ndarray representing the calibration vector calculated on observed bonds. 14 | ufr: Floating number representing the ultimate forward rate. Example: ufr = 0.042 15 | alpha: Floating number representing the convergence speed parameter alpha. Example: alpha = 0.05 16 | 17 | Returns: 18 | k x 1 ndarray representing the targeted rates for zero-coupon bonds. Each rate belongs to a targeted 19 | zero-coupon bond with a maturity from `M_Target`. 
Example: r = np.array([0.0024, 0.0029, 0.0034, 0.0039]) 20 | 21 | For more information, refer to the documentation at: 22 | https://www.eiopa.europa.eu/sites/default/files/risk_free_interest_rate/12092019-technical_documentation.pdf 23 | """ 24 | 25 | from SWHeart import SWHeart as SWHeart 26 | 27 | C = np.identity(M_Obs.size) 28 | d = np.exp(-np.log(1+ufr) * M_Obs) # Calculate vector d described in paragraph 138 29 | Q = np.diag(d) @ C # Matrix Q described in paragraph 139 30 | H = SWHeart(M_Target, M_Obs, alpha) # Heart of the Wilson function from paragraph 132 31 | p = np.exp(-np.log(1+ufr)* M_Target) + np.diag(np.exp(-np.log(1+ufr) * M_Target)) @ H @ Q @ b # Discount pricing function for targeted maturities from paragraph 147 32 | return p ** (-1/ M_Target) -1 # Convert obtained prices to rates and return prices 33 | -------------------------------------------------------------------------------- /bisection_alpha/SWHeart.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def SWHeart(u: np.ndarray, v: np.ndarray, alpha: float)->np.ndarray: 4 | """ 5 | Calculate the heart of the Wilson function. 6 | 7 | Calculates the matrix H (Heart of the Wilson function) for maturities specified by vectors u and v. 8 | The formula is taken from the EIOPA technical specifications paragraph 132. 9 | 10 | Arguments: 11 | u: n_1 x 1 vector of maturities. Example: u = [1, 3] 12 | v: n_2 x 1 vector of maturities. Example: v = [1, 2, 3, 5] 13 | alpha: 1 x 1 floating number representing the convergence speed parameter alpha. Example: alpha = 0.05 14 | 15 | Returns: 16 | n_1 x n_2 matrix representing the Heart of the Wilson function for selected maturities and parameter alpha. 17 | H is calculated as described in paragraph 132 of the EIOPA documentation. 18 | 19 | For more information, see: 20 | https://www.eiopa.europa.eu/sites/default/files/risk_free_interest_rate/12092019-technical_documentation.pdf 21 | """ 22 | 23 | u_Mat = np.tile(u, [v.size, 1]).transpose() 24 | v_Mat = np.tile(v, [u.size, 1]) 25 | return 0.5 * (alpha * (u_Mat + v_Mat) + np.exp(-alpha * (u_Mat + v_Mat)) - alpha * np.absolute(u_Mat-v_Mat) - np.exp(-alpha * np.absolute(u_Mat-v_Mat))); # Heart of the Wilson function from paragraph 132 26 | -------------------------------------------------------------------------------- /bisection_alpha/bisection_alpha.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from SWCalibrate import SWCalibrate as SWCalibrate 3 | from SWExtrapolate import SWExtrapolate as SWExtrapolate 4 | 5 | def Galfa(M_Obs: np.ndarray, r_Obs: np.ndarray, ufr: float, alpha: float, Tau: float)->float: 6 | """ 7 | Calculates the gap at the convergence point between the allowable tolerance Tau and the curve extrapolated using the Smith-Wilson algorithm. 8 | interpolation and extrapolation of rates. 9 | 10 | Args: 11 | M_Obs = n x 1 ndarray of maturities of bonds, that have rates provided in input (r). Ex. u=[[1], [3]] 12 | r_Obs = n x 1 ndarray of rates, for which you wish to calibrate the algorithm. Each rate belongs to an observable Zero-Coupon Bond with a known maturity. Ex. r = [[0.0024], [0.0034]] 13 | ufr = 1 x 1 floating number, representing the ultimate forward rate. Ex. ufr = 0.042 14 | alpha = 1 x 1 floating number representing the convergence speed parameter alpha. Ex. alpha = 0.05 15 | Tau = 1 x 1 floating number representing the allowed difference between ufr and actual curve. Ex. 
Tau = 0.00001 16 | 17 | Returns: 18 | 1 x 1 floating number representing the distance between ufr input and the maximum allowed discrepancy Tau 19 | 20 | Example of use: 21 | >>> import numpy as np 22 | >>> from SWCalibrate import SWCalibrate as SWCalibrate 23 | >>> from SWExtrapolate import SWExtrapolate as SWExtrapolate 24 | >>> M_Obs = np.transpose(np.array([1, 2, 4, 5, 6, 7])) 25 | >>> r_Obs = np.transpose(np.array([0.01, 0.02, 0.03, 0.032, 0.035, 0.04])) 26 | >>> alfa = 0.15 27 | >>> ufr = 0.04 28 | >>> Precision = 0.0000000001 29 | >>> Tau = 0.0001 30 | >>> Galfa(M_Obs, r_Obs, ufr, alfa, Tau) 31 | [Out] -8.544212205612438e-05 32 | 33 | For more information see https://www.eiopa.europa.eu/sites/default/files/risk_free_interest_rate/12092019-technical_documentation.pdf 34 | 35 | Implemented by Gregor Fabjan from Qnity Consultants on 17/12/2021. 36 | """ 37 | 38 | U = max(M_Obs) # Find maximum liquid maturity from input 39 | T = max(U + 40, 60) # Define the convergence point as defined in paragraph 120 and again in 157 40 | C = np.identity(M_Obs.size) # Construct cash flow matrix described in paragraph 137 assuming the input is ZCB bonds with notional value of 1 41 | d = np.exp(-np.log(1 + ufr) * M_Obs) # Calculate vector d described in paragraph 138 42 | Q = np.diag(d) @ C # Matrix Q described in paragraph 139 43 | b = SWCalibrate(r_Obs, M_Obs, ufr, alpha) # Calculate the calibration vector b using the equation from paragraph 149 44 | K = (1+alpha * M_Obs @ Q@ b) / (np.sinh(alpha * M_Obs.transpose())@ Q@ b) # Calculate kappa as defined in the paragraph 155 45 | return( alpha/np.abs(1 - K*np.exp(alpha*T))-Tau) # Size of the gap at the convergence point between the allowable tolerance Tau and the actual curve. Defined in paragraph 158 46 | 47 | def BisectionAlpha(xStart: float, xEnd: float, M_Obs: np.ndarray, r_Obs: np.ndarray, ufr: float, Tau: float, Precision: float, maxIter: int)->float: 48 | """ 49 | Bisection root finding algorithm for finding the root of a function. The function here is the allowed difference between the ultimate forward rate and the extrapolated curve using Smith & Wilson. 50 | 51 | Args: 52 | xStart = 1 x 1 floating number representing the minimum allowed value of the convergence speed parameter alpha. Ex. alpha = 0.05 53 | xEnd = 1 x 1 floating number representing the maximum allowed value of the convergence speed parameter alpha. Ex. alpha = 0.8 54 | M_Obs = n x 1 ndarray of maturities of bonds, that have rates provided in input (r). Ex. u=[[1], [3]] 55 | r_Obs = n x 1 ndarray of rates, for which you wish to calibrate the algorithm. Each rate belongs to an observable Zero-Coupon Bond with a known maturity. Ex. r = [[0.0024], [0.0034]] 56 | ufr = 1 x 1 floating number, representing the ultimate forward rate. Ex. ufr = 0.042 57 | Tau = 1 x 1 floating number representing the allowed difference between ufr and actual curve. Ex. Tau = 0.00001 58 | Precision = 1 x 1 floating number representing the precision of the calculation. Higher the precision, more accurate the estimation of the root 59 | maxIter = 1 x 1 positive integer representing the maximum number of iterations allowed. 
This is to prevent an infinite loop in case the method does not converge to a solution 60 | 61 | Returns: 62 | 1 x 1 floating number representing the optimal value of the parameter alpha 63 | 64 | Example of use: 65 | >>> import numpy as np 66 | >>> from SWCalibrate import SWCalibrate as SWCalibrate 67 | >>> M_Obs = np.transpose(np.array([1, 2, 4, 5, 6, 7])) 68 | >>> r_Obs = np.transpose(np.array([0.01, 0.02, 0.03, 0.032, 0.035, 0.04])) 69 | >>> xStart = 0.05 70 | >>> xEnd = 0.5 71 | >>> maxIter = 1000 72 | >>> alfa = 0.15 73 | >>> ufr = 0.042 74 | >>> Precision = 0.0000000001 75 | >>> Tau = 0.0001 76 | >>> BisectionAlpha(xStart, xEnd, M_Obs, r_Obs, ufr, Tau, Precision, maxIter) 77 | [Out] 0.11549789285636511 78 | 79 | For more information see https://www.eiopa.europa.eu/sites/default/files/risk_free_interest_rate/12092019-technical_documentation.pdf and https://en.wikipedia.org/wiki/Bisection_method 80 | 81 | Implemented by Gregor Fabjan from Qnity Consultants on 17/12/2021. 82 | """ 83 | 84 | yStart = Galfa(M_Obs, r_Obs, ufr, xStart, Tau) # Check if the initial point is a solution 85 | yEnd = Galfa(M_Obs, r_Obs, ufr, xEnd, Tau) # Check if the final point is a solution 86 | if np.abs(yStart) < Precision: 87 | return xStart # If initial point already satisfies the conditions return start point 88 | if np.abs(yEnd) < Precision: 89 | return xEnd # If final point already satisfies the conditions return end point 90 | iIter = 0 91 | while iIter <= maxIter: 92 | xMid = (xEnd+xStart)/2 # calculate mid-point 93 | yMid = Galfa(M_Obs, r_Obs, ufr, xMid, Tau) # What is the solution at midpoint 94 | 95 | if (yMid == 0 or (xEnd-xStart)/2 < Precision): # Solution found 96 | return xMid 97 | else: # Solution not found 98 | iIter += 1 99 | if np.sign(yMid) == np.sign(yStart): # If the start point and the middle point have the same sign, then the root must be in the second half of the interval 100 | xStart = xMid 101 | else: # If the start point and the middle point have a different sign than by mean value theorem the interval must contain at least one root 102 | xEnd = xMid 103 | print("Method failed to converge") 104 | -------------------------------------------------------------------------------- /black_sholes/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 🐍 Black-Scholes model for simulating the price of a stock 🐍 4 | 5 |

6 | 7 | The Black-Scholes model is one of the oldest models for simulating the stock market. 8 | 9 | ## Problem 10 | 11 | Modelling the stock market is a well-researched field. There are numerous models, each with its advantages and drawbacks. 12 | 13 | ## Solution 14 | 15 | One of the oldest and simplest models developed is the [Black-Scholes-Merton](https://en.wikipedia.org/wiki/Black%E2%80%93Scholes_model) model, which assumes that asset prices can be described by the [Black-Scholes equation](https://en.wikipedia.org/wiki/Black%E2%80%93Scholes_equation). This implementation simulates the price of a stock in time using the exact solution of the corresponding SDE, $S_{t+\Delta t} = S_t \exp\big((\mu - \tfrac{1}{2}\sigma^2)\Delta t + \sigma\sqrt{\Delta t}\, Z\big)$ with $Z \sim N(0,1)$. 16 | 17 | ### Input 18 | 19 | Black-Scholes simulation: 20 | - `S0` ... float, specifying the initial value of the underlying asset. 21 | - `mu` ... float, specifying the drift rate of the underlying asset. 22 | - `sigma` ... float, standard deviation of the underlying asset's return. 23 | - `T` ... float, specifying the maximum modelling time. e.g. if T = 2, the modelling time runs from 0 to 2. 24 | - `dt` ... float, specifying the length of each subinterval. e.g. dt = 0.1 means there are 10 subintervals of length 0.1 between two integers of modelling time. 25 | 26 | ### Output 27 | 28 | Return: 29 | - `stock_price_simulation` ... N x 2 pandas DataFrame where the index is the modelling time and the values are a realisation of the underlying's price. 30 | 31 | ## Getting started 32 | 33 | Model the price of a stock which is worth 100 today. The market has a future annualized risk-free rate of 5% and an annualized volatility of 30%. The user is interested in a price projection for the next 10 years in increments of 6 months (0.5 years). 34 | 35 | ``` python 36 | import numpy as np 37 | import pandas as pd 38 | from simulate_black_scholes import simulate_black_scholes 39 | 40 | # Example usage 41 | S0 = 100 # Initial stock price 42 | mu = 0.05 # Expected return 43 | sigma = 0.3 # Volatility 44 | T = 10 # 10 years 45 | dt = 0.5 # 6-month intervals 46 | 47 | print(simulate_black_scholes(S0=S0, mu=mu, sigma=sigma, T=T, dt=dt)) 48 | Simulation 49 | 0.0 100.000000 50 | 0.5 102.844245 51 | 1.0 110.906953 52 | 1.5 144.208580 53 | 2.0 200.774653 54 | 2.5 209.315112 55 | 3.0 151.210005 56 | 3.5 96.068103 57 | 4.0 82.690847 58 | 4.5 86.983517 59 | 5.0 102.113069 60 | 5.5 119.007173 61 | 6.0 171.645169 62 | 6.5 202.591723 63 | 7.0 321.676284 64 | 7.5 401.060230 65 | 8.0 364.666643 66 | 8.5 514.189187 67 | 9.0 364.648269 68 | 9.5 499.020044 69 | 10.0 496.552723 70 | ``` 71 | ## Risk neutral pricing 72 | When an ESG simulation output is presented, a standard test is applied to confirm that the scenarios are risk neutral; the output of a Black-Scholes model is one example. This test relies on the fact that, in a risk-neutral framework, there is an explicit relationship between the price of a fixed income financial instrument and the expected discounted cash flows. 73 | 74 | Below is the Martingale test for the hypothetical example from above. To pass the test, the expected discounted cash flows should equal the initial stock price of 100.
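In symbols, the test checks that the Monte Carlo average of the discounted terminal stock price recovers today's price,

$$\mathbb{E}\left[S_T\right] e^{-\mu T} = S_0,$$

where the drift $\mu$ plays the role of the risk-free rate in this example.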
75 | 76 | ``` python 77 | import numpy as np 78 | import pandas as pd 79 | from simulate_black_scholes import simulate_black_scholes 80 | 81 | # Risk neutral pricing test 82 | S0 = 100 # Initial stock price 83 | mu = 0.05 # Expected return 84 | sigma = 0.3 # Volatility 85 | T = 10 # 10 years 86 | dt = 0.5 # 6-month intervals 87 | bank_end = np.exp(T*mu) # return of the risk-free asset 88 | 89 | nIter = 50000 90 | result = np.zeros(nIter) 91 | 92 | for iter in range(1,nIter): 93 | out = simulate_black_scholes(S0, mu, sigma, T, dt) 94 | martingale = out.iloc[-1,:].values[0] / bank_end 95 | result[iter] = martingale 96 | 97 | print(np.mean(result)) 98 | # [out] = 99.8743118539787 99 | ``` 100 | -------------------------------------------------------------------------------- /black_sholes/simulate_black_scholes.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def simulate_black_scholes(S0: float, mu: float, sigma: float, T: float, dt: float) -> pd.DataFrame: 5 | """ 6 | Simulate a single path for stock prices using the Black-Scholes model with vectorized operations. 7 | 8 | Args: 9 | S0 (float): Initial value of the underlying asset. 10 | mu (float): Drift rate of the underlying asset. 11 | sigma (float): Standard deviation of the underlying asset's return. 12 | T (float): Maximum modeling time. 13 | dt (float): Length of each subinterval. 14 | 15 | Returns: 16 | pd.DataFrame: DataFrame with time as the index and a single column for the simulated stock price. 17 | 18 | Example: 19 | Model the price of a stock which is worth today 100. The market has a future annualized risk-free rate of 5% and an annualized volatility of 30%. The user is interested in a price projection for the next 10 years in increments of 6 months (0.5 years). 
20 | import pandas as pd 21 | import numpy as np 22 | simulate_Black_Scholes(100, 0.05, 0.3, 10, 0.5) 23 | Output: 24 | Simulation 25 | 0.0 100.000000 26 | 0.5 129.988711 27 | 1.0 164.336273 28 | 1.5 123.987617 29 | 2.0 170.535179 30 | 2.5 165.820812 31 | 3.0 168.108448 32 | 3.5 155.481838 33 | 4.0 125.441538 34 | 4.5 96.216396 35 | 5.0 99.090337 36 | 5.5 149.457225 37 | 6.0 183.199463 38 | 6.5 183.598311 39 | 7.0 130.795698 40 | 7.5 126.942983 41 | 8.0 152.856431 42 | 8.5 184.111451 43 | 9.0 140.536182 44 | 9.5 120.704683 45 | 10.0 157.433053 46 | 47 | Reference: 48 | For more information, see: https://en.wikipedia.org/wiki/Black%E2%80%93Scholes_model 49 | """ 50 | 51 | N = int(T / dt) # number of steps 52 | time = np.linspace(0, T, N + 1) 53 | 54 | # Generate random shocks with standard deviation adjusted by sqrt(dt) 55 | random_shocks = np.random.normal(0, 1, N) 56 | 57 | # Calculate the increments in a vectorized manner 58 | increments = (mu - 0.5 * sigma ** 2) * dt + sigma* np.sqrt(dt)* random_shocks 59 | 60 | # Compute the cumulative product for the price path 61 | price_path = S0 * np.exp(np.insert(np.cumsum(increments), 0, 0)) 62 | 63 | # Convert to DataFrame with a single column for the simulated path 64 | return pd.DataFrame(price_path, index=time, columns=['Simulation']) 65 | 66 | -------------------------------------------------------------------------------- /correlated_brownian_motion/CorBM.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def CorBrownian(mu:np.ndarray, E: np.ndarray, sampleSize: int) ->np.ndarray: 4 | """ 5 | Algorithm generates samples of increments from a correlated Brownian motion with a given mean and Variance-Covariance matrix (E). 6 | The algorithm uses the fact that if you have n independent brownian motions, the samples given by "mu+ C*Z" are distributed as N(mu,E), where mu is the vector of means and C is the square root of the Variance-Covariance matrix. 7 | For calculating the square root of the VarCovar matrix, the Cholesky decomposition is implemented. 8 | 9 | Arguments: 10 | mu: Array with n elements containing the mean of each BM. 11 | E: n x n numpy array with the Variance-Covariance matrix. 12 | sampleSize: Integer representing the number of samples. 13 | 14 | Returns: 15 | sampleSize x n numpy array containing sampled increments for the correlated Brownian motion. 16 | 17 | Note: 18 | The algorithm is not optimized for speed and no testing of inputs is implemented. If this would be useful to you, let us know and we can extend the code. 19 | 20 | Example of use: 21 | import numpy as np 22 | mu = [1, 2] 23 | VarCovar = np.matrix('1, 0.8; 0.8, 3') 24 | sampleSize = 5 25 | out = CorBrownian(mu, VarCovar, sampleSize) 26 | > [[ 2.83211068 4.50021193] 27 | [ 0.26392619 1.56450446] 28 | [-0.25928109 0.97167124] 29 | [ 1.52038489 1.76274556]] 30 | """ 31 | 32 | def Cholesky(X:np.ndarray) ->np.ndarray: 33 | """ 34 | Cholesky–Banachiewicz algorithm decomposes a Hermitian matrix into a product of a lower triangular matrix and its conjugate transpose. 35 | 36 | Arguments: 37 | X: n x n ndarray representing a Hermitian matrix that the user wants to decompose. 38 | 39 | Returns: 40 | n x n ndarray lower triangular matrix such that the matrix product between it and its conjugate transpose returns X. 
41 | 42 | More info on: https://en.wikipedia.org/wiki/Cholesky_decomposition#The_Cholesky.E2.80.93Banachiewicz_and_Cholesky.E2.80.93Crout_algorithms 43 | """ 44 | 45 | L = np.zeros_like(X) 46 | n = X.shape[0] 47 | 48 | for i in range(0, n): 49 | for j in range(0, i+1): 50 | sum = 0 51 | for k in range(0, j): 52 | sum = sum+ L[i, k]*L[j, k] 53 | if (i==j): 54 | L[i, j] = np.sqrt(X[i, i]-sum) 55 | else: 56 | L[i, j] = 1.0/L[j, j] * (X[i, j]-sum) 57 | return L 58 | 59 | dim = E.shape[0] # Guess the number of Brownian motions (dimension) from the size of the Var-Covar matrix 60 | Z = np.random.default_rng().normal(0,1,(sampleSize, dim)) # Generate independent increments of a simpleSize dimensional Brownian motion 61 | Y = np.zeros((sampleSize, dim)) # Predefine the final output 62 | L = Cholesky(E) # Calculate the square root of the Var-Covar matrix 63 | 64 | for iSample in range(sampleSize): # For each sample, calculate mu + L*Z 65 | Y[iSample] =np.transpose(mu) + L @ np.transpose(Z[iSample]) 66 | return Y 67 | -------------------------------------------------------------------------------- /correlated_brownian_motion/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Insurance Algorithms 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /correlated_brownian_motion/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 🐍 Sampled increments from two or more correlated Brownian motions (BM) 🐍 4 | 5 |

6 | 7 |
8 | 9 | Popular algorithm for generating a matrix of increments from a multidimensional Brownian motion (BM) with a given vector of means and a Variance-Covariance matrix. 10 | 11 | ## Problem 12 | 13 | Often when using multifactor models, the model requires correlated sources of noise. A popular choice is to use a multidimensional Brownian motion. 14 | 15 | ## Solution 16 | 17 | The proposed algorithm uses two properties of BM: 18 | - Increments of a BM are normally distributed. 19 | - Assuming n independent BM's whose increments are generated from a standard normal distribution (denoted N(0,1)), a derived process. 20 | Y = μ + L\*z has its increments distributed as N(μ, E) where μ is the vector of means and L is the square root of the Variance-Covariance matrix (denoted E in the code). 21 | 22 | ### Inputs 23 | 24 | - Vector of means for each BM `mu`. 25 | - Variance-Covariance matrix whose diagonal elements describe the volatility of each BM and the off-diagonal elements describe the covariance `E`. 26 | - Number of samples needed `sampleSize`. 27 | 28 | ### Output 29 | 30 | - Matrix of samples where each column represents a BM and each row a new increment. 31 | 32 | ## Getting started 33 | 34 | The user is interested in generating samples from 2 Brownian motions with a correlation of 0.8. Additionally, the first BM has a mean of 1 and a variance of 1.5. The second BM has a mean of 0 and a variance of 2. The user is interested in 480 samples. 35 | 36 | ```python 37 | import numpy as np 38 | from CorBM import * 39 | 40 | mu = [1,0] 41 | VarCovar = np.matrix('1.5, 0.8; 0.8, 2') 42 | sampleSize = 480 43 | 44 | out = CorBrownian(mu, VarCovar, sampleSize) 45 | print(out) 46 | >> [out] = [[ 3.01432184 0.21246299] 47 | [ 0.98350335 2.68478661] 48 | [ 1.42922771 -0.9489711 ] 49 | [-0.49778143 -3.0404678 ] 50 | [ 0.29396451 0.64626096] 51 | [ 0.80811248 -0.4542567 ] 52 | [ 0.29441548 0.41880222] 53 | [ 2.11883666 1.0262339 ] 54 | [ 3.17777954 -1.94769166] 55 | [ 0.02836758 -0.89723843] 56 | [ 1.41159275 -0.91685134] 57 | [ 0.80002199 0.94750505] 58 | [ 2.10351657 -0.97580137] 59 | [-0.67431779 -2.1930151 ] 60 | [-0.10431932 -0.00665984] 61 | [ 1.18779711 0.41885266] 62 | [ 1.54634453 -3.74537725] 63 | [ 4.10357654 3.50137711] 64 | [ 1.27666983 -0.202701 ] 65 | [ 1.45607394 -1.29394992] 66 | [ 4.42056013 1.17314064] 67 | [ 0.17981926 0.02718553] 68 | [ 2.08776471 -1.2151297 ] 69 | [ 0.00975734 -1.87426259] 70 | [ 0.44370952 -0.84889546] 71 | [-0.91702991 -0.64974021] 72 | [ 2.41707492 -0.48260669] 73 | [ 0.26420335 -1.08896695] 74 | [ 2.62254181 -0.06424235] 75 | [ 2.17341372 1.99310141] 76 | [ 2.71013259 -1.83770762] 77 | [-0.48606994 -0.92329548] 78 | [ 0.62851221 -2.64537566] 79 | [ 1.78415689 2.56601775] 80 | [ 0.85502579 1.50565467] 81 | [ 1.16226566 -0.49576818] 82 | [ 0.97005964 1.3292272 ] 83 | [ 0.74111532 -2.0000361 ] 84 | [ 1.52058537 0.32836389] 85 | [ 2.45704707 1.73679504] 86 | [ 1.46771852 1.08691729] 87 | [-1.23507574 -1.16338738] 88 | [ 0.16330948 -1.72058513] 89 | [-0.11886678 -0.71182892] 90 | [ 1.64520848 1.89947365] 91 | [-0.29259006 -0.13394478] 92 | [-0.32839732 -0.83890525] 93 | [-1.00189062 -0.14614664] 94 | [ 1.37039228 0.16268565] 95 | [ 3.35019224 -1.41428558] 96 | [ 2.35659306 -0.65411604] 97 | [ 1.04461038 1.3945269 ] 98 | [ 0.46508655 0.93780721] 99 | [-0.28913945 -0.60518967] 100 | [ 1.80189922 0.35159355] 101 | [ 0.57657657 -1.39084704] 102 | [ 1.27479344 0.27996933] 103 | [-0.30639903 2.54723502] 104 | [ 2.5373733 1.87532831] 105 | [-1.14445785 -2.47072282] 106 | 
[-0.59016974 0.66626821] 107 | [ 0.7555812 -1.30411159] 108 | [-1.08346996 2.02117262] 109 | [-0.41431095 1.37450613] 110 | [-1.06265565 -1.18989157] 111 | [ 1.80578244 1.79412479] 112 | [ 4.19777057 0.99893666] 113 | [ 0.50213584 -0.77556348] 114 | [ 1.9186039 1.09613311] 115 | [ 1.6930982 2.03285367] 116 | [-0.27571345 -0.98032212] 117 | [ 2.81264489 -1.780791 ] 118 | [ 0.06394456 -1.71073406] 119 | [ 1.73889537 1.51100972] 120 | [ 0.39641242 -0.351381 ] 121 | [ 2.99119662 -0.23606054] 122 | [ 2.93104271 1.63527194] 123 | [-0.53147698 -0.67081085] 124 | [ 1.6547926 0.16459858] 125 | [ 0.43974066 -0.0947692 ] 126 | [ 1.74082625 1.67718711] 127 | [ 0.99803465 -1.11834038] 128 | [ 0.20050859 0.25441171] 129 | [ 1.04611722 0.92303653] 130 | [ 0.77831377 0.25247936] 131 | [ 0.15764237 -1.45322145] 132 | [ 2.32716896 1.50761654] 133 | [ 0.46371323 -0.89645604] 134 | [ 2.08381869 2.13579417] 135 | [ 1.56593025 2.4389585 ] 136 | [-0.81187929 0.60117895] 137 | [ 0.32764279 -0.01306386] 138 | [ 1.41249816 0.24986421] 139 | [ 2.06642759 1.30855174] 140 | [-0.19649758 -0.63859554] 141 | [ 1.19242652 0.89506971] 142 | [ 0.35556785 -3.65657223] 143 | [ 1.74584652 0.79949231] 144 | [ 2.21807447 -0.14098937] 145 | [ 2.81308771 2.65884627]] 146 | ``` 147 | -------------------------------------------------------------------------------- /dothan_one_factor/Dothan_one_factor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def simulate_Dothan_One_Factor(r0: float = 0.1, a: float = 1.0, sigma: float = 0.2, T: int = 52, dt = 0.1) -> pd.DataFrame: 5 | """ Simulates a temporal series of interest rates using the One Factor Dothan model 6 | interest_rate_simulation = simulate_Dothan_One_Factor(r0, a, lam, sigma, T, dt) 7 | 8 | Args: 9 | r0 (float): starting interest rate of the driftless geometric Brownian process 10 | a (float): market price of risk 11 | sigma (float): instantaneous volatility measures instant by instant the amplitude of randomness entering the system 12 | T (integer): end modelling time. From 0 to T the time series runs. 13 | dt (float): increment of time that the process runs on. Ex. dt = 0.1 then the time series is 0, 0.1, 0.2,... 14 | 15 | Returns: 16 | N x 2 DataFrame where index is modelling time and values are a realisation of the underlying's price 17 | 18 | Example: 19 | Model the interest rate which is 10% today. The annualized instant volatility is 20%. The market price of risk is 1. 
The user is interested in an interest rate projection of the next 10 years in increments of 6 months (0.5 years). 20 | 21 | import pandas as pd 22 | import numpy as np 23 | 24 | simulate_Dothan_One_Factor(0.1, 1.0, 0.2, 10, 0.5) 25 | [out] = Time Interest Rate 26 | 0.0 0.100000 27 | 0.5 0.049780 28 | 1.0 0.019302 29 | 1.5 0.013762 30 | 2.0 0.006840 31 | 2.5 0.004245 32 | 3.0 0.002246 33 | 3.5 0.001363 34 | 4.0 0.000936 35 | 4.5 0.000650 36 | 5.0 0.000385 37 | 5.5 0.000249 38 | 6.0 0.000172 39 | 6.5 0.000122 40 | 7.0 0.000050 41 | 7.5 0.000034 42 | 8.0 0.000020 43 | 8.5 0.000013 44 | 9.0 0.000009 45 | 9.5 0.000005 46 | 10.0 0.000003 47 | """ 48 | N = int(T / dt) + 1 # number of end-points of subintervals of length dt between 0 and max modelling time T 49 | 50 | time, delta_t = np.linspace(0, T, num = N, retstep = True) 51 | 52 | r = np.ones(N) * r0 53 | 54 | for t in range(1,N): 55 | E = r[t-1] * np.exp(-a*dt) 56 | SD = r[t-1]* np.exp(-a*dt)* np.sqrt(np.exp(sigma**2*dt)-1) 57 | r[t] = E + SD * np.random.normal(loc = 0,scale = 1) 58 | 59 | dict = {'Time' : time, 'Interest Rate' : r} 60 | 61 | interest_rate_simulation = pd.DataFrame.from_dict(data = dict) 62 | interest_rate_simulation.set_index('Time', inplace = True) 63 | 64 | return interest_rate_simulation 65 | -------------------------------------------------------------------------------- /dothan_one_factor/README.MD: -------------------------------------------------------------------------------- 1 | 2 |

3 | 4 | 🐍 Dothan One-Factor model 🐍 5 | 6 |

7 | 8 | The Dothan one factor model is a simple model for simulating the evolution of short rates. The model assumes that the short rate process evolves as a driftless geometric Brownian motion. 9 | 10 | ## Problem 11 | 12 | When trying to simulate interest rates, there is a wide variety of models to choose from. The choice of the model and its limitations are a key factor in deciding which model to implement. A good practice is to start with simpler models. The Dothan model is one such model. 13 | 14 | ## Solution 15 | 16 | One of the simplest models, the [Dothan one factor model](https://quant.stackexchange.com/questions/16017/for-the-dothan-model-eqbt-infty) assumes that the short rate can be described by a simple stochastic process with one source of uncertainty coming from a [Brownian motion](https://en.wikipedia.org/wiki/Brownian_motion). 17 | 18 | The stochastic differential equation (SDE) of the Dothan model is the one shown on the Wiki page https://en.wikipedia.org/wiki/Geometric_Brownian_motion, but without the drift term. 19 | 20 | ### Input 21 | 22 | - `r0` (float): starting interest rate of the Dothan process. 23 | - `a` (float): market price of risk. 24 | - `sigma` (float): instantaneous volatility; measures instant by instant the amplitude of randomness entering the system. 25 | - `T` (integer): end modelling time. From 0 to T the time series runs. 26 | - `dt` (float): increment of time that the process runs on. Ex. dt = 0.1 then the time series is 0, 0.1, 0.2,... 27 | 28 | ### Output 29 | 30 | - N x 2 Pandas DataFrame with a sample path as values and modelling time as index. 31 | 32 | ## Getting started 33 | 34 | ```python 35 | import numpy as np 36 | import pandas as pd 37 | from Dothan_one_factor import simulate_Dothan_One_Factor 38 | 39 | r0 = 0.1 # The starting interest rate 40 | a = 1.0 # market price of risk 41 | sigma = 0.2 # instantaneous volatility 42 | T = 52 # end modelling time 43 | dt = 0.1 # increments of time 44 | 45 | print(simulate_Dothan_One_Factor(r0, a, sigma, T, dt)) 46 | ``` 47 | 48 | -------------------------------------------------------------------------------- /hull_white_one_factor/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Actuarial Algorithms 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /hull_white_one_factor/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 🐍 Hull-White One-Factor model 🐍 4 | 5 |

6 | 7 | 8 | ## Problem 9 | Simulating the evolution of interest rates is a common task in financial modelling. Some common use cases are derivative valuation and risk management. This is usually done using term structure models that describe the evolution of future interest rates. (The modelling can describe the evolution of different quantities; mainly, the choice is between forward rates and short rates.) 10 | 11 | ## Solution 12 | A popular choice of model in practice is the Hull-White model. This is an extension of the Vasicek model that can completely replicate the initial term structure. This makes it possible to construct no-arbitrage curves from the current market term structure of interest rates. It is achieved by allowing the reversion-level parameter theta, which is constant in the classical Vasicek model, to vary in time with the observed forward rates. The one-factor version presented in this repository models the short rate using the dynamics described in the [Wiki](https://en.wikipedia.org/wiki/Hull%E2%80%93White_model). 13 | 14 | ### Input 15 | The inputs to the Hull-White model are the following: 16 | - `r0` (float): starting interest rate of the Hull-White process. 17 | - `a` (float): speed of reversion parameter that is related to the velocity at which such trajectories will regroup around the forward rate theta. 18 | - `sigma` (float): instantaneous volatility; measures instant by instant the amplitude of randomness entering the system. 19 | - `t` (array of floats): representing times at which the output is generated. 20 | - `f` (array of floats): representing the instantaneous forward rates at times from input t. 21 | 22 | ### Output 23 | - N x 2 Pandas DataFrame where index is modelling time and values are a realisation of the spot rate increments. 24 | 25 | ## Getting started 26 | 27 | ```python 28 | import numpy as np 29 | import pandas as pd 30 | from simulate_Hull_White_One_Factor import simulate_Hull_White_One_Factor 31 | 32 | time = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) 33 | forwards = np.array([0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03]) 34 | sigma = 0.2 35 | alpha = 0.04 36 | r0 = 0.02 37 | 38 | out = simulate_Hull_White_One_Factor(r0, alpha, sigma, time, forwards) 39 | 40 | index_evolution = np.insert(np.exp(np.cumsum(out["Interest Rate"].values)),0,1) 41 | print(index_evolution) 42 | ``` 43 | -------------------------------------------------------------------------------- /hull_white_one_factor/simulate_Hull_White_One_Factor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def simulate_Hull_White_One_Factor(r0: float , a: float, sigma: float, t, f) ->pd.DataFrame: 5 | """ Simulates a temporal series of interest rates using the One Factor Hull-White model 6 | Form of the model is dr_{t} = [theta[t] - alpha * r_{t-1}] dt + sigma * dW_{t} 7 | interest_rate_simulation = simulate_Hull_White_One_Factor(r0, alpha, sigma, t, f) 8 | 9 | Args: 10 | r0 (float): starting interest rate of the Hull White process. 11 | a (float): speed of reversion parameter that is related to the velocity at which such trajectories will regroup around the forward rate theta. 12 | sigma (float): instantaneous volatility measures instant by instant the amplitude of randomness entering the system. 13 | t (array of floats): representing times at which the output is generated. 14 | f (array of floats): representing the instantaneous forward rates at times from input t.
15 | 16 | Returns: 17 | N x 2 Pandas DataFrame where index is modeling time and values are a realisation of the spot rate increments. 18 | 19 | Example: 20 | Model the interest rate which is 2% today. The annualized instant volatility is 20%. The external analysis points out that the parameter alpha is 0.04 and the forward rates are equal to 3% in all maturities. 21 | The user is interested in an interest rate projection of the next 10 years in annual time steps 22 | 23 | import pandas as pd 24 | import numpy as np 25 | 26 | simulate_Hull_White_One_Factor(0.02, 0.04, 0.2, np.array([1,2,3,4,5,6,7,8,9,10]), np.array([0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03])) 27 | [out] = Time Interest Rate 28 | 1 0.020000 29 | 2 0.168049 30 | 3 0.265637 31 | 4 0.400717 32 | 5 0.053308 33 | 6 0.088566 34 | 7 0.434668 35 | 8 0.414285 36 | 9 0.186548 37 | 10 0.354167 38 | For more information see https://en.wikipedia.org/wiki/Hull-White_model 39 | """ 40 | 41 | N = t.shape[0] 42 | e = np.zeros(N) 43 | v = np.zeros(N) 44 | r = np.ones(N) * r0 45 | alpha = f + sigma**2/(2*a**2)*(1-np.exp(-a*t))**2 46 | for el in range(1, N): 47 | deltat = t[el] - t[el-1] 48 | e[el] = r[el-1] * np.exp(-a*deltat) + alpha[el] - alpha[el-1] * np.exp(-a*deltat) 49 | v[el] = sigma**2/(2*a) * (1 - np.exp(-2*a*deltat)) 50 | r[el] = np.random.normal(e[el], np.sqrt(v[el])) 51 | dict = {'Time' : t, 'Interest Rate' : r} 52 | 53 | interest_rate_simulation = pd.DataFrame.from_dict(data = dict) 54 | interest_rate_simulation.set_index('Time', inplace = True) 55 | return interest_rate_simulation 56 | -------------------------------------------------------------------------------- /images/OSM_logo.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-source-modelling/insurance_python/db27be755c757a5b4b676e9f04c2ccd3cd75f9ce/images/OSM_logo.jpeg -------------------------------------------------------------------------------- /images/Open-source modelling-logos_transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-source-modelling/insurance_python/db27be755c757a5b4b676e9f04c2ccd3cd75f9ce/images/Open-source modelling-logos_transparent.png -------------------------------------------------------------------------------- /images/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /nelson_siegel_svansson/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 🐍 Nelson-Siegel-Svensson algorithm 🐍 4 | 5 |

6 | 7 |
8 | 9 | Popular algorithm for fitting a yield curve to observed data. 10 | 11 | ## Problem 12 | Data on bond yields is usually available only for a small set of maturities, while the user is normally interested in a wider range of yields. 13 | 14 | ## Solution 15 | A popular solution is to use an algorithm to find a function that fits the existing datapoints. This way, the function can be used to interpolate/extrapolate any other point. The Nelson-Siegel-Svensson model is a curve-fitting algorithm that is flexible enough to approximate most yield curves observed in practice. 16 | 17 | The Nelson-Siegel-Svensson model is an extension of the 4-parameter Nelson-Siegel method to 6 parameters. Svensson introduced two extra parameters to better fit the variety of shapes of either the instantaneous forward rate or yield curves that are observed in practice. 18 | 19 | Advantages: 20 | - It produces a smooth and well-behaved forward rate curve. 21 | - The parameters have an intuitive interpretation: `beta0` is the long-term interest rate and `beta0+beta1` is the instantaneous short-term rate. 22 | 23 | To find the optimal value of the parameters, the Nelder-Mead simplex algorithm is used (already implemented in the scipy package). The reference for the optimization algorithm is Gao, F. and Han, L., Implementing the Nelder-Mead simplex algorithm with adaptive parameters, Computational Optimization and Applications, 51:1, pp. 259-277, 2012. 24 | 25 | The yield for a maturity at time `t` is given by the formula: 26 | 27 | $$r(t) = \beta_{1} + \beta_{2} \left( \frac{1-\exp(-t/\lambda_1)}{t/\lambda_1} \right) + \beta_{3} \left( \frac{1-\exp(-t/\lambda_1)}{t/\lambda_1} - \exp(-t/\lambda_1) \right) + \beta_{4} \left( \frac{1-\exp(-t/\lambda_2)}{t/\lambda_2} - \exp(-t/\lambda_2) \right)$$ 34 | 35 | ### Parameters 36 | 37 | - Observed yield rates `YieldVec`. 38 | - Maturity of each observed yield `TimeVec`. 39 | - Initial guess for parameters `beta0`, `beta1`, `beta2`, `beta3`, `lambda0`, and `lambda1`. 40 | - Target maturities `TimeResultVec`. 41 | 42 | ### Desired output 43 | 44 | - Calculated yield rates for maturities of interest `TimeResultVec`. 45 | 46 | ## Getting started 47 | 48 | The user is interested in the projected yield for government bonds with maturities of 1, 2, 5, 10, 25, 30, and 31 years. They have data on government bonds maturing in 49 | 1, 2, 5, 10, and 25 years. The calculated yield for those bonds is 0.39%, 0.61%, 1.66%, 2.58%, and 3.32%.
50 | 51 | ```python 52 | from nelsonsiegelsvensson import * 53 | import numpy as np 54 | 55 | TimeVec = np.array([1, 2, 5, 10, 25]) 56 | YieldVec = np.array([0.0039, 0.0061, 0.0166, 0.0258, 0.0332]) 57 | beta0 = 0.1 # initial guess 58 | beta1 = 0.1 # initial guess 59 | beta2 = 0.1 # initial guess 60 | beta3 = 0.1 # initial guess 61 | lambda0 = 1 # initial guess 62 | lambda1 = 1 # initial guess 63 | 64 | TimeResultVec = np.array([1, 2, 5, 10, 25, 30, 31]) # Maturities for yields that we are interested in 65 | 66 | ## Implementation 67 | OptiParam = NSSMinimize(beta0, beta1, beta2, beta3, lambda0, lambda1, TimeVec, YieldVec) # The Nelder-Mead simplex algorithm is used to find the parameters that result in a curve with the minimum residuals compared to the market data. 68 | 69 | # Print the yield curve with optimal parameter to compare with the data provided 70 | print(NelsonSiegelSvansson(TimeResultVec, OptiParam[0], OptiParam[1], OptiParam[2], OptiParam[3], OptiParam[4], OptiParam[5])) 71 | ``` 72 | -------------------------------------------------------------------------------- /nelson_siegel_svansson/main.py: -------------------------------------------------------------------------------- 1 | """ The Nelson-Siegel-Svensson is a popular extension of the 4-parameter Nelson-Siegel method to 6 parameters. It is an algorithm for interpolating/extrapolating the yield curve among other uses. 2 | Svensson introduced two extra parameters to better fit the variety of shapes of either the instantaneous forward rate or yield curves that are observed in practice. 3 | A desirable property of the model is that it produces a smooth and well-behaved forward rate curve. 4 | Another desirable property is the intuitive explanation of the parameters. beta0 is the long-term interest rate and beta0+beta1 is the instantaneous short-term rate. 5 | To find the optimal value of the parameters, the Nelder-Mead simplex algorithm is used (already implemented in the scipy package). The reference for the optimization algorithm is 6 | Gao, F. and Han, L. Implementing the Nelder-Mead simplex algorithm with adaptive parameters. 2012. Computational Optimization and Applications. 51:1, pp. 259-277 7 | """ 8 | from nelsonsiegelsvensson import * 9 | import numpy as np 10 | 11 | ## Inputs 12 | # - Observed yield rates (YieldVec) 13 | # - Maturity of each observed yield (TimeVec) 14 | # - Initial guess for parameters (beta0, beta1, beta2, beta3, lambda0, and lambda1) 15 | # - Target maturities (TimeResultVec) 16 | 17 | TimeVec = np.array([1,2,5,10,25]) 18 | YieldVec = np.array([0.0039, 0.0061, 0.0166, 0.0258, 0.0332]) 19 | beta0 = 0.1 # initial guess 20 | beta1 = 0.1 # initial guess 21 | beta2 = 0.1 # initial guess 22 | beta3 = 0.1 # initial guess 23 | lambda0 = 1 # initial guess 24 | lambda1 = 1 # initial guess 25 | 26 | TimeResultVec = np.array([1,2,5,10,25,30,31]) # Maturities for yields that we are interested in 27 | 28 | ## Implementation 29 | OptiParam = NSSMinimize(beta0, beta1, beta2, beta3, lambda0, lambda1, TimeVec, YieldVec) # The Nelder-Mead simplex algorithm is used to find the parameters that result in a curve with the minimum residuals compared to the market data.
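# Note: OptiParam is a 6 x 1 array (beta0, beta1, beta2, beta3, lambda0, lambda1).
# NSSMinimize returns an empty list if the optimizer does not converge, so a production
# script should check for that before indexing into OptiParam.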
30 | 31 | # Print the yield curve with optimal parameter to compare with the data provided 32 | print(NelsonSiegelSvansson(TimeResultVec, OptiParam[0], OptiParam[1], OptiParam[2], OptiParam[3], OptiParam[4], OptiParam[5])) 33 | -------------------------------------------------------------------------------- /nelson_siegel_svansson/nelsonsiegelsvensson.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.optimize import minimize 3 | 4 | def NelsonSiegelSvansson(T, beta0: float, beta1: float, beta2: float, beta3: float, lambda0: float, lambda1: float): 5 | """ 6 | NelsonSiegelSvansson calculates the interpolated/extrapolated curve at points in the array "T" using the Nelson-Siegel-Svensson algorithm, 7 | parameterized with parameters beta0, beta1, beta2, beta3, lambda0, lambda1. It returns a numpy ndarray of points. 8 | 9 | Arguments: 10 | T: n x 1 ndarray of maturities for which the user wants to calculate the corresponding rate. 11 | beta0: 1 x 1 floating number, representing the first factor of the NSS parametrization. 12 | beta1: 1 x 1 floating number, representing the second factor of the NSS parametrization. 13 | beta2: 1 x 1 floating number, representing the third factor of the NSS parametrization. 14 | beta3: 1 x 1 floating number, representing the fourth factor of the NSS parametrization. 15 | lambda0: 1 x 1 floating number, representing the first shape parameter lambda of the NSS parametrization. 16 | lambda1: 1 x 1 floating number, representing the second shape parameter lambda of the NSS parametrization. 17 | 18 | Returns: 19 | n x 1 ndarray of interpolated/extrapolated points corresponding to maturities inside T, where n is the length of the vector T. 20 | 21 | Implemented by Gregor Fabjan from Qnity Consultants on 11/07/2023 22 | """ 23 | alpha1 = (1-np.exp(-T/lambda0)) / (T/lambda0) 24 | alpha2 = alpha1 - np.exp(-T/lambda0) 25 | alpha3 = (1-np.exp(-T/lambda1)) / (T/lambda1) - np.exp(-T/lambda1) 26 | 27 | return beta0 + beta1*alpha1 + beta2*alpha2 + beta3*alpha3 28 | 29 | def NSSGoodFit(params: list, TimeVec, YieldVec): 30 | """ 31 | NSSGoodFit calculates the residuals between the yields predicted by the NSS algorithm with the specified parameterization and the observed market yields. 32 | 33 | Arguments: 34 | params: 6 x 1 tuple containing the 6 parameters of the NSS algorithm. The sequence of parameters needs to be (beta0, ..., beta3, lambda0, lambda1). 35 | TimeVec: n x 1 ndarray of maturities for which the yields in YieldVec were observed. 36 | YieldVec: n x 1 ndarray of observed yields. 37 | 38 | Returns: 39 | 1 x 1 floating number, the sum of squared differences (squared Euclidean distance) between the calculated points and the observed data. 40 | 41 | Implemented by Gregor Fabjan from Qnity Consultants on 11/07/2023 42 | """ 43 | 44 | return np.sum((NelsonSiegelSvansson(TimeVec, params[0], params[1], params[2], params[3], params[4], params[5])-YieldVec)**2) 45 | 46 | def NSSMinimize(beta0: float, beta1: float, beta2: float, beta3: float, lambda0: float, lambda1: float, TimeVec, YieldVec) -> list: 47 | """ 48 | NSSMinimize uses the built-in minimize function from Python's scipy package. The function sets up the parameters and the function NSSGoodFit so as to make them 49 | compatible with the way minimize requires its arguments. If the optimization does not converge, the output is an empty array. 50 | 51 | Arguments: 52 | beta0: 1 x 1 floating number, representing the first factor of the NSS parametrization.
53 | beta1: 1 x 1 floating number, representing the second factor of the NSS parametrization. 54 | beta2: 1 x 1 floating number, representing the third factor of the NSS parametrization. 55 | beta3: 1 x 1 floating number, representing the fourth factor of the NSS parametrization. 56 | lambda0: 1 x 1 floating number, representing the first shape parameter lambda of the NSS parametrization. 57 | lambda1: 1 x 1 floating number, representing the second shape parameter lambda of the NSS parametrization. 58 | TimeVec: n x 1 ndarray of maturities for which the yields in YieldVec were observed. 59 | YieldVec: n x 1 ndarray of observed yields. 60 | 61 | Returns: 62 | 6 x 1 array of parameters and factors that best fit the observed yields (or an empty array if the optimization was not successful). 63 | 64 | Source: 65 | - https://docs.scipy.org/doc/scipy/reference/optimize.minimize-neldermead.html 66 | - https://en.wikipedia.org/wiki/Nelder%E2%80%93Mead_method 67 | 68 | Implemented by Gregor Fabjan from Qnity Consultants on 11/07/2023 69 | """ 70 | 71 | opt_sol = minimize(NSSGoodFit, x0=np.array([beta0, beta1, beta2, beta3, lambda0, lambda1]), args = (TimeVec, YieldVec), method="Nelder-Mead") 72 | if (opt_sol.success): 73 | return opt_sol.x 74 | else: 75 | return [] 76 | -------------------------------------------------------------------------------- /singular_spectrum_analysis/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Open Source Modelling 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /singular_spectrum_analysis/README.md: -------------------------------------------------------------------------------- 1 | # Singular Spectrum analysis 2 | 3 | Welcome to the Singular Spectrum analysis repository. This repository hosts the code and examples for a non-parametric and automated algorithm for time-series analysis. The goal of this project is to provide a transparent, reproducible codebase for actuarial use-cases. This repository is based on, among others, the paper [Casalini, Riccardo, Singular Spectrum Analysis: Back to basics (February 13, 2025)]( https://papers.ssrn.com/sol3/papers.cfm?abstract_id=5136637). 4 | 5 | ________________________________________ 6 | ## Overview 7 | Singular Spectrum Analysis (SSA) is a non-parametric technique of time series analysis and forecasting. 
SSA aims at decomposing the original series into a sum of a small number of interpretable components, such as a slowly varying trend, oscillatory components, and structureless noise. 8 | 9 | SSA allows for automated time-series analysis and forecasting with minimal assumptions on the model form. 10 | Basic SSA analysis consists of four steps: 1) embedding, 2) singular value decomposition, 3) grouping, and 4) diagonal averaging (see the class documentation in ssaBasic.py). A quick usage example is given at the end of this README. 11 | 12 | ________________________________________ 13 | ## Getting Started 14 | ### Prerequisites 15 | - Python 3.8+ 16 | - Jupyter Lab or Jupyter Notebook 17 | - Familiarity with Python data-analysis libraries (e.g., NumPy, Matplotlib, Seaborn) 18 | 19 | ### Data Preparation 20 | 1) Download the entire repo 21 | 2) Open the Jupyter notebook using Anaconda 22 | 3) Run the notebook 23 | 24 | ________________________________________ 25 | ## Content: 26 | #### ssaBasic.py 27 | Python code implementing the SSA algorithm and the calibration protocols. 28 | 29 | #### SSA_Example.ipynb 30 | A Jupyter notebook that shows an example of using the SSA algorithm and its individual functions. 31 | 32 | ________________________________________ 33 | ## Contributing 34 | Contributions are welcome! If you have any ideas, bug reports, or suggestions: 35 | 1. Fork the repository. 36 | 2. Create a new branch. 37 | 3. Make your changes and commit them: git commit -m "Add some feature". 38 | 39 | Similar code (written for MATLAB) is available at [GitHub](https://github.com/NiemandN/SSABASIC) and [Mathworks Exchange](https://www.mathworks.com/matlabcentral/fileexchange/180188-singular-spectrum-analysis). The original paper is available at SSRN: https://ssrn.com/abstract=5136637 or http://dx.doi.org/10.2139/ssrn.5136637. Feel free to also contribute to the MATLAB version of SSA. 40 | 41 | ________________________________________ 42 | ## Licence 43 | The notebooks are released under an MIT license. Free to copy, download, modify and use in other products. 44 | ________________________________________ 45 | ## Disclaimer 46 | No Warranties: This software is provided on an “as is” basis, without warranties or conditions of any kind, either express or implied. The authors do not assume any responsibility for the correct functioning of the code, for any bugs or for unintended consequences arising from the use of this software. Use at your own risk. 47 | ________________________________________ 48 | If you have questions, suggestions, or concerns, reach out to gregor@osmodelling.com.
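________________________________________
## Quick example

A minimal usage sketch of the `ssaBasic` class from `ssaBasic.py` (the toy series, window length `L0 = 24`, and number of eigen-triples are illustrative choices, not recommendations):

```python
import numpy as np
from ssaBasic import ssaBasic

# Toy series: linear trend + annual cycle + noise, shaped as a 1 x N row vector
t = np.arange(240)
x = (0.01 * t + np.sin(2 * np.pi * t / 12) + 0.1 * np.random.randn(t.size))[np.newaxis, :]

ssa = ssaBasic(x, 24)                  # steps 1-2: embedding and singular value decomposition
ssa.plotSingularValues()               # scree plot, used to pick the leading eigen-triples
signal = ssa.reconstruction(3)         # steps 3-4: grouping and diagonal averaging
xM, xCi, xSamp = ssa.forecast(3, 12)   # recursive forecast, 12 steps ahead
```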
49 | -------------------------------------------------------------------------------- /singular_spectrum_analysis/SSA_Example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-source-modelling/insurance_python/db27be755c757a5b4b676e9f04c2ccd3cd75f9ce/singular_spectrum_analysis/SSA_Example.pdf -------------------------------------------------------------------------------- /singular_spectrum_analysis/ssaBasic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | from numpy.matlib import repmat 5 | import warnings 6 | 7 | class ssaBasic: 8 | """ 9 | SSABASIC performs a basic version of Singular Spectrum Analysis. 10 | This class implements Singular Spectrum Analysis (SSA) according to the contents of the book "Analysis of Time Series Structure: SSA and Related Techniques", N. Golyandina, V. Nekrutkin, and A. Zhigljavsky, 2001. 11 | From the introduction: SSA is essentially a model-free technique; it is more an exploratory, model-building technique than a confirmatory procedure. It aims at a decomposition of the original series into a sum of a small number of interpretable components such as a 12 | slowly varying trend, oscillatory components and a structureless noise. The main concept in studying the SSA properties is separability, which characterizes how well different components can be separated from each other. 13 | Basic SSA analysis consists of four steps: 14 | 1) Embedding 15 | 2) Singular Value Decomposition (this and the previous step are performed within the class constructor) 16 | 3) Grouping (this step is performed by the grouping method) 17 | 4) Diagonal Averaging (this step is performed by the method hankelization) 18 | The previous steps lay the groundwork for capturing the data generation process through the reconstruction method. 19 | Finally, the forecast is made using the forecast method, which applies a linear recursive formula. 20 | Diagnostic methods included in this class are: 21 | wcorrelation: weighted correlation to assess how well separated the groups are; 22 | plotSingularValues: scree plot to identify the leading singular values; 23 | scatterplotsEigenvectors: scatter plot of the eigenvectors to capture the periodicity of their respective principal component; 24 | crossval_r0 and crossval_L0: respectively, the cross-validation of the number of eigen-triples needed for signal reconstruction and the number of lags necessary to single out the relevant signal components (eigen-triples). 25 | Validation methods included in this class are: 26 | validateG0: ensures G0 is a non-empty, positive, numeric array or scalar without gaps. Moreover, max(G0) < L0 + 1 and length(G0) = L0 + 1; 27 | validateL0: ensures L0 is a positive scalar less than half the number of observations; 28 | validateR0: ensures r0 is a non-empty, positive, numeric array or scalar and that max(r0) < L0 + 1; 29 | validateNumVal: ensures NumVal is a positive scalar less than or equal to the embedding dimension. 30 | backtest: evaluates the SSA forecast robustness by splitting the data sample into in-sample and out-of-sample sets. Perform the forecast on the in-sample data and compare the obtained forecast with the known data from the out-of-sample set.
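    Attributes set by the constructor: N (number of observations), mX (the sample mean, stored because the series is centred internally), L (the window length), x (the centred series as a 1 x N row vector), H (the trajectory/Hankel matrix), and U, S, V (its singular value decomposition, with S stored as a diagonal matrix).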
31 | """ 32 | 33 | def __init__(self, x: np.ndarray, L0: int): 34 | 35 | if not isinstance(x, np.ndarray): 36 | raise TypeError(f"Expected input to be a numpy.ndarray, but got {type(x).__name__}") 37 | 38 | self.mustBeNumericArray(x) 39 | 40 | if x.ndim == 1: 41 | x = x[np.newaxis, :] # ensure row vector 42 | elif x.shape[0] > x.shape[1]: 43 | x = x.T # transpose to row vector 44 | 45 | [n,m] = x.shape 46 | 47 | self.N = x.size 48 | 49 | self.mX = np.mean(x) 50 | 51 | x = x - np.mean(x) 52 | 53 | # Make sure x is a row vector 54 | if n>m: 55 | x = x.transpose() 56 | 57 | # If L0 is not provided, set ut ti N/2 (Handled as default argument) 58 | if L0 is None: 59 | L0 = int(np.floor(self.N/2)) 60 | 61 | L0 = self.validateL0(L0) 62 | 63 | Hx = self.embedding(x,L0) 64 | 65 | U, S, V = np.linalg.svd(Hx,full_matrices=False) 66 | Vt = V.transpose() 67 | S = np.diag(S) 68 | self.L = L0 69 | self.x = x 70 | self.U = U 71 | self.S = S 72 | self.V = Vt 73 | self.H = Hx 74 | 75 | 76 | def mustBeNumericArray(self, x:np.ndarray): 77 | """ 78 | Checks if x is a numeric vector 79 | """ 80 | 81 | if not isinstance(x, np.ndarray): 82 | raise TypeError("Array must be a numeric numpy array vector") 83 | 84 | [n,m] = x.shape 85 | if min(n,m)>1 or n == m: 86 | raise ValueError("Array must be a numeric numpy array vector") 87 | 88 | return True 89 | 90 | def validateL0(self, L0: int): 91 | """ 92 | Ensure L0 is a scalar 93 | """ 94 | 95 | # Check if L0 is an integer 96 | if not isinstance(L0, int): 97 | raise TypeError("L0 is not an integer") 98 | 99 | # Make sure L0 is positive 100 | L = int(abs(L0)) 101 | 102 | # Check if L0 is less than N/2 103 | N2 = np.floor(self.N /2) 104 | 105 | if L0>N2: 106 | warnings.warn("L0 is too high (L/2). Reducing it to L/2") 107 | L = int(N2) 108 | 109 | return L 110 | 111 | def embedding(self, x: np.ndarray, L0: int): 112 | """ 113 | Constructs a Hankel matrix from the input numpy row vector x based on the lag L0. 114 | 115 | Parameters: 116 | x (np.ndarray): The input row vector (1, N) from which the Hankel matrix is constructed. 117 | L0 (int): The embedding dimension that determines the structure of the Hankel matrix. 118 | 119 | Returns: 120 | np.ndarray: The resulting Hankel matrix of size (N - L0 + 1) x (L0 + 1), where 121 | each row represents overlapping segments of the input array. 122 | """ 123 | 124 | N = x.shape[1] # Get length from the second dimension 125 | 126 | # Construct the Hankel matrix 127 | Hx = np.array([x[0, i:N - L0 + i] for i in range(L0 + 1)]) 128 | 129 | return Hx 130 | 131 | def reconstruction(self, r0): 132 | """ 133 | RECONSTRUCTION Reconstructs the signal using a subset of singular values. 134 | y = reconstruction(self, r0) reconstructs the signal from its trajectory matrix 135 | singular value decomposition (SVD). The reconstruction is based on selected singular values. 136 | 137 | Input: 138 | - r0: Specifies which singular values to use for the reconstruction. 139 | If r0 is a scalar, the first r0 singular values are used. 140 | If r0 is a vector (e.g., [0 1 4]), the singular values at the corresponding positions are used (e.g., the first, second, and fifth). 
141 | 142 | Output: 143 | - y: A vector containing the reconstructed signal 144 | """ 145 | r = self.validateR0(r0) 146 | Sr = np.diag(self.S) 147 | Sr = np.diag(Sr[r].flatten()) 148 | y = self.U[:,r.flatten()] @ Sr @ self.V[:,r.flatten()].transpose() 149 | y = self.hankelization(y) 150 | y = y + self.mX 151 | return y 152 | 153 | def validateR0(self, r0): 154 | """ 155 | Ensures r0 is a valid scalar or array of positive integers. 156 | Returns: 157 | r : a NumPy array of indices (the 0-based range 0..r0-1 if r0 is a scalar) 158 | Raises: 159 | ValueError if input is invalid 160 | """ 161 | if r0 is None or (hasattr(r0, '__len__') and len(r0) == 0): 162 | raise ValueError("r0 must be a non-empty array or scalar") 163 | 164 | # Check that r0 is non-negative 165 | if not self.mustBePositive(r0): 166 | raise ValueError("r0 must contain only positive integers") 167 | 168 | # Convert to numpy array 169 | r0_arr = np.atleast_1d(r0) 170 | 171 | # Check if numeric and all positive 172 | if not np.issubdtype(r0_arr.dtype, np.number) or np.any(r0_arr < 0): 173 | raise ValueError(f"r0 must be a non-empty, positive, numeric array or scalar. Got: {type(r0).__name__} instead.") 174 | 175 | # Check if max(r0) is within bounds 176 | self.checkMaxSingularValues(r0_arr) 177 | 178 | # Return the index range 0..r0-1 if r0 is a scalar 179 | if r0_arr.size == 1: 180 | return np.arange(0, int(r0_arr[0]))[np.newaxis,:] 181 | elif r0_arr.shape[0] == 1: 182 | return r0_arr.astype(int) 183 | else: 184 | return r0_arr.astype(int)[np.newaxis,:] 185 | 186 | def checkMaxSingularValues(self, r0=None): 187 | 188 | max_singular_values = self.L 189 | ft = False 190 | if r0 is None: 191 | ft = True 192 | return [ft, max_singular_values] 193 | 194 | if np.max(r0)>self.L: 195 | raise ValueError("For the SSA recursive forecast, max(r0) must be less than L + 1.") 196 | else: 197 | ft = True 198 | 199 | return [ft, max_singular_values] 200 | 201 | 202 | def grouping(self, G: np.ndarray, display = "on"): 203 | """ 204 | GROUPING groups the eigen-triples according to groups in G. 205 | y = grouping(self, G, display) groups eigen-triples according to G 206 | where G is an array of numbers (e.g. G = np.array([1, 1, 2, 2, 3, 0, 0, 0])). 207 | Singular values with the same number in array G are 208 | collected in the same group. Values with a 0 are ignored. 209 | (e.g. if G = np.array([1, 1, 2, 0, 0, 0, 0, 0]) the first 210 | two eigen-triples are summed together and the third is 211 | considered in a separate group).
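        The method returns an m x N array, where m = max(G): row ii holds the reconstructed
        (hankelized, with the series mean added back) component of group ii + 1. With
        display = 'on', each component is also plotted in its own subplot.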
212 | """ 213 | G = self.validateG0(G) 214 | m = int(np.max(G)) 215 | n = self.U.shape[0] + self.V.shape[0] - 1 216 | y = np.zeros((m, n)) 217 | allPos = np.array(range(0, self.L + 1)) 218 | for ii in range(1, m+1): 219 | tmp_pos = allPos[G == ii] 220 | tmp_d = np.diag(self.S)[np.newaxis,] 221 | tmp_u = self.U[:, tmp_pos] * repmat(tmp_d[0,tmp_pos],self.L+1,1) 222 | tmp_y = tmp_u @ self.V[:,tmp_pos].transpose() # ToDo 223 | y[ii-1, :] = self.hankelization(tmp_y) + self.mX # Assuming obj has hankelization and mX 224 | 225 | 226 | if display == 'on': 227 | plt.figure(figsize=(8, 2 * m)) # Adjust figure size as needed 228 | 229 | for ii in range(0, m): 230 | plt.subplot(m, 1, ii + 1) 231 | plt.plot(y[ii, :]) 232 | plt.xlim([0, self.N]) 233 | plt.title(f'Component {ii + 1}') 234 | plt.xlabel('Obs') 235 | plt.ylabel(f'$x_{{{ii + 1}}}$') 236 | plt.tight_layout() 237 | plt.show() 238 | 239 | return y 240 | 241 | def bootstrap(self, r, m: int): 242 | """ 243 | BOOTSTRAP bootstraps m times SSA residuals 244 | bootstrap(self, r, m) given a time series x and the number of eigen-triples (r) used for reconstructing the signal z generates m copies of x sampling on residuals computed by the linear regression of z on x using ordinary least squares (OLS) 245 | Input: 246 | r - Number of eigentriples used for reconstructing the signal z. 247 | m - Number of bootstrap samples to generate. 248 | 249 | Output: 250 | Rz - A matrix of size (m, length(z)), where each row is a bootstrap sample of the reconstructed signal. 251 | """ 252 | z = self.reconstruction(r) 253 | z_len = np.max(z.shape) 254 | zt = z.transpose() # Reshape to column vector 255 | xt = (self.x + self.mX).transpose() # Reshape to column vector 256 | 257 | # Compute residuals using OLS 258 | beta = np.linalg.lstsq(np.hstack((np.ones((z_len, 1)), zt)), xt, rcond=None)[0] 259 | 260 | ols_res = (xt - np.hstack((np.ones((z_len, 1)), zt)) @ beta).flatten() 261 | # True bootstrapping 262 | random_indices = np.random.randint(0, z_len, size=(m, z_len)) 263 | R = ols_res[random_indices] 264 | Rz = R + np.tile(z, (m, 1)) 265 | return Rz 266 | 267 | def forecast(self, r0, M, num_samp=100, display="off"): 268 | """ 269 | FORECAST forecasts the signal according to basic SSA. 270 | xM, xCi, xSamp = forecast(self, r, M) forecasts the signal extracted from 271 | the original series x using the recursive algorithm, M 272 | times ahead. 273 | 274 | Input: 275 | r0 - A scalar or array specifying which singular values to 276 | use for the signal reconstruction. If r0 is a scalar, the method 277 | uses the first r0 singular values. If r0 is an array, 278 | it uses the singular values corresponding to the 279 | indices listed in r0 (e.g., r0 = [0, 1, 4] uses the 280 | 1st, 2nd, and 5th singular values). 281 | M - The number of periods to forecast ahead. 282 | num_samp - (Optional) The number of bootstrap samples to generate 283 | for uncertainty estimation. Default is 100. 284 | 285 | Output: 286 | xM - A vector containing the original time series data 287 | followed by the forecasted values for the next M periods. 288 | xCi - A matrix containing the confidence intervals for the 289 | forecasted values, calculated from bootstrap samples. 290 | The intervals are determined using the 97.5th and 2.5th 291 | percentiles. 292 | xSamp - A matrix containing forecast values derived from bootstrap 293 | samples to assess forecast uncertainty. Each row represents a 294 | different bootstrap sample forecast. 
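        Note:
            The point forecast applies the linear recurrent formula of basic SSA: each new value
            is a linear combination of the preceding L values, with coefficients derived from the
            last components of the selected left singular vectors (see forecastRecursive). The
            confidence intervals re-estimate the SSA basis on each bootstrap sample and repeat
            the recursive forecast.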
295 | """ 296 | 297 | r = self.validateR0(r0) 298 | 299 | P = self.U[:, r.flatten()] 300 | 301 | xM = self.forecastRecursive(self.x, P, M) 302 | xM = xM + self.mX 303 | 304 | if num_samp is not None: 305 | xSamp = np.zeros((num_samp, np.max(xM.shape))) 306 | xR = self.bootstrap(r.flatten(), num_samp) 307 | xR = xR - self.mX 308 | 309 | for ii in range(0, num_samp): 310 | tmpZ = self.embedding(xR[ii, :][np.newaxis,:], self.L) 311 | tmpU, _, _ = np.linalg.svd(tmpZ, full_matrices=False) 312 | tmpP = tmpU[:, r.flatten()] 313 | xSamp[ii, :] = self.forecastRecursive(xR[ii, :][np.newaxis,:], tmpP, M) 314 | 315 | xSamp = xSamp[:, -M:] 316 | xCi = np.percentile(xSamp, [97.5, 2.5], axis=0) 317 | xCi = xCi + self.mX 318 | xSamp = xSamp + self.mX 319 | 320 | if display == "on": 321 | # Assuming self.x, self.mX, xSamp, and M are defined 322 | inSamp = int(np.floor(0.1 * np.max(self.x.shape))) 323 | Dy = np.arange(1, inSamp + 1) 324 | Dn = np.arange(inSamp + 1, inSamp + M + 1) 325 | 326 | # Historical data 327 | yHist = np.vstack((Dy, self.x[0, -inSamp:] + self.mX)).T 328 | 329 | # Forecast data (mean of samples) 330 | yFore = np.vstack((Dn, xSamp)).T 331 | 332 | upper = xCi[0, :] # 97.5% 333 | lower = xCi[1, :] # 2.5% 334 | 335 | # Fan plot replacement using matplotlib 336 | plt.figure(figsize=(10, 5)) 337 | plt.plot(yHist[:, 0], yHist[:, 1], 'o-', label='Historical', color='black') 338 | 339 | for i_plot in range(1, num_samp): 340 | plt.plot(yFore[:, 0], yFore[:, i_plot], '--', linewidth=0.2) 341 | 342 | plt.plot(yFore[:, 0], yFore.mean(axis=1), 'o-', label='Forecast Mean', color='blue') 343 | 344 | plt.fill_between(Dn, lower, upper, color='blue', alpha=0.3, label='95% CI') 345 | 346 | plt.title('Forecast with SSA basic') 347 | plt.xlabel('Time') 348 | plt.ylabel('Value') 349 | plt.legend() 350 | plt.grid(True) 351 | plt.tight_layout() 352 | plt.show() 353 | return xM, xCi, xSamp 354 | 355 | return xM, None, None 356 | 357 | def forecastRecursive(self, y, P, M): 358 | """ 359 | FORECASTRECURSIVE recursively forecasts y, M periods ahead. 360 | yNew = forecastRecursive(y, P, M) applies a recursive 361 | algorithm to project y on the r-space defined by the basis 362 | vectors in P, M periods ahead. 363 | 364 | Input: 365 | y - A vector representing the time series data to be forecasted. 366 | P - A matrix of basis vectors defining the r-space for projection. 367 | M - The number of periods to forecast ahead. 368 | 369 | Output: 370 | yNew - A vector containing the original time series data followed by the forecasted values for the next M periods. 371 | """ 372 | L1 = P[:-1, :].shape[0] 373 | y_len = y.shape[1] 374 | Hx = self.embedding(y, L1) 375 | Xhat = P @ P.T @ Hx # project H on basis vectors 376 | Y = self.hankelization(Xhat) # hankelization 377 | 378 | # apply recursion 379 | nu2 = np.sum(P[-1, :]**2) 380 | Pup = P[:-1, :] * P[-1, :] # NumPy broadcasting handles repmat 381 | R = 1 / (1 - nu2) * np.sum(Pup, axis=1)[:,np.newaxis] 382 | yNew = np.zeros((1,y_len + M)) 383 | yNew[0,:y_len] = Y 384 | 385 | for ii in range(0,M): 386 | yNew[:,y_len + ii] = yNew[:,y_len - L1 + ii:y_len + ii] @ R 387 | 388 | return yNew 389 | 390 | def plotSingularValues(self, num_values=None, display='double'): 391 | """ 392 | PLOTSINGULARVALUES Plots ordered singular values and their contributions. 393 | plotSingularValues(self) creates two plots: 394 | 1. A scree plot of the first numValues singular values. 395 | 2. 
A bar plot of the relative cumulative contribution of each singular value 396 | to the overall signal variance. 397 | 398 | Inputs: 399 | num_values - The number of singular values to plot (default is self.L). 400 | display - (optional) A string that specifies the type of plot: 401 | 'double' (default) for both singular values and contributions, 402 | 'cm' for only contributions, 403 | 'scree' for only singular values, 404 | 'none' for no plot. 405 | """ 406 | 407 | if num_values is None: 408 | num_values = min(self.L, 30) 409 | 410 | self.validateNumVal(num_values) 411 | 412 | D = np.diag(self.S) 413 | Drel = np.cumsum(D) / np.sum(D) 414 | 415 | # make plot 416 | plt.figure() 417 | display_lower = display.lower() 418 | 419 | if display_lower == 'double': 420 | # plot singular values 421 | plt.subplot(2, 1, 1) 422 | plt.stem(D[:num_values]) 423 | plt.title(f'First {num_values} Singular Values') 424 | plt.xlabel('Lags') 425 | plt.ylabel('singular values') 426 | # plot relative singular values 427 | plt.subplot(2, 1, 2) 428 | plt.bar(np.arange(1, num_values + 1), Drel[:num_values]) 429 | plt.xlabel('Lags') 430 | plt.ylabel('relative contribution') 431 | plt.title('Relative contribution to signal variance') 432 | elif display_lower == 'cm': 433 | plt.bar(np.arange(1, num_values + 1), Drel[:num_values]) 434 | plt.xlabel('Lags') 435 | plt.ylabel('relative contribution') 436 | plt.title('Cumulated Singular Values:\n Relative contribution to signal variance') 437 | elif display_lower == 'scree': 438 | plt.stem(D[:num_values]) 439 | plt.title(f'First {num_values} Singular Values') 440 | plt.xlabel('Lags') 441 | plt.ylabel('singular values') 442 | else: 443 | raise ValueError('Available display options are: double, scree, cm') 444 | 445 | plt.show() 446 | 447 | 448 | def wcorrelation(self, G, display='on'): 449 | """ 450 | WCORRELATION returns the w-correlation matrix of two series. 451 | C = wcorrelation(self,G, display) returns a symmetric matrix C of 452 | weighted correlation coefficients calculated from an input 453 | nvar-by-nobs matrix Y where columns are observations and 454 | rows are variables, and an input 1-by-nobs vector w of 455 | weights for the observations. 456 | """ 457 | 458 | # Validate G0 459 | G = self.validateG0(G) 460 | 461 | Y = self.grouping(G, "off") 462 | n_obs = Y.shape[1] # nobs: number of observations; nvar: number of variables 463 | 464 | # ---------------- compute weights --------------- 465 | w = np.zeros((1,n_obs)) 466 | 467 | L = self.L + 1 468 | w[0, :L] = np.arange(1, L + 1) 469 | w[0, (L+1):n_obs - L + 1] = L 470 | w[0, n_obs - L + 1:] = n_obs - np.arange(n_obs - L + 1, n_obs) - 1 471 | 472 | # ------------------------------------------------ 473 | w_mean = (Y @ w.transpose()) / np.sum(w) # weighted means of Y 474 | temp = Y - w_mean # center Y by removing weighted means 475 | 476 | temp = temp @ (temp * w).T # weighted covariance matrix 477 | temp = 0.5 * (temp + temp.T) # Must be exactly symmetric 478 | R = np.diag(temp) 479 | C = temp / np.sqrt(R * R[:, np.newaxis]) # Matrix of Weighted Correlation Coefficients 480 | # ------------------------------------------------- 481 | # plot w-correlation matrix 482 | if display == 'on': 483 | plt.figure() 484 | sns.heatmap(np.abs(C)) 485 | plt.title('w-correlation matrix') 486 | plt.show() 487 | 488 | return C 489 | 490 | def scatterplotsEigenvectors(self, G): 491 | """ 492 | Scatter-plots of the paired singular vectors according to groups in G. 
493 | Produces plots of paired eigenvectors to show the periodicity of the corresponding component. 494 | 495 | Parameters: 496 | self: An ssaBasic class object 497 | G: A list or NumPy array of group labels for the eigenvectors 498 | """ 499 | self.validateG0(G) 500 | 501 | len_g = np.max(G.shape) 502 | max_group = int(np.max(G)) 503 | all_pos = np.arange(0, len_g) 504 | 505 | # draw figure 506 | plt.figure() 507 | for k in range(1, max_group + 1): 508 | indices = all_pos[G == k] # Adjust for 0-based indexing 509 | if np.max(indices.shape) == 2: 510 | tmp_x = self.V[:, indices] # Transpose to match MATLAB's column-wise extraction 511 | plt.subplot(max_group, 1, k) 512 | # plt.scatter(tmp_x[0, :], tmp_x[1, :], marker=".") 513 | plt.plot(tmp_x[:, 0], tmp_x[:, 1]) 514 | plt.grid(True) 515 | plt.title(f'Scatterplot of Group_{k}') 516 | plt.xlabel(f'V_{indices[0] + 1}') # Adjust for 1-based indexing 517 | plt.ylabel(f'V_{indices[1] + 1}') # Adjust for 1-based indexing 518 | plt.axis('equal') # Equal scaling for both axes 519 | else: 520 | print(f'Component {k} corresponds to {np.max(indices.shape)} singular vectors; scatter plot not possible.') 521 | plt.show() 522 | 523 | def crossval_r0(self, qInSample=0.9, numTest=100, display="on"): 524 | """ 525 | CROSSVAL_R0 does the cross-validation eigen-triples number r0 526 | best_r0 = crossval_r0(self, qInSample, numTest, display) takes as optional inputs p the proportion of sample used for cross-validation (in-sample) and the number of trials (numTest) and 527 | gives the number of eigen-triples which minimizes the total rmse (in-sample + out-of-sample). 528 | [best_r0, best_rmse] = crossval_r0(self, qInSample, numTest, display) provides also the root mean square error of best_r0. 529 | """ 530 | 531 | if not isinstance(qInSample, (int, float)) or not (0 <= qInSample <= 1): 532 | raise('qInSample must be a number between 0 and 1.') 533 | 534 | numInSamp = int(np.floor(np.max(self.x.shape) * qInSample)) 535 | X0 = self.x + self.mX 536 | 537 | inX = X0[0, :numInSamp][np.newaxis, :] 538 | outX = X0[0, numInSamp:][np.newaxis, :] 539 | L0 = self.L 540 | tmpSSA = ssaBasic(inX, L0) 541 | 542 | [ft, max_r0] = tmpSSA.checkMaxSingularValues() 543 | 544 | array_test = np.floor(np.linspace(2, max_r0, numTest)).astype(int)[:, np.newaxis] 545 | 546 | inErr = np.zeros((numTest,1)) 547 | outErr = np.zeros((numTest,1)) 548 | 549 | for ii in range(0, numTest): 550 | # In-sample reconstruction error 551 | tmpX = tmpSSA.reconstruction(int(array_test[ii, 0])) 552 | inErr[ii, 0] = np.sqrt(np.mean((inX - tmpX) ** 2)) 553 | 554 | # Out-of-sample forecast error 555 | [tmpX, _, _] = tmpSSA.forecast(int(array_test[ii, 0]), np.max(outX.shape)) 556 | outErr[ii, 0]= np.sqrt(np.mean((outX - tmpX[0, numInSamp:]) ** 2)) 557 | 558 | # total error (in-sample + out-sample) 559 | totErr = (1 - qInSample) * inErr + qInSample * outErr 560 | best_idx = np.argmin(totErr) 561 | best_r0 = array_test[best_idx, 0] 562 | 563 | best_rmse = totErr[best_idx] 564 | 565 | if display == "on": 566 | plt.figure(figsize=(10, 5)) 567 | plt.plot(array_test, np.log(outErr), 'd', label='outError', markersize=7) 568 | plt.plot(array_test, np.log(inErr), 's', label='inError', markersize=7) 569 | plt.plot(array_test, np.log(totErr), '-', linewidth=1.5, label='total') 570 | plt.title(f'Cross-validation r with L = {L0}') 571 | plt.xlabel('L') 572 | plt.ylabel('RMSE (log-scale)') 573 | plt.legend(loc='upper right') 574 | plt.grid(True) 575 | plt.xlim([array_test[0], array_test[-1]]) 576 | plt.tight_layout() 577 
| plt.show() 578 | 579 | return best_r0, best_rmse 580 | 581 | 582 | def crossval_L0(self, r0, qInSample = 0.9, numTest = 100, display= "on"): 583 | """ 584 | CROSSVAL_L0 does the cross-validation of number of lags L0 585 | best_L0 = crossval_L0(self, r0, qInSample, numTest, display) given the number of eigen-triples r0, tests the best number of lags L0. 586 | It takes as optional inputs qInSample, the proportion of sample for cross-validation (in-sample) and the number of trials (numTest). best_L0 is the number of lags which minimizes the total rmse (in-sample + out-of-sample). 587 | [best_L0, best_rmse] = crossval_L0(self, qInSample, numTest, display) provides also the root mean square error of best_L0. 588 | """ 589 | 590 | r0 = self.validateR0(r0) 591 | if not isinstance(qInSample, (int, float)) or not (0 <= qInSample <= 1): 592 | raise('qInSample must be a number between 0 and 1.') 593 | 594 | numInSamp = int(np.max(self.x.shape) * qInSample) 595 | X0 = self.x + self.mX 596 | 597 | inX = X0[0, :numInSamp][np.newaxis, :] 598 | outX = X0[0,numInSamp:][np.newaxis, :] 599 | 600 | max_L0 = int(numInSamp // 2) 601 | 602 | min_L0 = np.max(r0)+1 603 | 604 | array_test = np.floor(np.linspace(min_L0, max_L0, numTest)).astype(int)[:, np.newaxis] 605 | 606 | inErr = np.zeros((numTest,1)) 607 | outErr = np.zeros((numTest,1)) 608 | 609 | for ii in range(0, numTest): 610 | tmpSSA = ssaBasic(inX, int(array_test[ii,0])) 611 | 612 | # In-sample reconstruction error 613 | tmpX_in = tmpSSA.reconstruction(r0) 614 | inErr[ii, 0] = np.sqrt(np.mean((inX - (tmpX_in + tmpSSA.mX)) ** 2)) 615 | 616 | # Out-of-sample forecast error 617 | [tmpX_out, xCi_out, xSamp_out] = tmpSSA.forecast(r0, np.max(outX.shape)) 618 | outErr[ii, 0]= np.sqrt(np.mean((outX - (tmpX_out[0,numInSamp:])) ** 2)) 619 | 620 | totErr = (1 - qInSample) * inErr + qInSample * outErr 621 | 622 | best_idx = np.argmin(totErr) 623 | best_L0 = array_test[best_idx,0] 624 | best_rmse = totErr[best_idx] 625 | 626 | if display == "on": 627 | plt.figure(figsize=(10, 5)) 628 | plt.plot(array_test, np.log(outErr), 'd', label='outError', markersize=7) 629 | plt.plot(array_test, np.log(inErr), 's', label='inError', markersize=7) 630 | plt.plot(array_test, np.log(totErr), '-', linewidth=1.5, label='total') 631 | plt.title(f'Cross-validation r with r_prior = {max(r0)}') 632 | plt.xlabel('L') 633 | plt.ylabel('RMSE (log-scale)') 634 | plt.legend(loc='upper right') 635 | plt.grid(True) 636 | plt.xlim([array_test[0], array_test[-1]]) 637 | plt.tight_layout() 638 | plt.show() 639 | 640 | return best_L0, best_rmse 641 | 642 | 643 | def backtest(self, r0, qInSample): 644 | """ 645 | Performs SSA forecast backtesting with optional red noise model for residual correction. 
646 | 647 | Parameters: 648 | x : time series data (1D array) 649 | L : SSA window length 650 | N : length of the time series (should be len(x)) 651 | r0 : list or array of eigentriple indices (or max r0 as int) 652 | qInSample : list/array of proportions (between 0 and 1) for in-sample size 653 | 654 | Returns: 655 | testRMSE : array of RMSEs, shape (len(qInSample), 2) 656 | xF : forecasts, shape (max out-sample length, 2) 657 | """ 658 | # Validate inputs 659 | r0 = self.validateR0(r0) 660 | if np.any((qInSample < 0) | (qInSample > 1)): 661 | raise ValueError("qInSample must be numbers between 0 and 1.") 662 | 663 | lenqInSample = np.max(qInSample.shape) 664 | testRMSE = np.zeros((lenqInSample, 2)) 665 | numObs = self.N 666 | minInSamp = int(np.floor(min(qInSample) * numObs)) 667 | maxOutSamp = numObs - minInSamp 668 | xF = np.zeros((maxOutSamp, 2)) 669 | 670 | L0 = self.L 671 | 672 | for idx, q in enumerate(qInSample): 673 | inSampObs = int(np.floor(q * numObs)) 674 | outSampObs = numObs - inSampObs 675 | inX = self.x[:, :inSampObs] + self.mX 676 | outX = self.x[:, inSampObs:] 677 | mySSA = ssaBasic(inX, L0) 678 | # SSA forecasting 679 | [xF_SSA, _, _] = mySSA.forecast(r0, outSampObs) 680 | xR_SSA = xF_SSA[:,:inSampObs] 681 | xF_SSA = xF_SSA[:,inSampObs:] 682 | testRMSE[idx, 0] = np.sqrt(np.mean((outX + self.mX - xF_SSA) ** 2)) 683 | xF_SSA_SARIMA = np.zeros(outSampObs) 684 | xF[:,0] = xF_SSA 685 | xF[:,1] = xF_SSA_SARIMA 686 | 687 | return testRMSE, xF 688 | 689 | def validateNumVal(self, numVal): 690 | 691 | if not np.isscalar(numVal): 692 | raise TypeError("numVal must be a scalar") 693 | 694 | numVal = abs(numVal) 695 | 696 | if numVal > self.L: 697 | raise ValueError("numVal must be less than or equal to the embedding dimension L") 698 | 699 | return numVal 700 | 701 | def validateG0(self, G0): 702 | # Check if empty 703 | if G0 is None: 704 | raise ValueError("G0 must be a non-empty, positive, numeric array or number") 705 | 706 | # Check that G0 is non-negative 707 | if not self.mustBePositive(G0): 708 | raise ValueError("G0 must be a non-empty, positive, numeric array or number") 709 | 710 | self.checkMaxSingularValues(G0) 711 | self.noGaps(G0) 712 | 713 | if np.max(G0.shape) == self.L+1: 714 | return G0 715 | else: 716 | raise ValueError("length(G0) must be equal to the embedding dimension L + 1") 717 | 718 | 719 | def mustBePositive(self, value): 720 | """ 721 | Checks if the input is a non-negative integer or if all elements in a numpy array are non-negative numbers. 722 | 723 | Parameters: 724 | value (int or np.ndarray): A single integer or a numpy array of numbers. 725 | 726 | Returns: 727 | bool: True if the integer is non-negative or all numbers in the array are non-negative. 728 | """ 729 | if isinstance(value, int): 730 | return value >= 0 731 | elif isinstance(value, (np.ndarray, np.matrix)): 732 | if np.any(value < 0): 733 | raise ValueError("Input must be all non-negative") 734 | else: 735 | raise TypeError("Input must be an integer or a numpy array of numbers.") 736 | return True 737 | 738 | def noGaps(self, G0): 739 | """ 740 | Check if the array G0 has any gaps in the elements. 741 | """ 742 | maxValue = int(np.max(G0)) 743 | requiredNumbers = np.array(range(1, maxValue+1)) 744 | presentNumbers = np.unique(G0) 745 | for i_number in requiredNumbers: 746 | if i_number not in presentNumbers: 747 | raise ValueError("G0 must not have any gaps.") 748 | 749 | 750 | def hankelization(self, Y): 751 | """ 752 | Hankelization of matrix Y.
753 | Computes the averages of the anti-diagonals of matrix Y and 754 | stores the results in a 1D array. 755 | 756 | Parameters: 757 | Y (np.ndarray): Input 2D Hankel matrix. 758 | 759 | Returns: 760 | np.ndarray: Row vector containing the averaged anti-diagonals (Reconstructed time series) 761 | """ 762 | 763 | n, m = Y.shape 764 | N = n + m - 1 # number of elements in the array y 765 | y = np.zeros((1, N)) 766 | Y = np.fliplr(Y) # flip Y along the vertical axis 767 | 768 | for ii in range(0, N): # CHANGED 769 | kk = ii - n + 1 770 | y[0, ii] = np.mean(np.diag(Y, kk)) 771 | 772 | return np.flip(y) 773 | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | 784 | 785 | 786 | 787 | 788 | -------------------------------------------------------------------------------- /smith_wilson/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 GregorFabjan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /smith_wilson/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 🐍 Smith & Wilson algorithm 🐍 4 | 5 |

6 | 7 |
8 | 9 | Popular algorithm for interpolating and extrapolating various curves such as bond yields and risk-free rates. 10 | 11 | This implementation is based on the [Technical documentation of the Methodology to derive EIOPA's risk-free interest rate term structure](https://www.eiopa.europa.eu/sites/default/files/risk_free_interest_rate/12092019-technical_documentation.pdf). 12 | 13 | The link is for the version published on 12/09/2019. See Section 7. 14 | 15 | ## Problem 16 | 17 | When analysing market expectations of future rates, a common approach is to look at fixed income instruments such as government or corporate bonds that mature in the future. In practice, the maturities observable (and liquid) on the market rarely cover all the maturities that are needed. 18 | 19 | ## Solution 20 | 21 | This implementation takes as input the available market information, parameters describing the long-term behaviour of the curve, and the desired (target) maturities for which the yields are needed. 22 | 23 | ### Available market information 24 | 25 | - Observed yields of the Zero-Coupon Bonds (ZCB). 26 | - Maturity of the observed ZCB. 27 | 28 | ### Parameters 29 | 30 | - Ultimate forward rate `ufr` represents the rate to which the rate curve will converge as time increases. 31 | - Convergence speed parameter α controls the speed at which the curve converges towards the ufr parameter from the last liquid point (the last data point available in the market information input). 32 | 33 | ### Desired output 34 | - Yields for the list of target maturities for which the SW algorithm is run. 35 | 36 | Note that this implementation assumes that the yields were calculated on ZCB. This assumption can be easily relaxed in future releases. 37 | 38 | The implementation is split into two parts: 39 | 40 | - The available market data and the parameters are used to "calibrate" the algorithm. This returns a calibration vector that can be used to interpolate or extrapolate target maturities. This is done by calibrating the kernel functions. Look at the function `SWCalibrate()`. 41 | - The yields for ZCB with targeted maturities are interpolated/extrapolated. Look at the function `SWExtrapolate()`. 42 | 43 | The syntax in the functions tries to be consistent with the EIOPA technical specifications. 44 | 45 | ## Getting started 46 | 47 | The user is given data on 6 ZCB with maturities of 1, 2, 4, 5, 6, and 7 years, with observed yields of 1%, 2%, 3%, 3.2%, 3.5%, and 4% respectively. The user is interested in yields for ZCB at maturities 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, and 20 years. The given calibration for the parameter alpha is 0.15 and the ultimate forward rate is 4%.
48 | 49 | ```python 50 | import numpy as np 51 | from SWCalibrate import SWCalibrate as SWCalibrate 52 | from SWExtrapolate import SWExtrapolate as SWExtrapolate 53 | 54 | # Yields observed on the market 55 | r_Obs = np.transpose(np.array([0.01, 0.02, 0.03, 0.032, 0.035, 0.04])) 56 | 57 | # Maturities of bonds observed on the market 58 | M_Obs = np.transpose(np.array([1, 2, 4, 5, 6, 7])) 59 | 60 | # Ultimate forward rate 61 | ufr = 0.04 62 | 63 | # Convergence speed parameter 64 | alpha = 0.15 65 | 66 | # Targeted maturities for interpolation/extrapolation 67 | M_Target = np.transpose(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20])) 68 | 69 | # Calibration vector calculation 70 | b = SWCalibrate(r_Obs, M_Obs, ufr, alpha) 71 | 72 | # Calculation of target yields 73 | r_Target = SWExtrapolate(M_Target, M_Obs, b, ufr, alpha) 74 | 75 | # Display target yields 76 | print("The interpolated/extrapolated rates are:") 77 | print(r_Target) 78 | ``` 79 | 80 | ## About the example in main.py 81 | 82 | main.py contains a script with an example from EIOPA's own Excel implementation tool (Smith-Wilson Risk-Free Interest Rate Extrapolation Tool 27102015.xlsb). In this example, the yields are available for ZCB maturing in 1 year, 2 years, ..., 20 years. The output is the curve for up to 65 years. 83 | 84 | ### Note: 85 | To extrapolate the curve, it is enough to know the additional parameters (alpha and ufr), the maturities used for calibration and the vector Q*b. If this is the case, it is not difficult to modify the function `SWExtrapolate()` to take as input Qb instead of b; a sketch of such a function is shown below. For an example of this, see the Jupyter Notebook at https://github.com/open-source-modelling/insurance_python/tree/main/EIOPA_smith_wilson_test . 86 | 87 | An example of this format is the monthly risk-free rate published by the European Insurance and Occupational Pensions Authority (https://www.eiopa.europa.eu/tools-and-data/). 88 | 89 |
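A minimal sketch of that modification (the function name `SWExtrapolateQb` is ours; the logic simply mirrors `SWExtrapolate()` with the precomputed product `Q @ b` passed in directly):

```python
import numpy as np
from SWHeart import SWHeart

def SWExtrapolateQb(M_Target, M_Obs, Qb, ufr: float, alpha: float):
    """Variant of SWExtrapolate() that takes the precomputed vector Qb = Q @ b,
    the format in which the EIOPA monthly risk-free rate parameters are published."""
    H = SWHeart(M_Target, M_Obs, alpha)             # Heart of the Wilson function
    d_target = np.exp(-np.log(1 + ufr) * M_Target)  # ufr-implied ZCB prices for target maturities
    p = d_target + np.diag(d_target) @ H @ Qb       # discount prices for target maturities
    return p ** (-1 / M_Target) - 1                 # convert prices back to annualized rates
```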
90 | 91 | If you have any suggestions for improving the code/comments etc., please let us know. 92 | -------------------------------------------------------------------------------- /smith_wilson/SWCalibrate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def SWCalibrate(r, M, ufr: float, alpha: float): 4 | """ 5 | Calculate the calibration vector using the Smith-Wilson algorithm. 6 | 7 | Calculates the calibration vector `b` used for interpolation and extrapolation of rates. 8 | 9 | Arguments: 10 | r: n x 1 ndarray of rates for which you wish to calibrate the algorithm. Each rate belongs to an observable zero-coupon bond with a known maturity. Example: r = np.array([[0.0024], [0.0034]]) 11 | M: n x 1 ndarray of maturities of bonds that have rates provided in the input `r`. Example: M = np.array([[1], [3]]) 12 | ufr: Floating number representing the ultimate forward rate. Example: ufr = 0.042 13 | alpha: Floating number representing the convergence speed parameter alpha. Example: alpha = 0.05 14 | 15 | Returns: 16 | n x 1 ndarray representing the calibration vector needed for interpolation and extrapolation. Example: b = np.array([[14], [-21]]) 17 | 18 | For more information, refer to the documentation at: 19 | https://www.eiopa.europa.eu/sites/default/files/risk_free_interest_rate/12092019-technical_documentation.pdf 20 | """ 21 | 22 | from SWHeart import SWHeart as SWHeart 23 | 24 | C = np.identity(M.size) 25 | p = (1+r) **(-M) # Transform rates to implied market prices of a ZCB bond 26 | d = np.exp(-np.log(1+ufr) * M) # Calculate vector d described in paragraph 138 27 | Q = np.diag(d) @ C # Matrix Q described in paragraph 139 28 | q = C.transpose() @ d # Vector q described in paragraph 139 29 | H = SWHeart(M, M, alpha) # Heart of the Wilson function from paragraph 132 30 | 31 | return np.linalg.inv(Q.transpose() @ H @ Q) @ (p-q) # Calibration vector b from paragraph 149 32 | -------------------------------------------------------------------------------- /smith_wilson/SWExtrapolate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def SWExtrapolate(M_Target, M_Obs, b, ufr: float, alpha: float): 4 | """ 5 | Interpolate or extrapolate rates for targeted maturities using the Smith-Wilson algorithm. 6 | 7 | Calculates the rates for maturities specified in `M_Target` using the calibration vector `b` obtained 8 | from observed bond maturities in `M_Obs`. 9 | 10 | Arguments: 11 | M_Target: k x 1 ndarray representing each targeted bond maturity of interest. Example: M_Target = np.array([[1], [2], [3], [5]]) 12 | M_Obs: n x 1 ndarray representing the observed bond maturities used for calibrating the calibration vector `b`. Example: M_Obs = np.array([[1], [3]]) 13 | b: n x 1 ndarray representing the calibration vector calculated on observed bonds. 14 | ufr: Floating number representing the ultimate forward rate. Example: ufr = 0.042 15 | alpha: Floating number representing the convergence speed parameter alpha. Example: alpha = 0.05 16 | 17 | Returns: 18 | k x 1 ndarray representing the targeted rates for zero-coupon bonds. Each rate belongs to a targeted 19 | zero-coupon bond with a maturity from `M_Target`. 
Example: r = np.array([0.0024, 0.0029, 0.0034, 0.0039]) 20 | 21 | For more information, refer to the documentation at: 22 | https://www.eiopa.europa.eu/sites/default/files/risk_free_interest_rate/12092019-technical_documentation.pdf 23 | """ 24 | 25 | from SWHeart import SWHeart as SWHeart 26 | C = np.identity(M_Obs.size) 27 | d = np.exp(-np.log(1+ufr) * M_Obs) # Calculate vector d described in paragraph 138 28 | Q = np.diag(d) @ C # Matrix Q described in paragraph 139 29 | H = SWHeart(M_Target, M_Obs, alpha) # Heart of the Wilson function from paragraph 132 30 | p = np.exp(-np.log(1+ufr)* M_Target) + np.diag(np.exp(-np.log(1+ufr) * M_Target)) @ H @ Q @ b # Discount pricing function for targeted maturities from paragraph 147 31 | return p ** (-1/ M_Target) -1 # Convert obtained prices to rates and return prices 32 | -------------------------------------------------------------------------------- /smith_wilson/SWHeart.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def SWHeart(u, v, alpha: float): 4 | """ 5 | Calculate the heart of the Wilson function. 6 | 7 | Calculates the matrix H (Heart of the Wilson function) for maturities specified by vectors u and v. 8 | The formula is taken from the EIOPA technical specifications paragraph 132. 9 | 10 | Arguments: 11 | u: n_1 x 1 vector of maturities. Example: u = [1, 3] 12 | v: n_2 x 1 vector of maturities. Example: v = [1, 2, 3, 5] 13 | alpha: 1 x 1 floating number representing the convergence speed parameter alpha. Example: alpha = 0.05 14 | 15 | Returns: 16 | n_1 x n_2 matrix representing the Heart of the Wilson function for selected maturities and parameter alpha. 17 | H is calculated as described in paragraph 132 of the EIOPA documentation. 18 | 19 | For more information, see: 20 | https://www.eiopa.europa.eu/sites/default/files/risk_free_interest_rate/12092019-technical_documentation.pdf 21 | """ 22 | 23 | u_Mat = np.tile(u, [v.size, 1]).transpose() 24 | v_Mat = np.tile(v, [u.size, 1]) 25 | 26 | # Return the heart of the Wilson function from paragraph 132 27 | return 0.5 * (alpha * (u_Mat + v_Mat) + np.exp(-alpha * (u_Mat + v_Mat)) - alpha * np.absolute(u_Mat-v_Mat) - np.exp(-alpha * np.absolute(u_Mat-v_Mat))) 28 | -------------------------------------------------------------------------------- /smith_wilson/Smith&Wilson_example.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-source-modelling/insurance_python/db27be755c757a5b4b676e9f04c2ccd3cd75f9ce/smith_wilson/Smith&Wilson_example.xlsx -------------------------------------------------------------------------------- /smith_wilson/main.py: -------------------------------------------------------------------------------- 1 | # Smith & Wilson algorithm 2 | 3 | """ Smith & Wilson is a widely used algorithm that can do both interpolation and 4 | extrapolation of rates at the same time. This implementation is based on the 5 | Technical documentation published by EIOPA and used to derive the risk-free interest 6 | rate term structure (Version published on 12/09/2019). 7 | """ 8 | ## This example 9 | 10 | """ The example is taken from EIOPA's own Excel implementation tool (_Smith-Wilson 11 | Risk-Free Interest Rate Extrapolation Tool 27102015.xlsb_). In this example we 12 | have observed the zero-cupon bond rates for bonds maturing in 1 year, 2 years, ... 20 years. 13 | We are interested in extrapolating the curve for up to 65 years. 
14 | To make the code more readable, numpy is used for matrix multiplication. 15 | """ 16 | import numpy as np 17 | from SWCalibrate import SWCalibrate as SWCalibrate 18 | from SWExtrapolate import SWExtrapolate as SWExtrapolate 19 | 20 | ## Inputs 21 | # - Observed spot rates (r_Obs) 22 | # - Maturities for the observed spot rates (M_Obs) 23 | # - Ultimate Forward Rate (ufr) 24 | # - Convergence parameter alpha (alpha) 25 | # - Targeted maturities (M_Target) 26 | 27 | M_Obs = np.transpose(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20])) 28 | r_Obs = np.transpose(np.array([0.0131074591432979, 0.0222629098372424, 0.0273403667327403, 0.0317884414257146, 0.0327205345299401, 0.0332867589595655, 0.0336112121443886, 0.0341947663149128, 0.0345165922380981, 0.0346854377006694, 0.0357173340791270, 0.0368501673784445, 0.0376263620230677, 0.0385237084707761, 0.0395043823351044, 0.0401574909803133, 0.0405715278625131, 0.0415574765441695, 0.0415582458410996, 0.0425511326946310])) 29 | ufr = 0.042 # Ultimate forward rate ufr represents the rate to which the rate curve will converge as time increases. 30 | alpha = 0.142068 # Convergence speed parameter alpha controls the speed at which the curve converges towards the ufr from the last liquid point. 31 | M_Target = np.transpose(np.arange(1,66)) # Maturities for which the SW algorithm calculates the rates. In this case, every year up to 65. 32 | 33 | ## Implementation 34 | b = SWCalibrate(r_Obs, M_Obs, ufr, alpha) # The calibration of the kernel functions 35 | # is done by the function SWCalibrate(). 36 | 37 | r_Target = SWExtrapolate(M_Target, M_Obs, b, ufr, alpha) # The interpolation/extrapolation of targeted 38 | # maturities is done by the function SWExtrapolate(). 39 | print("The interpolated/extrapolated rates are:") 40 | print(r_Target) 41 | 42 | ## Test 43 | """ The vector "expected" contains the values from the Excel implementation released 44 | by EIOPA. This is not needed for the actual calculations but is used at the 45 | end to show the goodness of fit. The second norm of the difference 46 | between the Excel results and this Python implementation is shown below.
47 | """ 48 | expected = np.transpose(np.array([ 0.0131074591433162, 0.0222629098372631, 0.0273403667327665, 0.0317884414257348, 0.0327205345299595, 0.0332867589595818, 0.0336112121444057, 0.0341947663149282, 0.0345165922381123, 0.0346854377006820, 0.0357173340791390, 0.0368501673784565, 0.0376263620230795, 0.0385237084707877, 0.0395043823351151, 0.0401574909803222, 0.0405715278625236, 0.0415574765441811, 0.0415582458411092, 0.0425511326946399, 0.0436656239235407, 0.0445561338093701, 0.0452628707713729, 0.0458188495571263, 0.0462512293260686, 0.0465823804152550, 0.0468307431055235, 0.0470115242330582, 0.0471372655651476, 0.0472183095640757, 0.0472631822720417, 0.0472789087725782, 0.0472712735066854, 0.0472450353102873, 0.0472041051721557, 0.0471516932406448, 0.0470904304327322, 0.0470224690500156, 0.0469495660338741, 0.0468731518591676, 0.0467943875455887, 0.0467142118366739, 0.0466333802421182, 0.0465524973460913, 0.0464720435419177, 0.0463923971530968, 0.0463138527348181, 0.0462366362129754, 0.0461609174043216, 0.0460868203676226, 0.0460144319580649, 0.0459438088931750, 0.0458749835854031, 0.0458079689527213, 0.0457427623823397, 0.0456793489926264, 0.0456177043135153, 0.0455577964851157, 0.0454995880572642, 0.0454430374586101, 0.0453881001922050, 0.0453347298048383, 0.0452828786693675, 0.0452324986125916, 0.0451835414157220])) 49 | 50 | print(np.linalg.norm(r_Target-expected, ord=2)) 51 | -------------------------------------------------------------------------------- /stationary_bootstrap/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Gregor 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /stationary_bootstrap/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 🐍 A block resampling method used for weakly-dependent stationary time-series data 🐍 4 | 5 |

6 | 7 | Methodology proposed in the 1994 paper by [Politis & Romano](https://www.researchgate.net/publication/254287565_The_Stationary_Bootstrap). 8 | 9 | ## Problem 10 | When using non-parametric tools to generate counterfactual scenarios or empirical distributions, bootstrapping methods have proved to be powerful and easy-to-use tools. However, the bootstrap in its simplest implementation assumes a time-series in which observations are independent. In many applications this is not the case. 11 | 12 | An example of this is interest rate modelling when business cycles need to be considered. The presence of business cycles makes the time-series weakly time dependent. To account for this property, block-resampling techniques are used. 13 | 14 | ## Solution 15 | 16 | The stationary bootstrap is a block-resampling technique that relaxes the classical bootstrap's assumption that the sampling block has a fixed length. The user still needs to specify an average length, but because this is then applied as a statistical average, shorter/longer blocks are also present in the final sample. 17 | The algorithm works by randomly selecting a starting point in the time-series, and at each step it either increases the block size by one or selects a new block with a new starting point. This choice happens with a fixed probability governed by the parametrisation. 18 | 19 | ### Input 20 | - A time-series that you want to bootstrap. 21 | - The parameter m describing the average duration of the blocks in the sample. 22 | - The length of the output sample. 23 | 24 | ### Output 25 | - Vector of bootstrapped values of specified length. 26 | 27 | ## Getting started 28 | 29 | Given the time-series with observed values 0.4, 0.2, 0.1, 0.4, 0.3, 0.1, 0.3, 0.4, 0.2, 0.5, 0.1, and 0.2, the user is looking to bootstrap a new sample of length 12 where the average block is of size 4. 30 | 31 | ```python 32 | import numpy as np 33 | from stationary_bootstrap import stationary_bootstrap 34 | 35 | # Original time-series 36 | data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) 37 | 38 | # Average length of the block 39 | m = 4 40 | 41 | # Length of output sample 42 | sample_length = 12 43 | 44 | ans = stationary_bootstrap(data, m, sample_length) 45 | 46 | print(ans) 47 | # Out[0]: np.array([0.3, 0.4, 0.2, 0.2, 0.4, 0.1, 0.3, 0.4, 0.2, 0.5, 0.4, 0.1]) 48 | ``` 49 | -------------------------------------------------------------------------------- /stationary_bootstrap/stationary_bootstrap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def stationary_bootstrap(data: np.ndarray, m: float, sample_length: int)-> np.ndarray: 4 | """ 5 | Generate a bootstrapped sample of a time series using the stationary bootstrap method 6 | (Politis & Romano, 1994). This method resamples data with random-length blocks to 7 | preserve temporal dependency. 8 | 9 | Args: 10 | data (numpy.ndarray): A 1-dimensional array containing the time-series data. 11 | m (float): The average block length for resampling. Must be positive. 12 | sample_length (int): The desired length of the bootstrapped sample. Must be positive. 13 | 14 | Returns: 15 | np.ndarray: An array of length `sample_length` containing the bootstrapped sample. 16 | 17 | Raises: 18 | ValueError: If m is not positive. 19 | ValueError: If sample_length is not positive. 20 | ValueError: If data is not a numpy array. 21 | ValueError: If data array is empty. 22 | ValueError: If `data` is not a 1-dimensional numpy array.
23 | 24 | Example of use: 25 | >>> import numpy as np 26 | >>> data = np.array([1,2,3,4,5,6,7,8,9,10]) 27 | >>> m = 4 28 | >>> sample_length = 12 29 | >>> stationary_bootstrap(data, m, sample_length) 30 | Out[0]: array([9.,3.,4.,5.,6.,7.,8.,7.,2.,3.,4.,2.]) 31 | 32 | Reference: 33 | Dimitris N. Politis & Joseph P. Romano (1994) The Stationary Bootstrap, Journal of the American Statistical 34 | Association, 89:428, 1303-1313, DOI: 10.1080/01621459.1994.10476870 35 | 36 | Implemented by Gregor Fabjan from Qnity Consultants on 12/11/2021. 37 | """ 38 | 39 | # Input validation 40 | if m <= 0: 41 | raise ValueError("Block length 'm' must be positive") 42 | if sample_length <= 0: 43 | raise ValueError("Sample length must be positive") 44 | if not isinstance(data, np.ndarray): 45 | raise ValueError("data needs to be as a numpy array") 46 | if data.size == 0: 47 | raise ValueError("Data array cannot be empty") 48 | if data.ndim != 1: 49 | raise ValueError("data must be a 1-dimensional array") 50 | 51 | 52 | accept = 1/m 53 | data_length = data.shape[0] 54 | 55 | sample_index = np.random.randint(0,high =data_length,size=1)[0] 56 | sample = np.zeros((sample_length,)) 57 | for i_sample in range(sample_length): 58 | if np.random.uniform(0,1,1)>=accept: 59 | sample_index += 1 60 | if sample_index >= data_length: 61 | sample_index=0 62 | else: 63 | sample_index = np.random.randint(0,high = data_length, size=1)[0] 64 | 65 | sample[i_sample] = data[sample_index] 66 | return sample 67 | -------------------------------------------------------------------------------- /stationary_bootstrap/stationary_bootstrap_calibrate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def OptimalLength(data: np.ndarray) ->float: 4 | """ 5 | Function calculates the optimal parameter value when using a stationary bootstraping algorithm. 6 | The method is based on the 2004 paper by Politis & White: 7 | Dimitris N. Politis & Halbert White (2004) Automatic Block-Length Selection for the Dependent Bootstrap, Econometric Reviews, 8 | 23:1, 53-70, DOI: 10.1081/ETC-120028836 9 | 10 | The code was modified compared to Patton's implementation in that it takes as input a one dimensional time-series 11 | and returns the optimalblock size only for the stationary bootstrap algorithm. 12 | 13 | Warning! The minimal size of the time series is 9 elements. 14 | 15 | 16 | Parameters 17 | ---------- 18 | data ... ndarray array containing the time-series that we wish to bootstrap. 19 | Ex. np.array([-1,0.2,0.3,0.7,0.5,0.1,0.4,0.3,0.5]) 20 | 21 | Returns 22 | ------- 23 | Bstar ... optimal value of the parameter m Ex. 1 24 | 25 | Example of use: 26 | >>> import numpy as np 27 | >>> data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) 28 | >>> OptimalLength(data) 29 | Out[0]: 4.0 30 | 31 | Original Matlab version written by: 32 | James P. LeSage, Dept of Economics 33 | University of Toledo 34 | 2801 W. Bancroft St, 35 | Toledo, OH 43606 36 | jpl@jpl.econ.utoledo.edu 37 | 38 | This Python implementation is based on Andrew J. Patton's Matlab code avalible at: 39 | http://public.econ.duke.edu/~ap172/ 40 | 41 | Implemented by Gregor Fabjan from Qnity Consultants on 12/11/2021. 
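    In outline, the returned value follows the Politis & White plug-in rule
    (this restates the computation at the end of this function; it is not a new method):

        Bstar = (2 * Ghat**2 / DSBhat)**(1/3) * n**(1/3)

    where Ghat and DSBhat are flat-top lag-window estimates built from the sample
    autocovariances, and the result is capped at bmax = ceil(min(3*sqrt(n), n/3)).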
42 | """ 43 | 44 | n = data.shape[0] 45 | kn = max(5,np.sqrt(np.log10(n))) 46 | mmax = int(np.ceil(np.sqrt(n))+kn) 47 | bmax = np.ceil(min(3*np.sqrt(n),n/3)) 48 | c = 2 49 | 50 | temp = mlag(data, mmax) 51 | temp = np.delete(temp,range(mmax), 0) # Remove first rows where there are 0`s 52 | corcoef = np.zeros(mmax) 53 | for iCor in range(0, mmax): 54 | corcoef[iCor] = np.corrcoef(data[mmax:],temp[:,iCor])[0,1] 55 | 56 | temp2 = np.transpose(mlag(corcoef,kn)) 57 | temp3 = np.zeros((kn,corcoef.shape[0]+1-kn)) 58 | 59 | for iRow in range(kn): 60 | temp3[iRow,:] = np.append(temp2[iRow,kn:corcoef.shape[0]],corcoef[len(corcoef)-kn+iRow-1]) 61 | 62 | treshold = abs(temp3) < (c* np.sqrt(np.log10(n)/n)) #Test if coeff bigger than triger 63 | treshold = np.sum(treshold,axis = 0 ) 64 | 65 | count = 0 66 | mhat = None 67 | for x in treshold: 68 | if (x==kn): 69 | mhat = count 70 | break 71 | count +=1 72 | 73 | if (mhat is None): 74 | # largest lag that is still significant 75 | seccrit = corcoef >(c* np.sqrt(np.log10(n)/n)) 76 | for iLag in range(seccrit.shape[0]-1,0,-1): 77 | if (seccrit[iLag]): 78 | mhat = iLag+1 79 | break 80 | if(mhat is None): 81 | M = 0 82 | elif (2*mhat > mmax): 83 | M = mmax 84 | else: 85 | M = 2*mhat 86 | 87 | # Computing the inputs to the function for Bstar 88 | kk = np.arange(-M, M+1, 1) 89 | 90 | if (M>0): 91 | temp = mlag(data,M) 92 | temp = np.delete(temp,range(M),0) 93 | temp2 = np.zeros((temp.shape[0],temp.shape[1]+1)) 94 | for iRow in range(len(data)-M): 95 | temp2[iRow,:] = np.hstack((data[M+iRow],temp[iRow,:])) 96 | 97 | temp2 = np.transpose(temp2) 98 | temp3 = np.cov(temp2) 99 | acv = temp3[:,0] 100 | 101 | acv2 = np.zeros((len(acv)-1,2)) 102 | acv2[:,0] = np.transpose(-np.linspace(1,M,M)) 103 | acv2[:,1] = acv[1:len(acv)] 104 | 105 | if acv2.shape[0]>1: 106 | acv2 =acv2[::-1] 107 | 108 | acv3 = np.zeros((acv2.shape[0]+acv.shape[0],1)) 109 | Counter = 0 110 | for iEl in range(acv2.shape[0]): 111 | acv3[Counter,0] = acv2[iEl,1] 112 | Counter +=1 113 | for iEl in range(acv.shape[0]): 114 | acv3[Counter,0] = acv[iEl] 115 | Counter +=1 116 | 117 | Ghat = 0 118 | DSBhat = 0 119 | LamTemp =lam(kk/M) 120 | 121 | for iHat in range(acv3.shape[0]): 122 | Ghat += LamTemp[iHat]* np.absolute(kk[iHat])*acv3[iHat,0] 123 | DSBhat += LamTemp[iHat]*acv3[iHat,0] 124 | DSBhat = 2* np.square(DSBhat) 125 | 126 | Bstar = np.power(2*np.square(Ghat)/DSBhat,1/3)*np.power(n,1/3) 127 | 128 | if Bstar>bmax: 129 | Bstar = bmax 130 | else: 131 | Bstar = 1 132 | return Bstar 133 | 134 | 135 | def mlag(x: np.ndarray,n)-> np.ndarray: 136 | """ 137 | Returns a numpy array in which the k-th column is the series x pushed down (lagged) by k places. 138 | 139 | Parameters 140 | ---------- 141 | x ... ndarray array for which the lagged matrix is calculated. np.array([1,2,3,4]) 142 | n ... integer specifying how many lags does the function consider 143 | 144 | Returns 145 | ------- 146 | xlag... ndarray contining the k-th lagged values in the k-th column of the matrix 147 | 148 | Example of use 149 | >>> import numpy as np 150 | >>> x = np.array([1,2,3,4]) 151 | >>> n = 2 152 | >>> mlag(x,n) 153 | Out[0]: array([[0, 0], 154 | [1, 0], 155 | [2, 1], 156 | [3, 2]]) 157 | The function was tested passing a numpy array (ndarray) as input and requires numpy to run. 158 | 159 | Original Matlab version written by: 160 | James P. LeSage, Dept of Economics 161 | University of Toledo 162 | 2801 W. 
Bancroft St, 163 | Toledo, OH 43606 164 | jpl@jpl.econ.utoledo.edu 165 | 166 | This Python implementation is based on Andrew J. Patton's Matlab code available at: 167 | http://public.econ.duke.edu/~ap172/ 168 | 169 | Implemented by Gregor Fabjan from Qnity Consultants on 12/11/2021 170 | """ 171 | nobs = x.shape[0] 172 | out = np.zeros((nobs,n)) 173 | for iLag in range(1,n+1): 174 | for iCol in range(nobs-iLag): 175 | out[iCol+iLag, iLag-1] = x[iCol] 176 | return out 177 | 178 | 179 | def lam(x: np.ndarray)-> np.ndarray: 180 | """ 181 | Returns the value at points x of the trapezoidal function. The trapezoidal function maps all numbers bigger than 1 or smaller than -1 to zero. 182 | Values between -1/2 and 1/2 map to 1, and the rest lie either on the line connecting (-1,0) to (-1/2,1) or on the line connecting (1/2,1) to (1,0). 183 | 184 | Parameters 185 | ---------- 186 | x ... ndarray array of points on which we wish to apply the trapezoidal mapping. 187 | Ex. np.array([-1,0.2,0.3,0.7]) 188 | 189 | Returns 190 | ------- 191 | out ... ndarray of mapped points Ex. array([0. , 1. , 1. , 0.6]) 192 | 193 | Example of use: 194 | >>> import numpy as np 195 | >>> x = np.array([0.55]) 196 | >>> lam(x) 197 | Out[0]: array([0.9]) 198 | 199 | Original Matlab version written by: 200 | James P. LeSage, Dept of Economics 201 | University of Toledo 202 | 2801 W. Bancroft St, 203 | Toledo, OH 43606 204 | jpl@jpl.econ.utoledo.edu 205 | 206 | This Python implementation is based on Andrew J. Patton's Matlab code available at: 207 | http://public.econ.duke.edu/~ap172/ 208 | 209 | Implemented by Gregor Fabjan from Qnity Consultants on 12/11/2021. 210 | """ 211 | 212 | nrow = x.shape[0] 213 | out = np.zeros(nrow) 214 | for row in range(nrow): 215 | out[row] = (abs(x[row])>=0) * (abs(x[row])<0.5) + 2 * (1-abs(x[row])) * (abs(x[row])>=0.5) * (abs(x[row])<=1) 216 | return out 217 | 218 | 219 | -------------------------------------------------------------------------------- /stationary_bootstrap/tests.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from stationary_bootstrap import stationary_bootstrap 4 | 5 | 6 | # Normal behaviour 7 | def test_normal(): 8 | data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) # Original time-series 9 | m = 4 # Average length of the block 10 | sample_length = 12 # Length of output sample 11 | ans = stationary_bootstrap(data, m, sample_length) 12 | assert isinstance(ans, np.ndarray), "Output is not a numpy ndarray." 13 | 14 | # Is output same length as sample_length 15 | def test_correct_length(): 16 | data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) # Original time-series 17 | m = 4 # Average length of the block 18 | sample_length = 12 # Length of output sample 19 | ans = stationary_bootstrap(data, m, sample_length) 20 | assert len(ans) == sample_length, "Sample length does not match the specified sample length." 21 | 22 | # Is output of the specified shape 23 | def test_correct_shape(): 24 | data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) # Original time-series 25 | m = 4 # Average length of the block 26 | sample_length = 12 # Length of output sample 27 | ans = stationary_bootstrap(data, m, sample_length) 28 | assert ans.shape == (sample_length,), "Output is not the specified shape." 29 | 30 | # Test if the output values are within the input data range 31 | def test_bootstrap_validity_of_values(): 32 | data = np.array([10, 20, 30, 40]) 33 | m = 1.5 34 | sample_length = 15 35 | result = stationary_bootstrap(data, m, sample_length) 36 | assert np.all(np.isin(result, data)), "Output contains values not in the original data." 37 | 38 | # One element sampled always 39 | def test_one_element_always_sampled(): 40 | data = np.array([0.4]) 41 | sample_length = 4 42 | m = 4 43 | ans = stationary_bootstrap(data, m, sample_length) 44 | assert np.array_equal(ans, np.array([0.4, 0.4, 0.4, 0.4])), "Single element should be repeated in the output." 45 | 46 | # Sample of length 1 47 | def test_sample_of_length_one(): 48 | data = np.array([0.5]) 49 | m = 4 50 | sample_length = 1 51 | ans = stationary_bootstrap(data, m, sample_length) 52 | assert ans == np.array([0.5]) 53 | 54 | # Test if an error is raised for non-positive block length (m) 55 | def test_invalid_block_length(): 56 | data = np.array([1, 2, 3]) 57 | m = 0 # Invalid block length 58 | sample_length = 10 59 | with pytest.raises(ValueError, match="Block length 'm' must be positive"): 60 | stationary_bootstrap(data, m, sample_length) 61 | 62 | # Test if an error is raised when data array is empty 63 | def test_empty_data_array(): 64 | data = np.array([]) 65 | m = 2.0 66 | sample_length = 5 67 | with pytest.raises(ValueError, match="Data array cannot be empty"): 68 | stationary_bootstrap(data, m, sample_length) 69 | 70 | # Test if an error is raised for invalid sample length 71 | def test_invalid_sample_length(): 72 | data = np.array([1, 2, 3]) 73 | m = 1.0 74 | sample_length = -5 # Invalid sample length 75 | with pytest.raises(ValueError, match="Sample length must be positive"): 76 | stationary_bootstrap(data, m, sample_length) 77 | 78 | # Average length longer than sample 79 | def test_average_length_longer_than_sample(): 80 | data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) # Original time-series 81 | m = 20 # Average length of the block 82 | sample_length = 12 # Length of output sample 83 | ans = stationary_bootstrap(data, m, sample_length) 84 | assert len(ans) == sample_length 85 | 86 | # Data in columns 87 | def test_data_passed_in_column(): 88 | data = np.array([[0.4],[0.2],[0.1],[0.4],[0.3],[0.1],[0.3],[0.4],[0.2],[0.5],[0.1],[0.2]]) # Original time-series 89 | m = 4 # Average length of the block 90 | sample_length = 12 # Length of output sample 91 | with pytest.raises(ValueError, match="data must be a 1-dimensional array"): 92 | stationary_bootstrap(data, m, sample_length) 93 | 94 | # Negative data 95 | def test_negative_input_data(): 96 | data = np.array([-0.4,0.2,-0.1,0.4,-0.3,0.1,-0.3,0.4,-0.2,-0.5,0.1,-0.2]) # Original time-series 97 | m = 4 # Average length of the block 98 | sample_length = 12 # Length of output sample 99 | ans = stationary_bootstrap(data, m, sample_length) 100 | assert len(ans) == sample_length 101 | 102 | # Data not in numpy array 103 | def test_data_not_numpy(): 104 | data = [0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2] # Original time-series 105 | m = 4 # Average length of the block 106 | sample_length = 12 # Length of output sample 107 | with pytest.raises(ValueError, match="data needs to be as a numpy array"): 108 | stationary_bootstrap(data, m, sample_length) 109 | 110 | # Data contains strings 111 | def test_string_number_input_data(): 112 | data = np.array(["-0.4","0.2","-0.1","0.4","-0.3","0.1","0.3","0.4","0.2","0.5","0.1","0.2"]) # Original time-series 113 | m = 4 # Average length of the block 114 | sample_length = 12 # Length of output sample 115 | ans = stationary_bootstrap(data, m, sample_length) 116 | assert len(ans) == sample_length 117 | 118 | ## Test
calibration 119 | 120 | from stationary_bootstrap_calibrate import OptimalLength, lam, mlag 121 | 122 | data = np.array([0.4, 0.2, 0.1, 0.4, 0.3, 0.1, 0.3, 0.4, 0.2, 0.5, 0.1, 0.2]) 123 | 124 | m = OptimalLength(data) 125 | print(m) 126 | 127 | 128 | # Test lambda on interval 0-0.5 129 | x = np.array([0.5]) 130 | out = lam(x) 131 | print(out) 132 | 133 | # Test on interval 0.5-1 and >1 134 | x = np.array([0.7]) 135 | out = lam(x) 136 | print(out) 137 | 138 | # Test on interval >1 139 | x = np.array([1.2]) 140 | out = lam(x) 141 | print(out) 142 | 143 | # Test on interval <0 144 | x = np.array([-1.2]) 145 | out = lam(x) 146 | print(out) 147 | 148 | # Test on multiple outputs 149 | x = np.array([-0.2, 0.1, 0.6, 0.8, 1.1]) 150 | out = lam(x) 151 | print(out) 152 | 153 | # Test mlag normal 154 | x = np.array([1,2,3,4]) 155 | n = 2 156 | print(mlag(x,n)) 157 | 158 | # Test mlag normal size 159 | x = np.array([1,2,3,4]) 160 | n = 2 161 | print(mlag(x,n).shape) 162 | 163 | # Test mlag single input 164 | x = np.array([1]) 165 | n = 2 166 | print(mlag(x,n)) 167 | 168 | # Test mlag single input 169 | x = np.array([1,2,3]) 170 | n = 1 171 | print(mlag(x,n)) 172 | 173 | # Test mlag single input 174 | x = np.array([1,2,3]) 175 | n = 1 176 | print(mlag(x,n)) 177 | 178 | # Test OptimalLength 179 | data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) 180 | print(OptimalLength(data).shape) 181 | 182 | # Test OptimalLength 183 | data = np.array([1,0.2,17,0.4,0.3,2,0.3,12,0.2,11,0.1]) 184 | print(OptimalLength(data)) 185 | 186 | 187 | 188 | 189 | 190 | 191 | -------------------------------------------------------------------------------- /stationary_bootstrap_calibration/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Gregor 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /stationary_bootstrap_calibration/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 🐍 Automatic calibration of the stationary bootstrap algorithm 🐍 4 | 5 |

6 | 7 |
8 | 9 | ## Problem 10 | 11 | Implementation of a stationary bootstrap method for weakly dependent stationary data requires the selection of the average block length as input. This can be time-consuming and can introduce a degree of subjectivity into the implementation. 12 | 13 | ## Solution 14 | 15 | The proposed methodology automatically estimates the optimal block size. As mentioned in the original paper, the methodology is based on the notion of spectral estimation via the flat-top lag-windows of Politis and Romano (1995). The proposed solution is described in the paper [Politis and White (2004)](http://public.econ.duke.edu/~ap172/Politis_White_2004.pdf). 16 | 17 | ### Input 18 | - `data` ... the time-series for which the calibration is necessary. 19 | 20 | ### Output 21 | - The optimal average block length (returned as a float). 22 | 23 | ## Getting started 24 | Given a time series with values 0.4, 0.2, 0.1, 0.4, 0.3, 0.1, 0.3, 0.4, 0.2, 0.5, 0.1, and 0.2, the user desires to use the stationary bootstrap algorithm for resampling. The objective is to automatically retrieve the "optimal" value of the parameter needed for the stationary bootstrap algorithm. 25 | 26 | ```python 27 | 28 | import numpy as np 29 | 30 | from stationary_bootstrap_calibrate import OptimalLength 31 | 32 | data = np.array([0.4, 0.2, 0.1, 0.4, 0.3, 0.1, 0.3, 0.4, 0.2, 0.5, 0.1, 0.2]) 33 | 34 | m = OptimalLength(data) 35 | # Out[0]: 4.0 36 | ``` 37 | -------------------------------------------------------------------------------- /stationary_bootstrap_calibration/stationary_bootstrap_calibrate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def OptimalLength(data: np.ndarray) ->float: 4 | """ 5 | Function calculates the optimal parameter value when using a stationary bootstrapping algorithm. 6 | The method is based on the 2004 paper by Politis & White: 7 | Dimitris N. Politis & Halbert White (2004) Automatic Block-Length Selection for the Dependent Bootstrap, Econometric Reviews, 8 | 23:1, 53-70, DOI: 10.1081/ETC-120028836 9 | 10 | The code was modified compared to Patton's implementation in that it takes as input a one dimensional time-series 11 | and returns the optimal block size only for the stationary bootstrap algorithm. 12 | 13 | Warning! The minimal size of the time series is 9 elements. 14 | 15 | 16 | Parameters 17 | ---------- 18 | data ... ndarray array containing the time-series that we wish to bootstrap. 19 | Ex. np.array([-1,0.2,0.3,0.7,0.5,0.1,0.4,0.3,0.5]) 20 | 21 | Returns 22 | ------- 23 | Bstar ... optimal value of the parameter m Ex. 1 24 | 25 | Example of use: 26 | >>> import numpy as np 27 | >>> data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) 28 | >>> OptimalLength(data) 29 | Out[0]: 4.0 30 | 31 | Original Matlab version written by: 32 | James P. LeSage, Dept of Economics 33 | University of Toledo 34 | 2801 W. Bancroft St, 35 | Toledo, OH 43606 36 | jpl@jpl.econ.utoledo.edu 37 | 38 | This Python implementation is based on Andrew J. Patton's Matlab code available at: 39 | http://public.econ.duke.edu/~ap172/ 40 | 41 | Implemented by Gregor Fabjan from Qnity Consultants on 12/11/2021.
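    In outline, the returned value follows the Politis & White plug-in rule
    (this restates the computation at the end of this function; it is not a new method):

        Bstar = (2 * Ghat**2 / DSBhat)**(1/3) * n**(1/3)

    where Ghat and DSBhat are flat-top lag-window estimates built from the sample
    autocovariances, and the result is capped at bmax = ceil(min(3*sqrt(n), n/3)).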
42 | """ 43 | 44 | n = data.shape[0] 45 | kn = max(5,np.sqrt(np.log10(n))) 46 | mmax = int(np.ceil(np.sqrt(n))+kn) 47 | bmax = np.ceil(min(3*np.sqrt(n),n/3)) 48 | c = 2 49 | 50 | temp = mlag(data, mmax) 51 | temp = np.delete(temp,range(mmax), 0) # Remove first rows where there are 0`s 52 | corcoef = np.zeros(mmax) 53 | # Calculate autocorelation R^hat (s) 54 | for iCor in range(0, mmax): 55 | corcoef[iCor] = np.corrcoef(data[mmax:],temp[:,iCor])[0,1] 56 | 57 | temp2 = np.transpose(mlag(corcoef,kn)) 58 | temp3 = np.zeros((kn,corcoef.shape[0]+1-kn)) 59 | for iRow in range(kn): # Create a matrix of autocorrelations R^hat (s) each row starts one lag further 60 | # To do this, take lagged correlations from mlag() and add to the last place the original corcoef 61 | temp3[iRow,:] = np.append(temp2[iRow,kn:corcoef.shape[0]],corcoef[len(corcoef)-kn+iRow-1]) 62 | 63 | treshold = abs(temp3) < (c* np.sqrt(np.log10(n)/n)) #Test if coeff bigger than triger. If true, then autocorrelation is "negligable" 64 | treshold = np.sum(treshold,axis = 0 ) 65 | 66 | # The first lag where all insignificant covariants are insignificants 67 | count = 1 # Counter of how many lags before you get to kn consecutive insignificant lags 68 | mhat = None # Will contain integer mhat or None if there are no such lags. 69 | for x in treshold: # if more than one collection is possible, choose the smallest m 70 | if (x==kn): 71 | mhat = count 72 | break 73 | count +=1 74 | 75 | if (mhat is None): # NO collection of KN autocorrels were all insignif, so pick largest significant lag 76 | seccrit = corcoef >(c* np.sqrt(np.log10(n)/n)) 77 | for iLag in range(seccrit.shape[0]-1,0,-1): # Find largest lag that is still significant 78 | if (seccrit[iLag]): 79 | mhat = iLag+1 80 | break 81 | 82 | if(mhat is None): # If no autocorrelation is significant, then use normal bootstrap 83 | M = 0 84 | elif (2*mhat > mmax): # Make sure that the mhat is not larger than the maximum number 85 | M = mmax 86 | else: 87 | M = 2*mhat 88 | 89 | # Computing the inputs to the function for Bstar 90 | kk = np.arange(-M, M+1, 1) 91 | 92 | if (M>0): # 93 | temp = mlag(data,M) 94 | temp = np.delete(temp,range(M),0) # Dropping the first mmax rows, as they're filled with zeros 95 | temp2 = np.zeros((temp.shape[0],temp.shape[1]+1)) 96 | for iRow in range(len(data)-M): 97 | temp2[iRow,:] = np.hstack((data[M+iRow],temp[iRow,:])) 98 | 99 | temp2 = np.transpose(temp2) 100 | temp3 = np.cov(temp2) 101 | acv = temp3[:,0] 102 | 103 | acv2 = np.zeros((len(acv)-1,2)) 104 | acv2[:,0] = np.transpose(-np.linspace(1,M,M)) 105 | acv2[:,1] = acv[1:len(acv)] 106 | 107 | if acv2.shape[0]>1: 108 | acv2 =acv2[::-1] 109 | 110 | acv3 = np.zeros((acv2.shape[0]+acv.shape[0],1)) 111 | Counter = 0 112 | for iEl in range(acv2.shape[0]): 113 | acv3[Counter,0] = acv2[iEl,1] 114 | Counter +=1 115 | for iEl in range(acv.shape[0]): 116 | acv3[Counter,0] = acv[iEl] 117 | Counter +=1 118 | 119 | Ghat = 0 120 | DSBhat = 0 121 | LamTemp =lam(kk/M) 122 | 123 | for iHat in range(acv3.shape[0]): 124 | Ghat += LamTemp[iHat]* np.absolute(kk[iHat])*acv3[iHat,0] 125 | DSBhat += LamTemp[iHat]*acv3[iHat,0] 126 | DSBhat = 2* np.square(DSBhat) 127 | 128 | Bstar = np.power(2*np.square(Ghat)/DSBhat,1/3)*np.power(n,1/3) 129 | 130 | if Bstar>bmax: 131 | Bstar = bmax 132 | else: 133 | Bstar = 1 134 | return Bstar 135 | 136 | 137 | def mlag(x: np.ndarray,n)-> np.ndarray: 138 | """ 139 | Returns a numpy array in which the k-th column is the series x pushed down (lagged) by k places. 
140 | 141 | Parameters 142 | ---------- 143 | x ... ndarray array for which the lagged matrix is calculated. np.array([1,2,3,4]) 144 | n ... integer specifying how many lags does the function consider 145 | 146 | Returns 147 | ------- 148 | xlag... ndarray contining the k-th lagged values in the k-th column of the matrix 149 | 150 | Example of use 151 | >>> import numpy as np 152 | >>> x = np.array([1,2,3,4]) 153 | >>> n = 2 154 | >>> mlag(x,n) 155 | Out[0]: array([[0, 0], 156 | [1, 0], 157 | [2, 1], 158 | [3, 2]]) 159 | The function was tested passing a numpy array (ndarray) as input and requires numpy to run. 160 | 161 | Original Matlab version written by: 162 | James P. LeSage, Dept of Economics 163 | University of Toledo 164 | 2801 W. Bancroft St, 165 | Toledo, OH 43606 166 | jpl@jpl.econ.utoledo.edu 167 | 168 | This Python implementation is based on Andrew J. Patton's Matlab code avalible at: 169 | http://public.econ.duke.edu/~ap172/ 170 | 171 | Implemented by Gregor Fabjan from Qnity Consultants on 12/11/2021 172 | """ 173 | nobs = x.shape[0] 174 | out = np.zeros((nobs,n)) 175 | for iLag in range(1,n+1): 176 | for iCol in range(nobs-iLag): 177 | out[iCol+iLag, iLag-1] = x[iCol] 178 | return out 179 | 180 | 181 | def lam(x: np.ndarray)-> np.ndarray: 182 | """ 183 | Returns the value at points x of the Trapezoidal function. Trapezoidal funcion maps all numbers bigger than 1 or smaller than -1 to zero. 184 | Values between -1/2 to 1/2 to 1 and the rest either on the line connecting (-1,0) to (-1/2,1) or (1/2,1) to (1,0). 185 | 186 | Parameters 187 | ---------- 188 | x ... ndarray array of points on which we wish to apply the trapezoidal mapping. 189 | Ex. np.array([-1,0.2,0.3,0.7]) 190 | 191 | Returns 192 | ------- 193 | out ... ndarray of mapped points Ex. array([0. , 1. , 1. , 0.6]) 194 | 195 | Example of use: 196 | >>> import numpy as np 197 | >>> x = np.array([0.55]) 198 | >>> lam(x) 199 | Out[0]: array([0.9]) 200 | 201 | Original Matlab version written by: 202 | James P. LeSage, Dept of Economics 203 | University of Toledo 204 | 2801 W. Bancroft St, 205 | Toledo, OH 43606 206 | jpl@jpl.econ.utoledo.edu 207 | 208 | This Python implementation is based on Andrew J. Patton's Matlab code avalible at: 209 | http://public.econ.duke.edu/~ap172/ 210 | 211 | Implemented by Gregor Fabjan from Qnity Consultants on 12/11/2021. 212 | """ 213 | 214 | nrow = x.shape[0] 215 | out = np.zeros(nrow) 216 | for row in range(nrow): 217 | out[row] = (abs(x[row])>=0) * (abs(x[row])<0.5) + 2 * (1-abs(x[row])) * (abs(x[row])>=0.5) * (abs(x[row])<=1) 218 | return out 219 | 220 | 221 | import numpy as np 222 | data = np.array([0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0, 1.1, 0.3, 0.5]) 223 | print(OptimalLength(data)) 224 | -------------------------------------------------------------------------------- /stationary_bootstrap_calibration/tests_calibration.py: -------------------------------------------------------------------------------- 1 | ## Test calibration 2 | import numpy as np 3 | import pytest 4 | from stationary_bootstrap_calibrate import OptimalLength, lam, mlag 5 | 6 | 7 | #Test lam output type 8 | def test_output_numpy_array(): 9 | x = np.array([-0.2, 0.1, 0.6, 0.8, 1.1]) 10 | out = lam(x) 11 | assert isinstance (out, np.ndarray), "Output is not a numpy ndarray." 12 | 13 | # Test on interval -1 14 | def test_result_bellow_minue_one(): 15 | x = np.array([-1.2]) 16 | out = lam(x) 17 | assert out[0] == pytest.approx(0., 0.00001), "Output in the inteval bellow -1 is outside expectations." 
18 | 19 | # Test on interval -0.5 - -1; should be 2(1-|-0.7|) 20 | def test_lambda_on_interval_mid_negative(): 21 | x = np.array([-0.7]) 22 | out = lam(x) 23 | assert out == pytest.approx(0.6, 0.00001), "Output in the interval between -0.5 and -1 is outside expectations." 24 | 25 | # Test on interval -0.5 - 0 26 | def test_lambda_on_interval_low_negative(): 27 | x = np.array([-0.4]) 28 | out = lam(x) 29 | assert out[0] == pytest.approx(1., 0.00001), "Output in the interval between -0.5 and 0 is outside expectations." 30 | 31 | # Test lambda on interval 0-0.5; should be 1 32 | def test_lambda_on_interval_low_positive(): 33 | x = np.array([0.3]) 34 | out = lam(x) 35 | assert out == np.array([1.]), "Output in the interval between 0 and 0.5 is outside expectations." 36 | 37 | # Test on interval 0.5 - 1; should be 2(1-|0.7|) = 2*0.3 = 0.6 38 | def test_lambda_on_interval_mid_positive(): 39 | x = np.array([0.7]) 40 | out = lam(x) 41 | assert out[0] == pytest.approx(0.6,0.00001), "Output in the interval between 0.5 and 1 is outside expectations." 42 | 43 | # Test on interval bigger than 1; should be 0 44 | def test_lambda_on_interval_high_positive(): 45 | x = np.array([2.3]) 46 | out = lam(x) 47 | assert out[0] == pytest.approx(0.,0.00001), "Output in the interval bigger than 1 is outside expectations." 48 | 49 | # Test on multiple outputs 50 | def test_lambda_multiple_outputs(): 51 | x = np.array([-0.2, 0.1, 0.6, 0.8, 1.1]) 52 | out = lam(x) 53 | assert out.size == 5, "Output is of different size than input." 54 | 55 | # Test mlag output type 56 | def test_mlag_numpy_array(): 57 | x = np.array([1,2,3,4]) 58 | n = 2 59 | assert isinstance(mlag(x,n), np.ndarray), "Output is not a numpy ndarray." 60 | 61 | # Test mlag size 62 | def test_mlag_output_shape(): 63 | x = np.array([1,2,3,4]) 64 | n = 2 65 | assert mlag(x,n).shape == (4,2) 66 | 67 | # Test mlag normal 68 | def test_mlag_typical_input(): 69 | x = np.array([1,2,3,4]) 70 | n = 2 71 | out_hardcoded = np.array([[0,0], [1,0], [2,1], [3,2]]) 72 | out = mlag(x,n) 73 | assert np.array_equal(out, out_hardcoded), "Typical output 1,2,3,4 is not as originally expected."
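# A sketch of an additional regression test, pinning the example documented in the
# README and in the OptimalLength docstring (both state this series calibrates to 4.0):
def test_optimal_length_documented_example():
    data = np.array([0.4, 0.2, 0.1, 0.4, 0.3, 0.1, 0.3, 0.4, 0.2, 0.5, 0.1, 0.2])
    assert OptimalLength(data) == pytest.approx(4.0)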
74 | 75 | # Test mlag single input 76 | def test_mlag_single_input(): 77 | x = np.array([1]) 78 | n = 2 79 | out = mlag(x,n) 80 | out_hardcoded = np.array([[0,0]]) 81 | assert np.array_equal(out, out_hardcoded) 82 | 83 | # Test mlag single lag 84 | def test_mlag_single_lag(): 85 | x = np.array([1,2,3]) 86 | n = 1 87 | out = mlag(x,n) 88 | out_hardcoded = np.array([[0.],[1.],[2.]]) 89 | assert np.array_equal(out, out_hardcoded) 90 | 91 | # Test OptimalLength 92 | data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) 93 | print(OptimalLength(data).shape) 94 | 95 | # Test OptimalLength 96 | data = np.array([1,0.2,17,0.4,0.3,2,0.3,12,0.2,11,0.1]) 97 | print(OptimalLength(data)) 98 | 99 | data = np.array([0.4, 0.2, 0.1, 0.4, 0.3, 0.1, 0.3, 0.4, 0.2, 0.5, 0.1, 0.2]) 100 | m = OptimalLength(data) 101 | print(m) 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /vasicek_one_factor/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 (Mostly) Financial Algorithms 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /vasicek_one_factor/README.md: -------------------------------------------------------------------------------- 1 | 2 |

3 | 4 | 🐍 Vasicek One-Factor model 🐍 5 | 6 |

7 | 8 | The Vasicek one factor model simulates the evolution of credit instruments such as government bonds. The Vasicek model assumes that the process evolves as an Ornstein-Uhlenbeck process. An Ornstein-Uhlenbeck process is a stochastic process that, over time, tends to drift towards a long-term mean (it is mean reverting). 9 | 10 | ## Problem 11 | 12 | When simulating, for example, credit spreads, there is a variety of models to choose from. The choice of the model and its limitations are a key factor in deciding which model to implement. There are compelling economic arguments in favour of mean reversion. 13 | 14 | ## Solution 15 | 16 | One of the simplest models, the [Vasicek one factor model](https://en.wikipedia.org/wiki/Vasicek_model), assumes that the credit market can be described by a simple mean reverting stochastic process with one source of uncertainty coming from a [Brownian motion](https://en.wikipedia.org/wiki/Brownian_motion). One limitation is that, due to the normal distribution of the noise, the process allows negative spreads, which might be undesirable in certain circumstances. 17 | 18 | The stochastic differential equation (SDE) of the Vasicek model is shown on the Wiki page https://en.wikipedia.org/wiki/Vasicek_model and restated below. 19 | 20 | ### Input 21 | 22 | - `r0` (float): starting interest rate of the Vasicek process. 23 | - `a` (float): speed of reversion parameter that characterizes the velocity at which such trajectories will regroup around `lam` in time. 24 | - `lam` (float): long term mean level. All future trajectories of r will evolve around this mean level in the long run. 25 | - `sigma` (float): instantaneous volatility, measures instant by instant the amplitude of randomness entering the system. 26 | - `T` (integer): end modelling time. From 0 to T the time series runs. 27 | - `dt` (float): increment of time that the process runs on. Ex. dt = 0.1 then the time series is 0, 0.1, 0.2,... 28 | 29 | ### Output 30 | 31 | - Pandas DataFrame with the modelling times as index and a simulated sample path of interest rates as values.
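For reference, the dynamics and the exact-discretisation update actually used in `Vasicek_one_factor.py` can be written as follows (this restates the simulation loop below; it adds no extra features):

```math
dr_t = a(\lambda - r_t)\,dt + \sigma\,dW_t
```

```math
r_{t+\Delta t} = r_t e^{-a\Delta t} + \lambda\left(1 - e^{-a\Delta t}\right) + \sigma\sqrt{\tfrac{1-e^{-2a\Delta t}}{2a}}\,Z, \qquad Z \sim N(0,1)
```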
32 | 33 | ## Getting started 34 | 35 | ```python 36 | import numpy as np 37 | import pandas as pd 38 | from Vasicek_one_factor import simulate_Vasicek_One_Factor 39 | 40 | r0 = 0.1 # The starting interest rate 41 | a = 1.0 # Speed of reversion parameter 42 | lam = 0.1 # Long-term mean interest rate level 43 | sigma = 0.2 # Instantaneous volatility 44 | T = 52 # End modelling time 45 | dt = 0.1 # Increments of time 46 | 47 | print(simulate_Vasicek_One_Factor(r0, a, lam, sigma, T, dt)) 48 | ``` 49 | -------------------------------------------------------------------------------- /vasicek_one_factor/Vasicek_one_factor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def simulate_Vasicek_One_Factor(r0: float = 0.1, a: float = 1.0, lam: float = 0.1, sigma: float = 0.2, T: int = 52, dt = 0.1) -> pd.DataFrame: 5 | """ Simulates a temporal series of interest rates using the One Factor Vasicek model 6 | interest_rate_simulation = simulate_Vasicek_One_Factor(r0, a, lam, sigma, T, dt) 7 | 8 | Arguments: 9 | r0 = float, starting interest rate of the Vasicek process 10 | a = float, speed of reversion parameter that characterizes the velocity at which such trajectories will regroup around lam in time 11 | lam = float, long term mean level that all future trajectories will evolve around 12 | sigma = float, instantaneous volatility measures instant by instant the amplitude of randomness entering the system 13 | T = integer, end modelling time. From 0 to T the time series runs. 14 | dt = float, increment of time that the process runs on. Ex. dt = 0.1 then the time series is 0, 0.1, 0.2,... 15 | 16 | Returns: 17 | interest_rate_simulation = Pandas DataFrame where the index is the modelling time and the values are a realisation of the simulated interest rate path 18 | 19 | Example: 20 | Model the interest rate which is 10% today. The annualized instant volatility is 20%. The external analysis points out that the mean reversion parameter is 1 and the long-term interest rate level is 10%, therefore the mean reversion correction is theta = 10% * 1 = 10%.
The user is interested in an interest rate projection of the next 10 years in increments of 6 months (0.5 years) 21 | 22 | import pandas as pd 23 | import numpy as np 24 | 25 | simulate_Vasicek_One_Factor(0.1, 1.0, 0.1, 0.2, 10, 0.5) 26 | [out] = Time Stock Price 27 | 0.000000 0.100000 28 | 0.526316 0.212055 29 | 1.052632 0.115934 30 | 1.578947 0.012870 31 | 2.105263 0.003295 32 | 2.631579 0.206635 33 | 3.157895 0.191319 34 | 3.684211 0.108299 35 | 4.210526 0.094983 36 | 4.736842 0.075903 37 | 5.263158 0.229143 38 | 5.789474 -0.111977 39 | 6.315789 0.120245 40 | 6.842105 0.116082 41 | 7.368421 0.230879 42 | 7.894737 0.266821 43 | 8.421053 0.229788 44 | 8.947368 0.304378 45 | 9.473684 0.217760 46 | 10.000000 0.217147 47 | For more information see https://en.wikipedia.org/wiki/Vasicek_model 48 | """ 49 | 50 | N = int(T / dt) + 1 # number of end-points of subintervals of length 1/dt between 0 and max modelling time T 51 | 52 | time, delta_t = np.linspace(0, T, num = N, retstep = True) 53 | 54 | r = np.ones(N) * r0 55 | 56 | for t in range(1,N): 57 | r[t] = r[t-1] * np.exp(-a*dt)+lam*(1-np.exp(-a*dt))+sigma*np.sqrt((1-np.exp(-2*a*dt))/(2*a))* np.random.normal(loc = 0,scale = 1) 58 | 59 | dict = {'Time' : time, 'Interest Rate' : r} 60 | 61 | interest_rate_simulation = pd.DataFrame.from_dict(data = dict) 62 | interest_rate_simulation.set_index('Time', inplace = True) 63 | 64 | return interest_rate_simulation 65 | -------------------------------------------------------------------------------- /vasicek_one_factor/maximum_likelihood_validation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from Vasicek_one_factor import simulate_Vasicek_One_Factor 3 | 4 | r0 = 3 # The starting interest rate 5 | a = 3 # speed of reversion parameter 6 | b = 1 # long term mean interest rate level correction. 
(the level the process reverts to in the long run) 7 | sigma = 0.5 # instantaneous volatility 8 | T = 500 # end modelling time 9 | dt = 0.01 # increments of time 10 | 11 | out = simulate_Vasicek_One_Factor(r0, a, b, sigma, T, dt) 12 | 13 | Yield = np.array(out.values) 14 | SampleSize = Yield.size 15 | 16 | Yieldx = Yield[0:(SampleSize-1)] 17 | Yieldy = Yield[1:SampleSize] 18 | 19 | Sx = sum(Yieldx) 20 | Sy = sum(Yieldy) 21 | Sxx = sum(Yieldx * Yieldx) 22 | Sxy = sum(Yieldx * Yieldy) 23 | Syy = sum(Yieldy * Yieldy) 24 | 25 | n = SampleSize-1 26 | 27 | a = (n * Sxy - Sx * Sy)/(n * Sxx - Sx**2) 28 | b = (Sy - a*Sx)/n 29 | sd = np.sqrt((n*Syy-Sy**2 - a*(n*Sxy-Sx*Sy))/(n*(n-2))) 30 | 31 | lam = -np.log(a)/dt 32 | mu = b/(1-a) 33 | sigma = sd * np.sqrt((-2*np.log(a))/(dt*(1-a**2))) 34 | 35 | MLmu = (Sy*Sxx - Sx*Sxy) / (n*(Sxx-Sxy)-(Sx**2 - Sx*Sy)) 36 | MLlam = -1/dt* np.log((Sxy-mu*Sx-mu*Sy+n*mu**2)/(Sxx-2*mu*Sx+n*mu**2)) 37 | 38 | alpha = np.exp(-lam*dt) 39 | sigmaHat = 1/n * (Syy - 2* alpha* Sxy + alpha **2 * Sxx-2*mu*(1-alpha)*(Sy-alpha*Sx)+n*mu**2 *(1-alpha)**2) 40 | MLsigmaSqrt = np.sqrt(sigmaHat * (2*lam)/(1-alpha**2)) 41 | 42 | print("ML mu for Vasicek is:") 43 | print(MLmu) 44 | print("ML lambda for Vasicek is:") 45 | print(MLlam) 46 | print("ML sigma for Vasicek is:") 47 | print(MLsigmaSqrt) 48 | -------------------------------------------------------------------------------- /vasicek_two_factor/Calibration.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from scipy.optimize import minimize 5 | 6 | class Calibrator(): 7 | 8 | def __init__(self, method): 9 | self.method = method 10 | 11 | 12 | def swapRates(t, p, matrix): 13 | # SWAPRATES calculates par swap rates implied by a zero-coupon bond curve, assuming a semi-annual fixed leg 14 | # S = swapRates(t, p, matrix) 15 | # 16 | # Arguments: 17 | # t = ndarray of maturities for which the zero-coupon prices p are given 18 | # p = ndarray of zero-coupon bond prices on the maturity grid t 19 | # matrix = ndarray of swap maturities (in years) for which the par rates are calculated 20 | # 21 | # Returns: 22 | # S = ndarray of par swap rates, expressed in percent 23 | # 24 | # Example: 25 | # 26 | # For more information see SOURCE 27 | 28 | tmax = matrix[-1] 29 | 30 | ttemp = np.arange(0.5, tmax + 0.5, 0.5) 31 | ptemp = np.interp(ttemp, t, p) 32 | 33 | dis = np.cumsum(ptemp) 34 | pmatrix = np.interp(matrix, t, p) 35 | 36 | index = (2 * matrix).astype(int) - 1 37 | S = 100 * 2 * (1 - pmatrix) / dis[index] 38 | 39 | return S 40 | 41 | def rates(t, p, matrix): 42 | # RATES calculates simply compounded spot rates, expressed in percent, from zero-coupon bond prices 43 | # R = rates(t, p, matrix) 44 | # 45 | # Arguments: 46 | # t = ndarray of maturities for which the zero-coupon prices p are given 47 | # p = ndarray of zero-coupon bond prices on the maturity grid t 48 | # matrix = ndarray of maturities (in years) for which the rates are calculated 49 | # 50 | # Returns: 51 | # R = ndarray of simply compounded spot rates, expressed in percent 52 | # 53 | # Example: 54 | # 55 | # For more information see SOURCE 56 | 57 | pmatrix = np.interp(matrix, t, p) 58 | R = 100 * (1.
/ pmatrix - 1) / matrix 59 | 60 | return R 61 | 62 | def objectiveFunction(params, t, RATES, SWAP): 63 | # OBJECTIVEFUNCTION calculates the XXX 64 | # mse = objectiveFunction(params, t, RATES, SWAP) 65 | # 66 | # Arguments: 67 | # params = 68 | # t = 69 | # RATES = 70 | # SWAP 71 | # 72 | # Returns: 73 | # mse = 74 | # 75 | # Example: 76 | # 77 | # For more information see SOURCE 78 | 79 | r0 = params[0] 80 | a = params[1] 81 | b = params[2] 82 | sigma = params[3] 83 | 84 | p = self.zeroCoupon(t, r0, a, b, sigma) 85 | S = self.swapRates(t, p, SWAP[:,0]) 86 | L = self.rates(t, p, RATES[:,0]) 87 | 88 | rel1 = (S - SWAP[:,1]) / SWAP[:,1] 89 | rel2 = (L - RATES[:,1]) / RATES[:,1] 90 | 91 | mse = np.sum(rel1**2) + np.sum(rel2**2) 92 | 93 | return mse 94 | 95 | def calibration(fun, param_0, t, RATES, SWAP): 96 | # CALIBRATION calculates the XXX 97 | # p, L, S = calibration(fun, param_0, t, RATES, SWAP) 98 | # 99 | # Arguments: 100 | # fun = 101 | # param_0 = 102 | # t = 103 | # RATES = 104 | # SWAP = 105 | # 106 | # Returns: 107 | # p = 108 | # L = 109 | # S = 110 | # 111 | # Example: 112 | # 113 | # For more information see SOURCE 114 | 115 | opt = {'maxiter':1000, 'maxfev':5e3} 116 | solution = minimize(fun, param_0, args = (t, RATES, SWAP, model), method='Nelder-Mead', options=opt) 117 | parameters = np.array(solution.x) 118 | 119 | r_star = parameters[0] 120 | a_star = parameters[1] 121 | b_star = parameters[2] 122 | sigma_star = parameters[3] 123 | 124 | p = self.zeroCoupon(t, r_star, a_star, b_star, sigma_star, model) 125 | R = rates(t, p, RATES[:, 0]) 126 | S = swapRates(t, p, SWAP[:, 0]) 127 | 128 | return p, L, S 129 | 130 | def calibrate(self, rates): 131 | # CALIBRATIE calculates the XXX 132 | # p, E = calibrate(self, rates) 133 | # 134 | # Arguments: 135 | # self = 136 | # rates = 137 | # 138 | # Returns: 139 | # p = 140 | # E = 141 | # 142 | # Example: 143 | # 144 | # For more information see SOURCE 145 | 146 | if self.method == 'Optimize Error': 147 | 148 | p, E = calibrate_Optimize_Error(objectiveFunction, [0.1, 1.0, 1.0, 0.2], 0.1, rates) 149 | 150 | return p, E 151 | 152 | def zeroCoupon(t: float = 1.0, r0: float = 0.01, a: float = 1.0, b: float = 1.0, sigma: float = 0.2): 153 | # ZEROCOUPON calculates the XXX 154 | # zc = zeroCoupon(t, r0, a, b, sigma) 155 | # 156 | # Arguments: 157 | # t = 158 | # r0 = 159 | # a = 160 | # b = 161 | # sigma = 162 | # 163 | # Returns: 164 | # zc = 165 | # 166 | # Example: 167 | # 168 | # For more information see SOURCE 169 | 170 | B = (1 - np.exp(-a * t)) / a 171 | A = (b - sigma**2 / (2 * a**2)) * (B - t) - (sigma**2 / (4 * a)) * B**2 172 | n = len(A) 173 | r = np.repeat(r0, n) 174 | zc = np.exp(A - B * r) 175 | 176 | return zc 177 | -------------------------------------------------------------------------------- /vasicek_two_factor/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 (Mostly) Financial Algorithms 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies 
or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /vasicek_two_factor/Pricing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from scipy import integrate 4 | 5 | from Vasicek import BrownianMotion 6 | 7 | class Swaption(object): 8 | 9 | def __init__(self, 10 | type: str, 11 | maturity: float = 1, 12 | exercise_date: float = 0.5, 13 | notional: float = 10**6, 14 | fixed_rate: float = 0.1, 15 | floating_leg_frequency: float = 0.5, 16 | payer: bool = True): 17 | 18 | receiver = not payer 19 | self._maturity = maturity 20 | self._exercise_date = exercise_date 21 | self._notional = notional 22 | self._fixed_rate = fixed_rate 23 | self._floating_leg_frequency = floating_leg_frequency 24 | self._is_payer = payer 25 | self._is_receiver = receiver 26 | self._type = type 27 | 28 | 29 | class ZeroCouponBond(): 30 | 31 | def __init__(self, 32 | maturity): 33 | 34 | self._T = maturity 35 | 36 | def price_Vasicek_Two_Factor(self, r0, a, b, sigma, rho, T, dt, nScen): 37 | # PRICE_VASICEK_TWO_FACTOR calculates the price of a zero cupon bond of maturity T using numeric integration 38 | # price_Vasicek_Two_Factor(self, r0, a, b, sigma, rho, T, dt, nScen) 39 | # 40 | # Arguments: 41 | # self = reference to the current instance of the class. 42 | # r0 = list with 2 floats, starting interest rate of each vasicek process 43 | # a = list with 2 floats, speed of reversion of each process that characterizes the velocity at which such trajectories will regroup around each b 44 | # b = list with 2 floats, long term mean level of each process. All future trajectories of r will evolve around a mean level b in the long run 45 | # sigma = list with 2 floats, instantaneous volatility, amplitude of randomness of each process 46 | # rho = float, specifying the correlation coefficient of the Brownian motion. ex. rho = 0.4 means that two 47 | # Brownian procesess on the same modeling time interval have a correlation coefficient of 0.4. SOURCE 48 | # T = integer specifying the maximum modeling time. ex. if T = 2 then modelling time will run from 0 to 2 49 | # dt = float specifying the length of each subinterval. ex. 
dt = 0.1, then there will be 10 intervals of length 0.1 between two integers of modeling time 50 | # nScen = integer, number of simulated paths (Monte Carlo scenarios) over which the discount factors are averaged to estimate the price 51 | # 52 | # Returns: 53 | # ZeroCouponBond object with the added property _price containing the price of a zero coupon bond 54 | # 55 | # Example: 56 | # e.g. price = ZeroCouponBond(1).price_Vasicek_Two_Factor([0.014, 0.06], [0.8, 1.0], [0.01, 0.015], [0.05, 0.04], 0.6, 1, 0.1, 1000) 57 | 58 | interest_rate_simulation = pd.DataFrame() 59 | brownian_motion = BrownianMotion() 60 | for i in range(nScen): 61 | interest_rate_simulation = pd.concat([interest_rate_simulation, 62 | brownian_motion.simulate_Vasicek_Two_Factor(r0, a, b, sigma, rho, T, dt)['Real Interest Rate']], axis = 1) # collect one simulated real-rate path per scenario 63 | integral = interest_rate_simulation.apply(lambda path: integrate.trapezoid(path.values, x = path.index.values)) # trapezoidal rule over each path's actual time grid; a plain trapz call without x would assume unit spacing instead of dt 64 | self._price = np.mean(np.exp(-integral)) 65 | return self._price 66 | -------------------------------------------------------------------------------- /vasicek_two_factor/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 🐍 Two factor Vasicek model 🐍 4 | 5 |

6 | 7 | A simple model for calculating nominal interest rates, used to add inflation to the simulation of interest rates. The model has two sources of randomness (two correlated Brownian motions). 8 | 9 | ## Problem 10 | When modelling the nominal rate, both the real rate of return and inflation should be considered. The correlation between them means that a multifactor model should be used, as opposed to two independent models. Additionally, there is a robust body of literature showing that both real rates and inflation are mean-reverting. 11 | 12 | ## Solution 13 | The simplest model for modelling real rates and inflation together is the multifactor Vasicek model (https://en.wikipedia.org/wiki/Vasicek_model). The Vasicek model is a short-rate model describing the evolution of interest rates. Both the real rate process and the inflation rate process are assumed to follow a Vasicek process: each factor r evolves as dr(t) = a(b - r(t)) dt + sigma dW(t), and the movement of the two curves is driven by a two-dimensional Brownian motion whose components have correlation rho. 14 | 15 | ### Input 16 | Vasicek model simulator: 17 | - `r0` ... starting annualized real rate and inflation rate. ex. if the annualized real rate is 1.4% and inflation is 6%, then r0 = [0.014, 0.06] 18 | - `a` ... mean reversion speed for the real and inflation process. ex. if the reversion factor is 0.8 for real rates and 1 for inflation, a = [0.8, 1] 19 | - `b` ... long-term mean level for the real and inflation process. ex. if the long-term real rate is 1% and long-term inflation is 1.5%, b = [0.01, 0.015] 20 | - `sigma` ... instantaneous volatility of the real and inflation process. ex. if the volatility of the real rate process is 5% and of the inflation process is 4%, sigma = [0.05, 0.04] 21 | - `rho` ... correlation between the stochastic noise that generates the two processes. ex. if the calculated correlation coefficient is 0.6, rho = 0.6 22 | - `T` ... modelling time horizon. ex. if the time horizon is 25 years, T = 25 23 | - `dt` ... time increment. ex. if the time increment is 6 months, dt = 0.5 24 | 25 | Vasicek model pricing: 26 | - `nScen` ... integer, number of simulations of the Monte Carlo method 27 | 28 | Vasicek model calibration: 29 | TBD 30 | 31 | ### Output 32 | Vasicek model simulator: 33 | - `interest_rate_simulation` is a pandas dataframe with the sample path generated by the model: one column for the real rate process and one for the nominal rate (real rate + inflation rate) 34 | 35 | Vasicek model pricing: 36 | - Price of a Zero-Coupon Bond with maturity T based on the model. The technique used is Monte Carlo simulation (1000 scenarios in the example below) combined with numerical integration.
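Concretely, `price_Vasicek_Two_Factor` in `Pricing.py` estimates the price as the average of exp(-∫ r(s) ds) over the nScen simulated real-rate paths, evaluating each integral over [0, T] with the trapezoidal rule.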
37 | 38 | Vasicek model calibration: 39 | TBD 40 | 41 | ## Getting started 42 | ``` python 43 | import numpy as np 44 | import pandas as pd 45 | import datetime as dtm 46 | 47 | from Vasicek import BrownianMotion 48 | from Pricing import ZeroCouponBond 49 | 50 | from IPython.display import display 51 | import matplotlib.pyplot as plt 52 | 53 | # Vasicek model simulator 54 | r0 = [0.014, 0.06] # Starting annual real rate and annual inflation rate 55 | a = [0.8, 1] # mean reversion speed for real rate and inflation 56 | b = [0.01, 0.015] # long term trend for real rate and inflation 57 | sigma = [0.05, 0.04] # annualized volatility of real rate and inflation process 58 | rho = 0.6 # correlation 59 | T = 25 # time horizon 60 | dt = 0.5 # time increment 61 | 62 | brownian = BrownianMotion() 63 | interest_rate_simulation = brownian.simulate_Vasicek_Two_Factor(r0, a, b, sigma, rho, T, dt) 64 | 65 | display(interest_rate_simulation) 66 | 67 | interest_rate_simulation.plot(figsize = (15,9), grid = True) 68 | plt.legend() 69 | plt.show() 70 | 71 | # Vasicek model pricing of a Zero-Coupon Bond 72 | nScen = 1000 73 | zero_coupon_bond = ZeroCouponBond(1) 74 | zero_coupon_bond.price_Vasicek_Two_Factor(r0, a, b, sigma, rho, zero_coupon_bond._T, dt, nScen) # integrate up to the bond's own maturity 75 | print(zero_coupon_bond._price) 76 | 77 | # Calibration of the Vasicek model 78 | # Defining a zero curve for the example 79 | # Dates = [[2010,1,1], [2011,1,1], [2013,1,1], [2015,1,1], [2017,1,1], [2020,1,1], [2030,1,1]] 80 | # curveDates = [] 81 | # for date in Dates: 82 | # curveDates.append(dtm.date(date[0],date[1],date[2])) 83 | # 84 | # zeroRates = np.array([1.0, 1.9, 2.6, 3.1, 3.5, 4.0, 4.3])/100 85 | 86 | # plt.figure(figsize = (15,9)) 87 | # plt.plot(curveDates,zeroRates) 88 | # plt.title('Yield Curve for ' + str(curveDates[0])) 89 | # plt.xlabel('Date') 90 | # plt.ylabel('Rate') 91 | # plt.show() 92 | # STILL TO DO 93 | ``` 94 | -------------------------------------------------------------------------------- /vasicek_two_factor/Vasicek.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from typing import Any, Optional 4 | from typing import List 5 | 6 | class BrownianMotion(): 7 | 8 | def __init__(self, x0: float = 0) -> None: 9 | 10 | self.x0 = float(x0) 11 | 12 | def generate_weiner_process(self, T: int = 1, dt: float = 0.001, rho: Optional[float] = None) -> Any: 13 | # GENERATE_WEINER_PROCESS calculates the sample paths of a one-dimensional Brownian motion or a two-dimensional Brownian motion with a correlation coefficient of rho. 14 | # The function's output is one or two sample paths (realisations) of such a process, recorded on increments specified by dt. 15 | # W = generate_weiner_process(self, T, dt, rho) 16 | # 17 | # Arguments: 18 | # self = reference to the current instance of the class. This class includes the x0 parameter that specifies the starting value of the Brownian motion 19 | # T = integer, specifying the maximum modeling time. ex. if T = 2 then modelling time will run from 0 to 2 20 | # dt = float, specifying the length of each subinterval. ex. dt = 0.1, then there will be 10 intervals of length 0.1 between two integers of modeling time 21 | # rho = float, specifying the correlation coefficient of the Brownian motion. ex. rho = 0.4 means that two 22 | # Brownian processes on the same modeling time interval have a correlation coefficient of 0.4.
SOURCE 23 | # 24 | # Returns: 25 | # W = ndarray of length N, or a list of two such ndarrays, where N is the number of subintervals; one array is returned for a one-dimensional Brownian motion and two for the correlated two-dimensional case 26 | # Each array represents a sample path of a Brownian motion starting at x0 27 | # 28 | # Example: 29 | # The user wants to generate discrete sample paths of two Brownian motions with a correlation coefficient of 0.4. 30 | # The Brownian motions need to start at 0 at time 0 and run for 3 units of time with an increment of 0.5. 31 | # 32 | # import numpy as np 33 | # from Vasicek import BrownianMotion 34 | # BrownianMotion(0).generate_weiner_process(3, 0.5, 0.4) 35 | # [out] = [array([ 0. , -0.07839855, 0.26515158, 1.15447737, 1.04653442, 36 | # 0.81159737]), 37 | # array([ 0. , -0.78942881, -0.84976461, -1.06830757, -1.21829101, 38 | # -0.61179385])] 39 | # 40 | # Ideas for improvement: 41 | # Remove x0 as a necessary argument 42 | # Generate increments directly 43 | # 44 | # For more information see https://en.wikipedia.org/wiki/Brownian_motion 45 | 46 | N = int(T / dt) # number of increments of length dt between 0 and the maximum modeling time T 47 | 48 | if rho is None: # if no correlation coefficient is given, generate a single, one-dimensional Brownian motion 49 | 50 | W = np.ones(N) * self.x0 # preallocate the output array holding the sample path, starting at the initial point x0 51 | 52 | for i in range(1, N): # add a random normal increment at every step 53 | 54 | W[i] = W[i-1] + np.random.normal(scale = np.sqrt(dt)) # increments over a step of length dt are N(0, dt), i.e. have standard deviation sqrt(dt) 55 | 56 | return W 57 | 58 | else: # if rho is given, the output will be a correlated two-dimensional Brownian motion 59 | 60 | W_1 = np.ones(N) * self.x0 # preallocate the output arrays holding the sample paths, starting at the initial point x0 61 | W_2 = np.ones(N) * self.x0 62 | 63 | for i in range(1, N): # generate two independent increments and entangle them with the formula from SOURCE 64 | 65 | Z1 = np.random.normal(scale = np.sqrt(dt)) # standard deviation sqrt(dt), as above 66 | Z2 = np.random.normal(scale = np.sqrt(dt)) 67 | Z3 = rho * Z1 + np.sqrt(1 - rho**2) * Z2 # Z3 is also N(0, dt), with correlation rho to Z1 68 | 69 | W_1[i] = W_1[i-1] + Z1 # Generate first BM 70 | W_2[i] = W_2[i-1] + Z3 # Generate second BM 71 | 72 | return [W_1, W_2] 73 | 74 | def simulate_Vasicek_Two_Factor(self, r0: List[float] = [0.1, 0.1], a: List[float] = [1.0, 1.0], b: List[float] = [0.1, 0.1], sigma: List[float] = [0.2, 0.2], rho: float = 0.5, T: int = 52, dt: float = 0.1) -> pd.DataFrame: 75 | # SIMULATE_VASICEK_TWO_FACTOR calculates a possible sample path of the nominal interest rate by simulating the real rate and inflation. Both are assumed to follow a mean-reverting Vasicek process 76 | # interest_rate_simulation = simulate_Vasicek_Two_Factor(self, r0, a, b, sigma, rho, T, dt) 77 | # 78 | # Arguments: 79 | # self = reference to the current instance of the class. This class includes the x0 parameter that specifies the starting value of the Brownian motion 80 | # r0 = list with 2 floats, starting interest rate of each Vasicek process 81 | # a = list with 2 floats, speed of reversion of each process that characterizes the velocity at which such trajectories will regroup around each b 82 | # b = list with 2 floats, long term mean level of each process. All future trajectories of r will evolve around a mean level b in the long run 83 | # sigma = list with 2 floats, instantaneous volatility, amplitude of randomness of each process 84 | # rho = float, specifying the correlation coefficient of the Brownian motion. ex.
rho = 0.4 means that two 85 | # Brownian processes on the same modeling time interval have a correlation coefficient of 0.4. SOURCE 86 | # T = integer specifying the maximum modeling time. ex. if T = 2 then modelling time will run from 0 to 2 87 | # dt = float specifying the length of each subinterval. ex. dt = 0.1, then there will be 10 intervals of length 0.1 between two integers of modeling time 88 | # 89 | # Returns: 90 | # interest_rate_simulation = pandas dataframe indexed by the modelling time, with two columns: the Real interest rate and the Nominal interest rate (real rate plus inflation) 91 | # 92 | # Example: 93 | # 94 | # import numpy as np 95 | # import pandas as pd 96 | # from Vasicek import BrownianMotion 97 | # BrownianMotion().simulate_Vasicek_Two_Factor([0.1, 0.2], [1.0, 0.5], [0.1, 0.2], [0.2, 0.2], 0.5, 52, 0.1) 98 | # [out] pandas dataframe with 520 rows, indexed by time, holding the real and the nominal interest rate paths 99 | # 100 | # For more information see SOURCE 101 | 102 | N = int(T / dt) # number of increments of length dt between 0 and the maximum modeling time T 103 | 104 | time = np.arange(N) * dt # modelling time grid of N points spaced dt apart (np.linspace(0, T, N) would give a step slightly different from dt) 105 | 106 | weiner_process = self.generate_weiner_process(T, dt, rho) # generates the two correlated Wiener process (Brownian motion) sample paths 107 | 108 | weiner_process_e = weiner_process[0] 109 | weiner_process_s = weiner_process[1] 110 | 111 | r_e, s = np.ones(N) * r0[0], np.ones(N) * r0[1] 112 | 113 | a_e, a_s = a[0], a[1] 114 | 115 | b_e, b_s = b[0], b[1] 116 | 117 | sigma_e, sigma_s = sigma[0], sigma[1] 118 | 119 | for t in range(1,N): 120 | r_e[t] = r_e[t-1] + a_e * (b_e - r_e[t-1]) * dt + sigma_e * (weiner_process_e[t] - weiner_process_e[t-1]) # Euler step for the real interest rate 121 | s[t] = s[t-1] + a_s * (b_s - s[t-1]) * dt + sigma_s * (weiner_process_s[t] - weiner_process_s[t-1]) # Euler step for the inflation rate 122 | 123 | r_s = r_e + s # Nominal interest rate as real interest rate plus inflation 124 | 125 | data = {'Time' : time, 'Real Interest Rate' : r_e, 'Nominal Interest Rate' : r_s} # named data to avoid shadowing the built-in dict 126 | 127 | interest_rate_simulation = pd.DataFrame.from_dict(data = data) 128 | interest_rate_simulation.set_index('Time', inplace = True) 129 | 130 | return interest_rate_simulation 131 | -------------------------------------------------------------------------------- /vasicek_two_factor/main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import datetime as dt 4 | 5 | from Vasicek import BrownianMotion 6 | from Pricing import Swaption 7 | from Pricing import ZeroCouponBond 8 | from Calibration import Calibrator 9 | 10 | from IPython.display import display 11 | import matplotlib.pyplot as plt 12 | 13 | brownian = BrownianMotion() 14 | interest_rate_simulation = brownian.simulate_Vasicek_Two_Factor() # simulate with the default parameters 15 | 16 | display(interest_rate_simulation) 17 | 18 | interest_rate_simulation.plot(figsize = (15,9), grid = True) 19 | plt.legend() 20 | plt.show() 21 | 22 | # Defining a zero curve for the example 23 | Dates = [[2010,1,1], [2011,1,1], [2013,1,1], [2015,1,1], [2017,1,1], [2020,1,1], [2030,1,1]] 24 | curveDates = [] 25 | for date in Dates: 26 | curveDates.append(dt.date(date[0],date[1],date[2])) 27 | 28 | zeroRates = np.array([1.0, 1.9, 2.6, 3.1, 3.5, 4.0, 4.3])/100 29 | 30 | plt.figure(figsize = (15,9)) 31 | plt.plot(curveDates,zeroRates) 32 | plt.title('Yield Curve for ' + str(curveDates[0])) 33 | plt.xlabel('Date') 34 | plt.ylabel('Rate') 35 | plt.show() 36 | 37 | zero_coupon_bond = ZeroCouponBond(1) 38 |
zero_coupon_bond.price_Vasicek_Two_Factor([0.014, 0.06], [0.8, 1.0], [0.01, 0.015], [0.05, 0.04], 0.6, 1, 0.1, 1000) # (r0, a, b, sigma, rho, T, dt, nScen); the README example parameters, with T equal to the bond's 1-year maturity 39 | print(zero_coupon_bond._price) 40 |
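41 | # Illustrative sanity check (an added sketch; T, dt and rho below are arbitrary demonstration values): 42 | # the increments of the two correlated Brownian paths should have a sample correlation close to rho 43 | W_1, W_2 = brownian.generate_weiner_process(T = 50, dt = 0.01, rho = 0.6) 44 | increments = np.diff(np.stack([W_1, W_2]), axis = 1) # per-step increments of each path 45 | print(np.corrcoef(increments)[0, 1]) # sample correlation, expected to be close to 0.6 46 | --------------------------------------------------------------------------------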