├── marketmoodring ├── __init__.py ├── tools │ ├── __init__.py │ ├── data_checks.py │ └── portfolio_opt.py ├── regime_detection │ ├── __init__.py │ ├── hidden_markov.py │ ├── base.py │ └── wasserstein_kmeans.py └── portfolio_optimization │ ├── __init__.py │ ├── base.py │ ├── simple_factor_model.py │ ├── factor_model.py │ ├── idosyncratic_factor_model.py │ └── joint_stochastic_prog.py ├── requirements.txt ├── reference └── project_paper.pdf ├── environment.yml ├── CITATION.cff ├── LICENSE ├── .gitignore └── README.md /marketmoodring/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /marketmoodring/tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | hmmlearn==0.3.* 3 | pandas 4 | pot==0.9.* 5 | pyportfolioopt 6 | statsmodels -------------------------------------------------------------------------------- /reference/project_paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yvesdhondt/MarketMoodRing/HEAD/reference/project_paper.pdf -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: marketmoodring-env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python 6 | - numpy 7 | - hmmlearn==0.3.* 8 | - pandas 9 | - pot==0.9.* 10 | - pyportfolioopt 11 | - statsmodels -------------------------------------------------------------------------------- /marketmoodring/regime_detection/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | API 
reference documentation for the regime detection models 3 | """ 4 | from marketmoodring.regime_detection.base import RegimeDetectionError 5 | from marketmoodring.regime_detection.hidden_markov import HiddenMarkovRegimeDetection 6 | from marketmoodring.regime_detection.wasserstein_kmeans import WassersteinKMeansRegimeDetection 7 | -------------------------------------------------------------------------------- /marketmoodring/portfolio_optimization/__init__.py: -------------------------------------------------------------------------------- 1 | from marketmoodring.portfolio_optimization.base import PortfolioOptimizationError 2 | from marketmoodring.portfolio_optimization.simple_factor_model import SimpleFactorPortfolioOptimization 3 | from marketmoodring.portfolio_optimization.idosyncratic_factor_model import IdiosyncraticFactorPortfolioOptimization 4 | from marketmoodring.portfolio_optimization.joint_stochastic_prog import JointStochasticProgOptimization 5 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
def reconcile_regime_data_arg(data: Union[np.ndarray, pd.DataFrame]) -> tuple:
    """
    Internal function to verify the input type and convert it to numpy arrays

    Parameters
    ----------
    data : np.ndarray or pd.DataFrame
        Data to check

    Returns
    -------
    (data, index) : (np.ndarray, np.ndarray or None)
        The values as a numpy array, together with the time index. The index is only returned when
        ``data`` is a DataFrame indexed by a ``pd.DatetimeIndex``; in every other case it is ``None``.
        A numpy input is handed back as-is (no copy is made).

    Raises
    ------
    ValueError
        If ``data`` is neither a numpy array nor a pandas DataFrame.
    """
    # numpy input carries no index information
    if isinstance(data, np.ndarray):
        return data, None

    if not isinstance(data, pd.DataFrame):
        raise ValueError("data must be a numpy Array or a pandas DataFrame")

    # Only a datetime index is meaningful as a time axis; other index types are dropped
    index = data.index.to_numpy() if isinstance(data.index, pd.DatetimeIndex) else None
    return data.to_numpy(), index
class PortfolioOptimizationError(Exception):
    """Exception type dedicated to the portfolio optimization package."""


class PortfolioOptimizationBase(ABC):
    """
    Common interface shared by all regime-dependent portfolio optimizers.

    The class is abstract: it cannot be instantiated on its own and exists only to be subclassed.

    Subclasses have to implement:
        - calculate_weights
    """

    def __init__(self, n_regimes, *args, **kwargs):
        # Extra positional / keyword arguments are accepted but not used at this level
        self.n_regimes = n_regimes

    @abstractmethod
    def calculate_weights(self, fitted_states: np.ndarray, trans_mat: np.ndarray,
                          index_data: Union[np.ndarray, pd.DataFrame], *args, **kwargs):
        """
        Calculate portfolio weights (to be provided by the subclass).

        Parameters
        ----------
        fitted_states : np.ndarray
            Array of fitted states.

        trans_mat : np.ndarray
            Transition matrix.

        index_data : Union[np.ndarray, pd.DataFrame]
            Index data used for weight calculation.

        *args, **kwargs :
            Additional arguments and keyword arguments.

        Returns
        -------
        None
            The abstract stub itself has no body and therefore returns None; what an implementation
            returns is defined by the subclass.
        """
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
class SimpleFactorPortfolioOptimization(FactorPortfolioOptimization):
    """
    Regime-dependent factor model with a single (regime-independent) intercept and residual covariance.
    """

    def _get_regime_based_mean_cov(self, index_data, factor_data, trans_mat: Union[np.ndarray, pd.DataFrame],
                                   fitted_states):
        """
        Get the regime-dependent expected return vector and variance covariance matrix of the given assets according
        to the factor-based model proposed by Costa & Kwon (2019). Here idiosyncratic risk is assumed to be independent
        of regime shifts.

        Parameters
        ----------
        index_data : pandas.DataFrame
            A DataFrame with the time series asset returns.
        factor_data : pandas.DataFrame
            A DataFrame with the time series factor returns.
        trans_mat : Union[numpy.ndarray, pandas.DataFrame]
            The transition probability matrix of the regime switching model. It is read positionally
            (row = current regime, column = next regime) for both arrays and DataFrames.
        fitted_states : numpy.ndarray[numpy.int64]
            The fitted states of the regime switching model, one label per row of the data.

        Returns
        -------
        Tuple[numpy.ndarray[numpy.float64], numpy.ndarray[numpy.float64]]
            A tuple (mu, sigma) containing the expected return vector (as a column) and covariance matrix of the
            assets.
        """
        factor_names = list(factor_data.columns)
        n_factors = len(factor_names)
        states = np.asarray(fitted_states)
        current_state = int(states[-1])

        # Convert first so that a DataFrame is indexed [row][col] like an ndarray; ``df[a][b]`` would
        # select column a / row b, i.e. the transposed transition probability.
        probs = np.asarray(trans_mat)[current_state]

        # Transform factors by indicator function to allow for OLS estimation of regime-dependent FF3 model:
        # one column per (regime, factor) that holds the factor return inside the regime and 0 outside of it.
        # Columns are ordered regime-major, which is what the slicing of the OLS parameters below relies on.
        regime_columns = {}
        for state in range(self.n_regimes):
            in_state = states == state
            for fn in factor_names:
                regime_columns[f"{fn}_{state}"] = np.where(in_state, factor_data[fn].to_numpy(), 0)
        X = pd.DataFrame(regime_columns, index=factor_data.index)

        # Fit regime-dependent Factor model (row 0 of the parameters is the shared intercept)
        ols = sm.OLS(index_data, sm.add_constant(X)).fit()
        params = ols.params.values

        alpha = params[0:1, :].reshape(-1, 1)
        D = ols.resid.cov().values

        loadings = []       # V_s   : (n_factors, n_assets) factor loadings in regime s
        factor_covs = []    # F_s   : factor covariance estimated on the observations of regime s
        factor_means = []   # V_s^T f_bar_s : factor-driven expected asset return in regime s
        for state in range(self.n_regimes):
            in_state = states == state
            V = params[1 + state * n_factors: 1 + (1 + state) * n_factors, :]
            f_bar = factor_data[in_state].mean().values.reshape(-1, 1)
            loadings.append(V)
            factor_covs.append(factor_data[in_state].cov().values)
            factor_means.append(V.T @ f_bar)

        # Construct regime-dependent expected return and variance-covariance matrices.
        # New arrays are built (no in-place +=) so the fitted OLS parameters are never modified.
        mu = alpha.copy()
        sigma = D.copy()
        for state in range(self.n_regimes):
            p_state = probs[state]
            V = loadings[state]
            m_state = factor_means[state]

            # update mu
            mu = mu + p_state * m_state

            # update sigma: within-regime factor risk plus the dispersion of the regime means
            sigma = sigma + p_state * (V.T @ factor_covs[state] @ V) \
                + p_state * (1 - p_state) * (m_state @ m_state.T)
            for other_state in range(self.n_regimes):
                if other_state == state:
                    continue
                sigma = sigma - p_state * probs[other_state] * (m_state @ factor_means[other_state].T)

        return mu.reshape(-1, 1), sigma

    def __str__(self):
        return "SimpleFactorOpt"
24 | n_iter : int 25 | The number of iterations to run the HMM model 26 | covar_type : str 27 | The type of covariance matrix to use, options are "spherical", "full", "diag" or "tied" 28 | """ 29 | super().__init__(n_regimes, *args, **kwargs) 30 | 31 | if hmm_type not in ("GaussianHMM", "GMMHMM"): 32 | raise RegimeDetectionError("given type is not one of the available options") 33 | if n_iter < 0: 34 | raise RegimeDetectionError("n_iter has to be larger than 0") 35 | 36 | self._type = hmm_type 37 | self._n_iter = n_iter 38 | self._covar_type = covar_type 39 | # Conditional import, only import if class is ever created 40 | if HiddenMarkovRegimeDetection.hmm is None: 41 | HiddenMarkovRegimeDetection.hmm = importlib.import_module( 42 | "hmmlearn.hmm" 43 | ) 44 | 45 | def _fit(self, data: np.ndarray, index: np.ndarray = None, *args, **kwargs): 46 | """ 47 | Fit a Gaussian HMM or GMM HMM on the given data. 48 | 49 | Parameters 50 | ---------- 51 | data : numpy.ndarray 52 | A matrix of time series data to fit the HMM on. 53 | index : numpy.ndarray, optional 54 | The time index of the time series data. 55 | *args : tuple 56 | Any additional positional arguments from the superclass, if any. 57 | **kwargs : dict 58 | Any additional keyword arguments from the superclass, if any. 59 | 60 | Returns 61 | ------- 62 | None 63 | This method does not return anything, but it sets the following instance variables: 64 | 65 | _model : hmmlearn.hmm.GaussianHMM or hmmlearn.hmm.GMMHMM 66 | The fitted HMM model. 67 | _fitted_states : numpy.ndarray 68 | The fitted states of the HMM model. 69 | _fitted_states_proba : numpy.ndarray 70 | The fitted state probabilities of the HMM model. 71 | _trans_mat : numpy.ndarray 72 | The transition probability matrix of the HMM model. 
class FactorPortfolioOptimization(PortfolioOptimizationBase, ABC):
    def __init__(self, n_regimes, optimizer="ERC", *args, **kwargs):
        """
        Set up a factor-model based portfolio optimizer.

        Parameters
        ----------
        n_regimes : int
            The number of regimes used in the portfolio optimization.
        optimizer : str
            Name of the weighting scheme applied to the estimated moments, either "ERC" or "MVO".
        *args : tuple
            Extra positional arguments, forwarded to the superclass.
        **kwargs : dict
            Extra keyword arguments, forwarded to the superclass.

        Raises
        ------
        ValueError
            If the given optimizer is not one of "ERC" or "MVO".
        """
        super().__init__(n_regimes, *args, **kwargs)

        # Walk the supported names in order and instantiate the first match
        for label, factory in (("ERC", ERC), ("MVO", MVO)):
            if optimizer == label:
                self._optimizer = factory()
                break
        else:
            raise ValueError("Given optimizer is not one of ERC or MVO")

    def calculate_weights(self, fitted_states: np.ndarray, trans_mat: np.ndarray,
                          index_data: Union[np.ndarray, pd.DataFrame],
                          factor_data: Union[np.ndarray, pd.DataFrame] = None, *args, **kwargs):
        """
        Calculate portfolio weights from the regime-dependent factor model.

        The expected returns and covariance are estimated by ``_get_regime_based_mean_cov`` and then handed
        to the optimizer selected at construction time.

        Parameters
        ----------
        fitted_states : numpy.ndarray
            An array of fitted state labels.
        trans_mat : numpy.ndarray
            A matrix of regime transition probabilities.
        index_data : numpy.ndarray or pandas.DataFrame
            A matrix of time series returns of the assets to be included in the portfolio. This time-series has to
            match 1:1 with the fitted_states labels.
        factor_data : numpy.ndarray or pandas.DataFrame, optional
            A matrix of time series returns of the factors to be used for the regime-dependent factor model. This
            time-series has to match 1:1 with the fitted_states labels. It is passed through unchanged, so
            whether ``None`` is acceptable depends on the concrete subclass.
        *args : tuple
            Any additional positional arguments (unused here).
        **kwargs : dict
            Any additional keyword arguments (unused here).

        Returns
        -------
        Whatever the selected optimizer's ``calculate_weights(mean, cov)`` returns.
        """
        expected_returns, covariance = self._get_regime_based_mean_cov(
            index_data, factor_data, trans_mat, fitted_states
        )
        weights = self._optimizer.calculate_weights(expected_returns, covariance)
        return weights

    @abstractmethod
    def _get_regime_based_mean_cov(self, index_data, factor_data, trans_mat: Union[np.ndarray, pd.DataFrame],
                                   fitted_states):
        """
        Estimate the regime-dependent expected return vector and variance covariance matrix of the given assets
        according to the factor-based model proposed by Costa & Kwon (2020). To be implemented by subclasses.

        Parameters
        ----------
        index_data : numpy.ndarray or pandas.DataFrame
            A matrix of time series asset returns.
        factor_data : numpy.ndarray or pandas.DataFrame
            A matrix of time series factor returns.
        trans_mat : numpy.ndarray or pandas.DataFrame
            The transition probability matrix of the regime switching model.
        fitted_states : numpy.ndarray
            The fitted states of the regime switching model.

        Returns
        -------
        tuple
            A tuple (mu, sigma) containing the expected return vector and covariance matrix of the assets.

            mu : numpy.ndarray
                The regime-dependent expected return vector of the assets.
            sigma : numpy.ndarray
                The regime-dependent covariance matrix of the assets.
        """
It aims to provide a framework for financial market regime analysis and portfolio management testing. 6 | 7 | ## Collaborators 8 | - [Yves D'hondt](https://github.com/yvesdhondt) 9 | - [Matteo Di Venti](https://github.com/MatteoMarioDiVenti) 10 | - [Rohan Rishi](https://github.com/RohanRishi) 11 | - [Jackson Walker](https://github.com/jacksonrgwalker/) 12 | 13 | ## Reference 14 | 15 | The `reference/` folder contains the [project paper](reference/project_paper.pdf) from which this library originated. 16 | 17 | ## Features 18 | 19 | - **Regime Detection Models**: Currently includes Hidden Markov Models (HMM) and Wasserstein K-Means clustering (WKM). 20 | - **Portfolio Optimizers**: Implements different portfolio optimization strategies, including stochastic programming and factor-based optimization. 21 | 22 | 23 | ## Installation 24 | 25 | As of now, the MarketMoodRing package is not available on PyPI. To install and use this package, you need to clone the repository and reference it locally. Here are the steps to do so: 26 | 27 | 1. Clone the repository: 28 | 29 | ```bash 30 | git clone https://github.com/yvesdhondt/MarketMoodRing.git 31 | ``` 32 | 33 | 2. Navigate to the cloned directory: 34 | 35 | ```bash 36 | cd MarketMoodRing 37 | ``` 38 | 39 | 3. Now, you can import and use the package in your Python scripts. Make sure your script is in the same directory as the cloned repository or adjust the Python path accordingly. 40 | 41 | ```python 42 | import sys 43 | sys.path.insert(0, '/path/to/MarketMoodRing') 44 | 45 | from marketmoodring.regime_detection import HiddenMarkovRegimeDetection 46 | ``` 47 | 48 | Please replace `/path/to/MarketMoodRing` with the actual path to the cloned repository on your system. 49 | 50 | Remember to keep the repository updated with: 51 | 52 | ```bash 53 | git pull origin main 54 | ``` 55 | 56 | ## Dependencies 57 | 58 | The MarketMoodRing package requires several dependencies to function properly. 
These dependencies can be installed using either conda (recommended) or pip. 59 | 60 | ### Using Conda 61 | 62 | If you're using Conda, you can create a new environment and install all dependencies using the `environment.yml` file located in the root directory. Run the following command in your terminal / Anaconda Prompt once you've navigated to the cloned repository: 63 | 64 | ```bash 65 | conda env create -f environment.yml 66 | ``` 67 | 68 | This will create a new Conda environment called `marketmoodring-env` and install all necessary packages. To activate the environment, use: 69 | 70 | ```bash 71 | conda activate marketmoodring-env 72 | ``` 73 | 74 | ### Using pip 75 | 76 | If you prefer using pip, you can install all dependencies using the `requirements.txt` file also located in the root directory. Run the following command in your terminal: 77 | 78 | ```bash 79 | pip install -r requirements.txt 80 | ``` 81 | 82 | This will install all the necessary packages listed in the `requirements.txt` file. Please note that this will install the packages globally on your system, which can result in unexpected behavior. If you want to install the packages in a virtual environment, please refer to the [Python documentation](https://docs.python.org/3/tutorial/venv.html), or use the Conda environment as described above. 
class IdiosyncraticFactorPortfolioOptimization(FactorPortfolioOptimization):
    """
    Regime-dependent factor model in which the intercept and the residual covariance are estimated per regime.
    """

    def _get_regime_based_mean_cov(self, index_data, factor_data, trans_mat: Union[np.ndarray, pd.DataFrame],
                                   fitted_states):
        """
        Get the regime-dependent expected return vector and variance covariance matrix of the given assets according
        to the factor-based model proposed by Costa & Kwon (2020). Here idiosyncratic risk is assumed to be regime-
        dependent and factors are centered before being used.

        Parameters
        ----------
        index_data : pandas.DataFrame
            A data frame with the time series asset returns.
        factor_data : pandas.DataFrame
            A data frame with the time series factor returns.
        trans_mat : numpy.ndarray or pandas.DataFrame
            The transition probability matrix of the regime switching model. It is read positionally
            (row = current regime, column = next regime) for both arrays and DataFrames.
        fitted_states : numpy.ndarray
            The fitted states of the regime switching model, one label per row of the data.

        Returns
        -------
        tuple
            A tuple (mu, sigma) containing the expected return vector and covariance matrix of the assets.

            mu : numpy.ndarray
                The regime-dependent expected return vector of the assets, as a column vector.
            sigma : numpy.ndarray
                The regime-dependent covariance matrix of the assets.
        """
        factor_names = factor_data.columns
        n_factors = len(factor_names)
        states = np.asarray(fitted_states)
        current_state = int(states[-1])

        # Convert first so that a DataFrame is indexed [row][col] like an ndarray; ``df[a][b]`` would
        # select column a / row b, i.e. the transposed transition probability.
        probs = np.asarray(trans_mat)[current_state]

        # De-mean factors (creates a new frame, the caller's factor_data is left untouched)
        X = factor_data - factor_data.mean(axis=0)

        ols, state_factors = self._build_factor_model(X, index_data, factor_names, states)
        params = ols.params.values

        # The design matrix holds, per regime, n_factors loading columns followed by one intercept column
        block = n_factors + 1
        for state in range(self.n_regimes):
            in_state = states == state
            first = state * block
            state_factors[state]["V"] = params[first: first + n_factors, :]
            state_factors[state]["F"] = factor_data[in_state].cov().values
            # Stored as a column vector so that ``mu @ mu.T`` below is an outer product (n_assets x n_assets);
            # with a 1-D array ``@`` would collapse to a scalar inner product.
            state_factors[state]["mu"] = params[first + n_factors, :].reshape(-1, 1)
            state_factors[state]["D"] = ols.resid[in_state].cov().values

        # Construct regime-dependent expected return and variance-covariance matrices
        n_assets = params.shape[1]
        mu = np.zeros((n_assets, 1))
        sigma = np.zeros((n_assets, n_assets))
        for state in range(self.n_regimes):
            p_state = probs[state]
            V = state_factors[state]["V"]
            mu_state = state_factors[state]["mu"]

            # update mu
            mu = mu + p_state * mu_state

            # update sigma: expected within-regime covariance plus the dispersion of the regime means
            sigma = sigma + p_state * (V.T @ state_factors[state]["F"] @ V + state_factors[state]["D"]) \
                + p_state * (1 - p_state) * (mu_state @ mu_state.T)

            for other_state in range(self.n_regimes):
                if other_state == state:
                    continue
                # Cross term between the means of two *different* regimes
                sigma = sigma - p_state * probs[other_state] * (mu_state @ state_factors[other_state]["mu"].T)

        return mu.reshape(-1, 1), sigma

    def _build_factor_model(self, X, Y, factor_names, fitted_states):
        """
        Build a regime-factor model of target asset returns (Y) to factors (X) for the given factor_names and
        fitted_states. Alpha is regime-dependent in this model.

        Parameters
        ----------
        X : pandas.DataFrame
            Factor returns; columns are looked up by the entries of ``factor_names``. Not modified.
        Y : pandas.DataFrame
            Target asset returns.
        factor_names : list of str
            A list of names of the factors in the factor model.
        fitted_states : numpy.ndarray
            An array of regime labels, one per row of ``X``.

        Returns
        -------
        tuple
            A tuple consisting of an OLS model and a dictionary of state-factor information.

            ols : statsmodels.regression.linear_model.RegressionResultsWrapper
                The OLS model, fitted without a global constant.
            state_factors : dict
                A dictionary of state-factor information.

                names : list of str
                    A list of names of the state factors (the regime constant ``mu_<state>`` comes last).
        """
        states = np.asarray(fitted_states)

        # Transform factors by indicator function to allow for OLS estimation of regime-dependent FF3 model
        design_columns = {}
        state_factors = {}
        for state in range(self.n_regimes):
            in_state = states == state
            names = []
            for fn in factor_names:
                name = f"{fn}_{state}"
                # Factor return inside the regime, 0 everywhere else
                design_columns[name] = np.where(in_state, X[fn].to_numpy(), 0)
                names.append(name)
            # Add regime-dependent constant
            const_name = f"mu_{state}"
            design_columns[const_name] = in_state.astype(int)
            names.append(const_name)
            state_factors[state] = {"names": names}

        # Dict insertion order gives the regime-major column layout the parameter slicing depends on
        design = pd.DataFrame(design_columns, index=X.index)

        # Fit regime-dependent Factor model
        ols = sm.OLS(Y, design).fit()
        return ols, state_factors

    def __str__(self):
        return "IdiosyncraticFactorOpt"
23 | """ 24 | pass 25 | 26 | 27 | class ERC(PortfolioOptimizer): 28 | def __init__(self): 29 | pass 30 | 31 | def calculate_weights(self, mu, cov): 32 | """ 33 | Calculate ERC portfolio weights 34 | 35 | Parameters 36 | ---------- 37 | mu : np.ndarray 38 | Mean vector 39 | sigma : np.ndarray 40 | Covariance matrix 41 | 42 | Returns 43 | ------- 44 | np.ndarray 45 | array with equal risk contribution (ERC) weights for the given assets 46 | """ 47 | # Set up optimization 48 | num_assets = cov.shape[0] 49 | w_guess = np.repeat(1 / num_assets, num_assets) 50 | constraints = ({'type': 'eq', 'fun': lambda w: np.sum(w) - 1.0}) 51 | bounds = tuple((0, 1) for asset in range(num_assets)) 52 | 53 | # Optimize 54 | result = minimize(self._erc_objective, w_guess, args=(cov), method='SLSQP', constraints=constraints, 55 | bounds=bounds) 56 | 57 | # Return weights 58 | return result.x 59 | 60 | def _calculate_var(self, w: np.ndarray, sigma: np.ndarray) -> float: 61 | """ 62 | Calculate the variance of the portfolio with the given weights, w, and asset covariances, sigma 63 | 64 | Parameters 65 | ---------- 66 | w : np.ndarray 67 | array of asset weights 68 | sigma : np.ndarray 69 | matrix of asset covariances 70 | 71 | Returns 72 | ------- 73 | float 74 | estimated variance of the portoflio 75 | """ 76 | # Calculate portfolio variance 77 | return w.T @ sigma @ w 78 | 79 | def _calculate_mctr(self, w: np.ndarray, sigma: np.ndarray) -> np.ndarray: 80 | """ 81 | Calculate the marginal contribution to risk (MCTR) of each portfolio asset for the given weights, w, and 82 | asset covariances, sigma 83 | 84 | Parameters 85 | ---------- 86 | w : np.ndarray 87 | array of asset weights 88 | sigma : np.ndarray 89 | matrix of asset covariances 90 | 91 | Returns 92 | ------- 93 | np.ndarray 94 | array of MCTR for each asset 95 | """ 96 | # Calculate risk contribution of each asset 97 | portfolio_var = self._calculate_var(w, sigma) 98 | return w * (sigma @ w) / portfolio_var 99 | 100 | def 
_erc_objective(self, w: np.ndarray, sigma: np.ndarray) -> float: 101 | """ 102 | Calculate the objective function for an equal risk contribution portfolio (ERC) 103 | 104 | Parameters 105 | ---------- 106 | w : np.ndarray 107 | array of asset weights 108 | sigma : np.ndarray 109 | matrix of asset covariances 110 | 111 | Returns 112 | ------- 113 | float 114 | MSE of the asset MCTRs vs the average MCTR 115 | """ 116 | # Objective function to minimize 117 | mctr = self._calculate_mctr(w, sigma) 118 | return np.sum((mctr - mctr.mean()) ** 2) 119 | 120 | 121 | class MVO(PortfolioOptimizer): 122 | def __init__(self): 123 | pass 124 | 125 | def calculate_weights(self, mu, sigma, rf=0): 126 | """ 127 | Calculate ERC portfolio weights 128 | 129 | Parameters 130 | ---------- 131 | mu : np.ndarray 132 | Mean vector 133 | sigma : np.ndarray 134 | Covariance matrix 135 | 136 | Returns 137 | ------- 138 | np.ndarray 139 | array with equal risk contribution (ERC) weights for the given assets 140 | """ 141 | """ 142 | ones = np.ones(sigma.shape[0]).reshape(-1, 1) 143 | 144 | return (inv(sigma) @ (mu - rf * ones) / ( 145 | ones.T @ inv(sigma) @ (mu - rf * ones) 146 | )).reshape(-1,) 147 | """ 148 | # Set up optimization 149 | num_assets = sigma.shape[0] 150 | w_guess = np.repeat(1 / num_assets, num_assets) 151 | 152 | args = (mu, sigma, rf) 153 | constraints = ({ 154 | 'type': 'eq', 155 | 'fun': lambda x: np.sum(x) - 1 156 | }) 157 | result = minimize( 158 | fun=self.mvo_objective, 159 | x0=w_guess, 160 | args=args, 161 | method='SLSQP', 162 | bounds=tuple((0.0, 1.0) for _ in range(num_assets)), 163 | constraints=constraints, 164 | tol=1e-5 165 | ) 166 | 167 | if result['success']: 168 | return (result['x'] / np.sum(result['x'])).reshape(-1,) 169 | else: 170 | return None 171 | 172 | def mvo_objective(self, weights, mu, sigma, rf=0): 173 | """ 174 | Objective function to minimize to find the MVO portfolio weights 175 | 176 | Parameters 177 | ---------- 178 | weights : np.ndarray 
179 | array of asset weights 180 | mu : np.ndarray 181 | Mean vector 182 | sigma : np.ndarray 183 | Covariance matrix 184 | rf : float 185 | Risk free rate 186 | 187 | Returns 188 | ------- 189 | negative Sharpe Ratio for the given weights 190 | """ 191 | # Return negative Sharpe ratio 192 | weights = weights.reshape(-1, 1) 193 | return - (weights.T @ mu - rf) / np.sqrt(weights.T @ sigma @ weights) 194 | -------------------------------------------------------------------------------- /marketmoodring/regime_detection/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | API reference documentation for the base regime detection class 3 | """ 4 | 5 | from abc import ABC, abstractmethod 6 | 7 | import numpy as np 8 | import pandas as pd 9 | from typing import Union 10 | 11 | from marketmoodring.tools.data_checks import reconcile_regime_data_arg 12 | 13 | 14 | class RegimeDetectionError(Exception): 15 | pass 16 | 17 | 18 | class RegimeDetectionBase(ABC): 19 | """ 20 | A base class for regime detection. 21 | This class is not meant to be used directly, but rather to be inherited by other classes. 22 | The purpose of this class is to provide a common interface for all regime detection classes. 23 | 24 | The following methods must be implemented by any class that inherits this class: 25 | - fit 26 | - transform 27 | 28 | The following method is optional to implement: 29 | - fit_transform 30 | """ 31 | 32 | def __init__(self, n_regimes: Union[int, None], *args, **kwargs) -> None: 33 | """ 34 | A class to handle regime detection. 
35 | 36 | Parameters 37 | ---------- 38 | n_regimes : int 39 | The number of regimes to transform the data to 40 | """ 41 | if (n_regimes is not None) and n_regimes <= 0: 42 | raise ValueError("Number of regimes has to be larger than 0 or ") 43 | self.n_regimes = int(n_regimes) if n_regimes is not None else None 44 | self._fit_called = False 45 | self._fitted_states = None 46 | self._fitted_states_proba = None 47 | self._trans_mat = None 48 | self._model = None 49 | 50 | @abstractmethod 51 | def _fit(self, data: np.ndarray, index: np.ndarray = None, *args, **kwargs): 52 | """ 53 | Abstract method to be implemented by any class that inherits this class. 54 | Checking arguments is done by the `fit` method that wraps this method. 55 | """ 56 | pass 57 | 58 | def fit(self, data: Union[np.ndarray, pd.DataFrame], *args, **kwargs): 59 | """ 60 | Fits the model to the data. 61 | Wraps the _fit method which is implemented by any class that inherits this class. 62 | 63 | Parameters 64 | ---------- 65 | data : numpy Array or pandas DataFrame 66 | The data to fit the regimes on 67 | *args, **kwargs 68 | Any additional arguments to be passed to the _fit method 69 | 70 | Returns 71 | ------- 72 | None 73 | """ 74 | fit_data, fit_index = reconcile_regime_data_arg(data) 75 | self._fit(fit_data, fit_index, *args, **kwargs) 76 | self._fit_called = True 77 | 78 | @abstractmethod 79 | def _transform(self, data: np.ndarray, index: np.ndarray = None, *args, **kwargs): 80 | """ 81 | Abstract method to be implemented by any class that inherits this class. 82 | Checking arguments is done by the `transform` method that wraps this method. 83 | """ 84 | pass 85 | 86 | def transform(self, data: Union[np.ndarray, pd.DataFrame], *args, **kwargs): 87 | """ 88 | Transforms the data using the fitted model. 89 | Wraps the _transform method which is implemented by any class that inherits this class. 
90 | 91 | Parameters 92 | ---------- 93 | data : np.ndarray or pd.DataFrame 94 | The data to transform using the fitted model 95 | *args, **kwargs 96 | Any additional arguments to be passed to the _transform method 97 | 98 | Returns 99 | ------- 100 | tuple 101 | Two numpy arrays containing the (1) predicted states and (2) predicted 102 | state probabilities 103 | """ 104 | 105 | transform_data, transform_index = reconcile_regime_data_arg(data) 106 | 107 | if not self._fit_called: 108 | raise RegimeDetectionError("fit must be called before transform") 109 | return self._transform(transform_data, transform_index, *args, **kwargs) 110 | 111 | def fit_transform(self, data: Union[np.ndarray, pd.DataFrame], *args, **kwargs): 112 | """ 113 | Fit and transform the given data with a single function call. This function behaves the same 114 | as sequentially calling fit and transform. 115 | 116 | Parameters 117 | ---------- 118 | data : np.ndarray or pd.DataFrame 119 | The data to fit and transform 120 | *args, **kwargs 121 | Any additional arguments to be passed to the fit_transform method 122 | 123 | Returns 124 | ------- 125 | tuple 126 | Two numpy arrays containing the (1) predicted states and (2) predicted 127 | state probabilities 128 | """ 129 | self.fit(data, *args, **kwargs) 130 | return self.transform(data, *args, **kwargs) 131 | 132 | def get_fitted_states(self): 133 | if not self._fit_called: 134 | raise RegimeDetectionError( 135 | "fit must be called before retrieving fitted states" 136 | ) 137 | return self._fitted_states 138 | 139 | def get_fitted_states_proba(self): 140 | if not self._fit_called: 141 | raise RegimeDetectionError( 142 | "fit must be called before retrieving fitted states" 143 | ) 144 | return self._fitted_states_proba 145 | 146 | def get_trans_mat(self): 147 | if not self._fit_called: 148 | raise RegimeDetectionError( 149 | "fit must be called before retrieving fitted transition matrix" 150 | ) 151 | return self._trans_mat 152 | 153 | 154 | 
class NonParametricRegimeDetection(RegimeDetectionBase, ABC):
    """
    A class to handle non-parametric regime detection
    i.e. clustering methods, and not Hidden Markov Models.

    Clustering methods only produce state labels, so the transition matrix is estimated
    empirically from the fitted label sequence after every call to `fit`.
    """

    def fit(self, data: Union[np.ndarray, pd.DataFrame], *args, **kwargs):
        """
        Fit the model to the data and estimate the empirical transition matrix from the fitted states.

        Parameters
        ----------
        data : np.ndarray or pd.DataFrame
            The data to fit the regimes on
        *args, **kwargs
            Any additional arguments to be passed to the _fit method

        Returns
        -------
        None
        """
        super().fit(data, *args, **kwargs)
        self._fit_empirical_trans_matrix()

    def _fit_empirical_trans_matrix(self):
        """
        Fit an empirical transition matrix using the fitted labeled data and store it in `self._trans_mat`.

        Transitions are counted between consecutive fitted states; pairs in which either label is NaN
        (e.g. observations that could not be labeled) are ignored. Every cell receives one pseudo-count
        (Laplace smoothing) so that no transition has zero probability, after which rows are normalized.

        The matrix is always square and indexed by the state label: row i / column j hold the
        probability of moving from state i to state j, also for states that never occur in the
        fitted labels.

        Returns
        -------
        None

        Raises
        ------
        RegimeDetectionError
            If no fitted states are available.
        """
        if self._fitted_states is None:
            raise RegimeDetectionError("Model has not been fitted yet or did not produce fitted states")

        states = np.asarray(self._fitted_states, dtype=np.float64).ravel()
        start_states, end_states = states[:-1], states[1:]
        observed = ~(np.isnan(start_states) | np.isnan(end_states))
        start_states = start_states[observed].astype(np.int64)
        end_states = end_states[observed].astype(np.int64)

        # Size the matrix by the configured number of regimes so that unobserved states keep their
        # row/column; fall back to the largest observed label when n_regimes is not set.
        labels = states[~np.isnan(states)]
        n_observed = int(labels.max()) + 1 if labels.size else 0
        n_states = n_observed if self.n_regimes is None else max(int(self.n_regimes), n_observed)

        counts = np.zeros((n_states, n_states), dtype=np.float64)
        np.add.at(counts, (start_states, end_states), 1.0)

        # Laplace smoothing
        counts += 1.0
        self._trans_mat = counts / counts.sum(axis=1, keepdims=True)


# --------------------------------------------------------------------------------
# /marketmoodring/portfolio_optimization/joint_stochastic_prog.py:
# --------------------------------------------------------------------------------
from marketmoodring.portfolio_optimization.base import (
    PortfolioOptimizationBase,
    PortfolioOptimizationError,
)
import numpy as np
import numba
from scipy.optimize import minimize
from typing import Union
import pandas as pd
from scipy.stats import norm


class JointStochasticProgOptimization(PortfolioOptimizationBase):
    def __init__(self, n_regimes: int, objective: str = "max_avg_sharpe", *args, **kwargs):
        """
        Stochastic Programming Portfolio Optimization where each simulated regime sequence is jointly optimized for.

        Parameters
        ----------
        n_regimes : int
            The number of regimes.
        objective : str, optional
            The objective function to optimize for. The options are "max_avg_sharpe" and "min_avg_VaR".

        Raises
        ------
        ValueError
            If objective is not one of the supported options.
        """
        super().__init__(n_regimes, *args, **kwargs)
        if objective not in ("max_avg_sharpe", "min_avg_VaR"):
            raise ValueError("objective is note one of max_avg_sharpe, min_avg_VaR")
        self._objective = objective

    def calculate_weights(self, fitted_states: np.ndarray, trans_mat: np.ndarray,
                          index_data: Union[np.ndarray, pd.DataFrame],
                          seq_length: int = 22, n_sequences: int = 1000, *args, **kwargs):
        """
        Calculate the weights for the given fitted states and asset returns.

        Regime-conditional means and covariances are estimated from index_data, n_sequences random
        regime paths of length seq_length are simulated from the last fitted state using trans_mat,
        and a long-only, fully invested portfolio is optimized against the average objective over
        all simulated paths.

        Parameters
        ----------
        fitted_states : np.ndarray
            The fitted states.
        trans_mat : np.ndarray
            The transition matrix.
        index_data : Union[np.ndarray, pd.DataFrame]
            The index or asset returns data, one row per fitted state and one column per asset.
        seq_length : int, optional
            The sequence length. The default is 22.
        n_sequences : int, optional
            The number of sequences. The default is 1000.

        Returns
        -------
        weights : np.ndarray
            The weights that optimize the configured objective (maximum average Sharpe Ratio or
            minimum average Value at Risk) over the simulated state sequences

        Raises
        ------
        ValueError
            If the configured objective is not supported.
        """
        # The numba helpers are compiled for int64 / float64 explicitly, so do not rely on the
        # platform-dependent default integer type.
        fitted_states = np.asarray(fitted_states).astype(np.int64)
        trans_mat = np.ascontiguousarray(trans_mat, dtype=np.float64)
        if not isinstance(index_data, pd.DataFrame):
            index_data = pd.DataFrame(index_data)

        states = np.unique(fitted_states)
        n_states = len(states)
        n_assets = index_data.shape[1]

        regime_groups = index_data.groupby(fitted_states)
        mu = regime_groups.mean().to_numpy()
        sigma = regime_groups.cov().to_numpy().reshape(n_states, n_assets, n_assets)

        sequences = _calc_n_sequences(n_sequences, int(fitted_states[-1]), trans_mat, states, seq_length)
        # Only the number of periods spent in each state matters, not their order
        state_counts = _count_states(sequences, n_states)

        if self._objective == "max_avg_sharpe":
            def objective(weights):
                return _max_avg_sharpe(weights, mu, sigma, state_counts, seq_length)
        elif self._objective == "min_avg_VaR":
            def objective(weights):
                return _min_avg_VaR(weights, mu, sigma, state_counts)
        else:
            raise ValueError("objective is note one of max_avg_sharpe, min_avg_VaR")

        result = minimize(
            fun=objective,
            x0=np.ones(shape=n_assets) / n_assets,
            bounds=[(0, 1)] * n_assets,
            method="SLSQP",
            constraints={"type": "eq", "fun": lambda x: np.sum(x) - 1},
            tol=1e-5
        )

        return result.x

    def __str__(self):
        return "JointStochasticProgOpt"


@numba.jit("int64[:](int64,float64[:,:],int64[:],int64)", nopython=True, nogil=True)
def _calc_sequence(start_state, trans_mat, states, seq_length):
    """
    Calculate a random sequence of states.

    Parameters
    ----------
    start_state : numpy.int64
        The initial state of each sequence.
    trans_mat : numpy.ndarray[numpy.float64]
        The transition matrix that defines the probabilities of transitioning from one state to another.
    states : numpy.ndarray[numpy.int64]
        An array of possible states.
    seq_length : numpy.int64
        The length of each sequence to generate.

    Returns
    -------
    sequence : numpy.ndarray[numpy.int64]
        An array of random sequence of states.

    Notes
    -----
    This function uses Numba to speed up the calculation.
    """
    sequence = np.empty(seq_length, dtype=np.int64)
    sequence[0] = start_state
    last_pos = states.shape[0] - 1
    for i in range(1, seq_length):
        # Inverse-CDF sampling of the next state from the row of the current state
        pos = np.searchsorted(np.cumsum(trans_mat[sequence[i - 1]]), np.random.random(), side="right")
        # A row whose cumulative sum rounds to just below 1 can yield pos == len(states);
        # clamp, since nopython mode does not bounds-check the lookup below.
        if pos > last_pos:
            pos = last_pos
        sequence[i] = states[pos]
    return sequence


def _calc_n_sequences(n_sequences, start_state, trans_mat, states, seq_length):
    """
    Calculate multiple random sequences of states.

    Parameters
    ----------
    n_sequences : numpy.int64
        The number of sequences to generate.
    start_state : numpy.int64
        The initial state of each sequence.
    trans_mat : numpy.ndarray[numpy.float64]
        The transition matrix that defines the probabilities of transitioning from one state to another.
    states : numpy.ndarray[numpy.int64]
        An array of possible states.
    seq_length : numpy.int64
        The length of each sequence to generate.

    Returns
    -------
    sequences : numpy.ndarray[numpy.int64]
        An array of shape (n_sequences, seq_length) of random sequences of states.
    """
    sequences = np.empty((n_sequences, seq_length), dtype=np.int64)
    for i in range(n_sequences):
        sequences[i] = _calc_sequence(start_state, trans_mat, states, seq_length)
    return sequences


def _calc_sharpes(weights, mu, sigma, state_counts, seq_length, rf=0):
    """
    Calculate Sharpe Ratios.

    Parameters
    ----------
    weights : numpy.ndarray[numpy.float64]
        The weights.
    mu : numpy.ndarray[numpy.float64]
        The expected returns per state.
    sigma : numpy.ndarray[numpy.float64]
        The covariance matrix per state.
    state_counts : numpy.ndarray[numpy.int64]
        The state counts for each sequence.
    seq_length : numpy.int64
        The sequence length.
    rf : numpy.float64, optional
        The risk free rate. Default is 0.

    Returns
    -------
    srs : numpy.ndarray[numpy.float64]
        Sharpe ratios, one row per sequence.
    """
    w = weights.reshape(-1, 1)

    # Average per-period expected return along each sequence
    exp_r = state_counts @ mu @ w / seq_length
    # Portfolio variance accumulated along each sequence (count-weighted sum of state covariances)
    path_var = (w.T @ np.einsum('ij,jkl->ikl', state_counts, sigma) @ w).reshape(-1, 1)
    # Per-period volatility: the Sharpe ratio is defined on the standard deviation, not the variance
    exp_sigma = np.sqrt(path_var) / np.sqrt(seq_length)

    srs = (exp_r - rf) / exp_sigma

    return srs


def _max_avg_sharpe(weights, mu, sigma, state_counts, seq_length):
    """Objective to minimize: the negative average Sharpe Ratio over all sequences."""
    srs = _calc_sharpes(weights, mu, sigma, state_counts, seq_length, rf=0)
    return -np.mean(srs)


def _calc_value_at_risk(weights, mu, sigma, state_counts):
    """
    Calculate the Value at Risk (VaR) for each state sequence.

    Parameters
    ----------
    weights : numpy.ndarray[numpy.float64]
        The weights.
    mu : numpy.ndarray[numpy.float64]
        The expected returns per state.
    sigma : numpy.ndarray[numpy.float64]
        The covariance matrix per state.
    state_counts : numpy.ndarray[numpy.int64]
        The state counts for each sequence.

    Returns
    -------
    vars : numpy.ndarray[numpy.float64]
        The 5% Value at Risk for each state sequence, i.e. the 5% quantile of a normal distribution
        with the cumulative expected return and volatility of the sequence.
    """
    w = weights.reshape(-1, 1)

    exp_r = state_counts @ mu @ w
    path_var = (w.T @ np.einsum('ij,jkl->ikl', state_counts, sigma) @ w).reshape(-1, 1)
    # norm.ppf takes the standard deviation as its scale parameter, not the variance
    exp_sigma = np.sqrt(path_var)
    return norm.ppf(0.05, loc=exp_r, scale=exp_sigma).reshape(-1)


def _min_avg_VaR(weights, mu, sigma, state_counts):
    """Objective to minimize: the negative average 5% return quantile over all sequences."""
    VaRs = _calc_value_at_risk(weights, mu, sigma, state_counts)
    return -np.mean(VaRs)


@numba.jit("int64[:,:](int64[:,:],int64)", nopython=True, nogil=True)
def _count_states(sequences, n_states):
    """
    Count the number of each state in each sequence.

    Parameters
    ----------
    sequences : numpy.ndarray[numpy.int64]
        An array of sequences.
    n_states : numpy.int64
        The number of possible states.

    Returns
    -------
    state_counts : numpy.ndarray[numpy.int64]
        An array of state counts for each sequence; column s holds the count of state label s.

    Notes
    -----
    This function uses Numba to speed up the calculation.
    """
    result = np.empty((len(sequences), n_states), dtype=np.int64)

    for i in range(len(sequences)):
        for s in range(n_states):
            result[i, s] = np.sum(sequences[i] == s)

    return result


# --------------------------------------------------------------------------------
# /marketmoodring/regime_detection/wasserstein_kmeans.py:
# --------------------------------------------------------------------------------
import numpy as np
import scipy.stats as sts
from typing import Union
import pandas as pd

from marketmoodring.regime_detection.base import (NonParametricRegimeDetection)

MAX_DRAW_KMEANS_INIT = 100


class WassersteinKMeansRegimeDetection(NonParametricRegimeDetection):
    # The optimal transport package (POT), imported lazily on first instantiation
    ot = None

    def __init__(self, n_regimes: int, frequency: str = "D", window_size: int = 5,
                 n_bins: int = 21, min_range: float = -0.15, max_range: float = 0.15,
                 p: int = 3, reg: float = 1e-3, max_iter: int = 200, kde_smoothing: float = 0.5) -> None:
        """
        A class to handle regime detection through Wasserstein K-Means on rolling return distributions.
        Currently only accepts 1D time series.

        Parameters
        ----------
        n_regimes : int
            The number of regimes to transform the data to
        frequency : str
            The frequency or unit on which to sample windows. For instance if frequency="D" and window_size=5, then
            windows will have a size of 5 days. Current options: "D" for daily
        window_size : int
            The window size of rolling return distributions
        n_bins : int
            The number of bins used for histograms in the WKM algorithm. A larger value results in more fine-grained
            but more jagged histograms
        min_range : float
            The minimum value of the histograms
        max_range : float
            The maximum value of the histograms
        p : int
            The p-Wasserstein distance to be used. p corresponds to the number of distribution moments that should
            be considered in the distance metric
        reg : float
            A regularization coefficient
        max_iter : int
            The maximum number of iterations of the WKM algorithm
        kde_smoothing : float
            A number between 0 (none) and 1 (full) indicating the level of smoothing to be applied on the window
            distributions

        Raises
        ------
        ValueError
            If frequency, window_size or kde_smoothing is invalid.
        """
        super().__init__(n_regimes)
        if frequency not in ("D",):
            raise ValueError("Sampling frequency is invalid. Must be any of (\"D\",)")
        if window_size <= 0:
            raise ValueError("Window Size must be at least 1")
        if kde_smoothing < 0 or kde_smoothing > 1:
            raise ValueError("KDE smoothing must lie between 0 and 1")
        # Model hyperparameters
        self._frequency = frequency
        self._window_size = window_size
        self._n_bins = n_bins
        self._min_range = min_range
        self._max_range = max_range
        self._p = p
        self._reg = reg
        self._max_iter = max_iter
        self._kde_smoothing = kde_smoothing
        # Model fitting parameters
        self._barycenters = None
        self._m = None
        # Conditional import, only import if class is ever created
        if WassersteinKMeansRegimeDetection.ot is None:
            WassersteinKMeansRegimeDetection.ot = __import__('ot')

    def fit(self, data: Union[np.ndarray, pd.DataFrame], *args, **kwargs):
        """
        Fits the regime detection model to the given data. If data is provided as a pandas DataFrame with
        a resolution higher than daily, the data will be resampled to a daily resolution and the corresponding
        labels will apply to the days in the data.

        Parameters
        ----------
        data : np.ndarray or pd.DataFrame
            The data to fit the regime detection model to
        args : tuple, optional
        kwargs : dict, optional

        Returns
        -------
        None
        """
        super().fit(data, *args, **kwargs)

    def _fit(self, data: np.ndarray, index: np.ndarray = None, reuse_last_run: bool = True, *args, **kwargs):
        """
        Fit the WKM barycenters on jumped rolling windows and label every observation of data.

        Parameters
        ----------
        data : np.ndarray
            The time series to fit on
        index : np.ndarray, optional
            A date time index corresponding to the time series
        reuse_last_run : bool
            True if and only if previously fitted barycenters should be used as the starting point
        """
        sample_distributions = self._sample_distributions(data, index)

        # Fit and store WKM model
        self._barycenters, _ = self._wkmeans(sample_distributions, reuse_last_run)

        # Label the training data itself; the leading observations that cannot be clustered
        # due to the window size are padded with NaN by _transform
        self._fitted_states, _ = self._transform(data, index)

    def _transform(self, data: np.ndarray, index: np.ndarray = None, *args, **kwargs):
        """
        Assign each rolling window of data to its closest fitted barycenter.

        Returns
        -------
        tuple
            The labels (NaN-padded at the front for observations without a full window) and None,
            as no state probabilities are produced by this model.
        """
        sample_distributions = self._sample_distributions(data, index, jump=False)

        distances = self.update_distances(self._barycenters, sample_distributions, self._loss_matrix())
        labels = np.argmin(distances, axis=1)

        # Prepend the labels with NaN: due to the window size the first few observations
        # cannot be clustered
        if index is None:
            n_total = len(data)
        else:
            n_total = len(np.unique(index.astype('datetime64[D]')))
        labels = np.concatenate((
            [np.nan] * (n_total - len(labels)),
            labels
        ))

        return labels, None

    def _loss_matrix(self):
        """Return the p-Wasserstein loss matrix over the histogram bins, building and caching it on first use."""
        if self._m is None:
            self._m = self._p_wass_dist(np.arange(self._n_bins, dtype=np.float64))
        return self._m

    def _sample_distributions(self, data: np.ndarray, index: np.ndarray = None, jump=True):
        """
        Create sample distributions from the given 1D time series data and associated index

        Parameters
        ----------
        data : np.ndarray
            A 1D time series
        index : np.ndarray
            An optional 1D date time index corresponding to the time series
        jump : bool
            True if and only if a jump should be applied in between rolling windows. Only used when
            no index is given.

        Returns
        -------
        distributions : np.ndarray
            An array of sampled distributions, one normalized histogram of n_bins per window
        """
        # Set up linspace of histogram bucket midpoints
        edges = np.linspace(self._min_range, self._max_range, self._n_bins + 1, dtype=np.float64)
        x = (edges[1:] + edges[:-1]) / 2

        if index is None:
            # Sample distributions
            if jump:
                # TODO: self._window_size // 4 replace by variable
                # The step must be at least 1, window sizes below 4 would otherwise give a zero step
                step = max(1, self._window_size // 4)
                sample = np.array([
                    data[i:i + self._window_size]
                    for i in range(0, len(data) - self._window_size, step)
                ])
            else:
                sample = np.lib.stride_tricks.sliding_window_view(data.flatten(), (self._window_size,))
        else:
            index = index.astype('datetime64[D]')
            # One window per day: all observations within window_size business days up to that day
            sample = np.array([
                data[
                    (index >= np.busday_offset(i, -self._window_size, roll='backward'))
                    & (index <= i)
                ] for i in np.unique(index)[self._window_size:]
            ], dtype=object)

        # Create sample distributions
        dist_sample = np.zeros((len(sample), self._n_bins))
        for i in range(len(sample)):
            # Windows may be stored in an object array; make sure they are numeric
            s = np.asarray(sample[i], dtype=np.float64)
            kde = sts.gaussian_kde(s.reshape(1, -1)).pdf(x)
            hist = np.histogram(
                s, bins=self._n_bins,
                range=(self._min_range, self._max_range), density=False
            )[0]
            hist = hist / np.sum(hist)
            # Blend the smooth KDE estimate with the raw histogram and renormalize
            dist_sample[i, :] = self._kde_smoothing * kde + (1 - self._kde_smoothing) * hist
            dist_sample[i, :] = dist_sample[i, :] / np.sum(dist_sample[i, :])

        return dist_sample

    def _wkmeans(self, distributions, reuse_last_run):
        """
        Fit a WKM model on the given distributions

        Parameters
        ----------
        distributions : np.ndarray
            Historical return distributions
        reuse_last_run : bool
            True if and only if the last fitted model should be reused

        Returns
        -------
        tuple : (np.ndarray, np.ndarray)
            A tuple with the cluster barycenters and predicted labels for each distribution
        """
        m = self._loss_matrix()

        barycenters = None

        if self._barycenters is None or not reuse_last_run:
            # Find appropriate initial points, try up to MAX_DRAW_KMEANS_INIT random draws until one
            # is found where every cluster receives at least one distribution. If none is found the
            # last draw is used.
            for _ in range(MAX_DRAW_KMEANS_INIT):
                barycenters = distributions[np.random.choice(len(distributions), self.n_regimes, replace=False)]

                distances = self.update_distances(barycenters, distributions, m)
                labels = np.argmin(distances, axis=1)

                try:
                    self._update_barycenters(barycenters, distributions, labels, m)

                    break
                except ZeroDivisionError:
                    # An empty cluster, draw new initial points
                    continue
        else:
            barycenters = self._barycenters

        # Iterate and update the barycenters until convergence or the maximum amount of iterations is reached
        it = 0
        labels = None

        while it < self._max_iter:
            distances = self.update_distances(barycenters, distributions, m)
            old_labels = labels
            labels = np.argmin(distances, axis=1)

            # Converged once the assignment no longer changes
            if np.array_equal(old_labels, labels):
                break

            self._update_barycenters(barycenters, distributions, labels, m)

            it += 1

        return barycenters, labels

    def update_distances(self, barycenters, distributions, m_b):
        """
        Calculate the distances from all given distributions to all given barycenters using the given loss matrix

        Parameters
        ----------
        barycenters : numpy.ndarray
            The barycenters to calculate distances from.
        distributions : numpy.ndarray
            The distributions to calculate distances to.
        m_b : numpy.ndarray
            The loss matrix for the distances.

        Returns
        -------
        numpy.ndarray
            A NxK numpy array of distances where N is the number of distributions and K is the number of regimes.
        """
        distances = np.zeros((len(distributions), self.n_regimes))

        for k in range(self.n_regimes):
            for d in range(len(distributions)):
                distances[d, k] = self.ot.emd2(distributions[d], barycenters[k], m_b)
        # The loss matrix holds distances to the power p, take the p-th root of the transport cost
        distances = np.power(distances, 1 / self._p)
        return distances

    def _update_barycenters(self, barycenters, distributions, labels, m_b):
        """
        Update the barycenters in place after a single WKM iteration.

        Parameters
        ----------
        barycenters : numpy.ndarray
            The barycenters to update.
        distributions : numpy.ndarray
            The input distributions.
        labels : numpy.ndarray
            The classification labels of the current iteration.
        m_b : numpy.ndarray
            Loss matrix.

        Raises
        ------
        ZeroDivisionError
            If a cluster has no distributions assigned to it.
        """
        for k in range(self.n_regimes):
            k_dists = distributions[labels == k]
            # Equal weights; raises ZeroDivisionError for an empty cluster
            weights = np.array([1 / len(k_dists)] * len(k_dists))

            barycenters[k] = self.ot.bregman.barycenter(
                np.vstack(k_dists).T,
                m_b,
                self._reg,
                weights,
                numItermax=20000
            )

            barycenters[k] /= np.sum(barycenters[k])

    def _p_wass_dist(self, x):
        """
        Construct the p-Wasserstein distance matrix for the given linspace x.

        Parameters
        ----------
        x : numpy.ndarray
            A 1D linspace to construct a distance matrix on.

        Returns
        -------
        numpy.ndarray
            An NxN distance matrix where N is the length of x, holding the pairwise distances to the
            power p, scaled to a maximum of 1.
        """
        m = self.ot.dist(x.reshape((self._n_bins, 1)), x.reshape((self._n_bins, 1)), metric="minkowski", p=self._p)
        m = m ** self._p
        m /= m.max()
        return m

    def __str__(self):
        return "WassersteinKMeans"
# --------------------------------------------------------------------------------