├── marketmoodring
    ├── __init__.py
    ├── tools
    │   ├── __init__.py
    │   ├── data_checks.py
    │   └── portfolio_opt.py
    ├── regime_detection
    │   ├── __init__.py
    │   ├── hidden_markov.py
    │   ├── base.py
    │   └── wasserstein_kmeans.py
    └── portfolio_optimization
    │   ├── __init__.py
    │   ├── base.py
    │   ├── simple_factor_model.py
    │   ├── factor_model.py
    │   ├── idosyncratic_factor_model.py
    │   └── joint_stochastic_prog.py
├── requirements.txt
├── reference
    └── project_paper.pdf
├── environment.yml
├── CITATION.cff
├── LICENSE
├── .gitignore
└── README.md


/marketmoodring/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/marketmoodring/tools/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | hmmlearn==0.3.*
3 | pandas
4 | pot==0.9.*
5 | pyportfolioopt
6 | statsmodels


--------------------------------------------------------------------------------
/reference/project_paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yvesdhondt/MarketMoodRing/HEAD/reference/project_paper.pdf


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: marketmoodring-env
 2 | channels:
 3 |   - conda-forge
 4 | dependencies:
 5 |   - python
 6 |   - numpy
 7 |   - hmmlearn==0.3.*
 8 |   - pandas
 9 |   - pot==0.9.*
10 |   - pyportfolioopt
11 |   - statsmodels


--------------------------------------------------------------------------------
/marketmoodring/regime_detection/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | API reference documentation for the regime detection models
3 | """
4 | from marketmoodring.regime_detection.base import RegimeDetectionError
5 | from marketmoodring.regime_detection.hidden_markov import HiddenMarkovRegimeDetection
6 | from marketmoodring.regime_detection.wasserstein_kmeans import WassersteinKMeansRegimeDetection
7 | 


--------------------------------------------------------------------------------
/marketmoodring/portfolio_optimization/__init__.py:
--------------------------------------------------------------------------------
1 | from marketmoodring.portfolio_optimization.base import PortfolioOptimizationError
2 | from marketmoodring.portfolio_optimization.simple_factor_model import SimpleFactorPortfolioOptimization
3 | from marketmoodring.portfolio_optimization.idosyncratic_factor_model import IdiosyncraticFactorPortfolioOptimization
4 | from marketmoodring.portfolio_optimization.joint_stochastic_prog import JointStochasticProgOptimization
5 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | message: "If you use this software, please cite it as below."
 3 | authors:
 4 | - family-names: "D'hondt"
 5 |   given-names: "Yves Alexander"
 6 | - family-names: "Di Venti"
 7 |   given-names: "Matteo Mario"
 8 | - family-names: "Rishi"
 9 |   given-names: "Rohan"
10 | - family-names: "Walker"
11 |   given-names: "Jackson"
12 | title: "MarketMoodRing"
13 | version: 1.0.0
14 | date-released: 2023-07-27
15 | url: "https://github.com/yvesdhondt/MarketMoodRing"
16 | 


--------------------------------------------------------------------------------
/marketmoodring/tools/data_checks.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | from typing import Union
 4 | 
 5 | 
 6 | def reconcile_regime_data_arg(data: Union[np.ndarray, pd.DataFrame]) -> np.array:
 7 |     """
 8 |     Internal function to verify data types and transform into numpy arrays
 9 | 
10 |     Parameters
11 |     ----------
12 |     data : np.ndarray or pd.DataFrame
13 |         Data to check
14 | 
15 |     Returns
16 |     -------
17 |     (data, index) : (np.ndarray, np.ndarray)
18 |         Reconciled data
19 |     """
20 |     if isinstance(data, np.ndarray):
21 |         return data, None
22 |     elif isinstance(data, pd.DataFrame):
23 |         if isinstance(data.index, pd.DatetimeIndex):
24 |             return data.to_numpy(), data.index.to_numpy()
25 |         else:
26 |             return data.to_numpy(), None
27 |     else:
28 |         raise ValueError("data must be a numpy Array or a pandas DataFrame")
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Yves D'hondt, Jackson Walker, Rohan Rishi, Matteo Di Venti
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/marketmoodring/portfolio_optimization/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | import numpy as np
 3 | from typing import Union
 4 | import pandas as pd
 5 | 
 6 | 
 7 | class PortfolioOptimizationError(Exception):
 8 |     pass
 9 | 
10 | 
class PortfolioOptimizationBase(ABC):
    """
    Common interface shared by all regime dependent portfolio optimizers.

    The class is abstract: it cannot be instantiated directly and is only meant to be inherited from.

    Subclasses have to implement:
    - calculate_weights
    """

    def __init__(self, n_regimes, *args, **kwargs):
        """
        Store the number of regimes the optimizer works with.

        Parameters
        ----------
        n_regimes : int
            The number of regimes.
        *args, **kwargs :
            Accepted for compatibility with subclasses, not used here.
        """
        self.n_regimes = n_regimes

    @abstractmethod
    def calculate_weights(self, fitted_states: np.ndarray, trans_mat: np.ndarray,
                          index_data: Union[np.ndarray, pd.DataFrame], *args, **kwargs):
        """
        Calculate portfolio weights. Has to be overridden by every concrete optimizer.

        Parameters
        ----------
        fitted_states : np.ndarray
            Array of fitted states.
        trans_mat : np.ndarray
            Transition matrix.
        index_data : Union[np.ndarray, pd.DataFrame]
            Index data used for weight calculation.
        *args, **kwargs :
            Additional arguments and keyword arguments.

        Returns
        -------
        None
            The abstract implementation has no body and therefore returns nothing.
        """
49 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 
162 | .idea/
163 | 


--------------------------------------------------------------------------------
/marketmoodring/portfolio_optimization/simple_factor_model.py:
--------------------------------------------------------------------------------
 1 | from marketmoodring.portfolio_optimization.factor_model import FactorPortfolioOptimization
 2 | import pandas as pd
 3 | from statsmodels import api as sm
 4 | from typing import Union
 5 | import numpy as np
 6 | 
 7 | 
 8 | class SimpleFactorPortfolioOptimization(FactorPortfolioOptimization):
 9 | 
10 |     def _get_regime_based_mean_cov(self, index_data, factor_data, trans_mat: Union[np.ndarray, pd.DataFrame],
11 |                                    fitted_states):
12 |         """
13 |         Get the regime-dependent expected return vector and variance covariance matrix of the given assets according
14 |         to the factor-based model proposed by Costa & Kwon (2019). Here idiosyncratic risk is assumed to be independent
15 |         of regime shifts.
16 | 
17 |         Parameters
18 |         ----------
19 |         index_data : pandas.DataFrame
20 |             A DataFrame with the time series asset returns.
21 |         factor_data : pandas.DataFrame
22 |             A DataFrame with the time series factor returns.
23 |         trans_mat : Union[numpy.ndarray, pandas.DataFrame]
24 |             The transition probability matrix of the regime switching model.
25 |         fitted_states : numpy.ndarray[numpy.int64]
26 |             The fitted states of the regime switching model.
27 | 
28 |         Returns
29 |         -------
30 |         Tuple[numpy.ndarray[numpy.float64], numpy.ndarray[numpy.float64]]
31 |             A tuple (mu, sigma) containing the expected return vector and covariance matrix of the assets.
32 |         """
33 |         factor_names = factor_data.columns
34 |         n_factors = len(factor_names)
35 |         current_state = int(fitted_states[-1])
36 | 
37 |         Y = index_data.copy()
38 |         X = factor_data.copy()
39 | 
40 |         # Transform factors by indicator function to allow for OLS estimation of regime-dependent FF3 model
41 |         X["state"] = fitted_states
42 |         state_factors = {}
43 |         for state in range(self.n_regimes):
44 |             state_factors[state] = {"names": []}
45 |             for fn in factor_names:
46 |                 X[fn + "_" + str(state)] = X[[fn, "state"]].apply(lambda x: x[0] if x[1] == state else 0, axis=1)
47 |                 state_factors[state]["names"].append(fn + "_" + str(state))
48 | 
49 |         x_names = []
50 |         for state in range(self.n_regimes):
51 |             x_names += state_factors[state]["names"]
52 | 
53 |         X = X[x_names]
54 | 
55 |         # Fit regime-dependent Factor model
56 |         ols = sm.OLS(Y, sm.add_constant(X)).fit()
57 | 
58 |         # Define parameters
59 |         alpha = ols.params.values[0:1, :].reshape(-1, 1)
60 | 
61 |         for state in range(self.n_regimes):
62 |             state_factors[state]["V"] = ols.params.values[1 + state * n_factors: 1 + (1 + state) * n_factors, :]
63 |             state_factors[state]["F"] = factor_data[fitted_states == state].cov().values
64 |             state_factors[state]["f_bar"] = factor_data[fitted_states == state].mean().values.reshape(-1, 1)
65 | 
66 |         D = ols.resid.cov().values
67 | 
68 |         # Construct regime-dependent expected return and variance-covariance matrices
69 |         mu = alpha
70 |         sigma = D
71 |         for state in range(self.n_regimes):
72 |             # update mu
73 |             mu += trans_mat[current_state][state] * state_factors[state]["V"].T @ state_factors[state]["f_bar"]
74 | 
75 |             # update sigma
76 |             sigma += trans_mat[current_state][state] \
77 |                      * state_factors[state]["V"].T @ state_factors[state]["F"] @ state_factors[state]["V"] \
78 |                      + trans_mat[current_state][state] * (1 - trans_mat[current_state][state]) \
79 |                      * state_factors[state]["V"].T @ state_factors[state]["f_bar"] @ state_factors[state]["f_bar"].T \
80 |                      @ state_factors[state]["V"]
81 |             for other_state in range(self.n_regimes):
82 |                 if other_state == state:
83 |                     continue
84 |                 sigma -= trans_mat[current_state][state] * trans_mat[current_state][other_state] \
85 |                          * state_factors[state]["V"].T @ state_factors[state]["f_bar"] \
86 |                          @ state_factors[other_state]["f_bar"].T @ state_factors[other_state]["V"]
87 | 
88 |         return mu.reshape(-1, 1), sigma
89 | 
90 |     def __str__(self):
91 |         return "SimpleFactorOpt"
92 | 
93 | 


--------------------------------------------------------------------------------
/marketmoodring/regime_detection/hidden_markov.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | from typing import Union
  4 | import importlib
  5 | 
  6 | from marketmoodring.regime_detection.base import (RegimeDetectionBase,
  7 |                                                   RegimeDetectionError)
  8 | 
  9 | 
 10 | class HiddenMarkovRegimeDetection(RegimeDetectionBase):
 11 |     hmm = None
 12 | 
 13 |     def __init__(self, n_regimes: int, hmm_type: str = "GaussianHMM", n_iter: int = 50, covar_type: str = "diag",
 14 |                  *args, **kwargs) -> None:
 15 |         """
 16 |         A class to handle regime detection through Gaussian HMM.
 17 | 
 18 |         Parameters
 19 |         ----------
 20 |         n_regimes : int
 21 |             The number of regimes to transform the data to
 22 |         hmm_type : str
 23 |             The type of HMM model to use, options are "GaussianHMM" or "GMMHMM".
 24 |         n_iter : int
 25 |             The number of iterations to run the HMM model
 26 |         covar_type : str
 27 |             The type of covariance matrix to use, options are "spherical", "full", "diag" or "tied"
 28 |         """
 29 |         super().__init__(n_regimes, *args, **kwargs)
 30 | 
 31 |         if hmm_type not in ("GaussianHMM", "GMMHMM"):
 32 |             raise RegimeDetectionError("given type is not one of the available options")
 33 |         if n_iter < 0:
 34 |             raise RegimeDetectionError("n_iter has to be larger than 0")
 35 | 
 36 |         self._type = hmm_type
 37 |         self._n_iter = n_iter
 38 |         self._covar_type = covar_type
 39 |         # Conditional import, only import if class is ever created
 40 |         if HiddenMarkovRegimeDetection.hmm is None:
 41 |             HiddenMarkovRegimeDetection.hmm = importlib.import_module(
 42 |                 "hmmlearn.hmm"
 43 |             )
 44 | 
 45 |     def _fit(self, data: np.ndarray, index: np.ndarray = None, *args, **kwargs):
 46 |         """
 47 |         Fit a Gaussian HMM or GMM HMM on the given data.
 48 | 
 49 |         Parameters
 50 |         ----------
 51 |         data : numpy.ndarray
 52 |             A matrix of time series data to fit the HMM on.
 53 |         index : numpy.ndarray, optional
 54 |             The time index of the time series data.
 55 |         *args : tuple
 56 |             Any additional positional arguments from the superclass, if any.
 57 |         **kwargs : dict
 58 |             Any additional keyword arguments from the superclass, if any.
 59 | 
 60 |         Returns
 61 |         -------
 62 |         None
 63 |             This method does not return anything, but it sets the following instance variables:
 64 | 
 65 |             _model : hmmlearn.hmm.GaussianHMM or hmmlearn.hmm.GMMHMM
 66 |                 The fitted HMM model.
 67 |             _fitted_states : numpy.ndarray
 68 |                 The fitted states of the HMM model.
 69 |             _fitted_states_proba : numpy.ndarray
 70 |                 The fitted state probabilities of the HMM model.
 71 |             _trans_mat : numpy.ndarray
 72 |                 The transition probability matrix of the HMM model.
 73 |         """
 74 |         # Create HMM Model
 75 |         if self._type == "GaussianHMM":
 76 |             self._model = self.hmm.GaussianHMM(
 77 |                 n_components=self.n_regimes, n_iter=self._n_iter, covariance_type=self._covar_type, random_state=None
 78 |             )
 79 |         elif self._type == "GMMHMM":
 80 |             self._model = self.hmm.GMMHMM(
 81 |                 n_components=self.n_regimes, n_iter=self._n_iter, covariance_type=self._covar_type, random_state=None
 82 |             )
 83 | 
 84 |         # Fit model
 85 |         self._model.fit(data)
 86 | 
 87 |         # Predict and store output of training data
 88 |         self._fitted_states = self._model.predict(data)
 89 |         self._fitted_states_proba = self._model.predict_proba(data)
 90 |         self._trans_mat = self._model.transmat_
 91 | 
 92 |     def _transform(self, data: np.ndarray, *args, **kwargs):
 93 |         if not self._fit_called:
 94 |             raise RegimeDetectionError("fit must be called before transforming")
 95 | 
 96 |         transformed_states = self._model.predict(data)
 97 |         transformed_states_proba = self._model.predict_proba(data)
 98 | 
 99 |         return transformed_states, transformed_states_proba
100 | 
101 |     def fit_transform(self, data: Union[np.ndarray, pd.DataFrame], *args, **kwargs):
102 |         self.fit(data, *args, **kwargs)
103 | 
104 |         return self._fitted_states, self._fitted_states_proba
105 | 
106 |     def __str__(self):
107 |         return self._type
108 | 


--------------------------------------------------------------------------------
/marketmoodring/portfolio_optimization/factor_model.py:
--------------------------------------------------------------------------------
  1 | from abc import ABC, abstractmethod
  2 | import numpy as np
  3 | from typing import Union
  4 | import pandas as pd
  5 | from marketmoodring.portfolio_optimization.base import PortfolioOptimizationBase, PortfolioOptimizationError
  6 | from marketmoodring.tools.portfolio_opt import ERC, MVO
  7 | 
  8 | 
  9 | class FactorPortfolioOptimization(PortfolioOptimizationBase, ABC):
 10 |     def __init__(self, n_regimes, optimizer="ERC", *args, **kwargs):
 11 |         """
 12 |         Initialize a factor portfolio optimizer.
 13 | 
 14 |         Parameters
 15 |         ----------
 16 |         n_regimes : int
 17 |             The number of regimes used in the portfolio optimization.
 18 |         optimizer : str
 19 |             The portfolio optimization algorithm to use. Options are "ERC" and "MVO".
 20 |         *args : tuple
 21 |             Any additional positional arguments from the superclass, if any.
 22 |         **kwargs : dict
 23 |             Any additional keyword arguments from the superclass, if any.
 24 | 
 25 |         Raises
 26 |         ------
 27 |         ValueError
 28 |             If the given optimizer is not one of "ERC" or "MVO".
 29 |         """
 30 |         super().__init__(n_regimes, *args, **kwargs)
 31 |         if optimizer == "ERC":
 32 |             self._optimizer = ERC()
 33 |         elif optimizer == "MVO":
 34 |             self._optimizer = MVO()
 35 |         else:
 36 |             raise ValueError("Given optimizer is not one of ERC or MVO")
 37 | 
 38 |     def calculate_weights(self, fitted_states: np.ndarray, trans_mat: np.ndarray,
 39 |                           index_data: Union[np.ndarray, pd.DataFrame],
 40 |                           factor_data: Union[np.ndarray, pd.DataFrame] = None, *args, **kwargs):
 41 |         """
 42 |         Calculate equal risk contribution (ERC) portfolio weights using a regime-dependent factor model with static
 43 |         idiosyncratic risk.
 44 | 
 45 |         Parameters
 46 |         ----------
 47 |         fitted_states : numpy.ndarray
 48 |             An array of fitted state labels.
 49 |         trans_mat : numpy.ndarray
 50 |             A matrix of regime transition probabilities.
 51 |         index_data : numpy.ndarray or pandas.DataFrame
 52 |             A matrix of time series returns of the assets to be included in the portfolio. This time-series has to
 53 |             match 1:1 with the fitted_states labels.
 54 |         factor_data : numpy.ndarray or pandas.DataFrame, optional
 55 |             A matrix of time series returns of the factors to be used for the regime-dependent factor model. This
 56 |             time-series has to match 1:1 with the fitted_states labels.
 57 |         *args : tuple
 58 |             Any additional positional arguments from the superclass, if any.
 59 |         **kwargs : dict
 60 |             Any additional keyword arguments from the superclass, if any.
 61 | 
 62 |         Returns
 63 |         -------
 64 |         numpy.ndarray
 65 |             An array of equal risk contribution (ERC) portfolio weights for the given assets using the simple factor
 66 |             regime model.
 67 |         """
 68 |         mean, cov = self._get_regime_based_mean_cov(index_data, factor_data, trans_mat, fitted_states)
 69 | 
 70 |         return self._optimizer.calculate_weights(mean, cov)
 71 | 
 72 |     @abstractmethod
 73 |     def _get_regime_based_mean_cov(self, index_data, factor_data, trans_mat: Union[np.ndarray, pd.DataFrame],
 74 |                                    fitted_states):
 75 |         """
 76 |         Get the regime-dependent expected return vector and variance covariance matrix of the given assets according
 77 |         to the factor-based model proposed by Costa & Kwon (2020).
 78 | 
 79 |         Parameters
 80 |         ----------
 81 |         index_data : numpy.ndarray or pandas.DataFrame
 82 |             A matrix of time series asset returns.
 83 |         factor_data : numpy.ndarray or pandas.DataFrame
 84 |             A matrix of time series factor returns.
 85 |         trans_mat : numpy.ndarray or pandas.DataFrame
 86 |             The transition probability matrix of the regime switching model.
 87 |         fitted_states : numpy.ndarray
 88 |             The fitted states of the regime switching model.
 89 | 
 90 |         Returns
 91 |         -------
 92 |         tuple
 93 |             A tuple (mu, sigma) containing the expected return vector and covariance matrix of the assets.
 94 | 
 95 |             mu : numpy.ndarray
 96 |                 The regime-dependent expected return vector of the assets.
 97 |             sigma : numpy.ndarray
 98 |                 The regime-dependent covariance matrix of the assets.
 99 |         """
100 |         pass
101 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # MarketMoodRing🎭
  2 | 
  3 | ## Description
  4 | 
  5 | MarketMoodRing🎭 is a Python package designed for testing different regime detection models and portfolio optimizers. This tool is a product of research conducted by the UC Berkeley, Haas School of Business, Master of Financial Engineering, 2023. It aims to provide a framework for financial market regime analysis and portfolio management testing.
  6 | 
  7 | ## Collaborators
  8 | - [Yves D'hondt](https://github.com/yvesdhondt)
  9 | - [Matteo Di Venti](https://github.com/MatteoMarioDiVenti)
 10 | - [Rohan Rishi](https://github.com/RohanRishi)
 11 | - [Jackson Walker](https://github.com/jacksonrgwalker)
 12 | 
 13 | ## Reference
 14 | 
 15 | The `reference/` folder contains the [project paper](reference/project_paper.pdf) from which this library originated.
 16 | 
 17 | ## Features
 18 | 
 19 | - **Regime Detection Models**: Currently includes Hidden Markov Models (HMM) and Wasserstein K-Means clustering (WKM).
 20 | - **Portfolio Optimizers**: Implements different portfolio optimization strategies, including stochastic programming and factor-based optimization.
 21 | 
 22 | 
 23 | ## Installation
 24 | 
 25 | As of now, the MarketMoodRing package is not available on PyPI. To install and use this package, you need to clone the repository and reference it locally. Here are the steps to do so:
 26 | 
 27 | 1. Clone the repository:
 28 | 
 29 | ```bash
 30 | git clone https://github.com/yvesdhondt/MarketMoodRing.git
 31 | ```
 32 | 
 33 | 2. Navigate to the cloned directory:
 34 | 
 35 | ```bash
 36 | cd MarketMoodRing
 37 | ```
 38 | 
 39 | 3. Now, you can import and use the package in your Python scripts. Make sure your script is in the same directory as the cloned repository or adjust the Python path accordingly.
 40 | 
 41 | ```python
 42 | import sys
 43 | sys.path.insert(0, '/path/to/MarketMoodRing')
 44 | 
 45 | from marketmoodring.regime_detection import HiddenMarkovRegimeDetection
 46 | ```
 47 | 
 48 | Please replace `/path/to/MarketMoodRing` with the actual path to the cloned repository on your system.
 49 | 
 50 | Remember to keep the repository updated with:
 51 | 
 52 | ```bash
 53 | git pull origin main
 54 | ```
 55 | 
 56 | ## Dependencies
 57 | 
 58 | The MarketMoodRing package requires several dependencies to function properly. These dependencies can be installed using either conda (recommended) or pip.
 59 | 
 60 | ### Using Conda
 61 | 
 62 | If you're using Conda, you can create a new environment and install all dependencies using the `environment.yml` file located in the root directory. Run the following command in your terminal / Anaconda Prompt once you've navigated to the cloned repository:
 63 | 
 64 | ```bash
 65 | conda env create -f environment.yml
 66 | ```
 67 | 
 68 | This will create a new Conda environment called `marketmoodring-env` and install all necessary packages. To activate the environment, use:
 69 | 
 70 | ```bash
 71 | conda activate marketmoodring-env
 72 | ```
 73 | 
 74 | ### Using pip
 75 | 
 76 | If you prefer using pip, you can install all dependencies using the `requirements.txt` file also located in the root directory. Run the following command in your terminal:
 77 | 
 78 | ```bash
 79 | pip install -r requirements.txt
 80 | ```
 81 | 
 82 | This will install all the necessary packages listed in the `requirements.txt` file. Please note that this will install the packages globally on your system, which can result in unexpected behavior. If you want to install the packages in a virtual environment, please refer to the [Python documentation](https://docs.python.org/3/tutorial/venv.html), or use the Conda environment as described above.
 83 | 
 84 | ## Usage
 85 | 
 86 | ```python
 87 | from marketmoodring.regime_detection import HiddenMarkovRegimeDetection
 88 | from marketmoodring.portfolio_optimization import JointStochasticProgOptimization
 89 | import pandas as pd
 90 | # Read in your data
 91 | index_data = pd.read_csv('path/to/index_data.csv')
 92 | 
 93 | # Initialize regime detection model
 94 | hmm_model = HiddenMarkovRegimeDetection(n_regimes=2, hmm_type='GMMHMM', covar_type="diag", n_iter=100)
 95 | 
 96 | # Fit the model to your data and predict regimes
 97 | fitted_states, fitted_states_proba = hmm_model.fit_transform(index_data)
 98 | 
 99 | # Initialize portfolio optimizer
100 | opt_model = JointStochasticProgOptimization(n_regimes=2, objective="max_avg_sharpe")
101 | 
102 | # Fit the optimizer to your data and regime predictions and calculate portfolio weights
103 | weights = opt_model.calculate_weights(
104 |                     fitted_states = fitted_states,
105 |                     trans_mat = hmm_model.get_trans_mat(),
106 |                     index_data = index_data,
107 |                 )
108 | ```
109 | 
110 | ## License
111 | 
112 | This project is licensed under the MIT License. See [LICENSE](LICENSE) for more details.
113 | 


--------------------------------------------------------------------------------
/marketmoodring/portfolio_optimization/idosyncratic_factor_model.py:
--------------------------------------------------------------------------------
  1 | from typing import Union
  2 | 
  3 | import numpy as np
  4 | import pandas as pd
  5 | import statsmodels.api as sm
  6 | 
  7 | from marketmoodring.portfolio_optimization.factor_model import FactorPortfolioOptimization
  8 | 
  9 | 
 10 | class IdiosyncraticFactorPortfolioOptimization(FactorPortfolioOptimization):
 11 |     def _get_regime_based_mean_cov(self, index_data, factor_data, trans_mat: Union[np.ndarray, pd.DataFrame],
 12 |                                    fitted_states):
 13 |         """
 14 |         Get the regime-dependent expected return vector and variance covariance matrix of the given assets according
 15 |         to the factor-based model proposed by Costa & Kwon (2020). Here idiosyncratic risk is assumed to be regime-
 16 |         dependant and factors are centered before being used.
 17 | 
 18 |         Parameters
 19 |         ----------
 20 |         index_data : pandas.DataFrame
 21 |             A data frame with the time series asset returns.
 22 |         factor_data : pandas.DataFrame
 23 |             A data frame with the time series factor returns.
 24 |         trans_mat : numpy.ndarray or pandas.DataFrame
 25 |             The transition probability matrix of the regime switching model.
 26 |         fitted_states : numpy.ndarray
 27 |             The fitted states of the regime switching model.
 28 | 
 29 |         Returns
 30 |         -------
 31 |         tuple
 32 |             A tuple (mu, sigma) containing the expected return vector and covariance matrix of the assets.
 33 | 
 34 |             mu : numpy.ndarray
 35 |                 The regime-dependent expected return vector of the assets.
 36 |             sigma : numpy.ndarray
 37 |                 The regime-dependent covariance matrix of the assets.
 38 |             """
 39 |         factor_names = factor_data.columns
 40 |         n_factors = len(factor_names)
 41 |         current_state = int(fitted_states[-1])
 42 | 
 43 |         Y = index_data.copy()
 44 |         X = factor_data.copy()
 45 |         # De-mean factors
 46 |         X = X - np.mean(X, axis=0)
 47 | 
 48 |         ols, state_factors = self._build_factor_model(X, Y, factor_names, fitted_states)
 49 | 
 50 |         for state in range(self.n_regimes):
 51 |             state_factors[state]["V"] = ols.params.values[state * (n_factors + 1): (1 + state) * (n_factors + 1) - 1, :]
 52 |             state_factors[state]["F"] = factor_data[fitted_states == state].cov().values
 53 |             state_factors[state]["mu"] = ols.params.values[(1 + state) * (n_factors + 1) - 1, :]
 54 |             state_factors[state]["D"] = ols.resid[fitted_states == state].cov().values
 55 | 
 56 |         # Construct regime-dependent expected return and variance-covariance matrices
 57 |         mu = 0
 58 |         sigma = 0
 59 |         for state in range(self.n_regimes):
 60 |             # update mu
 61 |             mu += trans_mat[current_state][state] * state_factors[state]["mu"]
 62 | 
 63 |             # update sigma
 64 |             sigma += trans_mat[current_state][state] \
 65 |                 * (
 66 |                     state_factors[state]["V"].T @ state_factors[state]["F"] @ state_factors[state]["V"]
 67 |                     + state_factors[state]["D"]
 68 |                 ) + trans_mat[current_state][state] * (1 - trans_mat[current_state][state]) \
 69 |                 * state_factors[state]["mu"] @ state_factors[state]["mu"].T
 70 | 
 71 |             for other_state in range(self.n_regimes):
 72 |                 if other_state == state:
 73 |                     continue
 74 |                 sigma -= trans_mat[current_state][state] * trans_mat[current_state][other_state] \
 75 |                     * state_factors[state]["mu"] @ state_factors[state]["mu"].T
 76 | 
 77 |         return mu.reshape(-1, 1), sigma
 78 | 
 79 |     def _build_factor_model(self, X, Y, factor_names, fitted_states):
 80 |         """
 81 |         Build a regime-factor model of target asset returns (Y) to factors (X) for the given factor_names and
 82 |         fitted_states. Alpha is regime-dependent in this model.
 83 | 
 84 |         Parameters
 85 |         ----------
 86 |         X : numpy.ndarray
 87 |             A matrix of factor returns.
 88 |         Y : numpy.ndarray
 89 |             A matrix of target asset returns.
 90 |         factor_names : list of str
 91 |             A list of names of the factors in the factor model.
 92 |         fitted_states : numpy.ndarray
 93 |             An array of regime labels.
 94 | 
 95 |         Returns
 96 |         -------
 97 |         tuple
 98 |             A tuple consisting of an OLS model and a dictionary of state-factor information.
 99 | 
100 |             ols : statsmodels.regression.linear_model.RegressionResultsWrapper
101 |                 The OLS model.
102 |             state_factors : dict
103 |                 A dictionary of state-factor information.
104 | 
105 |                 names : list of str
106 |                     A list of names of the state factors.
107 |         """
108 |         # Transform factors by indicator function to allow for OLS estimation of regime-dependent FF3 model
109 |         X["state"] = fitted_states
110 |         state_factors = {}
111 |         for state in range(self.n_regimes):
112 |             state_factors[state] = {"names": []}
113 |             for fn in factor_names:
114 |                 X[fn + "_" + str(state)] = X[[fn, "state"]].apply(lambda x: x[0] if x[1] == state else 0, axis=1)
115 |                 state_factors[state]["names"].append(fn + "_" + str(state))
116 |             # Add regime-dependent constant
117 |             X["mu_" + str(state)] = X["state"].apply(lambda x: 1 if x == state else 0)
118 |             state_factors[state]["names"].append("mu_" + str(state))
119 |         x_names = []
120 |         for state in range(self.n_regimes):
121 |             x_names += state_factors[state]["names"]
122 | 
123 |         X = X[x_names]
124 |         # Fit regime-dependent Factor model
125 |         ols = sm.OLS(Y, X).fit()
126 |         return ols, state_factors
127 | 
128 |     def __str__(self):
129 |         return "IdiosyncraticFactorOpt"
130 | 


--------------------------------------------------------------------------------
/marketmoodring/tools/portfolio_opt.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from scipy.optimize import minimize
  3 | from abc import ABC, abstractmethod
  4 | 
  5 | 
  6 | class PortfolioOptimizer(ABC):
  7 |     @abstractmethod
  8 |     def calculate_weights(self, mu, cov):
  9 |         """
 10 |         Calculates the weights for the optimization.
 11 | 
 12 |         Parameters
 13 |         ----------
 14 |         mu : np.ndarray
 15 |             The mean of the distribution.
 16 |         cov : np.ndarray
 17 |             The covariance of the distribution.
 18 | 
 19 |         Returns
 20 |         -------
 21 |         weights : np.ndarray
 22 |             The weights for the optimization.
 23 |         """
 24 |         pass
 25 | 
 26 | 
 27 | class ERC(PortfolioOptimizer):
 28 |     def __init__(self):
 29 |         pass
 30 | 
 31 |     def calculate_weights(self, mu, cov):
 32 |         """
 33 |         Calculate ERC portfolio weights
 34 | 
 35 |         Parameters
 36 |         ----------
 37 |         mu : np.ndarray
 38 |             Mean vector
 39 |         sigma : np.ndarray
 40 |             Covariance matrix
 41 | 
 42 |         Returns
 43 |         -------
 44 |         np.ndarray
 45 |             array with equal risk contribution (ERC) weights for the given assets
 46 |         """
 47 |         # Set up optimization
 48 |         num_assets = cov.shape[0]
 49 |         w_guess = np.repeat(1 / num_assets, num_assets)
 50 |         constraints = ({'type': 'eq', 'fun': lambda w: np.sum(w) - 1.0})
 51 |         bounds = tuple((0, 1) for asset in range(num_assets))
 52 | 
 53 |         # Optimize
 54 |         result = minimize(self._erc_objective, w_guess, args=(cov), method='SLSQP', constraints=constraints,
 55 |                           bounds=bounds)
 56 | 
 57 |         # Return weights
 58 |         return result.x
 59 | 
 60 |     def _calculate_var(self, w: np.ndarray, sigma: np.ndarray) -> float:
 61 |         """
 62 |         Calculate the variance of the portfolio with the given weights, w, and asset covariances, sigma
 63 | 
 64 |         Parameters
 65 |         ----------
 66 |         w : np.ndarray
 67 |             array of asset weights
 68 |         sigma : np.ndarray
 69 |             matrix of asset covariances
 70 | 
 71 |         Returns
 72 |         -------
 73 |         float
 74 |             estimated variance of the portoflio
 75 |         """
 76 |         # Calculate portfolio variance
 77 |         return w.T @ sigma @ w
 78 | 
 79 |     def _calculate_mctr(self, w: np.ndarray, sigma: np.ndarray) -> np.ndarray:
 80 |         """
 81 |         Calculate the marginal contribution to risk (MCTR) of each portfolio asset for the given weights, w, and
 82 |         asset covariances, sigma
 83 | 
 84 |         Parameters
 85 |         ----------
 86 |         w : np.ndarray
 87 |             array of asset weights
 88 |         sigma : np.ndarray
 89 |             matrix of asset covariances
 90 | 
 91 |         Returns
 92 |         -------
 93 |         np.ndarray
 94 |             array of MCTR for each asset
 95 |         """
 96 |         # Calculate risk contribution of each asset
 97 |         portfolio_var = self._calculate_var(w, sigma)
 98 |         return w * (sigma @ w) / portfolio_var
 99 | 
100 |     def _erc_objective(self, w: np.ndarray, sigma: np.ndarray) -> float:
101 |         """
102 |         Calculate the objective function for an equal risk contribution portfolio (ERC)
103 | 
104 |         Parameters
105 |         ----------
106 |         w : np.ndarray
107 |             array of asset weights
108 |         sigma : np.ndarray
109 |             matrix of asset covariances
110 | 
111 |         Returns
112 |         -------
113 |         float
114 |             MSE of the asset MCTRs vs the average MCTR
115 |         """
116 |         # Objective function to minimize
117 |         mctr = self._calculate_mctr(w, sigma)
118 |         return np.sum((mctr - mctr.mean()) ** 2)
119 | 
120 | 
class MVO(PortfolioOptimizer):
    """Long-only, fully invested maximum Sharpe ratio (mean-variance) portfolio."""

    def __init__(self):
        pass

    def calculate_weights(self, mu, sigma, rf=0):
        """
        Calculate the portfolio weights that maximize the Sharpe ratio

        The weights are found numerically (SLSQP) under the constraints that every weight lies in [0, 1] and
        that the weights sum to one.

        Parameters
        ----------
        mu : np.ndarray
            Mean vector
        sigma : np.ndarray
            Covariance matrix
        rf : float, optional
            Risk free rate. The default is 0.

        Returns
        -------
        np.ndarray or None
            array with the maximum Sharpe ratio weights for the given assets, rescaled to sum to one, or None
            when the optimizer does not report success
        """
        # Set up optimization
        num_assets = sigma.shape[0]
        w_guess = np.repeat(1 / num_assets, num_assets)

        args = (mu, sigma, rf)
        constraints = ({
            'type': 'eq',
            'fun': lambda x: np.sum(x) - 1
        })
        result = minimize(
            fun=self.mvo_objective,
            x0=w_guess,
            args=args,
            method='SLSQP',
            bounds=tuple((0.0, 1.0) for _ in range(num_assets)),
            constraints=constraints,
            tol=1e-5
        )

        if result['success']:
            # Rescale to remove any residual constraint violation left by the solver tolerance
            return (result['x'] / np.sum(result['x'])).reshape(-1,)
        else:
            return None

    def mvo_objective(self, weights, mu, sigma, rf=0):
        """
        Objective function to minimize to find the MVO portfolio weights

        Parameters
        ----------
        weights : np.ndarray
            array of asset weights
        mu : np.ndarray
            Mean vector (a flat array or a column vector)
        sigma : np.ndarray
            Covariance matrix
        rf : float
            Risk free rate

        Returns
        -------
        float
            negative Sharpe Ratio for the given weights
        """
        # Flatten the inputs so the result is a true scalar, which is what scipy expects from an objective
        w = np.asarray(weights, dtype=float).reshape(-1)
        mean_vector = np.asarray(mu, dtype=float).reshape(-1)

        excess_return = w @ mean_vector - rf
        volatility = np.sqrt(w @ np.asarray(sigma, dtype=float) @ w)

        # Return negative Sharpe ratio
        return float(-excess_return / volatility)
194 | 


--------------------------------------------------------------------------------
/marketmoodring/regime_detection/base.py:
--------------------------------------------------------------------------------
  1 | """
  2 | API reference documentation for the base regime detection class
  3 | """
  4 | 
  5 | from abc import ABC, abstractmethod
  6 | 
  7 | import numpy as np
  8 | import pandas as pd
  9 | from typing import Union
 10 | 
 11 | from marketmoodring.tools.data_checks import reconcile_regime_data_arg
 12 | 
 13 | 
 14 | class RegimeDetectionError(Exception):
 15 |     pass
 16 | 
 17 | 
 18 | class RegimeDetectionBase(ABC):
 19 |     """
 20 |     A base class for regime detection.
 21 |     This class is not meant to be used directly, but rather to be inherited by other classes.
 22 |     The purpose of this class is to provide a common interface for all regime detection classes.
 23 | 
 24 |     The following methods must be implemented by any class that inherits this class:
 25 |     - fit
 26 |     - transform
 27 | 
 28 |     The following method is optional to implement:
 29 |     - fit_transform
 30 |     """
 31 | 
 32 |     def __init__(self, n_regimes: Union[int, None], *args, **kwargs) -> None:
 33 |         """
 34 |         A class to handle regime detection.
 35 | 
 36 |         Parameters
 37 |         ----------
 38 |         n_regimes : int
 39 |             The number of regimes to transform the data to
 40 |         """
 41 |         if (n_regimes is not None) and n_regimes <= 0:
 42 |             raise ValueError("Number of regimes has to be larger than 0 or ")
 43 |         self.n_regimes = int(n_regimes) if n_regimes is not None else None
 44 |         self._fit_called = False
 45 |         self._fitted_states = None
 46 |         self._fitted_states_proba = None
 47 |         self._trans_mat = None
 48 |         self._model = None
 49 | 
 50 |     @abstractmethod
 51 |     def _fit(self, data: np.ndarray, index: np.ndarray = None, *args, **kwargs):
 52 |         """
 53 |         Abstract method to be implemented by any class that inherits this class.
 54 |         Checking arguments is done by the `fit` method that wraps this method.
 55 |         """
 56 |         pass
 57 | 
 58 |     def fit(self, data: Union[np.ndarray, pd.DataFrame], *args, **kwargs):
 59 |         """
 60 |         Fits the model to the data.
 61 |         Wraps the _fit method which is implemented by any class that inherits this class.
 62 | 
 63 |         Parameters
 64 |         ----------
 65 |         data : numpy Array or pandas DataFrame
 66 |             The data to fit the regimes on
 67 |         *args, **kwargs
 68 |             Any additional arguments to be passed to the _fit method
 69 | 
 70 |         Returns
 71 |         -------
 72 |         None
 73 |         """
 74 |         fit_data, fit_index = reconcile_regime_data_arg(data)
 75 |         self._fit(fit_data, fit_index, *args, **kwargs)
 76 |         self._fit_called = True
 77 | 
 78 |     @abstractmethod
 79 |     def _transform(self, data: np.ndarray, index: np.ndarray = None, *args, **kwargs):
 80 |         """
 81 |         Abstract method to be implemented by any class that inherits this class.
 82 |         Checking arguments is done by the `transform` method that wraps this method.
 83 |         """
 84 |         pass
 85 | 
 86 |     def transform(self, data: Union[np.ndarray, pd.DataFrame], *args, **kwargs):
 87 |         """
 88 |         Transforms the data using the fitted model.
 89 |         Wraps the _transform method which is implemented by any class that inherits this class.
 90 | 
 91 |         Parameters
 92 |         ----------
 93 |         data : np.ndarray or pd.DataFrame
 94 |             The data to transform using the fitted model
 95 |         *args, **kwargs
 96 |             Any additional arguments to be passed to the _transform method
 97 | 
 98 |         Returns
 99 |         -------
100 |         tuple
101 |             Two numpy arrays containing the (1) predicted states and (2) predicted
102 |             state probabilities
103 |         """
104 | 
105 |         transform_data, transform_index = reconcile_regime_data_arg(data)
106 | 
107 |         if not self._fit_called:
108 |             raise RegimeDetectionError("fit must be called before transform")
109 |         return self._transform(transform_data, transform_index, *args, **kwargs)
110 | 
111 |     def fit_transform(self, data: Union[np.ndarray, pd.DataFrame], *args, **kwargs):
112 |         """
113 |         Fit and transform the given data with a single function call. This function behaves the same
114 |         as sequentially calling fit and transform.
115 | 
116 |         Parameters
117 |         ----------
118 |         data : np.ndarray or pd.DataFrame
119 |             The data to fit and transform
120 |         *args, **kwargs
121 |             Any additional arguments to be passed to the fit_transform method
122 | 
123 |         Returns
124 |         -------
125 |         tuple
126 |             Two numpy arrays containing the (1) predicted states and (2) predicted
127 |             state probabilities
128 |         """
129 |         self.fit(data, *args, **kwargs)
130 |         return self.transform(data, *args, **kwargs)
131 | 
132 |     def get_fitted_states(self):
133 |         if not self._fit_called:
134 |             raise RegimeDetectionError(
135 |                 "fit must be called before retrieving fitted states"
136 |             )
137 |         return self._fitted_states
138 | 
139 |     def get_fitted_states_proba(self):
140 |         if not self._fit_called:
141 |             raise RegimeDetectionError(
142 |                 "fit must be called before retrieving fitted states"
143 |             )
144 |         return self._fitted_states_proba
145 | 
146 |     def get_trans_mat(self):
147 |         if not self._fit_called:
148 |             raise RegimeDetectionError(
149 |                 "fit must be called before retrieving fitted transition matrix"
150 |             )
151 |         return self._trans_mat
152 | 
153 | 
class NonParametricRegimeDetection(RegimeDetectionBase, ABC):
    """
    A class to handle non-parametric regime detection
    i.e. clustering methods, and not Hidden Markov Models
    """

    def fit(self, data: Union[np.ndarray, pd.DataFrame], *args, **kwargs):
        """
        Fit the model to the data and derive an empirical transition matrix from the fitted states.

        Parameters
        ----------
        data : np.ndarray or pd.DataFrame
            The data to fit the regimes on
        *args, **kwargs
            Any additional arguments to be passed to the _fit method

        Returns
        -------
        None
        """
        super().fit(data, *args, **kwargs)
        self._fit_empirical_trans_matrix()

    def _fit_empirical_trans_matrix(self):
        """
        Fit an empirical transition matrix using the fitted labeled data and store it in `_trans_mat`.

        Transitions between consecutive fitted states are counted, every count is increased by one
        (add-one smoothing, so no transition gets probability zero) and each row is normalized to sum to one.

        The matrix always has one row and one column for every label in ``range(n_regimes)`` as well as for
        every label that occurs in the fitted states, in sorted order. It is therefore square even when a
        state is never observed as the start (or the end) of a transition.

        Returns
        -------
        None

        Raises
        ------
        RegimeDetectionError
            If no fitted states are available
        """
        if self._fitted_states is None:
            raise RegimeDetectionError("Model has not been fitted yet or did not produce fitted states")

        states = pd.Series(self._fitted_states).to_numpy()
        # Pair every observation with its successor; pairs with a missing label are ignored
        transitions = pd.DataFrame({"start_state": states[:-1], "end_state": states[1:]}).dropna()
        start_states = transitions["start_state"].to_numpy()
        end_states = transitions["end_state"].to_numpy()

        # Sorted set of all labels the matrix has to cover
        labels = np.union1d(np.arange(self.n_regimes or 0), np.concatenate([start_states, end_states]))
        start_idx = np.searchsorted(labels, start_states)
        end_idx = np.searchsorted(labels, end_states)

        # Get the transition matrix
        counts = np.zeros((len(labels), len(labels)))
        # np.add.at accumulates repeated (start, end) pairs, unlike plain fancy-index assignment
        np.add.at(counts, (start_idx, end_idx), 1)
        counts += 1

        self._trans_mat = counts / counts.sum(axis=1, keepdims=True)
190 | 


--------------------------------------------------------------------------------
/marketmoodring/portfolio_optimization/joint_stochastic_prog.py:
--------------------------------------------------------------------------------
  1 | from marketmoodring.portfolio_optimization.base import (
  2 |     PortfolioOptimizationBase,
  3 |     PortfolioOptimizationError,
  4 | )
  5 | import numpy as np
  6 | import numba
  7 | from scipy.optimize import minimize
  8 | from typing import Union
  9 | import pandas as pd
 10 | from scipy.stats import norm
 11 | 
 12 | 
 13 | class JointStochasticProgOptimization(PortfolioOptimizationBase):
 14 |     def __init__(self, n_regimes: int, objective: str = "max_avg_sharpe", *args, **kwargs):
 15 |         """
 16 |         Stochastic Programming Portfolio Optimization where each simulated regime sequence is jointly optimized for.
 17 | 
 18 |         Parameters
 19 |         ----------
 20 |         n_regimes : int
 21 |             The number of regimes.
 22 |         objective : str, optional
 23 |             The objective function to optimize for. The options are "max_avg_sharpe" and "min_avg_VaR".
 24 |         """
 25 |         super().__init__(n_regimes, *args, **kwargs)
 26 |         if objective not in ("max_avg_sharpe", "min_avg_VaR"):
 27 |             raise ValueError("objective is note one of max_avg_sharpe, min_avg_VaR")
 28 |         self._objective = objective
 29 | 
 30 |     def calculate_weights(self, fitted_states: np.ndarray, trans_mat: np.ndarray,
 31 |                           index_data: Union[np.ndarray, pd.DataFrame],
 32 |                           seq_length: int = 22, n_sequences: int = 1000, *args, **kwargs):
 33 |         """
 34 |         Calculate the weights for the given fitted states and asset returns.
 35 | 
 36 |         Parameters
 37 |         ----------
 38 |         fitted_states : np.ndarray
 39 |             The fitted states.
 40 |         trans_mat : np.ndarray
 41 |             The transition matrix.
 42 |         index_data : Union[np.ndarray, pd.DataFrame]
 43 |             The index or asset returns data.
 44 |         seq_length : int, optional
 45 |             The sequence length. The default is 22.
 46 |         n_sequences : int, optional
 47 |             The number of sequences. The default is 1000.
 48 | 
 49 |         Returns
 50 |         -------
 51 |         weights : np.ndarray
 52 |             The weights that maximize the average expected Sharpe Ratio over all possible state sequences
 53 |         """
 54 |         fitted_states = fitted_states.astype(int)
 55 |         states = np.unique(fitted_states)
 56 |         n_states = len(states)
 57 | 
 58 |         regime_groups = index_data.groupby(fitted_states)
 59 |         mu = regime_groups.mean().to_numpy()
 60 |         n_assets = index_data.shape[1]
 61 |         sigma = regime_groups.cov().to_numpy().reshape(n_states, n_assets, n_assets)
 62 | 
 63 |         sequences = _calc_n_sequences(n_sequences, fitted_states[-1], trans_mat, states, seq_length)
 64 |         # np.unique returns sorted unique values, we only care about counts
 65 |         state_counts = _count_states(sequences, n_states)
 66 | 
 67 |         if self._objective == "max_avg_sharpe":
 68 |             result = minimize(
 69 |                 fun=lambda weights: _max_avg_sharpe(weights, mu, sigma, state_counts, seq_length),
 70 |                 x0=np.ones(shape=n_assets) / n_assets,
 71 |                 bounds=[(0, 1)] * n_assets,
 72 |                 method="SLSQP",
 73 |                 constraints={"type": "eq", "fun": lambda x: np.sum(x) - 1},
 74 |                 tol=1e-5
 75 |             )
 76 |         elif self._objective == "min_avg_VaR":
 77 |             result = minimize(
 78 |                 fun=lambda weights: _min_avg_VaR(weights, mu, sigma, state_counts),
 79 |                 x0=np.ones(shape=n_assets) / n_assets,
 80 |                 bounds=[(0, 1)] * n_assets,
 81 |                 method="SLSQP",
 82 |                 constraints={"type": "eq", "fun": lambda x: np.sum(x) - 1},
 83 |                 tol=1e-5
 84 |             )
 85 | 
 86 |         return result.x
 87 | 
 88 |     def __str__(self):
 89 |         return "JointStochasticProgOpt"
 90 | 
 91 | 
 92 | @numba.jit("int64[:](int64,float64[:,:],int64[:],int64)", nopython=True, nogil=True)
 93 | def _calc_sequence(start_state, trans_mat, states, seq_length):
 94 |     """
 95 |     Calculate a random sequence of states.
 96 | 
 97 |     Parameters
 98 |     ----------
 99 |     start_state : numpy.int64
100 |         The initial state of each sequence.
101 |     trans_mat : numpy.ndarray[numpy.float64]
102 |         The transition matrix that defines the probabilities of transitioning from one state to another.
103 |     states : numpy.ndarray[numpy.int64]
104 |         An array of possible states.
105 |     seq_length : numpy.int64
106 |         The length of each sequence to generate.
107 | 
108 |     Returns
109 |     -------
110 |     sequence : numpy.ndarray[numpy.int64]
111 |         An array of random sequence of states.
112 | 
113 |     Notes
114 |     -----
115 |     This function uses Numba to speed up the calculation.
116 |     """
117 |     sequence = np.empty(seq_length, dtype=np.int64)
118 |     sequence[0] = start_state
119 |     for i in range(1, seq_length):
120 |         sequence[i] = states[np.searchsorted(np.cumsum(trans_mat[sequence[i-1]]), np.random.random(), side="right")]
121 |     return sequence
122 | 
123 | 
def _calc_n_sequences(n_sequences, start_state, trans_mat, states, seq_length):
    """
    Calculate multiple random sequences of states.

    Parameters
    ----------
    n_sequences : numpy.int64
        The number of sequences to generate.
    start_state : numpy.int64
        The initial state of each sequence.
    trans_mat : numpy.ndarray[numpy.float64]
        The transition matrix that defines the probabilities of transitioning from one state to another.
    states : numpy.ndarray[numpy.int64]
        An array of possible states.
    seq_length : numpy.int64
        The length of each sequence to generate.

    Returns
    -------
    sequences : numpy.ndarray[numpy.int64]
        An array of random sequences of states, one sequence per row. Has zero rows when n_sequences is 0.
    """
    # Fill an int64 buffer row by row; this also covers n_sequences == 0, for which
    # np.apply_along_axis would raise.
    sequences = np.empty((n_sequences, seq_length), dtype=np.int64)
    for row in range(n_sequences):
        sequences[row] = _calc_sequence(start_state, trans_mat, states, seq_length)
    return sequences
150 | 
151 | 
152 | def _calc_sharpes(weights, mu, sigma, state_counts, seq_length, rf=0):
153 |     """
154 |     Calculate Sharpe Ratios.
155 | 
156 |     Parameters
157 |     ----------
158 |     weights : numpy.ndarray[numpy.float64]
159 |         The weights.
160 |     mu : numpy.ndarray[numpy.float64]
161 |         The expected returns.
162 |     sigma : numpy.ndarray[numpy.float64]
163 |         The covariance matrix.
164 |     state_counts : numpy.ndarray[numpy.int64]
165 |         The state counts for each sequence.
166 |     seq_length : numpy.int64
167 |         The sequence length.
168 |     rf : numpy.float64, optional
169 |         The risk free rate. Default is 0.
170 | 
171 |     Returns
172 |     -------
173 |     srs : numpy.ndarray[numpy.float64]
174 |         Sharpe ratios.
175 |     """
176 |     w = weights.reshape(-1, 1)
177 | 
178 |     exp_r = state_counts @ mu @ w / seq_length
179 |     exp_sigma = (w.T @ np.einsum('ij,jkl->ikl', state_counts, sigma) @ w).reshape(-1, 1) / np.sqrt(seq_length)
180 | 
181 |     srs = (exp_r - rf) / exp_sigma
182 | 
183 |     return srs
184 | 
185 | 
def _max_avg_sharpe(weights, mu, sigma, state_counts, seq_length):
    """
    Objective for a minimizer: the negated mean of the Sharpe ratios over all sequences (risk free rate 0).
    """
    sharpe_ratios = _calc_sharpes(weights, mu, sigma, state_counts, seq_length, rf=0)
    average_sharpe = np.mean(sharpe_ratios)
    return -average_sharpe
189 | 
190 | 
191 | def _calc_value_at_risk(weights, mu, sigma, state_counts):
192 |     """
193 |     Calculate the Value at Risk (VaR) for each state sequence.
194 | 
195 |     Parameters
196 |     ----------
197 |     weights : numpy.ndarray[numpy.float64]
198 |         The weights.
199 |     mu : numpy.ndarray[numpy.float64]
200 |         The expected returns.
201 |     sigma : numpy.ndarray[numpy.float64]
202 |         The covariance matrix.
203 |     state_counts : numpy.ndarray[numpy.int64]
204 |         The state counts for each sequence.
205 | 
206 |     Returns
207 |     -------
208 |     vars : numpy.ndarray[numpy.float64]
209 |         The 5% Value at Risk for each state sequence.
210 |     """
211 |     w = weights.reshape(-1, 1)
212 | 
213 |     exp_r = state_counts @ mu @ w
214 |     exp_sigma = (w.T @ np.einsum('ij,jkl->ikl', state_counts, sigma) @ w).reshape(-1, 1)
215 |     exp_r_sigma = np.concatenate([exp_r, exp_sigma], axis=1)
216 |     return np.apply_along_axis(
217 |         lambda x: norm.ppf(0.05, x[0], x[1]), axis=1, arr=exp_r_sigma
218 |     )
219 | 
220 | 
def _min_avg_VaR(weights, mu, sigma, state_counts):
    """
    Objective for a minimizer: the negated mean of the Value at Risk figures over all sequences.
    """
    values_at_risk = _calc_value_at_risk(weights, mu, sigma, state_counts)
    average_var = np.mean(values_at_risk)
    return -average_var
224 | 
225 | 
@numba.jit("int64[:,:](int64[:,:],int64)", nopython=True, nogil=True)
def _count_states(sequences, n_states):
    """
    Tally how often every state occurs in every sequence.

    Parameters
    ----------
    sequences : numpy.ndarray[numpy.int64]
        The sequences, one per row.
    n_states : numpy.int64
        The number of possible states; the states counted are 0, ..., n_states - 1.

    Returns
    -------
    state_counts : numpy.ndarray[numpy.int64]
        One row per sequence and one column per state, holding the number of occurrences.

    Notes
    -----
    Compiled with Numba for speed.
    """
    n_sequences = len(sequences)
    counts = np.zeros((n_sequences, n_states), dtype=np.int64)

    for row in range(n_sequences):
        for state in sequences[row]:
            # Values outside 0..n_states-1 are not counted
            if 0 <= state < n_states:
                counts[row, state] += 1

    return counts
255 | 


--------------------------------------------------------------------------------
/marketmoodring/regime_detection/wasserstein_kmeans.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import scipy.stats as sts
  3 | from typing import Union
  4 | import pandas as pd
  5 | 
  6 | from marketmoodring.regime_detection.base import (NonParametricRegimeDetection)
  7 | 
# NOTE(review): presumably an upper bound on the number of random draws attempted while initialising the
# k-means centroids — the code using it is not visible in this excerpt, confirm before relying on it.
MAX_DRAW_KMEANS_INIT = 100
  9 | 
 10 | 
 11 | class WassersteinKMeansRegimeDetection(NonParametricRegimeDetection):
 12 |     ot = None
 13 | 
 14 |     def __init__(self, n_regimes: int, frequency: str = "D", window_size: int = 5,
 15 |                  n_bins: int = 21, min_range: float = -0.15, max_range: float = 0.15,
 16 |                  p: int = 3, reg: float = 1e-3, max_iter: int = 200, kde_smoothing: float = 0.5) -> None:
 17 |         """
 18 |         A class to handle regime detection through Wasserstein K-Means on rolling return distributions.
 19 |         Currently only accepts 1D time series.
 20 | 
 21 |         Parameters
 22 |         ----------
 23 |         n_regimes : int
 24 |             The number of regimes to transform the data to
 25 |         frequency : str
 26 |             The frequency or unit on which to sample windows. For instance if frequency="D" and window_size=5, then
 27 |             windows will have a size of 5 days. Current options: "D" for daily
 28 |         window_size : int
 29 |             The window size of rolling return distributions
 30 |         n_bins : int
 31 |             The number of bins used for histograms in the WKM algorithm. A larger value results in more fine-grained
 32 |             but more jagged histograms
 33 |         min_range : float
 34 |             The minimum value of the histograms
 35 |         max_range : float
 36 |             The maximum value of the histograms
 37 |         p : int
 38 |             The p-Wasserstein distance to be used. p corresponds to the number of distribution moments that should
 39 |             be considered in the distance metric
 40 |         reg : float
 41 |             A regularization coefficient
 42 |         max_iter : int
 43 |             The maximum number of iterations of the WKM algorithm
 44 |         kde_smoothing : float
 45 |             A number between 0 (none) and 1 (full) indication the level of smoothing to be applied on the window
 46 |             distributions
 47 |         """
 48 |         super().__init__(n_regimes)
 49 |         if frequency not in ("D",):
 50 |             raise ValueError("Sampling frequency is invalid. Must be any of (\"D\",)")
 51 |         if window_size <= 0:
 52 |             raise ValueError("Window Size must be at least 1")
 53 |         if kde_smoothing < 0 or kde_smoothing > 1:
 54 |             raise ValueError("KDE smoothing must lie between 0 and 1")
 55 |         # Model hyperparameters
 56 |         self._frequency = frequency
 57 |         self._window_size = window_size
 58 |         self._n_bins = n_bins
 59 |         self._min_range = min_range
 60 |         self._max_range = max_range
 61 |         self._p = p
 62 |         self._reg = reg
 63 |         self._max_iter = max_iter
 64 |         self._kde_smoothing = kde_smoothing
 65 |         # Model fitting parameters
 66 |         self._barycenters = None
 67 |         self._m = None
 68 |         # Conditional import, only import if class is ever created
 69 |         if WassersteinKMeansRegimeDetection.ot is None:
 70 |             WassersteinKMeansRegimeDetection.ot = __import__('ot')
 71 | 
 72 |     def fit(self, data: Union[np.ndarray, pd.DataFrame], *args, **kwargs):
 73 |         """
 74 |         Fits the regime detection model to the given data. If data is provided as a pandas DataFrame with
 75 |         a resolution higher than daily, the data will be resampled to a daily resolution and the corresponding
 76 |         labels will apply to the days in the data.
 77 | 
 78 |         Parameters
 79 |         ----------
 80 |         data : np.ndarray or pd.DataFrame
 81 |             The data to fit the regime detection model to
 82 |         args : tuple, optional
 83 |         kwargs : dict, optional
 84 | 
 85 |         Returns
 86 |         -------
 87 |         None
 88 |         """
 89 |         super().fit(data, *args, **kwargs)
 90 | 
 91 |     def _fit(self, data: np.ndarray, index: np.ndarray = None, reuse_last_run: bool = True, *args, **kwargs):
 92 |         # Dependency only necessary if _fit of WKM is ever called
 93 |         sample_distributions = self._sample_distributions(data, index)
 94 | 
 95 |         # Fit and store WKM model
 96 |         self._barycenters, _ = self._wkmeans(sample_distributions, reuse_last_run)
 97 | 
 98 |         # Prepend fitted states with 0, due to the window size the first few observations
 99 |         # cannot be clustered
100 |         self._fitted_states, _ = self._transform(data, index)
101 | 
102 |     def _transform(self, data: np.ndarray, index: np.ndarray = None, *args, **kwargs):
103 |         sample_distributions = self._sample_distributions(data, index, jump=False)
104 | 
105 |         if self._m is None:
106 |             x = np.arange(self._n_bins, dtype=np.float64)
107 |             self._m = self._p_wass_dist(x)
108 |         m = self._m
109 | 
110 |         distances = self.update_distances(self._barycenters, sample_distributions, m)
111 |         labels = np.argmin(distances, axis=1)
112 | 
113 |         # Prepend fitted states with 0, due to the window size the first few observations
114 |         # cannot be clustered
115 |         if index is None:
116 |             labels = np.concatenate((
117 |                 [np.nan] * (len(data) - len(labels)),
118 |                 labels
119 |             ))
120 |         if index is not None:
121 |             index = index.astype('datetime64[D]')
122 |             labels = np.concatenate((
123 |                 [np.nan] * (len(np.unique(index)) - len(labels)),
124 |                 labels
125 |             ))
126 | 
127 |         return labels, None
128 | 
129 |     def _sample_distributions(self, data: np.ndarray, index: np.ndarray = None, jump=True):
130 |         """
131 |         Create sample distributions from the given 1D time series data and associated index
132 | 
133 |         Parameters
134 |         ----------
135 |         data : np.ndarray
136 |             A 1D time series
137 |         index : np.ndarray
138 |             An optional 1D date time index corresponding to the time series
139 |         jump : bool
140 |             True if and only if a jump should be applied in between rolling windows
141 | 
142 |         Returns
143 |         -------
144 |         distributions : np.ndarray
145 |             An array of sampled distributions
146 |         """
147 |         # Set up linspace of histogram bucket midpoints
148 |         x = (
149 |                     np.linspace(self._min_range, self._max_range, self._n_bins + 1, dtype=np.float64)[1:]
150 |                     + np.linspace(self._min_range, self._max_range, self._n_bins + 1, dtype=np.float64)[:-1]
151 |             ) / 2
152 | 
153 |         if index is None:
154 |             # Sample distributions
155 |             if jump:
156 |                 sample = np.array([
157 |                     data[i:i + self._window_size]
158 |                     # TODO: self._window_size // 4 replace by variable
159 |                     for i in range(0, len(data) - self._window_size, self._window_size // 4)
160 |                 ])
161 |             else:
162 |                 sample = np.lib.stride_tricks.sliding_window_view(data.flatten(), (self._window_size,))
163 |         else:
164 |             index = index.astype('datetime64[D]')
165 |             sample = np.array([
166 |                 data[
167 |                     (index >= np.busday_offset(i, -self._window_size, roll='backward'))
168 |                     & (index <= i)
169 |                 ] for i in np.unique(index)[self._window_size:]
170 |             ], dtype=object)
171 | 
172 |         # Create sample distributions
173 |         dist_sample = np.zeros((len(sample), self._n_bins))
174 |         for i in range(len(sample)):
175 |             s = sample[i]
176 |             kde = sts.gaussian_kde(s.reshape(1, -1)).pdf(x)
177 |             hist = np.histogram(
178 |                     s, bins=self._n_bins,
179 |                     range=(self._min_range, self._max_range), density=False
180 |                 )[0]
181 |             hist = hist / np.sum(hist)
182 |             dist_sample[i, :] = self._kde_smoothing * kde + (1 - self._kde_smoothing) * hist
183 |             dist_sample[i, :] = dist_sample[i, :] / np.sum(dist_sample[i, :])
184 | 
185 |         return dist_sample
186 | 
187 |     def _wkmeans(self, distributions, reuse_last_run):
188 |         """
189 |         Fit a WKM model on the given distributions
190 | 
191 |         Parameters
192 |         ----------
193 |         distributions : np.ndarray
194 |             Historical return distributions
195 |         reuse_last_run : bool
196 |             True if and only if the last fitted model should be reused
197 | 
198 |         Returns
199 |         -------
200 |         tuple : (np.ndarray, np.ndarray)
201 |             A tuple with the cluster barycenters and predicted labels for each distribution
202 |         """
203 |         if self._m is None:
204 |             x = np.arange(self._n_bins, dtype=np.float64)
205 |             self._m = self._p_wass_dist(x)
206 |         m = self._m
207 | 
208 |         # Find appropriate initial points, try up to 100 random draws until one is found
209 |         # where the distance is large enough to result in two distinct sets of distributions
210 |         barycenters = None
211 | 
212 |         if self._barycenters is None or not reuse_last_run:
213 |             for _ in range(MAX_DRAW_KMEANS_INIT):
214 |                 barycenters = distributions[np.random.choice(len(distributions), self.n_regimes, replace=False)]
215 | 
216 |                 distances = self.update_distances(barycenters, distributions, m)
217 |                 labels = np.argmin(distances, axis=1)
218 | 
219 |                 try:
220 |                     self._update_barycenters(barycenters, distributions, labels, m)
221 | 
222 |                     break
223 |                 except ZeroDivisionError:
224 |                     continue
225 |         else:
226 |             barycenters = self._barycenters
227 | 
228 |         # Iterate and update the barycenters until convergence or the maximum amount of iterations is reached
229 |         it = 0
230 |         labels = None
231 | 
232 |         while it < self._max_iter:
233 |             distances = self.update_distances(barycenters, distributions, m)
234 |             old_labels = labels
235 |             labels = np.argmin(distances, axis=1)
236 | 
237 |             if np.array_equal(old_labels, labels):
238 |                 break
239 | 
240 |             self._update_barycenters(barycenters, distributions, labels, m)
241 | 
242 |             it += 1
243 | 
244 |         return barycenters, labels
245 | 
246 |     def update_distances(self, barycenters, distributions, m_b):
247 |         """
248 |         Calculate the distances from all given distributions to all given barycenters using the given loss matrix
249 | 
250 |         Parameters:
251 |         ----------
252 |         barycenters : numpy.ndarray
253 |             The barycenters to calculate distances from.
254 | 
255 |         distributions : numpy.ndarray
256 |             The distributions to calculate distances to.
257 | 
258 |         m_b : numpy.ndarray
259 |             The loss matrix for the distances.
260 | 
261 |         Returns:
262 |         -------
263 |         numpy.ndarray
264 |             A NxK numpy array of distances where N is the number of distributions and K is the number of regimes.
265 |         """
266 |         distances = np.zeros((len(distributions), self.n_regimes))
267 | 
268 |         for k in range(self.n_regimes):
269 |             for d in range(len(distributions)):
270 |                 distances[d, k] = self.ot.emd2(distributions[d], barycenters[k], m_b)
271 |         distances = np.power(distances, 1 / self._p)
272 |         return distances
273 | 
274 |     def _update_barycenters(self, barycenters, distributions, labels, m_b):
275 |         """
276 |         Update the barycenters in place after a single WKM iteration.
277 | 
278 |         Parameters:
279 |         ----------
280 |         barycenters : numpy.ndarray
281 |             The barycenters to update.
282 | 
283 |         distributions : numpy.ndarray
284 |             The input distributions.
285 | 
286 |         labels : numpy.ndarray
287 |             The classification labels of the current iteration.
288 | 
289 |         m_b : numpy.ndarray
290 |             Loss matrix.
291 |         """
292 |         for k in range(self.n_regimes):
293 |             k_dists = distributions[labels == k]
294 |             weights = np.array([1 / len(k_dists)] * len(k_dists))
295 | 
296 |             barycenters[k] = self.ot.bregman.barycenter(
297 |                 np.vstack(k_dists).T,
298 |                 m_b,
299 |                 self._reg,
300 |                 weights,
301 |                 numItermax=20000
302 |             )
303 | 
304 |             barycenters[k] /= np.sum(barycenters[k])
305 | 
306 |     def _p_wass_dist(self, x):
307 |         """
308 |         Construct the p-Wasserstein distance matrix for the given linspace x.
309 | 
310 |         Parameters:
311 |         ----------
312 |         x : numpy.ndarray
313 |             A 1D linspace to construct a distance matrix on.
314 | 
315 |         Returns:
316 |         -------
317 |         numpy.ndarray
318 |             An NxN distance matrix where N is the length of x.
319 |         """
320 |         m = self.ot.dist(x.reshape((self._n_bins, 1)), x.reshape((self._n_bins, 1)), metric="minkowski", p=self._p)
321 |         m = m ** self._p
322 |         m /= m.max()
323 |         # m_b *= multiplier
324 |         return m
325 | 
326 |     def __str__(self):
327 |         return "WassersteinKMeans"
328 | 


--------------------------------------------------------------------------------