├── CHANGES.rst ├── AUTHORS.rst ├── examples ├── __pycache__ │ └── utils.cpython-37.pyc ├── sim_funcs.py └── run_tebm_sim.py ├── lib └── tebm │ ├── __init__.py │ ├── utils.py │ ├── _utils.py │ ├── _tebm_fix.pyx │ ├── _tebmc_fix.pyx │ ├── stats.py │ ├── _tebmc_var.pyx │ ├── base_fix.py │ ├── tebm_fix.py │ ├── cthmm_fix.py │ └── cthmm_var.py ├── README.md ├── LICENSE.txt └── setup.py /CHANGES.rst: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | Peter A. Wijeratne (p.wijeratne@pm.me) 2 | -------------------------------------------------------------------------------- /examples/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pawij/tebm/HEAD/examples/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /lib/tebm/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | tebm 3 | ======== 4 | 5 | ``tebm`` is a set of algorithms for learning and inference of 6 | Temporal Event-Based Models. 7 | """ 8 | 9 | try: 10 | import setuptools_scm 11 | __version__ = setuptools_scm.get_version( # xref setup.py 12 | root="../..", relative_to=__file__, 13 | version_scheme="post-release", local_scheme="node-and-date") 14 | except (ImportError, LookupError): 15 | try: 16 | from ._version import version as __version__ 17 | except ImportError: 18 | pass 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Temporal Event-Based Model (TEBM) 2 | The TEBM is a generative model that can estimate the timing and uncertainty of events from semi-longitudinal datasets with irregularly sampled and missing data. 3 | 4 | If you use the TEBM, please cite this paper: 5 | 6 | Wijeratne, P.A., Eshaghi, A., Scotton, W.J., et al. The temporal event-based model: learning event timelines in progressive diseases. Imaging Neuroscience 2023. doi: https://doi.org/10.1162/imag_a_00010 7 | 8 | # TEBM install requirements 9 | Linux OS (Ubuntu 16.04.1, or greater) 10 | g++>=7.5.0 11 | c++>=3.8.0 12 | python>=3.7 13 | numpy>=1.19.5 14 | scipy>=1.7.3 15 | pandas 16 | pickle 17 | pathos 18 | matplotlib 19 | 20 | Install and link "kde_ebm" package, available here: 21 | 22 | https://github.com/ucl-pond/kde_ebm 23 | 24 | Navigate to top directory and issue the following command: 25 | 26 | CC=g++ CFLAGS=-lstdc++ python setup.py install 27 | 28 | # Running the code 29 | Navigate to examples/ and issue the following command: 30 | 31 | python run_tebm_sim.py 32 | 33 | # Worked example 34 | To follow... 
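Until then, the snippet below is a condensed sketch of `examples/run_tebm_sim.py` using the fixed-interval TEBM with a GMM likelihood; the class, function and argument names follow that script and `examples/sim_funcs.py`, and the settings shown are illustrative only:

```python
import numpy as np
from tebm import tebm_fix          # requires the install steps above (incl. kde_ebm)
from sim_funcs import gen_data     # run from within examples/

np.random.seed(42)
n_ppl, n_bms, n_obs = 100, 5, 2    # people, biomarkers, observations per person
n_stages = n_bms + 1               # one stage per biomarker event, plus baseline

# simulate a single-subtype dataset with GMM-style biomarker observations
X, lengths, jumps, labels, X0, stages_true, times, seq_true, Q, pi0, _ = gen_data(
    1, n_ppl, n_bms, n_obs, n_stages, model_type='GMM',
    sigma_noise=0.1, fwd_only=True, order=1, time_mean=[1])

# fit the fixed-interval mixture TEBM and recover the event sequence
model = tebm_fix.MixtureTEBM(X=X, lengths=lengths, n_stages=n_stages,
                             time_mean=1, n_iter=1, fwd_only=True, order=1,
                             algo='viterbi')
seq_model, mixtures = model.fit_tebm(labels, n_start=4, n_iter=100, n_cores=1,
                                     model_type='GMM', cut_controls=False)
print('Estimated event sequence:', seq_model)
```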
-------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 Peter Wijeratne 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Author: Peter Wijeratne (p.wijeratne@pm.me) 2 | 3 | import setuptools 4 | from setuptools import Extension, find_packages, setup 5 | from setuptools.command.build_ext import build_ext 6 | 7 | class build_ext(build_ext): 8 | 9 | def finalize_options(self): 10 | from Cython.Build import cythonize 11 | import numpy as np 12 | import numpy.distutils 13 | 14 | self.distribution.ext_modules[:] = cythonize("**/*.pyx") 15 | for ext in self.distribution.ext_modules: 16 | for k, v in np.distutils.misc_util.get_info("npymath").items(): 17 | setattr(ext, k, v) 18 | ext.include_dirs = [np.get_include()] 19 | 20 | super().finalize_options() 21 | 22 | def build_extensions(self): 23 | try: 24 | self.compiler.compiler_so.remove("-Wstrict-prototypes") 25 | except (AttributeError, ValueError): 26 | pass 27 | super().build_extensions() 28 | 29 | 30 | setup( 31 | name="tebm", 32 | description="Temporal Event-Based Models in Python with scikit-learn like API", 33 | maintainer="Peter Wijeratne", 34 | url="https://github.com/pawij/tebm", 35 | license="Academic Use License (TBC)", 36 | cmdclass={"build_ext": build_ext}, 37 | py_modules=[], 38 | packages=find_packages("lib"), 39 | package_dir={"": "lib"}, 40 | ext_modules=[Extension("", [])], 41 | package_data={}, 42 | python_requires=">=3.5", 43 | setup_requires=[ 44 | "Cython", 45 | "numpy>=1.10", 46 | "setuptools_scm>=3.3", 47 | ], 48 | install_requires=[ 49 | "numpy>=1.10", 50 | "scipy>=0.15", 51 | ], 52 | ) 53 | -------------------------------------------------------------------------------- /lib/tebm/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.special import logsumexp 3 | 4 | 5 | def normalize(a, axis=None): 6 | """ 7 | Normalizes the input array so that it sums to 1. 8 | 9 | Parameters 10 | ---------- 11 | a : array 12 | Non-normalized input data. 13 | 14 | axis : int 15 | Dimension along which normalization is performed. 16 | 17 | Notes 18 | ----- 19 | Modifies the input **inplace**. 
20 | """ 21 | a_sum = a.sum(axis) 22 | if axis and a.ndim > 1: 23 | # Make sure we don't divide by zero. 24 | a_sum[a_sum == 0] = 1 25 | shape = list(a.shape) 26 | shape[axis] = 1 27 | a_sum.shape = shape 28 | 29 | a /= a_sum 30 | 31 | 32 | def log_normalize(a, axis=None): 33 | """ 34 | Normalizes the input array so that ``sum(exp(a)) == 1``. 35 | 36 | Parameters 37 | ---------- 38 | a : array 39 | Non-normalized input data. 40 | 41 | axis : int 42 | Dimension along which normalization is performed. 43 | 44 | Notes 45 | ----- 46 | Modifies the input **inplace**. 47 | """ 48 | if axis is not None and a.shape[axis] == 1: 49 | # Handle single-state GMMHMM in the degenerate case normalizing a 50 | # single -inf to zero. 51 | a[:] = 0 52 | else: 53 | with np.errstate(under="ignore"): 54 | a_lse = logsumexp(a, axis, keepdims=True) 55 | # a_lse = np.log(np.sum(np.exp(a), axis=1)) 56 | # a_lse = a_lse.reshape(len(a_lse),1) 57 | a -= a_lse 58 | 59 | 60 | def iter_from_X_lengths(X, lengths): 61 | if lengths is None: 62 | yield 0, len(X) 63 | else: 64 | n_samples = X.shape[0] 65 | end = np.cumsum(lengths).astype(np.int32) 66 | start = end - lengths 67 | if end[-1] > n_samples: 68 | raise ValueError("more than {:d} samples in lengths array {!s}" 69 | .format(n_samples, lengths)) 70 | 71 | for i in range(len(lengths)): 72 | yield start[i], end[i] 73 | 74 | 75 | def log_mask_zero(a): 76 | """Computes the log of input probabilities masking divide by zero in log. 77 | 78 | Notes 79 | ----- 80 | During the M-step of EM-algorithm, very small intermediate start 81 | or transition probabilities could be normalized to zero, causing a 82 | *RuntimeWarning: divide by zero encountered in log*. 83 | 84 | This function masks this unharmful warning. 85 | """ 86 | a = np.asarray(a) 87 | with np.errstate(divide="ignore"): 88 | return np.log(a) 89 | 90 | 91 | def fill_covars(covars, covariance_type='full', n_components=1, n_features=1): 92 | if covariance_type == 'full': 93 | return covars 94 | elif covariance_type == 'diag': 95 | return np.array(list(map(np.diag, covars))) 96 | elif covariance_type == 'tied': 97 | return np.tile(covars, (n_components, 1, 1)) 98 | elif covariance_type == 'spherical': 99 | eye = np.eye(n_features)[np.newaxis, :, :] 100 | covars = covars[:, np.newaxis, np.newaxis] 101 | return eye * covars 102 | -------------------------------------------------------------------------------- /lib/tebm/_utils.py: -------------------------------------------------------------------------------- 1 | """Private utilities.""" 2 | 3 | import numpy as np 4 | from sklearn.utils.validation import NotFittedError 5 | 6 | 7 | # Copied from scikit-learn 0.19. 
8 | def _validate_covars(covars, covariance_type, n_components): 9 | """Do basic checks on matrix covariance sizes and values.""" 10 | from scipy import linalg 11 | if covariance_type == 'spherical': 12 | if len(covars) != n_components: 13 | raise ValueError("'spherical' covars have length n_components") 14 | elif np.any(covars <= 0): 15 | raise ValueError("'spherical' covars must be non-negative") 16 | elif covariance_type == 'tied': 17 | if covars.shape[0] != covars.shape[1]: 18 | raise ValueError("'tied' covars must have shape (n_dim, n_dim)") 19 | elif (not np.allclose(covars, covars.T) 20 | or np.any(linalg.eigvalsh(covars) <= 0)): 21 | raise ValueError("'tied' covars must be symmetric, " 22 | "positive-definite") 23 | elif covariance_type == 'diag': 24 | if len(covars.shape) != 2: 25 | raise ValueError("'diag' covars must have shape " 26 | "(n_components, n_dim)") 27 | elif np.any(covars <= 0): 28 | # raise ValueError("'diag' covars must be non-negative") 29 | print("'diag' covars must be non-negative") 30 | elif covariance_type == 'full': 31 | if len(covars.shape) != 3: 32 | raise ValueError("'full' covars must have shape " 33 | "(n_components, n_dim, n_dim)") 34 | elif covars.shape[1] != covars.shape[2]: 35 | raise ValueError("'full' covars must have shape " 36 | "(n_components, n_dim, n_dim)") 37 | for n, cv in enumerate(covars): 38 | if (not np.allclose(cv, cv.T) 39 | or np.any(linalg.eigvalsh(cv) <= 0)): 40 | raise ValueError("component %d of 'full' covars must be " 41 | "symmetric, positive-definite" % n) 42 | else: 43 | raise ValueError("covariance_type must be one of " + 44 | "'spherical', 'tied', 'diag', 'full'") 45 | 46 | 47 | # Copied from scikit-learn 0.19. 48 | def distribute_covar_matrix_to_match_covariance_type( 49 | tied_cv, covariance_type, n_components): 50 | """Create all the covariance matrices from a given template.""" 51 | if covariance_type == 'spherical': 52 | cv = np.tile(tied_cv.mean() * np.ones(tied_cv.shape[1]), 53 | (n_components, 1)) 54 | elif covariance_type == 'tied': 55 | cv = tied_cv 56 | elif covariance_type == 'diag': 57 | cv = np.tile(np.diag(tied_cv), (n_components, 1)) 58 | elif covariance_type == 'full': 59 | cv = np.tile(tied_cv, (n_components, 1, 1)) 60 | else: 61 | raise ValueError("covariance_type must be one of " + 62 | "'spherical', 'tied', 'diag', 'full'") 63 | return cv 64 | 65 | 66 | # Adapted from scikit-learn 0.21. 67 | def check_is_fitted(estimator, attribute): 68 | if not hasattr(estimator, attribute): 69 | raise NotFittedError( 70 | "This %s instance is not fitted yet. Call 'fit' with " 71 | "appropriate arguments before using this method." 
72 | % type(estimator).__name__) 73 | -------------------------------------------------------------------------------- /lib/tebm/_tebm_fix.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, wraparound=False 2 | 3 | from cython cimport view 4 | from numpy.math cimport expl, logl, log1pl, isinf, fabsl, INFINITY 5 | 6 | import numpy as np 7 | 8 | ctypedef double dtype_t 9 | 10 | 11 | cdef inline int _argmax(dtype_t[:] X) nogil: 12 | cdef dtype_t X_max = -INFINITY 13 | cdef int pos = 0 14 | cdef int i 15 | for i in range(X.shape[0]): 16 | if X[i] > X_max: 17 | X_max = X[i] 18 | pos = i 19 | return pos 20 | 21 | 22 | cdef inline dtype_t _max(dtype_t[:] X) nogil: 23 | return X[_argmax(X)] 24 | 25 | 26 | cdef inline dtype_t _logsumexp(dtype_t[:] X) nogil: 27 | cdef dtype_t X_max = _max(X) 28 | if isinf(X_max): 29 | return -INFINITY 30 | 31 | cdef dtype_t acc = 0 32 | for i in range(X.shape[0]): 33 | acc += expl(X[i] - X_max) 34 | 35 | return logl(acc) + X_max 36 | 37 | 38 | cdef inline dtype_t _logaddexp(dtype_t a, dtype_t b) nogil: 39 | if isinf(a) and a < 0: 40 | return b 41 | elif isinf(b) and b < 0: 42 | return a 43 | else: 44 | return max(a, b) + log1pl(expl(-fabsl(a - b))) 45 | 46 | def _forward(int n_samples, int n_components, 47 | dtype_t[:] log_startprob, 48 | dtype_t[:, :] log_transmat, 49 | dtype_t[:, :] framelogprob, 50 | dtype_t[:, :] fwdlattice): 51 | 52 | cdef int t, i, j 53 | cdef dtype_t[::view.contiguous] work_buffer = np.zeros(n_components) 54 | 55 | with nogil: 56 | for i in range(n_components): 57 | fwdlattice[0, i] = log_startprob[i] + framelogprob[0, i] 58 | 59 | for t in range(1, n_samples): 60 | for j in range(n_components): 61 | for i in range(n_components): 62 | work_buffer[i] = fwdlattice[t - 1, i] + log_transmat[i, j] 63 | 64 | fwdlattice[t, j] = _logsumexp(work_buffer) + framelogprob[t, j] 65 | 66 | 67 | def _backward(int n_samples, int n_components, 68 | dtype_t[:] log_startprob, 69 | dtype_t[:, :] log_transmat, 70 | dtype_t[:, :] framelogprob, 71 | dtype_t[:, :] bwdlattice): 72 | 73 | cdef int t, i, j 74 | cdef dtype_t[::view.contiguous] work_buffer = np.zeros(n_components) 75 | 76 | with nogil: 77 | for i in range(n_components): 78 | bwdlattice[n_samples - 1, i] = 0.0 79 | 80 | for t in range(n_samples - 2, -1, -1): 81 | for i in range(n_components): 82 | for j in range(n_components): 83 | work_buffer[j] = (log_transmat[i, j] 84 | + framelogprob[t + 1, j] 85 | + bwdlattice[t + 1, j]) 86 | bwdlattice[t, i] = _logsumexp(work_buffer) 87 | 88 | 89 | def _compute_log_prob_tau(int n_samples, int n_components, 90 | dtype_t[:, :] fwdlattice, 91 | dtype_t[:, :] log_transmat, 92 | dtype_t[:, :] bwdlattice, 93 | dtype_t[:, :] framelogprob, 94 | dtype_t[:, :] log_xi_sum): 95 | 96 | cdef int t, i, j 97 | cdef dtype_t[:, ::view.contiguous] work_buffer = \ 98 | np.full((n_components, n_components), -INFINITY) 99 | cdef dtype_t logprob = _logsumexp(fwdlattice[n_samples - 1]) 100 | 101 | with nogil: 102 | for t in range(n_samples - 1): 103 | for i in range(n_components): 104 | for j in range(n_components): 105 | work_buffer[i, j] = (fwdlattice[t, i] 106 | + log_transmat[i, j] 107 | + framelogprob[t + 1, j] 108 | + bwdlattice[t + 1, j] 109 | - logprob) 110 | 111 | for i in range(n_components): 112 | for j in range(n_components): 113 | log_xi_sum[i, j] = _logaddexp(log_xi_sum[i, j], 114 | work_buffer[i, j]) 115 | 116 | 117 | def _viterbi(int n_samples, int n_components, 118 | dtype_t[:] 
log_startprob, 119 | dtype_t[:, :] log_transmat, 120 | dtype_t[:, :] framelogprob): 121 | 122 | cdef int i, j, t, where_from 123 | 124 | cdef int[::view.contiguous] state_sequence = \ 125 | np.empty(n_samples, dtype=np.int32) 126 | cdef dtype_t[:, ::view.contiguous] viterbi_lattice = \ 127 | np.zeros((n_samples, n_components)) 128 | cdef dtype_t[::view.contiguous] work_buffer = np.empty(n_components) 129 | 130 | with nogil: 131 | for i in range(n_components): 132 | viterbi_lattice[0, i] = log_startprob[i] + framelogprob[0, i] 133 | 134 | # Induction 135 | for t in range(1, n_samples): 136 | for i in range(n_components): 137 | for j in range(n_components): 138 | work_buffer[j] = (log_transmat[j, i] 139 | + viterbi_lattice[t - 1, j]) 140 | 141 | viterbi_lattice[t, i] = _max(work_buffer) + framelogprob[t, i] 142 | 143 | # Observation traceback 144 | state_sequence[n_samples - 1] = where_from = \ 145 | _argmax(viterbi_lattice[n_samples - 1]) 146 | 147 | for t in range(n_samples - 2, -1, -1): 148 | for i in range(n_components): 149 | work_buffer[i] = (viterbi_lattice[t, i] 150 | + log_transmat[i, where_from]) 151 | 152 | state_sequence[t] = where_from = _argmax(work_buffer) 153 | 154 | return np.asarray(state_sequence) 155 | -------------------------------------------------------------------------------- /lib/tebm/_tebmc_fix.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, wraparound=False 2 | 3 | from cython cimport view 4 | from numpy.math cimport expl, logl, log1pl, isinf, fabsl, INFINITY 5 | 6 | import numpy as np 7 | 8 | ctypedef double dtype_t 9 | 10 | 11 | cdef inline int _argmax(dtype_t[:] X) nogil: 12 | cdef dtype_t X_max = -INFINITY 13 | cdef int pos = 0 14 | cdef int i 15 | for i in range(X.shape[0]): 16 | if X[i] > X_max: 17 | X_max = X[i] 18 | pos = i 19 | return pos 20 | 21 | 22 | cdef inline dtype_t _max(dtype_t[:] X) nogil: 23 | return X[_argmax(X)] 24 | 25 | 26 | cdef inline dtype_t _logsumexp(dtype_t[:] X) nogil: 27 | cdef dtype_t X_max = _max(X) 28 | if isinf(X_max): 29 | return -INFINITY 30 | 31 | cdef dtype_t acc = 0 32 | for i in range(X.shape[0]): 33 | acc += expl(X[i] - X_max) 34 | 35 | return logl(acc) + X_max 36 | 37 | 38 | cdef inline dtype_t _logaddexp(dtype_t a, dtype_t b) nogil: 39 | if isinf(a) and a < 0: 40 | return b 41 | elif isinf(b) and b < 0: 42 | return a 43 | else: 44 | return max(a, b) + log1pl(expl(-fabsl(a - b))) 45 | 46 | 47 | def _forward(int n_samples, int n_components, 48 | dtype_t[:] log_startprob, 49 | dtype_t[:, :] log_transmat, 50 | dtype_t[:, :] framelogprob, 51 | dtype_t[:, :] fwdlattice): 52 | 53 | cdef int t, i, j 54 | cdef dtype_t[::view.contiguous] work_buffer = np.zeros(n_components) 55 | 56 | with nogil: 57 | for i in range(n_components): 58 | fwdlattice[0, i] = log_startprob[i] + framelogprob[0, i] 59 | 60 | for t in range(1, n_samples): 61 | for j in range(n_components): 62 | for i in range(n_components): 63 | work_buffer[i] = fwdlattice[t - 1, i] + log_transmat[i, j] 64 | 65 | fwdlattice[t, j] = _logsumexp(work_buffer) + framelogprob[t, j] 66 | 67 | 68 | def _backward(int n_samples, int n_components, 69 | dtype_t[:] log_startprob, 70 | dtype_t[:, :] log_transmat, 71 | dtype_t[:, :] framelogprob, 72 | dtype_t[:, :] bwdlattice): 73 | 74 | cdef int t, i, j 75 | cdef dtype_t[::view.contiguous] work_buffer = np.zeros(n_components) 76 | 77 | with nogil: 78 | for i in range(n_components): 79 | bwdlattice[n_samples - 1, i] = 0.0 80 | 81 | for t in 
range(n_samples - 2, -1, -1): 82 | for i in range(n_components): 83 | for j in range(n_components): 84 | work_buffer[j] = (log_transmat[i, j] 85 | + framelogprob[t + 1, j] 86 | + bwdlattice[t + 1, j]) 87 | bwdlattice[t, i] = _logsumexp(work_buffer) 88 | 89 | 90 | def _compute_log_xi_sum(int n_samples, int n_components, 91 | dtype_t[:, :] fwdlattice, 92 | dtype_t[:, :] log_transmat, 93 | dtype_t[:, :] bwdlattice, 94 | dtype_t[:, :] framelogprob, 95 | dtype_t[:, :] log_xi_sum): 96 | 97 | cdef int t, i, j 98 | cdef dtype_t[:, ::view.contiguous] work_buffer = \ 99 | np.full((n_components, n_components), -INFINITY) 100 | cdef dtype_t logprob = _logsumexp(fwdlattice[n_samples - 1]) 101 | 102 | with nogil: 103 | for t in range(n_samples - 1): 104 | for i in range(n_components): 105 | for j in range(n_components): 106 | work_buffer[i, j] = (fwdlattice[t, i] 107 | + log_transmat[i, j] 108 | + framelogprob[t + 1, j] 109 | + bwdlattice[t + 1, j] 110 | - logprob) 111 | 112 | for i in range(n_components): 113 | for j in range(n_components): 114 | log_xi_sum[i, j] = _logaddexp(log_xi_sum[i, j], 115 | work_buffer[i, j]) 116 | 117 | 118 | def _viterbi(int n_samples, int n_components, 119 | dtype_t[:] log_startprob, 120 | dtype_t[:, :] log_transmat, 121 | dtype_t[:, :] framelogprob): 122 | 123 | cdef int i, j, t, where_from 124 | cdef dtype_t logprob 125 | 126 | cdef int[::view.contiguous] state_sequence = \ 127 | np.empty(n_samples, dtype=np.int32) 128 | cdef dtype_t[:, ::view.contiguous] viterbi_lattice = \ 129 | np.zeros((n_samples, n_components)) 130 | cdef dtype_t[::view.contiguous] work_buffer = np.empty(n_components) 131 | 132 | with nogil: 133 | for i in range(n_components): 134 | viterbi_lattice[0, i] = log_startprob[i] + framelogprob[0, i] 135 | 136 | # Induction 137 | for t in range(1, n_samples): 138 | for i in range(n_components): 139 | for j in range(n_components): 140 | work_buffer[j] = (log_transmat[j, i] 141 | + viterbi_lattice[t - 1, j]) 142 | 143 | viterbi_lattice[t, i] = _max(work_buffer) + framelogprob[t, i] 144 | 145 | # Observation traceback 146 | state_sequence[n_samples - 1] = where_from = \ 147 | _argmax(viterbi_lattice[n_samples - 1]) 148 | logprob = viterbi_lattice[n_samples - 1, where_from] 149 | 150 | for t in range(n_samples - 2, -1, -1): 151 | for i in range(n_components): 152 | work_buffer[i] = (viterbi_lattice[t, i] 153 | + log_transmat[i, where_from]) 154 | 155 | state_sequence[t] = where_from = _argmax(work_buffer) 156 | 157 | return np.asarray(state_sequence), logprob 158 | -------------------------------------------------------------------------------- /lib/tebm/stats.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import linalg, stats 3 | 4 | def log_multivariate_normal_density(X, means, covars, covariance_type='diag'): 5 | """Compute the log probability under a multivariate Gaussian distribution. 6 | Parameters 7 | ---------- 8 | X : array_like, shape (n_samples, n_features) 9 | List of n_features-dimensional data points. Each row corresponds to a 10 | single data point. 11 | means : array_like, shape (n_components, n_features) 12 | List of n_features-dimensional mean vectors for n_components Gaussians. 13 | Each row corresponds to a single mean vector. 14 | covars : array_like 15 | List of n_components covariance parameters for each Gaussian. 
The shape 16 | depends on `covariance_type`: 17 | (n_components, n_features) if 'spherical', 18 | (n_features, n_features) if 'tied', 19 | (n_components, n_features) if 'diag', 20 | (n_components, n_features, n_features) if 'full' 21 | covariance_type : string 22 | Type of the covariance parameters. Must be one of 23 | 'spherical', 'tied', 'diag', 'full'. Defaults to 'diag'. 24 | Returns 25 | ------- 26 | lpr : array_like, shape (n_samples, n_components) 27 | Array containing the log probabilities of each data point in 28 | X under each of the n_components multivariate Gaussian distributions. 29 | """ 30 | log_multivariate_normal_density_dict = { 31 | 'spherical': _log_multivariate_normal_density_spherical, 32 | 'tied': _log_multivariate_normal_density_tied, 33 | 'diag': _log_multivariate_normal_density_diag, 34 | 'full': _log_multivariate_normal_density_full} 35 | 36 | return log_multivariate_normal_density_dict[covariance_type]( 37 | X, means, covars 38 | ) 39 | 40 | def _log_multivariate_normal_density_diag(X, means, covars): 41 | """Compute Gaussian log-density at X for a diagonal model.""" 42 | # X: (ns, nf); means: (nc, nf); covars: (nc, nf) -> (ns, nc) 43 | n_samples, n_dim = X.shape 44 | # Avoid 0 log 0 = nan in degenerate covariance case. 45 | covars = np.maximum(covars, np.finfo(float).tiny) 46 | with np.errstate(over="ignore"): 47 | return -0.5 * (n_dim * np.log(2 * np.pi) 48 | + np.log(covars).sum(axis=-1) 49 | + ((X[:, None, :] - means) ** 2 / covars).sum(axis=-1)) 50 | """ 51 | with np.errstate(over="ignore"): 52 | S = weights[1] 53 | weights = weights[0] 54 | def calc_coeff(sig): 55 | return 1./np.sqrt(np.pi*2.0)*1./sig 56 | def calc_exp(x,mu,sig): 57 | x = (x-mu)/sig 58 | return np.exp(-.5*x*x) 59 | def normPdf(x,mu,sig): 60 | return calc_coeff(sig)*calc_exp(x,mu,sig) 61 | prob = normPdf(X[:, None, :], means, np.sqrt(covars))*weights 62 | # prob = stats.norm.pdf(X[:, None, :], loc=means, scale=np.sqrt(covars))*weights 63 | # normalise 64 | for i in range(prob.shape[0]): 65 | for j in range(prob.shape[2]): 66 | bm_pos = np.where(S == j)[0][0] 67 | prob_h = prob[i,:,j][0] 68 | prob_d = prob[i,:,j][-1] 69 | if prob_h==0 and prob_d==0: 70 | print (X) 71 | print (means) 72 | print (np.sqrt(covars)) 73 | print (weights) 74 | print (prob) 75 | quit() 76 | if np.isnan(prob_h) or np.isnan(prob_d): 77 | prob_h = .5 78 | else: 79 | prob_h = prob_h / (prob_h+prob_d) 80 | prob_d = 1-prob_h 81 | prob[i,:bm_pos+1,j] = prob_h 82 | prob[i,bm_pos+1:,j] = prob_d 83 | prob[prob == 0] = np.finfo(float).eps 84 | like = np.nansum(np.log(prob),axis=-1) 85 | return like 86 | """ 87 | def _log_multivariate_normal_density_spherical(X, means, covars): 88 | """Compute Gaussian log-density at X for a spherical model.""" 89 | cv = covars.copy() 90 | if covars.ndim == 1: 91 | cv = cv[:, np.newaxis] 92 | if cv.shape[1] == 1: 93 | cv = np.tile(cv, (1, X.shape[-1])) 94 | return _log_multivariate_normal_density_diag(X, means, cv) 95 | 96 | 97 | def _log_multivariate_normal_density_tied(X, means, covars): 98 | """Compute Gaussian log-density at X for a tied model.""" 99 | cv = np.tile(covars, (means.shape[0], 1, 1)) 100 | return _log_multivariate_normal_density_full(X, means, cv) 101 | 102 | 103 | def _log_multivariate_normal_density_full(X, means, covars, min_covar=1.e-7): 104 | """Log probability for full covariance matrices.""" 105 | n_samples, n_dim = X.shape 106 | nmix = len(means) 107 | log_prob = np.empty((n_samples, nmix)) 108 | for c, (mu, cv) in enumerate(zip(means, covars)): 109 | try: 110 | cv_chol 
= linalg.cholesky(cv, lower=True) 111 | except linalg.LinAlgError: 112 | # The model is most probably stuck in a component with too 113 | # few observations, we need to reinitialize this components 114 | try: 115 | cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim), 116 | lower=True) 117 | except linalg.LinAlgError: 118 | raise ValueError("'covars' must be symmetric, " 119 | "positive-definite") 120 | 121 | cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol))) 122 | cv_sol = linalg.solve_triangular(cv_chol, (X - mu).T, lower=True).T 123 | log_prob[:, c] = - .5 * (np.sum(cv_sol ** 2, axis=1) + 124 | n_dim * np.log(2 * np.pi) + cv_log_det) 125 | 126 | return log_prob 127 | -------------------------------------------------------------------------------- /lib/tebm/_tebmc_var.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, wraparound=False 2 | 3 | from cython cimport view 4 | from numpy.math cimport expl, logl, log1pl, isinf, fabsl, INFINITY 5 | 6 | import numpy as np 7 | 8 | ctypedef double dtype_t 9 | 10 | 11 | cdef inline int _argmax(dtype_t[:] X) nogil: 12 | cdef dtype_t X_max = -INFINITY 13 | cdef int pos = 0 14 | cdef int i 15 | for i in range(X.shape[0]): 16 | if X[i] > X_max: 17 | X_max = X[i] 18 | pos = i 19 | return pos 20 | 21 | 22 | cdef inline dtype_t _max(dtype_t[:] X) nogil: 23 | return X[_argmax(X)] 24 | 25 | 26 | cdef inline dtype_t _logsumexp(dtype_t[:] X) nogil: 27 | cdef dtype_t X_max = _max(X) 28 | if isinf(X_max): 29 | return -INFINITY 30 | 31 | cdef dtype_t acc = 0 32 | for i in range(X.shape[0]): 33 | acc += expl(X[i] - X_max) 34 | 35 | return logl(acc) + X_max 36 | 37 | 38 | cdef inline dtype_t _logaddexp(dtype_t a, dtype_t b) nogil: 39 | if isinf(a) and a < 0: 40 | return b 41 | elif isinf(b) and b < 0: 42 | return a 43 | else: 44 | return max(a, b) + log1pl(expl(-fabsl(a - b))) 45 | 46 | 47 | def _forward(int n_samples, int n_components, 48 | dtype_t[:] log_startprob, 49 | dtype_t[:, :, :] log_transmat, 50 | dtype_t[:, :] framelogprob, 51 | dtype_t[:, :] fwdlattice): 52 | 53 | cdef int t, i, j 54 | cdef dtype_t[::view.contiguous] work_buffer = np.zeros(n_components) 55 | 56 | with nogil: 57 | for i in range(n_components): 58 | fwdlattice[0, i] = log_startprob[i] + framelogprob[0, i] 59 | 60 | for t in range(1, n_samples): 61 | for j in range(n_components): 62 | for i in range(n_components): 63 | # PW use transition matrix from this time interval 64 | # FIXME: check 65 | work_buffer[i] = fwdlattice[t - 1, i] + log_transmat[t - 1, i, j] 66 | 67 | fwdlattice[t, j] = _logsumexp(work_buffer) + framelogprob[t, j] 68 | 69 | 70 | def _backward(int n_samples, int n_components, 71 | dtype_t[:] log_startprob, 72 | dtype_t[:, :, :] log_transmat, 73 | dtype_t[:, :] framelogprob, 74 | dtype_t[:, :] bwdlattice): 75 | 76 | cdef int t, i, j 77 | cdef dtype_t[::view.contiguous] work_buffer = np.zeros(n_components) 78 | 79 | with nogil: 80 | for i in range(n_components): 81 | bwdlattice[n_samples - 1, i] = 0.0 82 | 83 | for t in range(n_samples - 2, -1, -1): 84 | for i in range(n_components): 85 | for j in range(n_components): 86 | # PW use transition matrix from this time interval 87 | # FIXME: check 88 | work_buffer[j] = (log_transmat[t, i, j] 89 | + framelogprob[t + 1, j] 90 | + bwdlattice[t + 1, j]) 91 | bwdlattice[t, i] = _logsumexp(work_buffer) 92 | 93 | 94 | def _compute_log_xi_sum(int n_samples, int n_components, 95 | dtype_t[:, :] fwdlattice, 96 | # dtype_t[:, :, :] log_transmat, 97 | 
dtype_t[:, :] log_transmat, 98 | dtype_t[:, :] bwdlattice, 99 | dtype_t[:, :] framelogprob, 100 | dtype_t[:, :] log_xi_sum): 101 | 102 | cdef int t, i, j 103 | cdef dtype_t[:, ::view.contiguous] work_buffer = \ 104 | np.full((n_components, n_components), -INFINITY) 105 | cdef dtype_t logprob = _logsumexp(fwdlattice[n_samples - 1]) 106 | 107 | with nogil: 108 | for t in range(n_samples - 1): 109 | for i in range(n_components): 110 | for j in range(n_components): 111 | work_buffer[i, j] = (fwdlattice[t, i] 112 | # PW use transition matrix from this time interval 113 | # FIXME: check 114 | # + log_transmat[t, i, j] 115 | + log_transmat[i, j] 116 | + framelogprob[t + 1, j] 117 | + bwdlattice[t + 1, j] 118 | - logprob) 119 | 120 | for i in range(n_components): 121 | for j in range(n_components): 122 | log_xi_sum[i, j] = _logaddexp(log_xi_sum[i, j], 123 | work_buffer[i, j]) 124 | 125 | 126 | def _viterbi(int n_samples, int n_components, 127 | dtype_t[:] log_startprob, 128 | dtype_t[:, :, :] log_transmat, 129 | dtype_t[:, :] framelogprob): 130 | 131 | cdef int i, j, t, where_from 132 | cdef dtype_t logprob 133 | 134 | cdef int[::view.contiguous] state_sequence = \ 135 | np.empty(n_samples, dtype=np.int32) 136 | cdef dtype_t[:, ::view.contiguous] viterbi_lattice = \ 137 | np.zeros((n_samples, n_components)) 138 | cdef dtype_t[::view.contiguous] work_buffer = np.empty(n_components) 139 | 140 | with nogil: 141 | for i in range(n_components): 142 | viterbi_lattice[0, i] = log_startprob[i] + framelogprob[0, i] 143 | 144 | # Induction 145 | for t in range(1, n_samples): 146 | for i in range(n_components): 147 | for j in range(n_components): 148 | # PW use transition matrix from this time interval 149 | # FIXME: check 150 | work_buffer[j] = (log_transmat[t - 1, j, i] 151 | + viterbi_lattice[t - 1, j]) 152 | 153 | viterbi_lattice[t, i] = _max(work_buffer) + framelogprob[t, i] 154 | 155 | # Observation traceback 156 | state_sequence[n_samples - 1] = where_from = \ 157 | _argmax(viterbi_lattice[n_samples - 1]) 158 | logprob = viterbi_lattice[n_samples - 1, where_from] 159 | 160 | for t in range(n_samples - 2, -1, -1): 161 | for i in range(n_components): 162 | # PW use transition matrix from this time interval 163 | # FIXME: check 164 | work_buffer[i] = (viterbi_lattice[t, i] 165 | + log_transmat[t, i, where_from]) 166 | 167 | state_sequence[t] = where_from = _argmax(work_buffer) 168 | 169 | return np.asarray(state_sequence), logprob 170 | -------------------------------------------------------------------------------- /lib/tebm/base_fix.py: -------------------------------------------------------------------------------- 1 | # Fixed time interval Temporal Event-Based Model 2 | # Base class 3 | # Author: Peter Wijeratne (p.wijeratne@sussex.ac.uk) 4 | 5 | import numpy as np 6 | from scipy.special import logsumexp 7 | from sklearn.base import BaseEstimator 8 | 9 | from . 
import _tebm_fix 10 | 11 | import warnings 12 | warnings.filterwarnings('ignore', message='divide by zero encountered in log') 13 | 14 | class BaseTEBM(BaseEstimator): 15 | 16 | def __init__(self, 17 | X=None, 18 | lengths=None, 19 | n_stages=None, 20 | time_mean=None, 21 | n_iter=None, 22 | fwd_only=False, 23 | order=None, 24 | algo='viterbi', 25 | verbose=False): 26 | self.X = X 27 | self.lengths = lengths 28 | self.n_stages = n_stages 29 | if time_mean: 30 | self.time_mean = time_mean 31 | else: 32 | self.time_mean = 1 33 | self.n_iter = n_iter 34 | self.n_obs = X.shape[0] 35 | self.n_features = X.shape[1] 36 | self.fwd_only = fwd_only 37 | if order: 38 | self.order = order 39 | else: 40 | self.order = self.n_stages-1 41 | self.algo = algo 42 | # currently only do a single EM iteration, but this might change in the future 43 | self.tol = 1E-3 44 | self.verbose = verbose 45 | # initialise p_vec and a_mat 46 | self.p_vec_prior = np.full(self.n_stages, 1./self.n_stages) 47 | self.p_vec = self.p_vec_prior 48 | self.a_mat_prior = np.ones((n_stages,n_stages)) 49 | if self.fwd_only: 50 | for i in range(len(self.a_mat_prior)): 51 | self.a_mat_prior[i,i] = self.time_mean 52 | self.a_mat_prior[i,:i] = 0. 53 | if (i+self.order+1) < len(self.a_mat_prior): 54 | self.a_mat_prior[i,i+self.order+1:] = 0. 55 | count_nonzero = np.count_nonzero(self.a_mat_prior[i]!=0) 56 | # distribute probability to nonzero states 57 | for j in range(n_stages): 58 | # self.a_mat_prior[i,:i] = 0. 59 | if i!=j and self.a_mat_prior[i,j]!=0.: 60 | self.a_mat_prior[i,j] = (1-self.a_mat_prior[i,i])/(count_nonzero-1) 61 | elif i==(n_stages-1) and (j==n_stages-1): 62 | self.a_mat_prior[i,j] = 1. 63 | else: 64 | self.a_mat_prior = np.full((self.n_stages, self.n_stages), 1./self.n_stages) 65 | self.a_mat = self.a_mat_prior 66 | 67 | def reinit(self): 68 | self.p_vec = self.p_vec_prior 69 | self.a_mat = self.a_mat_prior 70 | 71 | def compute_forward(self, loglike_i): 72 | n_samples, n_stages = loglike_i.shape 73 | alpha_i = np.zeros((n_samples, n_stages)) 74 | _tebm_fix._forward(n_samples, 75 | n_stages, 76 | np.log(self.p_vec), 77 | np.log(self.a_mat), 78 | loglike_i, 79 | alpha_i) 80 | return alpha_i 81 | 82 | def compute_backward(self, loglike_i): 83 | n_samples, n_stages = loglike_i.shape 84 | beta_i = np.zeros((n_samples, n_stages)) 85 | _tebm_fix._backward(n_samples, 86 | n_stages, 87 | np.log(self.p_vec), 88 | np.log(self.a_mat), 89 | loglike_i, 90 | beta_i) 91 | return beta_i 92 | 93 | def compute_posteriors(self, alpha_i, beta_i): 94 | post = alpha_i + beta_i 95 | post -= logsumexp(post, axis=1, keepdims=True) 96 | return np.exp(post) 97 | 98 | def update_params(self, 99 | p_vec, 100 | a_mat, 101 | loglike_i, 102 | post_i, 103 | alpha_i, 104 | beta_i): 105 | # initial probability 106 | p_vec += post_i[0] 107 | # transition matrix 108 | n_samples, n_stages = loglike_i.shape 109 | # skip if only one observation - no temporal info 110 | if n_samples == 1: 111 | return 112 | log_prob_tau = np.full((n_stages, n_stages), -np.inf) 113 | _tebm_fix._compute_log_prob_tau(n_samples, 114 | n_stages, 115 | alpha_i, 116 | np.log(self.a_mat), 117 | beta_i, 118 | loglike_i, 119 | log_prob_tau) 120 | a_mat += np.exp(log_prob_tau) 121 | 122 | def m_step(self, p_vec, a_mat): 123 | # update initial probability 124 | # apply prior 125 | p_vec = np.maximum(self.p_vec_prior - 1 + p_vec, 0) 126 | # prevent forbidden transitions 127 | self.p_vec = np.where(self.p_vec == 0, 0, p_vec) 128 | # normalise 129 | self.p_vec = self.p_vec / 
self.p_vec.sum() 130 | # update transition matrix 131 | # apply prior 132 | a_mat = np.maximum(self.a_mat_prior - 1 + a_mat, 0) 133 | # prevent forbidden transitions 134 | self.a_mat = np.where(self.a_mat == 0, 0, a_mat) 135 | # normalise 136 | row_sums = self.a_mat.sum(axis=1) 137 | row_sums[row_sums==0] = 1 138 | self.a_mat = self.a_mat / row_sums[:, np.newaxis] 139 | 140 | def fit(self): 141 | self.reinit() 142 | curr_loglike = -np.inf 143 | for n in range(self.n_iter): 144 | p_vec = np.zeros(self.n_stages) 145 | a_mat = np.zeros((self.n_stages, self.n_stages)) 146 | loglike = 0 147 | for i in range(len(self.lengths)): 148 | s_idx, e_idx = int(np.sum(self.lengths[:i])), int(np.sum(self.lengths[:i])+self.lengths[i]) 149 | X_i = self.X[s_idx:e_idx] 150 | loglike_i = self.compute_log_likelihood(X_i, s_idx, e_idx) 151 | alpha_i = self.compute_forward(loglike_i) 152 | beta_i = self.compute_backward(loglike_i) 153 | post_i = self.compute_posteriors(alpha_i, beta_i) 154 | self.update_params(p_vec, a_mat, loglike_i, post_i, alpha_i, beta_i) 155 | loglike += logsumexp(alpha_i[-1]) 156 | self.m_step(p_vec, a_mat) 157 | # check likelihood for convergence - currently we don't use this, as default self.n_iter = 1 158 | if self.verbose: 159 | print (n, loglike-curr_loglike) 160 | if loglike-curr_loglike < self.tol: 161 | break 162 | curr_loglike = loglike 163 | 164 | def compute_viterbi(self, X, i, j): 165 | loglike_i = self.compute_log_likelihood(X, i, j) 166 | n_samples, n_stages = loglike_i.shape 167 | stages = _tebm_fix._viterbi(n_samples, 168 | n_stages, 169 | np.log(self.p_vec), 170 | np.log(self.a_mat), 171 | loglike_i) 172 | return stages 173 | 174 | def compute_map(self, X, i, j): 175 | posteriors = self.posteriors(X) 176 | stages = np.argmax(posteriors, axis=1) 177 | return stages 178 | 179 | def posteriors_X(self, X, lengths=None): 180 | n_samples = X.shape[0] 181 | posteriors = np.zeros((n_samples, self.n_stages)) 182 | for i in range(len(lengths)): 183 | s_idx, e_idx = int(np.sum(lengths[:i])), int(np.sum(lengths[:i])+lengths[i]) 184 | X_i = X[s_idx:e_idx] 185 | loglike_i = self.compute_log_likelihood(X_i, s_idx, e_idx) 186 | alpha_i = self.compute_forward(loglike_i) 187 | beta_i = self.compute_backward(loglike_i) 188 | posteriors[s_idx:e_idx] = self.compute_posteriors(alpha_i, beta_i) 189 | return posteriors 190 | 191 | def prob_X(self, X, lengths=None): 192 | n_samples = X.shape[0] 193 | prob = np.zeros((n_samples, self.n_stages)) 194 | for i in range(len(lengths)): 195 | s_idx, e_idx = int(np.sum(lengths[:i])), int(np.sum(lengths[:i])+lengths[i]) 196 | X_i = X[s_idx:e_idx] 197 | loglike_i = self.compute_log_likelihood(X_i, s_idx, e_idx) 198 | alpha_i = self.compute_forward(loglike_i) 199 | prob[s_idx:e_idx] = alpha_i 200 | return prob 201 | 202 | def compute_model_log_likelihood(self, X, lengths=None): 203 | loglike = 0 204 | for i in range(len(lengths)): 205 | s_idx, e_idx = int(np.sum(lengths[:i])), int(np.sum(lengths[:i])+lengths[i]) 206 | X_i = X[s_idx:e_idx] 207 | loglike_i = self.compute_log_likelihood(X_i, s_idx, e_idx) 208 | alpha_i = self.compute_forward(loglike_i) 209 | loglike += logsumexp(alpha_i[-1]) 210 | return loglike 211 | 212 | def stage_X(self, X, lengths=None, algo=None): 213 | if self.algo == 'viterbi': 214 | stage_algo = self.compute_viterbi 215 | elif self.algo == 'map': 216 | stage_algo = self.compute_map 217 | n_samples = X.shape[0] 218 | stages = np.empty(n_samples, dtype=int) 219 | for i in range(len(lengths)): 220 | s_idx, e_idx = int(np.sum(lengths[:i])), 
int(np.sum(lengths[:i])+lengths[i]) 221 | X_i = X[s_idx:e_idx] 222 | stagesij = stage_algo(X_i, s_idx, e_idx) 223 | stages[s_idx:e_idx] = stagesij 224 | return stages 225 | -------------------------------------------------------------------------------- /examples/sim_funcs.py: -------------------------------------------------------------------------------- 1 | # TEBM simulation functions 2 | # Author: Peter Wijeratne (p.wijeratne@pm.me) 3 | # Functions "gen_data_zscore", "gen_model_zscore", "gen_data_mixture", "gen_model_mixture" are adapted from pySuStaIn (https://github.com/ucl-pond/pySuStaIn) 4 | 5 | import numpy as np 6 | from scipy.stats import norm 7 | 8 | def gen_data(n_subtypes, 9 | n_ppl, 10 | n_bms, 11 | n_obs, 12 | n_components, 13 | model_type='GMM', 14 | is_cut=False, 15 | n_zscores=None, 16 | z_max=None, 17 | sigma_noise=1.0, 18 | seq=[], 19 | fractions=[1], 20 | fwd_only=True, 21 | order=1, 22 | time_mean=1, 23 | verbose=False): 24 | # intialise z-score stuff 25 | if model_type=='Zscore': 26 | z_val_arr = np.array([[x+1 for x in range(n_zscores)]]*n_bms) 27 | z_max_arr = np.array([z_max]*n_bms) 28 | IX_vals = np.array([[x for x in range(n_bms)]*n_zscores]).T 29 | stage_biomarker_index = np.array([y for x in IX_vals.T for y in x]) 30 | stage_zscore = np.array([y for x in z_val_arr.T for y in x]) 31 | stage_biomarker_index = stage_biomarker_index.reshape(1,len(stage_biomarker_index)) 32 | stage_zscore = stage_zscore.reshape(1,len(stage_zscore)) 33 | min_biomarker_zscore = [0]*n_bms 34 | max_biomarker_zscore = z_max_arr 35 | # transition generator matrix and initial probability 36 | # TODO: design suitable timescale and observation times for simulations 37 | # FIXME: also allow for fixed intervals (see HACK below) 38 | Q_subtypes, pi0_subtypes = [], [] 39 | for s in range(n_subtypes): 40 | Q = np.zeros((n_components, n_components)) 41 | for i in range(n_components): 42 | vec = np.ones(n_components-1) 43 | vec /= np.sum(vec) 44 | Q[i,:i] = vec[:i] 45 | Q[i,i+1:] = vec[i:] 46 | # Q[i,i] = -time_mean[s] 47 | # Q[i,i] = -np.random.rand() 48 | # Q[i,i] = -(.25 + np.random.rand()*3.75) 49 | # Q[i,i] = -(.5 + np.random.rand()*1.5) 50 | Q[i,i] = -(.1 + np.random.rand()*3.9) 51 | # zero-out forbidden states 52 | if fwd_only: 53 | for i in range(n_components): 54 | for j in range(n_components): 55 | if j=(i-order)): 61 | Q[i,j] = 0 62 | # renormalise 63 | for i in range(n_components): 64 | scale = np.sum([x if jj!=i else 0 for jj,x in enumerate(Q[i])]) 65 | for j in range(n_components): 66 | if i!=j: 67 | if scale!=0: 68 | Q[i,j] *= -Q[i,i]/scale 69 | else: 70 | Q[i,j] = 0. 71 | elif i==(n_components-1) and j==(n_components-1) and fwd_only: 72 | Q[i,j] = 0. 
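# For a generator matrix Q with non-positive diagonal and zero row sums, as
# constructed above, the expected sojourn time in stage i is -1/Q[i,i], and the
# stage-transition probabilities over a fixed interval dt are the matrix
# exponential expm(Q*dt). A minimal sanity-check sketch, assuming a toy 2-state
# generator (not part of this function):
#
#     from scipy.linalg import expm
#     Q_toy = np.array([[-0.5, 0.5],
#                       [ 0.0, 0.0]])      # final state absorbing
#     P_dt = expm(Q_toy * 1.0)             # rows of P_dt sum to 1
#     mean_sojourn_0 = -1.0 / Q_toy[0, 0]  # = 2.0 time units in stage 0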
73 | """ 74 | rates = np.array([-1.*(.5+np.random.rand()*.5) for i in range(n_components)]) 75 | Q = np.zeros((n_components, n_components)) 76 | for i in range(n_components-1): 77 | temp = [] 78 | for j in range(i): 79 | temp.append(0) 80 | temp.append(rates[i]) 81 | temp.append(-rates[i]) 82 | for j in range(2+i,n_components): 83 | temp.append(0) 84 | Q[i] = temp 85 | """ 86 | # always set initial probability as uniform 87 | pi0 = np.ones(len(Q)) 88 | Q_subtypes.append(Q) 89 | pi0_subtypes.append(pi0) 90 | # true sojourns from generated transition rate matrix 91 | sojourns_true = [] 92 | for i in range(len(Q)-1): 93 | # temp = [(1/Q[i,i])*np.log(np.random.rand()) for x in range(1000)] 94 | # print ('True mean duration', np.mean(temp)) 95 | sojourn_i = -1/Q[i,i] 96 | if verbose: 97 | print ('Stage',i,'true sojourn', sojourn_i) 98 | sojourns_true.append(sojourn_i) 99 | if verbose: 100 | print ('Total sequence true sojourn', np.sum(sojourns_true)) 101 | # Markov sequence generation 102 | stages, times, jumps = [], [], [] 103 | # total time spent in each state 104 | sojourn = np.zeros(n_components) 105 | # number of occurrences of each state 106 | counts = np.zeros(n_components) 107 | for i in range(n_ppl): 108 | # generate full jump process 109 | # tvec is time of transition, xvec is stage at corresponding time 110 | # tvec, xvec, dt = sim_markov(Q_subtypes[subtypes[i]], pi0_subtypes[subtypes[i]]) 111 | tvec, xvec, dt = sim_markov(Q_subtypes[0], pi0_subtypes[0]) 112 | sojourn += dt 113 | for j in range(len(xvec)): 114 | counts[int(xvec[j])] += 1 115 | # each subsequent time step should be distributed around 1 unit of time 116 | # simulate 100 observation times to generate exact same dataset each run for direct comparison between models, then select number of desired observations after 117 | # first observation time = 0 to ensure process starts in first state 118 | time_steps = [0] 119 | # FIXME: change the range to allow irregular sampling 120 | for j in range(9): # can set this to whatever 121 | # time_steps.append(1 + np.random.normal(scale=.05)) 122 | #FIXME: set minmax timestep and scale externally 123 | # time_steps.append(np.random.randint(1,4)*time_mean[0]) 124 | time_steps.append(np.random.randint(1,5)) 125 | time_i = np.cumsum(time_steps) 126 | times.append(time_i) 127 | jump_i = np.diff(time_i) 128 | jump_i = np.insert(jump_i, 0, 0) 129 | ###FIXME: HACK 130 | jumps.append(jump_i) 131 | # print ('HACKING SIMULATED JUMPS!') 132 | # jumps.append(np.array([0 if ii==0 else 1 for ii in range(len(jump_i))])) 133 | ### 134 | # sample stages corresponding to these times from the full jump process 135 | stages.append(step_fun(time_i, tvec, xvec)) 136 | stages = np.array(stages) 137 | times = np.array(times) 138 | jumps = np.array(jumps) 139 | if is_cut: 140 | # for testing - cut some people at > stage_threshold at baseline 141 | if model_type=='Zscore': 142 | stage_threshold = n_components-3 143 | else: 144 | stage_threshold = n_components-1 145 | del_idxs = [] 146 | for i in range(len(stages)): 147 | if stages[i,0] >= stage_threshold:# and np.random.rand() > .5: 148 | del_idxs.append(i) 149 | stages = np.delete(stages, del_idxs, axis=0) 150 | times = np.delete(times, del_idxs, axis=0) 151 | jumps = np.delete(jumps, del_idxs, axis=0) 152 | print ('##########################################################') 153 | print ('Cut', n_ppl-stages.shape[0], 'individuals at baseline for testing') 154 | print ('##########################################################') 155 | n_ppl = len(stages) 156 
| # generate subtypes 157 | subtypes = np.random.choice(range(n_subtypes), n_ppl, replace=True, p=fractions).astype(int) 158 | # reduce to the desired number of observations 159 | if n_obs: 160 | stages = stages[:,:n_obs] 161 | times = times[:,:n_obs] 162 | jumps = jumps[:,:n_obs] 163 | lengths = np.array([n_obs for x in range(n_ppl)]) 164 | else: 165 | lengths = [] 166 | for row in stages: 167 | print (row.shape) 168 | lengths.append(row.shape[0]) 169 | lengths = np.array(lengths) 170 | """ 171 | else: 172 | lengths = [] 173 | for i in range(n_ppl): 174 | nobs_i = np.random.randint(1,3) 175 | lengths.append(nobs_i) 176 | lengths = np.array(lengths).astype(int) 177 | times_temp, stages_temp, jumps_temp = [], [], [] 178 | for i in range(n_ppl): 179 | stages_i = stages[i,:lengths[i]] 180 | times_i = times[i,:lengths[i]] 181 | jumps_i = jumps[i,:lengths[i]] 182 | stages_temp.append(stages_i) 183 | times_temp.append(times_i) 184 | jumps_temp.append(jumps_i) 185 | stages = np.array(stages_temp) 186 | times = np.array(times_temp) 187 | jumps = np.array(jumps_temp) 188 | """ 189 | # generate data 190 | if model_type=='Zscore': 191 | if len(seq)==0: 192 | seq = gen_model_zscore(stage_zscore, stage_biomarker_index, n_subtypes) 193 | X, X_denoised = gen_data_zscore(subtypes, 194 | stages, 195 | seq, 196 | min_biomarker_zscore, 197 | max_biomarker_zscore, 198 | [sigma_noise]*n_bms, 199 | stage_zscore, 200 | stage_biomarker_index) 201 | else: 202 | if len(seq)==0: 203 | seq = gen_model_mixture(n_bms) 204 | X, X_denoised = gen_data_mixture(stages, seq, 'mixture_GMM', sigma_noise) 205 | # true sojourns from generated data 206 | for s in range(n_subtypes): 207 | stages_s = stages[subtypes==s] 208 | if verbose: 209 | print ('Subtype',s) 210 | print ('n_ppl',len(stages_s)) 211 | sojourns_true = [] 212 | for i in range(len(Q)-1): 213 | mask = stages_s[:,0]==i 214 | den = np.sum(mask) 215 | num = 0 216 | for j in range(1,n_obs): 217 | num += np.sum(stages_s[mask][:,j]!=i) 218 | prob_diag_i = 1-num/den 219 | sojourn_i = 1/(1-prob_diag_i) 220 | sojourns_true.append(sojourn_i) 221 | if verbose: 222 | print ('Stage',i,'true diagonal probability',round(prob_diag_i,2)) 223 | print ('Stage',i,'true generated sojourn',round(sojourn_i,2)) 224 | sojourns_true = np.array(sojourns_true) 225 | if verbose: 226 | print ('Total sequence true generated sojourn',round(np.nansum(sojourns_true[~np.isinf(sojourns_true)]),2)) 227 | # get data in long format for TEBM 228 | X0 = [] 229 | stages_0 = [] 230 | for i in range(n_ppl): 231 | X0.append(X[i][:,0]) 232 | stages_0.append(stages[i][0]) 233 | X0 = np.array(X0) 234 | stages_0 = np.array(stages_0) 235 | X_temp, stages_temp, times_temp, jumps_temp = [], [], [], [] 236 | for i in range(n_ppl): 237 | X_i = X[i] 238 | for j in range(X_i.shape[1]): 239 | X_temp.append(X_i[:,j]) 240 | stage_i = stages[i] 241 | for j in range(stage_i.shape[0]): 242 | stages_temp.append(stage_i[j]) 243 | time_i = times[i] 244 | for j in range(time_i.shape[0]): 245 | times_temp.append(time_i[j]) 246 | jump_i = jumps[i] 247 | for j in range(jump_i.shape[0]): 248 | jumps_temp.append(jump_i[j]) 249 | X = np.array(X_temp) 250 | stages = np.array(stages_temp) 251 | times = np.array(times_temp) 252 | jumps = np.array(jumps_temp) 253 | # choose which subjects will be cases and which will be controls 254 | MIN_CASE_STAGE = np.round((n_bms + 1) * 0.8) 255 | index_case = np.where(stages_0 >= MIN_CASE_STAGE)[0] 256 | index_control = np.where(stages_0 == 0)[0] 257 | labels = 2 * np.ones(n_ppl, dtype=int) # 2 - 
intermediate value, not used in mixture model fitting 258 | labels[index_case] = 1 # 1 - cases 259 | labels[index_control] = 0 # 0 - controls 260 | return X, lengths, jumps, labels, X0, stages, times, seq, Q_subtypes, pi0_subtypes, subtypes 261 | 262 | def sim_markov(Q, 263 | pi0, 264 | n_jumps=None): 265 | """ 266 | Gillespie's direct stochastic simulation algorithm for a single Markov chain with absorbing final state 267 | """ 268 | n_s = len(pi0) 269 | xvec = np.zeros(n_s) 270 | tvec = np.zeros(n_s) 271 | x = np.random.choice(n_s, size=1, p=pi0/np.sum(pi0))[0] 272 | t = 0 273 | xvec[0] = x 274 | tvec[0] = 0. 275 | if not n_jumps: 276 | n_jumps = n_s-1 277 | for i in range(n_jumps): 278 | # final state is absorbing - don't increment time 279 | if Q[x,x] != 0: 280 | t += (1/Q[x,x])*np.log(np.random.random()) 281 | weights = Q[x].copy() 282 | weights[x] = 0 283 | # final state is absorbing - don't change state 284 | if np.sum(weights) != 0: 285 | x = np.random.choice(n_s, size=1, p=weights/np.sum(weights))[0] 286 | xvec[i+1] = x 287 | tvec[i+1] = t 288 | # time spent in each state 289 | dt = np.zeros(len(pi0)) 290 | for i in range(len(tvec)): 291 | if i < (len(tvec)-1): 292 | dt[int(xvec[i])] += tvec[i+1] - tvec[i] 293 | return tvec, xvec, dt 294 | 295 | def step_fun(x, xvec, yvec): 296 | y = [] 297 | for i in range(len(x)): 298 | for j in range(len(xvec)-1): 299 | if x[i] >= xvec[j] and x[i] < xvec[j+1]: 300 | y.append(yvec[j]) 301 | elif x[i] >= xvec[-1]: 302 | y.append(yvec[-1]) 303 | break 304 | return y 305 | 306 | def gen_data_zscore(subtypes, 307 | stages, 308 | gt_ordering, 309 | min_biomarker_zscore, 310 | max_biomarker_zscore, 311 | std_biomarker_zscore, 312 | stage_zscore, 313 | stage_biomarker_index): 314 | 315 | N = stage_biomarker_index.shape[1] 316 | N_S = gt_ordering.shape[0] 317 | possible_biomarkers = np.unique(stage_biomarker_index) 318 | B = len(possible_biomarkers) 319 | stage_value = np.zeros((B,N+2,N_S)) 320 | for s in range(N_S): 321 | S = gt_ordering[s,:] 322 | S_inv = np.array([0]*N) 323 | S_inv[S.astype(int)] = np.arange(N) 324 | for i in range(B): 325 | b = possible_biomarkers[i] 326 | event_location = np.concatenate([[0], S_inv[(stage_biomarker_index == b)[0]], [N]]) 327 | event_value = np.concatenate([[min_biomarker_zscore[i]], stage_zscore[stage_biomarker_index == b], [max_biomarker_zscore[i]]]) 328 | for j in range(len(event_location)-1): 329 | if j == 0: # FIXME: nasty hack to get Matlab indexing to match up - necessary here because indices are used for linspace limits 330 | index = np.arange(event_location[j],event_location[j+1]+2) 331 | stage_value[i,index,s] = np.linspace(event_value[j],event_value[j+1],event_location[j+1]-event_location[j]+2) 332 | else: 333 | index = np.arange(event_location[j] + 1, event_location[j + 1] + 2) 334 | stage_value[i,index,s] = np.linspace(event_value[j],event_value[j+1],event_location[j+1]-event_location[j]+1) 335 | stage_value = 0.5 * stage_value[:, :stage_value.shape[1] - 1, :] + 0.5 * stage_value[:, 1:, :] 336 | M = stages.shape[0] 337 | # initialise variable observation length arrays 338 | data = [] 339 | for i in range(len(stages)): 340 | data.append(np.zeros((B, len(stages[i])))) 341 | data_denoised = [] 342 | for i in range(len(stages)): 343 | data_denoised.append(np.zeros((B, len(stages[i])))) 344 | # set data 345 | for i in range(M): 346 | stage_i = stages[i] 347 | # assume noise homoskedastic 348 | noise = np.random.normal(np.zeros(B), std_biomarker_zscore, B) 349 | for t in range(len(stage_i)): 350 | for j in 
range(B): 351 | data_denoised[i][j][t] = stage_value[:,int(stage_i[t]),subtypes[i]][j] # last index would be "subtypes[i]" 352 | data[i][j][t] = data_denoised[i][j][t] + noise[j] 353 | return data, data_denoised 354 | 355 | def gen_model_zscore(stage_zscore, 356 | stage_biomarker_index, 357 | N_S): 358 | 359 | N = np.array(stage_zscore).shape[1] 360 | S = np.zeros((N_S,N)) 361 | for s in range(N_S): 362 | for i in range(N): 363 | IS_min_stage_zscore = np.array([False]*N) 364 | possible_biomarkers = np.unique(stage_biomarker_index) 365 | for j in range(len(possible_biomarkers)): 366 | IS_unselected = [False]*N 367 | for k in set(range(N))-set(S[s][:i]): 368 | IS_unselected[k] = True 369 | this_biomarkers = np.array([(np.array(stage_biomarker_index)[0]==possible_biomarkers[j]).astype(int)+(np.array(IS_unselected)==1).astype(int)])==2 370 | if not np.any(this_biomarkers): 371 | this_min_stage_zscore = 0 372 | else: 373 | this_min_stage_zscore = min(stage_zscore[this_biomarkers]) 374 | if(this_min_stage_zscore): 375 | temp = ((this_biomarkers.astype(int)+(stage_zscore==this_min_stage_zscore).astype(int))==2).T 376 | temp = temp.reshape(len(temp),) 377 | IS_min_stage_zscore[temp]=True 378 | events = np.array(range(N)) 379 | possible_events = np.array(events[IS_min_stage_zscore]) 380 | this_index = np.ceil(np.random.rand()*((len(possible_events))))-1 381 | S[s][i] = possible_events[int(this_index)] 382 | return S 383 | 384 | def gen_model_mixture(N_biomarkers): 385 | return np.array([np.random.permutation(N_biomarkers)]).astype(float) 386 | 387 | def gen_data_mixture(stages, 388 | gt_ordering, 389 | mixture_style, 390 | sigma_noise=1.): 391 | N_biomarkers = gt_ordering.shape[1] 392 | N_subjects = len(stages) 393 | #controls are always drawn from N(0, 1) distribution 394 | mean_controls = np.array([0] * N_biomarkers) 395 | std_controls = np.array([sigma_noise] * N_biomarkers) 396 | #mean and variance for cases 397 | #if using mixture_GMM, use normal distribution with mean 1 and std. 
devs sampled from a range 398 | if mixture_style == 'mixture_GMM': 399 | # mean_cases = np.array(np.random.uniform(size=N_biomarkers)+1.35) # PW: 1.5 to look more like ADNI SNR 400 | mean_cases = np.array(np.random.uniform(size=N_biomarkers)+1.5) # PW: 1.5 to look more like ADNI SNR 401 | std_cases = np.array([sigma_noise] * N_biomarkers) 402 | #if using mixture_KDE, use log normal with mean 0.5 and std devs sampled from a range 403 | elif mixture_style == 'mixture_KDE': 404 | mean_cases = np.array([0.5] * N_biomarkers) 405 | std_cases = np.random.uniform(0.2, 0.5, N_biomarkers) 406 | # initialise variable observation length arrays 407 | data = [] 408 | for i in range(len(stages)): 409 | data.append(np.zeros((N_biomarkers, len(stages[i])))) 410 | data_denoised = [] 411 | for i in range(len(stages)): 412 | data_denoised.append(np.zeros((N_biomarkers, len(stages[i])))) 413 | #loop over all subjects, creating measurment for each biomarker based on what subtype and stage they're in 414 | for i in range(N_subjects): 415 | stage_i = stages[i] 416 | for t in range(len(stage_i)): 417 | S_i = gt_ordering[0, :].astype(int) # first index would be subtype 418 | stage_i_t = stage_i[t].astype(int) 419 | #fill in with ABNORMAL values up to the subject's stage 420 | for j in range(stage_i_t): 421 | if mixture_style == 'mixture_KDE': 422 | sample_j = np.random.lognormal(mean_cases[S_i[j]], std_cases[S_i[j]]) 423 | elif mixture_style == 'mixture_GMM': 424 | sample_j = np.random.normal(mean_cases[S_i[j]], std_cases[S_i[j]]) 425 | data[i][S_i[j]][t] = sample_j 426 | data_denoised[i][S_i[j]][t] = mean_cases[S_i[j]] 427 | # fill in with NORMAL values from the subject's stage+1 to last stage 428 | for j in range(stage_i_t, N_biomarkers): 429 | data[i][S_i[j]][t] = np.random.normal(mean_controls[S_i[j]], std_controls[S_i[j]]) 430 | data_denoised[i][S_i[j]][t] = mean_controls[S_i[j]] 431 | return data, data_denoised 432 | -------------------------------------------------------------------------------- /examples/run_tebm_sim.py: -------------------------------------------------------------------------------- 1 | # Run TEBM simulation 2 | # Author: Peter Wijeratne (p.wijeratne@pm.me) 3 | 4 | import sys 5 | from tebm import tebm_fix, tebm_var 6 | from kde_ebm import plotting 7 | from kde_ebm.mixture_model import fit_all_gmm_models, get_prob_mat 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import scipy as sp 12 | import pickle 13 | import matplotlib.pyplot as plt 14 | import multiprocessing 15 | 16 | from sim_funcs import gen_data 17 | import warnings 18 | warnings.filterwarnings("ignore", message="Casting complex values to real discards the imaginary part") 19 | 20 | ############################################################################################################## USER INPUT START 21 | is_ebm = False # set True if you want to use the standard EBM 22 | is_cut = False # cut people starting in final state(s); for testing effect of right-censoring 23 | sigma_noise = 0.1 # simulated measurement noise 24 | n_ppl = 100 # number of people 25 | n_bms = 5 # number of biomarkers 26 | n_obs = 2 # number of observations per person. Set = None for random number of observations per person 27 | model_type = 'GMM' # type of likelihood model, can be one of: 'GMM', KDE', 'Zscore' 28 | if model_type == 'Zscore': 29 | n_zscores = 3 # number of z-score events per biomarker 30 | z_max = 5 # maximum z-score event per biomarker 31 | order = 1#int(n_bms*n_zscores) # transition order. 
For unconstrained matrix, set order = number of events-1 32 | n_components = int(n_bms*n_zscores + 1) 33 | else: 34 | order = 1 # transition order. For unconstrained matrix, set order = n_bms 35 | n_components = n_bms + 1 36 | fwd_only = True # set True to only allow forward transitions 37 | ############################################################################################################## USER INPUT END 38 | scale = 1 # mean time scale for each event 39 | time_mean = scale # used when simulating data 40 | init_params = 's' # initialise start / initial probability to uniform prior 41 | fit_params = 'st' # fit start probability and transition matrix 42 | tol = 1E-3 # tolerance for both inner and outer EM 43 | n_cores = 1 # number of cores used for parallelising start point 44 | n_start = 4 # number of outer EM start points 45 | # set random seed 46 | try: 47 | seed = int(sys.argv[1]) 48 | except IndexError: 49 | seed = 42 50 | np.random.seed(seed) 51 | if model_type=='Zscore': 52 | n_stages = int(n_zscores*n_bms + 1) 53 | else: 54 | n_stages = n_bms + 1 55 | algo = 'viterbi' # staging algorithm 56 | n_iter_outer = 100 # maximum number of outer EM iterations (for fitting sequence) 57 | # if fitting standard EBM then we don't need the inner EM loop (for fitting start probability and transition matrix) 58 | if is_ebm: 59 | n_iter_inner = 0 60 | else: 61 | n_iter_inner = 1 62 | biom_labels = [] 63 | if model_type=='Zscore': 64 | for i in range(n_bms): 65 | for j in range(n_zscores): 66 | biom_labels.append('BM'+str(i)+'Z'+str(j)) 67 | else: 68 | for i in range(n_bms): 69 | biom_labels.append('BM'+str(i)) 70 | plot_raw_data = False 71 | # generate data 72 | if model_type=='Zscore': 73 | # X, lengths, jumps, labels, X0, stages_true, times, seq_true, Q, pi0 = gen_data(n_ppl, n_bms, n_obs, n_stages, model_type=model_type, is_cut=is_cut, n_zscores=n_zscores, z_max=z_max, sigma_noise=sigma_noise) 74 | X, lengths, jumps, labels, X0, stages_true, times, seq_true, Q, pi0, _ = gen_data(1, n_ppl, n_bms, n_obs, n_components, model_type=model_type, is_cut=is_cut, n_zscores=n_zscores, z_max=z_max, sigma_noise=sigma_noise, seq=[], fractions=[1], fwd_only=fwd_only, order=order, time_mean=[1/scale]) 75 | else: 76 | X, lengths, jumps, labels, X0, stages_true, times, seq_true, Q, pi0, _ = gen_data(1, n_ppl, n_bms, n_obs, n_components, model_type=model_type, is_cut=is_cut, n_zscores=None, z_max=None, sigma_noise=sigma_noise, seq=[], fractions=[1], fwd_only=fwd_only, order=order, time_mean=[1/scale]) 77 | 78 | if True: 79 | n_x = np.round(np.sqrt(n_bms)).astype(int) 80 | n_y = np.ceil(np.sqrt(n_bms)).astype(int) 81 | fig, ax = plt.subplots(n_y, n_x, figsize=(10, 10)) 82 | for i in range(n_bms): 83 | for j in range(len(lengths)): 84 | nobs_i = lengths[j] 85 | s_idx, e_idx = int(np.sum(lengths[:j])), int(np.sum(lengths[:j])+nobs_i) 86 | ax[i // n_x, i % n_x].plot(stages_true[s_idx:e_idx],X[s_idx:e_idx,i]) 87 | ax[i // n_x, i % n_x].scatter(stages_true[s_idx:e_idx],X[s_idx:e_idx,i]) 88 | ax[i // n_x, i % n_x].set_title(biom_labels[i]) 89 | # plt.show() 90 | 91 | save_variables = {} 92 | save_variables["X"] = X 93 | save_variables["lengths"] = lengths 94 | save_variables["jumps"] = jumps 95 | save_variables["labels"] = labels 96 | save_variables["X0"] = X0 97 | save_variables["seq_true"] = seq_true 98 | save_variables["times"] = times 99 | save_variables["Q"] = Q 100 | save_variables["pi0"] = pi0 101 | pickle_file = 
open('./simdata_Nppl'+str(n_ppl)+'_Nbms'+str(n_bms)+'_Nobs'+str(n_obs)+'_Nintervals'+str(len(np.unique(jumps))-1)+'.pickle', 'wb') 102 | pickle_output = pickle.dump(save_variables, pickle_file) 103 | pickle_file.close() 104 | 105 | """ 106 | pickle_file = open('simdata_Nppl'+str(n_ppl)+'_Nbms'+str(n_bms)+'_Nobs'+str(n_obs)+'_Nintervals'+str(len(np.unique(jumps))-1)+'.pickle', 'rb') 107 | loaded_variables = pickle.load(pickle_file) 108 | X = loaded_variables["X"] 109 | lengths = loaded_variables["lengths"] 110 | jumps = loaded_variables["jumps"] 111 | labels = loaded_variables["labels"] 112 | X0 = loaded_variables["X0"] 113 | seq_true = loaded_variables["seq_true"] 114 | times = loaded_variables["times"] 115 | Q = loaded_variables["Q"] 116 | pi0 = loaded_variables["pi0"] 117 | """ 118 | # EBM treats repeated measurements as from separate individuals 119 | if is_ebm: 120 | labels0 = labels.copy() 121 | labels_temp = [] 122 | for i in range(len(lengths)): 123 | nobs_i = lengths[i] 124 | for j in range(nobs_i): 125 | labels_temp.append(labels[i]) 126 | labels = np.array(labels_temp) 127 | lengths = np.ones(X.shape[0]).astype(int) 128 | 129 | print (X.shape, lengths.shape) 130 | # 131 | 132 | labels_long = [] 133 | for i in range(len(lengths)): 134 | nobs_i = lengths[i] 135 | for j in range(nobs_i): 136 | labels_long.append(labels[i]) 137 | labels_long = np.array(labels_long) 138 | snr = np.nanmean(X[labels_long!=0], axis=0)/np.nanstd(X[labels_long==0], axis=0) 139 | #for i in range(len(biom_labels)): 140 | # print (biom_labels[i]+' SNR =', snr[i]) 141 | print ('SNR', snr) 142 | 143 | # this is currently redundant, but not for long... 144 | obs_type = 'Var' 145 | # 146 | print ('n_ppl', X0.shape[0], 'n_bms', X0.shape[1], 'n_obs', X.shape[0], 'n_intervals', len(np.unique(jumps))-1, 'n_stages', n_stages, 'order', order, 'fwd_only', fwd_only) 147 | 148 | print ('Fitting '+model_type+'-'+obs_type+'-TEBM...') 149 | if model_type == 'GMM' or model_type == 'KDE': 150 | if obs_type == 'Fix': 151 | model = tebm_fix.MixtureTEBM(X=X, 152 | lengths=lengths, 153 | n_stages=n_stages, 154 | time_mean=time_mean, 155 | n_iter=n_iter_inner, 156 | fwd_only=fwd_only, 157 | order=order, 158 | algo=algo) 159 | seq_model, mixtures = model.fit_tebm(labels, n_start=n_start, n_iter=n_iter_outer, n_cores=n_cores, model_type=model_type, cut_controls=False) 160 | else: 161 | model = tebm_var.MixtureTEBM(X=X, lengths=lengths, jumps=jumps, 162 | n_components=n_components, time_mean=time_mean, covariance_type="diag", 163 | n_iter=n_iter_inner, tol=tol, 164 | init_params=init_params, params=fit_params, 165 | algorithm=algo, verbose=False, allow_nan=True, 166 | fwd_only=fwd_only, order=order) 167 | seq_model, mixtures = model._fit_tebm(labels, n_start=n_start, n_iter=n_iter_outer, n_cores=n_cores, model_type=model_type, cut_controls=False) 168 | elif model_type == 'Zscore': 169 | model = tebm_fix.ZscoreTEBM(X=X, 170 | lengths=lengths, 171 | n_stages=n_stages, 172 | time_mean=1/n_stages, 173 | n_iter=n_iter_inner, 174 | fwd_only=fwd_only, 175 | order=order, 176 | algo=algo) 177 | seq_model = model.fit_tebm(n_zscores=n_zscores, z_max=z_max, n_start=n_start, n_iter=n_iter_outer, n_cores=n_cores, cut_controls=False) 178 | else: 179 | print ('Likelihood model not recognised! 
quit()') 180 | quit() 181 | 182 | if model_type == 'GMM': 183 | fig, ax = plotting.mixture_model_grid(X0, labels, mixtures, biom_labels) 184 | for i in range(len(ax)): 185 | for j in range(len(ax)-1): 186 | ax[i,j].set_yscale('log') 187 | 188 | print ('True seq',seq_true[0]) 189 | print ('MaxL seq',seq_model[0]) 190 | print ('Kendall tau',sp.stats.kendalltau(seq_true[0], seq_model[0])) 191 | 192 | if not is_ebm: 193 | n_iter_inner = 100 194 | 195 | # refit with 100 iterations 196 | if model_type == 'GMM': 197 | if obs_type == 'Var': 198 | model = tebm_var.MixtureTEBM(X=X, lengths=lengths, jumps=jumps, 199 | n_components=n_components, time_mean=time_mean, covariance_type="diag", 200 | n_iter=n_iter_inner, tol=tol, 201 | init_params=init_params, params=fit_params, 202 | algorithm=algo, verbose=True, allow_nan=True, 203 | fwd_only=fwd_only, order=order) 204 | else: 205 | model = tebm_fix.MixtureTEBM(X=X, 206 | lengths=lengths, 207 | n_stages=n_stages, 208 | time_mean=time_mean, 209 | n_iter=n_iter_inner, 210 | fwd_only=fwd_only, 211 | order=order, 212 | algo=algo, 213 | verbose=True) 214 | else: 215 | if obs_type == 'Var': 216 | model = tebm_var.ZscoreTEBM(X=X, 217 | lengths=lengths, 218 | jumps=jumps, 219 | n_components=n_components, 220 | # time_mean=time_mean, 221 | covariance_type="diag", 222 | n_iter=n_iter_inner, 223 | init_params=init_params, 224 | params=fit_params, 225 | fwd_only=fwd_only, 226 | order=order)#, 227 | # algo=algo) 228 | print ('!') 229 | else: 230 | model = tebm_fix.ZscoreTEBM(X=X, 231 | lengths=lengths, 232 | n_stages=n_stages, 233 | time_mean=1/n_stages, 234 | n_iter=n_iter_inner, 235 | fwd_only=fwd_only, 236 | order=order, 237 | algo=algo) 238 | 239 | model.S = seq_model[0] 240 | if model_type=='GMM': 241 | model.prob_mat = get_prob_mat(X, mixtures) 242 | model.mixtures = mixtures 243 | model.fit() 244 | else: 245 | # intialise z-score stuff 246 | z_val_arr = np.array([[x+1 for x in range(n_zscores)]]*n_bms) 247 | z_max_arr = np.array([z_max]*n_bms) 248 | IX_vals = np.array([[x for x in range(model.n_features)]*n_zscores]).T 249 | stage_biomarker_index = np.array([y for x in IX_vals.T for y in x]) 250 | stage_zscore = np.array([y for x in z_val_arr.T for y in x]) 251 | model.stage_biomarker_index = stage_biomarker_index.reshape(1,len(stage_biomarker_index)) 252 | model.stage_zscore = stage_zscore.reshape(1,len(stage_zscore)) 253 | model.min_biomarker_zscore = [0]*n_bms 254 | model.max_biomarker_zscore = z_max_arr 255 | model.covars_prior = np.tile(np.identity(1), (n_components, n_bms)) 256 | model.covars_ = model.covars_prior 257 | model.means_ = model._get_means() 258 | model.fit() 259 | 260 | if is_ebm: 261 | fout_name = 'simrun'+str(seed)+'_'+model_type+'-ebm_Nppl'+str(n_ppl)+'_Nstates'+str(n_stages)+'_Nobs'+str(n_obs)+'_Nintervals'+str(len(np.unique(jumps))-1)+'_Nstart'+str(n_start)+'_iscut_'+str(is_cut)+'_noise_'+str(sigma_noise)[0]+'p'+str(sigma_noise)[2]+'_order_'+str(order)+'_fwdonly_'+str(fwd_only)+'_Nits_'+str(n_iter_inner)+'.pickle' 262 | else: 263 | fout_name = 'simrun'+str(seed)+'_'+model_type+'-tebm_Nppl'+str(n_ppl)+'_Nstates'+str(n_stages)+'_Nobs'+str(n_obs)+'_Nintervals'+str(len(np.unique(jumps))-1)+'_Nstart'+str(n_start)+'_iscut_'+str(is_cut)+'_noise_'+str(sigma_noise)[0]+'p'+str(sigma_noise)[2]+'_order_'+str(order)+'_fwdonly_'+str(fwd_only)+'_Nits_'+str(n_iter_inner)+'.pickle' 264 | save_variables = {} 265 | save_variables["X"] = X 266 | save_variables["lengths"] = lengths 267 | save_variables["jumps"] = jumps 268 | save_variables["labels"] = 
labels 269 | save_variables["seq_true"] = seq_true 270 | save_variables["stages_true"] = stages_true 271 | save_variables["Q_true"] = Q[0] 272 | save_variables["p_vec_true"] = pi0 273 | save_variables["seq_model"] = seq_model 274 | if obs_type == 'Var': 275 | save_variables["Q_model"] = model.Q_ 276 | save_variables["p_vec_model"] = model.startprob_ 277 | 278 | pickle_file = open('./'+fout_name, 'wb') 279 | pickle_output = pickle.dump(save_variables, pickle_file) 280 | pickle_file.close() 281 | 282 | if plot_raw_data: 283 | n_x = np.round(np.sqrt(n_bms)).astype(int) 284 | n_y = np.ceil(np.sqrt(n_bms)).astype(int) 285 | fig, ax = plt.subplots(n_y, n_x, figsize=(10, 10)) 286 | for i in range(n_bms): 287 | for j in range(len(lengths)): 288 | nobs_i = lengths[j] 289 | s_idx, e_idx = int(np.sum(lengths[:j])), int(np.sum(lengths[:j])+nobs_i) 290 | ax[i // n_x, i % n_x].plot(stages_true[s_idx:e_idx],X[s_idx:e_idx,i]) 291 | ax[i // n_x, i % n_x].scatter(stages_true[s_idx:e_idx],X[s_idx:e_idx,i]) 292 | ax[i // n_x, i % n_x].set_title(biom_labels[i]) 293 | 294 | # plots 295 | if plot_raw_data: 296 | if model_type == 'GMM' or model_type == 'KDE': 297 | # biomarker distributions and mixture model fits 298 | fig, ax = plotting.mixture_model_grid(X0, labels, mixtures, biom_labels) 299 | 300 | # true transition matrix 301 | transmat = np.zeros((n_stages,n_stages)) 302 | startprob = np.zeros(n_stages) 303 | for i in range(len(lengths)): 304 | nobs_i = lengths[i] 305 | s_idx, e_idx = int(np.sum(lengths[:i])), int(np.sum(lengths[:i])+nobs_i) 306 | stages_i = stages_true[s_idx:e_idx].astype(int) 307 | startprob[stages_i[0]] += 1 308 | for j in range(1,len(stages_i)): 309 | transmat[stages_i[j-1],stages_i[j]] += 1 310 | # normalise across rows 311 | startprob /= np.sum(startprob) 312 | for i in range(transmat.shape[0]): 313 | transmat[i] /= np.sum(transmat[i]) 314 | # plot true initial probability 315 | fig, ax = plt.subplots() 316 | ax.bar(np.arange(n_stages),startprob) 317 | ax.set_xlabel('Stage', fontsize=18, labelpad=8) 318 | ax.set_ylabel('Probability', fontsize=18, labelpad=2) 319 | ax.tick_params(labelsize=18) 320 | plt.subplots_adjust(top=0.95, right=0.99, bottom=.15) 321 | ax.set_title('True pi0') 322 | # initial probability 323 | if obs_type == 'Fix': 324 | startprob = model.p_vec 325 | else: 326 | startprob = model.startprob_ 327 | fig, ax = plt.subplots() 328 | ax.bar(np.arange(n_stages),startprob) 329 | ax.set_xlabel('Stage', fontsize=18, labelpad=8) 330 | ax.set_ylabel('Probability', fontsize=18, labelpad=2) 331 | ax.tick_params(labelsize=18) 332 | plt.subplots_adjust(top=0.95, right=0.99, bottom=.15) 333 | ax.set_title('Fitted pi0') 334 | # sojourn times 335 | sojourns_reco, sojourns_true = [], [] 336 | sojourns_reco.append(0) 337 | sojourns_true.append(0) 338 | for i in range(len(transmat)-1): # skip final stage (absorbing) 339 | if obs_type == 'Fix': 340 | sojourns_reco.append(1/(1-transmat[i,i])/scale) 341 | else: 342 | sojourns_reco.append(-1/model.Q_[i,i]/scale) 343 | sojourns_true.append(-1/Q[0][i,i]/scale) 344 | # print ('Stage',i,'duration',round(sojourns_reco[i+1],2)) 345 | sojourns_reco = np.array(sojourns_reco) 346 | sojourns_true = np.array(sojourns_true) 347 | print ('np.sum(sojourns_reco)',round(np.sum(sojourns_reco),2)) 348 | print ('np.sum(sojourns_true)',round(np.sum(sojourns_true),2)) 349 | print ('np.abs(np.sum(sojourns_reco)-np.sum(sojourns_true))',np.abs(np.sum(sojourns_reco)-np.sum(sojourns_true))) 350 | print ('np.sqrt(np.sum(np.power(sojourns_reco-sojourns_true, 
2))/len(sojourns))',np.sqrt(np.sum(np.power(sojourns_reco-sojourns_true, 2))/len(sojourns_reco))) 351 | 352 | # staging 353 | if obs_type == 'Fix': 354 | stages_model = model.stages(X, lengths) 355 | else: 356 | stages_model, _ = model.predict(X, lengths, jumps) 357 | stages_true = stages_true.flatten() 358 | scale = [10.]*len(stages_true) 359 | for i in range(len(stages_true)): 360 | x0 = stages_true[i] 361 | x1 = stages_model[i] 362 | for j in range(len(stages_true)): 363 | if x0 == stages_true[j] and x1 == stages_model[j]: 364 | scale[i] += 20. 365 | fig, ax = plt.subplots() 366 | ax.scatter(stages_true.flatten(), stages_model, s=scale) 367 | ax.set_xlabel('Stage (true)') 368 | ax.set_ylabel('Stage (reco)') 369 | ax.grid() 370 | 371 | if obs_type == 'Var': 372 | # true transition rate matrix 373 | transmat = np.real(Q[0]) 374 | fig, ax = plt.subplots() 375 | ax.imshow(transmat, interpolation='nearest', cmap=plt.cm.Blues) 376 | for i in range(transmat.shape[0]): 377 | for j in range(transmat.shape[1]): 378 | if abs(round(transmat[i, j], 3)) > 1E-3: 379 | text = ax.text(j, i, round(transmat[i, j], 3), ha="center", va="center", color="black", size=10) 380 | event_labels = np.array(biom_labels)[seq_true[0].astype(int)] 381 | event_labels = np.insert(event_labels, 0, 'None') 382 | ax.set_xticks(np.arange(len(event_labels))) 383 | ax.set_yticks(np.arange(len(event_labels))) 384 | xticklabels = [] 385 | for x in event_labels: 386 | xticklabels.append(str(x)+' (t1)') 387 | ax.set_xticklabels(xticklabels, ha='right', rotation=45, rotation_mode='anchor', fontsize=12) 388 | yticklabels = [] 389 | for x in event_labels: 390 | yticklabels.append(str(x)+' (t0)') 391 | ax.set_yticklabels(yticklabels, ha='right', rotation_mode='anchor', fontsize=12) 392 | plt.subplots_adjust(bottom=.2, top=.95) 393 | bottom, top = ax.get_ylim() 394 | ax.set_ylim(bottom + 0.5, top - 0.5) 395 | ax.set_title('True Q') 396 | # prior transition rate matrix 397 | transmat = np.real(model.Q_prior) 398 | fig, ax = plt.subplots() 399 | ax.imshow(transmat, interpolation='nearest', cmap=plt.cm.Blues) 400 | for i in range(transmat.shape[0]): 401 | for j in range(transmat.shape[1]): 402 | if abs(round(transmat[i, j], 3)) > 1E-3: 403 | text = ax.text(j, i, round(transmat[i, j], 3), ha="center", va="center", color="black", size=10) 404 | event_labels = np.array(biom_labels)[seq_true[0].astype(int)] 405 | event_labels = np.insert(event_labels, 0, 'None') 406 | ax.set_xticks(np.arange(len(event_labels))) 407 | ax.set_yticks(np.arange(len(event_labels))) 408 | xticklabels = [] 409 | for x in event_labels: 410 | xticklabels.append(str(x)+' (t1)') 411 | ax.set_xticklabels(xticklabels, ha='right', rotation=45, rotation_mode='anchor', fontsize=12) 412 | yticklabels = [] 413 | for x in event_labels: 414 | yticklabels.append(str(x)+' (t0)') 415 | ax.set_yticklabels(yticklabels, ha='right', rotation_mode='anchor', fontsize=12) 416 | plt.subplots_adjust(bottom=.2, top=.95) 417 | bottom, top = ax.get_ylim() 418 | ax.set_ylim(bottom + 0.5, top - 0.5) 419 | ax.set_title('Prior Q') 420 | # fitted transition rate matrix 421 | transmat = np.real(model.Q_) 422 | fig, ax = plt.subplots() 423 | ax.imshow(transmat, interpolation='nearest', cmap=plt.cm.Blues) 424 | for i in range(transmat.shape[0]): 425 | for j in range(transmat.shape[1]): 426 | if abs(round(transmat[i, j], 3)) > 1E-3: 427 | text = ax.text(j, i, round(transmat[i, j], 3), ha="center", va="center", color="black", size=10) 428 | event_labels = np.array(biom_labels)[seq_model[0].astype(int)] 
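# --- editor's sketch (hedged; not part of the original script) ---
# model.Q_ is the fitted continuous-time transition rate matrix over the model
# stages, ordered by the maximum-likelihood sequence seq_model (stage 0 = no
# events yet, labelled 'None' on the axes below). Its diagonal encodes the mean
# sojourn time in each stage as -1/Q_[i,i], which is what the sojourn-time
# comparison above relies on. If uncommented, the illustrative lines below
# (dt=1.0 is an arbitrary choice) relate the rate matrix to a discrete-time
# transition probability matrix over an interval dt:
# from scipy.linalg import expm
# dt = 1.0
# transmat_dt = expm(np.real(model.Q_) * dt)  # row-stochastic transition probabilities over dt
# mean_sojourns = -1.0 / np.diag(np.real(model.Q_))[:-1]  # mean time in each non-absorbing stage
# --- end sketch ---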
429 | event_labels = np.insert(event_labels, 0, 'None') 430 | ax.set_xticks(np.arange(len(event_labels))) 431 | ax.set_yticks(np.arange(len(event_labels))) 432 | xticklabels = [] 433 | for x in event_labels: 434 | xticklabels.append(str(x)+' (t1)') 435 | ax.set_xticklabels(xticklabels, ha='right', rotation=45, rotation_mode='anchor', fontsize=12) 436 | yticklabels = [] 437 | for x in event_labels: 438 | yticklabels.append(str(x)+' (t0)') 439 | ax.set_yticklabels(yticklabels, ha='right', rotation_mode='anchor', fontsize=12) 440 | plt.subplots_adjust(bottom=.2, top=.95) 441 | bottom, top = ax.get_ylim() 442 | ax.set_ylim(bottom + 0.5, top - 0.5) 443 | ax.set_title('Fitted Q') 444 | # print ('sum(diag(Q_true)-diag(Q_reco))', np.abs(np.sum(np.abs(np.diag(Q[0]))-np.abs(np.diag(model.Q_))))) 445 | # print ('sum(diag(Q_true)-diag(Q_reco))', np.sum(np.abs(np.diag(Q[0])-np.diag(model.Q_)))) 446 | print ('sum(diag(Q_true)-diag(Q_reco))', np.sqrt(np.sum(np.power(np.diag(Q[0])-np.diag(model.Q_), 2))/model.Q_.shape[0])) 447 | plt.show() 448 | # write 449 | # write data 450 | if is_ebm: 451 | fout_name = 'simrun'+str(seed)+'_'+model_type+'-ebm_Nppl'+str(n_ppl)+'_Nstates'+str(n_stages)+'_Nobs'+str(n_obs)+'_Nintervals'+str(len(np.unique(jumps))-1)+'_Nstart'+str(n_start)+'_iscut_'+str(is_cut)+'_noise_'+str(sigma_noise)[0]+'p'+str(sigma_noise)[2]+'_order_'+str(order)+'_fwdonly_'+str(fwd_only)+'_Nits_'+str(n_iter_inner)+'.pickle' 452 | else: 453 | fout_name = 'simrun'+str(seed)+'_'+model_type+'-tebm_Nppl'+str(n_ppl)+'_Nstates'+str(n_stages)+'_Nobs'+str(n_obs)+'_Nintervals'+str(len(np.unique(jumps))-1)+'_Nstart'+str(n_start)+'_iscut_'+str(is_cut)+'_noise_'+str(sigma_noise)[0]+'p'+str(sigma_noise)[2]+'_order_'+str(order)+'_fwdonly_'+str(fwd_only)+'_Nits_'+str(n_iter_inner)+'.pickle' 454 | save_variables = {} 455 | save_variables["X"] = X 456 | save_variables["lengths"] = lengths 457 | save_variables["jumps"] = jumps 458 | save_variables["labels"] = labels 459 | save_variables["seq_true"] = seq_true 460 | save_variables["stages_true"] = stages_true 461 | save_variables["Q_true"] = Q[0] 462 | save_variables["p_vec_true"] = pi0 463 | save_variables["seq_model"] = seq_model 464 | save_variables["stages_model"] = stages_model 465 | save_variables["Q_model"] = model.Q_ 466 | save_variables["p_vec_model"] = startprob 467 | 468 | pickle_file = open('./'+fout_name, 'wb') 469 | pickle_output = pickle.dump(save_variables, pickle_file) 470 | pickle_file.close() 471 | 472 | plt.show() 473 | -------------------------------------------------------------------------------- /lib/tebm/tebm_fix.py: -------------------------------------------------------------------------------- 1 | # Fixed time interval Temporal Event-Based Model 2 | # Derived class from base_fix.py 3 | # Author: Peter Wijeratne (p.wijeratne@sussex.ac.uk) 4 | # Adapted from code written for 'hmmlearn' (https://github.com/hmmlearn/hmmlearn) 5 | 6 | import numpy as np 7 | from scipy.special import logsumexp 8 | from functools import partial 9 | import pathos 10 | 11 | from .base_fix import BaseTEBM 12 | from kde_ebm.mixture_model import fit_all_kde_models, fit_all_gmm_models, get_prob_mat 13 | 14 | class MixtureTEBM(BaseTEBM): 15 | 16 | def __init__(self, 17 | X=None, 18 | lengths=None, 19 | n_stages=None, 20 | time_mean=None, 21 | n_iter=None, 22 | fwd_only=False, 23 | order=None, 24 | algo='viterbi', 25 | verbose=False): 26 | 27 | BaseTEBM.__init__(self, 28 | X=X, 29 | lengths=lengths, 30 | n_stages=n_stages, 31 | time_mean=time_mean, 32 | n_iter=n_iter, 33 | 
fwd_only=fwd_only, 34 | order=order, 35 | algo=algo, 36 | verbose=verbose) 37 | 38 | def compute_log_likelihood(self, X, start_i, end_i): 39 | n_samples = end_i-start_i 40 | S_int = self.S.astype(int) 41 | arange_Np1 = np.arange(0, self.n_features+1) 42 | p_perm_k = np.zeros((n_samples, self.n_features+1)) 43 | p_yes = np.array(self.prob_mat[start_i:end_i, :, 1]) 44 | p_no = np.array(self.prob_mat[start_i:end_i, :, 0]) 45 | # Leon's clever cumulative probability code 46 | cp_yes = np.cumprod(p_yes[:, S_int], 1) 47 | cp_no = np.cumprod(p_no[:, S_int[::-1]], 1) 48 | for i in arange_Np1: 49 | if i == 0: 50 | p_perm_k[:, i] = cp_no[:,self.n_features-1] 51 | elif i == self.n_features: 52 | p_perm_k[:, i] = cp_yes[:,self.n_features-1] 53 | else: 54 | p_perm_k[:, i] = cp_yes[:,i-1] * cp_no[:,self.n_features-i-1] 55 | p_perm_k[p_perm_k==0] = np.finfo(float).eps 56 | return np.log(p_perm_k) 57 | 58 | def stages(self, X, lengths=None): 59 | ### FIXME: is there a general way of doing this? 60 | self.X = X 61 | self.lengths = lengths 62 | self.prob_mat = get_prob_mat(X, self.mixtures) 63 | ### 64 | stage_sequence = self.stage_X(X, lengths) 65 | return stage_sequence 66 | 67 | def posteriors(self, X, lengths=None): 68 | ### FIXME: is there a general way of doing this? 69 | self.X = X 70 | self.lengths = lengths 71 | self.prob_mat = get_prob_mat(X, self.mixtures) 72 | ### 73 | posteriors = self.posteriors_X(X, lengths) 74 | return posteriors 75 | 76 | def gen_sample(self, stage): 77 | ### FIXME: this won't work for KDE EBM 78 | def _get_params_ebm(means, sdevs, mixes): 79 | n_bms = self.n_features 80 | seq_means = np.tile(means.T[0], (n_bms+1, 1)).T 81 | seq_sdevs = np.tile(sdevs.T[0], (n_bms+1, 1)).T 82 | seq_mixes = np.tile(mixes.T[0], (n_bms+1, 1)).T 83 | # seq_means[0] = healthy distributions for all biomarkers 84 | for i in range(n_bms): 85 | bm_pos = np.where(self.S == i)[0][0] 86 | seq_means[i, bm_pos+1:] = means[i][1] 87 | seq_sdevs[i, bm_pos+1:] = sdevs[i][1] 88 | seq_mixes[i, bm_pos+1:] = mixes[i][1] 89 | return seq_means.T, seq_sdevs.T, seq_mixes.T 90 | def _return_gmm_fits(mixtures): 91 | n_bms = self.n_features 92 | fit_means = np.zeros((n_bms, 2)) 93 | fit_std = np.zeros((n_bms, 2)) 94 | fit_mixes = np.zeros((n_bms, 2)) 95 | for i in range(n_bms): 96 | theta_i = mixtures[i].theta 97 | fit_means[i] = theta_i[[0,2]] 98 | fit_std[i] = theta_i[[1,3]] 99 | fit_mixes[i] = [theta_i[4],1-theta_i[4]] 100 | return fit_means, fit_std, fit_mixes 101 | fit_means, fit_std, fit_mixes = _return_gmm_fits(self.mixtures) 102 | theta = _get_params_ebm(fit_means, fit_std, fit_mixes) 103 | self.means = theta[0] 104 | self.covars = np.power(theta[1],2) 105 | ### 106 | return np.random.multivariate_normal(self.means[stage], self.covars[stage]) 107 | 108 | def optimise_seq(self, S): 109 | N = self.n_features 110 | max_S = S.copy() 111 | # calculate likelihoods over permutations 112 | order_bio = np.random.permutation(N) 113 | for count,i in enumerate(order_bio): 114 | current_sequence = max_S 115 | assert(len(current_sequence)==N) 116 | current_location = np.array([0] * N) 117 | current_location[current_sequence.astype(int)] = np.arange(N) 118 | selected_event = i 119 | move_event_from = current_location[selected_event] 120 | possible_positions = np.arange(N) 121 | possible_sequences = np.zeros((len(possible_positions), N)) 122 | possible_likelihood = np.full((len(possible_positions), 1), -np.inf) 123 | for index in range(len(possible_positions)): 124 | current_sequence = max_S 125 | # choose a position in the 
sequence to move an event to 126 | move_event_to = possible_positions[index] 127 | # move this event in its new position 128 | current_sequence = np.delete(current_sequence, move_event_from, 0) 129 | new_sequence = np.concatenate([current_sequence[np.arange(move_event_to)], [selected_event], current_sequence[np.arange(move_event_to, N - 1)]]) 130 | # fit TEBM 131 | self.S = new_sequence 132 | self.fit() 133 | possible_likelihood[index] = self.compute_model_log_likelihood(self.X, self.lengths) 134 | possible_sequences[index, :] = self.S 135 | max_likelihood = max(possible_likelihood) 136 | max_S = possible_sequences[np.where(possible_likelihood == max_likelihood)[0][0]] 137 | if count<(N-1): 138 | print (str(round((count+1)/len(order_bio)*100,2))+'% complete') 139 | return max_S, max_likelihood 140 | 141 | def seq_em(self, S, n_iter, seed_num): 142 | # parse out sequences by seed number 143 | S = np.array(S[seed_num]) 144 | print ('Startpoint',seed_num) 145 | cur_seq = S 146 | cur_like = -np.inf 147 | flag = False 148 | for opt_i in range(int(n_iter)): 149 | print ('EM iteration',opt_i+1) 150 | seq, like = self.optimise_seq(cur_seq) 151 | print ('current', like, seq, 'max', cur_like, cur_seq) 152 | if like-cur_like < 1E-3: 153 | print ('EM converged in',opt_i+1,'iterations') 154 | flag = True 155 | elif like > cur_like: 156 | cur_seq = seq 157 | cur_like = like 158 | if flag: 159 | break 160 | return cur_seq, cur_like 161 | 162 | def fit_tebm(self, labels, n_start, n_iter, n_cores, model_type='GMM', constrained=False, cut_controls=False): 163 | # only use baseline data to fit mixture models 164 | X0 = [] 165 | for i in range(len(self.lengths)): 166 | X0.append(self.X[np.sum(self.lengths[:i])]) 167 | X0 = np.array(X0) 168 | if model_type == 'KDE': 169 | mixtures = fit_all_kde_models(X0, labels) 170 | else: 171 | mixtures = fit_all_gmm_models(X0, labels)#, constrained) 172 | # might want to fit sequence without controls 173 | if cut_controls: 174 | print ('Cutting controls from sequence fit!') 175 | X, lengths = [], [] 176 | for i in range(len(self.lengths)): 177 | if labels[i] != 0: 178 | nobs_i = self.lengths[i] 179 | for x in self.X[np.sum(self.lengths[:i]):np.sum(self.lengths[:i])+nobs_i]: 180 | X.append(x) 181 | lengths.append(self.lengths[i]) 182 | self.X = np.array(X) 183 | self.lengths = np.array(lengths) 184 | # calculate likelihood lookup table 185 | self.prob_mat = get_prob_mat(self.X, mixtures) 186 | # set mixture models 187 | self.mixtures = mixtures 188 | # do EM 189 | ml_seq_mat = np.zeros((1,self.X.shape[1],n_start)) 190 | ml_like_mat = np.zeros(n_start) 191 | if n_cores>1: 192 | pool = pathos.multiprocessing.ProcessingPool() 193 | pool.ncpus = n_cores 194 | else: 195 | # FIXME: serial version doesn't work 196 | # pool = pathos.serial.SerialPool() 197 | pool = pathos.multiprocessing.ProcessingPool() 198 | pool.ncpus = n_cores 199 | # instantiate function as class to pass to pool.map 200 | # first calculate array of sequences - do this first or same random number will be used simultaneously on each processor 201 | # will return shape (n_start, 1) 202 | copier = partial(self.init_seq) 203 | # will return shape (n_start, 1) 204 | seq_mat = np.array(pool.map(copier, range(n_start))) 205 | # now optimise 206 | copier = partial(self.seq_em, 207 | seq_mat[:,0], 208 | n_iter) 209 | # will return shape (n_start, 2) 210 | par_mat = np.array(pool.map(copier, range(n_start))) 211 | # distribute to local matrices 212 | for i in range(n_start): 213 | ml_seq_mat[:, :, i] = par_mat[i, 0] 214 
| ml_like_mat[i] = par_mat[i, 1] 215 | ix = np.argmax(ml_like_mat) 216 | ml_seq = ml_seq_mat[:, :, ix] 217 | ml_like = ml_like_mat[ix] 218 | # refit model on ML sequence 219 | self.S = ml_seq[0] 220 | self.fit() 221 | return ml_seq, self.mixtures 222 | 223 | def init_seq(self, seed_num): 224 | #FIXME: issue with seeding by seed_num is that every time you call fit_tebm, it will initialise the same sequences 225 | # ensure randomness across parallel processes 226 | np.random.seed(seed_num) 227 | S = np.arange(self.n_features) 228 | np.random.shuffle(S) 229 | return [S] 230 | 231 | class ZscoreTEBM(BaseTEBM): 232 | 233 | def __init__(self, 234 | X=None, 235 | lengths=None, 236 | n_stages=None, 237 | time_mean=None, 238 | n_iter=None, 239 | fwd_only=False, 240 | order=None, 241 | algo='viterbi', 242 | verbose=False): 243 | 244 | BaseTEBM.__init__(self, 245 | X=X, 246 | lengths=lengths, 247 | n_stages=n_stages, 248 | time_mean=time_mean, 249 | n_iter=n_iter, 250 | fwd_only=fwd_only, 251 | order=order, 252 | algo=algo, 253 | verbose=verbose) 254 | 255 | def compute_log_likelihood(self, X, start_i, end_i): 256 | n_samples, n_dim = X.shape 257 | return -0.5 * (n_dim * np.log(2 * np.pi) 258 | + np.log(self.covars).sum(axis=-1) 259 | + ((X[:, None, :] - self.means) ** 2 / self.covars).sum(axis=-1)) 260 | 261 | def stages(self, X, lengths=None): 262 | ### FIXME: is there a general way of doing this? 263 | self.X = X 264 | self.lengths = lengths 265 | ### 266 | stage_sequence = self.stage_X(X, lengths) 267 | return stage_sequence 268 | 269 | def posteriors(self, X, lengths=None): 270 | ### FIXME: is there a general way of doing this? 271 | self.X = X 272 | self.lengths = lengths 273 | ### 274 | posteriors = self.posteriors_X(X, lengths) 275 | return posteriors 276 | 277 | def gen_sample(self, n_samples=1): 278 | p_vec_cdf = np.cumsum(self.p_vec) 279 | a_mat_cdf = np.cumsum(self.a_mat, axis=1) 280 | X_sample, k_sample = [], [] 281 | k_i = (p_vec_cdf > np.random.rand()).argmax() 282 | for i in range(n_samples): 283 | k_i = (a_mat_cdf[k_i] > np.random.rand()).argmax() 284 | k_sample.append(k_i) 285 | # X_sample.append(np.random.multivariate_normal(self.means[k_i], self.covars[k_i])) 286 | #FIXME: make multivariate 287 | X_sample.append(np.random.normal(self.means[k_i], self.covars[k_i])) 288 | return np.array(X_sample), np.array(k_sample) 289 | 290 | def init_seq(self, seed_num): 291 | np.random.seed(seed_num) 292 | N = np.array(self.stage_zscore).shape[1] 293 | S = np.zeros(N) 294 | for i in range(N): 295 | IS_min_stage_zscore = np.array([False] * N) 296 | possible_biomarkers = np.unique(self.stage_biomarker_index) 297 | for j in range(len(possible_biomarkers)): 298 | IS_unselected = [False] * N 299 | for k in set(range(N)) - set(S[:i]): 300 | IS_unselected[k] = True 301 | this_biomarkers = np.array([(np.array(self.stage_biomarker_index)[0] == possible_biomarkers[j]).astype(int) + (np.array(IS_unselected) == 1).astype(int)]) == 2 302 | if not np.any(this_biomarkers): 303 | this_min_stage_zscore = 0 304 | else: 305 | this_min_stage_zscore = min(self.stage_zscore[this_biomarkers]) 306 | if (this_min_stage_zscore): 307 | temp = ((this_biomarkers.astype(int) + (self.stage_zscore == this_min_stage_zscore).astype(int)) == 2).T 308 | temp = temp.reshape(len(temp), ) 309 | IS_min_stage_zscore[temp] = True 310 | events = np.array(range(N)) 311 | possible_events = np.array(events[IS_min_stage_zscore]) 312 | this_index = np.ceil(np.random.rand() * ((len(possible_events)))) - 1 313 | S[i] = 
possible_events[int(this_index)] 314 | S = S.reshape(1, len(S)) 315 | return S 316 | 317 | def get_means(self): 318 | def linspace_local2(a, b, N, arange_N): 319 | return a + (b - a) / (N - 1.) * arange_N 320 | N = self.stage_biomarker_index.shape[1] 321 | S_inv = np.array([ 0 ] * N) 322 | S_inv[self.S.astype(int)] = np.arange(N) 323 | possible_biomarkers = np.unique(self.stage_biomarker_index) 324 | B = len(possible_biomarkers) 325 | # value of mean function at integral limits 326 | point_value = np.zeros((B, N + 2)) 327 | # all the arange you'll need below 328 | arange_N = np.arange(N + 2) 329 | for i in range(B): 330 | b = possible_biomarkers[i] 331 | # position of this biomarker's z-score events in the sequence 332 | event_location = np.concatenate([[0], S_inv[(self.stage_biomarker_index == b)[0]], [N]]) 333 | # z-score reached at each event 334 | event_value = np.concatenate([[self.min_biomarker_zscore[i]], self.stage_zscore[self.stage_biomarker_index == b], [self.max_biomarker_zscore[i]]]) 335 | for j in range(len(event_location) - 1): 336 | if j == 0: # FIXME: nasty hack to get Matlab indexing to match up - necessary here because indices are used for linspace limits 337 | temp = arange_N[event_location[j]:(event_location[j + 1] + 2)] 338 | N_j = event_location[j + 1] - event_location[j] + 2 339 | point_value[i, temp] = linspace_local2(event_value[j], event_value[j + 1], N_j, arange_N[0:N_j]) 340 | else: 341 | temp = arange_N[(event_location[j] + 1):(event_location[j + 1] + 2)] 342 | N_j = event_location[j + 1] - event_location[j] + 1 343 | point_value[i, temp] = linspace_local2(event_value[j], event_value[j + 1], N_j, arange_N[0:N_j]) 344 | # integrate (approximation) 345 | stage_value = 0.5 * point_value[:, :point_value.shape[1] - 1] + 0.5 * point_value[:, 1:] 346 | return stage_value.T 347 | 348 | def optimise_seq(self, S): 349 | N = self.stage_zscore.shape[1] 350 | max_S = S.copy() 351 | # calculate likelihoods over permutations 352 | order_bio = np.random.permutation(N) 353 | for count,i in enumerate(order_bio): 354 | current_sequence = max_S 355 | assert(len(current_sequence)==N) 356 | current_location = np.array([0]*len(current_sequence)) 357 | current_location[current_sequence.astype(int)] = np.arange(len(current_sequence)) 358 | selected_event = i 359 | move_event_from = current_location[selected_event] 360 | this_stage_zscore = self.stage_zscore[0, selected_event] 361 | selected_biomarker = self.stage_biomarker_index[0, selected_event] 362 | possible_zscores_biomarker = self.stage_zscore[self.stage_biomarker_index == selected_biomarker] 363 | min_filter = possible_zscores_biomarker < this_stage_zscore 364 | max_filter = possible_zscores_biomarker > this_stage_zscore 365 | events = np.array(range(N)) 366 | if np.any(min_filter): 367 | min_zscore_bound = max(possible_zscores_biomarker[min_filter]) 368 | min_zscore_bound_event = events[((self.stage_zscore[0] == min_zscore_bound).astype(int) + ( 369 | self.stage_biomarker_index[0] == selected_biomarker).astype(int)) == 2] 370 | move_event_to_lower_bound = current_location[min_zscore_bound_event] + 1 371 | else: 372 | move_event_to_lower_bound = 0 373 | if np.any(max_filter): 374 | max_zscore_bound = min(possible_zscores_biomarker[max_filter]) 375 | max_zscore_bound_event = events[((self.stage_zscore[0] == max_zscore_bound).astype(int) + ( 376 | self.stage_biomarker_index[0] == selected_biomarker).astype(int)) == 2] 377 | move_event_to_upper_bound = current_location[max_zscore_bound_event] 378 | else: 379 | 
move_event_to_upper_bound = N 380 | if move_event_to_lower_bound == move_event_to_upper_bound: 381 | possible_positions = np.array([0]) 382 | else: 383 | possible_positions = np.arange(move_event_to_lower_bound, move_event_to_upper_bound) 384 | possible_sequences = np.zeros((len(possible_positions), N)) 385 | possible_likelihood = np.full((len(possible_positions), 1), -np.inf) 386 | for index in range(len(possible_positions)): 387 | current_sequence = max_S 388 | # choose a position in the sequence to move an event to 389 | move_event_to = possible_positions[index] 390 | # move this event in its new position 391 | current_sequence = np.delete(current_sequence, move_event_from, 0) 392 | new_sequence = np.concatenate([current_sequence[np.arange(move_event_to)], [selected_event], current_sequence[np.arange(move_event_to, N - 1)]]) 393 | # fit TEBM 394 | self.S = new_sequence 395 | self.means = self.get_means() 396 | self.covars = self.covars_prior 397 | self.fit() 398 | possible_likelihood[index] = self.compute_model_log_likelihood(self.X, self.lengths) 399 | possible_sequences[index, :] = self.S 400 | max_likelihood = max(possible_likelihood) 401 | max_S = possible_sequences[np.where(possible_likelihood == max_likelihood)[0][0]] 402 | if count<(N-1): 403 | print (str(round((count+1)/len(order_bio)*100,2))+'% complete') 404 | return max_S, max_likelihood 405 | 406 | def seq_em(self, S, n_iter, seed_num): 407 | # parse out sequences by seed number 408 | S = np.array(S[seed_num]) 409 | print ('Startpoint',seed_num) 410 | cur_seq = S 411 | cur_like = -np.inf 412 | flag = False 413 | for opt_i in range(int(n_iter)): 414 | print ('EM iteration',opt_i+1) 415 | seq, like = self.optimise_seq(cur_seq) 416 | print ('current', like, seq, 'max', cur_like, cur_seq) 417 | if like-cur_like < 1E-3: 418 | print ('EM converged in',opt_i+1,'iterations') 419 | flag = True 420 | elif like > cur_like: 421 | cur_seq = seq 422 | cur_like = like 423 | if flag: 424 | break 425 | return cur_seq, cur_like 426 | 427 | def fit_tebm(self, n_zscores, z_max, n_start, n_iter, n_cores, cut_controls=False): 428 | # intialise z-score stuff 429 | z_val_arr = np.array([[x+1 for x in range(n_zscores)]]*self.n_features) 430 | z_max_arr = np.array([z_max]*self.n_features) 431 | IX_vals = np.array([[x for x in range(self.n_features)]*n_zscores]).T 432 | stage_biomarker_index = np.array([y for x in IX_vals.T for y in x]) 433 | stage_zscore = np.array([y for x in z_val_arr.T for y in x]) 434 | self.stage_biomarker_index = stage_biomarker_index.reshape(1,len(stage_biomarker_index)) 435 | self.stage_zscore = stage_zscore.reshape(1,len(stage_zscore)) 436 | self.min_biomarker_zscore = [0]*self.n_features 437 | self.max_biomarker_zscore = z_max_arr 438 | self.covars_prior = np.tile(np.identity(1), (self.n_stages, self.n_features)) 439 | # might want to fit sequence without controls 440 | if cut_controls: 441 | print ('Cutting controls from sequence fit!') 442 | X, lengths = [], [] 443 | for i in range(len(self.lengths)): 444 | if labels[i] != 0: 445 | nobs_i = self.lengths[i] 446 | for x in self.X[np.sum(self.lengths[:i]):np.sum(self.lengths[:i])+nobs_i]: 447 | X.append(x) 448 | lengths.append(self.lengths[i]) 449 | self.X = np.array(X) 450 | self.lengths = np.array(lengths) 451 | # do EM 452 | ml_seq_mat = np.zeros((1,self.stage_zscore.shape[1],n_start)) 453 | ml_like_mat = np.zeros(n_start) 454 | if n_cores>1: 455 | pool = pathos.multiprocessing.ProcessingPool() 456 | pool.ncpus = n_cores 457 | else: 458 | # FIXME: serial version doesn't 
work 459 | # pool = pathos.serial.SerialPool() 460 | pool = pathos.multiprocessing.ProcessingPool() 461 | pool.ncpus = n_cores 462 | # instantiate function as class to pass to pool.map 463 | # first calculate array of sequences - do this first or same random number will be used simultaneously on each processor 464 | # will return shape (n_start, 1) 465 | copier = partial(self.init_seq) 466 | # will return shape (n_start, 1) 467 | seq_mat = np.array(pool.map(copier, range(n_start))) 468 | # now optimise 469 | copier = partial(self.seq_em, 470 | seq_mat[:,0], 471 | n_iter) 472 | # will return shape (n_start, 2) 473 | par_mat = np.array(pool.map(copier, range(n_start))) 474 | # distribute to local matrices 475 | for i in range(n_start): 476 | ml_seq_mat[:, :, i] = par_mat[i, 0] 477 | ml_like_mat[i] = par_mat[i, 1] 478 | ix = np.argmax(ml_like_mat) 479 | ml_seq = ml_seq_mat[:, :, ix] 480 | ml_like = ml_like_mat[ix] 481 | # refit model on ML sequence 482 | self.S = ml_seq[0] 483 | self.covars = self.covars_prior 484 | self.means = self.get_means() 485 | self.fit() 486 | return ml_seq 487 | -------------------------------------------------------------------------------- /lib/tebm/cthmm_fix.py: -------------------------------------------------------------------------------- 1 | # Fixed interval CTHMM 2 | # Author: Peter Wijeratne (p.wijeratne@pm.me) 3 | # Adapted from code written for 'hmmlearn' (https://github.com/hmmlearn/hmmlearn) 4 | 5 | import logging 6 | 7 | import numpy as np 8 | from scipy.special import logsumexp 9 | from sklearn import cluster 10 | from sklearn.utils import check_random_state 11 | 12 | from . import _utils 13 | from .stats import log_multivariate_normal_density 14 | from .base_fix import BaseTEBM 15 | from .utils import ( 16 | fill_covars, iter_from_X_lengths, log_mask_zero, log_normalize, normalize) 17 | __all__ = ["MixtureCTHMM", "MultinomialCTHMM", "GMMCTHMM"] 18 | 19 | _log = logging.getLogger(__name__) 20 | COVARIANCE_TYPES = frozenset(("spherical", "diag", "full", "tied")) 21 | 22 | def _check_and_set_gaussian_n_features(model): 23 | _, n_features = model.X.shape 24 | if hasattr(model, "n_features") and model.n_features != n_features: 25 | raise ValueError("Unexpected number of dimensions, got {} but " 26 | "expected {}".format(n_features, model.n_features)) 27 | model.n_features = n_features 28 | 29 | 30 | class GaussianCTHMM(BaseTEBM): 31 | r"""Hidden Markov Model with Gaussian emissions. 32 | 33 | Parameters 34 | ---------- 35 | n_components : int 36 | Number of states. 37 | 38 | covariance_type : string, optional 39 | String describing the type of covariance parameters to 40 | use. Must be one of 41 | 42 | * "spherical" --- each state uses a single variance value that 43 | applies to all features. 44 | * "diag" --- each state uses a diagonal covariance matrix. 45 | * "full" --- each state uses a full (i.e. unrestricted) 46 | covariance matrix. 47 | * "tied" --- all states use **the same** full covariance matrix. 48 | 49 | Defaults to "diag". 50 | 51 | min_covar : float, optional 52 | Floor on the diagonal of the covariance matrix to prevent 53 | overfitting. Defaults to 1e-3. 54 | 55 | startprob_prior : array, shape (n_components, ), optional 56 | Parameters of the Dirichlet prior distribution for 57 | :attr:`startprob_`. 58 | 59 | transmat_prior : array, shape (n_components, n_components), optional 60 | Parameters of the Dirichlet prior distribution for each row 61 | of the transition probabilities :attr:`transmat_`. 
62 | 
63 | means_prior, means_weight : array, shape (n_components, ), optional
64 | Mean and precision of the Normal prior distribution for
65 | :attr:`means_`.
66 | 
67 | covars_prior, covars_weight : array, shape (n_components, ), optional
68 | Parameters of the prior distribution for the covariance matrix
69 | :attr:`covars_`.
70 | 
71 | If :attr:`covariance_type` is "spherical" or "diag" the prior is
72 | the inverse gamma distribution, otherwise --- the inverse Wishart
73 | distribution.
74 | 
75 | algorithm : string, optional
76 | Decoder algorithm. Must be one of "viterbi" or "map".
77 | Defaults to "viterbi".
78 | 
79 | random_state: RandomState or an int seed, optional
80 | A random number generator instance.
81 | 
82 | n_iter : int, optional
83 | Maximum number of iterations to perform.
84 | 
85 | tol : float, optional
86 | Convergence threshold. EM will stop if the gain in log-likelihood
87 | is below this value.
88 | 
89 | verbose : bool, optional
90 | When ``True`` per-iteration convergence reports are printed
91 | to :data:`sys.stderr`. You can diagnose convergence via the
92 | :attr:`monitor_` attribute.
93 | 
94 | params : string, optional
95 | Controls which parameters are updated in the training
96 | process. Can contain any combination of 's' for startprob,
97 | 't' for transmat, 'm' for means and 'c' for covars. Defaults
98 | to all parameters.
99 | 
100 | init_params : string, optional
101 | Controls which parameters are initialized prior to
102 | training. Can contain any combination of 's' for
103 | startprob, 't' for transmat, 'm' for means and 'c' for covars.
104 | Defaults to all parameters.
105 | 
106 | Attributes
107 | ----------
108 | n_features : int
109 | Dimensionality of the Gaussian emissions.
110 | 
111 | monitor\_ : ConvergenceMonitor
112 | Monitor object used to check the convergence of EM.
113 | 
114 | startprob\_ : array, shape (n_components, )
115 | Initial state occupation distribution.
116 | 
117 | transmat\_ : array, shape (n_components, n_components)
118 | Matrix of transition probabilities between states.
119 | 
120 | means\_ : array, shape (n_components, n_features)
121 | Mean parameters for each state.
122 | 
123 | covars\_ : array
124 | Covariance parameters for each state.
125 | 
126 | The shape depends on :attr:`covariance_type`::
127 | 
128 | (n_components, ) if "spherical",
129 | (n_components, n_features) if "diag",
130 | (n_components, n_features, n_features) if "full"
131 | (n_features, n_features) if "tied",
132 | 
133 | Examples
134 | --------
135 | >>> from tebm.cthmm_fix import GaussianCTHMM
136 | >>> GaussianCTHMM(n_components=2) #doctest: +ELLIPSIS
137 | GaussianCTHMM(algorithm='viterbi',...
138 | """ 139 | def __init__(self, X=None, lengths=None, 140 | n_components=1, startprob_prior=None, transmat_prior=None, 141 | means_prior=0, means_weight=0, covars_prior=1e-2, covars_weight=1, covariance_type='diag', min_covar=1e-3, 142 | algorithm="viterbi", random_state=None, n_iter=10, 143 | tol=1e-2, verbose=False, params="st", 144 | init_params="st", allow_nan=False): 145 | BaseTEBM.__init__(self, X=X, lengths=lengths, 146 | n_components=n_components, startprob_prior=startprob_prior, transmat_prior=transmat_prior, 147 | algorithm=algorithm, random_state=random_state, n_iter=n_iter, 148 | tol=tol, verbose=verbose, params=params, 149 | init_params=init_params, allow_nan=allow_nan) 150 | self.covariance_type = covariance_type 151 | self.min_covar = min_covar 152 | self.means_prior = means_prior 153 | self.means_weight = means_weight 154 | self.covars_prior = covars_prior 155 | self.covars_weight = covars_weight 156 | 157 | @property 158 | def covars_(self): 159 | """Return covars as a full matrix.""" 160 | return fill_covars(self._covars_, self.covariance_type, 161 | self.n_components, self.n_features) 162 | 163 | @covars_.setter 164 | def covars_(self, covars): 165 | covars = np.array(covars, copy=True) 166 | _utils._validate_covars(covars, self.covariance_type, 167 | self.n_components) 168 | self._covars_ = covars 169 | """ 170 | def _check(self): 171 | super()._check() 172 | 173 | self.means_ = np.asarray(self.means_) 174 | self.n_features = self.means_.shape[1] 175 | 176 | if self.covariance_type not in COVARIANCE_TYPES: 177 | raise ValueError('covariance_type must be one of {}' 178 | .format(COVARIANCE_TYPES)) 179 | """ 180 | def _get_n_fit_scalars_per_param(self): 181 | nc = self.n_components 182 | nf = self.n_features 183 | return { 184 | "s": nc - 1, 185 | "t": nc * (nc - 1), 186 | "m": nc * nf, 187 | "c": { 188 | "spherical": nc, 189 | "diag": nc * nf, 190 | "full": nc * nf * (nf + 1) // 2, 191 | "tied": nf * (nf + 1) // 2, 192 | }[self.covariance_type], 193 | } 194 | 195 | def _init(self, X, lengths=None): 196 | _check_and_set_gaussian_n_features(self) 197 | super()._init(X, lengths=lengths) 198 | 199 | if 'm' in self.init_params: 200 | kmeans = cluster.KMeans(n_clusters=self.n_components, 201 | random_state=self.random_state) 202 | kmeans.fit(X) 203 | self.means_ = kmeans.cluster_centers_ 204 | if 'c' in self.init_params: 205 | cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1]) 206 | if not cv.shape: 207 | cv.shape = (1, 1) 208 | self.covars_ = \ 209 | _utils.distribute_covar_matrix_to_match_covariance_type( 210 | cv, self.covariance_type, self.n_components).copy() 211 | 212 | def _compute_log_likelihood(self, X, i, j): 213 | return log_multivariate_normal_density( 214 | self.X[i:j], self.means_, self._covars_, self.covariance_type) 215 | 216 | def predict(self, X, lengths=None): 217 | # FIXME: is there a general way of doing this? 218 | self.X = X 219 | self.lengths = lengths 220 | logprob, state_sequence = self.decode(X, lengths) 221 | return state_sequence, logprob 222 | 223 | def predict_proba(self, X, lengths=None): 224 | # FIXME: is there a general way of doing this? 
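# (editor's note) The base-class scoring / E-step code reads the observations
# from self.X and self.lengths rather than taking them as arguments (see
# _compute_log_likelihood above, which slices self.X), so both are overwritten
# here before scoring -- the same pattern as in predict() above.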
225 | self.X = X 226 | self.lengths = lengths 227 | #### 228 | _, posteriors = self.score_samples(X, lengths) 229 | return posteriors 230 | 231 | def _generate_sample_from_state(self, state, random_state=None): 232 | random_state = check_random_state(random_state) 233 | return random_state.multivariate_normal( 234 | self.means_[state], self.covars_[state] 235 | ) 236 | 237 | def _initialize_sufficient_statistics(self): 238 | stats = super()._initialize_sufficient_statistics() 239 | stats['post'] = np.zeros(self.n_components) 240 | stats['obs'] = np.zeros((self.n_components, self.n_features)) 241 | stats['obs**2'] = np.zeros((self.n_components, self.n_features)) 242 | if self.covariance_type in ('tied', 'full'): 243 | stats['obs*obs.T'] = np.zeros((self.n_components, self.n_features, 244 | self.n_features)) 245 | return stats 246 | 247 | def _accumulate_sufficient_statistics(self, stats, obs, framelogprob, posteriors, 248 | fwdlattice, bwdlattice): 249 | super()._accumulate_sufficient_statistics( 250 | stats, obs, framelogprob, posteriors, fwdlattice, bwdlattice) 251 | 252 | if 'm' in self.params or 'c' in self.params: 253 | stats['post'] += posteriors.sum(axis=0) 254 | stats['obs'] += np.dot(posteriors.T, obs) 255 | 256 | if 'c' in self.params: 257 | if self.covariance_type in ('spherical', 'diag'): 258 | stats['obs**2'] += np.dot(posteriors.T, obs ** 2) 259 | elif self.covariance_type in ('tied', 'full'): 260 | # posteriors: (nt, nc); obs: (nt, nf); obs: (nt, nf) 261 | # -> (nc, nf, nf) 262 | stats['obs*obs.T'] += np.einsum( 263 | 'ij,ik,il->jkl', posteriors, obs, obs) 264 | 265 | def _do_mstep(self, stats): 266 | super()._do_mstep(stats) 267 | 268 | means_prior = self.means_prior 269 | means_weight = self.means_weight 270 | 271 | # TODO: find a proper reference for estimates for different 272 | # covariance models. 273 | # Based on Huang, Acero, Hon, "Spoken Language Processing", 274 | # p. 
443 - 445 275 | denom = stats['post'][:, np.newaxis] 276 | if 'm' in self.params: 277 | self.means_ = ((means_weight * means_prior + stats['obs']) 278 | / (means_weight + denom)) 279 | 280 | if 'c' in self.params: 281 | covars_prior = self.covars_prior 282 | covars_weight = self.covars_weight 283 | meandiff = self.means_ - means_prior 284 | 285 | if self.covariance_type in ('spherical', 'diag'): 286 | cv_num = (means_weight * meandiff**2 287 | + stats['obs**2'] 288 | - 2 * self.means_ * stats['obs'] 289 | + self.means_**2 * denom) 290 | cv_den = max(covars_weight - 1, 0) + denom 291 | self._covars_ = \ 292 | (covars_prior + cv_num) / np.maximum(cv_den, 1e-5) 293 | if self.covariance_type == 'spherical': 294 | self._covars_ = np.tile( 295 | self._covars_.mean(1)[:, np.newaxis], 296 | (1, self._covars_.shape[1])) 297 | elif self.covariance_type in ('tied', 'full'): 298 | cv_num = np.empty((self.n_components, self.n_features, 299 | self.n_features)) 300 | for c in range(self.n_components): 301 | obsmean = np.outer(stats['obs'][c], self.means_[c]) 302 | 303 | cv_num[c] = (means_weight * np.outer(meandiff[c], 304 | meandiff[c]) 305 | + stats['obs*obs.T'][c] 306 | - obsmean - obsmean.T 307 | + np.outer(self.means_[c], self.means_[c]) 308 | * stats['post'][c]) 309 | cvweight = max(covars_weight - self.n_features, 0) 310 | if self.covariance_type == 'tied': 311 | self._covars_ = ((covars_prior + cv_num.sum(axis=0)) / 312 | (cvweight + stats['post'].sum())) 313 | elif self.covariance_type == 'full': 314 | self._covars_ = ((covars_prior + cv_num) / 315 | (cvweight + stats['post'][:, None, None])) 316 | 317 | class MultinomialCTHMM(BaseTEBM): 318 | r"""Hidden Markov Model with multinomial (discrete) emissions 319 | 320 | Parameters 321 | ---------- 322 | 323 | n_components : int 324 | Number of states. 325 | 326 | startprob_prior : array, shape (n_components, ), optional 327 | Parameters of the Dirichlet prior distribution for 328 | :attr:`startprob_`. 329 | 330 | transmat_prior : array, shape (n_components, n_components), optional 331 | Parameters of the Dirichlet prior distribution for each row 332 | of the transition probabilities :attr:`transmat_`. 333 | 334 | algorithm : string, optional 335 | Decoder algorithm. Must be one of "viterbi" or "map". 336 | Defaults to "viterbi". 337 | 338 | random_state: RandomState or an int seed, optional 339 | A random number generator instance. 340 | 341 | n_iter : int, optional 342 | Maximum number of iterations to perform. 343 | 344 | tol : float, optional 345 | Convergence threshold. EM will stop if the gain in log-likelihood 346 | is below this value. 347 | 348 | verbose : bool, optional 349 | When ``True`` per-iteration convergence reports are printed 350 | to :data:`sys.stderr`. You can diagnose convergence via the 351 | :attr:`monitor_` attribute. 352 | 353 | params : string, optional 354 | Controls which parameters are updated in the training 355 | process. Can contain any combination of 's' for startprob, 356 | 't' for transmat, 'e' for emissionprob. 357 | Defaults to all parameters. 358 | 359 | init_params : string, optional 360 | Controls which parameters are initialized prior to 361 | training. Can contain any combination of 's' for 362 | startprob, 't' for transmat, 'e' for emissionprob. 363 | Defaults to all parameters. 364 | 365 | Attributes 366 | ---------- 367 | n_features : int 368 | Number of possible symbols emitted by the model (in the samples). 
369 | 370 | monitor\_ : ConvergenceMonitor 371 | Monitor object used to check the convergence of EM. 372 | 373 | startprob\_ : array, shape (n_components, ) 374 | Initial state occupation distribution. 375 | 376 | transmat\_ : array, shape (n_components, n_components) 377 | Matrix of transition probabilities between states. 378 | 379 | emissionprob\_ : array, shape (n_components, n_features) 380 | Probability of emitting a given symbol when in each state. 381 | 382 | Examples 383 | -------- 384 | >>> from tebm.tebm import MultinomialTEBM 385 | >>> MultinomialTEBM(n_components=2) #doctest: +ELLIPSIS 386 | MultinomialTEBM(algorithm='viterbi',... 387 | """ 388 | # TODO: accept the prior on emissionprob_ for consistency. 389 | def __init__(self, n_components=1, 390 | startprob_prior=1.0, transmat_prior=1.0, 391 | algorithm="viterbi", random_state=None, 392 | n_iter=10, tol=1e-2, verbose=False, 393 | params="ste", init_params="ste"): 394 | BaseTEBM.__init__(self, n_components, 395 | startprob_prior=startprob_prior, 396 | transmat_prior=transmat_prior, 397 | algorithm=algorithm, 398 | random_state=random_state, 399 | n_iter=n_iter, tol=tol, verbose=verbose, 400 | params=params, init_params=init_params) 401 | 402 | def _get_n_fit_scalars_per_param(self): 403 | nc = self.n_components 404 | nf = self.n_features 405 | return { 406 | "s": nc - 1, 407 | "t": nc * (nc - 1), 408 | "e": nc * (nf - 1), 409 | } 410 | 411 | def _init(self, X, lengths=None): 412 | self._check_and_set_n_features(X) 413 | super()._init(X, lengths=lengths) 414 | self.random_state = check_random_state(self.random_state) 415 | 416 | if 'e' in self.init_params: 417 | self.emissionprob_ = self.random_state \ 418 | .rand(self.n_components, self.n_features) 419 | normalize(self.emissionprob_, axis=1) 420 | 421 | def _check(self): 422 | super()._check() 423 | 424 | self.emissionprob_ = np.atleast_2d(self.emissionprob_) 425 | n_features = getattr(self, "n_features", self.emissionprob_.shape[1]) 426 | if self.emissionprob_.shape != (self.n_components, n_features): 427 | raise ValueError( 428 | "emissionprob_ must have shape (n_components, n_features)") 429 | else: 430 | self.n_features = n_features 431 | 432 | def _compute_log_likelihood(self, X): 433 | return log_mask_zero(self.emissionprob_)[:, np.concatenate(X)].T 434 | 435 | def _generate_sample_from_state(self, state, random_state=None): 436 | cdf = np.cumsum(self.emissionprob_[state, :]) 437 | random_state = check_random_state(random_state) 438 | return [(cdf > random_state.rand()).argmax()] 439 | 440 | def _initialize_sufficient_statistics(self): 441 | stats = super()._initialize_sufficient_statistics() 442 | stats['obs'] = np.zeros((self.n_components, self.n_features)) 443 | return stats 444 | 445 | def _accumulate_sufficient_statistics(self, stats, X, framelogprob, 446 | posteriors, fwdlattice, bwdlattice): 447 | super()._accumulate_sufficient_statistics( 448 | stats, X, framelogprob, posteriors, fwdlattice, bwdlattice) 449 | if 'e' in self.params: 450 | for t, symbol in enumerate(np.concatenate(X)): 451 | stats['obs'][:, symbol] += posteriors[t] 452 | 453 | def _do_mstep(self, stats): 454 | super()._do_mstep(stats) 455 | if 'e' in self.params: 456 | self.emissionprob_ = (stats['obs'] 457 | / stats['obs'].sum(axis=1)[:, np.newaxis]) 458 | 459 | def _check_and_set_n_features(self, X): 460 | """ 461 | Check if ``X`` is a sample from a Multinomial distribution, i.e. an 462 | array of non-negative integers. 
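For example (editor's illustration): ``X = np.array([[0], [2], [1]])`` is a
valid input and implies ``n_features = X.max() + 1 = 3``.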
463 | """ 464 | if not np.issubdtype(X.dtype, np.integer): 465 | raise ValueError("Symbols should be integers") 466 | if X.min() < 0: 467 | raise ValueError("Symbols should be nonnegative") 468 | if hasattr(self, "n_features"): 469 | if self.n_features - 1 < X.max(): 470 | raise ValueError( 471 | "Largest symbol is {} but the model only emits " 472 | "symbols up to {}" 473 | .format(X.max(), self.n_features - 1)) 474 | self.n_features = X.max() + 1 475 | 476 | 477 | class GMMCTHMM(BaseTEBM): 478 | r"""Hidden Markov Model with Gaussian mixture emissions. 479 | 480 | Parameters 481 | ---------- 482 | n_components : int 483 | Number of states in the model. 484 | 485 | n_mix : int 486 | Number of states in the GMM. 487 | 488 | covariance_type : string, optional 489 | String describing the type of covariance parameters to 490 | use. Must be one of 491 | 492 | * "spherical" --- each state uses a single variance value that 493 | applies to all features. 494 | * "diag" --- each state uses a diagonal covariance matrix. 495 | * "full" --- each state uses a full (i.e. unrestricted) 496 | covariance matrix. 497 | * "tied" --- all mixture components of each state use **the same** full 498 | covariance matrix (note that this is not the same as for 499 | `MixtureTEBM`). 500 | 501 | Defaults to "diag". 502 | 503 | min_covar : float, optional 504 | Floor on the diagonal of the covariance matrix to prevent 505 | overfitting. Defaults to 1e-3. 506 | 507 | startprob_prior : array, shape (n_components, ), optional 508 | Parameters of the Dirichlet prior distribution for 509 | :attr:`startprob_`. 510 | 511 | transmat_prior : array, shape (n_components, n_components), optional 512 | Parameters of the Dirichlet prior distribution for each row 513 | of the transition probabilities :attr:`transmat_`. 514 | 515 | weights_prior : array, shape (n_mix, ), optional 516 | Parameters of the Dirichlet prior distribution for 517 | :attr:`weights_`. 518 | 519 | means_prior, means_weight : array, shape (n_mix, ), optional 520 | Mean and precision of the Normal prior distribtion for 521 | :attr:`means_`. 522 | 523 | covars_prior, covars_weight : array, shape (n_mix, ), optional 524 | Parameters of the prior distribution for the covariance matrix 525 | :attr:`covars_`. 526 | 527 | If :attr:`covariance_type` is "spherical" or "diag" the prior is 528 | the inverse gamma distribution, otherwise --- the inverse Wishart 529 | distribution. 530 | 531 | algorithm : string, optional 532 | Decoder algorithm. Must be one of "viterbi" or "map". 533 | Defaults to "viterbi". 534 | 535 | random_state: RandomState or an int seed, optional 536 | A random number generator instance. 537 | 538 | n_iter : int, optional 539 | Maximum number of iterations to perform. 540 | 541 | tol : float, optional 542 | Convergence threshold. EM will stop if the gain in log-likelihood 543 | is below this value. 544 | 545 | verbose : bool, optional 546 | When ``True`` per-iteration convergence reports are printed 547 | to :data:`sys.stderr`. You can diagnose convergence via the 548 | :attr:`monitor_` attribute. 549 | 550 | init_params : string, optional 551 | Controls which parameters are initialized prior to training. Can 552 | contain any combination of 's' for startprob, 't' for transmat, 'm' 553 | for means, 'c' for covars, and 'w' for GMM mixing weights. 554 | Defaults to all parameters. 555 | 556 | params : string, optional 557 | Controls which parameters are updated in the training process. 
Can 558 | contain any combination of 's' for startprob, 't' for transmat, 'm' for 559 | means, and 'c' for covars, and 'w' for GMM mixing weights. 560 | Defaults to all parameters. 561 | 562 | Attributes 563 | ---------- 564 | monitor\_ : ConvergenceMonitor 565 | Monitor object used to check the convergence of EM. 566 | 567 | startprob\_ : array, shape (n_components, ) 568 | Initial state occupation distribution. 569 | 570 | transmat\_ : array, shape (n_components, n_components) 571 | Matrix of transition probabilities between states. 572 | 573 | weights\_ : array, shape (n_components, n_mix) 574 | Mixture weights for each state. 575 | 576 | means\_ : array, shape (n_components, n_mix) 577 | Mean parameters for each mixture component in each state. 578 | 579 | covars\_ : array 580 | Covariance parameters for each mixture components in each state. 581 | 582 | The shape depends on :attr:`covariance_type`:: 583 | 584 | (n_components, n_mix) if "spherical", 585 | (n_components, n_mix, n_features) if "diag", 586 | (n_components, n_mix, n_features, n_features) if "full" 587 | (n_components, n_features, n_features) if "tied", 588 | """ 589 | 590 | def __init__(self, n_components=1, n_mix=1, 591 | min_covar=1e-3, startprob_prior=1.0, transmat_prior=1.0, 592 | weights_prior=1.0, means_prior=0.0, means_weight=0.0, 593 | covars_prior=None, covars_weight=None, 594 | algorithm="viterbi", covariance_type="diag", 595 | random_state=None, n_iter=10, tol=1e-2, 596 | verbose=False, params="stmcw", 597 | init_params="stmcw"): 598 | BaseTEBM.__init__(self, n_components, 599 | startprob_prior=startprob_prior, 600 | transmat_prior=transmat_prior, 601 | algorithm=algorithm, random_state=random_state, 602 | n_iter=n_iter, tol=tol, verbose=verbose, 603 | params=params, init_params=init_params) 604 | self.covariance_type = covariance_type 605 | self.min_covar = min_covar 606 | self.n_mix = n_mix 607 | self.weights_prior = weights_prior 608 | self.means_prior = means_prior 609 | self.means_weight = means_weight 610 | self.covars_prior = covars_prior 611 | self.covars_weight = covars_weight 612 | 613 | def _get_n_fit_scalars_per_param(self): 614 | nc = self.n_components 615 | nf = self.n_features 616 | nm = self.n_mix 617 | return { 618 | "s": nc - 1, 619 | "t": nc * (nc - 1), 620 | "m": nc * nm * nf, 621 | "c": { 622 | "spherical": nc * nm, 623 | "diag": nc * nm * nf, 624 | "full": nc * nm * nf * (nf + 1) // 2, 625 | "tied": nc * nf * (nf + 1) // 2, 626 | }[self.covariance_type], 627 | "w": nm - 1, 628 | } 629 | 630 | def _init(self, X, lengths=None): 631 | _check_and_set_gaussian_n_features(self) 632 | super()._init(X, lengths=lengths) 633 | nc = self.n_components 634 | nf = self.n_features 635 | nm = self.n_mix 636 | 637 | # Default values for covariance prior parameters 638 | self._init_covar_priors() 639 | self._fix_priors_shape() 640 | 641 | main_kmeans = cluster.KMeans(n_clusters=nc, 642 | random_state=self.random_state) 643 | labels = main_kmeans.fit_predict(X) 644 | kmeanses = [] 645 | for label in range(nc): 646 | kmeans = cluster.KMeans(n_clusters=nm, 647 | random_state=self.random_state) 648 | kmeans.fit(X[np.where(labels == label)]) 649 | kmeanses.append(kmeans) 650 | 651 | if 'w' in self.init_params or not hasattr(self, "weights_"): 652 | self.weights_ = np.ones((nc, nm)) / (np.ones((nc, 1)) * nm) 653 | 654 | if 'm' in self.init_params or not hasattr(self, "means_"): 655 | self.means_ = np.stack( 656 | [kmeans.cluster_centers_ for kmeans in kmeanses]) 657 | 658 | if 'c' in self.init_params or not 
hasattr(self, "covars_"): 659 | cv = np.cov(X.T) + self.min_covar * np.eye(nf) 660 | if not cv.shape: 661 | cv.shape = (1, 1) 662 | if self.covariance_type == 'tied': 663 | self.covars_ = np.zeros((nc, nf, nf)) 664 | self.covars_[:] = cv 665 | elif self.covariance_type == 'full': 666 | self.covars_ = np.zeros((nc, nm, nf, nf)) 667 | self.covars_[:] = cv 668 | elif self.covariance_type == 'diag': 669 | self.covars_ = np.zeros((nc, nm, nf)) 670 | self.covars_[:] = np.diag(cv) 671 | elif self.covariance_type == 'spherical': 672 | self.covars_ = np.zeros((nc, nm)) 673 | self.covars_[:] = cv.mean() 674 | 675 | def _init_covar_priors(self): 676 | if self.covariance_type == "full": 677 | if self.covars_prior is None: 678 | self.covars_prior = 0.0 679 | if self.covars_weight is None: 680 | self.covars_weight = -(1.0 + self.n_features + 1.0) 681 | elif self.covariance_type == "tied": 682 | if self.covars_prior is None: 683 | self.covars_prior = 0.0 684 | if self.covars_weight is None: 685 | self.covars_weight = -(self.n_mix + self.n_features + 1.0) 686 | elif self.covariance_type == "diag": 687 | if self.covars_prior is None: 688 | self.covars_prior = -1.5 689 | if self.covars_weight is None: 690 | self.covars_weight = 0.0 691 | elif self.covariance_type == "spherical": 692 | if self.covars_prior is None: 693 | self.covars_prior = -(self.n_mix + 2.0) / 2.0 694 | if self.covars_weight is None: 695 | self.covars_weight = 0.0 696 | 697 | def _fix_priors_shape(self): 698 | nc = self.n_components 699 | nf = self.n_features 700 | nm = self.n_mix 701 | 702 | # If priors are numbers, this function will make them into a 703 | # matrix of proper shape 704 | self.weights_prior = np.broadcast_to( 705 | self.weights_prior, (nc, nm)).copy() 706 | self.means_prior = np.broadcast_to( 707 | self.means_prior, (nc, nm, nf)).copy() 708 | self.means_weight = np.broadcast_to( 709 | self.means_weight, (nc, nm)).copy() 710 | 711 | if self.covariance_type == "full": 712 | self.covars_prior = np.broadcast_to( 713 | self.covars_prior, (nc, nm, nf, nf)).copy() 714 | self.covars_weight = np.broadcast_to( 715 | self.covars_weight, (nc, nm)).copy() 716 | elif self.covariance_type == "tied": 717 | self.covars_prior = np.broadcast_to( 718 | self.covars_prior, (nc, nf, nf)).copy() 719 | self.covars_weight = np.broadcast_to( 720 | self.covars_weight, nc).copy() 721 | elif self.covariance_type == "diag": 722 | self.covars_prior = np.broadcast_to( 723 | self.covars_prior, (nc, nm, nf)).copy() 724 | self.covars_weight = np.broadcast_to( 725 | self.covars_weight, (nc, nm, nf)).copy() 726 | elif self.covariance_type == "spherical": 727 | self.covars_prior = np.broadcast_to( 728 | self.covars_prior, (nc, nm)).copy() 729 | self.covars_weight = np.broadcast_to( 730 | self.covars_weight, (nc, nm)).copy() 731 | 732 | def _check(self): 733 | super()._check() 734 | if not hasattr(self, "n_features"): 735 | self.n_features = self.means_.shape[2] 736 | nc = self.n_components 737 | nf = self.n_features 738 | nm = self.n_mix 739 | 740 | self._init_covar_priors() 741 | self._fix_priors_shape() 742 | 743 | # Checking covariance type 744 | if self.covariance_type not in COVARIANCE_TYPES: 745 | raise ValueError("covariance_type must be one of {}" 746 | .format(COVARIANCE_TYPES)) 747 | 748 | self.weights_ = np.array(self.weights_) 749 | # Checking mixture weights' shape 750 | if self.weights_.shape != (nc, nm): 751 | raise ValueError("mixture weights must have shape " 752 | "(n_components, n_mix), actual shape: {}" 753 | .format(self.weights_.shape)) 
754 | 755 | # Checking mixture weights' mathematical correctness 756 | if not np.allclose(np.sum(self.weights_, axis=1), np.ones(nc)): 757 | raise ValueError("mixture weights must sum up to 1") 758 | 759 | # Checking means' shape 760 | self.means_ = np.array(self.means_) 761 | if self.means_.shape != (nc, nm, nf): 762 | raise ValueError("mixture means must have shape " 763 | "(n_components, n_mix, n_features), " 764 | "actual shape: {}".format(self.means_.shape)) 765 | 766 | # Checking covariances' shape 767 | self.covars_ = np.array(self.covars_) 768 | covars_shape = self.covars_.shape 769 | needed_shapes = { 770 | "spherical": (nc, nm), 771 | "tied": (nc, nf, nf), 772 | "diag": (nc, nm, nf), 773 | "full": (nc, nm, nf, nf), 774 | } 775 | needed_shape = needed_shapes[self.covariance_type] 776 | if covars_shape != needed_shape: 777 | raise ValueError("{!r} mixture covars must have shape {}, " 778 | "actual shape: {}" 779 | .format(self.covariance_type, 780 | needed_shape, covars_shape)) 781 | 782 | # Checking covariances' mathematical correctness 783 | from scipy import linalg 784 | 785 | if (self.covariance_type == "spherical" or 786 | self.covariance_type == "diag"): 787 | if np.any(self.covars_ < 0): 788 | raise ValueError("{!r} mixture covars must be non-negative" 789 | .format(self.covariance_type)) 790 | if np.any(self.covars_ == 0): 791 | _log.warning("Degenerate mixture covariance") 792 | elif self.covariance_type == "tied": 793 | for i, covar in enumerate(self.covars_): 794 | if not np.allclose(covar, covar.T): 795 | raise ValueError("Covariance of state #{} is not symmetric" 796 | .format(i)) 797 | min_eigvalsh = np.linalg.eigvalsh(covar).min() 798 | if min_eigvalsh < 0: 799 | raise ValueError("Covariance of state #{} is not positive " 800 | "definite".format(i)) 801 | if min_eigvalsh == 0: 802 | _log.warning("Covariance of state #%d has a null " 803 | "eigenvalue.", i) 804 | elif self.covariance_type == "full": 805 | for i, mix_covars in enumerate(self.covars_): 806 | for j, covar in enumerate(mix_covars): 807 | if not np.allclose(covar, covar.T): 808 | raise ValueError( 809 | "Covariance of state #{}, mixture #{} is not " 810 | "symmetric".format(i, j)) 811 | min_eigvalsh = np.linalg.eigvalsh(covar).min() 812 | if min_eigvalsh < 0: 813 | raise ValueError( 814 | "Covariance of state #{}, mixture #{} is not " 815 | "positive definite".format(i, j)) 816 | if min_eigvalsh == 0: 817 | _log.warning("Covariance of state #%d, mixture #%d " 818 | "has a null eigenvalue.", i, j) 819 | 820 | def _generate_sample_from_state(self, state, random_state=None): 821 | if random_state is None: 822 | random_state = self.random_state 823 | random_state = check_random_state(random_state) 824 | 825 | cur_weights = self.weights_[state] 826 | i_gauss = random_state.choice(self.n_mix, p=cur_weights) 827 | if self.covariance_type == 'tied': 828 | # self.covars_.shape == (n_components, n_features, n_features) 829 | # shouldn't that be (n_mix, ...)? 
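            # Note: for "tied" covariances, all mixture components of a state
            # share one full (n_features, n_features) matrix, so covars_ has
            # shape (n_components, n_features, n_features) (see _check above)
            # and covs[state] below already selects the right matrix without
            # a mixture index.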
830 | covs = self.covars_ 831 | else: 832 | covs = self.covars_[:, i_gauss] 833 | covs = fill_covars(covs, self.covariance_type, 834 | self.n_components, self.n_features) 835 | return random_state.multivariate_normal( 836 | self.means_[state, i_gauss], covs[state] 837 | ) 838 | 839 | def _compute_log_weighted_gaussian_densities(self, X, i_comp): 840 | cur_means = self.means_[i_comp] 841 | cur_covs = self.covars_[i_comp] 842 | if self.covariance_type == 'spherical': 843 | cur_covs = cur_covs[:, np.newaxis] 844 | log_cur_weights = np.log(self.weights_[i_comp]) 845 | 846 | return log_multivariate_normal_density( 847 | X, cur_means, cur_covs, self.covariance_type 848 | ) + log_cur_weights 849 | 850 | def _compute_log_likelihood(self, X): 851 | n_samples, _ = X.shape 852 | res = np.zeros((n_samples, self.n_components)) 853 | 854 | for i in range(self.n_components): 855 | log_denses = self._compute_log_weighted_gaussian_densities(X, i) 856 | with np.errstate(under="ignore"): 857 | res[:, i] = logsumexp(log_denses, axis=1) 858 | 859 | return res 860 | 861 | def _initialize_sufficient_statistics(self): 862 | stats = super()._initialize_sufficient_statistics() 863 | stats['n_samples'] = 0 864 | stats['post_comp_mix'] = None 865 | stats['post_mix_sum'] = np.zeros((self.n_components, self.n_mix)) 866 | stats['post_sum'] = np.zeros(self.n_components) 867 | stats['samples'] = None 868 | stats['centered'] = None 869 | return stats 870 | 871 | def _accumulate_sufficient_statistics(self, stats, X, framelogprob, 872 | post_comp, fwdlattice, bwdlattice): 873 | 874 | # TODO: support multiple frames 875 | 876 | super()._accumulate_sufficient_statistics( 877 | stats, X, framelogprob, post_comp, fwdlattice, bwdlattice 878 | ) 879 | 880 | n_samples, _ = X.shape 881 | 882 | stats['n_samples'] = n_samples 883 | stats['samples'] = X 884 | 885 | post_mix = np.zeros((n_samples, self.n_components, self.n_mix)) 886 | for p in range(self.n_components): 887 | log_denses = self._compute_log_weighted_gaussian_densities(X, p) 888 | log_normalize(log_denses, axis=-1) 889 | with np.errstate(under="ignore"): 890 | post_mix[:, p, :] = np.exp(log_denses) 891 | 892 | with np.errstate(under="ignore"): 893 | post_comp_mix = post_comp[:, :, np.newaxis] * post_mix 894 | stats['post_comp_mix'] = post_comp_mix 895 | 896 | stats['post_mix_sum'] = np.sum(post_comp_mix, axis=0) 897 | stats['post_sum'] = np.sum(post_comp, axis=0) 898 | 899 | stats['centered'] = X[:, np.newaxis, np.newaxis, :] - self.means_ 900 | 901 | def _do_mstep(self, stats): 902 | super()._do_mstep(stats) 903 | nc = self.n_components 904 | nf = self.n_features 905 | nm = self.n_mix 906 | 907 | n_samples = stats['n_samples'] 908 | 909 | # Maximizing weights 910 | alphas_minus_one = self.weights_prior - 1 911 | new_weights_numer = stats['post_mix_sum'] + alphas_minus_one 912 | new_weights_denom = ( 913 | stats['post_sum'] + np.sum(alphas_minus_one, axis=1) 914 | )[:, np.newaxis] 915 | new_weights = new_weights_numer / new_weights_denom 916 | 917 | # Maximizing means 918 | lambdas, mus = self.means_weight, self.means_prior 919 | new_means_numer = ( 920 | np.einsum('ijk,il->jkl', stats['post_comp_mix'], stats['samples']) 921 | + lambdas[:, :, np.newaxis] * mus 922 | ) 923 | new_means_denom = (stats['post_mix_sum'] + lambdas)[:, :, np.newaxis] 924 | new_means = new_means_numer / new_means_denom 925 | 926 | # Maximizing covariances 927 | centered_means = self.means_ - mus 928 | 929 | if self.covariance_type == 'full': 930 | centered = stats['centered'].reshape((n_samples, 
nc, nm, nf, 1)) 931 | centered_t = stats['centered'].reshape((n_samples, nc, nm, 1, nf)) 932 | centered_dots = centered * centered_t 933 | 934 | psis_t = np.transpose(self.covars_prior, axes=(0, 1, 3, 2)) 935 | nus = self.covars_weight 936 | 937 | centr_means_resh = centered_means.reshape((nc, nm, nf, 1)) 938 | centr_means_resh_t = centered_means.reshape((nc, nm, 1, nf)) 939 | centered_means_dots = centr_means_resh * centr_means_resh_t 940 | 941 | new_cov_numer = ( 942 | np.einsum( 943 | 'ijk,ijklm->jklm', stats['post_comp_mix'], centered_dots) 944 | + psis_t 945 | + lambdas[:, :, np.newaxis, np.newaxis] * centered_means_dots 946 | ) 947 | new_cov_denom = ( 948 | stats['post_mix_sum'] + 1 + nus + nf + 1 949 | )[:, :, np.newaxis, np.newaxis] 950 | new_cov = new_cov_numer / new_cov_denom 951 | 952 | elif self.covariance_type == 'diag': 953 | centered2 = stats['centered'] ** 2 954 | centered_means2 = centered_means ** 2 955 | 956 | alphas = self.covars_prior 957 | betas = self.covars_weight 958 | 959 | new_cov_numer = ( 960 | np.einsum('ijk,ijkl->jkl', stats['post_comp_mix'], centered2) 961 | + lambdas[:, :, np.newaxis] * centered_means2 962 | + 2 * betas 963 | ) 964 | new_cov_denom = ( 965 | stats['post_mix_sum'][:, :, np.newaxis] + 1 + 2 * (alphas + 1) 966 | ) 967 | new_cov = new_cov_numer / new_cov_denom 968 | 969 | elif self.covariance_type == 'spherical': 970 | centered_norm2 = np.sum(stats['centered'] ** 2, axis=-1) 971 | 972 | alphas = self.covars_prior 973 | betas = self.covars_weight 974 | 975 | centered_means_norm2 = np.sum(centered_means ** 2, axis=-1) 976 | 977 | new_cov_numer = ( 978 | np.einsum( 979 | 'ijk,ijk->jk', stats['post_comp_mix'], centered_norm2) 980 | + lambdas * centered_means_norm2 981 | + 2 * betas 982 | ) 983 | new_cov_denom = nf * (stats['post_mix_sum'] + 1) + 2 * (alphas + 1) 984 | new_cov = new_cov_numer / new_cov_denom 985 | 986 | elif self.covariance_type == 'tied': 987 | centered = stats['centered'].reshape((n_samples, nc, nm, nf, 1)) 988 | centered_t = stats['centered'].reshape((n_samples, nc, nm, 1, nf)) 989 | centered_dots = centered * centered_t 990 | 991 | psis_t = np.transpose(self.covars_prior, axes=(0, 2, 1)) 992 | nus = self.covars_weight 993 | 994 | centr_means_resh = centered_means.reshape((nc, nm, nf, 1)) 995 | centr_means_resh_t = centered_means.reshape((nc, nm, 1, nf)) 996 | centered_means_dots = centr_means_resh * centr_means_resh_t 997 | 998 | lambdas_cmdots_prod_sum = ( 999 | np.einsum('ij,ijkl->ikl', lambdas, centered_means_dots)) 1000 | 1001 | new_cov_numer = ( 1002 | np.einsum( 1003 | 'ijk,ijklm->jlm', stats['post_comp_mix'], centered_dots) 1004 | + lambdas_cmdots_prod_sum + psis_t) 1005 | new_cov_denom = ( 1006 | stats['post_sum'] + nm + nus + nf + 1 1007 | )[:, np.newaxis, np.newaxis] 1008 | new_cov = new_cov_numer / new_cov_denom 1009 | 1010 | # Assigning new values to class members 1011 | self.weights_ = new_weights 1012 | self.means_ = new_means 1013 | self.covars_ = new_cov 1014 | -------------------------------------------------------------------------------- /lib/tebm/cthmm_var.py: -------------------------------------------------------------------------------- 1 | # Variable interval CTHMM 2 | # Author: Peter Wijeratne (p.wijeratne@pm.me) 3 | 4 | import logging 5 | 6 | import numpy as np 7 | from scipy.special import logsumexp 8 | from sklearn import cluster 9 | from sklearn.utils import check_random_state 10 | 11 | from . 
import _utils
12 | from .stats import log_multivariate_normal_density
13 | from .base_var import _BaseTEBM
14 | from .utils import (
15 |     fill_covars, iter_from_X_lengths, log_mask_zero, log_normalize, normalize)
16 | __all__ = ["GaussianCTHMM", "MultinomialCTHMM", "GMMCTHMM"]
17 | 
18 | _log = logging.getLogger(__name__)
19 | COVARIANCE_TYPES = frozenset(("spherical", "diag", "full", "tied"))
20 | 
21 | def _check_and_set_gaussian_n_features(model):
22 |     _, n_features = model.X.shape
23 |     if hasattr(model, "n_features") and model.n_features != n_features:
24 |         raise ValueError("Unexpected number of dimensions, got {} but "
25 |                          "expected {}".format(n_features, model.n_features))
26 |     model.n_features = n_features
27 | 
28 | 
29 | class GaussianCTHMM(_BaseTEBM):
30 |     r"""Continuous-time hidden Markov model with Gaussian emissions.
31 | 
32 |     Parameters
33 |     ----------
34 |     n_components : int
35 |         Number of states.
36 | 
37 |     covariance_type : string, optional
38 |         String describing the type of covariance parameters to
39 |         use. Must be one of
40 | 
41 |         * "spherical" --- each state uses a single variance value that
42 |           applies to all features.
43 |         * "diag" --- each state uses a diagonal covariance matrix.
44 |         * "full" --- each state uses a full (i.e. unrestricted)
45 |           covariance matrix.
46 |         * "tied" --- all states use **the same** full covariance matrix.
47 | 
48 |         Defaults to "diag".
49 | 
50 |     min_covar : float, optional
51 |         Floor on the diagonal of the covariance matrix to prevent
52 |         overfitting. Defaults to 1e-3.
53 | 
54 |     startprob_prior : array, shape (n_components, ), optional
55 |         Parameters of the Dirichlet prior distribution for
56 |         :attr:`startprob_`.
57 | 
58 |     transmat_prior : array, shape (n_components, n_components), optional
59 |         Parameters of the Dirichlet prior distribution for each row
60 |         of the transition probabilities :attr:`transmat_`.
61 | 
62 |     means_prior, means_weight : array, shape (n_components, ), optional
63 |         Mean and precision of the Normal prior distribution for
64 |         :attr:`means_`.
65 | 
66 |     covars_prior, covars_weight : array, shape (n_components, ), optional
67 |         Parameters of the prior distribution for the covariance matrix
68 |         :attr:`covars_`.
69 | 
70 |         If :attr:`covariance_type` is "spherical" or "diag" the prior is
71 |         the inverse gamma distribution, otherwise --- the inverse Wishart
72 |         distribution.
73 | 
74 |     algorithm : string, optional
75 |         Decoder algorithm. Must be one of "viterbi" or "map".
76 |         Defaults to "viterbi".
77 | 
78 |     random_state : RandomState or an int seed, optional
79 |         A random number generator instance.
80 | 
81 |     n_iter : int, optional
82 |         Maximum number of iterations to perform.
83 | 
84 |     tol : float, optional
85 |         Convergence threshold. EM will stop if the gain in log-likelihood
86 |         is below this value.
87 | 
88 |     verbose : bool, optional
89 |         When ``True`` per-iteration convergence reports are printed
90 |         to :data:`sys.stderr`. You can diagnose convergence via the
91 |         :attr:`monitor_` attribute.
92 | 
93 |     params : string, optional
94 |         Controls which parameters are updated in the training
95 |         process. Can contain any combination of 's' for startprob,
96 |         't' for transmat, 'm' for means and 'c' for covars. Defaults
97 |         to all parameters.
98 | 
99 |     init_params : string, optional
100 |         Controls which parameters are initialized prior to
101 |         training. Can contain any combination of 's' for
102 |         startprob, 't' for transmat, 'm' for means and 'c' for covars.
103 |         Defaults to all parameters.
104 | 105 | Attributes 106 | ---------- 107 | n_features : int 108 | Dimensionality of the Gaussian emissions. 109 | 110 | monitor\_ : ConvergenceMonitor 111 | Monitor object used to check the convergence of EM. 112 | 113 | startprob\_ : array, shape (n_components, ) 114 | Initial state occupation distribution. 115 | 116 | transmat\_ : array, shape (n_components, n_components) 117 | Matrix of transition probabilities between states. 118 | 119 | means\_ : array, shape (n_components, n_features) 120 | Mean parameters for each state. 121 | 122 | covars\_ : array 123 | Covariance parameters for each state. 124 | 125 | The shape depends on :attr:`covariance_type`:: 126 | 127 | (n_components, ) if "spherical", 128 | (n_components, n_features) if "diag", 129 | (n_components, n_features, n_features) if "full" 130 | (n_features, n_features) if "tied", 131 | 132 | Examples 133 | -------- 134 | >>> from tebm.tebm import MixtureTEBM 135 | >>> MixtureTEBM(n_components=2) #doctest: +ELLIPSIS 136 | MixtureTEBM(algorithm='viterbi',... 137 | """ 138 | def __init__(self, X=None, lengths=None, jumps=None, 139 | n_components=1, startprob_prior=None, transmat_prior=None, 140 | means_prior=0, means_weight=0, covars_prior=1e-2, covars_weight=1, covariance_type='diag', min_covar=1e-3, 141 | algorithm="viterbi", random_state=None, n_iter=10, 142 | tol=1e-2, verbose=False, params="st", 143 | init_params="st", allow_nan=False): 144 | _BaseTEBM.__init__(self, X=X, lengths=lengths, jumps=jumps, 145 | n_components=n_components, startprob_prior=startprob_prior, transmat_prior=transmat_prior, 146 | algorithm=algorithm, random_state=random_state, n_iter=n_iter, 147 | tol=tol, verbose=verbose, params=params, 148 | init_params=init_params, allow_nan=allow_nan) 149 | self.covariance_type = covariance_type 150 | self.min_covar = min_covar 151 | self.means_prior = means_prior 152 | self.means_weight = means_weight 153 | self.covars_prior = covars_prior 154 | self.covars_weight = covars_weight 155 | #FIXME 156 | # self.mixtures = [] 157 | 158 | @property 159 | def covars_(self): 160 | """Return covars as a full matrix.""" 161 | return fill_covars(self._covars_, self.covariance_type, 162 | self.n_components, self.n_features) 163 | 164 | @covars_.setter 165 | def covars_(self, covars): 166 | covars = np.array(covars, copy=True) 167 | _utils._validate_covars(covars, self.covariance_type, 168 | self.n_components) 169 | self._covars_ = covars 170 | """ 171 | def _check(self): 172 | super()._check() 173 | 174 | self.means_ = np.asarray(self.means_) 175 | self.n_features = self.means_.shape[1] 176 | 177 | if self.covariance_type not in COVARIANCE_TYPES: 178 | raise ValueError('covariance_type must be one of {}' 179 | .format(COVARIANCE_TYPES)) 180 | """ 181 | def _get_n_fit_scalars_per_param(self): 182 | nc = self.n_components 183 | nf = self.n_features 184 | return { 185 | "s": nc - 1, 186 | "t": nc * (nc - 1), 187 | "m": nc * nf, 188 | "c": { 189 | "spherical": nc, 190 | "diag": nc * nf, 191 | "full": nc * nf * (nf + 1) // 2, 192 | "tied": nf * (nf + 1) // 2, 193 | }[self.covariance_type], 194 | } 195 | 196 | def _init(self, X, lengths=None): 197 | _check_and_set_gaussian_n_features(self) 198 | super()._init(X, lengths=lengths) 199 | 200 | if 'm' in self.init_params: 201 | kmeans = cluster.KMeans(n_clusters=self.n_components, 202 | random_state=self.random_state) 203 | kmeans.fit(X) 204 | self.means_ = kmeans.cluster_centers_ 205 | if 'c' in self.init_params: 206 | cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1]) 207 | if not 
cv.shape: 208 | cv.shape = (1, 1) 209 | self.covars_ = \ 210 | _utils.distribute_covar_matrix_to_match_covariance_type( 211 | cv, self.covariance_type, self.n_components).copy() 212 | 213 | def _compute_log_likelihood_S(self, i, j): 214 | return log_multivariate_normal_density( 215 | self.X[i:j], self.means_, self._covars_, self.covariance_type) 216 | 217 | def predict(self, X, lengths=None, jumps=None): 218 | # FIXME: is there a general way of doing this? 219 | self.X = X 220 | self.lengths = lengths 221 | self.jumps = jumps 222 | logprob, state_sequence = self.decode() 223 | return state_sequence, logprob 224 | 225 | def predict_proba(self, X, lengths=None, jumps=None): 226 | # FIXME: is there a general way of doing this? 227 | self.X = X 228 | self.lengths = lengths 229 | self.jumps = jumps 230 | n_samples = self.X.shape[0] 231 | logprob = 0 232 | posteriors = np.zeros((n_samples, self.n_components)) 233 | for i, j in iter_from_X_lengths(self.X, self.lengths): 234 | _, posteriors[i:j] = self.score_samples(i, j) 235 | return posteriors 236 | 237 | def _generate_sample_from_state(self, state, random_state=None): 238 | random_state = check_random_state(random_state) 239 | return random_state.multivariate_normal( 240 | self.means_[state], self.covars_[state] 241 | ) 242 | 243 | def _initialize_sufficient_statistics(self): 244 | stats = super()._initialize_sufficient_statistics() 245 | stats['post'] = np.zeros(self.n_components) 246 | stats['obs'] = np.zeros((self.n_components, self.n_features)) 247 | stats['obs**2'] = np.zeros((self.n_components, self.n_features)) 248 | if self.covariance_type in ('tied', 'full'): 249 | stats['obs*obs.T'] = np.zeros((self.n_components, self.n_features, 250 | self.n_features)) 251 | return stats 252 | 253 | def _accumulate_sufficient_statistics(self, stats, obs, framelogprob, posteriors, 254 | fwdlattice, bwdlattice, framejumps): 255 | super()._accumulate_sufficient_statistics( 256 | stats, obs, framelogprob, posteriors, fwdlattice, bwdlattice, framejumps) 257 | 258 | if 'm' in self.params or 'c' in self.params: 259 | stats['post'] += posteriors.sum(axis=0) 260 | stats['obs'] += np.dot(posteriors.T, obs) 261 | 262 | if 'c' in self.params: 263 | if self.covariance_type in ('spherical', 'diag'): 264 | stats['obs**2'] += np.dot(posteriors.T, obs ** 2) 265 | elif self.covariance_type in ('tied', 'full'): 266 | # posteriors: (nt, nc); obs: (nt, nf); obs: (nt, nf) 267 | # -> (nc, nf, nf) 268 | stats['obs*obs.T'] += np.einsum( 269 | 'ij,ik,il->jkl', posteriors, obs, obs) 270 | 271 | def _do_mstep(self, stats): 272 | super()._do_mstep(stats) 273 | 274 | means_prior = self.means_prior 275 | means_weight = self.means_weight 276 | 277 | # TODO: find a proper reference for estimates for different 278 | # covariance models. 279 | # Based on Huang, Acero, Hon, "Spoken Language Processing", 280 | # p. 
443 - 445 281 | denom = stats['post'][:, np.newaxis] 282 | if 'm' in self.params: 283 | self.means_ = ((means_weight * means_prior + stats['obs']) 284 | / (means_weight + denom)) 285 | 286 | if 'c' in self.params: 287 | covars_prior = self.covars_prior 288 | covars_weight = self.covars_weight 289 | meandiff = self.means_ - means_prior 290 | 291 | if self.covariance_type in ('spherical', 'diag'): 292 | cv_num = (means_weight * meandiff**2 293 | + stats['obs**2'] 294 | - 2 * self.means_ * stats['obs'] 295 | + self.means_**2 * denom) 296 | cv_den = max(covars_weight - 1, 0) + denom 297 | self._covars_ = \ 298 | (covars_prior + cv_num) / np.maximum(cv_den, 1e-5) 299 | if self.covariance_type == 'spherical': 300 | self._covars_ = np.tile( 301 | self._covars_.mean(1)[:, np.newaxis], 302 | (1, self._covars_.shape[1])) 303 | elif self.covariance_type in ('tied', 'full'): 304 | cv_num = np.empty((self.n_components, self.n_features, 305 | self.n_features)) 306 | for c in range(self.n_components): 307 | obsmean = np.outer(stats['obs'][c], self.means_[c]) 308 | 309 | cv_num[c] = (means_weight * np.outer(meandiff[c], 310 | meandiff[c]) 311 | + stats['obs*obs.T'][c] 312 | - obsmean - obsmean.T 313 | + np.outer(self.means_[c], self.means_[c]) 314 | * stats['post'][c]) 315 | cvweight = max(covars_weight - self.n_features, 0) 316 | if self.covariance_type == 'tied': 317 | self._covars_ = ((covars_prior + cv_num.sum(axis=0)) / 318 | (cvweight + stats['post'].sum())) 319 | elif self.covariance_type == 'full': 320 | self._covars_ = ((covars_prior + cv_num) / 321 | (cvweight + stats['post'][:, None, None])) 322 | 323 | class MultinomialCTHMM(_BaseTEBM): 324 | r"""Hidden Markov Model with multinomial (discrete) emissions 325 | 326 | Parameters 327 | ---------- 328 | 329 | n_components : int 330 | Number of states. 331 | 332 | startprob_prior : array, shape (n_components, ), optional 333 | Parameters of the Dirichlet prior distribution for 334 | :attr:`startprob_`. 335 | 336 | transmat_prior : array, shape (n_components, n_components), optional 337 | Parameters of the Dirichlet prior distribution for each row 338 | of the transition probabilities :attr:`transmat_`. 339 | 340 | algorithm : string, optional 341 | Decoder algorithm. Must be one of "viterbi" or "map". 342 | Defaults to "viterbi". 343 | 344 | random_state: RandomState or an int seed, optional 345 | A random number generator instance. 346 | 347 | n_iter : int, optional 348 | Maximum number of iterations to perform. 349 | 350 | tol : float, optional 351 | Convergence threshold. EM will stop if the gain in log-likelihood 352 | is below this value. 353 | 354 | verbose : bool, optional 355 | When ``True`` per-iteration convergence reports are printed 356 | to :data:`sys.stderr`. You can diagnose convergence via the 357 | :attr:`monitor_` attribute. 358 | 359 | params : string, optional 360 | Controls which parameters are updated in the training 361 | process. Can contain any combination of 's' for startprob, 362 | 't' for transmat, 'e' for emissionprob. 363 | Defaults to all parameters. 364 | 365 | init_params : string, optional 366 | Controls which parameters are initialized prior to 367 | training. Can contain any combination of 's' for 368 | startprob, 't' for transmat, 'e' for emissionprob. 369 | Defaults to all parameters. 370 | 371 | Attributes 372 | ---------- 373 | n_features : int 374 | Number of possible symbols emitted by the model (in the samples). 
375 | 376 | monitor\_ : ConvergenceMonitor 377 | Monitor object used to check the convergence of EM. 378 | 379 | startprob\_ : array, shape (n_components, ) 380 | Initial state occupation distribution. 381 | 382 | transmat\_ : array, shape (n_components, n_components) 383 | Matrix of transition probabilities between states. 384 | 385 | emissionprob\_ : array, shape (n_components, n_features) 386 | Probability of emitting a given symbol when in each state. 387 | 388 | Examples 389 | -------- 390 | >>> from tebm.tebm import MultinomialTEBM 391 | >>> MultinomialTEBM(n_components=2) #doctest: +ELLIPSIS 392 | MultinomialTEBM(algorithm='viterbi',... 393 | """ 394 | # TODO: accept the prior on emissionprob_ for consistency. 395 | def __init__(self, n_components=1, 396 | startprob_prior=1.0, transmat_prior=1.0, 397 | algorithm="viterbi", random_state=None, 398 | n_iter=10, tol=1e-2, verbose=False, 399 | params="ste", init_params="ste"): 400 | _BaseTEBM.__init__(self, n_components, 401 | startprob_prior=startprob_prior, 402 | transmat_prior=transmat_prior, 403 | algorithm=algorithm, 404 | random_state=random_state, 405 | n_iter=n_iter, tol=tol, verbose=verbose, 406 | params=params, init_params=init_params) 407 | 408 | def _get_n_fit_scalars_per_param(self): 409 | nc = self.n_components 410 | nf = self.n_features 411 | return { 412 | "s": nc - 1, 413 | "t": nc * (nc - 1), 414 | "e": nc * (nf - 1), 415 | } 416 | 417 | def _init(self, X, lengths=None): 418 | self._check_and_set_n_features(X) 419 | super()._init(X, lengths=lengths) 420 | self.random_state = check_random_state(self.random_state) 421 | 422 | if 'e' in self.init_params: 423 | self.emissionprob_ = self.random_state \ 424 | .rand(self.n_components, self.n_features) 425 | normalize(self.emissionprob_, axis=1) 426 | 427 | def _check(self): 428 | super()._check() 429 | 430 | self.emissionprob_ = np.atleast_2d(self.emissionprob_) 431 | n_features = getattr(self, "n_features", self.emissionprob_.shape[1]) 432 | if self.emissionprob_.shape != (self.n_components, n_features): 433 | raise ValueError( 434 | "emissionprob_ must have shape (n_components, n_features)") 435 | else: 436 | self.n_features = n_features 437 | 438 | def _compute_log_likelihood(self, X): 439 | return log_mask_zero(self.emissionprob_)[:, np.concatenate(X)].T 440 | 441 | def _generate_sample_from_state(self, state, random_state=None): 442 | cdf = np.cumsum(self.emissionprob_[state, :]) 443 | random_state = check_random_state(random_state) 444 | return [(cdf > random_state.rand()).argmax()] 445 | 446 | def _initialize_sufficient_statistics(self): 447 | stats = super()._initialize_sufficient_statistics() 448 | stats['obs'] = np.zeros((self.n_components, self.n_features)) 449 | return stats 450 | 451 | def _accumulate_sufficient_statistics(self, stats, X, framelogprob, 452 | posteriors, fwdlattice, bwdlattice): 453 | super()._accumulate_sufficient_statistics( 454 | stats, X, framelogprob, posteriors, fwdlattice, bwdlattice) 455 | if 'e' in self.params: 456 | for t, symbol in enumerate(np.concatenate(X)): 457 | stats['obs'][:, symbol] += posteriors[t] 458 | 459 | def _do_mstep(self, stats): 460 | super()._do_mstep(stats) 461 | if 'e' in self.params: 462 | self.emissionprob_ = (stats['obs'] 463 | / stats['obs'].sum(axis=1)[:, np.newaxis]) 464 | 465 | def _check_and_set_n_features(self, X): 466 | """ 467 | Check if ``X`` is a sample from a Multinomial distribution, i.e. an 468 | array of non-negative integers. 
469 | """ 470 | if not np.issubdtype(X.dtype, np.integer): 471 | raise ValueError("Symbols should be integers") 472 | if X.min() < 0: 473 | raise ValueError("Symbols should be nonnegative") 474 | if hasattr(self, "n_features"): 475 | if self.n_features - 1 < X.max(): 476 | raise ValueError( 477 | "Largest symbol is {} but the model only emits " 478 | "symbols up to {}" 479 | .format(X.max(), self.n_features - 1)) 480 | self.n_features = X.max() + 1 481 | 482 | 483 | class GMMCTHMM(_BaseTEBM): 484 | r"""Hidden Markov Model with Gaussian mixture emissions. 485 | 486 | Parameters 487 | ---------- 488 | n_components : int 489 | Number of states in the model. 490 | 491 | n_mix : int 492 | Number of states in the GMM. 493 | 494 | covariance_type : string, optional 495 | String describing the type of covariance parameters to 496 | use. Must be one of 497 | 498 | * "spherical" --- each state uses a single variance value that 499 | applies to all features. 500 | * "diag" --- each state uses a diagonal covariance matrix. 501 | * "full" --- each state uses a full (i.e. unrestricted) 502 | covariance matrix. 503 | * "tied" --- all mixture components of each state use **the same** full 504 | covariance matrix (note that this is not the same as for 505 | `MixtureTEBM`). 506 | 507 | Defaults to "diag". 508 | 509 | min_covar : float, optional 510 | Floor on the diagonal of the covariance matrix to prevent 511 | overfitting. Defaults to 1e-3. 512 | 513 | startprob_prior : array, shape (n_components, ), optional 514 | Parameters of the Dirichlet prior distribution for 515 | :attr:`startprob_`. 516 | 517 | transmat_prior : array, shape (n_components, n_components), optional 518 | Parameters of the Dirichlet prior distribution for each row 519 | of the transition probabilities :attr:`transmat_`. 520 | 521 | weights_prior : array, shape (n_mix, ), optional 522 | Parameters of the Dirichlet prior distribution for 523 | :attr:`weights_`. 524 | 525 | means_prior, means_weight : array, shape (n_mix, ), optional 526 | Mean and precision of the Normal prior distribtion for 527 | :attr:`means_`. 528 | 529 | covars_prior, covars_weight : array, shape (n_mix, ), optional 530 | Parameters of the prior distribution for the covariance matrix 531 | :attr:`covars_`. 532 | 533 | If :attr:`covariance_type` is "spherical" or "diag" the prior is 534 | the inverse gamma distribution, otherwise --- the inverse Wishart 535 | distribution. 536 | 537 | algorithm : string, optional 538 | Decoder algorithm. Must be one of "viterbi" or "map". 539 | Defaults to "viterbi". 540 | 541 | random_state: RandomState or an int seed, optional 542 | A random number generator instance. 543 | 544 | n_iter : int, optional 545 | Maximum number of iterations to perform. 546 | 547 | tol : float, optional 548 | Convergence threshold. EM will stop if the gain in log-likelihood 549 | is below this value. 550 | 551 | verbose : bool, optional 552 | When ``True`` per-iteration convergence reports are printed 553 | to :data:`sys.stderr`. You can diagnose convergence via the 554 | :attr:`monitor_` attribute. 555 | 556 | init_params : string, optional 557 | Controls which parameters are initialized prior to training. Can 558 | contain any combination of 's' for startprob, 't' for transmat, 'm' 559 | for means, 'c' for covars, and 'w' for GMM mixing weights. 560 | Defaults to all parameters. 561 | 562 | params : string, optional 563 | Controls which parameters are updated in the training process. 
Can 564 | contain any combination of 's' for startprob, 't' for transmat, 'm' for 565 | means, and 'c' for covars, and 'w' for GMM mixing weights. 566 | Defaults to all parameters. 567 | 568 | Attributes 569 | ---------- 570 | monitor\_ : ConvergenceMonitor 571 | Monitor object used to check the convergence of EM. 572 | 573 | startprob\_ : array, shape (n_components, ) 574 | Initial state occupation distribution. 575 | 576 | transmat\_ : array, shape (n_components, n_components) 577 | Matrix of transition probabilities between states. 578 | 579 | weights\_ : array, shape (n_components, n_mix) 580 | Mixture weights for each state. 581 | 582 | means\_ : array, shape (n_components, n_mix) 583 | Mean parameters for each mixture component in each state. 584 | 585 | covars\_ : array 586 | Covariance parameters for each mixture components in each state. 587 | 588 | The shape depends on :attr:`covariance_type`:: 589 | 590 | (n_components, n_mix) if "spherical", 591 | (n_components, n_mix, n_features) if "diag", 592 | (n_components, n_mix, n_features, n_features) if "full" 593 | (n_components, n_features, n_features) if "tied", 594 | """ 595 | 596 | def __init__(self, n_components=1, n_mix=1, 597 | min_covar=1e-3, startprob_prior=1.0, transmat_prior=1.0, 598 | weights_prior=1.0, means_prior=0.0, means_weight=0.0, 599 | covars_prior=None, covars_weight=None, 600 | algorithm="viterbi", covariance_type="diag", 601 | random_state=None, n_iter=10, tol=1e-2, 602 | verbose=False, params="stmcw", 603 | init_params="stmcw"): 604 | _BaseTEBM.__init__(self, n_components, 605 | startprob_prior=startprob_prior, 606 | transmat_prior=transmat_prior, 607 | algorithm=algorithm, random_state=random_state, 608 | n_iter=n_iter, tol=tol, verbose=verbose, 609 | params=params, init_params=init_params) 610 | self.covariance_type = covariance_type 611 | self.min_covar = min_covar 612 | self.n_mix = n_mix 613 | self.weights_prior = weights_prior 614 | self.means_prior = means_prior 615 | self.means_weight = means_weight 616 | self.covars_prior = covars_prior 617 | self.covars_weight = covars_weight 618 | 619 | def _get_n_fit_scalars_per_param(self): 620 | nc = self.n_components 621 | nf = self.n_features 622 | nm = self.n_mix 623 | return { 624 | "s": nc - 1, 625 | "t": nc * (nc - 1), 626 | "m": nc * nm * nf, 627 | "c": { 628 | "spherical": nc * nm, 629 | "diag": nc * nm * nf, 630 | "full": nc * nm * nf * (nf + 1) // 2, 631 | "tied": nc * nf * (nf + 1) // 2, 632 | }[self.covariance_type], 633 | "w": nm - 1, 634 | } 635 | 636 | def _init(self, X, lengths=None): 637 | _check_and_set_gaussian_n_features(self) 638 | super()._init(X, lengths=lengths) 639 | nc = self.n_components 640 | nf = self.n_features 641 | nm = self.n_mix 642 | 643 | # Default values for covariance prior parameters 644 | self._init_covar_priors() 645 | self._fix_priors_shape() 646 | 647 | main_kmeans = cluster.KMeans(n_clusters=nc, 648 | random_state=self.random_state) 649 | labels = main_kmeans.fit_predict(X) 650 | kmeanses = [] 651 | for label in range(nc): 652 | kmeans = cluster.KMeans(n_clusters=nm, 653 | random_state=self.random_state) 654 | kmeans.fit(X[np.where(labels == label)]) 655 | kmeanses.append(kmeans) 656 | 657 | if 'w' in self.init_params or not hasattr(self, "weights_"): 658 | self.weights_ = np.ones((nc, nm)) / (np.ones((nc, 1)) * nm) 659 | 660 | if 'm' in self.init_params or not hasattr(self, "means_"): 661 | self.means_ = np.stack( 662 | [kmeans.cluster_centers_ for kmeans in kmeanses]) 663 | 664 | if 'c' in self.init_params or not 
hasattr(self, "covars_"): 665 | cv = np.cov(X.T) + self.min_covar * np.eye(nf) 666 | if not cv.shape: 667 | cv.shape = (1, 1) 668 | if self.covariance_type == 'tied': 669 | self.covars_ = np.zeros((nc, nf, nf)) 670 | self.covars_[:] = cv 671 | elif self.covariance_type == 'full': 672 | self.covars_ = np.zeros((nc, nm, nf, nf)) 673 | self.covars_[:] = cv 674 | elif self.covariance_type == 'diag': 675 | self.covars_ = np.zeros((nc, nm, nf)) 676 | self.covars_[:] = np.diag(cv) 677 | elif self.covariance_type == 'spherical': 678 | self.covars_ = np.zeros((nc, nm)) 679 | self.covars_[:] = cv.mean() 680 | 681 | def _init_covar_priors(self): 682 | if self.covariance_type == "full": 683 | if self.covars_prior is None: 684 | self.covars_prior = 0.0 685 | if self.covars_weight is None: 686 | self.covars_weight = -(1.0 + self.n_features + 1.0) 687 | elif self.covariance_type == "tied": 688 | if self.covars_prior is None: 689 | self.covars_prior = 0.0 690 | if self.covars_weight is None: 691 | self.covars_weight = -(self.n_mix + self.n_features + 1.0) 692 | elif self.covariance_type == "diag": 693 | if self.covars_prior is None: 694 | self.covars_prior = -1.5 695 | if self.covars_weight is None: 696 | self.covars_weight = 0.0 697 | elif self.covariance_type == "spherical": 698 | if self.covars_prior is None: 699 | self.covars_prior = -(self.n_mix + 2.0) / 2.0 700 | if self.covars_weight is None: 701 | self.covars_weight = 0.0 702 | 703 | def _fix_priors_shape(self): 704 | nc = self.n_components 705 | nf = self.n_features 706 | nm = self.n_mix 707 | 708 | # If priors are numbers, this function will make them into a 709 | # matrix of proper shape 710 | self.weights_prior = np.broadcast_to( 711 | self.weights_prior, (nc, nm)).copy() 712 | self.means_prior = np.broadcast_to( 713 | self.means_prior, (nc, nm, nf)).copy() 714 | self.means_weight = np.broadcast_to( 715 | self.means_weight, (nc, nm)).copy() 716 | 717 | if self.covariance_type == "full": 718 | self.covars_prior = np.broadcast_to( 719 | self.covars_prior, (nc, nm, nf, nf)).copy() 720 | self.covars_weight = np.broadcast_to( 721 | self.covars_weight, (nc, nm)).copy() 722 | elif self.covariance_type == "tied": 723 | self.covars_prior = np.broadcast_to( 724 | self.covars_prior, (nc, nf, nf)).copy() 725 | self.covars_weight = np.broadcast_to( 726 | self.covars_weight, nc).copy() 727 | elif self.covariance_type == "diag": 728 | self.covars_prior = np.broadcast_to( 729 | self.covars_prior, (nc, nm, nf)).copy() 730 | self.covars_weight = np.broadcast_to( 731 | self.covars_weight, (nc, nm, nf)).copy() 732 | elif self.covariance_type == "spherical": 733 | self.covars_prior = np.broadcast_to( 734 | self.covars_prior, (nc, nm)).copy() 735 | self.covars_weight = np.broadcast_to( 736 | self.covars_weight, (nc, nm)).copy() 737 | 738 | def _check(self): 739 | super()._check() 740 | if not hasattr(self, "n_features"): 741 | self.n_features = self.means_.shape[2] 742 | nc = self.n_components 743 | nf = self.n_features 744 | nm = self.n_mix 745 | 746 | self._init_covar_priors() 747 | self._fix_priors_shape() 748 | 749 | # Checking covariance type 750 | if self.covariance_type not in COVARIANCE_TYPES: 751 | raise ValueError("covariance_type must be one of {}" 752 | .format(COVARIANCE_TYPES)) 753 | 754 | self.weights_ = np.array(self.weights_) 755 | # Checking mixture weights' shape 756 | if self.weights_.shape != (nc, nm): 757 | raise ValueError("mixture weights must have shape " 758 | "(n_components, n_mix), actual shape: {}" 759 | .format(self.weights_.shape)) 
760 | 761 | # Checking mixture weights' mathematical correctness 762 | if not np.allclose(np.sum(self.weights_, axis=1), np.ones(nc)): 763 | raise ValueError("mixture weights must sum up to 1") 764 | 765 | # Checking means' shape 766 | self.means_ = np.array(self.means_) 767 | if self.means_.shape != (nc, nm, nf): 768 | raise ValueError("mixture means must have shape " 769 | "(n_components, n_mix, n_features), " 770 | "actual shape: {}".format(self.means_.shape)) 771 | 772 | # Checking covariances' shape 773 | self.covars_ = np.array(self.covars_) 774 | covars_shape = self.covars_.shape 775 | needed_shapes = { 776 | "spherical": (nc, nm), 777 | "tied": (nc, nf, nf), 778 | "diag": (nc, nm, nf), 779 | "full": (nc, nm, nf, nf), 780 | } 781 | needed_shape = needed_shapes[self.covariance_type] 782 | if covars_shape != needed_shape: 783 | raise ValueError("{!r} mixture covars must have shape {}, " 784 | "actual shape: {}" 785 | .format(self.covariance_type, 786 | needed_shape, covars_shape)) 787 | 788 | # Checking covariances' mathematical correctness 789 | from scipy import linalg 790 | 791 | if (self.covariance_type == "spherical" or 792 | self.covariance_type == "diag"): 793 | if np.any(self.covars_ < 0): 794 | raise ValueError("{!r} mixture covars must be non-negative" 795 | .format(self.covariance_type)) 796 | if np.any(self.covars_ == 0): 797 | _log.warning("Degenerate mixture covariance") 798 | elif self.covariance_type == "tied": 799 | for i, covar in enumerate(self.covars_): 800 | if not np.allclose(covar, covar.T): 801 | raise ValueError("Covariance of state #{} is not symmetric" 802 | .format(i)) 803 | min_eigvalsh = np.linalg.eigvalsh(covar).min() 804 | if min_eigvalsh < 0: 805 | raise ValueError("Covariance of state #{} is not positive " 806 | "definite".format(i)) 807 | if min_eigvalsh == 0: 808 | _log.warning("Covariance of state #%d has a null " 809 | "eigenvalue.", i) 810 | elif self.covariance_type == "full": 811 | for i, mix_covars in enumerate(self.covars_): 812 | for j, covar in enumerate(mix_covars): 813 | if not np.allclose(covar, covar.T): 814 | raise ValueError( 815 | "Covariance of state #{}, mixture #{} is not " 816 | "symmetric".format(i, j)) 817 | min_eigvalsh = np.linalg.eigvalsh(covar).min() 818 | if min_eigvalsh < 0: 819 | raise ValueError( 820 | "Covariance of state #{}, mixture #{} is not " 821 | "positive definite".format(i, j)) 822 | if min_eigvalsh == 0: 823 | _log.warning("Covariance of state #%d, mixture #%d " 824 | "has a null eigenvalue.", i, j) 825 | 826 | def _generate_sample_from_state(self, state, random_state=None): 827 | if random_state is None: 828 | random_state = self.random_state 829 | random_state = check_random_state(random_state) 830 | 831 | cur_weights = self.weights_[state] 832 | i_gauss = random_state.choice(self.n_mix, p=cur_weights) 833 | if self.covariance_type == 'tied': 834 | # self.covars_.shape == (n_components, n_features, n_features) 835 | # shouldn't that be (n_mix, ...)? 
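            # Note: for "tied" covariances, all mixture components of a state
            # share one full (n_features, n_features) matrix, so covars_ has
            # shape (n_components, n_features, n_features) (see _check above)
            # and covs[state] below already selects the right matrix without
            # a mixture index.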
836 | covs = self.covars_ 837 | else: 838 | covs = self.covars_[:, i_gauss] 839 | covs = fill_covars(covs, self.covariance_type, 840 | self.n_components, self.n_features) 841 | return random_state.multivariate_normal( 842 | self.means_[state, i_gauss], covs[state] 843 | ) 844 | 845 | def _compute_log_weighted_gaussian_densities(self, X, i_comp): 846 | cur_means = self.means_[i_comp] 847 | cur_covs = self.covars_[i_comp] 848 | if self.covariance_type == 'spherical': 849 | cur_covs = cur_covs[:, np.newaxis] 850 | log_cur_weights = np.log(self.weights_[i_comp]) 851 | 852 | return log_multivariate_normal_density( 853 | X, cur_means, cur_covs, self.covariance_type 854 | ) + log_cur_weights 855 | 856 | def _compute_log_likelihood(self, X): 857 | n_samples, _ = X.shape 858 | res = np.zeros((n_samples, self.n_components)) 859 | 860 | for i in range(self.n_components): 861 | log_denses = self._compute_log_weighted_gaussian_densities(X, i) 862 | with np.errstate(under="ignore"): 863 | res[:, i] = logsumexp(log_denses, axis=1) 864 | 865 | return res 866 | 867 | def _initialize_sufficient_statistics(self): 868 | stats = super()._initialize_sufficient_statistics() 869 | stats['n_samples'] = 0 870 | stats['post_comp_mix'] = None 871 | stats['post_mix_sum'] = np.zeros((self.n_components, self.n_mix)) 872 | stats['post_sum'] = np.zeros(self.n_components) 873 | stats['samples'] = None 874 | stats['centered'] = None 875 | return stats 876 | 877 | def _accumulate_sufficient_statistics(self, stats, X, framelogprob, 878 | post_comp, fwdlattice, bwdlattice): 879 | 880 | # TODO: support multiple frames 881 | 882 | super()._accumulate_sufficient_statistics( 883 | stats, X, framelogprob, post_comp, fwdlattice, bwdlattice 884 | ) 885 | 886 | n_samples, _ = X.shape 887 | 888 | stats['n_samples'] = n_samples 889 | stats['samples'] = X 890 | 891 | post_mix = np.zeros((n_samples, self.n_components, self.n_mix)) 892 | for p in range(self.n_components): 893 | log_denses = self._compute_log_weighted_gaussian_densities(X, p) 894 | log_normalize(log_denses, axis=-1) 895 | with np.errstate(under="ignore"): 896 | post_mix[:, p, :] = np.exp(log_denses) 897 | 898 | with np.errstate(under="ignore"): 899 | post_comp_mix = post_comp[:, :, np.newaxis] * post_mix 900 | stats['post_comp_mix'] = post_comp_mix 901 | 902 | stats['post_mix_sum'] = np.sum(post_comp_mix, axis=0) 903 | stats['post_sum'] = np.sum(post_comp, axis=0) 904 | 905 | stats['centered'] = X[:, np.newaxis, np.newaxis, :] - self.means_ 906 | 907 | def _do_mstep(self, stats): 908 | super()._do_mstep(stats) 909 | nc = self.n_components 910 | nf = self.n_features 911 | nm = self.n_mix 912 | 913 | n_samples = stats['n_samples'] 914 | 915 | # Maximizing weights 916 | alphas_minus_one = self.weights_prior - 1 917 | new_weights_numer = stats['post_mix_sum'] + alphas_minus_one 918 | new_weights_denom = ( 919 | stats['post_sum'] + np.sum(alphas_minus_one, axis=1) 920 | )[:, np.newaxis] 921 | new_weights = new_weights_numer / new_weights_denom 922 | 923 | # Maximizing means 924 | lambdas, mus = self.means_weight, self.means_prior 925 | new_means_numer = ( 926 | np.einsum('ijk,il->jkl', stats['post_comp_mix'], stats['samples']) 927 | + lambdas[:, :, np.newaxis] * mus 928 | ) 929 | new_means_denom = (stats['post_mix_sum'] + lambdas)[:, :, np.newaxis] 930 | new_means = new_means_numer / new_means_denom 931 | 932 | # Maximizing covariances 933 | centered_means = self.means_ - mus 934 | 935 | if self.covariance_type == 'full': 936 | centered = stats['centered'].reshape((n_samples, 
nc, nm, nf, 1)) 937 | centered_t = stats['centered'].reshape((n_samples, nc, nm, 1, nf)) 938 | centered_dots = centered * centered_t 939 | 940 | psis_t = np.transpose(self.covars_prior, axes=(0, 1, 3, 2)) 941 | nus = self.covars_weight 942 | 943 | centr_means_resh = centered_means.reshape((nc, nm, nf, 1)) 944 | centr_means_resh_t = centered_means.reshape((nc, nm, 1, nf)) 945 | centered_means_dots = centr_means_resh * centr_means_resh_t 946 | 947 | new_cov_numer = ( 948 | np.einsum( 949 | 'ijk,ijklm->jklm', stats['post_comp_mix'], centered_dots) 950 | + psis_t 951 | + lambdas[:, :, np.newaxis, np.newaxis] * centered_means_dots 952 | ) 953 | new_cov_denom = ( 954 | stats['post_mix_sum'] + 1 + nus + nf + 1 955 | )[:, :, np.newaxis, np.newaxis] 956 | new_cov = new_cov_numer / new_cov_denom 957 | 958 | elif self.covariance_type == 'diag': 959 | centered2 = stats['centered'] ** 2 960 | centered_means2 = centered_means ** 2 961 | 962 | alphas = self.covars_prior 963 | betas = self.covars_weight 964 | 965 | new_cov_numer = ( 966 | np.einsum('ijk,ijkl->jkl', stats['post_comp_mix'], centered2) 967 | + lambdas[:, :, np.newaxis] * centered_means2 968 | + 2 * betas 969 | ) 970 | new_cov_denom = ( 971 | stats['post_mix_sum'][:, :, np.newaxis] + 1 + 2 * (alphas + 1) 972 | ) 973 | new_cov = new_cov_numer / new_cov_denom 974 | 975 | elif self.covariance_type == 'spherical': 976 | centered_norm2 = np.sum(stats['centered'] ** 2, axis=-1) 977 | 978 | alphas = self.covars_prior 979 | betas = self.covars_weight 980 | 981 | centered_means_norm2 = np.sum(centered_means ** 2, axis=-1) 982 | 983 | new_cov_numer = ( 984 | np.einsum( 985 | 'ijk,ijk->jk', stats['post_comp_mix'], centered_norm2) 986 | + lambdas * centered_means_norm2 987 | + 2 * betas 988 | ) 989 | new_cov_denom = nf * (stats['post_mix_sum'] + 1) + 2 * (alphas + 1) 990 | new_cov = new_cov_numer / new_cov_denom 991 | 992 | elif self.covariance_type == 'tied': 993 | centered = stats['centered'].reshape((n_samples, nc, nm, nf, 1)) 994 | centered_t = stats['centered'].reshape((n_samples, nc, nm, 1, nf)) 995 | centered_dots = centered * centered_t 996 | 997 | psis_t = np.transpose(self.covars_prior, axes=(0, 2, 1)) 998 | nus = self.covars_weight 999 | 1000 | centr_means_resh = centered_means.reshape((nc, nm, nf, 1)) 1001 | centr_means_resh_t = centered_means.reshape((nc, nm, 1, nf)) 1002 | centered_means_dots = centr_means_resh * centr_means_resh_t 1003 | 1004 | lambdas_cmdots_prod_sum = ( 1005 | np.einsum('ij,ijkl->ikl', lambdas, centered_means_dots)) 1006 | 1007 | new_cov_numer = ( 1008 | np.einsum( 1009 | 'ijk,ijklm->jlm', stats['post_comp_mix'], centered_dots) 1010 | + lambdas_cmdots_prod_sum + psis_t) 1011 | new_cov_denom = ( 1012 | stats['post_sum'] + nm + nus + nf + 1 1013 | )[:, np.newaxis, np.newaxis] 1014 | new_cov = new_cov_numer / new_cov_denom 1015 | 1016 | # Assigning new values to class members 1017 | self.weights_ = new_weights 1018 | self.means_ = new_means 1019 | self.covars_ = new_cov 1020 | --------------------------------------------------------------------------------
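The Dirichlet-MAP mixture-weight update implemented in GMMCTHMM._do_mstep above can be checked in isolation. The following is a minimal, self-contained NumPy sketch using toy responsibility arrays (the toy shapes and random values are illustrative only); the variable names mirror the sufficient statistics accumulated in _accumulate_sufficient_statistics.

    import numpy as np

    # Toy responsibilities: post_comp_mix[t, i, m] is the joint posterior of
    # state i and mixture component m at sample t.
    rng = np.random.default_rng(0)
    n_samples, n_components, n_mix = 50, 3, 2
    post_comp_mix = rng.random((n_samples, n_components, n_mix))

    # Sufficient statistics used by _do_mstep.
    post_mix_sum = post_comp_mix.sum(axis=0)        # shape (n_components, n_mix)
    post_sum = post_comp_mix.sum(axis=(0, 2))       # shape (n_components,)

    # Dirichlet prior on the mixture weights; alpha = 1 is the flat prior
    # (the default weights_prior=1.0 in GMMCTHMM).
    weights_prior = np.ones((n_components, n_mix))

    alphas_minus_one = weights_prior - 1
    new_weights = (post_mix_sum + alphas_minus_one) / (
        post_sum + alphas_minus_one.sum(axis=1))[:, np.newaxis]

    # With the flat prior this reduces to the maximum-likelihood update, so
    # each state's mixture weights sum to one.
    print(new_weights.sum(axis=1))   # -> [1. 1. 1.]

In the model itself, post_comp comes from the forward-backward pass and post_mix from the per-state Gaussian responsibilities, but the normalisation step is the one shown here.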