├── .gitignore ├── .travis.yml ├── LICENSE.txt ├── README.md ├── SimpleHOHMM ├── __init__.py ├── builder.py ├── model.py ├── package_info.json └── utility.py ├── docs ├── Makefile ├── make.bat └── source │ ├── api_reference.rst │ ├── conf.py │ ├── getting_started.rst │ ├── index.rst │ ├── license.rst │ ├── references.rst │ └── tutorials.rst ├── requirements.txt ├── setup.py └── test ├── __init__.py ├── test_builder.py ├── test_hmm.py └── test_utility.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | 3 | language: python 4 | python: 5 | - "2.7" 6 | - "3.4" 7 | - "3.5" 8 | - "pypy" 9 | - "pypy3" 10 | 11 | branches: 12 | only: 13 | - master 14 | 15 | install: 16 | - pip install coveralls 17 | 18 | script: 19 | - coverage run -m unittest discover -s test 20 | 21 | after_success: 22 | - coveralls 23 | 24 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jacob Krantz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Simple-HOHMM
2 |
3 | [![Build Status](https://travis-ci.org/jacobkrantz/Simple-HOHMM.svg?branch=master)](https://travis-ci.org/jacobkrantz/Simple-HOHMM)
4 | [![Coverage Status](https://coveralls.io/repos/github/jacobkrantz/Simple-HOHMM/badge.svg?branch=master)](https://coveralls.io/github/jacobkrantz/Simple-HOHMM?branch=master)
5 | [![Documentation Status](https://readthedocs.org/projects/simple-hohmm/badge/?version=latest)](http://simple-hohmm.readthedocs.io/en/latest/?badge=latest)
6 |
7 | Simple-HOHMM is an end-to-end sequence classifier using Hidden Markov Models. Let the builder construct a model for you based on chosen model attributes. Now you can solve the classic problems of HMMs: evaluating, decoding, and learning. Play with different orders of history to maximize the accuracy of your model!
8 |
9 | ## General
10 |
11 | #### Solving Fundamental Problems
12 | * **Evaluation**
13 | Given an observation sequence and an HMM, determine the probability that the HMM would emit that exact observation sequence. Done with the *Forward Algorithm*.
14 | * **Decoding**
15 | Given an observation sequence and an HMM, determine the most likely hidden state sequence that would emit the observation sequence. Done with the *Viterbi Algorithm*.
16 | * **Learning**
17 | Given a set of observation sequences and an HMM, reestimate the model parameters so as to maximize the probabilities resulting from the Evaluation problem. Done with the *Baum-Welch EM Algorithm*.
18 |
19 | #### Features
20 | * Learning is done in any manner desired: **supervised**, **semi-supervised**, or **unsupervised**. Supervised learning is done with explicit counts from training examples. Semi-supervised learning is done with some training examples followed by a learning algorithm. Unsupervised learning is done by creating a model of either uniformly or randomly distributed parameters followed by a learning algorithm.
21 | * Discrete (Multinomial) emissions only.
22 | * Ergodic state transitions are assumed by the model, but setting certain probabilities to zero effectively emulates unreachable states.
23 | * Smoothing of model parameters is done with additive k-smoothing to avoid cases of zero probability, especially useful for higher order modeling.
24 | * `HiddenMarkovModel` can be trained using `HiddenMarkovModelBuilder` or by passing in explicit HMM parameter values.
25 |
26 | ## Getting Started
27 |
28 | #### Requirements
29 | This project is currently written in pure Python code with zero dependencies for installation. The code has been tested and runs with Python 2, Python 3, and [pypy](https://pypy.org/). Running with pypy offers drastic speed improvements; consider this when working with large models.
30 |
31 | #### Installing Simple-HOHMM
32 | No distribution exists on PyPI yet.
To use the code now, you can install it directly from the repository:
33 | `>>> pip install git+https://github.com/jacobkrantz/Simple-HOHMM.git`
34 | Take a look at the documentation to view all methods of installation.
35 |
36 | #### Documentation
37 | [Documentation](http://simple-hohmm.readthedocs.io/en/latest/?badge=latest) consisting of an API reference and basic tutorials is live, but the API reference has not been developed yet. The tutorials there should get you up and running in the general use cases.
38 |
39 | ## Contributions
40 | Contributions are welcome. We have not hashed out exactly what that will look like yet. For now, feel free to fork the repository and dive in as you see fit, whether that is making/improving documentation, tutorials, test cases, issues, or source code. Contributors should have all dependencies installed from `requirements.txt`. This can be done using:
41 | `>>> pip install -r requirements.txt`
42 |
43 | #### Testing
44 | Run the unit tests before opening a pull request to ensure the build does not break.
45 | * Testing is done through the Python module `unittest`.
46 | * Automated testing is performed by Travis CI.
47 | * All test cases are located in `/test`.
48 |
49 | To run the entire suite of tests locally, execute:
50 | `>>> python -m unittest discover -s test`
51 | Alternatively:
52 | `>>> python setup.py test`
53 |
54 | #### Building the Documentation
55 | Docs are built using Sphinx and hosted using ReadTheDocs. You can edit the docs by updating the `.rst` files in the `/docs` folder.
56 | Make the documentation:
57 | ```
58 | >>> cd docs
59 | >>> make html
60 | ```
61 | View in browser:
62 | `>>> xdg-open build/html/index.html` (if using Linux)
63 | `>>> open build/html/index.html` (if using macOS)
64 |
65 | #### Viewing Code Coverage
66 | View code coverage before opening a pull request to ensure coverage is maintained or improved.
67 | Run the unit tests using coverage:
68 | `>>> coverage run -m unittest discover -s test`
69 | View the coverage report:
70 | `>>> coverage report -m`
71 |
-------------------------------------------------------------------------------- /SimpleHOHMM/__init__.py: --------------------------------------------------------------------------------
1 | import json
2 | from os.path import dirname
3 |
4 | from .builder import HiddenMarkovModelBuilder
5 | from .model import HiddenMarkovModel
6 |
7 | with open(dirname(__file__) + '/package_info.json') as f:
8 | _info = json.load(f)
9 |
10 | __version__ = str(_info["version"])
11 | __author__ = str(_info["author"])
12 | __contact__ = str(_info["author_email"])
13 |
-------------------------------------------------------------------------------- /SimpleHOHMM/builder.py: --------------------------------------------------------------------------------
1 | from copy import deepcopy
2 | from itertools import product
3 | import random as ran
4 |
5 | from .model import HiddenMarkovModel as HMM
6 | from .utility import init_matrix, init_matrix_uniform, init_matrix_random
7 |
8 | class HiddenMarkovModelBuilder:
9 |
10 | def __init__(self):
11 | self._obs_sequences = list()
12 | self._state_sequences = list()
13 | self._single_states = None
14 | self._all_obs = None
15 |
16 | def add_training_example(self, o, s):
17 | """
18 | Adds a single training example to the model builder.
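Observation o[i] is treated as being emitted by hidden state s[i],
so both sequences are expected to have equal length.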
19 | Args:
20 | o (list): Observation sequence
21 | s (list): Hidden state sequence
22 | """
23 | self._obs_sequences.append(o)
24 | self._state_sequences.append(s)
25 |
26 | def add_batch_training_examples(self, o_lst, s_lst):
27 | """
28 | Adds a batch of training examples to the model builder.
29 | Args:
30 | o_lst (list<list>): Observation sequences
31 | s_lst (list<list>): Hidden state sequences
32 | """
33 | self._obs_sequences += o_lst
34 | self._state_sequences += s_lst
35 |
36 | def set_single_states(self, single_states):
37 | """
38 | Sets the singular hidden states vocabulary for the HMM. If called
39 | multiple times, the vocabulary is overwritten.
40 | Args:
41 | single_states (list): list of possible singular hidden
42 | states. These states should disregard HMM order.
43 | """
44 | self._single_states = list(single_states)
45 |
46 | def set_all_obs(self, all_obs):
47 | """
48 | Sets the observation vocabulary for the HMM. If called multiple
49 | times, the vocabulary is overwritten.
50 | Args:
51 | all_obs (list): list of possible model observations.
52 | """
53 | self._all_obs = list(all_obs)
54 |
55 | def build(self, highest_order=1, k_smoothing=0.0, synthesize_states=False, include_pi=True):
56 | """
57 | Builds a Hidden Markov Model based on the previously added
58 | training examples.
59 | Args:
60 | highest_order (int): History window of hidden states. Defaults to 1.
61 | k_smoothing (float): Parameter for add-k smoothing, a
62 | generalization of Laplace smoothing. Defaults to 0.0.
63 | synthesize_states (boolean): Generate all states from permutations
64 | of single states. Avoids OOV for higher order models
65 | and ensures the model is fully ergodic.
66 | include_pi (boolean): True if the starting probabilities should be
67 | calculated from explicit training counts. False if the starting
68 | probabilities should all be set to 1 and thus ignored.
69 | Returns:
70 | HiddenMarkovModel: capable of evaluating, decoding, and learning.
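Example (an illustrative sketch; `observations` and `states` are
    assumed to be equal-length lists of observation and state
    sequences, as in the project tutorials):
        builder = HiddenMarkovModelBuilder()
        builder.add_batch_training_examples(observations, states)
        hmm = builder.build(highest_order=2, k_smoothing=0.01)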
71 | """ 72 | if(highest_order < 1): 73 | raise ValueError("highest order must be 1 or greater.") 74 | 75 | # build state and observation sets 76 | if(self._all_obs is None): 77 | all_obs = self._get_unique_elements(self._obs_sequences) 78 | else: 79 | all_obs = self._all_obs 80 | 81 | if(self._single_states is None): 82 | single_states = self._get_higher_order_states(self._state_sequences, 1) 83 | if(synthesize_states): 84 | all_states = self._make_permutations(single_states, highest_order) 85 | else: 86 | all_states = self._get_higher_order_states(self._state_sequences, highest_order) 87 | else: 88 | synthesize_states = True 89 | single_states = self._single_states 90 | all_states = self._make_permutations(single_states, highest_order) 91 | 92 | # build probability distribution parameters 93 | start_probs = list() 94 | for i in range(highest_order): 95 | start_probs.append(self._calculate_start_probs( 96 | state_sequences = self._state_sequences, 97 | single_states = single_states, 98 | order = i+1, 99 | k_smoothing = k_smoothing, 100 | synthesize_states = synthesize_states, 101 | set_to_1 = not include_pi 102 | )) 103 | 104 | trans_probs = self._calculate_transition_probs(all_states, highest_order, k_smoothing) 105 | emission_probs = self._calculate_emission_probs(single_states, all_obs, k_smoothing) 106 | 107 | # combine all parameters to build final model 108 | return HMM( 109 | trans_probs, 110 | emission_probs, 111 | start_probs, 112 | all_obs, 113 | all_states, 114 | single_states=single_states, 115 | order=highest_order 116 | ) 117 | 118 | def build_unsupervised(self, single_states=None, all_obs=None, distribution="random", highest_order=1): 119 | """ 120 | Builds a Hidden Markov Model based on a uniform probability 121 | distribution. 122 | Args: 123 | single_states (list<>): list of unique elements detailing all 124 | possible hidden states the model should account for. If default, 125 | uses the values set previously through 'set_single_states'. 126 | all_obs (list<>): list of unique elements detailing all possible 127 | observation elements the model should account for. If default, 128 | uses the values set previously through 'set_all_obs'. 129 | distribution (string): either 'random' for a random probability 130 | distribution, or 'uniform' for a uniform probability 131 | distribution. defaults to 'random'. 132 | highest_order (int): History window of hidden states. Defaults to 1. 133 | Returns: 134 | HiddenMarkovModel: capable of evaluating, decoding, and learning. 
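Example (an illustrative sketch; the vocabularies shown are
    assumptions borrowed from the project tutorials):
        builder = HiddenMarkovModelBuilder()
        builder.set_single_states(['healthy', 'fever'])
        builder.set_all_obs(['normal', 'cold', 'dizzy'])
        hmm = builder.build_unsupervised(
            distribution='random',
            highest_order=2
        )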
135 | """ 136 | if(distribution not in ('random', 'uniform')): 137 | raise ValueError("parameter 'distribution must be either 'random' or 'uniform'") 138 | if(single_states is None): 139 | single_states = self._single_states 140 | if(all_obs is None): 141 | all_obs = self._all_obs 142 | 143 | single_states = list(set(single_states)) 144 | all_obs = list(set(all_obs)) 145 | all_states = self._make_permutations(single_states, highest_order) 146 | num_states = len(all_states) 147 | if(distribution == 'uniform'): 148 | trans_probs = init_matrix_uniform(num_states, num_states) 149 | emission_probs = init_matrix_uniform(num_states, len(all_obs)) 150 | start_probs = self._init_uniform_start_probs( 151 | single_states, 152 | highest_order 153 | ) 154 | else: # 'random' 155 | trans_probs = init_matrix_random(num_states, num_states) 156 | emission_probs = init_matrix_random(num_states, len(all_obs)) 157 | start_probs = self._init_random_start_probs( 158 | single_states, 159 | highest_order 160 | ) 161 | 162 | # combine all parameters to build final model 163 | return HMM( 164 | trans_probs, 165 | emission_probs, 166 | start_probs, 167 | all_obs, 168 | all_states, 169 | single_states=single_states, 170 | order=highest_order 171 | ) 172 | 173 | def clear_all_sets(self): 174 | """ 175 | Deletes all training examples previously in the builder. 176 | Deletes observation and hidden state vocabularies. 177 | """ 178 | self._obs_sequences = list() 179 | self._state_sequences = list() 180 | self._single_states = None 181 | self._all_obs = None 182 | 183 | # ----------------- # 184 | # Private # 185 | # ----------------- # 186 | 187 | def _get_unique_elements(self, set_of_lists): 188 | unique_set = set() 189 | for obs_lst in set_of_lists: 190 | unique_set.update(set(obs_lst)) 191 | return list(unique_set) 192 | 193 | def _calculate_transition_probs(self, all_states, order, k_smoothing): 194 | matrix_size = len(all_states) 195 | state_trans_dict = dict() 196 | 197 | # initialize matrix and normalization dict 198 | trans_probs = init_matrix(matrix_size, matrix_size, "int") 199 | for state in all_states: 200 | state_trans_dict[state] = 0 201 | 202 | # insert counts of transitions 203 | state_sequences = self._make_higher_order_states( 204 | self._state_sequences, 205 | order 206 | ) 207 | 208 | for states in state_sequences: 209 | for i in range(1, len(states)): 210 | prev_index = all_states.index(states[i - 1]) 211 | cur_index = all_states.index(states[i]) 212 | trans_probs[prev_index][cur_index] += 1 213 | state_trans_dict[all_states[prev_index]] += 1 214 | 215 | # normalize such that for all rows sum(trans_probs[state][s0...sn]) == 1 216 | for prev_index in range(matrix_size): 217 | divisor = state_trans_dict[all_states[prev_index]] 218 | if divisor == 0 and k_smoothing == 0: 219 | continue # avoid ZeroDivisionError 220 | 221 | for cur_index in range(matrix_size): 222 | trans_probs[prev_index][cur_index] += k_smoothing 223 | trans_probs[prev_index][cur_index] /= float( 224 | divisor + (matrix_size * k_smoothing) 225 | ) 226 | 227 | return trans_probs 228 | 229 | def _calculate_emission_probs(self, all_states, all_obs, k_smoothing): 230 | rows = len(all_states) 231 | columns = len(all_obs) 232 | state_emission_dict = dict() 233 | 234 | # initializate matrix and normalization dict 235 | emission_probs = init_matrix(rows, columns, "int") 236 | for state in all_states: 237 | state_emission_dict[state] = 0 + k_smoothing 238 | 239 | # insert counts of emissions 240 | for i in range(len(self._obs_sequences)): 241 | 
obs_lst = self._obs_sequences[i]
242 | states_lst = self._state_sequences[i]
243 | for j in range(len(obs_lst)):
244 | obs = obs_lst[j]
245 | obs_index = all_obs.index(obs)
246 |
247 | state = states_lst[j]
248 | state_index = all_states.index(state)
249 |
250 | emission_probs[state_index][obs_index] += 1
251 | state_emission_dict[state] += 1
252 |
253 | # normalize such that for all rows sum(emission_probs[state][o0...on]) == 1
254 | for row in range(rows):
255 | divisor = float(state_emission_dict[all_states[row]])
256 | for column in range(columns):
257 | emission_probs[row][column] += k_smoothing
258 | emission_probs[row][column] /= float(
259 | divisor + (rows * k_smoothing)
260 | )
261 |
262 | return emission_probs
263 |
264 | def _get_higher_order_states(self, state_sequences, order):
265 | if(order == 1):
266 | return self._get_unique_elements(state_sequences)
267 |
268 | all_states_set = set()
269 |
270 | for sequence in state_sequences:
271 | if(len(sequence) <= order):
272 | continue
273 |
274 | for i in range(order - 1, len(sequence)):
275 | state = ""
276 | for j in range(i-order+1, i+1):
277 | state += (sequence[j] + '-')
278 |
279 | all_states_set.add(state[:len(state)-1])
280 |
281 | return list(all_states_set)
282 |
283 | def _calculate_start_probs(self, state_sequences, single_states, order, k_smoothing, synthesize_states, set_to_1):
284 | """
285 | Calculates the starting probability distribution for a given order.
286 | Args:
287 | state_sequences (list<list>): Hidden state sequences
288 | single_states (list): list of possible singular hidden
289 | states. These states should disregard HMM order.
290 | order (int): History window of hidden states.
291 | k_smoothing (float): Parameter for add-k smoothing, a
292 | generalization of Laplace smoothing.
293 | synthesize_states (boolean): if True, creates the state set from permutations of the single states rather than from training counts.
294 | set_to_1 (boolean): set all starting probabilities to 1 if True.
295 | Otherwise, calculate and normalize from training counts.
296 | Returns: 297 | dict[state:probability] 298 | """ 299 | start_probs_dict = dict() 300 | 301 | # initialize dictionary to state:initial count 302 | if synthesize_states: 303 | states = self._make_permutations(single_states, order) 304 | else: 305 | states = self._get_higher_order_states(state_sequences, order) 306 | 307 | for state in states: 308 | start_probs_dict[state] = 1 if set_to_1 else k_smoothing 309 | 310 | if set_to_1: 311 | return start_probs_dict 312 | 313 | # insert counts 314 | start_state_emissions = 0 315 | for state_seq in state_sequences: 316 | if(len(state_seq) < order): 317 | continue 318 | 319 | state = "" 320 | for i in range(order): 321 | state += (state_seq[i] + '-') 322 | start_probs_dict[state[:len(state)-1]] += 1 323 | start_state_emissions += 1 324 | 325 | # normalize dictionary such that sum(start_probs_dict[s0...sn]) = 1 326 | for state in start_probs_dict.keys(): 327 | start_probs_dict[state] /= float( 328 | start_state_emissions 329 | + (len(states) * k_smoothing) 330 | ) 331 | 332 | return start_probs_dict 333 | 334 | def _init_uniform_start_probs(self, states, highest_order): 335 | start_probs = [] 336 | for i in range(highest_order): 337 | start_probs_dict = dict() 338 | states_of_order = self._make_permutations(states, i + 1) 339 | value = float(1.0 / len(states_of_order)) 340 | for i, state in enumerate(states_of_order): 341 | start_probs_dict[state] = value 342 | 343 | start_probs.append(start_probs_dict) 344 | 345 | return start_probs 346 | 347 | def _init_random_start_probs(self, states, highest_order): 348 | start_probs = [] 349 | for i in range(highest_order): 350 | start_probs_dict = dict() 351 | states_of_order = self._make_permutations(states, i + 1) 352 | values = [ran.random() for i in range(len(states_of_order))] 353 | for i, state in enumerate(states_of_order): 354 | start_probs_dict[state] = values[i] / sum(values) 355 | 356 | start_probs.append(start_probs_dict) 357 | 358 | return start_probs 359 | 360 | def _make_higher_order_states(self, state_sequences, order): 361 | """ 362 | Args: 363 | state_sequences (list>): states to convert to a 364 | given order. 365 | order (int): n-gram value of history. 366 | Returns: 367 | list> state_sequences mapped to n-grams. 368 | Example: 369 | state_sequences = [['a', 'b', 'c', 'd', 'e', 'f']] 370 | order = 1: [['a', 'b', 'c', 'd', 'e', 'f']] 371 | order = 2: [['a-b', 'b-c', 'c-d', 'd-e', 'e-f']] 372 | order = 3: [['a-b-c', 'b-c-d', 'c-d-e', 'd-e-f']] 373 | """ 374 | if(order == 1): 375 | return state_sequences 376 | 377 | new_sequences = [] 378 | for sequence in state_sequences: 379 | new_sequence = [] 380 | for i in range(order-1, len(sequence)): 381 | state = "" 382 | for j in range(i-order+1, i+1): 383 | state += (sequence[j] + '-') 384 | 385 | new_sequence.append(state[:len(state)-1]) 386 | 387 | new_sequences.append(new_sequence) 388 | 389 | return new_sequences 390 | 391 | def _make_permutations(self, states, highest_order): 392 | """ makes a list of all permutation states from a single state. 
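Example: states=['a', 'b'] with highest_order=2 yields
        ['a-a', 'a-b', 'b-a', 'b-b'].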
""" 393 | if(highest_order == 1): 394 | return states 395 | 396 | states_lists = product(states, repeat = highest_order) 397 | new_states = [] 398 | for states_lst in states_lists: 399 | state = "" 400 | for i in range(len(states_lst)): 401 | state += (states_lst[i] + '-') 402 | 403 | new_states.append(state[:len(state)-1]) 404 | 405 | return new_states 406 | -------------------------------------------------------------------------------- /SimpleHOHMM/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from itertools import chain 4 | from math import log 5 | 6 | from .utility import init_matrix, init_3d_matrix 7 | 8 | class HiddenMarkovModel: 9 | """ 10 | Notation used: 11 | HMM: Hidden Markov Model 12 | O: Observation sequence 13 | S: Hidden state sequence 14 | A: State transition probability distribution matrix 15 | B: Observation emission probability distribution matrix 16 | pi: Initial state probability distribution vector 17 | lambda: A HMM comprised of (A,B,pi) 18 | """ 19 | def __init__(self, A, B, pi, all_obs, all_states, single_states=None, order=1): 20 | if(single_states == None): 21 | self._single_states = all_states 22 | else: 23 | self._single_states = single_states 24 | self._all_states = all_states 25 | self._all_obs = all_obs 26 | self._A = A 27 | self._B = B 28 | self._pi = pi 29 | self._highest_order = order 30 | 31 | def evaluate(self, sequence): 32 | """ 33 | Evaluation Problem: Calculate P(O|lambda). 34 | Calculates the probability of emitting the given observation 35 | sequence based on the HMM. Uses the forward algorithm. 36 | Args: 37 | sequence (list): observation sequence O 38 | Returns: 39 | float: probability of sequence being emitted 40 | """ 41 | self._check_legal_sequence(sequence) 42 | if(len(sequence) == 0): 43 | return 0 44 | 45 | alpha = self._forward(sequence) 46 | fwd_probability = sum(map( 47 | lambda s: alpha[s][len(sequence) - 1], 48 | range(len(self._all_states))) 49 | ) 50 | return fwd_probability 51 | 52 | def decode(self, sequence): 53 | """ 54 | Decoding Problem: Given O and lambda, find S such that S 'best' 55 | describes O using lambda. Uses the Viterbi Algorithm. 56 | Args: 57 | sequence (list): observation sequence O 58 | Returns: 59 | list: hidden state sequence S 60 | """ 61 | self._check_legal_sequence(sequence) 62 | if(len(sequence) == 0): 63 | return [] 64 | return self._viterbi(sequence) 65 | 66 | def learn(self, sequences, delta=0.0001, k_smoothing=0.0, iterations=-1): 67 | """ 68 | Learning Problem: Reestimate the model parameters (A,B,pi) iteratively 69 | using the Baum-Welch Algorithm (EM). Maximize P(O|lambda). 70 | It should be known that pi is currently not fully updated for HMMs 71 | of order greater than one. 72 | Args: 73 | sequences (list): list of observations O = (O1,O2,...On) used 74 | to train the initial (A,B,pi) parameters. 75 | delta (float): log value of iterative improvement such that when 76 | evaluation probabilities improve by less than delta the 77 | learning process is complete. 78 | k_smoothing (float): Smoothing parameter for add-k smoothing to 79 | avoid zero probability. Value should be between [0.0, 1.0]. 80 | iterations (int): number of iterations to perform. Will return 81 | if convergence is found before all iterations 82 | have been performed. 83 | Returns: 84 | (int): number of iterations to achieve convergence. 
85 | """ 86 | self._check_legal_sequence(set(chain.from_iterable(sequences))) 87 | num_sequences = len(sequences) 88 | 89 | cur_iterations = 0 90 | if(num_sequences == 0): 91 | return cur_iterations 92 | 93 | prior_score = sum(map( 94 | lambda O: log(self.evaluate(O)), 95 | sequences 96 | )) / num_sequences 97 | 98 | while True: 99 | for seq in sequences: 100 | self._train(seq, k_smoothing) 101 | 102 | cur_iterations += 1 103 | new_score = sum(map( 104 | lambda O: log(self.evaluate(O)), 105 | sequences 106 | )) / num_sequences 107 | 108 | if(abs(prior_score - new_score) < delta): 109 | break 110 | if(iterations > -1 and cur_iterations >= iterations): 111 | break 112 | prior_score = new_score 113 | 114 | return cur_iterations 115 | 116 | def get_parameters(self): 117 | """ Dictionary of all model parameters. """ 118 | return { 119 | "A": self._A, 120 | "B": self._B, 121 | "pi": self._pi, 122 | "all_obs": self._all_obs, 123 | "all_states": self._all_states, 124 | "single_states": self._single_states 125 | } 126 | 127 | def display_parameters(self): 128 | """ Display the lambda parameters (A,B,pi) on the console. """ 129 | names = [ 130 | "Starting probabilities (pi):", 131 | "Transition probabilities (A):", 132 | "Emission probabilities (B):" 133 | ] 134 | for i, parameter in enumerate([self._pi, self._A, self._B]): 135 | print(names[i]) 136 | for element in parameter: 137 | print(element) 138 | 139 | # ----------------- # 140 | # Private # 141 | # ----------------- # 142 | 143 | def _check_legal_sequence(self, seq): 144 | """ Throws ValueError if an element of seq is not in self._all_obs """ 145 | illegal_obs = list([x for x in seq if x not in self._all_obs]) 146 | if(len(illegal_obs) == 0): 147 | return True 148 | 149 | if(len(illegal_obs) == 1): 150 | msg = "Observation out of vocabulary: '" 151 | else: 152 | msg = "Observations out of vocabulary: '" 153 | raise ValueError(msg + ", ".join(illegal_obs) + "'") 154 | 155 | def _forward(self, sequence): 156 | rows = len(self._all_states) 157 | columns = len(sequence) 158 | alpha = init_matrix(rows, columns, "float") 159 | 160 | # initialization step 161 | for s_index, state in enumerate(self._single_states): 162 | o_index = self._all_obs.index(sequence[0]) 163 | alpha[s_index][0] = ( 164 | self._pi[0][state] 165 | * self._B[s_index][o_index] 166 | ) 167 | 168 | # iterative step 169 | for t_index in range(columns - 1): 170 | obs = sequence[t_index + 1] 171 | for s_index, state in enumerate(self._all_states): 172 | single_state_index = self._single_states.index( 173 | self._get_state_by_order(state, 1) 174 | ) 175 | for s_prime in range(len(self._all_states)): 176 | if(t_index + 1 < self._highest_order): 177 | state_by_order = self._get_state_by_order( 178 | self._all_states[s_index], 179 | t_index + 2 180 | ) 181 | a_prob = self._pi[t_index + 1][state_by_order] 182 | else: 183 | a_prob = self._A[s_prime][s_index] 184 | 185 | alpha[s_index][t_index + 1] += ( 186 | alpha[s_prime][t_index] 187 | * a_prob 188 | * self._B[single_state_index][self._all_obs.index(obs)] 189 | ) 190 | 191 | return alpha 192 | 193 | def _backward(self, sequence): 194 | rows = len(self._all_states) 195 | columns = len(sequence) 196 | beta = init_matrix(rows, columns, "float") 197 | 198 | # initialization step 199 | for s_index, state in enumerate(self._all_states): 200 | beta[s_index][-1] = 1 201 | 202 | # iterative step 203 | for t_index in reversed(range(columns-1)): 204 | obs = sequence[t_index + 1] 205 | for s_index in range(len(self._all_states)): 206 | for s_prime, 
state in enumerate(self._all_states):
207 | single_state_index = self._single_states.index(
208 | self._get_state_by_order(state, 1)
209 | )
210 | beta[s_index][t_index] += (
211 | beta[s_prime][t_index + 1]
212 | * self._A[s_index][s_prime]
213 | * self._B[single_state_index][self._all_obs.index(obs)]
214 | )
215 |
216 | return beta
217 |
218 | def _viterbi(self, sequence):
219 | """
220 | Notation used:
221 | delta: matrix holding the highest probability state path
222 | at observation time t.
223 | psi: backpointer matrix maintaining which state maximized delta.
224 | Args:
225 | sequence (list): observation sequence O
226 | Returns:
227 | list: hidden state sequence S
228 | """
229 | delta, psi = self._viterbi_forward(sequence)
230 | return self._viterbi_backward(delta, psi, sequence)
231 |
232 | def _viterbi_forward(self, sequence):
233 | """ build probability quantities delta and backpointers psi """
234 | rows = len(self._all_states)
235 | columns = len(sequence)
236 |
237 | delta = init_matrix(rows, columns, "int")
238 | psi = init_matrix(rows, columns, 'int,int')
239 |
240 | # initialization step
241 | obs_index = self._all_obs.index(sequence[0])
242 | for s_index, state in enumerate(self._all_states):
243 | single_state = self._get_state_by_order(state, 1)
244 | single_state_index = self._single_states.index(single_state)
245 | delta[s_index][0] = (
246 | self._pi[0][single_state]
247 | * self._B[single_state_index][obs_index]
248 | )
249 |
250 | # iterative step
251 | for o_index in range(1, columns):
252 | o_master_index = self._all_obs.index(sequence[o_index])
253 | for s_index, state in enumerate(self._all_states):
254 | max_prob = 0
255 | row_back = 0
256 | col_back = 0
257 |
258 | single_state_index = self._single_states.index(self._get_state_by_order(state, 1))
259 | emission_multiplier = self._B[single_state_index][o_master_index]
260 |
261 | # a multiplier of 0.0 nullifies the following computation
262 | if emission_multiplier == 0.0:
263 | continue
264 |
265 | for prev_s_index in range(rows):
266 | transition_multiplier = 0
267 | if(o_index < self._highest_order):
268 | state_by_order = self._get_state_by_order(
269 | self._all_states[s_index],
270 | o_index + 1
271 | )
272 | transition_multiplier = self._pi[o_index][state_by_order]
273 | else:
274 | transition_multiplier = self._A[prev_s_index][s_index]
275 |
276 | cur_prob = (
277 | delta[prev_s_index][o_index - 1]
278 | * transition_multiplier
279 | * emission_multiplier
280 | )
281 | if cur_prob > max_prob:
282 | max_prob = cur_prob
283 | row_back = prev_s_index
284 | col_back = o_index - 1
285 |
286 | delta[s_index][o_index] = max_prob
287 | psi[s_index][o_index] = (row_back, col_back)
288 |
289 | return delta, psi
290 |
291 | def _viterbi_backward(self, delta, psi, sequence):
292 | """ Decode by following the backpointers of psi """
293 | rev_output = []
294 | j_max = len(sequence)
295 | max_final = 0
296 | i_final = 0
297 |
298 | # find the highest probability final state (the output is built in reverse)
299 | for i in range(len(self._all_states)):
300 | current_final = delta[i][j_max - 1]
301 | if current_final > max_final:
302 | max_final = current_final
303 | i_final = i
304 |
305 | rev_output.append(self._get_state_by_order(self._all_states[i_final], 1))
306 | i_cur = psi[i_final][j_max - 1][0]
307 | j_cur = psi[i_final][j_max - 1][1]
308 |
309 | for j in range(j_max - 2, -1, -1):
310 | rev_output.append(self._get_state_by_order(self._all_states[i_cur], 1))
311 | i_cur_old = i_cur
312 | i_cur = psi[i_cur][j_cur][0]
313 | j_cur =
psi[i_cur_old][j_cur][1]
314 |
315 | return rev_output[::-1]
316 |
317 | def _train(self, sequence, k_smoothing=0.0):
318 | """
319 | Use the Baum-Welch Algorithm which utilizes Expectation-Maximization
320 | and the Forward-Backward algorithm to find the maximum likelihood
321 | estimate for parameters (A,B,pi).
322 | Notation used:
323 | gamma: Probability of being in state i at time t
324 | given O and (A,B,pi).
325 | Row: state. Column: observation
326 | xi: Joint probability of being in state i at time t and
327 | state j at time (t + 1) given O and (A,B,pi).
328 | xi[state i][state j][time t]
329 | Args:
330 | sequence (list): Observation sequence O
331 | k_smoothing (float): Smoothing parameter for add-k smoothing to
332 | avoid zero probability. Value should be between [0.0, 1.0].
333 | """
334 | rows = len(self._all_states)
335 | columns = len(sequence)
336 |
337 | alpha = self._forward(sequence)
338 | beta = self._backward(sequence)
339 |
340 | # build gamma
341 | gamma = init_matrix(rows, columns, "float")
342 | for s_index in range(rows):
343 | for o_index in range(columns):
344 | prob = alpha[s_index][o_index] * beta[s_index][o_index]
345 | prob /= sum(map(
346 | lambda j: alpha[j][o_index] * beta[j][o_index],
347 | range(rows)
348 | ))
349 | gamma[s_index][o_index] = prob
350 |
351 | # build xi
352 | xi = init_3d_matrix(rows, rows, columns - 1)
353 | for o_index in range(columns - 1):
354 | obs = sequence[o_index]
355 | obs_next = sequence[o_index + 1]
356 |
357 | denominator = 0.0
358 | for s_from in range(rows):
359 | for s_to, state_to in enumerate(self._all_states):
360 | single_state_index = self._single_states.index(
361 | self._get_state_by_order(state_to, 1)
362 | )
363 | prob = (
364 | alpha[s_from][o_index]
365 | * beta[s_to][o_index + 1]
366 | * self._A[s_from][s_to]
367 | * self._B[single_state_index][self._all_obs.index(obs_next)]
368 | )
369 | xi[s_from][s_to][o_index] = prob
370 | denominator += prob
371 |
372 | if denominator == 0:
373 | continue
374 |
375 | for s_from in range(rows):
376 | for s_to in range(rows):
377 | xi[s_from][s_to][o_index] /= denominator
378 |
379 | # update all parameters (A,B,pi).
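# The updates below follow the standard Baum-Welch re-estimation
# formulas, with add-k smoothing applied to the numerators and
# denominators:
#   pi[i]   ~ gamma[i][0]                 (expected frequency at t = 0)
#   A[i][j] ~ sum_t xi[i][j][t] / sum_t gamma[i][t]   for t < T-1
#   B[i][k] ~ sum of gamma[i][t] over t where O_t == k,
#             divided by sum_t gamma[i][t]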
380 | for s_index, state in enumerate(self._all_states): 381 | # update pi 382 | self._pi[self._highest_order - 1][state] = ( 383 | (gamma[s_index][0] + k_smoothing) 384 | / (1 + rows * k_smoothing) 385 | ) 386 | 387 | # update A 388 | gamma_sum = sum(map( 389 | lambda o_index: gamma[s_index][o_index], 390 | range(columns - 1) 391 | )) 392 | if(gamma_sum == 0): 393 | for s_prime in range(rows): 394 | self._A[s_index][s_prime] = 0 395 | else: 396 | for s_prime in range(rows): 397 | xi_sum = sum(map( 398 | lambda o_index: xi[s_index][s_prime][o_index], 399 | range(columns - 1) 400 | )) 401 | self._A[s_index][s_prime] = ( 402 | (xi_sum + k_smoothing) 403 | / (gamma_sum + (rows * k_smoothing)) 404 | ) 405 | 406 | # update B 407 | gamma_sum += gamma[s_index][columns - 1] 408 | single_state_index = self._single_states.index( 409 | self._get_state_by_order(state, 1) 410 | ) 411 | if(gamma_sum == 0): 412 | for o_index in range(columns): 413 | self._B[single_state_index][o_index] = 0 414 | else: 415 | gamma_b_sum = list(map( 416 | lambda x: 0, 417 | range(len(self._all_obs)) 418 | )) 419 | 420 | for o_index in range(columns): 421 | full_obs_index = self._all_obs.index(sequence[o_index]) 422 | gamma_b_sum[full_obs_index] += gamma[s_index][o_index] 423 | 424 | for o_index in range(len(self._all_obs)): 425 | self._B[single_state_index][o_index] = ( 426 | (gamma_b_sum[o_index] + k_smoothing) 427 | / (gamma_sum + (columns * k_smoothing)) 428 | ) 429 | 430 | def _get_state_by_order(self, state, order): 431 | """ 432 | Gets single state for any order HMM. 433 | Examples (let order == 1): 434 | 'a1-b0-a0' => 'a0' 435 | 'a1-b0' => 'b0' 436 | 'a1' => 'a1' 437 | Args: 438 | state: '-' delimited composite state 439 | order: desired order state to return 440 | Returns: 441 | string: modified state 442 | """ 443 | if(self._highest_order == 1): 444 | return state 445 | split_state = state.split('-') 446 | l = len(split_state) 447 | if(order > l): 448 | raise ValueError("Specified order is higher than given state.") 449 | 450 | return '-'.join(split_state[l - order:l]) 451 | -------------------------------------------------------------------------------- /SimpleHOHMM/package_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "author":"Jacob Krantz", 3 | "author_email":"jkrantz@zagmail.gonzaga.edu", 4 | "version": "0.0.3" 5 | } 6 | -------------------------------------------------------------------------------- /SimpleHOHMM/utility.py: -------------------------------------------------------------------------------- 1 | 2 | from copy import deepcopy 3 | import random as ran 4 | 5 | def init_matrix(rows, columns, data_type="float"): 6 | """ 7 | Initialize a matrix using lists with provided size: (rows,columns) 8 | Args: 9 | rows (int) 10 | columns (int) 11 | data_type (string) must be one of: 12 | 'float': 0.0 | 'int': 0 | 'int,int': tuple(0,0) 13 | Return: 14 | the zero matrix of specified size and type 15 | """ 16 | if(data_type == 'int'): 17 | item = 0 18 | elif(data_type == 'float'): 19 | item = 0.0 20 | elif(data_type == 'int,int'): 21 | item = (0,0) 22 | 23 | matrix = [] 24 | row = [] 25 | for i in range(0, columns): 26 | row.append(item) 27 | for j in range(0, rows): 28 | matrix.append(deepcopy(row)) 29 | return matrix 30 | 31 | def init_3d_matrix(x, y, z): 32 | """ 33 | Initialize a 3-dim matrix using lists with provided size: (X,Y,Z). 
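Used by the Baum-Welch trainer to hold the xi quantities,
    indexed as xi[i][j][t].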
34 | Args:
35 | x (int)
36 | y (int)
37 | z (int)
38 | Return:
39 | the zero matrix of specified size with zeroed float values.
40 | """
41 | d1 = []
42 | for i in range(z):
43 | d1.append(0.0)
44 |
45 | d2 = []
46 | for j in range(y):
47 | d2.append(deepcopy(d1))
48 |
49 | matrix = []
50 | for k in range(x):
51 | matrix.append(deepcopy(d2))
52 |
53 | return matrix
54 |
55 | def init_matrix_uniform(row_len, column_len):
56 | """
57 | Initialize a matrix such that all rows sum to 1 and
58 | all elements in a row are the same.
59 | Args:
60 | row_len (int): Number of rows the matrix will have.
61 | column_len (int): Number of columns matrix will have.
62 | Returns:
63 | list<list>: uniformly distributed matrix.
64 | """
65 | value = float(1.0 / column_len)
66 | row = list(map(lambda x : value, range(column_len)))
67 | return list(map(lambda x : deepcopy(row), range(row_len)))
68 |
69 | def init_matrix_random(row_len, column_len):
70 | """
71 | Initialize a matrix such that all rows sum to 1 and elements are
72 | generated pseudo-randomly.
73 | Args:
74 | row_len (int): Number of rows the matrix will have.
75 | column_len (int): Number of columns matrix will have.
76 | Returns:
77 | list<list>: randomly distributed matrix.
78 | """
79 | return list(map(lambda x : _make_random_row(column_len), range(row_len)))
80 |
81 | def _make_random_row(num_elements):
82 | """ Generates a list of num_elements random floats that sum to 1. """
83 | row = [ran.random() for i in range(num_elements)]
84 | s = sum(row)
85 | return [ i / s for i in row ]
86 |
-------------------------------------------------------------------------------- /docs/Makefile: --------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = Simple-HOHMM
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-------------------------------------------------------------------------------- /docs/make.bat: --------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | set SPHINXPROJ=Simple-HOHMM
13 |
14 | if "%1" == "" goto help
15 |
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | echo.
19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | echo.installed, then set the SPHINXBUILD environment variable to point
21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | echo.may add the Sphinx directory to PATH.
23 | echo.
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/source/api_reference.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | TODO: detailed reference guide to using the API 5 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Simple-HOHMM documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Dec 29 20:17:14 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | # import os 20 | # import sys 21 | # sys.path.insert(0, os.path.abspath('.')) 22 | 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | # 28 | # needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = ['sphinx.ext.autodoc'] 34 | 35 | # Add any paths that contain templates here, relative to this directory. 36 | templates_path = ['_templates'] 37 | 38 | # The suffix(es) of source filenames. 39 | # You can specify multiple suffix as a list of string: 40 | # 41 | # source_suffix = ['.rst', '.md'] 42 | source_suffix = '.rst' 43 | 44 | # The master toctree document. 45 | master_doc = 'index' 46 | 47 | # General information about the project. 48 | project = u'Simple-HOHMM' 49 | copyright = u'2017, Jacob Krantz' 50 | author = u'Jacob Krantz' 51 | 52 | # The version info for the project you're documenting, acts as replacement for 53 | # |version| and |release|, also used in various other places throughout the 54 | # built documents. 55 | # 56 | # The short X.Y version. 57 | version = u'0.0.3' 58 | # The full version, including alpha/beta/rc tags. 59 | release = u'0.0.3' 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | # 64 | # This is also used if you do content translation via gettext catalogs. 65 | # Usually you set "language" from the command line for these cases. 66 | language = None 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 70 | # This patterns also effect to html_static_path and html_extra_path 71 | exclude_patterns = [] 72 | 73 | # The name of the Pygments (syntax highlighting) style to use. 
74 | pygments_style = 'sphinx' 75 | 76 | # If true, `todo` and `todoList` produce output, else they produce nothing. 77 | todo_include_todos = False 78 | 79 | 80 | # -- Options for HTML output ---------------------------------------------- 81 | 82 | # The theme to use for HTML and HTML Help pages. See the documentation for 83 | # a list of builtin themes. 84 | # 85 | html_theme = 'sphinx_rtd_theme' 86 | 87 | # Theme options are theme-specific and customize the look and feel of a theme 88 | # further. For a list of options available for each theme, see the 89 | # documentation. 90 | # 91 | # html_theme_options = {} 92 | 93 | # Add any paths that contain custom static files (such as style sheets) here, 94 | # relative to this directory. They are copied after the builtin static files, 95 | # so a file named "default.css" will overwrite the builtin "default.css". 96 | html_static_path = ['_static'] 97 | 98 | # Custom sidebar templates, must be a dictionary that maps document names 99 | # to template names. 100 | # 101 | # This is required for the alabaster theme 102 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 103 | html_sidebars = { 104 | '**': [ 105 | 'relations.html', # needs 'show_related': True theme option to display 106 | 'searchbox.html', 107 | ] 108 | } 109 | 110 | 111 | # -- Options for HTMLHelp output ------------------------------------------ 112 | 113 | # Output file base name for HTML help builder. 114 | htmlhelp_basename = 'Simple-HOHMMdoc' 115 | 116 | 117 | # -- Options for LaTeX output --------------------------------------------- 118 | 119 | latex_elements = { 120 | # The paper size ('letterpaper' or 'a4paper'). 121 | # 122 | # 'papersize': 'letterpaper', 123 | 124 | # The font size ('10pt', '11pt' or '12pt'). 125 | # 126 | # 'pointsize': '10pt', 127 | 128 | # Additional stuff for the LaTeX preamble. 129 | # 130 | # 'preamble': '', 131 | 132 | # Latex figure (float) alignment 133 | # 134 | # 'figure_align': 'htbp', 135 | } 136 | 137 | # Grouping the document tree into LaTeX files. List of tuples 138 | # (source start file, target name, title, 139 | # author, documentclass [howto, manual, or own class]). 140 | latex_documents = [ 141 | (master_doc, 'Simple-HOHMM.tex', u'Simple-HOHMM Documentation', 142 | u'Jacob Krantz', 'manual'), 143 | ] 144 | 145 | 146 | # -- Options for manual page output --------------------------------------- 147 | 148 | # One entry per manual page. List of tuples 149 | # (source start file, name, description, authors, manual section). 150 | man_pages = [ 151 | (master_doc, 'simple-hohmm', u'Simple-HOHMM Documentation', 152 | [author], 1) 153 | ] 154 | 155 | 156 | # -- Options for Texinfo output ------------------------------------------- 157 | 158 | # Grouping the document tree into Texinfo files. List of tuples 159 | # (source start file, target name, title, author, 160 | # dir menu entry, description, category) 161 | texinfo_documents = [ 162 | (master_doc, 'Simple-HOHMM', u'Simple-HOHMM Documentation', 163 | author, 'Simple-HOHMM', 'One line description of project.', 164 | 'Miscellaneous'), 165 | ] 166 | -------------------------------------------------------------------------------- /docs/source/getting_started.rst: -------------------------------------------------------------------------------- 1 | Getting Started 2 | =============== 3 | 4 | Installation for Python 2 or 3 5 | ------------------------------ 6 | 7 | ``Simple-HOHMM`` can be installed directly from Github using ``pip``. 
You must have ``git`` installed for this process to work. 8 | :: 9 | 10 | >>> pip install git+https://github.com/jacobkrantz/Simple-HOHMM.git 11 | 12 | If you want the most recent staging build: 13 | :: 14 | 15 | >>> pip install git+https://github.com/jacobkrantz/Simple-HOHMM.git@staging 16 | 17 | Alternative: to view the source code and run the tests before installation: 18 | :: 19 | 20 | >>> git clone https://github.com/jacobkrantz/Simple-HOHMM.git 21 | >>> cd Simple-HOHMM 22 | >>> python setup.py test 23 | >>> python setup.py install 24 | 25 | Installation for Pypy 26 | --------------------- 27 | 28 | For usage with ``pypy``, you must install with ``pip`` inside ``pypy``: 29 | :: 30 | 31 | >>> pypy -m pip install git+https://github.com/jacobkrantz/Simple-HOHMM.git 32 | 33 | If this fails, try installing ``pip`` for ``pypy`` first: 34 | :: 35 | 36 | >>> curl -O https://bootstrap.pypa.io/get-pip.py 37 | >>> pypy get-pip.py 38 | 39 | If you want the most recent staging build still with ``pypy``: 40 | :: 41 | 42 | >>> pypy -m pip install git+https://github.com/jacobkrantz/Simple-HOHMM.git@staging 43 | 44 | Alternative staging branch with ``pypy``: 45 | :: 46 | 47 | >>> sudo pypy -m pip install --upgrade https://github.com/jacobkrantz/Simple-HOHMM/archive/staging.zip 48 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Simple-HOHMM documentation 2 | ========================== 3 | 4 | Simple-HOHMM is an end-to-end sequence classifier using Hidden Markov Models. Let the builder construct a model for you based on chosen model attributes. Now you can solve the classic problems of HMMs: evaluating, decoding, and learning. Play with different orders of history to maximize the accuracy of your model. 5 | 6 | This documentation is under development, but the tutorials are a good place to start. 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | :caption: Topics 11 | 12 | getting_started 13 | tutorials 14 | api_reference 15 | references 16 | license 17 | -------------------------------------------------------------------------------- /docs/source/license.rst: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2017 Jacob Krantz 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
13 | -------------------------------------------------------------------------------- /docs/source/references.rst: -------------------------------------------------------------------------------- 1 | Implementation References 2 | ========================= 3 | 4 | [1] L. R. Rabiner, "A tutorial on hidden Markov models and selected applications in speech recognition," in Proceedings of the IEEE, vol. 77, no. 2, pp. 257-286, Feb 1989. 5 | doi: 10.1109/5.18626 6 | URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=18626&isnumber=698 7 | 8 | [2] Daniel Jurafsky & James H. Martin. (2016). 9 | *Speech and Language Processing*. Draft of August 7, 2017. 10 | URL: https://web.stanford.edu/~jurafsky/slp3/ 11 | 12 | [3] Du Preez, J.A., *Efficient high-order hidden Markov modelling.* 13 | PhD Dissertation, University of Stellenbosch, South Africa, 1998. 14 | URL: http://www.ussigbase.org/downloads/jadp_phd.pdf 15 | 16 | Web articles 17 | ------------ 18 | 19 | * https://en.wikipedia.org/wiki/Forward_algorithm 20 | * https://en.wikipedia.org/wiki/Forward%E2%80%93backward_algorithm 21 | * https://en.wikipedia.org/wiki/Viterbi_algorithm 22 | * https://en.wikipedia.org/wiki/Baum%E2%80%93Welch_algorithm 23 | -------------------------------------------------------------------------------- /docs/source/tutorials.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | The following tutorials are meant to give you a jump start in applying the tools of Simple-HOHMM. To see what model attributes are adjustable, view the API Reference. 5 | 6 | Supervised 7 | ---------- 8 | The following example is adapted from `Wikipedia `_. 9 | 10 | Suppose villagers are either healthy or have a fever. Fevers are diagnosed by the doctor asking patients how they feel (normal, dizzy, or cold). Assuming their health can be modeled by a discrete Markov chain, the observations are ``(normal, dizzy, cold)`` and the hidden states are ``(healthy, fever)``. The doctor has seen patients in the past, and kept that data. The observations are in one list and the states are in another such that ``states[i]`` corresponds to ``observations[i]``: 11 | :: 12 | 13 | observations = [ 14 | ['normal', 'cold', 'dizzy', 'dizzy','normal','normal'], 15 | ['cold', 'cold', 'dizzy', 'normal','normal','normal'], 16 | ['dizzy', 'dizzy', 'cold', 'normal', 'dizzy', 'normal'], 17 | ['normal', 'normal', 'cold', 'dizzy', 'dizzy', 'dizzy'] 18 | ] 19 | states = [ 20 | ['healthy', 'healthy', 'fever', 'fever', 'healthy', 'healthy'], 21 | ['healthy', 'fever', 'fever', 'healthy', 'healthy', 'fever'], 22 | ['fever', 'fever', 'fever', 'healthy', 'healthy', 'healthy'], 23 | ['healthy', 'healthy', 'healthy', 'fever', 'fever', 'fever'] 24 | ] 25 | 26 | We can now build a first order Hidden Markov Model based on the observations and states above: 27 | :: 28 | 29 | from SimpleHOHMM import HiddenMarkovModelBuilder as Builder 30 | builder = Builder() 31 | builder.add_batch_training_examples(observations, states) 32 | hmm = builder.build() 33 | 34 | Now suppose a patient has been seeing the doctor for three days and felt ``(normal, cold, dizzy)``. What might the doctor guess about this patient's health? 
This is solved with Viterbi decoding:
35 | ::
36 |
37 | obs = ['normal', 'cold', 'dizzy']
38 | states = hmm.decode(obs)
39 | print(states) # prints: ['healthy', 'healthy', 'fever']
40 |
41 | We can also determine the likelihood of a patient feeling ``(normal, cold, dizzy)``:
42 | ::
43 |
44 | obs = ['normal', 'cold', 'dizzy']
45 | likelihood = hmm.evaluate(obs)
46 | print(likelihood) # prints: 0.0433770021525
47 |
48 |
49 | Semi-Supervised
50 | ---------------
51 | For this example, we will use the same ``observations`` and ``states`` as the Supervised example.
52 | Here we initialize our model just as before:
53 | ::
54 |
55 | from SimpleHOHMM import HiddenMarkovModelBuilder as Builder
56 | builder = Builder()
57 | builder.add_batch_training_examples(observations, states)
58 | hmm = builder.build()
59 |
60 | From here we can improve the model's training even further by exposing it to observations it has not seen before. Since we are using a small set, we will limit the learning process to one iteration instead of delta convergence by utilizing the ``iterations=1`` parameter. Also, we use ``k_smoothing=0.05`` to avoid cases of zero probability:
61 | ::
62 |
63 | sequences = [
64 | ['normal', 'cold', 'dizzy','normal','normal'],
65 | ['normal', 'cold', 'normal','dizzy','normal'],
66 | ['dizzy', 'dizzy', 'dizzy','cold','normal'],
67 | ['dizzy', 'dizzy', 'normal','normal','normal'],
68 | ['cold', 'cold', 'dizzy','normal','normal'],
69 | ['normal', 'dizzy', 'dizzy','normal','cold'],
70 | ['normal', 'cold', 'dizzy', 'cold'],
71 | ['normal', 'cold', 'dizzy']
72 | ]
73 | hmm.learn(sequences, k_smoothing=0.05, iterations=1)
74 |
75 | We now determine the updated likelihood and hidden state sequence. Notice that running hmm.learn() has increased the likelihood of our observation:
76 | ::
77 |
78 | obs = ['normal', 'cold', 'dizzy']
79 | print(hmm.evaluate(obs)) # prints 0.052111435936
80 | print(hmm.decode(obs)) # prints ['healthy', 'fever', 'fever']
81 |
82 | Unsupervised
83 | ------------
84 |
85 | In fully unsupervised scenarios, we build and train a model with no prior training examples to draw from. The only data we supply to our model is the set of possible observations, the set of possible hidden states, and a collection of observation sequences to optimize for.
86 |
87 | We first gather the data to supply to our model:
88 | ::
89 |
90 | possible_observations = ['normal', 'cold', 'dizzy']
91 | possible_states = ['healthy', 'fever']
92 | sequences = [
93 | ['normal', 'cold', 'dizzy','normal','normal'],
94 | ['normal', 'cold', 'normal','dizzy','normal'],
95 | ['dizzy', 'dizzy', 'dizzy','cold','normal'],
96 | ['dizzy', 'dizzy', 'normal','normal','normal'],
97 | ['cold', 'cold', 'dizzy','normal','normal'],
98 | ['normal', 'dizzy', 'dizzy','normal','cold'],
99 | ['normal', 'cold', 'dizzy', 'dizzy','normal','normal'],
100 | ['dizzy', 'cold', 'dizzy', 'normal','normal','normal'],
101 | ['dizzy', 'cold', 'dizzy', 'normal','normal','normal'],
102 | ['normal', 'cold', 'dizzy', 'dizzy','cold','normal'],
103 | ['dizzy', 'dizzy', 'dizzy', 'dizzy', 'cold', 'cold'],
104 | ['cold', 'cold', 'cold', 'normal', 'dizzy', 'normal'],
105 | ['dizzy', 'normal', 'cold', 'cold', 'dizzy', 'dizzy']
106 | ]
107 |
108 | There are two initial distributions to choose from, either ``uniform`` or ``random``. This selection applies to model parameters A, B, pi.
124 | In our case we will initialize with a random distribution:
125 | ::
126 | 
127 |     from SimpleHOHMM import HiddenMarkovModelBuilder as Builder
128 |     builder = Builder()
129 |     hmm = builder.build_unsupervised(
130 |         single_states=possible_states,
131 |         all_obs=possible_observations,
132 |         distribution="random",
133 |         highest_order=2
134 |     )
135 | 
136 | We can view the initial model parameters, train our model using the Baum-Welch EM algorithm, then view the parameters again to see how they have been modified:
137 | ::
138 | 
139 |     hmm.display_parameters()
140 |     hmm.learn(sequences, k_smoothing=0.001)
141 |     hmm.display_parameters()
142 | 
143 | Because the initial distributions are random, results may vary from run to run. You can experiment with different ``k_smoothing`` values, ``delta`` values, and sequence selections. Of course, train on prior examples where possible.
144 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | coverage==4.4.2
2 | Sphinx==1.6.5
3 | sphinx-rtd-theme==0.2.4
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | import json
5 | import sys
6 | 
7 | try:
8 |     from setuptools import setup
9 | except ImportError:
10 |     from distutils.core import setup
11 | 
12 | with open('SimpleHOHMM/package_info.json') as f:
13 |     _info = json.load(f)
14 | 
15 | def setup_package():
16 |     needs_sphinx = {'build_sphinx', 'upload_docs'}.intersection(sys.argv)
17 |     sphinx = ['sphinx'] if needs_sphinx else []
18 |     setup(
19 |         setup_requires=sphinx,
20 |         name='SimpleHOHMM',
21 |         version=_info["version"],
22 |         author=_info["author"],
23 |         author_email=_info["author_email"],
24 |         packages=['SimpleHOHMM'],
25 |         package_data={'SimpleHOHMM': ['package_info.json']},
26 |         url='https://simple-hohmm.readthedocs.io',
27 |         license='LICENSE.txt',
28 |         description='High Order Hidden Markov Model for sequence classification',
29 |         test_suite='test.test_suite',
30 |     )
31 | 
32 | if __name__ == "__main__":
33 |     setup_package()
34 | 
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from .test_builder import TestHMMBuilder
3 | from .test_hmm import TestHMM
4 | 
5 | def test_suite():
6 |     loader = unittest.TestLoader()
7 | 
8 |     test_classes_to_run = [TestHMMBuilder, TestHMM]
9 |     suites_list = []
10 | 
11 |     for test_class in test_classes_to_run:
12 |         suite = loader.loadTestsFromTestCase(test_class)
13 |         suites_list.append(suite)
14 | 
15 |     return unittest.TestSuite(suites_list)
16 | 
--------------------------------------------------------------------------------
/test/test_builder.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | from SimpleHOHMM import HiddenMarkovModelBuilder as Builder
4 | 
5 | class TestHMMBuilder(unittest.TestCase):
6 | 
7 |     def setUp(self):
8 |         self._obs = [
9 |             ['normal', 'cold', 'dizzy', 'dizzy','normal','normal'],
10 |             ['dizzy', 'cold', 'dizzy', 'normal','normal','normal'],
11 |             ['dizzy', 'cold', 'dizzy', 'normal','normal','normal'],
12 |             ['normal', 'cold', 'dizzy', 'dizzy','cold','normal'],
13 |             ['dizzy', 'dizzy', 'dizzy', 'dizzy', 'cold', 'cold'],
14 |             ['cold', 'cold', 'cold', 'normal', 'dizzy', 
'normal'], 15 | ['dizzy', 'normal', 'cold', 'cold', 'dizzy', 'dizzy'] 16 | ] 17 | self._states = [ 18 | ['healthy', 'healthy', 'fever', 'fever', 'healthy', 'healthy'], 19 | ['fever', 'fever', 'fever', 'healthy', 'healthy', 'fever'], 20 | ['fever', 'fever', 'fever', 'healthy', 'healthy', 'fever'], 21 | ['healthy', 'healthy', 'fever', 'fever', 'fever', 'healthy'], 22 | ['fever', 'fever', 'fever', 'fever', 'fever', 'fever'], 23 | ['fever', 'fever', 'fever', 'healthy', 'fever', 'healthy'], 24 | ['fever', 'healthy', 'fever', 'fever', 'fever', 'fever'] 25 | ] 26 | 27 | def tearDown(self): 28 | self._obs = None 29 | self._states = None 30 | 31 | def test_build(self): 32 | builder = Builder() 33 | builder.add_training_example(self._obs[0], self._states[0]) 34 | builder.add_batch_training_examples(self._obs[1:], self._states[1:]) 35 | for do_synthesize in [True, False]: 36 | for order in range(1, 5): 37 | hmm = builder.build( 38 | highest_order=order, 39 | k_smoothing=.01, 40 | synthesize_states=do_synthesize 41 | ) 42 | self._test_parameters(hmm.get_parameters(), order) 43 | 44 | def test_build_synthesize(self): 45 | builder = Builder() 46 | builder.add_batch_training_examples(self._obs, self._states) 47 | hmm_synth = builder.build( 48 | highest_order=3, 49 | k_smoothing=.01, 50 | synthesize_states=True 51 | ) 52 | hmm_no_synth = builder.build( 53 | highest_order=3, 54 | k_smoothing=.01, 55 | synthesize_states=False 56 | ) 57 | params_synth = hmm_synth.get_parameters() 58 | params = hmm_no_synth.get_parameters() 59 | # there should be more possible starting states with params_synth 60 | self.assertGreater(len(params_synth["pi"][2]), len(params["pi"][2])) 61 | # there should be more possible state transitions with params_synth 62 | self.assertGreater(len(params_synth["A"]), len(params["A"])) 63 | self.assertEqual(len(params_synth["B"]), len(params["B"])) 64 | 65 | def test_set_states_before_build(self): 66 | builder = Builder() 67 | builder.add_batch_training_examples(self._obs, self._states) 68 | builder.set_all_obs(['normal', 'cold', 'dizzy']) 69 | builder.set_single_states(['fever', 'healthy', 'blah']) 70 | hmm = builder.build( 71 | highest_order=2, 72 | k_smoothing=.01, 73 | synthesize_states=False 74 | ) 75 | hmm2 = builder.build( 76 | highest_order=2, 77 | k_smoothing=.01, 78 | synthesize_states=True 79 | ) 80 | self._test_parameters(hmm.get_parameters(), 2) 81 | self.assertEqual(hmm.get_parameters(), hmm2.get_parameters()) 82 | 83 | def test_build_uniform(self): 84 | builder = Builder() 85 | builder.set_all_obs(['normal', 'cold', 'dizzy']) 86 | builder.set_single_states(['healthy', 'fever']) 87 | uniform_hmm = builder.build_unsupervised(distribution="uniform") 88 | uniform_hmm_2 = builder.build_unsupervised(distribution="uniform") 89 | self.assertEqual( 90 | uniform_hmm.get_parameters(), 91 | uniform_hmm_2.get_parameters() 92 | ) 93 | 94 | params = uniform_hmm.get_parameters() 95 | self.assertEqual(len(set(params["pi"][0].values())), 1) 96 | for row in params["A"]: 97 | self.assertEqual(len(set(row)), 1) 98 | self.assertAlmostEqual(sum(row), 1) 99 | for row in params["B"]: 100 | self.assertEqual(len(set(row)), 1) 101 | self.assertAlmostEqual(sum(row), 1) 102 | 103 | def test_build_random(self): 104 | builder = Builder() 105 | builder.set_all_obs(['normal', 'cold', 'dizzy']) 106 | builder.set_single_states(['healthy', 'fever']) 107 | random_hmm = builder.build_unsupervised(distribution="random") 108 | random_hmm_2 = builder.build_unsupervised(distribution="random") 109 | self.assertNotEqual( 
# ignore small chance they could be the same
110 |             random_hmm.get_parameters(),
111 |             random_hmm_2.get_parameters()
112 |         )
113 | 
114 |         params = random_hmm.get_parameters()
115 |         self.assertAlmostEqual(sum(params["pi"][0].values()), 1)
116 |         self.assertGreater(len(params["A"]), 1)
117 |         for row in params["A"]:
118 |             self.assertGreater(len(row), 1)
119 |             self.assertAlmostEqual(sum(row), 1)
120 | 
121 |         self.assertGreater(len(params["B"]), 1)
122 |         for row in params["B"]:
123 |             self.assertGreater(len(row), 1)
124 |             self.assertAlmostEqual(sum(row), 1)
125 | 
126 |     def _test_parameters(self, params, order):
127 |         for value in params.values():
128 |             self.assertIsNotNone(value)
129 |         for i in range(order):
130 |             self.assertAlmostEqual(sum(params["pi"][i].values()), 1)
131 |         self.assertLessEqual(
132 |             len(params["single_states"]),
133 |             len(params["all_states"])
134 |         )
135 |         if order > 1:
136 |             return
137 | 
138 |         for i in range(2):
139 |             self.assertAlmostEqual(sum(params["A"][i]), 1)
140 |         for i in range(2):
141 |             self.assertAlmostEqual(sum(params["B"][i]), 1)
142 | 
143 |     def test_start_probs_parameters(self):
144 |         # test for when include_pi=False (all entries should be 1)
145 |         builder = Builder()
146 |         builder.add_batch_training_examples(self._obs, self._states)
147 |         for order in range(1, 3):
148 |             hmm = builder.build(
149 |                 highest_order=order,
150 |                 k_smoothing=.01,
151 |                 synthesize_states=True,
152 |                 include_pi=False
153 |             )
154 |             params = hmm.get_parameters()
155 |             pi = params["pi"]
156 |             for i in range(order):
157 |                 for v in pi[i].values():
158 |                     self.assertEqual(v, 1)
159 | 
160 |     def test_clear_all_sets(self):
161 |         builder = Builder()
162 |         builder.add_training_example(self._obs[0], self._states[0])
163 |         builder.clear_all_sets()
164 |         self.assertEqual(len(builder._obs_sequences), 0)
165 |         self.assertEqual(len(builder._state_sequences), 0)
166 |         self.assertIsNone(builder._single_states)
167 |         self.assertIsNone(builder._all_obs)
168 | 
--------------------------------------------------------------------------------
/test/test_hmm.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | from SimpleHOHMM import HiddenMarkovModel as HMM
4 | 
5 | class TestHMM(unittest.TestCase):
6 | 
7 |     def setUp(self):
8 |         all_observations = ['normal', 'cold', 'dizzy']
9 |         all_states = ['healthy', 'fever']
10 |         start_probs = [{"healthy": 0.6, "fever": 0.4}]
11 | 
12 |         trans_probs = [
13 |             [0.7, 0.3],
14 |             [0.4, 0.6]
15 |         ]
16 | 
17 |         emission_probs = [
18 |             [0.5, 0.4, 0.1],
19 |             [0.1, 0.3, 0.6]
20 |         ]
21 | 
22 |         self._hmm = HMM(
23 |             A=trans_probs,
24 |             B=emission_probs,
25 |             pi=start_probs,
26 |             all_obs=all_observations,
27 |             all_states=all_states
28 |         )
29 |         self._sequence = ['normal', 'cold', 'dizzy', 'dizzy','cold','normal']
30 | 
31 |     def tearDown(self):
32 |         self._hmm = None
33 |         self._sequence = None
34 | 
35 |     def test_hmm_evaluate(self):
36 |         likelihood = self._hmm.evaluate(self._sequence)
37 |         self.assertGreater(likelihood, 0)
38 |         self.assertLess(likelihood, 1)
39 | 
40 |     def test_hmm_decode(self):
41 |         decoded = self._hmm.decode(self._sequence)
42 |         self.assertEqual(len(decoded), len(self._sequence))
43 |         for state in decoded:
44 |             self.assertNotIn(state, self._sequence)
45 | 
46 |     def test_hmm_high_order(self):
47 |         pi = [{
48 |             'healthy': 0.2863247863247863,
49 |             'fever': 0.7136752136752137
50 |         }, {
51 |             'healthy-healthy': 0.2855113636363636,
52 |             'fever-fever': 0.5696022727272727,
53 |             'healthy-fever': 0.0014204545454545455,
54 |             'fever-healthy': 
0.1434659090909091 55 | }] 56 | A = [[ 57 | 0.0024752475247524753, 0.9925742574257426, 58 | 0.0024752475247524753, 0.0024752475247524753 59 | ],[ 60 | 0.0024752475247524753, 0.0024752475247524753, 61 | 0.25, 0.745049504950495 62 | ],[ 63 | 0.5972222222222222, 0.3988095238095238, 64 | 0.001984126984126984, 0.001984126984126984 65 | ],[ 66 | 0.0006648936170212767, 0.0006648936170212767, 67 | 0.33311170212765956, 0.6655585106382979 68 | ]] 69 | B = [ 70 | [0.0007127583749109052, 0.8560228082679971, 0.14326443335709194], 71 | [0.5711737424188371, 0.07170888333927934, 0.3571173742418837] 72 | ] 73 | all_states = [ 74 | 'healthy-healthy', 'healthy-fever', 75 | 'fever-healthy', 'fever-fever' 76 | ] 77 | hmm = HMM( 78 | A=A, 79 | B=B, 80 | pi=pi, 81 | all_obs=['normal', 'cold', 'dizzy'], 82 | all_states=all_states, 83 | single_states=['healthy', 'fever'], 84 | order=2 85 | ) 86 | self.assertEqual(len(hmm.decode(self._sequence)), len(self._sequence)) 87 | 88 | def test_hmm_learn(self): 89 | sequences = [ 90 | ['normal', 'cold', 'dizzy','normal','normal'], 91 | ['normal', 'cold', 'normal','dizzy','normal'], 92 | ['dizzy', 'dizzy', 'dizzy','cold','normal'], 93 | ['dizzy', 'dizzy', 'normal','normal','normal'], 94 | ['cold', 'cold', 'dizzy','normal','normal'], 95 | ['normal', 'dizzy', 'dizzy','normal','cold'], 96 | ] 97 | num_iterations = self._hmm.learn(sequences, k_smoothing=0.005) 98 | self.assertGreater(num_iterations, 0) 99 | self.test_hmm_evaluate() 100 | self.test_hmm_decode() 101 | -------------------------------------------------------------------------------- /test/test_utility.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobkrantz/Simple-HOHMM/73d0da85e2e06c7ec7683b2e28079fbf6991580e/test/test_utility.py --------------------------------------------------------------------------------