├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── doc ├── covariance.pdf └── source │ ├── class_ref.rst │ ├── conf.py │ ├── discounting.rst │ ├── example.rst │ ├── example1.rst │ ├── img │ ├── example_plot_a.png │ ├── example_plot_all.png │ ├── example_plot_b.png │ ├── intro_discount_09.png │ ├── intro_discount_1.png │ ├── intro_plot_all.png │ ├── intro_plot_comp_mean.png │ ├── intro_plot_comp_mean_ar3.png │ ├── intro_plot_in_1.png │ ├── intro_plot_state.png │ ├── intro_plot_wo_smooth.png │ ├── readmePlot1.png │ ├── readmePlot2.png │ ├── readmePlot3.png │ ├── readmePlot4.png │ ├── readmePlot5.png │ ├── unemployment_1.png │ ├── unemployment_10-1.png │ ├── unemployment_10-2.png │ ├── unemployment_10.png │ ├── unemployment_11.png │ ├── unemployment_12.png │ ├── unemployment_2.png │ ├── unemployment_3-1.png │ ├── unemployment_3-2.png │ ├── unemployment_3.png │ ├── unemployment_4-1.png │ ├── unemployment_4-2.png │ ├── unemployment_4.png │ ├── unemployment_5.png │ ├── unemployment_6.png │ ├── unemployment_7.png │ ├── unemployment_8-1.png │ ├── unemployment_8-2.png │ ├── unemployment_8.png │ ├── unemployment_9-1.png │ ├── unemployment_9-2.png │ └── unemployment_9.png │ ├── index.rst │ ├── installation.rst │ ├── pydlm_user_guide.rst │ └── simple_example.rst ├── examples └── unemployment_insurance │ ├── data.csv │ └── main.py ├── pydlm_lite ├── __init__.py ├── base │ ├── __init__.py │ ├── baseModel.py │ ├── kalmanFilter.py │ └── tools.py ├── dlm.py ├── func │ ├── __init__.py │ └── _dlm.py ├── modeler │ ├── __init__.py │ ├── autoReg.py │ ├── builder.py │ ├── component.py │ ├── dynamic.py │ ├── longSeason.py │ ├── matrixTools.py │ ├── seasonality.py │ └── trends.py ├── tests │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ └── testKalmanFilter.py │ ├── func │ │ ├── __init__.py │ │ └── test_dlm.py │ ├── modeler │ │ ├── __init__.py │ │ ├── testAutoReg.py │ │ ├── testBuilder.py │ │ ├── testDynamic.py │ │ ├── testLongSeason.py │ │ ├── 
testSeasonality.py │ │ └── testTrends.py │ ├── testDlm.py │ └── tuner │ │ └── testDlmTuner.py └── tuner │ ├── __init__.py │ └── dlmTuner.py ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore all .py~ files 2 | changelog~ 3 | *.md~ 4 | *.yml~ 5 | *.py~ 6 | *.gitignore~ 7 | *.pyc 8 | *.py# 9 | *_flymake.py 10 | .#* 11 | *.rst~ 12 | build/ 13 | *.sh 14 | *.sh~ 15 | *.csv~ 16 | dist/ 17 | pydlm.egg-info/ -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.4" 5 | - "3.5" 6 | - "3.5-dev" # 3.5 development branch 7 | - "3.6" 8 | - "3.6-dev" # 3.6 development branch 9 | #- "3.7-dev" # 3.7 development branch 10 | #- "nightly" # currently points to 3.7-dev 11 | # command to install dependencies. 12 | install: 13 | - python setup.py install 14 | - pip install coveralls 15 | script: 16 | - python -m unittest discover 17 | - coverage run --source . -m unittest discover 18 | after_success: coveralls 19 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | Changelog 2 | ---------------- 3 | 4 | updates in 0.1.1.1 5 | 6 | * Branched from `pydlm` 7 | * Removed plotting function and all dependencies on `matplotlib` 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Xiangyu Wang (wwrechard@gmail.com) 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
Plan to refactor the `dlm` class. In alignment with the goal of separating model and data, I'm going to refactor the `dlm` class such that
Time series data will be passed in as an argument to the `fit` or `forwardFilter` and the fitted result will be returned as well as the model status. 17 | 3. Model status can also be passed into `fit` and `forwardFilter` as a prior. 18 | The goal is to make the `dlm` class state-independent, so that the class is thread-safe and can be shared by multiple threads for parallel processing. While in progress, all the old class behavior will be kept. 19 | 20 | Documentation 21 | ------------- 22 | Detailed documentation is provided in [PyDLM](https://pydlm.github.io/) with special attention to the [User manual](https://pydlm.github.io/#dynamic-linear-models-user-manual). 23 | -------------------------------------------------------------------------------- /doc/covariance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/covariance.pdf -------------------------------------------------------------------------------- /doc/source/class_ref.rst: -------------------------------------------------------------------------------- 1 | .. py:currentmodule:: pydlm 2 | 3 | Class Reference 4 | =============== 5 | 6 | :class:`dlm` 7 | ------------ 8 | 9 | .. autoclass:: pydlm.dlm 10 | :members: 11 | 12 | :class:`trend` 13 | -------------- 14 | 15 | .. autoclass:: pydlm.trend 16 | :members: 17 | 18 | :class:`seasonality` 19 | -------------------- 20 | 21 | .. autoclass:: pydlm.seasonality 22 | :members: 23 | 24 | :class:`dynamic` 25 | ---------------- 26 | 27 | .. autoclass:: pydlm.dynamic 28 | :members: 29 | 30 | 31 | :class:`autoReg` 32 | ---------------- 33 | 34 | .. autoclass:: pydlm.autoReg 35 | :members: 36 | 37 | :class:`longSeason` 38 | ------------------- 39 | 40 | .. autoclass:: pydlm.longSeason 41 | :members: 42 | 43 | :class:`modelTuner` 44 | ------------------- 45 | 46 | .. 
autoclass:: pydlm.modelTuner 47 | :members: 48 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # PyDLM documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Sep 9 23:34:57 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | # import os 20 | import sys 21 | # sys.path.insert(0, os.path.abspath('.')) 22 | sys.path.insert(0, '/Users/samuel/Documents/Github/PyDLM') 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | # 28 | # needs_sphinx = '1.0' 29 | import sphinx_rtd_theme 30 | 31 | html_theme = "sphinx_rtd_theme" 32 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 33 | #collapse_navigation = True 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 38 | extensions = [ 39 | 'sphinx.ext.autodoc', 40 | 'sphinx.ext.mathjax', 41 | 'sphinx.ext.githubpages', 42 | 'sphinx.ext.napoleon', 43 | ] 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ['_templates'] 47 | 48 | # The suffix(es) of source filenames. 
49 | # You can specify multiple suffix as a list of string: 50 | # 51 | # source_suffix = ['.rst', '.md'] 52 | source_suffix = '.rst' 53 | 54 | # The encoding of source files. 55 | # 56 | # source_encoding = 'utf-8-sig' 57 | 58 | # The master toctree document. 59 | master_doc = 'index' 60 | 61 | # General information about the project. 62 | project = u'PyDLM' 63 | copyright = u'2016, Xiangyu Wang' 64 | author = u'Xiangyu Wang' 65 | 66 | # The version info for the project you're documenting, acts as replacement for 67 | # |version| and |release|, also used in various other places throughout the 68 | # built documents. 69 | # 70 | # The short X.Y version. 71 | version = u'0.1.1' 72 | # The full version, including alpha/beta/rc tags. 73 | release = u'0.1.1' 74 | 75 | # The language for content autogenerated by Sphinx. Refer to documentation 76 | # for a list of supported languages. 77 | # 78 | # This is also used if you do content translation via gettext catalogs. 79 | # Usually you set "language" from the command line for these cases. 80 | language = None 81 | 82 | # There are two options for replacing |today|: either, you set today to some 83 | # non-false value, then it is used: 84 | # 85 | # today = '' 86 | # 87 | # Else, today_fmt is used as the format for a strftime call. 88 | # 89 | # today_fmt = '%B %d, %Y' 90 | 91 | # List of patterns, relative to source directory, that match files and 92 | # directories to ignore when looking for source files. 93 | # This patterns also effect to html_static_path and html_extra_path 94 | exclude_patterns = [] 95 | 96 | # The reST default role (used for this markup: `text`) to use for all 97 | # documents. 98 | # 99 | # default_role = None 100 | 101 | # If true, '()' will be appended to :func: etc. cross-reference text. 102 | # 103 | add_function_parentheses = True 104 | 105 | # If true, the current module name will be prepended to all description 106 | # unit titles (such as .. function::). 
107 | # 108 | # add_module_names = True 109 | 110 | # If true, sectionauthor and moduleauthor directives will be shown in the 111 | # output. They are ignored by default. 112 | # 113 | # show_authors = False 114 | 115 | # The name of the Pygments (syntax highlighting) style to use. 116 | pygments_style = 'sphinx' 117 | 118 | # A list of ignored prefixes for module index sorting. 119 | # modindex_common_prefix = [] 120 | 121 | # If true, keep warnings as "system message" paragraphs in the built documents. 122 | # keep_warnings = False 123 | 124 | # If true, `todo` and `todoList` produce output, else they produce nothing. 125 | todo_include_todos = False 126 | 127 | 128 | # -- Options for HTML output ---------------------------------------------- 129 | 130 | # The theme to use for HTML and HTML Help pages. See the documentation for 131 | # a list of builtin themes. 132 | # 133 | # html_theme = 'pyramid' 134 | 135 | # Theme options are theme-specific and customize the look and feel of a theme 136 | # further. For a list of options available for each theme, see the 137 | # documentation. 138 | # 139 | # html_theme_options = {} 140 | 141 | # Add any paths that contain custom themes here, relative to this directory. 142 | # html_theme_path = [] 143 | 144 | # The name for this set of Sphinx documents. 145 | # " v documentation" by default. 146 | # 147 | # html_title = u'PyDLM v0.1.0' 148 | 149 | # A shorter title for the navigation bar. Default is the same as html_title. 150 | # 151 | # html_short_title = None 152 | 153 | # The name of an image file (relative to this directory) to place at the top 154 | # of the sidebar. 155 | # 156 | # html_logo = None 157 | 158 | # The name of an image file (relative to this directory) to use as a favicon of 159 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 160 | # pixels large. 
161 | # 162 | # html_favicon = None 163 | 164 | # Add any paths that contain custom static files (such as style sheets) here, 165 | # relative to this directory. They are copied after the builtin static files, 166 | # so a file named "default.css" will overwrite the builtin "default.css". 167 | html_static_path = ['_static'] 168 | 169 | # Add any extra paths that contain custom files (such as robots.txt or 170 | # .htaccess) here, relative to this directory. These files are copied 171 | # directly to the root of the documentation. 172 | # 173 | # html_extra_path = [] 174 | 175 | # If not None, a 'Last updated on:' timestamp is inserted at every page 176 | # bottom, using the given strftime format. 177 | # The empty string is equivalent to '%b %d, %Y'. 178 | # 179 | # html_last_updated_fmt = None 180 | 181 | # If true, SmartyPants will be used to convert quotes and dashes to 182 | # typographically correct entities. 183 | # 184 | # html_use_smartypants = True 185 | 186 | # Custom sidebar templates, maps document names to template names. 187 | # 188 | # html_sidebars = {} 189 | 190 | # Additional templates that should be rendered to pages, maps page names to 191 | # template names. 192 | # 193 | # html_additional_pages = {} 194 | 195 | # If false, no module index is generated. 196 | # 197 | # html_domain_indices = True 198 | 199 | # If false, no index is generated. 200 | # 201 | # html_use_index = True 202 | 203 | # If true, the index is split into individual pages for each letter. 204 | # 205 | # html_split_index = False 206 | 207 | # If true, links to the reST sources are added to the pages. 208 | # 209 | # html_show_sourcelink = True 210 | 211 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 212 | # 213 | # html_show_sphinx = True 214 | 215 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
216 | # 217 | # html_show_copyright = True 218 | 219 | # If true, an OpenSearch description file will be output, and all pages will 220 | # contain a tag referring to it. The value of this option must be the 221 | # base URL from which the finished HTML is served. 222 | # 223 | # html_use_opensearch = '' 224 | 225 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 226 | # html_file_suffix = None 227 | 228 | # Language to be used for generating the HTML full-text search index. 229 | # Sphinx supports the following languages: 230 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 231 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' 232 | # 233 | # html_search_language = 'en' 234 | 235 | # A dictionary with options for the search language support, empty by default. 236 | # 'ja' uses this config value. 237 | # 'zh' user can custom change `jieba` dictionary path. 238 | # 239 | # html_search_options = {'type': 'default'} 240 | 241 | # The name of a javascript file (relative to the configuration directory) that 242 | # implements a search results scorer. If empty, the default will be used. 243 | # 244 | # html_search_scorer = 'scorer.js' 245 | 246 | # Output file base name for HTML help builder. 247 | htmlhelp_basename = 'PyDLMdoc' 248 | 249 | # -- Options for LaTeX output --------------------------------------------- 250 | 251 | latex_elements = { 252 | # The paper size ('letterpaper' or 'a4paper'). 253 | # 254 | # 'papersize': 'letterpaper', 255 | 256 | # The font size ('10pt', '11pt' or '12pt'). 257 | # 258 | # 'pointsize': '10pt', 259 | 260 | # Additional stuff for the LaTeX preamble. 261 | # 262 | # 'preamble': '', 263 | 264 | # Latex figure (float) alignment 265 | # 266 | # 'figure_align': 'htbp', 267 | } 268 | 269 | # Grouping the document tree into LaTeX files. List of tuples 270 | # (source start file, target name, title, 271 | # author, documentclass [howto, manual, or own class]). 
272 | latex_documents = [ 273 | (master_doc, 'PyDLM.tex', u'PyDLM Documentation', 274 | u'Xiangyu Wang', 'manual'), 275 | ] 276 | 277 | # The name of an image file (relative to this directory) to place at the top of 278 | # the title page. 279 | # 280 | # latex_logo = None 281 | 282 | # For "manual" documents, if this is true, then toplevel headings are parts, 283 | # not chapters. 284 | # 285 | # latex_use_parts = False 286 | 287 | # If true, show page references after internal links. 288 | # 289 | # latex_show_pagerefs = False 290 | 291 | # If true, show URL addresses after external links. 292 | # 293 | # latex_show_urls = False 294 | 295 | # Documents to append as an appendix to all manuals. 296 | # 297 | # latex_appendices = [] 298 | 299 | # It false, will not define \strong, \code, itleref, \crossref ... but only 300 | # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added 301 | # packages. 302 | # 303 | # latex_keep_old_macro_names = True 304 | 305 | # If false, no module index is generated. 306 | # 307 | # latex_domain_indices = True 308 | 309 | 310 | # -- Options for manual page output --------------------------------------- 311 | 312 | # One entry per manual page. List of tuples 313 | # (source start file, name, description, authors, manual section). 314 | man_pages = [ 315 | (master_doc, 'pydlm', u'PyDLM Documentation', 316 | [author], 1) 317 | ] 318 | 319 | # If true, show URL addresses after external links. 320 | # 321 | # man_show_urls = False 322 | 323 | 324 | # -- Options for Texinfo output ------------------------------------------- 325 | 326 | # Grouping the document tree into Texinfo files. 
The discounting factor 4 | ======================
The chain 37 | will not move much even when an extreme point appears. By contrast, when 38 | the discounting factor is small, say, only 90% of the information is passed 39 | through every day, then any new data will account for 10% of the 40 | performance of the time series for a given date, thus the model will 41 | adapt to any new data or change point fairly rapidly.
To ameliorate this issue, I come up with this `renewal strategy`. Notice that for 69 | a model with a discounting factor of 0.8, fitting the whole time 70 | series is meaningless. For data that is 40-day old, its impact on the 71 | current state is less than 0.1%. Thus, if we discard the whole time 72 | series except for the last 40 days, the latent states will at most be 73 | affected by 0.1%. This motivates the following strategy: For small 74 | discounting model, we set an acceptable threshold, say 0.1%, to compute 75 | the corresponding renewal length. In this case, the renewal length is 76 | about 40 days. In the actual model fitting, we then refit the model state 77 | using only the past 40 days after every 40 steps. Following is a 78 | simple illustration.
needs to fit is limited by twice the renewal length and thus 92 | suppresses the numerical issue.
In the actual code, the time series data is stored in the variable 29 | `time_series`. `degree=1` indicates the trend is linear (2 stands 30 | for quadratic) and `period=52` means the seasonality has a periodicity 31 | of 52.
The one-day-ahead mean squared prediction error is **0.173** which can be obtained by calling::
To build the dynamic linear regression model, we simply 85 | add a new component:: 86 | 87 | # Build a dynamic regression model 88 | from pydlm import dynamic 89 | regressor10 = dynamic(features=features, discount=1.0, name='regressor10', w=10) 90 | drm = dlm(time_series) + linear_trend + seasonal52 + regressor10 91 | drm.fit() 92 | drm.getMSE() 93 | 94 | # Plot the fitted results 95 | drm.turnOff('data points') 96 | drm.plot() 97 | 98 | :class:`dynamic` is the component for modeling dynamically changing predictors, which accepts :attr:`features` as its argument. The above code plots the fitted result (top left). 99 | 100 | .. image:: ./img/unemployment_7.png 101 | :width: 48% 102 | .. image:: ./img/unemployment_8-1.png 103 | :width: 48% 104 | .. image:: ./img/unemployment_9-1.png 105 | :width: 48% 106 | .. image:: ./img/unemployment_10-1.png 107 | :width: 48% 108 | 109 | The one-day ahead prediction looks much better than the simple model, 110 | particularly around the crisis peak. The mean prediction error is 111 | **0.099** which is a 100% improvement over the simple 112 | model. Similarly, we also decompose the time series into the three 113 | components:: 114 | 115 | drm.turnOff('predict plot') 116 | drm.turnOff('filtered plot') 117 | drm.plot('linear_trend') 118 | drm.plot('seasonal52') 119 | drm.plot('regressor10') 120 | 121 | This time, the shape of the time series is mostly attributed to the 122 | regressor and the linear trend looks more linear. If we do long-term 123 | forecasting again, i.e., use the previous **301 week** 's data to 124 | forecast the next **150 weeks** and the previous **251 week** 's data 125 | to forecast the next **200 weeks**:: 126 | 127 | drm.plotPredictN(date=300, N=150) 128 | drm.plotPredictN(date=250, N=200) 129 | 130 | .. image:: ./img/unemployment_11.png 131 | :width: 48% 132 | .. 
image:: ./img/unemployment_12.png 133 | :width: 48% 134 | 135 | The results look much better compared to the simple model 136 | -------------------------------------------------------------------------------- /doc/source/img/example_plot_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/example_plot_a.png -------------------------------------------------------------------------------- /doc/source/img/example_plot_all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/example_plot_all.png -------------------------------------------------------------------------------- /doc/source/img/example_plot_b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/example_plot_b.png -------------------------------------------------------------------------------- /doc/source/img/intro_discount_09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/intro_discount_09.png -------------------------------------------------------------------------------- /doc/source/img/intro_discount_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/intro_discount_1.png -------------------------------------------------------------------------------- /doc/source/img/intro_plot_all.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/intro_plot_all.png -------------------------------------------------------------------------------- /doc/source/img/intro_plot_comp_mean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/intro_plot_comp_mean.png -------------------------------------------------------------------------------- /doc/source/img/intro_plot_comp_mean_ar3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/intro_plot_comp_mean_ar3.png -------------------------------------------------------------------------------- /doc/source/img/intro_plot_in_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/intro_plot_in_1.png -------------------------------------------------------------------------------- /doc/source/img/intro_plot_state.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/intro_plot_state.png -------------------------------------------------------------------------------- /doc/source/img/intro_plot_wo_smooth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/intro_plot_wo_smooth.png -------------------------------------------------------------------------------- /doc/source/img/readmePlot1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/readmePlot1.png -------------------------------------------------------------------------------- /doc/source/img/readmePlot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/readmePlot2.png -------------------------------------------------------------------------------- /doc/source/img/readmePlot3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/readmePlot3.png -------------------------------------------------------------------------------- /doc/source/img/readmePlot4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/readmePlot4.png -------------------------------------------------------------------------------- /doc/source/img/readmePlot5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/readmePlot5.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_1.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_10-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_10-1.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_10-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_10-2.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_10.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_11.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_12.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_2.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_3-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_3-1.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_3-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_3-2.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_3.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_4-1.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_4-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_4-2.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_4.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_5.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_5.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_6.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_7.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_8-1.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_8-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_8-2.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_8.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_9-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_9-1.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_9-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_9-2.png -------------------------------------------------------------------------------- /doc/source/img/unemployment_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/doc/source/img/unemployment_9.png -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. module:: pydlm 2 | 3 | `PyDLM `_ 4 | ======================================================= 5 | 6 | Welcome to `PyDLM `_, a flexible, 7 | user-friendly and rich functionality 8 | time series modeling library for python. This package implementes the 9 | Bayesian dynamic linear model (Harrison and West, 1999) for time 10 | series data analysis. Modeling and fitting is simple and easy with :mod:`pydlm`. 
11 | Complex models can be constructed via simple operations:: 12 | 13 | # import dlm and its modeling components 14 | from pydlm import dlm, trend, seasonality, dynamic, autoReg, longSeason 15 | # randomly generate data 16 | data = [0] * 100 + [3] * 100 17 | # construct the base 18 | myDLM = dlm(data) 19 | # adding model components 20 | # add a first-order trend (linear trending) with prior covariance 1.0 21 | myDLM = myDLM + trend(1, name='lineTrend', w=1.0) 22 | # add a 7 day seasonality with prior covariance 1.0 23 | myDLM = myDLM + seasonality(7, name='7day', w=1.0) 24 | # add a 3 step auto regression 25 | myDLM = myDLM + autoReg(degree=3, data=data, name='ar3', w=1.0) 26 | # show the added components 27 | myDLM.ls() 28 | # delete unwanted component 29 | myDLM.delete('7day') 30 | myDLM.ls() 31 | 32 | Users can then analyze the data with the constructed model:: 33 | 34 | # fit forward filter 35 | myDLM.fitForwardFilter() 36 | # fit backward smoother 37 | myDLM.fitBackwardSmoother() 38 | 39 | and plot the results easily:: 40 | 41 | # plot the results 42 | myDLM.plot() 43 | # plot only the filtered results 44 | myDLM.turnOff('smoothed plot') 45 | myDLM.plot() 46 | # plot in one figure 47 | myDLM.turnOff('multiple plots') 48 | myDLM.plot() 49 | 50 | The three images show 51 | 52 | .. image:: ./img/intro_plot_all.png 53 | :width: 33% 54 | .. image:: ./img/intro_plot_wo_smooth.png 55 | :width: 33% 56 | .. image:: ./img/intro_plot_in_1.png 57 | :width: 33% 58 | 59 | User can also plot the mean of a component (the time series value that 60 | attributed to this component):: 61 | 62 | # plot the component mean of 'ar3' 63 | myDLM.turnOn('smoothed plot') 64 | myDLM.turnOff('predict') 65 | myDLM.plot(name='ar3') 66 | 67 | and also the latent states for a given component:: 68 | 69 | # plot the latent states of the 'ar3' 70 | myDLM.plotCoef(name='ar3') 71 | 72 | which result in 73 | 74 | .. image:: ./img/intro_plot_comp_mean_ar3.png 75 | :width: 49% 76 | .. 
image:: ./img/intro_plot_state.png 77 | :width: 49% 78 | 79 | The 'ar3' has three latent states (`today - 3`, `today - 2`, `today - 1`), 80 | and the states are aligned in the order of [`today - 3`, `today - 2`, 81 | `today - 1`], which means the current model attributes a lot of weight 82 | to the `today - 1` latent state. 83 | 84 | If users are unsatisfied with the model results, they can simply 85 | reconstruct the model and refit:: 86 | 87 | myDLM = myDLM + seasonality(4) 88 | myDLM.ls() 89 | myDLM.fit() 90 | 91 | `pydlm` supports missing observations:: 92 | 93 | data = [1, 0, 0, 1, 0, 0, None, 0, 1, None, None, 0, 0] 94 | myDLM = dlm(data) + trend(1, w=1.0) 95 | myDLM.fit() # fit() will fit both forward filter and backward smoother 96 | 97 | It also includes the discounting factor, which can be used to control how rapidly the model should adapt to the new data:: 98 | 99 | data = [0] * 100 + [3] * 100 100 | myDLM = dlm(data) + trend(1, discount=1.0, w=1.0) 101 | myDLM.fit() 102 | myDLM.plot() 103 | 104 | myDLM.delete('trend') 105 | myDLM = myDLM + trend(1, discount=0.8, w=1.0) 106 | myDLM.fit() 107 | myDLM.plot() 108 | 109 | The two different settings give different adaptiveness. 110 | 111 | .. image:: ./img/intro_discount_1.png 112 | :width: 49% 113 | ..
image:: ./img/intro_discount_09.png 114 | :width: 49% 115 | 116 | The discounting factor can be auto-tuned by the :class:`modelTuner` 117 | provided by the package:: 118 | 119 | from pydlm import modelTuner 120 | myTuner = modelTuner(method='gradient_descent', loss='mse') 121 | tunedDLM = myTuner.tune(myDLM, maxit=100) 122 | 123 | and users can get the MSE of each model for performance comparison:: 124 | 125 | myDLM_mse = myDLM.getMSE() 126 | tunedDLM.fit() 127 | tunedDLM_mse = tunedDLM.getMSE() 128 | 129 | The filtered results and latent states can be retrieved easily:: 130 | 131 | # get the filtered and smoothed results 132 | filteredMean = myDLM.getMean(filterType='forwardFilter') 133 | smoothedMean = myDLM.getMean(filterType='backwardSmoother') 134 | filteredVar = myDLM.getVar(filterType='forwardFilter') 135 | smoothedVar = myDLM.getVar(filterType='backwardSmoother') 136 | 137 | filteredCI = myDLM.getInterval(filterType='forwardFilter') 138 | smoothedCI = myDLM.getInterval(filterType='backwardSmoother') 139 | 140 | # get the residual time series 141 | residual = myDLM.getResidual(filterType='backwardSmoother') 142 | 143 | # get the filtered and smoothed mean for a given component 144 | filteredTrend = myDLM.getMean(filterType='forwardFilter', name='lineTrend') 145 | smoothedTrend = myDLM.getMean(filterType='backwardSmoother', name='lineTrend') 146 | 147 | # get the latent states 148 | allStates = myDLM.getLatentState(filterType='forwardFilter') 149 | trendStates = myDLM.getLatentState(filterType='forwardFilter', name='lineTrend') 150 | 151 | For online updates:: 152 | 153 | myDLM = dlm([]) + trend(1) + seasonality(7) 154 | for t in range(0, len(data)): 155 | ... myDLM.append([data[t]]) 156 | ... myDLM.fitForwardFilter() 157 | filteredObs = myDLM.getFilteredObs() 158 | 159 | Documentation 160 | ============= 161 | .. 
toctree:: 162 | :maxdepth: 2 163 | 164 | installation 165 | example 166 | pydlm_user_guide 167 | discounting 168 | class_ref 169 | -------------------------------------------------------------------------------- /doc/source/installation.rst: -------------------------------------------------------------------------------- 1 | .. py:currentmodule:: pydlm 2 | 3 | Installation 4 | ============ 5 | You can now get the package from `PyPI`:: 6 | 7 | $ pip install pydlm 8 | 9 | You can also get the latest from `github 10 | `_:: 11 | 12 | $ git clone git@github.com:wwrechard/pydlm.git pydlm 13 | $ cd pydlm 14 | $ sudo python setup.py install 15 | 16 | :mod:`pydlm` depends on the following modules, 17 | 18 | * :mod:`numpy` (for core functionality) 19 | * :mod:`matplotlib` (for plotting results) 20 | * :mod:`Sphinx` (for generating documentation) 21 | * :mod:`unittest` (for tests) 22 | -------------------------------------------------------------------------------- /doc/source/pydlm_user_guide.rst: -------------------------------------------------------------------------------- 1 | .. py:currentmodule:: pydlm 2 | 3 | Dynamic linear models --- user manual 4 | ===================================== 5 | 6 | This package implements the Bayesian dynamic linear model (DLM, Harrison 7 | and West, 1999) for time series analysis. 8 | The DLM is built upon two layers. The first layer is 9 | the fitting algorithm. DLM adopts a modified Kalman filter with a 10 | unique discounting technique from 11 | Harrison and West (1999). Like the usual Kalman filter, it accepts a 12 | transition matrix, a measurement matrix, an observation, a latent 13 | state, an innovation and an error covariance matrix and return the 14 | updated state and error covariance. These quantities will all be 15 | supplied internally -- users are free from any annoying 16 | calculations. 
Different from the usual Kalman filter, the modified 17 | Kalman filter does not require the tuning of the two parameters: the 18 | error covariance matrix and the observational variance, so the model 19 | fitting is extremely efficient (could be up to 1000 times faster than 20 | the EM algorithm), more details will be provided in the section of the 21 | discounting technique. 22 | 23 | The second layer of DLM is its modeling feature. Nicely summarized in 24 | Harrison and West (1999), most common models can be expressed in 25 | one unified form -- canonical form, which is closely related to the 26 | Jordan decomposition. Thanks to this keen observation, the DLM can 27 | easily incorporate most modeling components and turn them into the 28 | corresponding transition matrices and other quantities to be supplied 29 | to the Kalman filter. Examples are trend, seasonality, holidays, 30 | control variables and auto-regressive, which could appear 31 | simultaneously in one model. Due to this nice property, users of this 32 | package can construct models simply by "adding" some component into 33 | the model as:: 34 | 35 | myDLM = dlm(data) + trend(1) 36 | 37 | The modeling process is simple. 38 | 39 | The purpose of the modeling is to better understand the time series 40 | data and to forecast into the future. So the key output from the 41 | model are the filtered time series, smoothed time series and one-step 42 | ahead prediction. We will cover this topic later in this section.
43 | 44 | The advantage of :mod:`pydlm`: 45 | 46 | + flexibility in constructing complicated models 47 | 48 | + Extremely efficient model fitting with the discounting technique 49 | 50 | + user-specific adjustment on the adaptive property of the model 51 | 52 | The disadvantage of :mod:`pydlm`: 53 | 54 | + only for Gaussian noise 55 | 56 | 57 | Miscellaneous 58 | ------------- 59 | + `PyDLM` index starts at 0 instead of 1, i.e., for any prediction or 60 | modification that involves `date` argument, it corresponds to the 61 | actual index in the array. For instance, for a time series with 62 | length 10, the date of the last day is 9. 63 | 64 | Modeling 65 | -------- 66 | 67 | As discussed in the beginning, the modeling process is very simple 68 | with :mod:`pydlm`, most modeling functions are integrated in the class 69 | :class:`dlm`. Following is an example for constructing a dlm with 70 | linear trend, 7-day seasonality and control variables:: 71 | 72 | from pydlm import dlm, trend, seasonality, dynamic, autoReg, longSeason 73 | data = [0] * 100 + [3] * 100 74 | SP500Index = [[2000] for i in range(100)] + [[2010] for i in range(100)] 75 | page = [[i, i + 1, i + 2, i + 3] for i in range(200)] 76 | myDLM = dlm(data) 77 | myDLM = myDLM + trend(degree=1, discount=0.95, name='trend1') 78 | myDLM = myDLM + seasonality(period=7, discount=0.99, name='week') 79 | myDLM = myDLM + dynamic(features=SP500Index, discount=1, name='SP500') 80 | myDLM = myDLM + dynamic(features=page, discount=1, name='page') 81 | 82 | Users can also use the :func:`dlm.add` method to add a new component:: 83 | 84 | myDLM.add(trend(degree=0, discount=0.99, name='trend2')) 85 | 86 | The input :attr:`data` must be a 1d array or a list, since the current 87 | :class:`dlm` only supports one dimensional time series. Support for 88 | multivariate time series will be built upon this one dimensional class 89 | and added in the future.
90 | 91 | Now the variable `myDLM` contains the data and the modeling 92 | information. It will construct the corresponding transition, 93 | measurement, innovation, latent states and error covariance matrix 94 | once model fitting is called. Modifying an existing model is also 95 | simple. Users can browse the existing components of the model by:: 96 | 97 | myDLM.ls() 98 | 99 | It will show all the existing components and their corresponding 100 | names. Name can be specified when the component is added to the `dlm`, 101 | for example:: 102 | 103 | myDLM = myDLM + seasonality(4, name = 'day4') 104 | myDLM.ls() 105 | 106 | We can also easily delete the unwanted component by using `delete`:: 107 | 108 | myDLM.delete('trend2') 109 | myDLM.delete('day4') 110 | 111 | After the building steps, we can specify some parameters for the model 112 | fitting, the most common one is the prior guess on the observational 113 | noise. The default value is 1.0. To change that to 10 you can do:: 114 | 115 | myDLM.noisePrior(10.0) 116 | 117 | Such change usually has small impact on the model and is almost 118 | ignorable. 119 | 120 | Model components 121 | ---------------- 122 | 123 | There are five model components provided with this 124 | package: trend, seasonality, dynamic, auto-regression and long-seasonality. 125 | 126 | Trend 127 | ````` 128 | :class:`trend` class is a model component for trend 129 | behavior. The data might be const or increasing linearly or 130 | quadratically, which can all be captured by :class:`trend`. The degree 131 | argument specifies the shape of the trend. `degree=0` indicates this 132 | is a const, `degree=1` indicates a line and `degree=2` stands for a 133 | quadratic curve and so on and so forth. `w` sets the prior 134 | covariance for the trend component (same for all the other 135 | components).
The discounting factor will be explained later in the next 136 | section:: 137 | 138 | linearTrend = trend(degree=1, discount=0.99, name='trend1', w=1e7) 139 | 140 | Seasonality 141 | ``````````` 142 | The :class:`seasonality` class models the periodic behavior of the 143 | data. Compared to the sine or cosine periodic curves, 144 | :class:`seasonality` in this package is more flexible, since it can 145 | turn into any shapes, much broader than the triangular families:: 146 | 147 | weekPeriod = seasonality(period=7, discount=0.99, name='week', w=1e7) 148 | 149 | In the package, we implement the seasonality component in a 150 | `form-free` way (Harrison and West, 1999) to avoid the identifiability 151 | issue. The states of one seasonality component are always summed up to 152 | zero, so that it will not tangle with the :class:`trend` component. 153 | 154 | Dynamic 155 | ``````` 156 | The :class:`dynamic` class offers the modeling ability to add any additional 157 | observed time series as a controlled variable to the current one. For 158 | example, when studying the stock price, the 'SP500' index could be a 159 | good indicator for the modeling stock. A dynamic component needs the 160 | user to supply the necessary information of the control variable over 161 | time:: 162 | 163 | SP500 = dynamic(features=SP500Index, discount=0.99, name='SP500', w=1e7) 164 | 165 | The input :attr:`features` for :class:`dynamic` should be a list of 166 | lists, since multi-dimension features are allowed. Following is one 167 | simple example:: 168 | 169 | Features = [[2000], [2010], [2020], [2030]] 170 | Features = [[1.0, 2.0], [1.0, 3.0], [3.0, 3.0]] 171 | 172 | 173 | Auto-regression 174 | ``````````````` 175 | The :class:`autoReg` class constructs the auto-regressive component on 176 | the model, i.e., the direct linear or non-linear dependency between 177 | the current observation and the previous days.
User needs to specify 178 | the number of days of the dependency:: 179 | 180 | AR3 = autoReg(degree=3, discount=0.99, name='ar3', w=1e7) 181 | 182 | There was once a `data` argument needed for constructing autoregression 183 | features but it is now deprecated. :class:`autoReg` can now fetch the 184 | data directly from the main :class:`dlm` class and there is no need to provide 185 | it during instantiation. 186 | 187 | In this example, the latent states for Auto-regression are aligned in a 188 | way of [today - 3, today - 2, today - 1]. So when fetching the 189 | coefficients from the latent states, this will be the correct order to 190 | read the coefficients. 191 | 192 | Long-seasonality 193 | ```````````````` 194 | The :class:`longSeason` class is a complement class for 195 | :class:`seasonality`. It allows constructing seasonality component that 196 | does not change every step. For example, the time unit is day, but 197 | user wants to add a monthly seasonality, then :class:`longSeason` is 198 | the correct choice:: 199 | 200 | monthly = longSeason(period=12, stay=30, data=data, name='monthly', w=1e7) 201 | 202 | These five classes of model components offer abundant modeling 203 | possibilities of the Bayesian dynamic linear model. Users can construct 204 | very complicated models using these components, such as hourly, weekly or 205 | monthly periodicity and holiday indicator and many other features. 206 | 207 | Model fitting 208 | ------------- 209 | 210 | As mentioned before, the fitting of the dlm is fulfilled by a modified 211 | Kalman filter. Once the user finished constructing the model by adding 212 | different components, the :class:`dlm` will compute all the necessary 213 | quantities internally for using the Kalman filter.
So users can simply 214 | call :func:`dlm.fitForwardFilter`, :func:`dlm.fitBackwardSmoother` or 215 | even simply :func:`dlm.fit` to fit both forward filter and backward 216 | smoother:: 217 | 218 | myDLM.fitForwardFilter() 219 | myDLM.fitBackwardSmoother() 220 | myDLM.fit() 221 | 222 | The :func:`dlm.fitForwardFilter` is implemented in an online 223 | manner. It keeps an internal count on the filtered dates and once new 224 | data comes in, it only filters the new data without touching the 225 | existing results. In addition, this function also allows a rolling 226 | window fitting on the data, i.e., there will be a moving window and 227 | for each date, the Kalman filter will only use the data within the 228 | window to filter the observation. This is equivalent to the model 229 | only remembering a fixed length of dates:: 230 | 231 | myDLM.fitForwardFilter(useRollingWindow=True, windowLength=30) 232 | myDLM.fitBackwardSmoother() 233 | 234 | For :func:`dlm.backwardSmoother`, it has to use the whole time series 235 | to smooth the latent states once new data comes in. The smoothing 236 | provides a good retrospective analysis on our past decision of the 237 | data. For example, we might initially believe the time series is 238 | stable, while that could be a random behavior within a volatile time 239 | series, and the user learns this from the smoother. 240 | 241 | Once the model fitting is completed, users can fetch the filtered or 242 | smoothed results from :class:`dlm`:: 243 | 244 | myDLM.getMean(filterType='forwardFilter') 245 | myDLM.getMean(filterType='backwardSmoother') 246 | myDLM.getMean(filterType='predict') 247 | 248 | myDLM.getVar(filterType='forwardFilter') 249 | myDLM.getVar(filterType='backwardSmoother') 250 | myDLM.getVar(filterType='predict') 251 | 252 | The :class:`dlm` computes a wide variety of model quantities that 253 | can be extracted by the user.
For example, users can get the filtered 254 | states and covariance by typing:: 255 | 256 | myDLM.getLatentState(filterType='forwardFilter') 257 | myDLM.getLatentState(filterType='backwardSmoother') 258 | 259 | myDLM.getLatentCov(filterType='forwardFilter') 260 | myDLM.getLatentCov(filterType='backwardSmoother') 261 | 262 | This can be specified for an individual component. For example, assume the 263 | model contains a :class:`trend` component with a name of `trend1`, we 264 | can extract the corresponding latent state only for `trend1` as:: 265 | 266 | myDLM.getLatentState(filterType='forwardFilter', name='trend1') 267 | myDLM.getLatentState(filterType='backwardSmoother', name='trend1') 268 | 269 | myDLM.getLatentCov(filterType='forwardFilter', name='trend1') 270 | myDLM.getLatentCov(filterType='backwardSmoother', name='trend1') 271 | 272 | as well as the mean of `trend1` (evaluation * latent states):: 273 | 274 | myDLM.getMean(filterType='forwardFilter', name='trend1') 275 | myDLM.getVar(filterType='forwardFilter', name='trend1') 276 | 277 | One can also get the confidence interval on the filtered time series:: 278 | 279 | myDLM.getInterval(filterType='forwardFilter', p = 0.99) 280 | 281 | There are also corresponding methods for smoothed and predicted 282 | results. For more detail, please refer to the :class:`dlm` class 283 | documentation. 284 | 285 | Model prediction 286 | ---------------- 287 | :class:`dlm` provides three predict functions: :func:`dlm.predict` and 288 | :func:`dlm.continuePredict` and :func:`dlm.predictN`. The last one is 289 | a wrapper of the former two and is recommended to use. (The former two 290 | will be gradually deprecated).
291 | 292 | The :func:`dlm.predict` is a one-day ahead 293 | prediction function based on a user given date and feature set:: 294 | 295 | # predict next date after the time series 296 | featureDict = {'SP500':[2090], 'page':[1, 2, 3, 4]} 297 | (predictMean, predictVar) = myDLM.predict(date=myDLM.n - 1, featureDict=featureDict) 298 | 299 | The function returns a tuple of predicted mean and predicted variance. 300 | The `featureDict` argument is a dictionary contains the feature 301 | information for :class:`dynamic` component. Suppose the model contains 302 | a one-dimensional dynamic component named `SP500` and another 303 | four-dimensional dynamic component `page`, then the featureDict takes the 304 | following Form:: 305 | 306 | featureDict = {'SP500':[2090], 'page':[1, 2, 3, 4]} 307 | 308 | If the `featureDict` is not supplied but the date is not the last day, 309 | then the algorithm will automatically fetch from the old data about 310 | the feature value of all the dynamic component:: 311 | 312 | # predict a day in the middle 313 | (predictMean, predictVar) = myDLM.predict(date=myDLM.n - 10) 314 | 315 | The algorithm will use the feature on the date of `myDLM.n - 9` in 316 | `featureDict`. If date is the last day but the featureDict is not 317 | provided, then an error will be raised. 
318 | 319 | If the user is interested beyond one-day ahead prediction, they can 320 | use :func:`dlm.continuePredict` for multiple-day ahead prediction, 321 | after using :func:`dlm.predict`:: 322 | 323 | feature1 = {'SP500':[2090], 'page':[10, 20, 30, 40]} 324 | feature2 = {'SP500':[2010], 'page':[11, 21, 31, 41]} 325 | feature3 = {'SP500':[1990], 'page':[12, 22, 32, 42]} 326 | 327 | # one-day ahead prediction after the last day 328 | (predictMean, predictVar) = myDLM.predict(date=myDLM.n - 1, featureDict=feature1) 329 | # we continue to two-day ahead prediction after the last day 330 | (predictMean, predictVar) = myDLM.continuePredict(featureDict=feature2) 331 | # we continue to three-day ahead prediction after the last day 332 | (predictMean, predictVar) = myDLM.continuePredict(featureDict=feature3) 333 | 334 | :func:`dlm.continuePredict` can only be used after :func:`dlm.predict` 335 | for multiple-day prediction. The `featureDict` can also be ignored if 336 | the prediction is requested on dates before the last day and the 337 | features on the predict day can be found from the old data. 338 | 339 | :func:`dlm.predictN` (recommended) predicts over multiple days and is 340 | a wrapper of the two functions above. Using the same example, the 341 | results can be obtained by just called:: 342 | 343 | features = {'SP500':[[2090], [2010], [1990]], 'page':[[10, 20, 30, 344 | 40], [11, 21, 31, 41], [12, 22, 32, 42]]} 345 | (predictMean, predictVar) = myDLM.predictN(N=3, date=myDLM.n-1, 346 | featureDict=features) 347 | 348 | The `predictMean` and `predictVar` will be two lists of three elements 349 | containing the predicted mean and variance. 350 | 351 | Model amending 352 | -------------- 353 | 354 | The user can still add, delete, modify data even when the model has 355 | been constructed. 
356 | 357 | Adding new data 358 | ``````````````` 359 | For adding more data, user can opt to 360 | :func:`dlm.append`:: 361 | 362 | newData = [0, 1, 2] 363 | myDLM.append(newData, component = 'main') 364 | 365 | If the model contains :class:`dynamic` component, the corresponding 366 | features need to be updated as well:: 367 | 368 | newSP500 = [[2000], [2100], [2200]] 369 | myDLM.append(data = newSP500, component = 'SP500') 370 | 371 | Then the user can rerun the forward filter:: 372 | 373 | myDLM.fitForwardFilter() 374 | 375 | The package will continue running the forward filter on the three new 376 | data points. 377 | 378 | Deleting existing data 379 | `````````````````````` 380 | To delete any existing data, user can simply use the :func:`dlm.popout` 381 | function from :class:`dlm` on a specific date, for example:: 382 | 383 | myDLM.popout(0) 384 | 385 | Different from :func:`dlm.append`, :func:`dlm.popout` will be executed 386 | automatically for all components, so the user does not need to conduct 387 | the deletion manually for each component. After the deletion, the 388 | forward filter needs to be rerun following the deleted date:: 389 | 390 | myDLM.fitForwardFilter() 391 | 392 | Again, the package will automatically recognize the date and fit only 393 | the necessary period of time. 394 | 395 | Ignoring a date 396 | ``````````````` 397 | Ignoring is very similar to deleting. The only difference is the time 398 | counts. Because deleting will delete the data entirely, the time 399 | counts will therefore reduce by 1. By contrast, ignoring will treat 400 | the specific date as missing data, so the time count will not 401 | change. This difference is important when periodicity is 402 | concerned. Changing the time counts will have a high impact on 403 | :class:`seasonality` components. 
404 | 405 | :func:`dlm.ignore` simply set the data of a specific date to be None:: 406 | 407 | myDLM.ignore(2) 408 | 409 | modify data 410 | ``````````` 411 | The :class:`dlm` also provides user the ability to modify the data on a 412 | specific date and a specific component. This function enables possible 413 | future extension to interactive anomaly detection and data debugging:: 414 | 415 | myDLM.alter(date = 2, data = 0, component = 'main') 416 | 417 | 418 | Model plotting 419 | -------------- 420 | 421 | This package offers rich ploting options for illustrating the 422 | results. User can simply call :func:`dlm.plot` to directly plot the 423 | results once the models are fitted:: 424 | 425 | myDLM.plot() 426 | 427 | # plot the mean of a given component 428 | myDLM.plot(name=the_component_name) 429 | 430 | # plot the latent state of a given component 431 | myDLM.plotCoef(name=the_component_name) 432 | 433 | User can choose which results to plot via :func:`dlm.turnOn` and 434 | :func:`dlm.turnOff`:: 435 | 436 | myDLM.turnOn('filtered plot') 437 | myDLM.turnOff('predict plot') 438 | myDLM.turnOff('smoothed plot') 439 | 440 | User can also choose whether to plot the confidence interval and 441 | whether plot the results in one figure or separate figures. The 442 | default is to plot the confidence interval and in separate plots. To 443 | change that:: 444 | 445 | myDLM.turnOff('confidence') 446 | myDLM.turnOff('multiple plots') 447 | 448 | The quantile of the confidence interval can be set via 449 | :func:`dlm.setConfidence`:: 450 | 451 | myDLM.setConfidence(p = 0.95) 452 | 453 | Currently there are two types of confidence interval realization. 454 | The default is 'ribbon' and the alternative is 'line'. 
Users can 455 | change the confidence interval plots by:: 456 | 457 | myDLM.setIntervalType('ribbon') 458 | myDLM.setIntervalType('line') 459 | 460 | The default colors for the plots are: 461 | 462 | + original data: 'black' 463 | + filtered results: 'blue' 464 | + one-step ahead prediction: 'green' 465 | + smoothed results: 'red' 466 | 467 | User can change the color setting via :func:`dlm.setColor`. The color 468 | space is the same as the `matplotlib`:: 469 | 470 | myDLM.setColor('filtered plot', 'yellow') 471 | myDLM.setColor('data', 'blue') 472 | 473 | If the user decides to go back to the original setting, they can use 474 | :func:`dlm.resetPlotOptions` to reset all the plot options:: 475 | 476 | myDLM.resetPlotOptions() 477 | 478 | Model Tuning and evaluation 479 | --------------------------- 480 | The discounting factor of DLM determines how fast the model adapts to 481 | the new data. It can be troublesome when users actually want the 482 | model itself to figure that out. The :class:`modelTuner` provides 483 | users an automatic tool for tuning the discounting factors:: 484 | 485 | from pydlm import modelTuner 486 | myTuner = modelTuner(method='gradient_descent', loss='mse') 487 | tunedDLM = myTuner.tune(myDLM, maxit=100) 488 | 489 | The tuned discounting factor will be set inside the `tunedDLM`. Users 490 | can examine the tuned value from `myTuner`:: 491 | 492 | tuned_discounts = myTuner.getDiscounts() 493 | 494 | After tuning, `myDLM` will remain unchanged and `tunedDLM` will 495 | contain the tuned discounting factors and will be in uninitialized 496 | status. Users need to run:: 497 | 498 | tunedDLM.fit() 499 | 500 | before any other analysis on `tunedDLM`. 
The :class:`dlm` also provides 501 | a simpler way to tune the discounting factor if the user would like 502 | `myDLM` to be altered directly:: 503 | 504 | myDLM.tune() 505 | 506 | The tuner makes use of the MSE (one-day ahead prediction loss) and the 507 | gradient descent algorithm to tune the discounting factor to achieve 508 | the minimum loss. The discounting factors are assumed to be different 509 | across components but the same within a component. For now, only the 510 | MSE loss and the gradient descent algorithm are supported. 511 | 512 | To ease the evaluation of the performance of the model fitting, the 513 | model also provides the residual time series and the one-day ahead 514 | prediction error via:: 515 | 516 | mse = myDLM.getMSE() 517 | residual = myDLM.getResidual(filterType='backwardSmoother') 518 | 519 | Users can use these values to evaluate and choose the optimal model. 520 | 521 | Advanced Settings 522 | ----------------- 523 | 524 | This part of the settings relates closely to the algorithm behavior and 525 | offers some advanced features, some of which are still under 526 | development. Currently implemented is the :func:`dlm.stableMode` 527 | function and the :func:`dlm.evolveMode`. The :func:`dlm.stableMode` is 528 | turned on by default, and you can turn it off by:: 529 | 530 | myDLM.stableMode(False) 531 | 532 | This mode helps increase the numerical stability of the :class:`dlm` 533 | when a small discounting factor is used. Details about the discounting 534 | factor will be covered in the next section. The :func:`dlm.evolveMode` is 535 | used to control how different components evolve over time. See 536 | Harrison and West (1999, Page 202). They could 537 | evolve independently, which is equivalent to assuming the innovation 538 | matrix is a block-diagonal matrix. 
The default assumption is 539 | 'independent' and to change to 'dependent', we can simply type:: 540 | 541 | myDLM.evolveMode('dependent') 542 | 543 | The difference between 'independent' and 'dependent' is best explained 544 | when there are multiple components with different discounting factor 545 | and one of them is One. In the 'dependent' mode, the smoothed latent 546 | states of the component with discount factor 1 will be a value 547 | fluctuating around a constant, while in the 'independent' mode, it 548 | will be an exact constant. User can choose which to use depending on 549 | their own use case. 550 | 551 | In the future, following functionalities are planned to be added: 552 | feature selection among dynamic components, factor models for high 553 | dimensional latent states. 554 | -------------------------------------------------------------------------------- /doc/source/simple_example.rst: -------------------------------------------------------------------------------- 1 | .. py:currentmodule:: pydlm 2 | 3 | A simple example 4 | ================ 5 | 6 | In this section, we give a simple example on linear regression to 7 | illustrate how to use the `pydlm` for analyzing data. The data is 8 | generated via the following process:: 9 | 10 | import numpy as np 11 | n = 100 12 | a = 1.0 + np.random.normal(0, 5, n) # the intercept 13 | x = np.random.normal(0, 2, n) # the control variable 14 | b = 3.0 # the coefficient 15 | y = a + b * x 16 | 17 | In the above code, `a` is the baseline random walk centered around 1.0 18 | and `b` is the coefficient for a control variable. The goal is to 19 | decompose `y` and learn the value of `a` and `b`. 
We first build the 20 | model:: 21 | 22 | from pydlm import dlm, trend, dynamic 23 | mydlm = dlm(y) 24 | mydlm = mydlm + trend(degree=0, discount=0.98, name='a', w=10.0) 25 | mydlm = mydlm + dynamic(features=[[v] for v in x], discount=1, name='b', w=10.0) 26 | 27 | In the model, we add two components :class:`trend` and 28 | :class:`dynamic`. The trend `a` is one of the systematical components 29 | that used to characterize a time series, and trend is particularly 30 | suitable for this case. `degree=0` indicates this is a constant and 31 | `degree=1` indicates a line and so on so forth. It has a 32 | discount factor of 0.98 as we believe the baseline can gradually shift 33 | overtime and we specify a prior covariance with 10.0 on the 34 | diagonal. The dynamic component `b` is modeling the regression 35 | component. We specify its discounting factor to be 1.0 since we 36 | believe `b` should be a constant. The :class:`dynamic` class only accepts 2-d 37 | list for feature arugment (since the control variable could be 38 | multi-dimensional). Thus, we change `x` to 2d list. In addition, we 39 | believe these two processes `a` and `b` evolve independently and set 40 | (This is currently the default assumption, so actually no need to set):: 41 | 42 | mydlm.evolveMode('independent') 43 | 44 | This can also be set to 'dependent' if the computation efficiency is a 45 | concern. The default prior on the covariance of each component is a 46 | diagonal matrix with 1e7 on the diagonal, we changed this value in 47 | building the component (more details please refer to the user manual). 48 | The prior on the observational noise (default to 1.0) can be set by:: 49 | 50 | mydlm.noisePrior(2.0) 51 | 52 | We then fit the model by typing:: 53 | 54 | mydlm.fit() 55 | 56 | After some information printed on the screen, we are done (yah! :p) 57 | and we can fetch and examine our results. 
We 58 | first visualize the fitted results and see how well the model fits the 59 | data:: 60 | 61 | mydlm.plot() 62 | 63 | The result shows 64 | 65 | .. figure:: ./img/example_plot_all.png 66 | 67 | It looks pretty nice for the one-day ahead prediction accuracy. 68 | We can also plot the two coefficients `a` and `b` and see how they 69 | change when more data is added:: 70 | 71 | mydlm.turnOff('predict') 72 | mydlm.plotCoef(name='a') 73 | mydlm.plotCoef(name='b') 74 | 75 | and we have 76 | 77 | .. image:: ./img/example_plot_a.png 78 | :width: 49% 79 | .. image:: ./img/example_plot_b.png 80 | :width: 49% 81 | 82 | We see that the latent state of `b` quickly shift from 0 (which is our 83 | initial guess on the parameter) to around 3.0 and the confidence 84 | interval explodes and then narrows down as more data is added. 85 | 86 | Once we are happy about the result, we can fetch the results::: 87 | 88 | # get the smoothed results 89 | smoothedResult = mydlm.getMean(filterType='backwardSmoother') 90 | smoothedVar = mydlm.getVar(filterType='backwardSmoother') 91 | smoothedCI = mydlm.getInterval(filterType='backwardSmoother') 92 | 93 | # get the coefficients 94 | coef_a = mydlm.getLatentState(filterType='backwardSmoother', name='a') 95 | coef_a_var = mydlm.getLatentCov(filterType='backwardSmoother', name='a') 96 | coef_b = mydlm.getLatentState(filterType='backwardSmoother', name='b') 97 | coef_b_var = mydlm.getLatentCov(filterType='backwardSmoother', name='b') 98 | 99 | We can then use `coef_a` and `coef_b` for further analysis. 
If we 100 | want to predict the future observation based on the current data, we 101 | can do:: 102 | 103 | # prepare the new feature 104 | newData1 = {'b': [5]} 105 | # one-day ahead prediction from the last day 106 | (predictMean, predictVar) = mydlm.predict(date=mydlm.n-1, featureDict=newData1) 107 | 108 | # continue predicting for next day 109 | newData2 = {'b': [4]} 110 | (predictMean, predictVar) = mydlm.continuePredict(featureDict=newData2) 111 | 112 | # continue predicting for the third day 113 | newData3 = {'b': [3]} 114 | (predictMean, predictVar) = mydlm.continuePredict(featureDict=newData3) 115 | 116 | or using the simpler :func:`dlm.predictN`:: 117 | 118 | newData = {'b': [[5], [4], [3]]} 119 | (predictMean, predictVar) = mydlm.predictN(N=3, date=mydlm.n-1, featureDict=newData) 120 | -------------------------------------------------------------------------------- /examples/unemployment_insurance/main.py: -------------------------------------------------------------------------------- 1 | #================================================================== 2 | # 3 | # This example use the data from Google data science blog post 4 | # http://www.unofficialgoogledatascience.com/2017/07/fitting-bayesian-structural-time-series.html 5 | # 6 | # Make sure your working directly is pointing to the folder where 7 | # `main.py` locates and `data.csv` is in the same folder. 8 | # 9 | #================================================================== 10 | 11 | # Read data file 12 | import os 13 | this_dir = os.getcwd() 14 | DATA_PATH = os.path.join(this_dir, "data.csv") 15 | data_file = open(DATA_PATH, 'r') 16 | 17 | variables = data_file.readline().strip().split(',') 18 | data_map = {} 19 | for var in variables: 20 | data_map[var] = [] 21 | 22 | for line in data_file: 23 | for i, data_piece in enumerate(line.strip().split(',')): 24 | data_map[variables[i]].append(float(data_piece)) 25 | 26 | # Extract and store the data. 
27 | time_series = data_map[variables[0]] 28 | features = [[data_map[variables[j]][i] for j in range(1, len(variables)) ] 29 | for i in range(len(time_series))] 30 | 31 | # Build a simple model 32 | from pydlm import dlm, trend, seasonality 33 | 34 | # A linear trend 35 | linear_trend = trend(degree=1, discount=0.95, name='linear_trend', w=10) 36 | # A seasonality 37 | seasonal52 = seasonality(period=52, discount=0.99, name='seasonal52', w=10) 38 | 39 | simple_dlm = dlm(time_series) + linear_trend + seasonal52 40 | simple_dlm.fit() 41 | 42 | simple_dlm.getMean(filterType='backwardSmoother', name='linear_trend') 43 | simple_dlm.getMean(filterType='backwardSmoother', name='seasonal52') 44 | # Plot the prediction give the first 350 weeks and forcast the next 200 weeks. 45 | simple_dlm.predictN(N=200, date=350) 46 | # Plot the prediction give the first 250 weeks and forcast the next 200 weeks. 47 | simple_dlm.predictN(N=200, date=250) 48 | 49 | # Build a dynamic regression model 50 | from pydlm import dynamic 51 | regressor10 = dynamic(features=features, discount=1.0, name='regressor10', w=10) 52 | drm = dlm(time_series) + linear_trend + seasonal52 + regressor10 53 | drm.fit() 54 | 55 | drm.getMean(filterType='backwardSmoother', name='linear_trend') 56 | drm.getMean(filterType='backwardSmoother', name='seasonal52') 57 | drm.getMean(filterType='backwardSmoother', name='regressor10') 58 | # Predict given the first 300 weeks and forcast the next 150 weeks. 59 | drm.predictN(N=150, date=300) 60 | # Predict given the first 250 weeks and forcast the next 200 weeks. 
61 | drm.predictN(N=200, date=250) 62 | -------------------------------------------------------------------------------- /pydlm_lite/__init__.py: -------------------------------------------------------------------------------- 1 | # This is the PyDLM package 2 | 3 | __all__ = ['dlm', 'trend', 'seasonality', 'dynamic', 'autoReg', 'longSeason', 'modelTuner'] 4 | 5 | from pydlm_lite.dlm import dlm 6 | from pydlm_lite.modeler.trends import trend 7 | from pydlm_lite.modeler.seasonality import seasonality 8 | from pydlm_lite.modeler.dynamic import dynamic 9 | from pydlm_lite.modeler.autoReg import autoReg 10 | from pydlm_lite.modeler.longSeason import longSeason 11 | from pydlm_lite.tuner.dlmTuner import modelTuner 12 | -------------------------------------------------------------------------------- /pydlm_lite/base/__init__.py: -------------------------------------------------------------------------------- 1 | # the base kalmanFilter for filtering 2 | 3 | #__all__ = ['kalmanFilter', 'baseModel', 'tools'] 4 | 5 | #import pydlm.base.kalmanFilter 6 | #from .tools import tools 7 | #from .baseModel import baseModel 8 | -------------------------------------------------------------------------------- /pydlm_lite/base/baseModel.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================================================= 3 | 4 | Code for the base model structure 5 | 6 | ================================================================= 7 | 8 | This piece of code provides the basic model structure for dynamic linear model. 9 | It stores all the necessary components for kalmanFilter and save the results 10 | 11 | """ 12 | # dependencies 13 | import numpy as np 14 | import pydlm_lite.base.tools as tl 15 | 16 | # define the basic structure for a dlm model 17 | class baseModel: 18 | """ The baseModel class that provides the basic model structure for dlm. 
19 | 20 | Attributes: 21 | transition: the transition matrix G 22 | evaluation: the evaluation F 23 | noiseVar: the variance of the observation noise 24 | sysVar: the covariance of the underlying states 25 | innovation: the incremnent of the latent covariance W 26 | state: the latent states 27 | df: the degree of freedom (= number of data points) 28 | obs: the expectation of the observation 29 | obsVar: the variance of the observation 30 | 31 | Methods: 32 | initializeObservation: initialize the obs and obsVar 33 | validation: validate the matrix dimensions are consistent. 34 | """ 35 | 36 | # define the components of a baseModel 37 | def __init__(self, transition = None, evaluation = None, noiseVar = None, \ 38 | sysVar = None, innovation = None, state = None, df = None): 39 | self.transition = transition 40 | self.evaluation = evaluation 41 | self.noiseVar = noiseVar 42 | self.sysVar = sysVar 43 | self.innovation = innovation 44 | self.state = state 45 | self.df = df 46 | self.obs = None 47 | self.obsVar = None 48 | 49 | # a hidden data field used only for model prediction 50 | self.prediction = __model__() 51 | 52 | # initialize the observation mean and variance 53 | def initializeObservation(self): 54 | """ Initialize the value of obs and obsVar 55 | 56 | """ 57 | self.validation() 58 | self.obs = np.dot(self.evaluation, self.state) 59 | self.obsVar = np.dot(np.dot(self.evaluation, self.sysVar), self.evaluation.T) \ 60 | + self.noiseVar 61 | 62 | # checking if the dimension matches with each other 63 | def validation(self): 64 | """ Validate the model components are consistent 65 | 66 | """ 67 | # check symmetric 68 | tl.checker.checkSymmetry(self.transition) 69 | tl.checker.checkSymmetry(self.sysVar) 70 | if self.innovation is not None: 71 | tl.checker.checkSymmetry(self.innovation) 72 | 73 | # check wether dimension match 74 | tl.checker.checkMatrixDimension(self.transition, self.sysVar) 75 | if self.innovation is not None: 76 | 
tl.checker.checkMatrixDimension(self.transition, self.innovation) 77 | tl.checker.checkVectorDimension(self.evaluation, self.transition) 78 | tl.checker.checkVectorDimension(self.state, self.transition) 79 | 80 | 81 | # define an inner class to store intermediate results 82 | class __model__: 83 | 84 | # to store result for prediction 85 | def __init__(self, step = 0, state = None, obs = None, sysVar = None, obsVar = None): 86 | self.step = 0 87 | self.state = state 88 | self.obs = obs 89 | self.sysVar = sysVar 90 | self.obsVar = obsVar 91 | -------------------------------------------------------------------------------- /pydlm_lite/base/kalmanFilter.py: -------------------------------------------------------------------------------- 1 | """ 2 | =============================================== 3 | 4 | Base code for running Kalman filter 5 | 6 | =============================================== 7 | 8 | This module implements a Kalman filter that is slightly different fromt the 9 | standard one, following West and Harrison (1999). This Kalman filter accepts 10 | one-dimension discounting factor to adaptively learn the innovation matrix 11 | itself, instead of accepting it from the user. (Although such option is still 12 | provided) 13 | 14 | """ 15 | # This code take care of the Kalman filter 16 | import numpy as np 17 | import pydlm_lite.base.tools as tl 18 | 19 | # Define the class of Kalman filter which offers a forward filter 20 | # backward smoother and backward sampler for one-step move 21 | 22 | class kalmanFilter: 23 | """ The kalmanFilter class the provide the basic functionalities 24 | 25 | Attributes: 26 | discount: the discounting factor determining how much information to carry on 27 | updateInnovation: indicate whether the innovation matrix should be updated. 28 | default to True. 
29 | 30 | Methods: 31 | predict: predict one step ahead of the current state 32 | forwardFilter: one step filter on the model given a new observation 33 | backwardSmoother: one step backward smooth given the future model and the 34 | filtered state and systematic covariance 35 | backwardSampler: similar to backwardSmoother, using sampling instead of 36 | deterministic equations. 37 | updateDiscount: for updating the discount factors 38 | """ 39 | 40 | def __init__(self, discount=[0.99], \ 41 | updateInnovation='whole', 42 | index=None): 43 | """ Initializing the kalmanFilter class 44 | 45 | Args: 46 | discount: the discounting factor, could be a vector 47 | updateInnovation: the indicator for whether updating innovation matrix 48 | 49 | """ 50 | 51 | self.__checkDiscount__(discount) 52 | self.discount = np.matrix(np.diag(1 / np.sqrt(np.array(discount)))) 53 | self.updateInnovation = updateInnovation 54 | self.index = index 55 | 56 | def predict(self, model, dealWithMissingEvaluation = False): 57 | """ Predict the next states of the model by one step 58 | 59 | Args: 60 | model: the @baseModel class provided all necessary information 61 | dealWithMissingValue: indicate whether we need to treat the missing value. 
62 | it will be turned off when used in forwardFilter as 63 | missing cases have already been address by forwardFilter 64 | Returns: 65 | The predicted result is stored in 'model.prediction' 66 | 67 | """ 68 | # check whether evaluation has missing data, if so, we need to take care of it 69 | if dealWithMissingEvaluation: 70 | loc = self._modifyTransitionAccordingToMissingValue(model) 71 | 72 | # if the step number == 0, we use result from the model state 73 | if model.prediction.step == 0: 74 | model.prediction.state = np.dot(model.transition, model.state) 75 | model.prediction.obs = np.dot(model.evaluation, model.prediction.state) 76 | model.prediction.sysVar = np.dot(np.dot(model.transition, model.sysVar), 77 | model.transition.T) 78 | 79 | # update the innovation 80 | if self.updateInnovation == 'whole': 81 | self.__updateInnovation__(model) 82 | elif self.updateInnovation == 'component': 83 | self.__updateInnovation2__(model) 84 | 85 | # add the innovation to the system variance 86 | model.prediction.sysVar += model.innovation 87 | 88 | model.prediction.obsVar = np.dot(np.dot(model.evaluation, \ 89 | model.prediction.sysVar), \ 90 | model.evaluation.T) + model.noiseVar 91 | model.prediction.step = 1 92 | 93 | # otherwise, we use previous result to predict next time stamp 94 | else: 95 | model.prediction.state = np.dot(model.transition, model.prediction.state) 96 | model.prediction.obs = np.dot(model.evaluation, model.prediction.state) 97 | model.prediction.sysVar = np.dot(np.dot(model.transition, \ 98 | model.prediction.sysVar),\ 99 | model.transition.T) 100 | model.prediction.obsVar = np.dot(np.dot(model.evaluation, \ 101 | model.prediction.sysVar), \ 102 | model.evaluation.T) + model.noiseVar 103 | model.prediction.step += 1 104 | 105 | # recover the evaluation and the transition matrix 106 | if dealWithMissingEvaluation: 107 | self._recoverTransitionAndEvaluation(model, loc) 108 | 109 | def forwardFilter(self, model, y, dealWithMissingEvaluation = False): 
110 | """ The forwardFilter used to run one step filtering given new data 111 | 112 | Args: 113 | model: the @baseModel provided the basic information 114 | y: the newly observed data 115 | 116 | Returns: 117 | The filtered result is stored in the 'model' replacing the old states 118 | 119 | """ 120 | # check whether evaluation has missing data, if so, we need to take care of it 121 | if dealWithMissingEvaluation: 122 | loc = self._modifyTransitionAccordingToMissingValue(model) 123 | 124 | # since we have delt with the missing value, we don't need to double treat it. 125 | self.predict(model, dealWithMissingEvaluation=False) 126 | 127 | # when y is not a missing data 128 | if y is not None: 129 | 130 | # first obtain the predicted status 131 | # we make the prediction step equal to 0 to ensure the prediction 132 | # is based on the model state and innovation is updated correctlly 133 | # model.prediction.step = 0 134 | model.prediction.step = 0 135 | 136 | # the prediction error and the correction matrix 137 | err = y - model.prediction.obs 138 | correction = np.dot(model.prediction.sysVar, model.evaluation.T) \ 139 | / model.prediction.obsVar 140 | 141 | # update new states 142 | model.df += 1 143 | lastNoiseVar = model.noiseVar # for updating model.sysVar 144 | model.noiseVar = model.noiseVar * \ 145 | (1.0 - 1.0 / model.df + \ 146 | err * err / model.df / model.prediction.obsVar) 147 | 148 | model.state = model.prediction.state + correction * err 149 | 150 | model.sysVar = model.noiseVar[0, 0] / lastNoiseVar[0, 0] * \ 151 | (model.prediction.sysVar - np.dot(correction, correction.T) * \ 152 | model.prediction.obsVar[0, 0]) 153 | 154 | model.obs = np.dot(model.evaluation, model.state) 155 | model.obsVar = np.dot(np.dot(model.evaluation, model.sysVar), \ 156 | model.evaluation.T) + model.noiseVar 157 | # update the innovation using discount 158 | # model.innovation = model.sysVar * (1 / self.discount - 1) 159 | 160 | # when y is missing, then we update the status 
# --- pydlm_lite/base/kalmanFilter.py (continued) ---
# (Tail of forwardFilter(), whose definition starts earlier in the file.
#  This branch handles a missing observation: the posterior is simply set
#  to the prior, i.e., the model is updated)
# by the predicted results
        else:
            # we do not update the model.predict.step because we need to take care
            # of the case [5, None, None, None]. In such case, we do not add more
            # innovation, because no information is coming in.
            # This is correct because
            # 1. for the first 'None', the step starts from 0 because '5' appears before
            # 2. for the second 'None', the step starts from 1, but the prediction.state
            #    is correct, because now model.state = model.prediction.state
            # 3. The last 'None' follows the same
            model.state = model.prediction.state
            model.sysVar = model.prediction.sysVar
            model.obs = model.prediction.obs
            model.obsVar = model.prediction.obsVar

        # recover the evaluation and the transition matrix
        if dealWithMissingEvaluation:
            self._recoverTransitionAndEvaluation(model, loc)

    # The backward smoother for a given unsmoothed state at time t.
    def backwardSmoother(self, model, rawState, rawSysVar):
        """ The backwardSmoother for one step backward smoothing

        Args:
            model: the @baseModel used for backward smoothing, the model shall store
                   the following information
                   model.state: the last smoothed states (t + 1)
                   model.sysVar: the last smoothed system variance (t + 1)
                   model.transition: the transition at time t + 1
                   model.evaluation: the evaluation vector at time t
                   model.prediction.sysVar: the predicted system variance for time t + 1
                   model.prediction.state: the predicted state for time t + 1
            rawState: the filtered (unsmoothed) state at time t
            rawSysVar: the filtered (unsmoothed) system variance at time t

        Returns:
            The smoothed results are stored in the 'model' replacing the filtered result.
        """

        # check whether evaluation has missing data, if so, we need to take care of it
        # if dealWithMissingEvaluation:
        #     loc = self._modifyTransitionAccordingToMissingValue(model)

        #### use generalized inverse to ensure the computation stability #######
        predSysVarInv = self._gInverse(model.prediction.sysVar)
        ################################################

        # smoothing gain (cf. the Rauch-Tung-Striebel smoother):
        # backward = P_t * G_{t+1}' * P_{t+1|t}^{-1}
        backward = np.dot(np.dot(rawSysVar, model.transition.T), predSysVarInv)
        model.state = rawState + np.dot(backward, (model.state - model.prediction.state))
        model.sysVar = rawSysVar + \
            np.dot(np.dot(backward, \
                          (model.sysVar - model.prediction.sysVar)), backward.T)
        model.obs = np.dot(model.evaluation, model.state)
        model.obsVar = np.dot(np.dot(model.evaluation, model.sysVar), \
                              model.evaluation.T) + model.noiseVar

        # recover the evaluation and the transition matrix
        # if dealWithMissingEvaluation:
        #     self._recoverTransitionAndEvaluation(model, loc)

    def backwardSampler(self, model, rawState, rawSysVar):
        """ The backwardSampler for one step backward sampling

        Args:
            model: the @baseModel used for backward sampling, the model shall store
                   the following information
                   model.state: the last smoothed states (t + 1)
                   model.sysVar: the last smoothed system variance (t + 1)
                   model.transition: the transition at time t + 1
                   model.evaluation: the evaluation vector at time t
                   model.prediction.sysVar: the predicted system variance for time t + 1
                   model.prediction.state: the predicted state for time t + 1
            rawState: the filtered (unsmoothed) state at time t
            rawSysVar: the filtered (unsmoothed) system variance at time t

        Returns:
            The sampled results are stored in the 'model' replacing the filtered result.

        Note:
            Uses np.random.multivariate_normal, so the result depends on the
            global numpy random state (non-deterministic unless seeded).
        """
        #### use generalized inverse to ensure the computation stability #######
        predSysVarInv = self._gInverse(model.prediction.sysVar)
        ################################################

        backward = np.dot(np.dot(rawSysVar, model.transition.T), predSysVarInv)
        model.state = rawState + np.dot(backward, (model.state - model.prediction.state))
        model.sysVar = rawSysVar + \
            np.dot(np.dot(backward, \
                          (model.sysVar - model.prediction.sysVar)), backward.T)
        # draw the latent state from its smoothed distribution
        model.state = np.matrix(np.random.multivariate_normal(model.state.A1, \
                                                              model.sysVar)).T
        model.obs = np.dot(model.evaluation, model.state)
        model.obsVar = np.dot(np.dot(model.evaluation, model.sysVar), \
                              model.evaluation.T) + model.noiseVar
        # draw the observation given the sampled state
        model.obs = np.matrix(np.random.multivariate_normal(model.obs.A1, \
                                                            model.obsVar)).T

    # for updating the discounting factor
    def updateDiscount(self, newDiscount):
        """ For updating the discounting factor

        Args:
            newDiscount: the new discount factor, one entry per latent state
                         dimension, each within [0, 1].
        """
        self.__checkDiscount__(newDiscount)
        # Stored as diag(1 / sqrt(discount)); __updateInnovation__ relies on
        # this form, since D * P * D - P = P / discount - P entry-wise.
        # NOTE(review): a discount entry of exactly 0 yields a division by
        # zero here - confirm callers never pass 0.
        self.discount = np.matrix(np.diag(1 / np.sqrt(newDiscount)))

    def __checkDiscount__(self, discount):
        """ Check whether each discount factor is within [0, 1]

        Raises:
            tl.matrixErrors: if any entry falls outside [0, 1].
        """
        for i in range(len(discount)):
            if discount[i] < 0 or discount[i] > 1:
                raise tl.matrixErrors('discount factor must be between 0 and 1')

    # update the innovation
    def __updateInnovation__(self, model):
        """ update the innovation matrix of the model

        Computes D * R * D - R where D = diag(1/sqrt(discount)) and
        R = model.prediction.sysVar, i.e. the extra variance injected by
        discounting.
        """
        model.innovation = np.dot(np.dot(self.discount, model.prediction.sysVar), \
                                  self.discount) - model.prediction.sysVar

    # update the innovation
    def __updateInnovation2__(self, model):
        """ update the innovation matrix of the model, but only for component
        indepdently. (i.e., only add innovation to block diagonals, not on off
        block diagonals)

        """
        innovation = np.dot(np.dot(self.discount, model.prediction.sysVar), \
                            self.discount) - model.prediction.sysVar
        model.innovation = np.matrix(np.zeros(innovation.shape))
        # copy only the diagonal blocks, one block per component
        for name in self.index:
            indx = self.index[name]
            model.innovation[indx[0]: (indx[1] + 1), indx[0]: (indx[1] + 1)] = \
                innovation[indx[0]: (indx[1] + 1), indx[0]: (indx[1] + 1)]

    # a generalized inverse of matrix A
    def _gInverse(self, A):
        """ A generalized (Moore-Penrose) inverse of matrix A

        """
        return np.linalg.pinv(A)

    def _modifyTransitionAccordingToMissingValue(self, model):
        """ When evaluation contains None value, we modify the corresponding entries
        in the transition to deal with the missing value

        Returns:
            loc: the list of column indices that were zeroed out, needed by
                 _recoverTransitionAndEvaluation to undo the change.
        """
        loc = []
        for i in range(model.evaluation.shape[1]):
            if model.evaluation[0, i] is None:
                loc.append(i)
                model.transition[i, i] = 0.0
                model.evaluation[0, i] = 0.0
        return loc

    def _recoverTransitionAndEvaluation(self, model, loc):
        """ We recover the transition and evaluation use the results from
        _modifyTransitionAccordingToMissingValue

        NOTE(review): this restores the transition diagonal to 1.0, which
        assumes the original diagonal entries were 1.0 - confirm this holds
        for every component type (e.g. seasonality transitions).
        """
        for i in loc:
            model.evaluation[0, i] = None
            model.transition[i, i] = 1.0
# --- pydlm_lite/base/tools.py ---
import math


# define the error class for exceptions
class matrixErrors(Exception):
    """Exception raised when matrix dimensions or values are invalid."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


# The class to check matrixErrors
class checker:
    """Static helpers validating matrix dimensions; raise matrixErrors on failure."""

    # checking if two matrix has the same dimension
    @staticmethod
    def checkMatrixDimension(A, B):
        if A.shape != B.shape:
            raise matrixErrors('The dimensions do not match!')

    # checking if a vector has the dimension as matrix
    @staticmethod
    def checkVectorDimension(v, A):
        # v is accepted if ANY of its two axes matches ANY axis of A.
        vDim = v.shape
        ADim = A.shape
        if vDim[0] != ADim[0] and vDim[0] != ADim[1] and \
           vDim[1] != ADim[0] and vDim[1] != ADim[1]:
            raise matrixErrors('The dimensions do not match!')

    # checking if a matrix is symmetric
    @staticmethod
    def checkSymmetry(A):
        # NOTE(review): despite the name, this only verifies that A is
        # square; it never compares A against A.T - confirm intent.
        ADim = A.shape
        if ADim[0] != ADim[1]:
            raise matrixErrors('The matrix is not symmetric!')


# a completely unshared copy of lists
def duplicateList(aList):
    """Return a deep, completely unshared copy of a (possibly nested) list."""
    if isinstance(aList, list):
        return list(map(duplicateList, aList))
    return aList


# inverse normal cdf function
def rational_approximation(t):
    """Rational approximation used by normal_CDF_inverse.

    Abramowitz and Stegun formula 26.2.23; the absolute value of the error
    should be less than 4.5e-4.
    """
    c = [2.515517, 0.802853, 0.010328]
    d = [1.432788, 0.189269, 0.001308]
    numerator = (c[2]*t + c[1])*t + c[0]
    denominator = ((d[2]*t + d[1])*t + d[0])*t + 1.0
    return t - numerator / denominator


def normal_CDF_inverse(p):
    """Approximate the standard normal inverse CDF at probability p.

    Args:
        p: a probability strictly inside (0, 1).

    Returns:
        The approximate quantile z such that Phi(z) ~= p.

    Raises:
        ValueError: if p is outside the open interval (0, 1).
    """
    # Explicit exception instead of `assert`, which would be silently
    # stripped when Python runs with -O.
    if not (p > 0.0 and p < 1):
        raise ValueError('p must be strictly between 0 and 1')

    # See article above for explanation of this section.
    if p < 0.5:
        # F^-1(p) = - G^-1(p)
        return -rational_approximation(math.sqrt(-2.0*math.log(p)))
    else:
        # F^-1(p) = G^-1(1-p)
        return rational_approximation(math.sqrt(-2.0*math.log(1.0-p)))


def getInterval(means, var, p):
    """Compute the p-level normal confidence band around the given means.

    Args:
        means: sequence of point estimates.
        var: sequence of variances, aligned with `means`.
        p: the confidence level, e.g. 0.95.

    Returns:
        A tuple (upper, lower) of lists bounding each mean.
    """
    # min(1 - p, p) / 2 maps both p=0.95 and p=0.05 to the 0.025 tail.
    alpha = abs(normal_CDF_inverse(min(1 - p, p) / 2))
    upper = [m + alpha * math.sqrt(v) for m, v in zip(means, var)]
    lower = [m - alpha * math.sqrt(v) for m, v in zip(means, var)]
    return (upper, lower)


# --- pydlm_lite/func/__init__.py ---
# __all__ = ['_dlm']
# import pydlm.func._dlm as _dlm

# --- pydlm_lite/modeler/__init__.py ---
# this module defines the tools for modeling
# __all__ = ['trends', 'seasonality', 'dynamic', 'autoReg', 'longSeason', 'builder']
# import pydlm.modeler.trends as trends
# import pydlm.modeler.seasonality as seasonality
# import pydlm.modeler.dynamic as dynamic
# import pydlm.modeler.builder as builder
# import pydlm.modeler.autoReg as autoReg
# import pydlm.modeler.longSeason as longSeason
# --- pydlm_lite/modeler/autoReg.py ---
"""
===========================================================================

The code for autoregressive components

===========================================================================

This code implements the autoregressive component as a sub-class of dynamic.
Different from the dynamic component, the features in the autoReg is generated
from the data, and updated according to the data. All other features are
similar to @dynamic.

"""
from numpy import matrix
from warnings import warn
from .component import component

import numpy as np
import pydlm_lite.base.tools as tl


class autoReg(component):
    """ The autoReg class allows user to add an autoregressive component to the dlm.
    This code implements the autoregressive component as a child class of
    component. Different from the dynamic component, the features in the
    autoReg is generated from the data, and updated according to the data.

    The latent states of autoReg are aligned in the order of
    [today - degree, today - degree + 1, ..., today - 2, today - 1]. Thus,
    when fetching the latents from autoReg component, use this order to
    correctly align the coefficients.

    Args:
        data (deprecated): Users get a warning if this argument is used.
        degree: the order of the autoregressive component
        discount: the discount factor
        name: the name of the trend component
        w: the value to set the prior covariance. Default to a diagonal
           matrix with 1e7 on the diagonal.
        padding: either 0 or None. The number to be padded for the first degree
                 days, as no previous data is observed to form the feature
                 matrix

    Examples:
        >>> # create a auto regression component:
        >>> autoReg8 = autoReg(degree=8, name='autoreg8', discount = 0.99)
        >>> # change the autoReg8 to have covariance with diagonals are 2 and state 1
        >>> autoReg8.createCovPrior(cov = 2)
        >>> autoReg8.createMeanPrior(mean = 1)

    Attributes:
        d: the degree of autoregressive, i.e., how many days to look back
        data (deprecated): Users get a warning if this argument is used.
        discount: the discounting factor
        name: the name of the component
        padding: either 0 or None. The number to be padded for the first degree
                 days, as no previous data is observed to form the feature
                 matrix
        evaluation: the evaluation matrix for this component
        transition: the transition matrix for this component
        covPrior: the prior guess of the covariance matrix of the latent states
        meanPrior: the prior guess of the latent states

    """

    def __init__(self,
                 data=None,  # DEPRECATED
                 degree=2,
                 discount=0.99,
                 name='ar2',
                 w=100,
                 padding=0):

        if data is not None:
            warn('The data argument in autoReg is deprecated. Please avoid using it.')

        self.componentType = 'autoReg'
        self.d = degree
        self.name = name
        # one discount entry per lag coefficient
        self.discount = np.ones(self.d) * discount
        self.padding = padding

        # Initialize all basic quantities
        self.evaluation = None
        self.transition = None
        self.covPrior = None
        self.meanPrior = None

        # create all basic quantities (the evaluation vector is data-driven,
        # so it is built lazily via updateEvaluation)
        self.createTransition()
        self.createCovPrior(scale=w)
        self.createMeanPrior()

        # record current step in case of lost
        self.step = 0

    def createEvaluation(self, step, data):
        """ The evaluation matrix for auto regressor.

        Args:
            step: the current time step (0-based).
            data: the observed time series used to build the lagged features.

        Raises:
            NameError: if step exceeds the available data length.
              (NameError is the error type used throughout this package.)
        """
        if step > len(data):
            raise NameError("There is no sufficient data for creating autoregressor.")
        # Pad the first `degree` days for which no history exists yet.
        self.evaluation = matrix([[self.padding] * (self.d - step) +
                                  list(data[max(0, (step - self.d)): step])])

    def createTransition(self):
        """ Create the transition matrix.

        For the auto regressor component, the transition matrix is just the
        identity matrix.
        """
        self.transition = np.matrix(np.eye(self.d))

    def createCovPrior(self, cov=None, scale=1e6):
        """ Create the prior covariance matrix for the latent states

        """
        if cov is None:
            self.covPrior = np.matrix(np.eye(self.d)) * scale
        else:
            self.covPrior = cov * scale

    def createMeanPrior(self, mean=None, scale=1):
        """ Create the prior latent state

        """
        if mean is None:
            self.meanPrior = np.matrix(np.zeros((self.d, 1))) * scale
        else:
            self.meanPrior = mean * scale

    def checkDimensions(self):
        """ if user supplies their own covPrior and meanPrior, this can
        be used to check if the dimension matches

        """
        tl.checker.checkVectorDimension(self.meanPrior, self.covPrior)
        print('The dimension looks good!')

    def updateEvaluation(self, date, data):
        """ Rebuild the evaluation vector from the data for a given date. """
        self.createEvaluation(step=date, data=data)
# --- pydlm_lite/modeler/builder.py ---
"""
==============================================================================

Code for builder of a dynamic linear model

==============================================================================

This piece of code provides the basic functionality for constructing the model
of a dlm. It allows flexible modeling by users. User can add to, delete and
view components of a given dlm. Builder will finally assemble all the
components to a final big model.

"""
# this class provide all the model building operations for constructing
# customized model
import numpy as np
from pydlm_lite.base.baseModel import baseModel
from copy import deepcopy
from pydlm_lite.modeler.matrixTools import matrixTools as mt

# The builder will be the main class for constructing dlm.
# It features two kinds of evaluation matrices:
# The static evaluation remains the same over time which is used to
# record the trend and seasonality.
#
# The dynamic evaluation vector changes over time, it is basically
# the other variables that might have impact on the time series.
# We need to update this vector as time going forward.


class builder:
    """ The main modeling part of a dynamic linear model. It allows the users to
    customize their own model. User can add, delete any components like trend
    or seasonality to the builder, or view the existing components. Builder
    will finally assemble all components to a big model for further training
    and inference.

    Attributes:
        model: the model structure from @baseModel, stores all the necessary
               quantities
        initialized: indicates whether the model has been built
        staticComponents: stores all the static components (trend and
                          seasonality)
        dynamicComponents: stores all the dynamic components
        automaticComponents: stores the components whose evaluation vector is
                             derived automatically from the data
        componentIndex: the location of each component in the latent states
        statePrior: the prior mean of the latent state
        sysVarPrior: the prior of the covariance of the latent states
        noiseVar: the prior of the observation noise
        initialDegreeFreedom: the initial degree of freedom.
        discount: the discounting factor, please refer to @kalmanFilter for
                  more details
        renewTerm: used for aiding the stability of the model. The renewTerm
                   is computed according to the discount factor. When the
                   filter goes over certain steps, the information
                   contribution of the previous data has decayed to minimum.
                   We then ignore those days and refit the time series
                   starting from current - renewTerm. Thus, the effective
                   sample size of the dlm is twice renewTerm. When
                   discount = 1, there will be no renewTerm, since all the
                   information will be passed along.
        renewDiscount: the minimum discount of seasonality component,
                       or if there is no seasonality, this will be the minimum
                       discount of all components. Used for computing
                       renewTerm.

    Methods:
        add: add new component
        ls: list out all components
        delete: delete a specific component by its name
        initialize: assemble all the component to construct a big model
        updateEvaluation: update the evaluation matrix of the big model
    """

    # create members
    def __init__(self):

        # the basic model structure for running kalman filter
        self.model = None
        self.initialized = False

        # to store all components. Separate the two as the evaluation
        # for dynamic component needs update each iteration
        # (new) added the third category, which is dynamic but the
        # features can be automatically computed from the data

        # store all the static components, i.e., the evaluation vector
        # do not change over time
        self.staticComponents = {}

        # store all the dynamic components, i.e., the evaluation vector
        # changes over time
        self.dynamicComponents = {}

        # store all the components that are dynamic, but the evaluation
        # vector can be inferred directly from the main data instead
        # of other sources
        self.automaticComponents = {}

        # store the index (location in the latent states) of all components
        # can be used to extract information for each component
        self.componentIndex = {}

        # record the prior guess on the latent state and system covariance
        self.statePrior = None
        self.sysVarPrior = None
        self.noiseVar = None
        self.initialDegreeFreedom = 1

        # record the discount factor for the model
        self.discount = None

        # renew term used to indicate the effective length of data, i.e.,
        # for days before this length will have little impact on
        # the current result
        self.renewTerm = -1.0
        self.renewDiscount = None  # used for adjusting renewTerm

        # flag for determining whether the system info should be printed.
        self._printInfo = True

    # The function that allows the user to add components
    def add(self, component):
        """ Add a new model component to the builder.

        Args:
            component: a model component, any class implements @component class

        """

        self.__add__(component)

    def __add__(self, component):
        if component.componentType == 'dynamic':
            if component.name in self.dynamicComponents:
                raise NameError('Please rename the component to a'
                                + ' different name.')
            self.dynamicComponents[component.name] = component

        if component.componentType == 'autoReg' \
           or component.componentType == 'longSeason':
            if component.name in self.automaticComponents:
                raise NameError('Please rename the component to a'
                                + ' different name.')
            self.automaticComponents[component.name] = component

        if component.componentType == 'trend' \
           or component.componentType == 'seasonality':
            if component.name in self.staticComponents:
                raise NameError('Please rename the component' +
                                ' to a different name.')
            self.staticComponents[component.name] = component

            # we use seasonality's discount to adjust the renewTerm
            if component.componentType == 'seasonality':
                if self.renewDiscount is None:
                    self.renewDiscount = 1.0
                self.renewDiscount = min(self.renewDiscount,
                                         min(component.discount))
        self.initialized = False
        return self

    # print all components to the client
    def ls(self):
        """ List out all the existing components to the model

        """

        if len(self.staticComponents) > 0:
            print('The static components are')
            for name in self.staticComponents:
                comp = self.staticComponents[name]
                print(comp.name + ' (degree = ' + str(comp.d) + ')')
            print(' ')
        else:
            print('There is no static component.')
            print(' ')

        if len(self.dynamicComponents) > 0:
            print('The dynamic components are')
            for name in self.dynamicComponents:
                comp = self.dynamicComponents[name]
                print(comp.name + ' (dimension = ' + str(comp.d) + ')')
            print(' ')
        else:
            print('There is no dynamic component.')
            print(' ')

        if len(self.automaticComponents) > 0:
            print('The automatic components are')
            for name in self.automaticComponents:
                comp = self.automaticComponents[name]
                print(comp.name + ' (dimension = ' + str(comp.d) + ')')
        else:
            print('There is no automatic component.')

    # delete the component that pointed out by the client
    def delete(self, name):
        """ Delete a specific component from dlm by its name.

        Args:
            name: the name of the component. Can be read from ls()

        """

        if name in self.staticComponents:
            del self.staticComponents[name]
        elif name in self.dynamicComponents:
            del self.dynamicComponents[name]
        elif name in self.automaticComponents:
            del self.automaticComponents[name]
        else:
            raise NameError('Such component does not exist!')

        self.initialized = False

    # initialize model for all the quantities
    # noise is the prior guess of the variance of the observed data
    # data is used by auto regressor.
    def initialize(self, data=None, noise=1):
        """ Initialize the model. It constructs the baseModel by assembling all
        quantities from the components.

        Args:
            data: the raw observations, needed by the automatic components
                  (e.g., the auto regressor) to build their initial
                  evaluation vector. Defaults to an empty list.
            noise: the initial guess of the variance of the observation noise.
        """
        # A fresh list per call avoids the shared mutable-default pitfall
        # of the previous `data=[]` signature.
        if data is None:
            data = []

        if len(self.staticComponents) == 0 and \
           len(self.dynamicComponents) == 0 and \
           len(self.automaticComponents) == 0:

            raise NameError('The model must contain at least' +
                            ' one component')

        # construct transition, evaluation, prior state, prior covariance
        if self._printInfo:
            print('Initializing models...')
        transition = None
        evaluation = None
        state = None
        sysVar = None
        self.discount = np.array([])

        # first construct for the static components
        # the evaluation will be treated separately for static or dynamic
        # as the latter one will change over time
        currentIndex = 0  # used for computing the index
        for i in self.staticComponents:
            comp = self.staticComponents[i]
            transition = mt.matrixAddInDiag(transition, comp.transition)
            evaluation = mt.matrixAddByCol(evaluation,
                                           comp.evaluation)
            state = mt.matrixAddByRow(state, comp.meanPrior)
            sysVar = mt.matrixAddInDiag(sysVar, comp.covPrior)
            self.discount = np.concatenate((self.discount, comp.discount))
            self.componentIndex[i] = (currentIndex, currentIndex + comp.d - 1)
            currentIndex += comp.d

        # if the model contains the dynamic part, we add the dynamic components
        if len(self.dynamicComponents) > 0:
            self.dynamicEvaluation = None
            for i in self.dynamicComponents:
                comp = self.dynamicComponents[i]
                comp.updateEvaluation(0)
                transition = mt.matrixAddInDiag(transition, comp.transition)
                evaluation = mt.matrixAddByCol(evaluation,
                                               comp.evaluation)
                state = mt.matrixAddByRow(state, comp.meanPrior)
                sysVar = mt.matrixAddInDiag(sysVar, comp.covPrior)
                self.discount = np.concatenate((self.discount, comp.discount))
                self.componentIndex[i] = (currentIndex,
                                          currentIndex + comp.d - 1)
                currentIndex += comp.d

        # if the model contains the automatic dynamic part, we add
        # them to the builder
        if len(self.automaticComponents) > 0:
            self.automaticEvaluation = None
            for i in self.automaticComponents:
                comp = self.automaticComponents[i]
                comp.updateEvaluation(0, data)
                transition = mt.matrixAddInDiag(transition, comp.transition)
                evaluation = mt.matrixAddByCol(evaluation,
                                               comp.evaluation)
                state = mt.matrixAddByRow(state, comp.meanPrior)
                sysVar = mt.matrixAddInDiag(sysVar, comp.covPrior)
                self.discount = np.concatenate((self.discount, comp.discount))
                self.componentIndex[i] = (currentIndex,
                                          currentIndex + comp.d - 1)
                currentIndex += comp.d

        self.statePrior = state
        self.sysVarPrior = sysVar
        self.noiseVar = np.matrix(noise)
        self.model = baseModel(transition=transition,
                               evaluation=evaluation,
                               noiseVar=np.matrix(noise),
                               sysVar=sysVar,
                               state=state,
                               df=self.initialDegreeFreedom)
        self.model.initializeObservation()

        # compute the renew period
        if self.renewDiscount is None:
            self.renewDiscount = np.min(self.discount)

        if self.renewDiscount < 1.0 - 1e-8:
            self.renewTerm = np.log(0.001 * (1 - self.renewDiscount)) \
                / np.log(self.renewDiscount)

        self.initialized = True
        if self._printInfo:
            print('Initialization finished.')

    # Initialize from another builder exported from other dlm class
    def initializeFromBuilder(self, data, exported_builder):
        """ Initialize this builder by copying components and model states
        from a builder exported by another dlm instance.

        Args:
            data: the raw observations, used to refresh the evaluation vector.
            exported_builder: the builder to copy from.
        """
        # Copy the components
        self.staticComponents = deepcopy(exported_builder.staticComponents)
        self.automaticComponents = deepcopy(exported_builder.automaticComponents)
        self.dynamicComponents = deepcopy(exported_builder.dynamicComponents)
        self.componentIndex = deepcopy(exported_builder.componentIndex)
        self.discount = deepcopy(exported_builder.discount)
        self.initialDegreeFreedom = exported_builder.model.df

        # Copy the model states
        self.statePrior = exported_builder.model.state
        self.sysVarPrior = exported_builder.model.sysVar
        self.noiseVar = exported_builder.model.noiseVar
        # NOTE(review): `transition`/`evaluation` are read directly off the
        # builder, but initialize() only keeps them as locals, so a builder
        # built via initialize() has no such attributes. Confirm whether
        # these should be exported_builder.model.transition / .evaluation.
        self.transition = exported_builder.transition
        self.evaluation = exported_builder.evaluation
        # update the evaluation to the current.
        self.updateEvaluation(step=0, data=data)

        self.model = baseModel(transition=self.transition,
                               evaluation=self.evaluation,
                               noiseVar=self.noiseVar,
                               sysVar=self.sysVarPrior,
                               state=self.statePrior,
                               df=self.initialDegreeFreedom)
        self.model.initializeObservation()

        # compute the renew period
        if self.renewDiscount is None:
            self.renewDiscount = np.min(self.discount)

        if self.renewDiscount < 1.0 - 1e-8:
            self.renewTerm = np.log(0.001 * (1 - self.renewDiscount)) \
                / np.log(self.renewDiscount)

        self.initialized = True
        if self._printInfo:
            print('Initialization finished.')

    # This function allows the model to update the dynamic evaluation vector,
    # so that the model can handle control variables
    # This function should be called only when dynamicComponents is not empty
    # data is used by auto regressor.
    def updateEvaluation(self, step, data):
        """ Update the evaluation matrix of the model to a specific date.
        It loops over all dynamic components and update their evaluation
        matrix and then reconstruct the model evaluation matrix by
        incorporating the new evaluations

        Args:
            step: the date at which the evaluation matrix is needed.
            data: the raw observations, used by the automatic components.

        """

        # update the dynamic evaluation vector
        # We need first update all dynamic components by 1 step
        for i in self.dynamicComponents:
            comp = self.dynamicComponents[i]
            comp.updateEvaluation(step)
            self.model.evaluation[0, self.componentIndex[i][0]:
                                  (self.componentIndex[i][1] + 1)] = comp.evaluation

        for i in self.automaticComponents:
            comp = self.automaticComponents[i]
            comp.updateEvaluation(step, data)
            self.model.evaluation[0, self.componentIndex[i][0]:
                                  (self.componentIndex[i][1] + 1)] = comp.evaluation
# --- pydlm_lite/modeler/component.py ---
"""
=========================================================================

Code for the abstract component

=========================================================================

This piece of code provide the basic building block for the dynamic linear model.
It provide the fundamental struture for all model components. We implement different
components based on this abstract class.

"""
from abc import ABCMeta, abstractmethod

# We define an abstract class which can further be used
# to create different types of model components, including
# trend, seasonality and other structures


class component:
    """ The abstract class provides the basic structure for all model components

    Methods:
        createEvaluation: create the initial evaluation matrix
        createTransition: create the initial transition matrix
        createCovPrior: create a simple prior covariance matrix
        createMeanPrior: create a simple prior latent state
        checkDimensions: if user supplies their own covPrior and meanPrior, this can
                         be used to check if the dimension matches

    """
    # Python-2 style metaclass declaration (this project still supports 2.7,
    # per .travis.yml).
    # NOTE(review): Python 3 ignores the __metaclass__ attribute, so the
    # @abstractmethod markers below are NOT enforced there. Confirm whether
    # enforcement is desired before porting to `metaclass=ABCMeta`.
    __metaclass__ = ABCMeta

    # define the evaluation matrix for the component
    # (the docstrings below were previously dead class-level string
    # statements placed after `pass`; they are now real method docstrings)
    @abstractmethod
    def createEvaluation(self):
        """ Create the evaluation matrix

        """
        pass

    # define the transition matrix for the component
    @abstractmethod
    def createTransition(self):
        """ Create the transition matrix

        """
        pass

    # define the prior distribution for the covariance for the component
    @abstractmethod
    def createCovPrior(self):
        """ Create the prior covariance matrix for the latent states

        """
        pass

    # define the prior distribution for the mean vector for the component
    @abstractmethod
    def createMeanPrior(self):
        """ Create the prior latent state

        """
        pass

    # check the matrix dimensions in case user supplied matrices are wrong
    @abstractmethod
    def checkDimensions(self):
        """ Check the dimensionality of the state and covariance

        """
        pass
# --- pydlm_lite/modeler/dynamic.py ---
"""
=========================================================================

Code for the dynamic component

=========================================================================

This piece of code provide one building block for the dynamic linear model.
It decribes a dynamic component in the time series data. It basically allows
user to supply covariate or controlled variable into the dlm,
and the coefficients of the features will be trained as the latent states.
Examples are holiday indicators, other observed variables and so on.

The name dynamic means that the features are changing over time.

"""
import numpy as np
try:
    # MutableSequence moved to collections.abc in Python 3.3 and was
    # removed from the top-level collections namespace in Python 3.10.
    from collections.abc import MutableSequence
except ImportError:  # Python 2.7 fallback
    from collections import MutableSequence
from copy import deepcopy

import pydlm_lite.base.tools as tl
from .component import component
# create trend component
# We create the trend using the component class


class dynamic(component):
    """ The dynamic component that allows user to add controlled variables,
    providing one building block for the dynamic linear model.
    It decribes a dynamic component in the time series data. It basically allows
    user to supply covariate or controlled variable into the dlm,
    and the coefficients of the features will be trained as the latent states.
    Examples are holiday indicators, other observed variables and so on.

    Args:
        features: the feature matrix of the dynamic component
        discount: the discount factor
        name: the name of the dynamic component
        w: the value to set the prior covariance. Default to a diagonal
           matrix with 1e7 on the diagonal.

    Examples:
        >>> # create a dynamic component:
        >>> features = [[1.0, 2.0] for i in range(10)]
        >>> ctrend = dynamic(features = features, name = 'random', discount = 0.99)
        >>> # change the ctrend to have covariance with diagonals are 2 and state 1
        >>> ctrend.createCovPrior(cov = 2)
        >>> ctrend.createMeanPrior(mean = 1)

    Attributes:
        d: the dimension of the features (number of latent states)
        n: the number of observation
        componentType: the type of the component, in this case, 'dynamic'
        name: the name of the trend component, to be supplied by user
              used in modeling and result extraction
        discount: the discount factor for this component. Details please refer
                  to the @kalmanFilter
        evaluation: the evaluation matrix for this component
        transition: the transition matrix for this component
        covPrior: the prior guess of the covariance matrix of the latent states
        meanPrior: the prior guess of the latent states

    """
    def __init__(self,
                 features=None,
                 discount=0.99,
                 name='dynamic',
                 w=100):

        self.n = len(features)
        self.d = len(features[0])

        if self.hasMissingData(features):
            raise NameError("The current version does not support missing data "
                            "in the features.")

        # deep-copy so later mutation of the caller's list cannot change ours
        self.features = deepcopy(features)
        if isinstance(features, np.matrix):
            self.features = self.features.tolist()
        self.componentType = 'dynamic'
        self.name = name
        self.discount = np.ones(self.d) * discount

        # Initialize all basic quantities
        self.evaluation = None
        self.transition = None
        self.covPrior = None
        self.meanPrior = None

        # create all basic quantities
        self.createEvaluation(0)
        self.createTransition()
        self.createCovPrior(scale=w)
        self.createMeanPrior()

        # record current step in case of lost
        self.step = 0

    def createEvaluation(self, step):
        """ The evaluation matrix for the dynamic component change over time.
        It equals to the value of the features or the controlled variables at a
        given date

        """
        self.evaluation = np.matrix([self.features[step]])

    def createTransition(self):
        """ Create the transition matrix.

        For the dynamic component, the transition matrix is just the identity matrix

        """
        self.transition = np.matrix(np.eye(self.d))

    def createCovPrior(self, cov=None, scale=1e6):
        """ Create the prior covariance matrix for the latent states

        """
        if cov is None:
            self.covPrior = np.matrix(np.eye(self.d)) * scale
        else:
            self.covPrior = cov * scale

    def createMeanPrior(self, mean=None, scale=1):
        """ Create the prior latent state

        """
        if mean is None:
            self.meanPrior = np.matrix(np.zeros((self.d, 1))) * scale
        else:
            self.meanPrior = mean * scale

    def checkDimensions(self):
        """ if user supplies their own covPrior and meanPrior, this can
        be used to check if the dimension matches

        """
        tl.checker.checkVectorDimension(self.meanPrior, self.covPrior)
        print('The dimension looks good!')

    # Recursively check if there is any None data. We currently don't support
    # missing data for features.
142 | def hasMissingData(self, features): 143 | """ Check whether the list contains None 144 | 145 | """ 146 | for item in features: 147 | if isinstance(item, MutableSequence): 148 | if self.hasMissingData(item): 149 | return True 150 | else: 151 | if item is None: 152 | return True 153 | return False 154 | 155 | def updateEvaluation(self, step): 156 | """ update the evaluation matrix to a specific date 157 | This function is used when fitting the forward filter and backward smoother 158 | in need of updating the correct evaluation matrix 159 | 160 | """ 161 | if step < self.n: 162 | self.evaluation = np.matrix([self.features[step]]) 163 | self.step = step 164 | else: 165 | raise NameError('The step is out of range') 166 | 167 | def appendNewData(self, newData): 168 | """ For updating feature matrix when new data is added. 169 | 170 | Args: 171 | newData: is a list of list. The inner list is the feature vector. The outer 172 | list may contain multiple feature vectors. 173 | 174 | """ 175 | if self.hasMissingData(newData): 176 | raise NameError("The current version does not support missing data" + 177 | "in the features.") 178 | 179 | self.features.extend(tl.duplicateList(newData)) 180 | self.n = len(self.features) 181 | 182 | def popout(self, date): 183 | """ For deleting the feature data of a specific date. 184 | 185 | Args: 186 | date: the index of which to be deleted. 187 | 188 | """ 189 | self.features.pop(date) 190 | self.n -= 1 191 | 192 | def alter(self, date, feature): 193 | """ Change the corresponding 194 | feature matrix. 195 | 196 | Args: 197 | date: The date to be modified. 198 | dataPoint: The new feature to be filled in. 
199 | 200 | """ 201 | if self.hasMissingData(feature): 202 | raise NameError("The current version does not support missing data" + 203 | "in the features.") 204 | else: 205 | self.features[date] = feature 206 | 207 | -------------------------------------------------------------------------------- /pydlm_lite/modeler/longSeason.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================================================== 3 | 4 | The code for long seasonality components 5 | 6 | =========================================================================== 7 | 8 | This code implements the long seasonality component as a sub-class of dynamic. 9 | The difference between the long seasonality is that 10 | 1) The seasonality component use each date as a unit and change in a given 11 | periodicy. For example, 1, 2, 3, 4, 1, 2, 3, 4. 12 | 2) However, the long seasonality is capable to group a couple of days as the 13 | basic unit and change in a periodicy. For example, 1, 1, 1, 2, 2, 2, 3, 3, 3, 14 | 4, 4, 4. 15 | The usecase for long seasonality is to model longer seasonality with the 16 | short-term seasonality. For example, the short-term seansonality can be 17 | used to model the day of a weak patten and the long seasonality can be used 18 | to model the week of a month patten in the same model. 19 | Different from the dynamic component, the features in the autoReg is generated 20 | from the data, and updated according to the data. All other features are 21 | similar to @dynamic. 22 | 23 | """ 24 | from numpy import matrix 25 | from .dynamic import dynamic 26 | 27 | 28 | class longSeason(dynamic): 29 | """ The longSeason class alows user to add a long seasonality component 30 | to the dlm. The difference between the long seasonality is that 31 | 1) The seasonality component use each date as a unit and change in a given 32 | periodicity. For example, 1, 2, 3, 4, 1, 2, 3, 4. 
class longSeason(dynamic):
    """ The longSeason class allows the user to add a long seasonality
    component to the dlm. The difference from the usual seasonality is:
    1) The seasonality component uses each date as a unit and changes with a
       given periodicity. For example, 1, 2, 3, 4, 1, 2, 3, 4.
    2) The long seasonality is capable of grouping a couple of days as the
       basic unit and changing with a periodicity. For example, 1, 1, 1, 2,
       2, 2, 3, 3, 3, 4, 4, 4.
    The use case for long seasonality is to model a longer seasonality
    together with the short-term seasonality. For example, the short-term
    seasonality can model the day-of-week pattern and the long seasonality
    can model the week-of-month pattern in the same model.
    This code implements the longSeason component as a sub-class of dynamic.
    Different from the dynamic component, the features here are generated
    from the data and updated according to the data. All other behavior is
    similar to @dynamic.

    Args:
        data: the time series data
        period: the periodicity of the longSeason component
        stay: how long each state lasts
        discount: the discount factor
        name: the name of the component
        w: the value to set the prior covariance. Default to a diagonal
           matrix with 100 on the diagonal.

    Attributes:
        period: the periodicity, i.e., how many different states it has in
                one period
        stay: the length a state lasts
        discount: the discounting factor
        name: the name of the component

    """

    def __init__(self,
                 data=None,
                 period=4,
                 stay=7,
                 discount=0.99,
                 name='longSeason',
                 w=100):

        self.period = period
        self.stay = stay
        if data is None:
            raise NameError('Data must be provided for longSeason.')

        # create features. nextState remembers where the next feature row
        # continues: [state index within the period, days spent in that state]
        features, self.nextState = self.createFeatureMatrix(period=period,
                                                            stay=stay,
                                                            n=len(data),
                                                            state=[0, 0])

        dynamic.__init__(self,
                         features=features,
                         discount=discount,
                         name=name,
                         w=w)
        self.checkDataLength()

        # modify the type to be longSeason
        self.componentType = 'longSeason'

    def createFeatureMatrix(self, period, stay, n, state):
        """ Create the feature matrix based on the supplied data and the
        periodicity.

        Args:
            period: the periodicity of the component
            stay: the length of the base unit, i.e., how long before changing
                  to the next state
            n: the number of feature rows to generate
            state: the [state, days-in-state] pair to start generating from

        Returns:
            A tuple (features, nextState) where nextState is the state pair
            right after the last generated row.
        """

        # copy the starting state so the caller's list is not mutated in place
        nextState = list(state)
        features = []

        for _ in range(n):
            # one-hot feature row for the current state
            row = [0] * period
            row[nextState[0]] = 1
            features.append(row)

            # advance the day count; roll over to the next state once the
            # current one has lasted `stay` days
            nextState[1] = (nextState[1] + 1) % stay
            if nextState[1] == 0:
                nextState[0] = (nextState[0] + 1) % period

        return features, nextState

    # the degree cannot be longer than data
    def checkDataLength(self):
        """ Check whether the degree is less than the time series length.

        """
        if self.d >= self.n:
            raise NameError('The degree cannot be longer than the data series')

    # override
    def appendNewData(self, newData):
        """ Append new data to the existing features. Overriding the same
        method in @dynamic.

        Args:
            newData: a list of new data

        """
        # generate only the feature rows not already produced ahead of time
        incrementLength = len(newData) + self.n - len(self.features)
        if incrementLength > 0:
            newFeatures, \
                self.nextState = self.createFeatureMatrix(period=self.period,
                                                          stay=self.stay,
                                                          n=incrementLength,
                                                          state=self.nextState)
            self.features.extend(newFeatures)
        self.n += len(newData)

    # override
    def popout(self, date):
        """ Pop out the data of a specific date and rewrite the correct
        feature matrix.

        Args:
            date: the index of which to be deleted.

        """
        # Since the seasonality is a fixed pattern, no matter what date is
        # popped out we just need to remove the last date; otherwise the
        # feature pattern would be changed.
        #
        # If you want to delete a date and change the underlying pattern,
        # i.e., shorten the periodicity of the period that date is presented,
        # you should use ignore instead.
        print('Popout the date will change the whole' +
              ' seasonality patten on all the' +
              ' future days. If you want to keep the' +
              ' seasonality patten on the future' +
              ' days unchanged. Please use ignore instead')

        self.features.pop()
        self.n -= 1

        # push nextState back by one day, rolling the state counters
        # backwards at the boundaries.
        if self.nextState[1] == 0:
            self.nextState[1] = self.stay - 1
            if self.nextState[0] == 0:
                self.nextState[0] = self.period - 1
            else:
                self.nextState[0] -= 1
        else:
            self.nextState[1] -= 1

    def alter(self, date, dataPoint):
        """ We do nothing to longSeason when altering the main data, since
        the features are derived from the periodic pattern, not the data.

        """

        # do nothing
        pass

    def updateEvaluation(self, step):
        """ Update the evaluation matrix to a specific date.
        This function is used when fitting the forward filter and backward
        smoother in need of updating the correct evaluation matrix. Feature
        rows are generated on demand when stepping past the current length.

        """
        if step < len(self.features):
            self.evaluation = matrix(self.features[step])
        else:
            newFeatures, \
                self.nextState = self.createFeatureMatrix(
                    period=self.period,
                    stay=self.stay,
                    n=step + 1 - len(self.features),
                    state=self.nextState)
            self.features.extend(newFeatures)
            self.evaluation = matrix(self.features[step])
        self.step = step
class matrixTools:
    """Static helpers for stitching matrices and vectors together.

    In every method a ``None`` argument acts as the neutral element, so the
    other operand is returned (wrapped as ``np.matrix``/``np.array`` where
    the original API did so).
    """

    @staticmethod
    def matrixAddInDiag(A, B):
        """Block-diagonal concatenation: returns diag(A, B)."""
        if A is None:
            return np.matrix(B)
        if B is None:
            return np.matrix(A)
        rowsA, colsA = A.shape
        rowsB, colsB = B.shape
        # pad each block with zeros so the two rows line up, then stack
        upper = np.concatenate((A, np.matrix(np.zeros((rowsA, colsB)))), axis=1)
        lower = np.concatenate((np.matrix(np.zeros((rowsB, colsA))), B), axis=1)
        return np.concatenate((upper, lower), axis=0)

    # A + B = (A; B)
    @staticmethod
    def matrixAddByRow(A, B):
        """Vertical stacking: returns (A; B). A lone operand is returned as-is."""
        if A is None:
            return B
        if B is None:
            return A
        return np.concatenate((A, B), axis=0)

    # A + B = (A B)
    @staticmethod
    def matrixAddByCol(A, B):
        """Horizontal stacking: returns (A B)."""
        if A is None:
            return np.matrix(B)
        if B is None:
            return np.matrix(A)
        return np.concatenate((A, B), axis=1)

    @staticmethod
    def AddTwoVectors(a, b):
        """Concatenate two 1-d vectors."""
        if a is None:
            return np.array(b)
        if b is None:
            return np.array(a)
        return np.concatenate((a, b))
Different from the Fourier 29 | series, the seasonality components are nonparametric, i.e., there is no sin 30 | or cos relationship between each state. They can be arbitrarily valued. 31 | 32 | Args: 33 | period: the period of the 34 | discount: the discount factor 35 | name: the name of the trend component 36 | w: the value to set the prior covariance. Default to a diagonal 37 | matrix with 100 on the diagonal. 38 | 39 | Examples: 40 | >>> # create a 7-day seasonality: 41 | >>> ctrend = seasonality(period = 7, name = 'weekly', discount = 0.99) 42 | >>> # change the ctrend to have covariance with diagonals are 2 and state 1 43 | >>> ctrend.createCovPrior(cov = 2) 44 | >>> ctrend.createMeanPrior(mean = 1) 45 | >>> ctrend.freeForm() 46 | 47 | Attributes: 48 | d: the period of the seasonality 49 | componentType: the type of the component, in this case, 'seasonality' 50 | name: the name of the seasonality component, to be supplied by user 51 | used in modeling and result extraction 52 | discount: the discount factor for this component. 
Details please refer 53 | to the @kalmanFilter 54 | evaluation: the evaluation matrix for this component 55 | transition: the transition matrix for this component 56 | covPrior: the prior guess of the covariance matrix of the latent states 57 | meanPrior: the prior guess of the latent states 58 | 59 | 60 | """ 61 | def __init__(self, 62 | period = 7, 63 | discount = 0.99, 64 | name = 'seasonality', 65 | w=100): 66 | 67 | if period <= 1: 68 | raise NameError('Period has to be greater than 1.') 69 | self.d = period 70 | self.componentType = 'seasonality' 71 | self.name = name 72 | self.discount = np.ones(self.d) * discount 73 | 74 | # Initialize all basic quantities 75 | self.evaluation = None 76 | self.transition = None 77 | self.covPrior = None 78 | self.meanPrior = None 79 | 80 | # create all basic quantities 81 | self.createEvaluation() 82 | self.createTransition() 83 | self.createCovPrior(cov=w) 84 | self.createMeanPrior() 85 | 86 | # create form free seasonality component 87 | self.freeForm() 88 | 89 | def createEvaluation(self): 90 | """ Create the evaluation matrix 91 | 92 | """ 93 | self.evaluation = np.matrix(np.zeros((1, self.d))) 94 | self.evaluation[0, 0] = 1 95 | 96 | # The transition matrix takes special form as 97 | # G = [0 1 0] 98 | # [0 0 1] 99 | # [1 0 0] 100 | # So everyt time, when G * G, we rotate the vector once, which results 101 | # in the seasonality performance 102 | def createTransition(self): 103 | """ Create the transition matrix. 104 | 105 | According to Hurrison and West (1999), the transition matrix of seasonality 106 | takes a form of\n 107 | 108 | [[0 1 0 0],\n 109 | [0 0 1 0],\n 110 | [0 0 0 1],\n 111 | [1 0 0 0]] 112 | 113 | """ 114 | self.transition = np.matrix(np.diag(np.ones(self.d - 1), 1)) 115 | self.transition[self.d - 1, 0] = 1 116 | 117 | def createCovPrior(self, cov = 1e7): 118 | """Create the prior covariance matrix for the latent states. 
119 | 120 | """ 121 | self.covPrior = np.matrix(np.eye(self.d)) * cov 122 | 123 | def createMeanPrior(self, mean = 0): 124 | """ Create the prior latent state 125 | 126 | """ 127 | self.meanPrior = np.matrix(np.ones((self.d, 1))) * mean 128 | 129 | # Form free seasonality component ensures that sum(mean) = 0 130 | # We use the formular from "Bayesian forecasting and dynamic linear models" 131 | # Page 242 132 | def freeForm(self): 133 | """ The technique used in Hurrison and West (1999). After calling this method, 134 | The latent states sum up to 0 and the covariance matrix is degenerate to have 135 | rank d - 1, so that the sum of the latent states will never change when the 136 | system evolves 137 | 138 | """ 139 | if self.covPrior is None or self.meanPrior is None: 140 | raise NameError('freeForm can only be called after prior created.') 141 | else: 142 | u = np.sum(np.sum(self.covPrior, 0), 1)[0, 0] 143 | A = np.sum(self.covPrior, 1) / u 144 | self.meanPrior = self.meanPrior - A * np.sum(self.meanPrior, 0)[0, 0] 145 | self.covPrior = self.covPrior - np.dot(A, A.T) * u 146 | 147 | def checkDimensions(self): 148 | """ if user supplies their own covPrior and meanPrior, this can 149 | be used to check if the dimension matches 150 | 151 | """ 152 | tl.checker.checkVectorDimension(self.meanPrior, self.covPrior) 153 | print('The dimension looks good!') 154 | -------------------------------------------------------------------------------- /pydlm_lite/modeler/trends.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================================================= 3 | 4 | Code for the trend component 5 | 6 | ========================================================================= 7 | 8 | This piece of code provide one building block for the dynamic linear model. 9 | It decribes a latent polynomial trending in the time series data. 
10 | 11 | """ 12 | import numpy as np 13 | from .component import component 14 | import pydlm_lite.base.tools as tl 15 | 16 | # create trend component 17 | # We create the trend using the component class 18 | 19 | class trend(component): 20 | """ The trend component that features the polynomial trending, 21 | providing one building block for the dynamic linear model. 22 | It decribes a latent polynomial trending in the time series data. 23 | 24 | Args: 25 | degree: the degree of the polynomial. 0: constant; 1: linear... 26 | discount: the discount factor 27 | name: the name of the trend component 28 | w: the value to set the prior covariance. Default to a diagonal 29 | matrix with 1e7 on the diagonal. 30 | 31 | Examples: 32 | >>> # create a constant trend 33 | >>> ctrend = trend(degree = 1, name = 'Const', discount = 0.99) 34 | >>> # change the ctrend to have covariance with diagonals are 2 and state 1 35 | >>> ctrend.createCovPrior(cov = 2) 36 | >>> ctrend.createMeanPrior(mean = 1) 37 | 38 | Attributes: 39 | d: the dimension of the latent states of the polynomial trend 40 | componentType: the type of the component, in this case, 'trend' 41 | name: the name of the trend component, to be supplied by user 42 | used in modeling and result extraction 43 | discount: the discount factor for this component. 
Details please refer 44 | to the @kalmanFilter 45 | evaluation: the evaluation matrix for this component 46 | transition: the transition matrix for this component 47 | covPrior: the prior guess of the covariance matrix of the latent states 48 | meanPrior: the prior guess of the latent states 49 | 50 | """ 51 | 52 | def __init__(self, 53 | degree = 0, 54 | discount = 0.99, 55 | name = 'trend', 56 | w=100): 57 | 58 | if degree < 0: 59 | raise NameError('degree has to be non-negative') 60 | self.d = degree + 1 61 | self.name = name 62 | self.componentType = 'trend' 63 | self.discount = np.ones(self.d) * discount 64 | 65 | # Initialize all basic quantities 66 | self.evaluation = None 67 | self.transition = None 68 | self.covPrior = None 69 | self.meanPrior = None 70 | 71 | # create all basic quantities 72 | self.createEvaluation() 73 | self.createTransition() 74 | self.createCovPrior(cov=w) 75 | self.createMeanPrior() 76 | 77 | def createEvaluation(self): 78 | """ Create the evaluation matrix 79 | 80 | """ 81 | self.evaluation = np.matrix(np.zeros((1, self.d))) 82 | self.evaluation[0, 0] = 1 83 | 84 | def createTransition(self): 85 | """Create the transition matrix 86 | 87 | According Hurrison and West (1999), the transition matrix of trend takes 88 | a form of \n 89 | 90 | [[1 1 1 1],\n 91 | [0 1 1 1],\n 92 | [0 0 1 1],\n 93 | [0 0 0 1]] 94 | 95 | """ 96 | self.transition = np.matrix(np.zeros((self.d, self.d))) 97 | self.transition[np.triu_indices(self.d)] = 1 98 | 99 | def createCovPrior(self, cov=1e7): 100 | """Create the prior covariance matrix for the latent states. 
101 | 102 | """ 103 | self.covPrior = np.matrix(np.eye(self.d)) * cov 104 | 105 | def createMeanPrior(self, mean=0): 106 | """ Create the prior latent state 107 | 108 | """ 109 | self.meanPrior = np.matrix(np.ones((self.d, 1))) * mean 110 | 111 | def checkDimensions(self): 112 | """ if user supplies their own covPrior and meanPrior, this can 113 | be used to check if the dimension matches 114 | 115 | """ 116 | tl.checker.checkVectorDimension(self.meanPrior, self.covPrior) 117 | print('The dimesnion looks good!') 118 | -------------------------------------------------------------------------------- /pydlm_lite/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/pydlm_lite/tests/__init__.py -------------------------------------------------------------------------------- /pydlm_lite/tests/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wwrechard/pydlm-lite/72e745c3ac9eae0da08dfc2c780e1c344e47ab0e/pydlm_lite/tests/base/__init__.py -------------------------------------------------------------------------------- /pydlm_lite/tests/base/testKalmanFilter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import unittest 3 | 4 | from pydlm_lite.modeler.trends import trend 5 | from pydlm_lite.modeler.seasonality import seasonality 6 | from pydlm_lite.modeler.builder import builder 7 | from pydlm_lite.base.kalmanFilter import kalmanFilter 8 | 9 | class testKalmanFilter(unittest.TestCase): 10 | 11 | def setUp(self): 12 | self.kf1 = kalmanFilter(discount=[1]) 13 | self.kf0 = kalmanFilter(discount=[1e-10]) 14 | self.kf11 = kalmanFilter(discount=[1, 1]) 15 | self.trend0 = trend(degree=0, discount=1, w=1.0) 16 | self.trend0_90 = trend(degree=0, discount=0.9, w=1.0) 17 | self.trend0_98 = 
class testKalmanFilter(unittest.TestCase):

    def setUp(self):
        # Filters with discount 1 (no discounting), near-zero discount
        # (memory-less) and a two-dimensional discount-1 filter.
        self.kf1 = kalmanFilter(discount=[1])
        self.kf0 = kalmanFilter(discount=[1e-10])
        self.kf11 = kalmanFilter(discount=[1, 1])
        # Trend components used as building blocks in the tests below.
        self.trend0 = trend(degree=0, discount=1, w=1.0)
        self.trend0_90 = trend(degree=0, discount=0.9, w=1.0)
        self.trend0_98 = trend(degree=0, discount=0.98, w=1.0, name='a')
        self.trend1 = trend(degree=1, discount=1, w=1.0)

    def testForwardFilter(self):
        bld = builder()
        bld.add(self.trend0)
        bld.initialize()
        self.kf1.predict(bld.model)
        self.assertAlmostEqual(bld.model.prediction.obs, 0)

        # the prior on the mean is zero, but we observe 1; with
        # discount = 1 one should expect the filtered mean to be 0.5
        self.kf1.forwardFilter(bld.model, 1)
        self.assertAlmostEqual(bld.model.obs, 0.5)
        self.assertAlmostEqual(bld.model.prediction.obs, 0)
        self.assertAlmostEqual(bld.model.sysVar, 0.375)

        self.kf1.predict(bld.model)
        self.assertAlmostEqual(bld.model.obs, 0.5)
        self.assertAlmostEqual(bld.model.prediction.obs, 0.5)

        bld.initialize()
        self.kf0.predict(bld.model)
        self.assertAlmostEqual(bld.model.prediction.obs, 0)

        # the prior on the mean is zero, but we observe 1; with a
        # near-zero discount one should expect the filtered mean close to 1
        self.kf0.forwardFilter(bld.model, 1)
        self.assertAlmostEqual(bld.model.obs[0, 0], 1)
        self.assertAlmostEqual(bld.model.prediction.obs[0, 0], 0)
        self.assertAlmostEqual(bld.model.sysVar[0, 0], 0.5)

        self.kf0.predict(bld.model)
        self.assertAlmostEqual(bld.model.obs[0, 0], 1)
        self.assertAlmostEqual(bld.model.prediction.obs[0, 0], 1)

    def testForwardFilterMultiDim(self):
        bld = builder()
        bld.add(seasonality(period=2, discount=1, w=1.0))
        bld.initialize()

        self.kf11.forwardFilter(bld.model, 1)
        self.assertAlmostEqual(bld.model.state[0][0, 0], 0.33333333333)
        self.assertAlmostEqual(bld.model.state[1][0, 0], -0.33333333333)

        self.kf11.forwardFilter(bld.model, -1)
        self.assertAlmostEqual(bld.model.state[0][0, 0], -0.5)
        self.assertAlmostEqual(bld.model.state[1][0, 0], 0.5)

    def testBackwardSmoother(self):
        bld = builder()
        bld.add(self.trend0)
        bld.initialize()

        # with mean 0 and observations 1 then 0, one shall expect the
        # smoothed mean at step 1 to be 1/3 for discount = 1
        self.kf1.forwardFilter(bld.model, 1)
        self.kf1.forwardFilter(bld.model, 0)
        self.kf1.backwardSmoother(bld.model,
                                  np.matrix([[0.5]]),
                                  np.matrix([[0.375]]))
        self.assertAlmostEqual(bld.model.obs[0, 0], 1.0 / 3)
        self.assertAlmostEqual(bld.model.sysVar[0, 0], 0.18518519)

    def testBackwardSmootherMultiDim(self):
        # second order trend with discount = 1. The smoothed result should
        # equal a direct fit on the three data points 0, 1, -1; thus the
        # smoothed observation should be 0.0
        bld = builder()
        bld.add(self.trend1)
        bld.initialize()

        self.kf11.forwardFilter(bld.model, 1)
        state1 = bld.model.state
        cov1 = bld.model.sysVar

        self.kf11.forwardFilter(bld.model, -1)
        self.kf11.backwardSmoother(bld.model,
                                   rawState=state1,
                                   rawSysVar=cov1)

        self.assertAlmostEqual(bld.model.obs[0, 0], 0.0)

    def testMissingData(self):
        bld = builder()
        bld.add(self.trend0)
        bld.initialize()

        self.kf0.forwardFilter(bld.model, 1)
        self.assertAlmostEqual(bld.model.obs[0, 0], 1.0)
        self.assertAlmostEqual(bld.model.obsVar[0, 0], 1.0)

        # a missing observation keeps the mean and inflates the variance
        self.kf0.forwardFilter(bld.model, None)
        self.assertAlmostEqual(bld.model.obs[0, 0], 1.0)
        self.assertAlmostEqual(bld.model.obsVar[0, 0] / 1e10, 0.5)

        self.kf0.forwardFilter(bld.model, None)
        self.assertAlmostEqual(bld.model.obs[0, 0], 1.0)
        self.assertAlmostEqual(bld.model.obsVar[0, 0] / 1e10, 0.5)

        self.kf0.forwardFilter(bld.model, 0)
        self.assertAlmostEqual(bld.model.obs[0, 0], 0.0)

    def testMissingEvaluation(self):
        bld = builder()
        bld.add(self.trend0)
        bld.initialize()

        # a None evaluation should be treated as 0 observation weight
        bld.model.evaluation = np.matrix([[None]])
        self.kf1.forwardFilter(bld.model, 1.0,
                               dealWithMissingEvaluation=True)
        self.assertAlmostEqual(bld.model.obs, 0.0)
        self.assertAlmostEqual(bld.model.transition, 1.0)

    def testEvolveMode(self):
        bld = builder()
        bld.add(self.trend0_90)
        bld.add(self.trend0_98)
        bld.initialize()

        # component-wise innovation should leave the cross terms at zero
        kf2 = kalmanFilter(discount=[0.9, 0.98],
                           updateInnovation='component',
                           index=bld.componentIndex)
        kf2.forwardFilter(bld.model, 1.0)
        self.assertAlmostEqual(bld.model.innovation[0, 1], 0.0)
        self.assertAlmostEqual(bld.model.innovation[1, 0], 0.0)

if __name__ == '__main__':
    unittest.main()
trend(degree=0, discount=1e-12, w=1.0) 25 | self.dlm3.builder + seasonality(period=2, discount=1, w=1.0) 26 | self.dlm4.builder + dynamic(features=[[0] for i in range(5)] + 27 | [[1] for i in range(5)], discount=1, 28 | w=1.0) 29 | self.dlm5.builder + trend(degree=0, discount=1, w=1.0) + \ 30 | autoReg(degree=1, discount=1, w=1.0) 31 | self.dlm6.builder + trend(degree=0, discount=0.9, w=1.0) + \ 32 | seasonality(period=2, discount=0.8, w=1.0) + \ 33 | autoReg(degree=3, discount=1.0) 34 | self.dlm7.builder + trend(degree=0, discount=1, w=1.0) 35 | self.dlm1._initialize() 36 | self.dlm2._initialize() 37 | self.dlm3._initialize() 38 | self.dlm4._initialize() 39 | self.dlm5._initialize() 40 | self.dlm6._initialize() 41 | self.dlm7._initialize() 42 | self.dlm1.options.innovationType='whole' 43 | self.dlm2.options.innovationType='whole' 44 | self.dlm3.options.innovationType='whole' 45 | self.dlm4.options.innovationType='whole' 46 | self.dlm5.options.innovationType='whole' 47 | self.dlm6.options.innovationType='whole' 48 | self.dlm7.options.innovationType='whole' 49 | 50 | def testForwardFilter(self): 51 | self.dlm1._forwardFilter(start=0, end=19, renew=False) 52 | self.assertAlmostEqual(np.sum(self.dlm1.result.filteredObs[0:9]), 0) 53 | self.assertAlmostEqual(self.dlm1.result.filteredObs[9][0, 0], 1.0/11) 54 | self.assertAlmostEqual(self.dlm1.result.filteredObs[19][0, 0], 1.0/21) 55 | 56 | self.dlm2._forwardFilter(start=0, end=19) 57 | self.assertAlmostEqual(np.sum(self.dlm2.result.filteredObs[0:9]), 0.0) 58 | self.assertAlmostEqual(self.dlm2.result.filteredObs[9][0, 0], 1.0) 59 | self.assertAlmostEqual(self.dlm2.result.filteredObs[19][0, 0], 0.0) 60 | 61 | def testResetModelStatus(self): 62 | self.dlm1._forwardFilter(start = 0, end = 19, renew = False) 63 | self.dlm1.result.filteredSteps = (0, 19) 64 | self.assertAlmostEqual(self.dlm1.builder.model.obs, \ 65 | self.dlm1.result.filteredObs[19]) 66 | 67 | self.dlm1._resetModelStatus() 68 | 
self.assertAlmostEqual(np.sum(self.dlm1.builder.model.state \ 69 | - self.dlm1.builder.statePrior), 0.0) 70 | def testSetModelStatus(self): 71 | self.dlm1._forwardFilter(start = 0, end = 19, renew = False) 72 | self.dlm1.result.filteredSteps = (0, 19) 73 | self.assertAlmostEqual(self.dlm1.builder.model.obs, \ 74 | self.dlm1.result.filteredObs[19]) 75 | self.dlm1._setModelStatus(date = 12) 76 | self.assertAlmostEqual(self.dlm1.builder.model.obs, \ 77 | self.dlm1.result.filteredObs[12]) 78 | 79 | def testForwaredFilterConsectiveness(self): 80 | self.dlm1._forwardFilter(start = 0, end = 19, renew = False) 81 | filtered1 = self.dlm1.result.filteredObs 82 | 83 | self.dlm1._initialize() 84 | 85 | self.dlm1._forwardFilter(start = 0, end = 13) 86 | self.dlm1.result.filteredSteps = (0, 13) 87 | self.dlm1._forwardFilter(start = 13, end = 19) 88 | filtered2 = self.dlm1.result.filteredObs 89 | 90 | self.assertAlmostEqual(np.sum(np.array(filtered1) - np.array(filtered2)), 0.0) 91 | 92 | def testBackwardSmoother(self): 93 | self.dlm1._forwardFilter(start = 0, end = 19, renew = False) 94 | self.dlm1.result.filteredSteps = (0, 19) 95 | self.dlm1._backwardSmoother(start = 19) 96 | self.assertAlmostEqual(self.dlm1.result.smoothedObs[0][0, 0], 1.0/21) 97 | self.assertAlmostEqual(self.dlm1.result.smoothedObs[19][0, 0], 1.0/21) 98 | 99 | self.dlm2._forwardFilter(start = 0, end = 19) 100 | self.dlm2.result.filteredSteps = (0, 19) 101 | self.dlm2._backwardSmoother(start = 19) 102 | self.assertAlmostEqual(self.dlm2.result.smoothedObs[0][0, 0], 0.0) 103 | self.assertAlmostEqual(self.dlm2.result.smoothedObs[19][0, 0], 0.0) 104 | self.assertAlmostEqual(self.dlm2.result.smoothedObs[9][0, 0], 1.0) 105 | 106 | def testOneDayAheadPredictWithoutDynamic(self): 107 | self.dlm3._forwardFilter(start=0, end=11, renew=False) 108 | self.dlm3.result.filteredSteps = (0, 11) 109 | (obs, var) = self.dlm3._oneDayAheadPredict(date=11) 110 | self.assertAlmostEqual(obs, -6.0/7) 111 | 
self.assertAlmostEqual(self.dlm3.result.predictStatus, 112 | [11, 12, [-6.0/7]]) 113 | 114 | (obs, var) = self.dlm3._oneDayAheadPredict(date=2) 115 | self.assertAlmostEqual(obs, 3.0/5) 116 | # notice that the two latent states always sum up to 0 117 | self.assertAlmostEqual(self.dlm3.result.predictStatus, 118 | [2, 3, [3.0/5]]) 119 | 120 | def testOneDayAheadPredictWithDynamic(self): 121 | self.dlm4._forwardFilter(start=0, end=9, renew=False) 122 | self.dlm4.result.filteredSteps = (0, 9) 123 | featureDict = {'dynamic': 2.0} 124 | (obs, var) = self.dlm4._oneDayAheadPredict(date=9, 125 | featureDict=featureDict) 126 | self.assertAlmostEqual(obs, 5.0/6 * 2) 127 | 128 | def testContinuePredictWithoutDynamic(self): 129 | self.dlm3._forwardFilter(start=0, end=11, renew=False) 130 | self.dlm3.result.filteredSteps = (0, 11) 131 | (obs, var) = self.dlm3._oneDayAheadPredict(date=11) 132 | self.assertAlmostEqual(self.dlm3.result.predictStatus, 133 | [11, 12, [-6.0/7]]) 134 | (obs, var) = self.dlm3._continuePredict() 135 | self.assertAlmostEqual(self.dlm3.result.predictStatus, 136 | [11, 13, [-6.0/7, 6.0/7]]) 137 | 138 | def testContinuePredictWithDynamic(self): 139 | self.dlm4._forwardFilter(start=0, end=9, renew=False) 140 | self.dlm4.result.filteredSteps = (0, 9) 141 | featureDict = {'dynamic': 2.0} 142 | (obs, var) = self.dlm4._oneDayAheadPredict(date=9, 143 | featureDict=featureDict) 144 | self.assertAlmostEqual(self.dlm4.result.predictStatus, 145 | [9, 10, [5.0/6 * 2]]) 146 | 147 | featureDict = {'dynamic': 3.0} 148 | (obs, var) = self.dlm4._continuePredict(featureDict=featureDict) 149 | self.assertAlmostEqual(self.dlm4.result.predictStatus, 150 | [9, 11, [5.0/6 * 2, 5.0/6 * 3]]) 151 | 152 | def testPredictWithAutoReg(self): 153 | self.dlm5._forwardFilter(start=0, end=99, renew=False) 154 | self.dlm5.result.filteredSteps = [0, 99] 155 | (obs, var) = self.dlm5._oneDayAheadPredict(date=99) 156 | self.assertAlmostEqual(obs[0, 0], 100.03682874) 157 | (obs, var) = 
self.dlm5._continuePredict() 158 | self.assertAlmostEqual(obs[0, 0], 101.07480945) 159 | 160 | def testGetLatentState(self): 161 | # for forward filter 162 | self.dlm5._forwardFilter(start=0, end=99, renew=False) 163 | self.dlm5.result.filteredSteps = [0, 99] 164 | filteredTrend = self.dlm5._getLatentState( 165 | filterType='forwardFilter', name='trend', start=0, end=99) 166 | diff = 0.0 167 | for i in range(len(filteredTrend)): 168 | diff += abs(filteredTrend[i][0] - 169 | self.dlm5.result.filteredState[i][0, 0]) 170 | self.assertAlmostEqual(diff, 0) 171 | 172 | # for prediction 173 | predictedTrend = self.dlm5._getLatentState( 174 | filterType='predict', name='trend', start=0, end=99) 175 | diff = 0.0 176 | for i in range(len(predictedTrend)): 177 | diff += abs(predictedTrend[i][0] - 178 | self.dlm5.result.predictedState[i][0, 0]) 179 | self.assertAlmostEqual(diff, 0) 180 | 181 | # for backward smoother 182 | self.dlm5._backwardSmoother(start=99) 183 | self.dlm5.result.smoothedSteps = [0, 99] 184 | smoothedTrend = self.dlm5._getLatentState( 185 | filterType='backwardSmoother', name='trend', start=0, end=99) 186 | diff = 0.0 187 | for i in range(len(smoothedTrend)): 188 | diff += abs(smoothedTrend[i][0] - 189 | self.dlm5.result.smoothedState[i][0, 0]) 190 | self.assertAlmostEqual(diff, 0) 191 | 192 | def testGetLatentCov(self): 193 | # for forward filter 194 | self.dlm5._forwardFilter(start=0, end=99, renew=False) 195 | self.dlm5.result.filteredSteps = [0, 99] 196 | filteredTrend = self.dlm5._getLatentCov( 197 | filterType='forwardFilter', name='trend', start=0, end=99) 198 | diff = 0.0 199 | for i in range(len(filteredTrend)): 200 | diff += abs(filteredTrend[i][0, 0] - 201 | self.dlm5.result.filteredCov[i][0, 0]) 202 | self.assertAlmostEqual(diff, 0) 203 | 204 | # for prediction 205 | predictedTrend = self.dlm5._getLatentCov( 206 | filterType='predict', name='trend', start=0, end=99) 207 | diff = 0.0 208 | for i in range(len(predictedTrend)): 209 | diff += 
abs(predictedTrend[i][0, 0] - 210 | self.dlm5.result.predictedCov[i][0, 0]) 211 | self.assertAlmostEqual(diff, 0) 212 | 213 | # for backward smoother 214 | self.dlm5._backwardSmoother(start=99) 215 | self.dlm5.result.smoothedSteps = [0, 99] 216 | smoothedTrend = self.dlm5._getLatentCov( 217 | filterType='backwardSmoother', name='trend', start=0, end=99) 218 | diff = 0.0 219 | for i in range(len(smoothedTrend)): 220 | diff += abs(smoothedTrend[i][0, 0] - 221 | self.dlm5.result.smoothedCov[i][0, 0]) 222 | self.assertAlmostEqual(diff, 0) 223 | 224 | def testComponentMean(self): 225 | self.dlm5._forwardFilter(start=0, end=99, renew=False) 226 | self.dlm5.result.filteredSteps = [0, 99] 227 | # for component with forward filter 228 | arTrend = self.dlm5._getComponentMean(filterType='forwardFilter', 229 | name='ar2', start=0, end=99) 230 | trueAr = [item[1, 0] for item in self.dlm5.result.filteredState] 231 | comp = self.dlm5.builder.automaticComponents['ar2'] 232 | for i in range(len(trueAr)): 233 | comp.updateEvaluation(i, self.data5) 234 | trueAr[i] = comp.evaluation * trueAr[i] 235 | 236 | diff = 0.0 237 | for i in range(len(arTrend)): 238 | diff += abs(arTrend[i] - trueAr[i]) 239 | self.assertAlmostEqual(diff, 0) 240 | 241 | # for component with backward smoother 242 | self.dlm5._backwardSmoother(start=99) 243 | self.dlm5.result.smoothedSteps = [0, 99] 244 | arTrend = self.dlm5._getComponentMean(filterType='backwardSmoother', 245 | name='ar2', start=0, end=99) 246 | trueAr = [item[1, 0] for item in self.dlm5.result.smoothedState] 247 | comp = self.dlm5.builder.automaticComponents['ar2'] 248 | for i in range(len(trueAr)): 249 | comp.updateEvaluation(i, self.data5) 250 | trueAr[i] = comp.evaluation * trueAr[i] 251 | 252 | diff = 0.0 253 | for i in range(len(arTrend)): 254 | diff += abs(arTrend[i] - trueAr[i]) 255 | self.assertAlmostEqual(diff, 0) 256 | 257 | def testComponentVar(self): 258 | self.dlm5._forwardFilter(start=0, end=99, renew=False) 259 | 
self.dlm5.result.filteredSteps = [0, 99] 260 | # for component with forward filter 261 | arTrend = self.dlm5._getComponentVar(filterType='forwardFilter', 262 | name='ar2', start=0, end=99) 263 | trueAr = [item[1, 1] for item in self.dlm5.result.filteredCov] 264 | comp = self.dlm5.builder.automaticComponents['ar2'] 265 | for i in range(len(trueAr)): 266 | comp.updateEvaluation(i, self.data5) 267 | trueAr[i] = comp.evaluation * trueAr[i] * comp.evaluation.T 268 | 269 | diff = 0.0 270 | for i in range(len(arTrend)): 271 | diff += abs(arTrend[i] - trueAr[i]) 272 | self.assertAlmostEqual(diff, 0) 273 | 274 | # for component with backward smoother 275 | self.dlm5._backwardSmoother(start=99) 276 | self.dlm5.result.smoothedSteps = [0, 99] 277 | arTrend = self.dlm5._getComponentVar(filterType='backwardSmoother', 278 | name='ar2', start=0, end=99) 279 | trueAr = [item[1, 1] for item in self.dlm5.result.smoothedCov] 280 | comp = self.dlm5.builder.automaticComponents['ar2'] 281 | for i in range(len(trueAr)): 282 | comp.updateEvaluation(i, self.data5) 283 | trueAr[i] = comp.evaluation * trueAr[i] * comp.evaluation.T 284 | 285 | diff = 0.0 286 | for i in range(len(arTrend)): 287 | diff += abs(arTrend[i] - trueAr[i]) 288 | self.assertAlmostEqual(diff, 0) 289 | 290 | def testComputeMSE(self): 291 | self.dlm1._forwardFilter(start=0, end=19, renew=False) 292 | self.dlm1.result.filteredSteps=(0, 19) 293 | mse1 = self.dlm1._getMSE() 294 | mse_expect = 0 295 | for i in range(20): 296 | mse_expect += (self.dlm1.result.predictedObs[i] - 297 | self.data[i]) ** 2 298 | mse_expect /= 20 299 | self.assertAlmostEqual(mse1, mse_expect) 300 | 301 | self.dlm2._forwardFilter(start=0, end=19, renew=False) 302 | self.dlm2.result.filteredSteps=(0, 19) 303 | mse2 = self.dlm2._getMSE() 304 | mse_expect = 2.0/20 305 | self.assertAlmostEqual(mse2, mse_expect) 306 | 307 | # Test missing data 308 | self.dlm7._forwardFilter(start=0, end=6, renew=False) 309 | self.dlm7.result.filteredSteps=(0, 6) 310 | mse3 
class testAutoReg(unittest.TestCase):
    """Tests for autoReg's lagged-feature (evaluation) construction."""

    def setUp(self):
        self.data = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
        self.ar4 = autoReg(degree=4, name='ar4', padding=0, w=1.0)

    def testFeatureMatrix(self):
        # Row t holds the four observations preceding step t,
        # zero-padded before the series starts.
        trueFeatures = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 1, 2],
                        [0, 1, 2, 3], [1, 2, 3, 0], [2, 3, 0, 1], [3, 0, 1, 2],
                        [0, 1, 2, 3], [1, 2, 3, 0], [2, 3, 0, 1], [3, 0, 1, 2]]
        actualFeatures = []
        for step in range(12):
            self.ar4.updateEvaluation(step, self.data)
            actualFeatures.append(self.ar4.evaluation.A1.tolist())

        self.assertEqual(actualFeatures, trueFeatures)
class testBuilder(unittest.TestCase):
    """Tests for builder: component bookkeeping and model assembly."""

    def setUp(self):
        self.data = np.random.rand(10).tolist()
        self.features = np.random.rand(10, 2).tolist()
        self.trend = trend(degree=2, w=1.0)
        self.seasonality = seasonality(period=7, w=1.0)
        self.dynamic = dynamic(self.features, w=1.0)
        self.autoReg = autoReg(degree=3,
                               w=1.0)
        self.builder1 = builder()

    def _checkCounts(self, nStatic, nDynamic, nAutomatic):
        # Shared helper for the repeated component-count assertions.
        self.assertEqual(len(self.builder1.staticComponents), nStatic)
        self.assertEqual(len(self.builder1.dynamicComponents), nDynamic)
        self.assertEqual(len(self.builder1.automaticComponents), nAutomatic)

    def testInitialization(self):
        # A fresh builder starts with no components of any kind.
        self._checkCounts(0, 0, 0)

    def testAddAndDelete(self):
        # Each addition lands in the bucket matching its component kind.
        self.builder1 = self.builder1 + self.trend
        self._checkCounts(1, 0, 0)

        self.builder1 = self.builder1 + self.dynamic
        self._checkCounts(1, 1, 0)

        self.builder1 = self.builder1 + self.seasonality
        self._checkCounts(2, 1, 0)

        # Deletion by name removes only the named component.
        self.builder1.delete('seasonality')
        self._checkCounts(1, 1, 0)

        self.assertEqual(self.builder1.staticComponents['trend'], self.trend)

        self.builder1 = self.builder1 + self.autoReg
        self.assertEqual(len(self.builder1.automaticComponents), 1)

    def testInitialize(self):
        # The assembled evaluation is the column-wise concatenation of the
        # per-component evaluations.
        self.builder1 = self.builder1 + self.trend + self.dynamic \
                        + self.autoReg

        self.builder1.initialize(data=self.data)
        expected = mt.matrixAddByCol(
            mt.matrixAddByCol(self.trend.evaluation,
                              self.dynamic.evaluation),
            self.autoReg.evaluation)
        self.assertAlmostEqual(
            np.sum(np.abs(self.builder1.model.evaluation - expected)), 0.0)

    def testInitializeEvaluatoin(self):
        # NOTE(review): method name typo kept -- renaming would change the
        # discovered test id. Initialize must pick up a pre-updated
        # dynamic evaluation.
        self.builder1 = self.builder1 + self.trend + self.dynamic
        self.builder1.dynamicComponents['dynamic'].updateEvaluation(8)
        self.builder1.initialize(data=self.data)
        expected = mt.matrixAddByCol(self.trend.evaluation,
                                     self.dynamic.evaluation)
        self.assertAlmostEqual(
            np.sum(np.abs(self.builder1.model.evaluation - expected)), 0.0)

    def testUpdate(self):
        # updateEvaluation(2, ...) must load the date-2 dynamic feature row.
        self.builder1 = self.builder1 + self.trend + self.dynamic \
                        + self.autoReg

        self.builder1.initialize(data=self.data)
        self.builder1.updateEvaluation(2, self.data)
        expected = mt.matrixAddByCol(
            mt.matrixAddByCol(self.trend.evaluation,
                              np.matrix([self.features[2]])),
            np.matrix(self.autoReg.evaluation))
        self.assertAlmostEqual(
            np.sum(np.abs(self.builder1.model.evaluation - expected)), 0.0)
class testDynamic(unittest.TestCase):
    """Tests for the dynamic (user-supplied feature) component."""

    def setUp(self):
        self.features = np.matrix(np.random.rand(10, 2)).tolist()
        self.newDynamic = dynamic(features=self.features, w=1.0)

    def testInputNumpyMatrix(self):
        # Constructing straight from a numpy array must not raise.
        dynamic(features=np.random.rand(10, 2), w=1.0)

    def testInitialization(self):
        # Dimension d, length n and the stored features all match the input.
        self.assertEqual(self.newDynamic.d, 2)
        self.assertEqual(self.newDynamic.n, 10)
        gap = np.abs(np.sum(np.matrix(self.newDynamic.features)
                            - np.matrix(self.features)))
        self.assertAlmostEqual(gap, 0)

    def testUpdate(self):
        # Evaluation at date 3 is exactly the date-3 feature row.
        self.newDynamic.updateEvaluation(3)
        gap = np.abs(np.sum(np.array(self.newDynamic.evaluation)
                            - np.array(self.features[3])))
        self.assertAlmostEqual(gap, 0)

    def testAppendNewData(self):
        # Appended rows land at the end of the feature list.
        self.newDynamic.appendNewData([[1, 2]])
        gap = np.abs(np.sum(np.array(self.newDynamic.features[-1])
                            - np.array([1, 2])))
        self.assertAlmostEqual(gap, 0)

    def testPopout(self):
        # Popping date 0 leaves features[1:] intact.
        self.newDynamic.popout(0)
        gap = np.abs(np.sum(np.matrix(self.newDynamic.features)
                            - np.matrix(self.features[1:])))
        self.assertAlmostEqual(gap, 0)

    def testAlter(self):
        # alter replaces a single feature row in place.
        self.newDynamic.alter(1, [0, 0])
        self.assertAlmostEqual(self.newDynamic.features[1],
                               [0, 0])
class testLongSeason(unittest.TestCase):
    """Tests for longSeason: one-hot season features with a fixed stay."""

    def setUp(self):
        data = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
        self.longSeason = longSeason(data=data, period=4, stay=4, w=1.0)

    def testFeatureMatrix(self):
        # Three complete stays of four steps each -> seasons 0, 1, 2.
        trueFeatures = ([[1, 0, 0, 0]] * 4 +
                        [[0, 1, 0, 0]] * 4 +
                        [[0, 0, 1, 0]] * 4)
        self.assertEqual(self.longSeason.features, trueFeatures)
        self.assertEqual(self.longSeason.nextState, [3, 0])
        self.assertEqual(self.longSeason.n, 12)

    def testAppendNewData(self):
        # One more observation rolls over into season 3.
        trueFeatures = ([[1, 0, 0, 0]] * 4 +
                        [[0, 1, 0, 0]] * 4 +
                        [[0, 0, 1, 0]] * 4 +
                        [[0, 0, 0, 1]])
        self.longSeason.appendNewData([4])
        self.assertEqual(self.longSeason.features, trueFeatures)
        self.assertEqual(self.longSeason.nextState, [3, 1])
        self.assertEqual(self.longSeason.n, 13)

    def testPopoutTheLast(self):
        # Dropping the final date shortens the last stay by one step.
        trueFeatures = ([[1, 0, 0, 0]] * 4 +
                        [[0, 1, 0, 0]] * 4 +
                        [[0, 0, 1, 0]] * 3)
        self.longSeason.popout(11)
        self.assertEqual(self.longSeason.features, trueFeatures)
        self.assertEqual(self.longSeason.nextState, [2, 3])
        self.assertEqual(self.longSeason.n, 11)

    def testPopoutInMiddel(self):
        # NOTE(review): name typo ("Middel") kept for test-id stability.
        # Removing any date shifts the tail, matching a pop of the last date.
        trueFeatures = ([[1, 0, 0, 0]] * 4 +
                        [[0, 1, 0, 0]] * 4 +
                        [[0, 0, 1, 0]] * 3)
        self.longSeason.popout(1)
        self.assertEqual(self.longSeason.features, trueFeatures)
        self.assertEqual(self.longSeason.nextState, [2, 3])
        self.assertEqual(self.longSeason.n, 11)

    def testUpdateEvaluation(self):
        # Evaluating one step past the data extends the feature matrix.
        trueFeatures = ([[1, 0, 0, 0]] * 4 +
                        [[0, 1, 0, 0]] * 4 +
                        [[0, 0, 1, 0]] * 4 +
                        [[0, 0, 0, 1]])
        self.longSeason.updateEvaluation(12)
        self.assertEqual(self.longSeason.features, trueFeatures)
        self.assertEqual(self.longSeason.nextState, [3, 1])
        self.assertEqual(self.longSeason.n, 12)
class testSeasonality(unittest.TestCase):
    """Smoke test for the seasonality component constructor."""

    def setUp(self):
        # Period passed to every constructed seasonality component.
        self.DEGREE = 7

    def testInitialization(self):
        # Construction followed by a dimension audit must not raise.
        component = pydlm_lite.modeler.seasonality.seasonality(self.DEGREE)
        component.checkDimensions()
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import unittest 3 | 4 | from pydlm_lite.modeler.trends import trend 5 | from pydlm_lite.modeler.seasonality import seasonality 6 | from pydlm_lite.modeler.dynamic import dynamic 7 | from pydlm_lite.modeler.autoReg import autoReg 8 | from pydlm_lite.dlm import dlm 9 | 10 | class testDlm(unittest.TestCase): 11 | 12 | def setUp(self): 13 | self.data = [0] * 9 + [1] + [0] * 10 14 | self.data5 = range(100) 15 | self.features = np.random.random((20, 2)).tolist() 16 | self.trend0 = trend(degree=0, discount=1.0, w=1.0) 17 | self.trend1 = trend(degree=0, discount=1.0) 18 | self.dlm1 = dlm(self.data) 19 | self.dlm2 = dlm(self.data) 20 | self.dlm3 = dlm([-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1]) 21 | self.dlm4 = dlm([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) 22 | self.dlm5 = dlm(self.data5) 23 | self.dlm6 = dlm(self.data5) 24 | self.dlm1 + trend(degree=0, discount=1, w=1.0) 25 | self.dlm2 + trend(degree=0, discount=1e-12, w=1.0) 26 | self.dlm3 + seasonality(period=2, discount=1, w=1.0) 27 | self.dlm4 + dynamic(features=[[0] for i in range(5)] + 28 | [[1] for i in range(5)], discount=1, w=1.0) 29 | self.dlm5 + trend(degree=0, discount=1, w=1.0) + \ 30 | autoReg(degree=1, discount=1, w=1.0) 31 | self.dlm6 + trend(degree=0, discount=1, w=1.0) + \ 32 | autoReg(degree=2, discount=1, w=1.0) 33 | self.dlm1.evolveMode('dependent') 34 | self.dlm2.evolveMode('dependent') 35 | self.dlm3.evolveMode('dependent') 36 | self.dlm4.evolveMode('dependent') 37 | self.dlm5.evolveMode('dependent') 38 | self.dlm6.evolveMode('dependent') 39 | 40 | def testAdd(self): 41 | trend2 = trend(2, name='trend2') 42 | self.dlm1 = self.dlm1 + trend2 43 | self.assertEqual(self.dlm1.builder.staticComponents['trend2'], trend2) 44 | 45 | dynamic2 = dynamic(features=self.features, name='d2') 46 | self.dlm1 = self.dlm1 + dynamic2 47 | self.assertEqual(self.dlm1.builder.dynamicComponents['d2'], dynamic2) 48 | 49 | ar3 = 
autoReg(degree=3, name='ar3') 50 | self.dlm1 = self.dlm1 + ar3 51 | self.assertEqual(self.dlm1.builder.automaticComponents['ar3'], ar3) 52 | 53 | def testDelete(self): 54 | trend2 = trend(2, name='trend2') 55 | self.dlm1 = self.dlm1 + trend2 56 | self.dlm1.delete('trend2') 57 | self.assertEqual(len(self.dlm1.builder.staticComponents), 1) 58 | 59 | def testFitForwardFilter(self): 60 | self.dlm1.fitForwardFilter(useRollingWindow = False) 61 | self.assertEqual(self.dlm1.result.filteredSteps, [0, 19]) 62 | self.assertAlmostEqual(np.sum(self.dlm1.result.filteredObs[0:9]), 0) 63 | self.assertAlmostEqual(self.dlm1.result.filteredObs[9][0, 0], 1.0/11) 64 | self.assertAlmostEqual(self.dlm1.result.filteredObs[19][0, 0], 1.0/21) 65 | 66 | self.dlm2.fitForwardFilter(useRollingWindow = False) 67 | self.assertAlmostEqual(np.sum(self.dlm2.result.filteredObs[0:9]), 0.0) 68 | self.assertAlmostEqual(self.dlm2.result.filteredObs[9][0, 0], 1.0) 69 | self.assertAlmostEqual(self.dlm2.result.filteredObs[19][0, 0], 0.0) 70 | 71 | def testFitBackwardSmoother(self): 72 | self.dlm1.fitForwardFilter() 73 | self.dlm1.fitBackwardSmoother() 74 | self.assertEqual(self.dlm1.result.smoothedSteps, [0, 19]) 75 | self.assertAlmostEqual(self.dlm1.result.smoothedObs[0][0, 0], 1.0/21) 76 | self.assertAlmostEqual(self.dlm1.result.smoothedObs[19][0, 0], 1.0/21) 77 | 78 | self.dlm2.fitForwardFilter() 79 | self.dlm2.fitBackwardSmoother() 80 | self.assertAlmostEqual(self.dlm2.result.smoothedObs[0][0, 0], 0.0) 81 | self.assertAlmostEqual(self.dlm2.result.smoothedObs[19][0, 0], 0.0) 82 | self.assertAlmostEqual(self.dlm2.result.smoothedObs[9][0, 0], 1.0) 83 | 84 | def testAppend(self): 85 | dlm4 = dlm(self.data[0:11]) 86 | dlm4 + self.trend0 87 | dlm4.evolveMode('dependent') 88 | dlm4.fitForwardFilter() 89 | self.assertEqual(dlm4.n, 11) 90 | 91 | dlm4.append(self.data[11 : 20]) 92 | self.assertEqual(dlm4.n, 20) 93 | dlm4.fitForwardFilter() 94 | 95 | self.dlm1.fitForwardFilter() 96 | 
self.assertAlmostEqual(np.sum(np.array(dlm4.result.filteredObs) - 97 | np.array(self.dlm1.result.filteredObs)), 0.0) 98 | 99 | def testAppendDynamic(self): 100 | # we feed the data to dlm4 via two segments 101 | dlm4 = dlm(self.data[0:11]) 102 | dlm4 + self.trend1 + dynamic(features = self.features[0:11], 103 | discount = 1) 104 | dlm4.fitForwardFilter() 105 | dlm4.append(self.data[11 : 20]) 106 | dlm4.append(self.features[11 : 20], component = 'dynamic') 107 | dlm4.fitForwardFilter() 108 | 109 | # we feed the data to dlm5 all at once 110 | dlm5 = dlm(self.data) 111 | dlm5 + self.trend1 + dynamic(features = self.features, 112 | discount = 1) 113 | dlm5.fitForwardFilter() 114 | self.assertAlmostEqual(np.sum(np.array(dlm4.result.filteredObs) - 115 | np.array(dlm5.result.filteredObs)), 0.0) 116 | 117 | def testPopout(self): 118 | dlm4 = dlm(self.data) 119 | dlm4 + self.trend1 + dynamic(features = self.features, discount = 1) 120 | dlm4.fitForwardFilter() 121 | # the filtered step range should be (0, 19) 122 | self.assertEqual(dlm4.result.filteredSteps, [0, 19]) 123 | 124 | # pop out the first date, the filtered range should be (0, -1) 125 | dlm4.popout(0) 126 | self.assertEqual(dlm4.result.filteredSteps, [0, -1]) 127 | 128 | dlm4.fitForwardFilter() 129 | dlm5 = dlm(self.data[1 : 20]) 130 | dlm5 + self.trend1 + dynamic(features = self.features[1 : 20], 131 | discount = 1) 132 | dlm5.fitForwardFilter() 133 | 134 | # The two chain should have the same filtered obs 135 | self.assertAlmostEqual(np.sum(np.array(dlm4.result.filteredObs) - 136 | np.array(dlm5.result.filteredObs)), 0.0) 137 | 138 | def testAlter(self): 139 | dlm4 = dlm(self.data) 140 | dlm4 + self.trend1 + dynamic(features = self.features, discount = 1) 141 | dlm4.fitForwardFilter() 142 | # the filtered step range should be (0, 19) 143 | self.assertEqual(dlm4.result.filteredSteps, [0, 19]) 144 | 145 | dlm4.alter(date = 15, data = 1, component = 'main') 146 | self.assertEqual(dlm4.result.filteredSteps, [0, 14]) 
147 | dlm4.fitForwardFilter() 148 | 149 | newData = [0] * 9 + [1] + [0] * 10 150 | newData[15] = 1 151 | dlm5 = dlm(newData) 152 | dlm5 + self.trend1 + dynamic(features = self.features, discount = 1) 153 | dlm5.fitForwardFilter() 154 | 155 | # The two chain should have the same filtered obs 156 | self.assertAlmostEqual(np.sum(np.array(dlm4.result.filteredObs) - \ 157 | np.array(dlm5.result.filteredObs)), 0.0) 158 | 159 | # test alter the feature 160 | dlm4.alter(date=0, data=[1,1], component='dynamic') 161 | self.assertAlmostEqual(dlm4.builder.dynamicComponents['dynamic'].features[0], 162 | [1, 1]) 163 | 164 | def testOneDayAheadPredictWithoutDynamic(self): 165 | self.dlm3.fitForwardFilter() 166 | (obs, var) = self.dlm3.predict(date=11) 167 | self.assertAlmostEqual(obs, -6.0/7) 168 | self.assertAlmostEqual(self.dlm3._predictModel.result.predictStatus, 169 | [11, 12, [-6.0/7]]) 170 | 171 | (obs, var) = self.dlm3.predict(date=2) 172 | self.assertAlmostEqual(obs, 3.0/5) 173 | # notice that the two latent states always sum up to 0 174 | self.assertAlmostEqual(self.dlm3._predictModel.result.predictStatus, 175 | [2, 3, [3.0/5]]) 176 | 177 | def testOneDayAheadPredictWithDynamic(self): 178 | self.dlm4.fitForwardFilter() 179 | featureDict = {'dynamic': 2.0} 180 | (obs, var) = self.dlm4.predict(date=9, 181 | featureDict=featureDict) 182 | self.assertAlmostEqual(obs, 5.0/6 * 2) 183 | 184 | def testContinuePredictWithoutDynamic(self): 185 | self.dlm3.fitForwardFilter() 186 | (obs, var) = self.dlm3.predict(date=11) 187 | self.assertAlmostEqual(self.dlm3._predictModel.result.predictStatus, 188 | [11, 12, [-6.0/7]]) 189 | (obs, var) = self.dlm3.continuePredict() 190 | self.assertAlmostEqual(self.dlm3._predictModel.result.predictStatus, 191 | [11, 13, [-6.0/7, 6.0/7]]) 192 | 193 | def testContinuePredictWithDynamic(self): 194 | self.dlm4.fitForwardFilter() 195 | featureDict = {'dynamic': [2.0]} 196 | (obs, var) = self.dlm4.predict(date=9, 197 | featureDict=featureDict) 198 | 
self.assertAlmostEqual(self.dlm4._predictModel.result.predictStatus, 199 | [9, 10, [5.0/6 * 2]]) 200 | 201 | featureDict = {'dynamic': [3.0]} 202 | (obs, var) = self.dlm4.continuePredict(featureDict=featureDict) 203 | self.assertAlmostEqual(self.dlm4._predictModel.result.predictStatus, 204 | [9, 11, [5.0/6 * 2, 5.0/6 * 3]]) 205 | 206 | def testPredictWithAutoReg(self): 207 | self.dlm5.fitForwardFilter() 208 | (obs, var) = self.dlm5.predict(date=99) 209 | self.assertAlmostEqual(obs[0, 0], 100.03682874) 210 | (obs, var) = self.dlm5.continuePredict() 211 | self.assertAlmostEqual(obs[0, 0], 101.07480945) 212 | 213 | def testPredictWithAutoReg2(self): 214 | self.dlm6.fitForwardFilter() 215 | (obs, var) = self.dlm6.predict(date=99) 216 | self.assertAlmostEqual(obs[0, 0], 100.02735) 217 | (obs, var) = self.dlm6.continuePredict() 218 | self.assertAlmostEqual(obs[0, 0], 101.06011996) 219 | (obs, var) = self.dlm6.continuePredict() 220 | self.assertAlmostEqual(obs[0, 0], 102.0946503) 221 | 222 | def testPredictNWithoutDynamic(self): 223 | self.dlm3.fitForwardFilter() 224 | (obs, var) = self.dlm3.predictN(N=2, date=11) 225 | self.assertAlmostEqual(self.dlm3._predictModel.result.predictStatus, 226 | [11, 13, [-6.0/7, 6.0/7]]) 227 | 228 | def testPredictNWithDynamic(self): 229 | self.dlm4.fitForwardFilter() 230 | featureDict = {'dynamic': [[2.0], [3.0]]} 231 | (obs, var) = self.dlm4.predictN(N=2, date=9, 232 | featureDict=featureDict) 233 | self.assertAlmostEqual(self.dlm4._predictModel.result.predictStatus, 234 | [9, 11, [5.0/6 * 2, 5.0/6 * 3]]) 235 | 236 | def testPredictNWithAutoReg(self): 237 | self.dlm5.fitForwardFilter() 238 | (obs, var) = self.dlm5.predictN(N=2, date=99) 239 | self.assertAlmostEqual(obs[0], 100.03682874) 240 | self.assertAlmostEqual(obs[1], 101.07480945) 241 | 242 | def testPredictNWithDynamicMatrixInput(self): 243 | self.dlm4.fitForwardFilter() 244 | featureDict = {'dynamic': np.matrix([[2.0], [3.0]])} 245 | (obs, var) = self.dlm4.predictN(N=2, date=9, 
246 | featureDict=featureDict) 247 | self.assertAlmostEqual(self.dlm4._predictModel.result.predictStatus, 248 | [9, 11, [5.0/6 * 2, 5.0/6 * 3]]) 249 | 250 | def testPredictionNotChangeModel(self): 251 | timeSeries = [1, 2, 1, 5, 3, 5, 4, 8, 1, 2] 252 | 253 | dlm1 = dlm(timeSeries) + trend(degree=2, discount=0.95) 254 | dlm1.fitForwardFilter() 255 | (obs1, var1) = dlm1.predictN(N=1, date=dlm1.n-1) 256 | 257 | dlm2 = dlm([]) + trend(degree=2, discount=0.95) 258 | for d in timeSeries: 259 | dlm2.append([d], component='main') 260 | dlm2.fitForwardFilter() 261 | (obs2, var2) = dlm2.predictN(N=1, date=dlm2.n-1) 262 | 263 | self.assertAlmostEqual(obs1, obs2) 264 | self.assertAlmostEqual(var1, var2) 265 | 266 | def testGetLatentState(self): 267 | # for forward filter 268 | self.dlm5.fitForwardFilter() 269 | filteredTrend = self.dlm5.getLatentState( 270 | filterType='forwardFilter', name='trend') 271 | diff = 0.0 272 | for i in range(len(filteredTrend)): 273 | diff += abs(filteredTrend[i][0] - 274 | self.dlm5.result.filteredState[i][0, 0]) 275 | self.assertAlmostEqual(diff, 0) 276 | 277 | # for backward smoother 278 | self.dlm5.fitBackwardSmoother() 279 | smoothedTrend = self.dlm5.getLatentState( 280 | filterType='backwardSmoother', name='trend') 281 | diff = 0.0 282 | for i in range(len(smoothedTrend)): 283 | diff += abs(smoothedTrend[i][0] - 284 | self.dlm5.result.smoothedState[i][0, 0]) 285 | self.assertAlmostEqual(diff, 0) 286 | 287 | def testGetLatentCov(self): 288 | # for forward filter 289 | self.dlm5.fitForwardFilter() 290 | filteredTrend = self.dlm5.getLatentCov( 291 | filterType='forwardFilter', name='trend') 292 | diff = 0.0 293 | for i in range(len(filteredTrend)): 294 | diff += abs(filteredTrend[i][0, 0] - 295 | self.dlm5.result.filteredCov[i][0, 0]) 296 | self.assertAlmostEqual(diff, 0) 297 | 298 | # for backward smoother 299 | self.dlm5.fitBackwardSmoother() 300 | smoothedTrend = self.dlm5.getLatentCov( 301 | filterType='backwardSmoother', name='trend') 302 | 
diff = 0.0 303 | for i in range(len(smoothedTrend)): 304 | diff += abs(smoothedTrend[i][0, 0] - 305 | self.dlm5.result.smoothedCov[i][0, 0]) 306 | self.assertAlmostEqual(diff, 0) 307 | 308 | def testGetMean(self): 309 | # for forward filter 310 | self.dlm5.fitForwardFilter() 311 | filteredTrend = self.dlm5.getMean(filterType='forwardFilter') 312 | self.assertEqual(len(filteredTrend), self.dlm5.n) 313 | diff = 0.0 314 | for i in range(len(filteredTrend)): 315 | diff += abs(filteredTrend[i] - 316 | self.dlm5.result.filteredObs[i][0, 0]) 317 | self.assertAlmostEqual(diff, 0) 318 | 319 | # for component with forward filter 320 | arTrend = self.dlm5.getMean(filterType='forwardFilter', 321 | name='ar2') 322 | trueAr = [item[1, 0] for item in self.dlm5.result.filteredState] 323 | comp = self.dlm5.builder.automaticComponents['ar2'] 324 | for i in range(len(trueAr)): 325 | comp.updateEvaluation(i, self.data5) 326 | trueAr[i] = comp.evaluation * trueAr[i] 327 | 328 | diff = 0.0 329 | for i in range(len(arTrend)): 330 | diff += abs(arTrend[i] - trueAr[i]) 331 | self.assertAlmostEqual(diff, 0) 332 | 333 | # for backward smoother 334 | self.dlm5.fitBackwardSmoother() 335 | filteredTrend = self.dlm5.getMean(filterType='backwardSmoother') 336 | diff = 0.0 337 | for i in range(len(filteredTrend)): 338 | diff += abs(filteredTrend[i] - 339 | self.dlm5.result.smoothedObs[i][0, 0]) 340 | self.assertAlmostEqual(diff, 0) 341 | 342 | # for component with backward smoother 343 | arTrend = self.dlm5.getMean(filterType='backwardSmoother', 344 | name='ar2') 345 | trueAr = [item[1, 0] for item in self.dlm5.result.smoothedState] 346 | comp = self.dlm5.builder.automaticComponents['ar2'] 347 | for i in range(len(trueAr)): 348 | comp.updateEvaluation(i, self.data5) 349 | trueAr[i] = comp.evaluation * trueAr[i] 350 | 351 | diff = 0.0 352 | for i in range(len(arTrend)): 353 | diff += abs(arTrend[i] - trueAr[i]) 354 | self.assertAlmostEqual(diff, 0) 355 | 356 | def testGetVar(self): 357 | # for 
    def testGetVar(self):
        """getVar must match the filtered/smoothed observation variances,
        both for the whole model and for a single component ('ar2')."""
        # for forward filter
        self.dlm5.fitForwardFilter()
        filteredTrend = self.dlm5.getVar(filterType='forwardFilter')
        self.assertEqual(len(filteredTrend), self.dlm5.n)
        diff = 0.0
        for i in range(len(filteredTrend)):
            diff += abs(filteredTrend[i] -
                        self.dlm5.result.filteredObsVar[i][0, 0])
        self.assertAlmostEqual(diff, 0)

        # for component with forward filter: component variance is the
        # quadratic form evaluation * Cov * evaluation.T over the
        # component's slice of the latent covariance (index 1 for 'ar2').
        arTrend = self.dlm5.getVar(filterType='forwardFilter',
                                   name='ar2')
        trueAr = [item[1, 1] for item in self.dlm5.result.filteredCov]
        comp = self.dlm5.builder.automaticComponents['ar2']
        for i in range(len(trueAr)):
            comp.updateEvaluation(i, self.data5)
            trueAr[i] = comp.evaluation * trueAr[i] * comp.evaluation.T

        diff = 0.0
        for i in range(len(arTrend)):
            diff += abs(arTrend[i] - trueAr[i])
        self.assertAlmostEqual(diff, 0)

        # for backward smoother
        self.dlm5.fitBackwardSmoother()
        filteredTrend = self.dlm5.getVar(filterType='backwardSmoother')
        diff = 0.0
        for i in range(len(filteredTrend)):
            diff += abs(filteredTrend[i] -
                        self.dlm5.result.smoothedObsVar[i][0, 0])
        self.assertAlmostEqual(diff, 0)

        # for component with backward smoother
        arTrend = self.dlm5.getVar(filterType='backwardSmoother',
                                   name='ar2')
        trueAr = [item[1, 1] for item in self.dlm5.result.smoothedCov]
        comp = self.dlm5.builder.automaticComponents['ar2']
        for i in range(len(trueAr)):
            comp.updateEvaluation(i, self.data5)
            trueAr[i] = comp.evaluation * trueAr[i] * comp.evaluation.T

        diff = 0.0
        for i in range(len(arTrend)):
            diff += abs(arTrend[i] - trueAr[i])
        self.assertAlmostEqual(diff, 0)

    def testGetMSE(self):
        """getMSE must equal the mean squared one-step-ahead prediction
        error over the filtered range.

        NOTE(review): self.dlm1, self.dlm2 and self.data are presumably
        created in setUp (not visible in this chunk) with 20 data points
        — confirm the hard-coded 20 against that fixture.
        """
        self.dlm1.stableMode(False)
        self.dlm1.fitForwardFilter()
        mse1 = self.dlm1.getMSE()
        # Recompute the MSE by hand from the predicted observations.
        mse_expect = 0
        for i in range(20):
            mse_expect += (self.dlm1.result.predictedObs[i] -
                           self.data[i]) ** 2
        mse_expect /= 20
        self.assertAlmostEqual(mse1, mse_expect)

        self.dlm2.stableMode(False)
        self.dlm2.fitForwardFilter()
        self.dlm2.result.filteredSteps = (0, 19)
        mse2 = self.dlm2.getMSE()
        # 2.0/20 is the known analytic value for the dlm2 fixture.
        mse_expect = 2.0/20

        self.assertAlmostEqual(mse2, mse_expect)

    def testGetResidual(self):
        """Residuals must satisfy data[i] == mean[i] + residual[i] for
        both filter types."""
        # for forward filter
        filter_type = 'forwardFilter'
        self.dlm5.fitForwardFilter()
        filteredTrend = self.dlm5.getMean(filterType=filter_type)
        filteredResidual = self.dlm5.getResidual(filterType=filter_type)
        self.assertEqual(len(filteredResidual), self.dlm5.n)
        diff = 0.0
        for i in range(len(filteredTrend)):
            diff += abs(- filteredTrend[i] - filteredResidual[i] +
                        self.dlm5.data[i])
        self.assertAlmostEqual(diff, 0)

        # for backward smoother
        filter_type = 'backwardSmoother'
        self.dlm5.fitBackwardSmoother()
        filteredTrend = self.dlm5.getMean(filterType=filter_type)
        filteredResidual = self.dlm5.getResidual(filterType=filter_type)
        diff = 0.0
        for i in range(len(filteredTrend)):
            diff += abs(- filteredTrend[i] - filteredResidual[i] +
                        self.dlm5.data[i])
        self.assertAlmostEqual(diff, 0)

    def testTune(self):
        # Smoke test only: just make sure the tune can run.
        self.dlm5.fit()
        self.dlm5.tune(maxit=10)

if __name__ == '__main__':
    unittest.main()
    def setUp(self):
        # A dlm over 100 random observations with a single degree-2 trend
        # component (discount 0.95), plus a fresh tuner.
        self.mydlm = dlm(np.random.random(100)) + trend(2, discount=0.95)
        self.mytuner = modelTuner()

    def testFind_gradient(self):
        """find_gradient must agree with a manual forward-difference
        estimate of d(MSE)/d(discount).

        The model has a single discount, so perturbing 'all' discounts by
        err (as done manually below) equals perturbing one coordinate.
        """
        mydlm2 = deepcopy(self.mydlm)
        self.mydlm.fitForwardFilter()
        mydlm2.fitForwardFilter()
        mse0 = mydlm2._getMSE()
        # NOTE(review): map() is a lazy iterator on Python 3 — this relies
        # on _setDiscounts consuming it exactly once; verify.
        mydlm2._setDiscounts(map(lambda x: x + self.mytuner.err,
                                 self.mydlm._getDiscounts()))
        mydlm2.fitForwardFilter()
        mse1 = mydlm2._getMSE()
        expect_gradient = (mse1 - mse0) / self.mytuner.err
        self.assertAlmostEqual(
            expect_gradient, self.mytuner.find_gradient(
                self.mydlm._getDiscounts(), self.mydlm))

    def testTune(self):
        """Tuning should lower the one-step-ahead MSE and push the single
        trend discount close to 1 on random (trendless) data."""
        tunedDLM = self.mytuner.tune(untunedDLM = self.mydlm, maxit=100)
        self.mydlm.fit()
        tunedDLM.fit()
        self.assertTrue(tunedDLM._getMSE() < self.mydlm._getMSE())
        self.assertTrue(
            max(tunedDLM._getDiscounts()) >= 1.0 - 2 * self.mytuner.err)

if __name__ == '__main__':
    unittest.main()
from copy import deepcopy
from numpy import array


class modelTuner:
    """ The main class for modelTuner.

    Tunes the discount factor of each dlm component (jointly) by gradient
    descent on the one-day-ahead prediction MSE. The input dlm is deep
    copied, so the original model is never modified by tune().

    Attributes:
        method: the optimization method. Currently only 'gradient_descent'
                is supported.
        loss: the optimization loss function. Currently only 'mse' (one-day
              ahead prediction) is supported.
    """

    def __init__(self, method='gradient_descent', loss='mse'):

        self.method = method
        self.loss = loss
        self.current_mse = None  # MSE from the most recent forward filter
        self.err = 1e-4          # finite-difference step / convergence tol
        self.discounts = None    # tuned discounts, populated by tune()

    def tune(self, untunedDLM, maxit=100, step = 1.0):
        """ Main function for tuning the DLM model.

        Args:
            untunedDLM: The DLM object that needs tuning.
            maxit: The maximum number of iterations for gradient descent.
            step: the moving length at each iteration.

        Returns:
            A tuned DLM object in uninitialized status. The input DLM is
            left unchanged.
        """
        # make a deep copy of the original dlm
        tunedDLM = deepcopy(untunedDLM)
        tunedDLM.showInternalMessage(False)

        if not tunedDLM.initialized:
            tunedDLM.fitForwardFilter()
        discounts = array(tunedDLM._getDiscounts())
        self.current_mse = tunedDLM._getMSE()

        # using gradient descent
        converged = False
        if self.method == 'gradient_descent':
            for i in range(maxit):
                gradient = self.find_gradient(discounts, tunedDLM)
                updated = [self.cutoff(d - g * step)
                           for d, g in zip(discounts, gradient)]
                # Bug fix: the loop previously never broke, so it always ran
                # maxit iterations and the success message below was
                # unreachable. We stop once a full descent step no longer
                # moves any discount by more than the tolerance.
                moved = max(abs(u - d) for u, d in zip(updated, discounts))
                discounts = updated
                tunedDLM._setDiscounts(discounts)
                tunedDLM.fitForwardFilter()
                self.current_mse = tunedDLM._getMSE()
                if moved < self.err:
                    converged = True
                    break

            if converged:
                print('Converge successfully!')
            else:
                print('The algorithm stops without converging.')
                if (min(discounts) <= 0.7 + self.err or
                        max(discounts) >= 1 - 2 * self.err):
                    # (typo fixes: closing parenthesis was missing)
                    print('Possible reason: some discount is too close to 1'
                          ' or 0.7 (0.7 is the smallest discount that is'
                          ' permissible).')
                else:
                    # (typo fix: 'maixt' -> 'maxit')
                    print('It might require more steps to converge.'
                          ' Use tune(..., maxit = ) instead.')

        self.discounts = discounts
        tunedDLM._setDiscounts(discounts, change_component=True)
        return tunedDLM

    def getDiscounts(self):
        """ Get the tuned discounting factors. One for each component (even the
        component being multi-dimensional, only one discounting factor will
        be assigned to one component). Initialized to None.

        """
        return self.discounts

    def find_gradient(self, discounts, DLM):
        """ Estimate d(MSE)/d(discount) by a forward difference, one
        coordinate at a time.

        Args:
            discounts: the current discount factors (list or array). Left
                unmodified by this call.
            DLM: the dlm used to evaluate the MSE. Its discounts are set
                and refitted during evaluation.

        Returns:
            A numpy array with one partial derivative per discount.
        """
        if self.current_mse is None:
            self.current_mse = DLM._getMSE()

        gradient = array([0.0] * len(discounts))

        for i in range(len(discounts)):
            # Bug fix: copy before perturbing. The old code aliased the
            # caller's list (discounts_err = discounts), so every
            # perturbation leaked into `discounts` and accumulated across
            # coordinates, corrupting both the gradient and the caller.
            discounts_err = list(discounts)
            discounts_err[i] = self.cutoff(discounts_err[i] + self.err)

            DLM._setDiscounts(discounts_err)
            DLM.fitForwardFilter()
            gradient[i] = (DLM._getMSE() - self.current_mse) / self.err

        return gradient

    def cutoff(self, a):
        """ Clamp a discount into the permissible range [0.7, 1). """
        if a < 0.7:
            return 0.7

        if a >= 1:
            return 0.99999

        return a
from setuptools import setup, find_packages

# Package metadata for pydlm-lite (the plotting-free branch of pydlm).
setup(
    name = 'pydlm-lite',
    version = '0.1.1.1',
    author = 'Xiangyu Wang',
    author_email = 'wwrechard@gmail.com',
    description = ('A python library for the Bayesian dynamic linear ' +
                   'model for time series modeling'),
    license = 'BSD',
    keywords = 'dlm bayes bayesian kalman filter smoothing dynamic model multi-threading online',
    # Bug fix: GitHub branch URLs use 'tree/<branch>', not 'tree.<branch>'.
    url = 'https://github.com/wwrechard/pydlm/tree/pydlm-lite',
    packages = find_packages(),
    classifiers = [
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: BSD License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Topic :: Scientific/Engineering :: Artificial Intelligence'
    ],
    include_package_data = False,
    install_requires = [
        'numpy',
    ],
    # Bug fix: 'unittest' is part of the standard library, not a PyPI
    # distribution; declaring it in tests_require / extras_require made pip
    # try to install an unrelated placeholder package. The test suite needs
    # no extra dependencies (it uses stdlib unittest discovery).
    extras_require = {
        'docs': [
            'Sphinx',
        ],
    },
)