├── .github └── pull_request_template.md ├── .gitignore ├── .travis.yml ├── AUTHORS.txt ├── CHANGELOG.md ├── CODE_OF_CONDUCT.rst ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── _static │ ├── LR_ELBO.png │ ├── LR_data.png │ ├── LR_summary.png │ └── LR_traceplot.png ├── api.rst ├── api │ ├── modules.rst │ ├── pymc3_models.models.rst │ └── pymc3_models.rst ├── conf.py ├── examples.rst ├── getting_started.rst ├── index.rst └── intro.rst ├── notebooks ├── HierarchicalLogisticRegression.ipynb ├── LinearRegression.ipynb ├── LogisticRegression.ipynb ├── NaiveBayes.ipynb └── figures │ └── naive_bayes │ ├── naive_bayes.pdf │ └── naive_bayes.png ├── pymc3_models ├── __init__.py ├── _version.py ├── exc.py ├── models │ ├── HierarchicalLogisticRegression.py │ ├── LinearRegression.py │ ├── LogisticRegression.py │ ├── NaiveBayes.py │ └── __init__.py └── utils.py ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── __init__.py └── models ├── __init__.py ├── test_BayesianModel.py ├── test_HierarchicalLogisticRegression.py ├── test_LinearRegression.py ├── test_LogisticRegression.py └── test_NaiveBayes.py /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Pull Request Checklist 2 | - [ ] Linter passes locally 3 | - [ ] All tests in the `tests` folder pass with a local build 4 | - [ ] CHANGELOG has been updated 5 | - [ ] Version in `_version.py` has been updated 6 | - [ ] README has been updated (if applicable) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | #Apple 104 | .DS_Store 105 | 106 | # Pickles 107 | notebooks/pickle_jar/ 108 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.4" 5 | install: 6 | - pip install --upgrade pip 7 | - pip install -r requirements.txt 8 | - pip install -r requirements-dev.txt 9 | script: 10 | - flake8 pymc3_models tests 11 | - pytest -v 12 | branches: 13 | only: 14 | - master 15 | -------------------------------------------------------------------------------- /AUTHORS.txt: -------------------------------------------------------------------------------- 1 | Nicole Carlson , 2 | Rémi Louf , 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
5 | 6 | ## [2.1.0] - 2019-01-11 7 | ### Added 8 | - Logistic Regression model 9 | 10 | ## [2.0.0] - 2019-01-11 11 | ### Changed 12 | - num_advi_sample_draws can be input to fit methods instead of being hardcoded into the class 13 | - num_ppc_samples can be input to predict methods (where applicable) 14 | ### Fixed 15 | - Made formatting in docstrings consistent 16 | 17 | ## [1.4.0] - 2019-01-10 18 | ### Added 19 | - Gaussian Naive Bayes model 20 | 21 | ## [1.3.0] - 2019-01-08 22 | ### Added 23 | - Travis CI integration 24 | ### Changed 25 | - Switched to using pytest for unittests 26 | 27 | ## [1.2.2] - 2019-01-07 28 | ### Fixed 29 | - License switched to classifier instead of text 30 | 31 | ## [1.2.1] - 2019-01-07 32 | ### Fixed 33 | - Missing comma in setup.py 34 | 35 | ## [1.2.0] - 2019-01-07 36 | ### Added 37 | - flake8 linting 38 | - version file 39 | - less strict version requirements in requirements.txt 40 | - PR template 41 | 42 | ## [1.1.3] - 2018-05-25 43 | ### Fixed 44 | - HLR fit method sets shared vars if no minibatch_size given 45 | 46 | ## [1.1.2] - 2018-05-20 47 | ### Fixed 48 | - df_summary deprecated in pymc3 release 3.3, changed to summary 49 | 50 | ## [1.1.1] - 2018-05-20 51 | ### Fixed 52 | - Minibatches for ADVI in HLR require model_output to be cast as int 53 | 54 | ## [1.1.0] - 2018-01-30 55 | ### Added 56 | - New class property for default number of draws for ADVI sampling 57 | 58 | ## [1.0.3] - 2018-01-05 59 | ### Fixed 60 | - LICENSE file name changed to correct version 61 | - Had to skip 1.0.2 due to a PyPI uploading fiasco 62 | 63 | ## [1.0.1] - 2018-01-05 64 | ### Fixed 65 | - Messed up uploading to PyPI 66 | 67 | ## [1.0.0] - 2018-01-05 68 | ### Added 69 | - First version of the library 70 | - Hierarchical Logistic Regression and Linear Regression models 71 | - Documentation 72 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | ============================ 2 | CONTRIBUTOR CODE OF CONDUCT 3 | ============================ 4 | 5 | As contributors and maintainers of this project, and in the interest of fostering an open and welcoming community, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities. 6 | 7 | We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion, or nationality. 8 | 9 | Examples of unacceptable behavior by participants include: 10 | 11 | - The use of sexualized language or imagery 12 | - Personal attacks 13 | - Trolling or insulting/derogatory comments 14 | - Public or private harassment 15 | - Publishing others’ private information, such as physical or electronic addresses, without explicit permission 16 | - Other unethical or unprofessional conduct 17 | 18 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. By adopting this Code of Conduct, project maintainers commit themselves to fairly and consistently applying these principles to every aspect of managing this project.
Project maintainers who do not follow or enforce the Code of Conduct may be permanently removed from the project team. 19 | 20 | This code of conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. 21 | 22 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers (nicole@parsingscience.com). 23 | 24 | This Code of Conduct is adapted from the Contributor Covenant, version 1.2.0, available at https://www.contributor-covenant.org/version/1/2/0/code-of-conduct.html. 25 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ================ 3 | 4 | Thank you for considering contributing to PyMC3 Models! This project is intended to be a space where anyone can share models they've built. 5 | 6 | Please read these guidelines before submitting anything to the project. As of the first release, I'm the only person working on this project, so respecting these guidelines will help me get back to you more quickly. 7 | 8 | Some ways to contribute: 9 | 10 | - Open an issue on the `GitHub Issue Tracker <https://github.com/parsing-science/pymc3_models/issues>`__. (Please check that it has not already been reported or addressed in a PR.) 11 | - Improve the docs! 12 | - Add a new model. Please follow the guidelines below. 13 | - Add/change existing functionality in the base model class 14 | - Something I haven't thought of? 15 | 16 | Pull Requests 17 | ------------------ 18 | To create a PR against this library, please fork the project and work from there. 19 | 20 | Steps 21 | ++++++ 22 | 23 | 1. Fork the project via the Fork button on GitHub 24 | 25 | 2. Clone the repo to your local disk. 26 | 27 | 3. Create a new branch for your PR. 28 | 29 | :: 30 | 31 | git checkout -b my-awesome-new-feature 32 | 33 | 4. Install requirements (probably in a virtual environment) 34 | 35 | :: 36 | 37 | virtualenv venv 38 | source venv/bin/activate 39 | pip install -r requirements-dev.txt 40 | pip install -r requirements.txt 41 | 42 | 5. Develop your feature 43 | 44 | 6. Submit a PR! 45 | 46 | PR Checklist 47 | +++++++++++++ 48 | 49 | - Ensure your code has followed the Style Guidelines below 50 | - Run the linter on your code 51 | 52 | :: 53 | 54 | source venv/bin/activate 55 | flake8 pymc3_models tests 56 | 57 | - Make sure you have written unittests where appropriate 58 | - Make sure the unittests pass 59 | 60 | :: 61 | 62 | source venv/bin/activate 63 | pytest -v 64 | 65 | - Update the docs where appropriate. You can rebuild them with the commands below. 66 | 67 | :: 68 | 69 | cd pymc3_models/docs 70 | sphinx-apidoc -f -o api/ ../pymc3_models/ 71 | make html 72 | 73 | - Update the CHANGELOG 74 | 75 | Notes for new models 76 | ++++++++++++++++++++++++++ 77 | 78 | - New models should be put into the models directory. 79 | - Make the file name the same as the class name; be explicit, e.g. HierarchicalLogisticRegression, not HLR. 80 | - Try to write some simple unittests for your model. I do not recommend using NUTS in your unittests if you have a complex model because the tests will take hours to run. 81 | - [Optional] Please create a Jupyter notebook in the notebooks folder with the same name as your model class. In it, show a simple example of how to use your model. Synthetic data is fine to use.
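To make these notes concrete, here is a minimal sketch of what a new model class can look like. It is illustrative only: ``MyModel`` and its intercept-only Gaussian are placeholders rather than a real model, but the overall layout, and the ``BayesianModel`` helpers it calls (``_set_default_inference_args``, ``_set_shared_vars``, ``_inference``), mirror the existing ``LinearRegression`` class in this repo.

::

    import numpy as np
    import pymc3 as pm
    import theano

    from pymc3_models.models import BayesianModel


    class MyModel(BayesianModel):
        """
        Skeleton model; replace the placeholder prior and likelihood with your own.
        """

        def __init__(self):
            super(MyModel, self).__init__()

        def create_model(self):
            # Shared variables let fit and predict swap data in later;
            # the placeholder shapes must match the training data.
            model_input = theano.shared(np.zeros([self.num_training_samples, self.num_pred]))
            model_output = theano.shared(np.zeros(self.num_training_samples))

            self.shared_vars = {
                'model_input': model_input,
                'model_output': model_output,
            }

            model = pm.Model()

            with model:
                # Placeholder parameterization: an intercept-only Gaussian
                mu = pm.Normal('mu', mu=0, sd=100)
                s = pm.HalfNormal('s', tau=1)

                y = pm.Normal('y', mu=mu, sd=s, observed=model_output)

            return model

        def fit(self, X, y, inference_type='advi', num_advi_sample_draws=10000, inference_args=None):
            self.num_training_samples, self.num_pred = X.shape
            self.inference_type = inference_type

            if not inference_args:
                inference_args = self._set_default_inference_args()

            if self.cached_model is None:
                self.cached_model = self.create_model()

            self._set_shared_vars({'model_input': X, 'model_output': y})
            self._inference(inference_type, inference_args, num_advi_sample_draws=num_advi_sample_draws)

            return self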
82 | 83 | Style Guidelines 84 | ++++++++++++++++++++++++++ 85 | 86 | For the most part, this library follows PEP8 with a couple of exceptions. 87 | 88 | Notes: 89 | 90 | - Indent with 4 spaces 91 | - Lines can be 110 characters long 92 | - Docstrings should be written as numpy docstrings 93 | - Your code should be Python 3 compatible 94 | - When in doubt, follow the style of the existing code 95 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS.txt LICENSE README.md 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyMC3 Models 2 | 3 | Custom PyMC3 models built on top of the scikit-learn API. Check out the [docs](http://pymc3-models.readthedocs.io/).
4 | 5 | ## Features 6 | 7 | - Reusable PyMC3 models including LinearRegression and HierarchicalLogisticRegression 8 | - A base class, BayesianModel, for building your own PyMC3 models 9 | 10 | ## Installation 11 | The latest release of PyMC3 Models can be installed from PyPI using `pip`: 12 | 13 | ``` bash 14 | pip install pymc3_models 15 | ``` 16 | 17 | The current development branch of PyMC3 Models can be installed from GitHub, also using `pip`: 18 | 19 | ``` bash 20 | pip install git+https://github.com/parsing-science/pymc3_models.git 21 | ``` 22 | 23 | To run the package locally (in a virtual environment): 24 | 25 | ``` 26 | git clone https://github.com/parsing-science/pymc3_models.git 27 | cd pymc3_models 28 | virtualenv venv 29 | source venv/bin/activate 30 | pip install -r requirements.txt 31 | ``` 32 | 33 | ## Usage 34 | Since PyMC3 Models is built on top of scikit-learn, you can use the same methods as with a scikit-learn model. 35 | 36 | ``` python 37 | from pymc3_models import LinearRegression 38 | 39 | LR = LinearRegression() 40 | LR.fit(X, Y) 41 | LR.predict(X) 42 | LR.score(X, Y) 43 | ``` 44 | 45 | ## Contribute 46 | For more info, see [CONTRIBUTING](https://github.com/parsing-science/pymc3_models/blob/master/CONTRIBUTING.rst). 47 | 48 | ### Contributor Code of Conduct 49 | 50 | Please note that this project is released with a [Contributor Code of Conduct](http://contributor-covenant.org/). By participating in this project you agree to abide by its terms. See [CODE_OF_CONDUCT](https://github.com/parsing-science/pymc3_models/blob/master/CODE_OF_CONDUCT.rst). 51 | 52 | ## Acknowledgments 53 | This library is built on top of [PyMC3](http://docs.pymc.io/) and [scikit-learn](http://scikit-learn.org). 54 | 55 | ## License 56 | [Apache License, Version 2.0](https://github.com/parsing-science/pymc3_models/blob/master/LICENSE) 57 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = PyMC3Models 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/_static/LR_ELBO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parsing-science/pymc3_models/1b8cca86a2ce05dfec3df81ea57d17c7defb51ed/docs/_static/LR_ELBO.png -------------------------------------------------------------------------------- /docs/_static/LR_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parsing-science/pymc3_models/1b8cca86a2ce05dfec3df81ea57d17c7defb51ed/docs/_static/LR_data.png -------------------------------------------------------------------------------- /docs/_static/LR_summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parsing-science/pymc3_models/1b8cca86a2ce05dfec3df81ea57d17c7defb51ed/docs/_static/LR_summary.png -------------------------------------------------------------------------------- /docs/_static/LR_traceplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parsing-science/pymc3_models/1b8cca86a2ce05dfec3df81ea57d17c7defb51ed/docs/_static/LR_traceplot.png -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | ======================================== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | api/modules 8 | -------------------------------------------------------------------------------- /docs/api/modules.rst: -------------------------------------------------------------------------------- 1 | pymc3_models 2 | ============ 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | pymc3_models 8 | -------------------------------------------------------------------------------- /docs/api/pymc3_models.models.rst: -------------------------------------------------------------------------------- 1 | pymc3\_models.models package 2 | ============================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | pymc3\_models.models.HierarchicalLogisticRegression module 8 | ---------------------------------------------------------- 9 | 10 | .. automodule:: pymc3_models.models.HierarchicalLogisticRegression 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | pymc3\_models.models.LinearRegression module 16 | -------------------------------------------- 17 | 18 | .. automodule:: pymc3_models.models.LinearRegression 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | pymc3\_models.models.LogisticRegression module 24 | ---------------------------------------------- 25 | 26 | .. automodule:: pymc3_models.models.LogisticRegression 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | pymc3\_models.models.NaiveBayes module 32 | -------------------------------------- 33 | 34 | .. automodule:: pymc3_models.models.NaiveBayes 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. automodule:: pymc3_models.models 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/api/pymc3_models.rst: -------------------------------------------------------------------------------- 1 | pymc3\_models package 2 | ===================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | ..
toctree:: 8 | 9 | pymc3_models.models 10 | 11 | Submodules 12 | ---------- 13 | 14 | pymc3\_models.exc module 15 | ------------------------ 16 | 17 | .. automodule:: pymc3_models.exc 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: pymc3_models 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # PyMC3 Models documentation build configuration file, created by 5 | # sphinx-quickstart on Thu Dec 28 11:42:02 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os 21 | import sys 22 | sys.path.insert(0, os.path.abspath('pymc3_models/pymc3_models')) 23 | sys.path.insert(0, os.path.abspath('../')) 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | # 29 | # needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 35 | 'sphinx.ext.autosummary', 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.coverage', 38 | 'sphinx.ext.doctest', 39 | 'sphinx.ext.githubpages', 40 | 'sphinx.ext.intersphinx', 41 | 'sphinx.ext.mathjax', 42 | 'sphinx.ext.napoleon', 43 | 'sphinx.ext.todo', 44 | 'sphinx.ext.viewcode', 45 | 'matplotlib.sphinxext.only_directives', 46 | 'matplotlib.sphinxext.plot_directive', 47 | 'numpydoc', 48 | ] 49 | 50 | #Napoleon settings 51 | napoleon_use_param = False 52 | 53 | # Add any paths that contain templates here, relative to this directory. 54 | templates_path = ['_templates'] 55 | 56 | # The suffix(es) of source filenames. 57 | # You can specify multiple suffix as a list of string: 58 | # 59 | # source_suffix = ['.rst', '.md'] 60 | source_suffix = '.rst' 61 | 62 | # The master toctree document. 63 | master_doc = 'index' 64 | 65 | # General information about the project. 66 | project = 'PyMC3 Models' 67 | copyright = '2017, Nicole Carlson' 68 | author = 'Nicole Carlson' 69 | 70 | # The version info for the project you're documenting, acts as replacement for 71 | # |version| and |release|, also used in various other places throughout the 72 | # built documents. 73 | # 74 | # The short X.Y version. 75 | version = '1.0' 76 | # The full version, including alpha/beta/rc tags. 77 | release = '1.0' 78 | 79 | # The language for content autogenerated by Sphinx. Refer to documentation 80 | # for a list of supported languages. 81 | # 82 | # This is also used if you do content translation via gettext catalogs. 83 | # Usually you set "language" from the command line for these cases. 
84 | language = None 85 | 86 | # List of patterns, relative to source directory, that match files and 87 | # directories to ignore when looking for source files. 88 | # This patterns also effect to html_static_path and html_extra_path 89 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 90 | 91 | # The name of the Pygments (syntax highlighting) style to use. 92 | pygments_style = 'sphinx' 93 | 94 | # If true, `todo` and `todoList` produce output, else they produce nothing. 95 | todo_include_todos = True 96 | 97 | 98 | # -- Options for HTML output ---------------------------------------------- 99 | 100 | # The theme to use for HTML and HTML Help pages. See the documentation for 101 | # a list of builtin themes. 102 | # 103 | html_theme = "sphinx_rtd_theme" 104 | 105 | # Theme options are theme-specific and customize the look and feel of a theme 106 | # further. For a list of options available for each theme, see the 107 | # documentation. 108 | # 109 | # html_theme_options = {} 110 | 111 | # Add any paths that contain custom static files (such as style sheets) here, 112 | # relative to this directory. They are copied after the builtin static files, 113 | # so a file named "default.css" will overwrite the builtin "default.css". 114 | html_static_path = ['_static'] 115 | 116 | # Custom sidebar templates, must be a dictionary that maps document names 117 | # to template names. 118 | # 119 | # This is required for the alabaster theme 120 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 121 | html_sidebars = { 122 | '**': [ 123 | 'globaltoc.html', 124 | 'relations.html', # needs 'show_related': True theme option to display 125 | 'searchbox.html', 126 | ] 127 | } 128 | 129 | 130 | # -- Options for HTMLHelp output ------------------------------------------ 131 | 132 | # Output file base name for HTML help builder. 133 | htmlhelp_basename = 'PyMC3Modelsdoc' 134 | 135 | 136 | # -- Options for LaTeX output --------------------------------------------- 137 | 138 | latex_elements = { 139 | # The paper size ('letterpaper' or 'a4paper'). 140 | # 141 | # 'papersize': 'letterpaper', 142 | 143 | # The font size ('10pt', '11pt' or '12pt'). 144 | # 145 | # 'pointsize': '10pt', 146 | 147 | # Additional stuff for the LaTeX preamble. 148 | # 149 | # 'preamble': '', 150 | 151 | # Latex figure (float) alignment 152 | # 153 | # 'figure_align': 'htbp', 154 | } 155 | 156 | # Grouping the document tree into LaTeX files. List of tuples 157 | # (source start file, target name, title, 158 | # author, documentclass [howto, manual, or own class]). 159 | latex_documents = [ 160 | (master_doc, 'PyMC3Models.tex', 'PyMC3 Models Documentation', 161 | 'Nicole Carlson', 'manual'), 162 | ] 163 | 164 | 165 | # -- Options for manual page output --------------------------------------- 166 | 167 | # One entry per manual page. List of tuples 168 | # (source start file, name, description, authors, manual section). 169 | man_pages = [ 170 | (master_doc, 'pymc3models', 'PyMC3 Models Documentation', 171 | [author], 1) 172 | ] 173 | 174 | 175 | # -- Options for Texinfo output ------------------------------------------- 176 | 177 | # Grouping the document tree into Texinfo files. 
List of tuples 178 | # (source start file, target name, title, author, 179 | # dir menu entry, description, category) 180 | texinfo_documents = [ 181 | (master_doc, 'PyMC3Models', 'PyMC3 Models Documentation', 182 | author, 'PyMC3Models', 'One line description of project.', 183 | 'Miscellaneous'), 184 | ] 185 | 186 | 187 | 188 | # -- Options for Epub output ---------------------------------------------- 189 | 190 | # Bibliographic Dublin Core info. 191 | epub_title = project 192 | epub_author = author 193 | epub_publisher = author 194 | epub_copyright = copyright 195 | 196 | # The unique identifier of the text. This can be an ISBN number 197 | # or the project homepage. 198 | # 199 | # epub_identifier = '' 200 | 201 | # A unique identification for the text. 202 | # 203 | # epub_uid = '' 204 | 205 | # A list of files that should not be packed into the epub file. 206 | epub_exclude_files = ['search.html'] 207 | 208 | 209 | 210 | # Example configuration for intersphinx: refer to the Python standard library. 211 | intersphinx_mapping = {'https://docs.python.org/': None} 212 | -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | Examples 2 | ======================================== 3 | 4 | Check out the `notebooks folder <https://github.com/parsing-science/pymc3_models/tree/master/notebooks>`__. 5 | 6 | Currently, the following models have been implemented: 7 | 8 | - Linear Regression 9 | - Hierarchical Logistic Regression 10 | - Logistic Regression 11 | - Gaussian Naive Bayes 12 | -------------------------------------------------------------------------------- /docs/getting_started.rst: -------------------------------------------------------------------------------- 1 | Getting Started 2 | ======================================== 3 | 4 | This section is adapted from my 2017 PyData NYC talk. 5 | 6 | To demonstrate how to get started with PyMC3 Models, I'll walk through a simple Linear Regression example. First, I'll go through the example using just PyMC3. Then I'll show you the same example using PyMC3 Models. 7 | 8 | Generate Synthetic Data 9 | ------------------------ 10 | :: 11 | 12 | X = np.random.randn(1000, 1) 13 | noise = 2 * np.random.randn(1000, 1) 14 | Y = 4 * X + 3 + noise 15 | 16 | .. image:: _static/LR_data.png 17 | 18 | Fit a model with PyMC3 19 | ----------------------- 20 | 21 | Step 1: Set up the PyMC3 model 22 | +++++++++++++++++++++++++++++++++++++++ 23 | :: 24 | 25 | lin_reg_model = pm.Model() 26 | 27 | model_input = theano.shared(X) 28 | 29 | model_output = theano.shared(Y) 30 | 31 | with lin_reg_model: 32 | 33 | alpha = pm.Normal('alpha', mu=0, sd=100, shape=(1)) 34 | beta = pm.Normal('beta', mu=0, sd=100, shape=(1)) 35 | 36 | s = pm.HalfNormal('s', tau=1) 37 | 38 | mean = alpha + beta * model_input 39 | 40 | y = pm.Normal('y', mu=mean, sd=s, observed=model_output) 41 | 42 | Step 2: Infer your parameters 43 | +++++++++++++++++++++++++++++++++++++++ 44 | :: 45 | 46 | 47 | with lin_reg_model: 48 | inference = pm.ADVI() 49 | approx = pm.fit( 50 | n=20000, 51 | method=inference, 52 | more_replacements={ 53 | model_input: pm.Minibatch(X), 54 | model_output: pm.Minibatch(Y) 55 | } 56 | ) 57 | 58 | Check if minibatch ADVI converged by plotting the ELBO: 59 | 60 | :: 61 | 62 | plt.plot(-inference.hist) 63 | plt.ylabel('ELBO') 64 | plt.xlabel('iteration') 65 | 66 | ..
image:: _static/LR_ELBO.png 67 | 68 | Step 3: Interpret your parameters 69 | +++++++++++++++++++++++++++++++++++++++ 70 | To make things a bit easier, I draw samples from the approximation to generate a trace. 71 | 72 | :: 73 | 74 | trace = approx.sample(draws=5000) 75 | summary(trace) 76 | 77 | .. image:: _static/LR_summary.png 78 | 79 | :: 80 | 81 | traceplot(trace) 82 | 83 | .. image:: _static/LR_traceplot.png 84 | 85 | Step 4: Predict data by creating posterior predictive samples 86 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 87 | :: 88 | 89 | ppc = pm.sample_ppc( 90 | trace[1000:], 91 | model=lin_reg_model, 92 | samples=2000 93 | ) 94 | pred = ppc['y'].mean(axis=0) 95 | r2_score(Y, pred) 96 | 0.79444136879972738 97 | 98 | Fit a model with PyMC3 Models 99 | ---------------------------------------------- 100 | Now, we can build a Linear Regression model using PyMC3 Models. 101 | 102 | The following is equivalent to Steps 1 and 2 above. 103 | 104 | :: 105 | 106 | LR = LinearRegression() 107 | LR.fit(X, Y, minibatch_size=100) 108 | LR.plot_elbo() 109 | 110 | .. image:: _static/LR_ELBO.png 111 | 112 | The following is equivalent to Step 3 above. 113 | Since the trace is saved directly, you can use the same PyMC3 functions (summary and traceplot). 114 | 115 | :: 116 | 117 | traceplot(LR.trace) 118 | 119 | .. image:: _static/LR_traceplot.png 120 | 121 | The following is equivalent to Step 4. 122 | 123 | :: 124 | 125 | Y_predict = LR.predict(X) 126 | LR.score(X, Y) 127 | 128 | The same type of model can be fit in fewer lines, and the model class follows the scikit-learn API. 129 | 130 | If you want a point estimate, you can use the saved summary dataframe: 131 | 132 | :: 133 | 134 | beta = LR.summary['mean']['betas__0_0'] 135 | alpha = LR.summary['mean']['alpha__0'] 136 | 137 | Advanced 138 | ---------- 139 | Saving and Loading your model 140 | ++++++++++++++++++++++++++++++++++++++++++++++++++ 141 | 142 | :: 143 | 144 | LR.save('pickle_jar/LR_jar/') 145 | LR2 = LinearRegression() 146 | LR2.load('pickle_jar/LR_jar/') 147 | 148 | NUTS Inference 149 | +++++++++++++++++++++++++ 150 | The default method of inference for PyMC3 Models is minibatch ADVI. This is typically much faster than other methods. However, in some cases, you may want to use the NUTS sampler. 151 | 152 | :: 153 | 154 | LR3 = LinearRegression() 155 | LR3.fit(X, Y, inference_type='nuts', inference_args={'draws': 2000}) 156 | 157 | Now you can use the predict and score methods, etc., as above. 158 | 159 | Inference Args 160 | +++++++++++++++++++++++++ 161 | If you don't want to use the default arguments for inference, you can pass in inference_args. Check out the `PyMC3 documentation <http://docs.pymc.io/>`__ for permissible values for the inference_type you are using. 162 | 163 | Building your own models 164 | +++++++++++++++++++++++++ 165 | Lastly, if you want to build your own models, you can build them on top of the BayesianModel base class. 166 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to PyMC3 Models' documentation! 2 | ======================================== 3 | 4 | .. include:: ../README.rst 5 | 6 | Contents 7 | --------- 8 | 9 | ..
toctree:: 10 | :maxdepth: 2 11 | 12 | intro 13 | getting_started 14 | examples 15 | api 16 | 17 | Indices and tables 18 | ------------------ 19 | 20 | * :ref:`genindex` 21 | * :ref:`modindex` 22 | * :ref:`search` 23 | -------------------------------------------------------------------------------- /docs/intro.rst: -------------------------------------------------------------------------------- 1 | Introduction to PyMC3 models 2 | ======================================== 3 | 4 | This library was inspired by my own work creating a re-usable Hierarchical Logistic Regression model. 5 | 6 | To learn more, you can read this section, watch a 7 | video from PyData NYC 2017, or check out the 8 | slides. 9 | 10 | Quick intro to PyMC3 11 | -------------------- 12 | When building a model with PyMC3, you will usually follow the same four steps: 13 | 14 | - **Step 1: Set up** Parameterize your model, choose priors, and insert training data 15 | - **Step 2: Inference** Infer your parameters using MCMC sampling (e.g. NUTS) or variational inference (e.g. ADVI) 16 | - **Step 3: Interpret** Check your parameter distributions and model fit 17 | - **Step 4: Predict data** Create posterior samples with your inferred parameters 18 | 19 | For a longer discussion of these steps, see :doc:`getting_started`. 20 | 21 | Mapping between scikit-learn and PyMC3 22 | -------------------------------------- 23 | This library builds a mapping between the steps above and the methods used by scikit-learn models. 24 | 25 | +----------------+--------------------------------------+ 26 | | scikit-learn | PyMC3 | 27 | +================+======================================+ 28 | | Fit | Step 1: Set up, Step 2: Inference | 29 | +----------------+--------------------------------------+ 30 | | Predict | Step 4: Predict data | 31 | +----------------+--------------------------------------+ 32 | | Score | Step 4: Predict data | 33 | +----------------+--------------------------------------+ 34 | | Save/Load | ?? | 35 | +----------------+--------------------------------------+ 36 | | ?? | Step 3: Interpret | 37 | +----------------+--------------------------------------+ 38 | 39 | The question marks represent things that don't exist in the two libraries on their own. 40 | 41 | 42 | Comparing scikit-learn, PyMC3, and PyMC3 Models 43 | ------------------------------------------------ 44 | Using the mapping above, this library creates easy-to-use PyMC3 models. 45 | 46 | +----------------------------+-------------+-------------+--------------+ 47 | | |scikit-learn | PyMC3 | PyMC3 Models | 48 | +============================+=============+=============+==============+ 49 | | Find model parameters | Easy | Medium | Easy | 50 | +----------------------------+-------------+-------------+--------------+ 51 | | Predict new data | Easy | Difficult | Easy | 52 | +----------------------------+-------------+-------------+--------------+ 53 | | Score a model | Easy | Difficult | Easy | 54 | +----------------------------+-------------+-------------+--------------+ 55 | | Save a trained model | Easy | Impossible? | Easy | 56 | +----------------------------+-------------+-------------+--------------+ 57 | | Load a trained model | Easy | Impossible?
| Easy | 58 | +----------------------------+-------------+-------------+--------------+ 59 | | Interpret Parameterization | N/A | Easy | Easy | 60 | +----------------------------+-------------+-------------+--------------+ 61 | 62 | -------------------------------------------------------------------------------- /notebooks/figures/naive_bayes/naive_bayes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parsing-science/pymc3_models/1b8cca86a2ce05dfec3df81ea57d17c7defb51ed/notebooks/figures/naive_bayes/naive_bayes.pdf -------------------------------------------------------------------------------- /notebooks/figures/naive_bayes/naive_bayes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parsing-science/pymc3_models/1b8cca86a2ce05dfec3df81ea57d17c7defb51ed/notebooks/figures/naive_bayes/naive_bayes.png -------------------------------------------------------------------------------- /pymc3_models/__init__.py: -------------------------------------------------------------------------------- 1 | from pymc3_models.models.HierarchicalLogisticRegression import HierarchicalLogisticRegression 2 | from pymc3_models.models.LinearRegression import LinearRegression 3 | from pymc3_models.models.LogisticRegression import LogisticRegression 4 | from pymc3_models.models.NaiveBayes import GaussianNaiveBayes 5 | -------------------------------------------------------------------------------- /pymc3_models/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = '2.1.0' 2 | -------------------------------------------------------------------------------- /pymc3_models/exc.py: -------------------------------------------------------------------------------- 1 | class PyMC3ModelsError(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /pymc3_models/models/HierarchicalLogisticRegression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pymc3 as pm 3 | from sklearn.metrics import accuracy_score 4 | import theano 5 | import theano.tensor as T 6 | 7 | from pymc3_models.exc import PyMC3ModelsError 8 | from pymc3_models.models import BayesianModel 9 | 10 | 11 | class HierarchicalLogisticRegression(BayesianModel): 12 | """ 13 | Custom Hierarchical Logistic Regression built using PyMC3. 14 | """ 15 | 16 | def __init__(self): 17 | super(HierarchicalLogisticRegression, self).__init__() 18 | self.num_cats = None 19 | 20 | def create_model(self): 21 | """ 22 | Creates and returns the PyMC3 model. 23 | 24 | Note: The size of the shared variables must match the size of the training data. 25 | Otherwise, setting the shared variables later will raise an error.
26 | See http://docs.pymc.io/advanced_theano.html 27 | 28 | Returns 29 | ------- 30 | the PyMC3 model 31 | """ 32 | model_input = theano.shared(np.zeros([self.num_training_samples, self.num_pred])) 33 | 34 | model_output = theano.shared(np.zeros(self.num_training_samples, dtype='int')) 35 | 36 | model_cats = theano.shared(np.zeros(self.num_training_samples, dtype='int')) 37 | 38 | self.shared_vars = { 39 | 'model_input': model_input, 40 | 'model_output': model_output, 41 | 'model_cats': model_cats 42 | } 43 | 44 | model = pm.Model() 45 | 46 | with model: 47 | mu_alpha = pm.Normal('mu_alpha', mu=0, sd=100) 48 | sigma_alpha = pm.HalfNormal('sigma_alpha', sd=100) 49 | 50 | mu_beta = pm.Normal('mu_beta', mu=0, sd=100) 51 | sigma_beta = pm.HalfNormal('sigma_beta', sd=100) 52 | 53 | alpha = pm.Normal('alpha', mu=mu_alpha, sd=sigma_alpha, shape=(self.num_cats,)) 54 | betas = pm.Normal('beta', mu=mu_beta, sd=sigma_beta, shape=(self.num_cats, self.num_pred)) 55 | 56 | c = model_cats 57 | 58 | temp = alpha[c] + T.sum(betas[c] * model_input, 1) 59 | 60 | p = pm.invlogit(temp) 61 | 62 | o = pm.Bernoulli('o', p, observed=model_output) 63 | 64 | return model 65 | 66 | def fit( 67 | self, 68 | X, 69 | y, 70 | cats, 71 | inference_type='advi', 72 | num_advi_sample_draws=10000, 73 | minibatch_size=None, 74 | inference_args=None 75 | ): 76 | """ 77 | Train the Hierarchical Logistic Regression model 78 | 79 | Parameters 80 | ---------- 81 | X : numpy array 82 | shape [num_training_samples, num_pred] 83 | 84 | y : numpy array 85 | shape [num_training_samples, ] 86 | 87 | cats : numpy array 88 | shape [num_training_samples, ] 89 | 90 | inference_type : str (defaults to 'advi') 91 | specifies which inference method to call 92 | Currently, only 'advi' and 'nuts' are supported. 93 | 94 | num_advi_sample_draws : int (defaults to 10000) 95 | Number of samples to draw from ADVI approximation after it has been fit; 96 | not used if inference_type != 'advi' 97 | 98 | minibatch_size : int (defaults to None) 99 | number of samples to include in each minibatch for ADVI 100 | If None, minibatch is not run. 101 | 102 | inference_args : dict (defaults to None) 103 | arguments to be passed to the inference methods 104 | Check the PyMC3 docs for permissible values. 105 | If None, default values will be set.
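        Examples
        --------
        A minimal, illustrative call on synthetic data (the shapes, the number
        of categories, and the minibatch size below are placeholders, not
        recommendations)::

            import numpy as np

            X = np.random.randn(1000, 5)
            y = np.random.randint(2, size=1000)
            cats = np.random.randint(3, size=1000)

            HLR = HierarchicalLogisticRegression()
            HLR.fit(X, y, cats, minibatch_size=100)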
106 | """ 107 | self.num_cats = len(np.unique(cats)) 108 | self.num_training_samples, self.num_pred = X.shape 109 | 110 | self.inference_type = inference_type 111 | 112 | if y.ndim != 1: 113 | y = np.squeeze(y) 114 | 115 | if not inference_args: 116 | inference_args = self._set_default_inference_args() 117 | 118 | if self.cached_model is None: 119 | self.cached_model = self.create_model() 120 | 121 | if minibatch_size: 122 | with self.cached_model: 123 | minibatches = { 124 | self.shared_vars['model_input']: pm.Minibatch(X, batch_size=minibatch_size), 125 | self.shared_vars['model_output']: pm.Minibatch(y, batch_size=minibatch_size), 126 | self.shared_vars['model_cats']: pm.Minibatch(cats, batch_size=minibatch_size) 127 | } 128 | 129 | inference_args['more_replacements'] = minibatches 130 | else: 131 | self._set_shared_vars({ 132 | 'model_input': X, 133 | 'model_output': y, 134 | 'model_cats': cats 135 | }) 136 | 137 | self._inference(inference_type, inference_args, num_advi_sample_draws=num_advi_sample_draws) 138 | 139 | return self 140 | 141 | def predict_proba(self, X, cats, return_std=False, num_ppc_samples=2000): 142 | """ 143 | Predicts probabilities of new data with a trained Hierarchical Logistic Regression 144 | 145 | Parameters 146 | ---------- 147 | X : numpy array 148 | shape [num_training_samples, num_pred] 149 | 150 | cats : numpy array 151 | shape [num_training_samples, ] 152 | 153 | return_std : bool (defaults to False) 154 | Flag of whether to return standard deviations with mean probabilities 155 | 156 | num_ppc_samples : int (defaults to 2000) 157 | 'samples' parameter passed to pm.sample_ppc 158 | """ 159 | 160 | if self.trace is None: 161 | raise PyMC3ModelsError('Run fit on the model before predict.') 162 | 163 | num_samples = X.shape[0] 164 | 165 | if self.cached_model is None: 166 | self.cached_model = self.create_model() 167 | 168 | self._set_shared_vars({ 169 | 'model_input': X, 170 | 'model_output': np.zeros(num_samples, dtype='int'), 171 | 'model_cats': cats 172 | }) 173 | 174 | ppc = pm.sample_ppc(self.trace, model=self.cached_model, samples=num_ppc_samples) 175 | 176 | if return_std: 177 | return ppc['o'].mean(axis=0), ppc['o'].std(axis=0) 178 | else: 179 | return ppc['o'].mean(axis=0) 180 | 181 | def predict(self, X, cats, num_ppc_samples=2000): 182 | """ 183 | Predicts labels of new data with a trained model 184 | 185 | Parameters 186 | ---------- 187 | X : numpy array 188 | shape [num_training_samples, num_pred] 189 | 190 | cats : numpy array 191 | shape [num_training_samples, ] 192 | 193 | num_ppc_samples : int (defaults to 2000) 194 | 'samples' parameter passed to pm.sample_ppc 195 | """ 196 | ppc_mean = self.predict_proba(X, cats, num_ppc_samples=num_ppc_samples) 197 | 198 | pred = ppc_mean > 0.5 199 | 200 | return pred 201 | 202 | def score(self, X, y, cats, num_ppc_samples=2000): 203 | """ 204 | Scores new data with a trained model with sklearn's accuracy_score.
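        Labels are generated internally via predict, i.e. posterior predictive
        mean probabilities thresholded at 0.5, and compared against y.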
205 | 206 | Parameters 207 | ---------- 208 | X : numpy array 209 | shape [num_training_samples, num_pred] 210 | 211 | y : numpy array 212 | shape [num_training_samples, ] 213 | 214 | cats : numpy array 215 | shape [num_training_samples, ] 216 | 217 | num_ppc_samples : int (defaults to 2000) 218 | 'samples' parameter passed to pm.sample_ppc 219 | """ 220 | 221 | return accuracy_score(y, self.predict(X, cats, num_ppc_samples=num_ppc_samples)) 222 | 223 | def save(self, file_prefix): 224 | params = { 225 | 'inference_type': self.inference_type, 226 | 'num_cats': self.num_cats, 227 | 'num_pred': self.num_pred, 228 | 'num_training_samples': self.num_training_samples 229 | } 230 | 231 | super(HierarchicalLogisticRegression, self).save(file_prefix, params) 232 | 233 | def load(self, file_prefix): 234 | params = super(HierarchicalLogisticRegression, self).load(file_prefix, load_custom_params=True) 235 | 236 | self.inference_type = params['inference_type'] 237 | self.num_cats = params['num_cats'] 238 | self.num_pred = params['num_pred'] 239 | self.num_training_samples = params['num_training_samples'] 240 | -------------------------------------------------------------------------------- /pymc3_models/models/LinearRegression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pymc3 as pm 3 | from sklearn.metrics import r2_score 4 | import theano 5 | import theano.tensor as T 6 | 7 | from pymc3_models.exc import PyMC3ModelsError 8 | from pymc3_models.models import BayesianModel 9 | 10 | 11 | class LinearRegression(BayesianModel): 12 | """ 13 | Linear Regression built using PyMC3. 14 | """ 15 | 16 | def __init__(self): 17 | super(LinearRegression, self).__init__() 18 | 19 | def create_model(self): 20 | """ 21 | Creates and returns the PyMC3 model. 22 | 23 | Note: The size of the shared variables must match the size of the training data. 24 | Otherwise, setting the shared variables later will raise an error. 25 | See http://docs.pymc.io/advanced_theano.html 26 | 27 | Returns 28 | ------- 29 | the PyMC3 model 30 | """ 31 | model_input = theano.shared(np.zeros([self.num_training_samples, self.num_pred])) 32 | 33 | model_output = theano.shared(np.zeros(self.num_training_samples)) 34 | 35 | self.shared_vars = { 36 | 'model_input': model_input, 37 | 'model_output': model_output, 38 | } 39 | 40 | model = pm.Model() 41 | 42 | with model: 43 | alpha = pm.Normal('alpha', mu=0, sd=100, shape=(1)) 44 | betas = pm.Normal('betas', mu=0, sd=100, shape=(1, self.num_pred)) 45 | 46 | s = pm.HalfNormal('s', tau=1) 47 | 48 | mean = alpha + T.sum(betas * model_input, 1) 49 | 50 | y = pm.Normal('y', mu=mean, sd=s, observed=model_output) 51 | 52 | return model 53 | 54 | def fit( 55 | self, 56 | X, 57 | y, 58 | inference_type='advi', 59 | num_advi_sample_draws=10000, 60 | minibatch_size=None, 61 | inference_args=None, 62 | ): 63 | """ 64 | Train the Linear Regression model 65 | 66 | Parameters 67 | ---------- 68 | X : numpy array 69 | shape [num_training_samples, num_pred] 70 | 71 | y : numpy array 72 | shape [num_training_samples, ] 73 | 74 | inference_type : str (defaults to 'advi') 75 | specifies which inference method to call 76 | Currently, only 'advi' and 'nuts' are supported. 
77 | 78 | num_advi_sample_draws : int (defaults to 10000) 79 | Number of samples to draw from ADVI approximation after it has been fit; 80 | not used if inference_type != 'advi' 81 | 82 | minibatch_size : int (defaults to None) 83 | number of samples to include in each minibatch for ADVI 84 | If None, minibatch is not run. 85 | 86 | inference_args : dict (defaults to None) 87 | arguments to be passed to the inference methods. 88 | Check the PyMC3 docs for permissible values. 89 | If None, default values will be set. 90 | """ 91 | self.num_training_samples, self.num_pred = X.shape 92 | 93 | self.inference_type = inference_type 94 | 95 | if y.ndim != 1: 96 | y = np.squeeze(y) 97 | 98 | if not inference_args: 99 | inference_args = self._set_default_inference_args() 100 | 101 | if self.cached_model is None: 102 | self.cached_model = self.create_model() 103 | 104 | if minibatch_size: 105 | with self.cached_model: 106 | minibatches = { 107 | self.shared_vars['model_input']: pm.Minibatch(X, batch_size=minibatch_size), 108 | self.shared_vars['model_output']: pm.Minibatch(y, batch_size=minibatch_size), 109 | } 110 | 111 | inference_args['more_replacements'] = minibatches 112 | else: 113 | self._set_shared_vars({'model_input': X, 'model_output': y}) 114 | 115 | self._inference(inference_type, inference_args, num_advi_sample_draws=num_advi_sample_draws) 116 | 117 | return self 118 | 119 | def predict(self, X, return_std=False, num_ppc_samples=2000): 120 | """ 121 | Predicts values of new data with a trained Linear Regression model 122 | 123 | Parameters 124 | ---------- 125 | X : numpy array 126 | shape [num_training_samples, num_pred] 127 | 128 | return_std : bool (defaults to False) 129 | flag of whether to return standard deviations with mean values 130 | 131 | num_ppc_samples : int (defaults to 2000) 132 | 'samples' parameter passed to pm.sample_ppc 133 | """ 134 | 135 | if self.trace is None: 136 | raise PyMC3ModelsError('Run fit on the model before predict.') 137 | 138 | num_samples = X.shape[0] 139 | 140 | if self.cached_model is None: 141 | self.cached_model = self.create_model() 142 | 143 | self._set_shared_vars({'model_input': X, 'model_output': np.zeros(num_samples)}) 144 | 145 | ppc = pm.sample_ppc(self.trace, model=self.cached_model, samples=num_ppc_samples) 146 | 147 | if return_std: 148 | return ppc['y'].mean(axis=0), ppc['y'].std(axis=0) 149 | else: 150 | return ppc['y'].mean(axis=0) 151 | 152 | def score(self, X, y, num_ppc_samples=2000): 153 | """ 154 | Scores new data with a trained model using sklearn's r2_score.
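For example (a sketch; the arrays are assumed to come from a train/test split):

>>> LR = LinearRegression()
>>> LR.fit(X_train, y_train)
>>> y_mean, y_std = LR.predict(X_test, return_std=True)
>>> LR.score(X_test, y_test)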
155 | 156 | Parameters 157 | ---------- 158 | X : numpy array 159 | shape [num_training_samples, num_pred] 160 | 161 | y : numpy array 162 | shape [num_training_samples, ] 163 | 164 | num_ppc_samples : int (defaults to 2000) 165 | 'samples' parameter passed to pm.sample_ppc 166 | """ 167 | 168 | return r2_score(y, self.predict(X, num_ppc_samples=num_ppc_samples)) 169 | 170 | def save(self, file_prefix): 171 | params = { 172 | 'inference_type': self.inference_type, 173 | 'num_pred': self.num_pred, 174 | 'num_training_samples': self.num_training_samples 175 | } 176 | 177 | super(LinearRegression, self).save(file_prefix, params) 178 | 179 | def load(self, file_prefix): 180 | params = super(LinearRegression, self).load(file_prefix, load_custom_params=True) 181 | 182 | self.inference_type = params['inference_type'] 183 | self.num_pred = params['num_pred'] 184 | self.num_training_samples = params['num_training_samples'] 185 | -------------------------------------------------------------------------------- /pymc3_models/models/LogisticRegression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pymc3 as pm 3 | from sklearn.metrics import accuracy_score 4 | import theano 5 | import theano.tensor as T 6 | 7 | from pymc3_models.exc import PyMC3ModelsError 8 | from pymc3_models.models import BayesianModel 9 | 10 | 11 | class LogisticRegression(BayesianModel): 12 | """ 13 | Logistic Regression built using PyMC3. 14 | """ 15 | 16 | def __init__(self): 17 | super(LogisticRegression, self).__init__() 18 | 19 | def create_model(self): 20 | """ 21 | Creates and returns the PyMC3 model. 22 | 23 | Note: The size of the shared variables must match the size of the training data. 24 | Otherwise, setting the shared variables later will raise an error. 25 | See http://docs.pymc.io/advanced_theano.html 26 | 27 | Returns 28 | ------- 29 | the PyMC3 model 30 | """ 31 | model_input = theano.shared(np.zeros([self.num_training_samples, self.num_pred])) 32 | 33 | model_output = theano.shared(np.zeros(self.num_training_samples, dtype='int')) 34 | 35 | self.shared_vars = { 36 | 'model_input': model_input, 37 | 'model_output': model_output, 38 | } 39 | 40 | model = pm.Model() 41 | 42 | with model: 43 | alpha = pm.Normal('alpha', mu=0, sd=100, shape=(1)) 44 | betas = pm.Normal('betas', mu=0, sd=100, shape=(1, self.num_pred)) 45 | 46 | temp = alpha + T.sum(betas * model_input, 1) 47 | 48 | p = pm.invlogit(temp) 49 | 50 | o = pm.Bernoulli('o', p, observed=model_output) 51 | 52 | return model 53 | 54 | def fit( 55 | self, 56 | X, 57 | y, 58 | inference_type='advi', 59 | num_advi_sample_draws=10000, 60 | minibatch_size=None, 61 | inference_args=None 62 | ): 63 | """ 64 | Train the Logistic Regression model 65 | 66 | Parameters 67 | ---------- 68 | X : numpy array 69 | shape [num_training_samples, num_pred] 70 | 71 | y : numpy array 72 | shape [num_training_samples, ] 73 | 74 | inference_type : str (defaults to 'advi') 75 | specifies which inference method to call 76 | Currently, only 'advi' and 'nuts' are supported. 77 | 78 | num_advi_sample_draws : int (defaults to 10000) 79 | Number of samples to draw from ADVI approximation after it has been fit; 80 | not used if inference_type != 'advi' 81 | 82 | minibatch_size : int (defaults to None) 83 | number of samples to include in each minibatch for ADVI 84 | If None, minibatch is not run. 85 | 86 | inference_args : dict (defaults to None) 87 | arguments to be passed to the inference methods. 
88 | Check the PyMC3 docs for permissible values. 89 | If None, default values will be set. 90 | """ 91 | self.num_training_samples, self.num_pred = X.shape 92 | 93 | self.inference_type = inference_type 94 | 95 | if y.ndim != 1: 96 | y = np.squeeze(y) 97 | 98 | if not inference_args: 99 | inference_args = self._set_default_inference_args() 100 | 101 | if self.cached_model is None: 102 | self.cached_model = self.create_model() 103 | 104 | if minibatch_size: 105 | with self.cached_model: 106 | minibatches = { 107 | self.shared_vars['model_input']: pm.Minibatch(X, batch_size=minibatch_size), 108 | self.shared_vars['model_output']: pm.Minibatch(y, batch_size=minibatch_size), 109 | } 110 | 111 | inference_args['more_replacements'] = minibatches 112 | else: 113 | self._set_shared_vars({'model_input': X, 'model_output': y}) 114 | 115 | self._inference(inference_type, inference_args, num_advi_sample_draws=num_advi_sample_draws) 116 | 117 | return self 118 | 119 | def predict_proba(self, X, return_std=False, num_ppc_samples=2000): 120 | """ 121 | Predicts probabilities of new data with a trained Logistic Regression 122 | 123 | Parameters 124 | ---------- 125 | X : numpy array 126 | shape [num_training_samples, num_pred] 127 | 128 | return_std : bool (defaults to False) 129 | Flag of whether to return standard deviations with mean probabilities 130 | 131 | num_ppc_samples : int (defaults to 2000) 132 | 'samples' parameter passed to pm.sample_ppc 133 | """ 134 | 135 | if self.trace is None: 136 | raise PyMC3ModelsError('Run fit on the model before predict.') 137 | 138 | num_samples = X.shape[0] 139 | 140 | if self.cached_model is None: 141 | self.cached_model = self.create_model() 142 | 143 | self._set_shared_vars({ 144 | 'model_input': X, 145 | 'model_output': np.zeros(num_samples, dtype='int') 146 | }) 147 | 148 | ppc = pm.sample_ppc(self.trace, model=self.cached_model, samples=num_ppc_samples) 149 | 150 | if return_std: 151 | return ppc['o'].mean(axis=0), ppc['o'].std(axis=0) 152 | else: 153 | return ppc['o'].mean(axis=0) 154 | 155 | def predict(self, X, num_ppc_samples=2000): 156 | """ 157 | Predicts labels of new data with a trained model 158 | 159 | Parameters 160 | ---------- 161 | X : numpy array 162 | shape [num_training_samples, num_pred] 163 | 164 | num_ppc_samples : int (defaults to 2000) 165 | 'samples' parameter passed to pm.sample_ppc 166 | """ 167 | ppc_mean = self.predict_proba(X, num_ppc_samples=num_ppc_samples) 168 | 169 | pred = ppc_mean > 0.5 170 | 171 | return pred 172 | 173 | def score(self, X, y, num_ppc_samples=2000): 174 | """ 175 | Scores new data with a trained model using sklearn's accuracy_score.
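For example (a sketch; arrays are assumed pre-split and the NUTS arguments shown are illustrative):

>>> model = LogisticRegression()
>>> model.fit(X_train, y_train, inference_type='nuts', inference_args={'draws': 2000})
>>> model.score(X_test, y_test)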
176 | 177 | Parameters 178 | ---------- 179 | X : numpy array 180 | shape [num_training_samples, num_pred] 181 | 182 | y : numpy array 183 | shape [num_training_samples, ] 184 | 185 | num_ppc_samples : int (defaults to 2000) 186 | 'samples' parameter passed to pm.sample_ppc 187 | """ 188 | 189 | return accuracy_score(y, self.predict(X, num_ppc_samples=num_ppc_samples)) 190 | 191 | def save(self, file_prefix): 192 | params = { 193 | 'inference_type': self.inference_type, 194 | 'num_pred': self.num_pred, 195 | 'num_training_samples': self.num_training_samples 196 | } 197 | 198 | super(LogisticRegression, self).save(file_prefix, params) 199 | 200 | def load(self, file_prefix): 201 | params = super(LogisticRegression, self).load(file_prefix, load_custom_params=True) 202 | 203 | self.inference_type = params['inference_type'] 204 | self.num_pred = params['num_pred'] 205 | self.num_training_samples = params['num_training_samples'] 206 | -------------------------------------------------------------------------------- /pymc3_models/models/NaiveBayes.py: -------------------------------------------------------------------------------- 1 | import functools as ft 2 | 3 | import numpy as np 4 | import pymc3 as pm 5 | import scipy.stats 6 | from sklearn.metrics import accuracy_score 7 | import theano 8 | 9 | from pymc3_models.exc import PyMC3ModelsError 10 | from pymc3_models.models import BayesianModel 11 | from pymc3_models.utils import normalize 12 | 13 | 14 | class GaussianNaiveBayes(BayesianModel): 15 | """ 16 | Naive Bayes classification built using PyMC3. 17 | 18 | The Gaussian Naive Bayes algorithm assumes that the random variables 19 | that describe each class and each feature are independent and distributed 20 | according to Normal distributions. 21 | 22 | Example 23 | ------- 24 | >>> import pymc3_models as pmo 25 | >>> 26 | >>> model = pmo.GaussianNaiveBayes() 27 | >>> model.fit(X,y) 28 | >>> model.predict_proba(X) 29 | >>> model.predict(X) 30 | 31 | See the documentation of the `create_model` method for details on the model 32 | itself. 33 | """ 34 | 35 | def __init__(self): 36 | super(GaussianNaiveBayes, self).__init__() 37 | 38 | def create_model(self): 39 | """ 40 | Creates and returns the PyMC3 model. 41 | 42 | We denote by :math:`x_{jc}` the value of the j-th element of the data vector :math:`x` 43 | conditioned on x belonging to the class :math:`c`. The Gaussian Naive Bayes 44 | algorithm models :math:`x_{jc}` as: 45 | 46 | .. math:: 47 | 48 | x_{jc} \\sim Normal(\\mu_{jc}, \\sigma_{jc}) 49 | 50 | The probability that :math:`x` belongs to the class :math:`c` is given by the 51 | categorical distribution: 52 | 53 | .. math:: 54 | 55 | P(y=c|x_i) = Cat(\\pi_1, \\dots, \\pi_C) 56 | 57 | where :math:`\\pi_i` is the probability that a vector belongs to category :math:`i`. 58 | 59 | We assume that the :math:`\\pi_i` follow a Dirichlet distribution: 60 | 61 | .. math:: 62 | 63 | \\pi \\sim Dirichlet(\\alpha) 64 | 65 | with hyperparameter :math:`\\alpha = [1, .., 1]`. The :math:`\\mu_{jc}` 66 | are sampled from a Normal distribution centred on :math:`0` with 67 | standard deviation :math:`100`, and the :math:`\\sigma_{jc}` are sampled from a 68 | HalfNormal distribution with standard deviation :math:`100`: 69 | 70 | .. math:: 71 | 72 | \\mu_{jc} \\sim Normal(0, 100) 73 | 74 | \\sigma_{jc} \\sim HalfNormal(100) 75 | 76 | Note that the Gaussian Naive Bayes model is equivalent to a Gaussian 77 | mixture with a diagonal covariance [1].
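As a plain-NumPy sketch of the generative process above (illustrative only; num_cats, num_pred and num_samples are assumed sizes):

>>> import numpy as np
>>> pi = np.random.dirichlet(np.ones(num_cats))
>>> mu = np.random.normal(0, 100, size=(num_cats, num_pred))
>>> sigma = np.abs(np.random.normal(0, 100, size=(num_cats, num_pred)))  # HalfNormal via |Normal|
>>> z = np.random.choice(num_cats, size=num_samples, p=pi)  # class of each point
>>> X = np.random.normal(mu[z], sigma[z])  # class-conditional features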
78 | 79 | Returns 80 | ------- 81 | A PyMC3 model 82 | 83 | References 84 | ---------- 85 | .. [1] Murphy, K. P. (2012). Machine learning: a probabilistic perspective. 86 | """ 87 | 88 | # The data 89 | X = theano.shared(np.zeros((self.num_training_samples, self.num_pred))) 90 | y = theano.shared(np.zeros(self.num_training_samples, dtype=int)) 91 | 92 | self.shared_vars = { 93 | 'model_input': X, 94 | 'model_output': y 95 | } 96 | 97 | model = pm.Model() 98 | with model: 99 | # Priors 100 | alpha = np.ones(self.num_cats) 101 | pi = pm.Dirichlet('pi', alpha, shape=self.num_cats) 102 | mu = pm.Normal('mu', mu=0, sd=100, shape=(self.num_cats, self.num_pred)) 103 | sigma = pm.HalfNormal('sigma', 100, shape=(self.num_cats, self.num_pred)) 104 | 105 | # Assign classes to data points 106 | z = pm.Categorical('z', pi, shape=self.num_training_samples, observed=y) 107 | 108 | # The components are independent and normally distributed 109 | xi = pm.Normal('xi', mu=mu[z], sd=sigma[z], observed=X) 110 | 111 | return model 112 | 113 | def fit( 114 | self, 115 | X, 116 | y, 117 | inference_type='advi', 118 | num_advi_sample_draws=10000, 119 | minibatch_size=None, 120 | inference_args=None 121 | ): 122 | """ 123 | Train the Naive Bayes model. 124 | 125 | Parameters 126 | ---------- 127 | X : numpy array 128 | shape [num_training_samples, num_pred]. Contains the data points 129 | 130 | y : numpy array 131 | shape [num_training_samples,]. Contains the category of the data points 132 | 133 | inference_type : str (defaults to 'advi') 134 | specifies which inference method to call 135 | Currently, only 'advi' and 'nuts' are supported. 136 | 137 | num_advi_sample_draws : int (defaults to 10000) 138 | Number of samples to draw from ADVI approximation after it has been fit; 139 | not used if inference_type != 'advi' 140 | 141 | minibatch_size : int (defaults to None) 142 | number of samples to include in each minibatch for ADVI 143 | If None, minibatch is not run. 144 | 145 | inference_args : dict (defaults to None) 146 | arguments to be passed to the inference methods 147 | Check the PyMC3 docs for permissible values. 148 | If None, default values will be set. 149 | 150 | Returns 151 | ------- 152 | The current instance of the GaussianNaiveBayes class. 153 | """ 154 | self.num_training_samples, self.num_pred = X.shape 155 | self.num_cats = len(np.unique(y)) 156 | self.inference_type = inference_type 157 | 158 | if not inference_args: 159 | inference_args = self._set_default_inference_args() 160 | 161 | if not self.cached_model: 162 | self.cached_model = self.create_model() 163 | 164 | if minibatch_size: 165 | with self.cached_model: 166 | minibatches = { 167 | self.shared_vars['model_input']: pm.Minibatch(X, batch_size=minibatch_size), 168 | self.shared_vars['model_output']: pm.Minibatch(y, batch_size=minibatch_size), 169 | } 170 | 171 | inference_args['more_replacements'] = minibatches 172 | else: 173 | self._set_shared_vars({'model_input': X, 'model_output': y}) 174 | 175 | self._inference(inference_type, inference_args, num_advi_sample_draws=num_advi_sample_draws) 176 | 177 | return self 178 | 179 | def predict_proba(self, X): 180 | """ 181 | Predicts the probabilities that the data points belong to each category. 182 | 183 | Given a new data point :math:`\\vec{x}`, we want to estimate the probability that 184 | it belongs to a category :math:`c`. Following the notation in [1], the probability 185 | reads: 186 | 187 | ..
math:: 188 | 189 | P(y=c|\\vec{x}, \\mathcal{D}) = P(y=c|\\mathcal{D}) \\prod_{j=1}^{n_{dims}} \\ 190 | P(x_j|y=c, \\mathcal{D}) 191 | 192 | We previously used the data :math:`\\mathcal{D}` to estimate the 193 | distribution of the parameters :math:`\\vec{\\mu}`, :math:`\\vec{\\pi}` 194 | and :math:`\\vec{\\sigma}`. To compute the above probability, we need 195 | to integrate over the values of these parameters: 196 | 197 | .. math:: 198 | 199 | P(y=c|\\vec{x}, \\mathcal{D}) = \\left[\\int Cat(y=c|\\vec{\\pi})P(\\vec{\\pi}|\\ 200 | \\mathcal{D})\\mathrm{d}\\vec{\\pi}\\right] 201 | \\int P(\\vec{x}|\\vec{\\mu}, \\vec{\\sigma})\\ 202 | P(\\vec{\\mu}|\\mathcal{D})\\ 203 | P(\\vec{\\sigma}|\\mathcal{D})\\ 204 | \\mathrm{d}\\vec{\\mu}\\mathrm{d}\\vec{\\sigma} 205 | 206 | Parameters 207 | ---------- 208 | X : numpy array 209 | shape [num_training_samples, num_pred]. Contains the points 210 | for which we want to predict the class 211 | 212 | Returns 213 | ------- 214 | A numpy array of shape [num_training_samples, num_cats] that contains the probabilities 215 | that each sample belongs to each category. 216 | 217 | References 218 | ---------- 219 | .. [1] Murphy, K. P. (2012). Machine learning: a probabilistic perspective. 220 | """ 221 | 222 | if self.trace is None: 223 | raise PyMC3ModelsError('Run fit on the model before predict') 224 | 225 | posterior_prediction = np.array([]) 226 | for x in X: 227 | prob_per_sample = scipy.stats.norm(self.trace['mu'], self.trace['sigma']).pdf(x) 228 | prob_per_feature = [ 229 | np.sum(prob_per_sample[:, :, i], axis=0)/len(self.trace['mu']) 230 | for i in range(self.num_pred) 231 | ] 232 | prob_per_class = normalize(ft.reduce(lambda x, y: x*y, prob_per_feature)) 233 | if len(posterior_prediction) == 0: 234 | posterior_prediction = prob_per_class 235 | else: 236 | posterior_prediction = np.vstack((posterior_prediction, prob_per_class)) 237 | 238 | return posterior_prediction 239 | 240 | def predict(self, X): 241 | """ 242 | Classify new data with a trained Naive Bayes model. The output is the point 243 | estimate of the posterior predictive distribution that corresponds to the 244 | zero-one loss function. 245 | 246 | Parameters 247 | ---------- 248 | X : numpy array 249 | shape [num_training_samples, num_pred]. Contains the data 250 | to classify 251 | 252 | Returns 253 | ------- 254 | A numpy array of shape [num_training_samples,] that contains the predicted class to 255 | which the data points belong. 256 | """ 257 | proba = self.predict_proba(X) 258 | predictions = np.argmax(proba, axis=1) 259 | return predictions 260 | 261 | def score(self, X, y): 262 | """ 263 | Scores new data with a trained model using sklearn's accuracy_score. 264 | 265 | Parameters 266 | ---------- 267 | X : numpy array 268 | shape [num_training_samples, num_pred]. Contains the data points 269 | 270 | y : numpy array 271 | shape [num_training_samples,]. Contains the category of the data points 272 | 273 | Returns 274 | ------- 275 | A float representing the accuracy score of the predictions.
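Example (a sketch; X_train, y_train, X_test, y_test are assumed pre-split arrays):

>>> model = GaussianNaiveBayes()
>>> model.fit(X_train, y_train)
>>> model.score(X_test, y_test)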
276 | """ 277 | 278 | return accuracy_score(y, self.predict(X)) 279 | 280 | def save(self, file_prefix): 281 | params = { 282 | 'inference_type': self.inference_type, 283 | 'num_cats': self.num_cats, 284 | 'num_pred': self.num_pred, 285 | 'num_training_samples': self.num_training_samples 286 | } 287 | super(GaussianNaiveBayes, self).save(file_prefix, params) 288 | 289 | def load(self, file_prefix): 290 | params = super(GaussianNaiveBayes, self).load(file_prefix, load_custom_params=True) 291 | 292 | self.inference_type = params['inference_type'] 293 | self.num_cats = params['num_cats'] 294 | self.num_pred = params['num_pred'] 295 | self.num_training_samples = params['num_training_samples'] 296 | -------------------------------------------------------------------------------- /pymc3_models/models/__init__.py: -------------------------------------------------------------------------------- 1 | import joblib 2 | import matplotlib.pyplot as plt 3 | import pymc3 as pm 4 | import seaborn as sns 5 | from sklearn.base import BaseEstimator 6 | 7 | from pymc3_models.exc import PyMC3ModelsError 8 | 9 | 10 | class BayesianModel(BaseEstimator): 11 | """ 12 | Bayesian model base class 13 | """ 14 | def __init__(self): 15 | self.cached_model = None 16 | self.inference_type = None 17 | self.num_pred = None 18 | self.shared_vars = None 19 | self.summary = None 20 | self.trace = None 21 | 22 | def create_model(self): 23 | raise NotImplementedError 24 | 25 | def _set_shared_vars(self, shared_vars): 26 | """ 27 | Sets theano shared variables for the PyMC3 model. 28 | """ 29 | for key in shared_vars.keys(): 30 | self.shared_vars[key].set_value(shared_vars[key]) 31 | 32 | def _inference(self, inference_type='advi', inference_args=None, num_advi_sample_draws=10000): 33 | """ 34 | Calls internal methods for two types of inferences. 35 | Raises an error if the inference_type is not supported. 36 | 37 | Parameters 38 | ---------- 39 | inference_type : str (defaults to 'advi') 40 | specifies which inference method to call 41 | Currently, only 'advi' and 'nuts' are supported. 42 | 43 | inference_args : dict (defaults to None) 44 | arguments to be passed to the inference methods 45 | Check the PyMC3 docs to see what is permitted. 46 | 47 | num_advi_sample_draws : int (defaults to 10000) 48 | Number of samples to draw from ADVI approximation after it has been fit; 49 | not used if inference_type != 'advi' 50 | """ 51 | if inference_type == 'advi': 52 | self._advi_inference(inference_args, num_advi_sample_draws=num_advi_sample_draws) 53 | elif inference_type == 'nuts': 54 | self._nuts_inference(inference_args) 55 | else: 56 | raise PyMC3ModelsError('{} is not a supported type of inference'.format(inference_type)) 57 | 58 | def _advi_inference(self, inference_args, num_advi_sample_draws): 59 | """ 60 | Runs variational ADVI and then samples from those results. 61 | 62 | Parameters 63 | ---------- 64 | inference_args : dict 65 | arguments to be passed to the PyMC3 fit method 66 | See PyMC3 doc for permissible values. 67 | 68 | num_advi_sample_draws : int 69 | Number of samples to draw from ADVI approximation after it has been fit 70 | """ 71 | with self.cached_model: 72 | inference = pm.ADVI() 73 | approx = pm.fit(method=inference, **inference_args) 74 | 75 | self.approx = approx 76 | self.trace = approx.sample(draws=num_advi_sample_draws) 77 | self.summary = pm.summary(self.trace) 78 | self.advi_hist = inference.hist 79 | 80 | def _nuts_inference(self, inference_args): 81 | """ 82 | Runs NUTS inference.
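For example, inference_args={'draws': 2000, 'tune': 1000} (illustrative values) is passed straight through to pm.sample below.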
83 | 84 | Parameters 85 | ---------- 86 | inference_args : dict 87 | arguments to be passed to the PyMC3 sample method 88 | See PyMC3 doc for permissible values. 89 | """ 90 | with self.cached_model: 91 | step = pm.NUTS() 92 | nuts_trace = pm.sample(step=step, **inference_args) 93 | 94 | self.trace = nuts_trace 95 | self.summary = pm.summary(self.trace) 96 | 97 | def _set_default_inference_args(self): 98 | """ 99 | Set default values for inference arguments if none are provided, dependent on inference type. 100 | 101 | ADVI Default Parameters 102 | ----------------------- 103 | callbacks : list 104 | contains a parameter stopping check. 105 | 106 | n : int (defaults to 200000) 107 | number of iterations for ADVI fit 108 | 109 | NUTS Default Parameters 110 | ----------------------- 111 | draws : int (defaults to 2000) 112 | number of samples to draw 113 | """ 114 | if self.inference_type == 'advi': 115 | inference_args = { 116 | 'n': 200000, 117 | 'callbacks': [pm.callbacks.CheckParametersConvergence()] 118 | } 119 | elif self.inference_type == 'nuts': 120 | inference_args = { 121 | 'draws': 2000 122 | } 123 | else: 124 | inference_args = None 125 | 126 | return inference_args 127 | 128 | def fit(self): 129 | raise NotImplementedError 130 | 131 | def predict(self): 132 | raise NotImplementedError 133 | 134 | def score(self): 135 | raise NotImplementedError 136 | 137 | def save(self, file_prefix, custom_params=None): 138 | """ 139 | Saves the trace and custom params to files with the given file_prefix. 140 | 141 | Parameters 142 | ---------- 143 | file_prefix : str 144 | path and prefix used to identify where to save the trace for this model, 145 | e.g. given file_prefix = 'path/to/file/' 146 | This will attempt to save to 'path/to/file/trace.pickle'. 147 | 148 | custom_params : dict (defaults to None) 149 | Custom parameters to save 150 | """ 151 | fileObject = open(file_prefix + 'trace.pickle', 'wb') 152 | joblib.dump(self.trace, fileObject) 153 | fileObject.close() 154 | 155 | if custom_params: 156 | fileObject = open(file_prefix + 'params.pickle', 'wb') 157 | joblib.dump(custom_params, fileObject) 158 | fileObject.close() 159 | 160 | def load(self, file_prefix, load_custom_params=False): 161 | """ 162 | Loads a saved version of the trace, and custom param files with the given file_prefix. 163 | 164 | Parameters 165 | ---------- 166 | file_prefix : str 167 | path and prefix used to identify where to load the saved trace for this model, 168 | e.g. given file_prefix = 'path/to/file/' 169 | This will attempt to load 'path/to/file/trace.pickle'. 170 | 171 | load_custom_params : bool (defaults to False) 172 | flag to indicate whether custom parameters should be loaded 173 | 174 | Returns 175 | ---------- 176 | custom_params : Dictionary of custom parameters 177 | """ 178 | self.trace = joblib.load(file_prefix + 'trace.pickle') 179 | 180 | custom_params = None 181 | if load_custom_params: 182 | custom_params = joblib.load(file_prefix + 'params.pickle') 183 | 184 | return custom_params 185 | 186 | def plot_elbo(self): 187 | """ 188 | Plot the ELBO values after running ADVI minibatch. 189 | """ 190 | if self.inference_type != 'advi': 191 | raise PyMC3ModelsError( 192 | 'This method should only be called after calling fit with ADVI minibatch.' 
193 | ) 194 | 195 | sns.set_style('white') 196 | plt.plot(-self.advi_hist) 197 | plt.ylabel('ELBO') 198 | plt.xlabel('iteration') 199 | sns.despine() 200 | -------------------------------------------------------------------------------- /pymc3_models/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def normalize(array): 5 | """ 6 | Normalize values in the array to get probabilities. 7 | 8 | Parameters 9 | ---------- 10 | array : 1-dimensional numpy array 11 | 12 | Returns 13 | ------- 14 | A normalized array 15 | """ 16 | return array/np.sum(array) 17 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | flake8 2 | joblib>=0.11 3 | matplotlib>=2.1.1 4 | numpy>=1.13.1 5 | pandas>=0.21.1 6 | pymc3>=3.4.1 7 | pytest 8 | scikit-learn>=0.19.1 9 | scipy>=1.0.0 10 | seaborn>=0.8.1 11 | sphinx-autobuild>=0.7.1 12 | sphinx-rtd-theme>=0.2.4 13 | sphinx>=1.5 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | joblib>=0.11 2 | matplotlib>=2.1.1 3 | numpy>=1.13.1 4 | numpydoc>=0.7.0 5 | pandas>=0.21.1 6 | pymc3>=3.4.1 7 | scikit-learn>=0.19.1 8 | scipy>=1.0.0 9 | seaborn>=0.8.1 10 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 110 3 | application-import-names = pymc3_models 4 | import-order-style = appnexus 5 | exclude = 6 | pymc3_models/__init__.py, 7 | setup.py, 8 | ignore = 9 | # https://pep257.readthedocs.io/en/latest/error_codes.html 10 | 11 | # Missing docstrings in certain cases 12 | D100 13 | D103 14 | D104 15 | D107 16 | 17 | # 1 blank line required between summary line and description 18 | D204 19 | D205 20 | 21 | # First line should not end with a period.
22 | D400 23 | 24 | # No blank lines allowed after function docstring 25 | D202 26 | 27 | # Missing blank line after last section 28 | D413 29 | 30 | # Line break before and after binary operator 31 | W503 32 | W504 33 | 34 | # Missing whitespace around arithmetic operator 35 | E226 36 | 37 | # First line should be in imperative mood 38 | D401 39 | 40 | # Local variable is assigned but never used 41 | F841 42 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open('AUTHORS.txt') as a: 4 | # reST-ify the authors list 5 | authors = '' 6 | for author in a.read().split('\n'): 7 | authors += '| '+author+'\n' 8 | 9 | with open('pymc3_models/_version.py') as version_file: 10 | exec(version_file.read()) 11 | 12 | with open('README.md') as r: 13 | readme = r.read() 14 | 15 | 16 | setup( 17 | name='pymc3_models', 18 | version=__version__, 19 | description='Custom PyMC3 models built on top of the scikit-learn API', 20 | long_description=readme, 21 | long_description_content_type='text/markdown', 22 | author='Nicole Carlson', 23 | author_email='nicole@parsingscience.com', 24 | url='https://github.com/parsing-science/pymc3_models', 25 | packages=find_packages(), 26 | package_data={'docs': ['*']}, 27 | include_package_data=True, 28 | zip_safe=False, 29 | install_requires=[ 30 | 'joblib', 31 | 'matplotlib', 32 | 'numpy', 33 | 'pandas>=0.19', 34 | 'pymc3>=3.3', 35 | 'scipy', 36 | 'seaborn', 37 | 'scikit-learn' 38 | ], 39 | classifiers=[ 40 | 'License :: OSI Approved :: Apache Software License', 41 | 'Programming Language :: Python :: 2.7', 42 | 'Programming Language :: Python :: 3.4' 43 | ] 44 | ) 45 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parsing-science/pymc3_models/1b8cca86a2ce05dfec3df81ea57d17c7defb51ed/tests/__init__.py -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parsing-science/pymc3_models/1b8cca86a2ce05dfec3df81ea57d17c7defb51ed/tests/models/__init__.py -------------------------------------------------------------------------------- /tests/models/test_BayesianModel.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from pymc3_models.models import BayesianModel 4 | 5 | 6 | class BayesianModelTestCase(unittest.TestCase): 7 | def test_create_model_raises_not_implemented_error(self): 8 | with self.assertRaises(NotImplementedError): 9 | BM = BayesianModel() 10 | BM.create_model() 11 | 12 | def test_fit_raises_not_implemented_error(self): 13 | with self.assertRaises(NotImplementedError): 14 | BM = BayesianModel() 15 | BM.fit() 16 | 17 | def test_predict_raises_not_implemented_error(self): 18 | with self.assertRaises(NotImplementedError): 19 | BM = BayesianModel() 20 | BM.predict() 21 | 22 | def test_score_raises_not_implemented_error(self): 23 | with self.assertRaises(NotImplementedError): 24 | BM = BayesianModel() 25 | BM.score() 26 | -------------------------------------------------------------------------------- /tests/models/test_HierarchicalLogisticRegression.py:
-------------------------------------------------------------------------------- 1 | import shutil 2 | import tempfile 3 | import unittest 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import pymc3 as pm 8 | from pymc3 import summary 9 | from sklearn.model_selection import train_test_split 10 | 11 | from pymc3_models.exc import PyMC3ModelsError 12 | from pymc3_models import HierarchicalLogisticRegression 13 | 14 | 15 | class HierarchicalLogisticRegressionTestCase(unittest.TestCase): 16 | def setUp(self): 17 | def numpy_invlogit(x): 18 | return 1 / (1 + np.exp(-x)) 19 | 20 | self.num_cats = 3 21 | self.num_pred = 1 22 | self.num_samples_per_cat = 100000 23 | 24 | # Set random seed for repeatability 25 | np.random.seed(27) 26 | 27 | self.alphas = np.random.randn(self.num_cats) 28 | self.betas = np.random.randn(self.num_cats, self.num_pred) 29 | # TODO: make this more efficient; right now, it's very explicit. 30 | x_a = np.random.randn(self.num_samples_per_cat, self.num_pred) 31 | y_a = np.random.binomial(1, numpy_invlogit(self.alphas[0] + np.sum(self.betas[0] * x_a, 1))) 32 | x_b = np.random.randn(self.num_samples_per_cat, self.num_pred) 33 | y_b = np.random.binomial(1, numpy_invlogit(self.alphas[1] + np.sum(self.betas[1] * x_b, 1))) 34 | x_c = np.random.randn(self.num_samples_per_cat, self.num_pred) 35 | y_c = np.random.binomial(1, numpy_invlogit(self.alphas[2] + np.sum(self.betas[2] * x_c, 1))) 36 | 37 | X = np.concatenate([x_a, x_b, x_c]) 38 | Y = np.concatenate([y_a, y_b, y_c]) 39 | cats = np.concatenate([ 40 | np.zeros(self.num_samples_per_cat, dtype=np.int), 41 | np.ones(self.num_samples_per_cat, dtype=np.int), 42 | 2*np.ones(self.num_samples_per_cat, dtype=np.int) 43 | ]) 44 | 45 | output = train_test_split(X, cats, Y, test_size=0.4) 46 | 47 | self.X_train, self.X_test, self.cat_train, self.cat_test, self.Y_train, self.Y_test = output 48 | 49 | self.test_HLR = HierarchicalLogisticRegression() 50 | # Fit the model once 51 | inference_args = { 52 | 'n': 60000, 53 | 'callbacks': [pm.callbacks.CheckParametersConvergence()] 54 | } 55 | # Note: print is here so PyMC3 output won't overwrite the test name 56 | print('') 57 | self.test_HLR.fit( 58 | self.X_train, 59 | self.Y_train, 60 | self.cat_train, 61 | num_advi_sample_draws=5000, 62 | minibatch_size=2000, 63 | inference_args=inference_args 64 | ) 65 | 66 | self.test_dir = tempfile.mkdtemp() 67 | 68 | def tearDown(self): 69 | shutil.rmtree(self.test_dir) 70 | 71 | 72 | class HierarchicalLogisticRegressionFitTestCase(HierarchicalLogisticRegressionTestCase): 73 | def test_fit_returns_correct_model(self): 74 | self.assertEqual(self.num_cats, self.test_HLR.num_cats) 75 | self.assertEqual(self.num_pred, self.test_HLR.num_pred) 76 | 77 | # TODO: Figure out best way to test 78 | # np.testing.assert_almost_equal(self.alphas, self.test_HLR.trace['alphas'].mean(), decimal=1) 79 | # np.testing.assert_almost_equal(self.betas, self.test_HLR.trace['betas'].mean(), decimal=1) 80 | 81 | # For now, just check that the estimated parameters have the correct signs 82 | np.testing.assert_equal( 83 | np.sign(self.alphas), 84 | np.sign(self.test_HLR.trace['alpha'].mean(axis=0)) 85 | ) 86 | np.testing.assert_equal( 87 | np.sign(self.betas), 88 | np.sign(self.test_HLR.trace['beta'].mean(axis=0)) 89 | ) 90 | 91 | 92 | class HierarchicalLogisticRegressionPredictProbaTestCase(HierarchicalLogisticRegressionTestCase): 93 | def test_predict_proba_returns_probabilities(self): 94 | probs = self.test_HLR.predict_proba(self.X_test, self.cat_test) 95 | 
self.assertEqual(probs.shape, self.Y_test.shape) 96 | 97 | def test_predict_proba_returns_probabilities_and_std(self): 98 | probs, stds = self.test_HLR.predict_proba(self.X_test, self.cat_test, return_std=True) 99 | self.assertEqual(probs.shape, self.Y_test.shape) 100 | self.assertEqual(stds.shape, self.Y_test.shape) 101 | 102 | def test_predict_proba_raises_error_if_not_fit(self): 103 | with self.assertRaises(PyMC3ModelsError) as no_fit_error: 104 | test_HLR = HierarchicalLogisticRegression() 105 | test_HLR.predict_proba(self.X_train, self.cat_train) 106 | 107 | expected = 'Run fit on the model before predict.' 108 | self.assertEqual(str(no_fit_error.exception), expected) 109 | 110 | 111 | class HierarchicalLogisticRegressionPredictTestCase(HierarchicalLogisticRegressionTestCase): 112 | def test_predict_returns_predictions(self): 113 | preds = self.test_HLR.predict(self.X_test, self.cat_test) 114 | self.assertEqual(preds.shape, self.Y_test.shape) 115 | 116 | 117 | class HierarchicalLogisticRegressionScoreTestCase(HierarchicalLogisticRegressionTestCase): 118 | def test_score_scores(self): 119 | score = self.test_HLR.score(self.X_test, self.Y_test, self.cat_test) 120 | naive_score = np.mean(self.Y_test) 121 | self.assertGreaterEqual(score, naive_score) 122 | 123 | 124 | class HierarchicalLogisticRegressionSaveandLoadTestCase(HierarchicalLogisticRegressionTestCase): 125 | def test_save_and_load_work_correctly(self): 126 | probs1 = self.test_HLR.predict_proba(self.X_test, self.cat_test) 127 | self.test_HLR.save(self.test_dir) 128 | 129 | HLR2 = HierarchicalLogisticRegression() 130 | 131 | HLR2.load(self.test_dir) 132 | 133 | self.assertEqual(self.test_HLR.num_cats, HLR2.num_cats) 134 | self.assertEqual(self.test_HLR.num_pred, HLR2.num_pred) 135 | self.assertEqual(self.test_HLR.num_training_samples, HLR2.num_training_samples) 136 | pd.testing.assert_frame_equal(summary(self.test_HLR.trace), summary(HLR2.trace)) 137 | 138 | probs2 = HLR2.predict_proba(self.X_test, self.cat_test) 139 | 140 | np.testing.assert_almost_equal(probs2, probs1, decimal=1) 141 | -------------------------------------------------------------------------------- /tests/models/test_LinearRegression.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import tempfile 3 | import unittest 4 | 5 | import numpy as np 6 | import pandas as pd 7 | from pymc3 import summary 8 | from sklearn.linear_model import LinearRegression as skLinearRegression 9 | from sklearn.model_selection import train_test_split 10 | 11 | from pymc3_models.exc import PyMC3ModelsError 12 | from pymc3_models import LinearRegression 13 | 14 | 15 | class LinearRegressionTestCase(unittest.TestCase): 16 | def setUp(self): 17 | self.num_pred = 1 18 | self.alpha = 3 19 | self.betas = 4 20 | self.s = 2 21 | 22 | # Set random seed for repeatability 23 | np.random.seed(27) 24 | 25 | X = np.random.randn(1000, 1) 26 | noise = self.s * np.random.randn(1000, 1) 27 | Y = self.betas * X + self.alpha + noise 28 | Y = np.squeeze(Y) 29 | 30 | self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split( 31 | X, Y, test_size=0.4 32 | ) 33 | 34 | self.test_LR = LinearRegression() 35 | # Fit the model with ADVI once 36 | self.test_LR.fit(self.X_train, self.Y_train, num_advi_sample_draws=5000, minibatch_size=2000) 37 | 38 | self.nuts_LR = LinearRegression() 39 | 40 | self.test_dir = tempfile.mkdtemp() 41 | 42 | def tearDown(self): 43 | shutil.rmtree(self.test_dir) 44 | 45 | 46 | class 
LinearRegressionFitTestCase(LinearRegressionTestCase): 47 | def test_advi_fit_returns_correct_model(self): 48 | self.assertEqual(self.num_pred, self.test_LR.num_pred) 49 | 50 | np.testing.assert_almost_equal(self.alpha, self.test_LR.summary['mean']['alpha__0'], decimal=1) 51 | np.testing.assert_almost_equal(self.betas, self.test_LR.summary['mean']['betas__0_0'], decimal=1) 52 | np.testing.assert_almost_equal(self.s, self.test_LR.summary['mean']['s'], decimal=1) 53 | 54 | def test_nuts_fit_returns_correct_model(self): 55 | # Note: print is here so PyMC3 output won't overwrite the test name 56 | print('') 57 | self.nuts_LR.fit(self.X_train, self.Y_train, inference_type='nuts', inference_args={'draws': 2000}) 58 | 59 | self.assertEqual(self.num_pred, self.nuts_LR.num_pred) 60 | 61 | np.testing.assert_almost_equal(self.alpha, self.nuts_LR.summary['mean']['alpha__0'], decimal=1) 62 | np.testing.assert_almost_equal(self.betas, self.nuts_LR.summary['mean']['betas__0_0'], decimal=1) 63 | np.testing.assert_almost_equal(self.s, self.nuts_LR.summary['mean']['s'], decimal=1) 64 | 65 | 66 | class LinearRegressionPredictTestCase(LinearRegressionTestCase): 67 | def test_predict_returns_predictions(self): 68 | preds = self.test_LR.predict(self.X_test) 69 | self.assertEqual(preds.shape, self.Y_test.shape) 70 | 71 | def test_predict_returns_mean_predictions_and_std(self): 72 | preds, stds = self.test_LR.predict(self.X_test, return_std=True) 73 | self.assertEqual(preds.shape, self.Y_test.shape) 74 | self.assertEqual(stds.shape, self.Y_test.shape) 75 | 76 | def test_predict_raises_error_if_not_fit(self): 77 | with self.assertRaises(PyMC3ModelsError) as no_fit_error: 78 | test_LR = LinearRegression() 79 | test_LR.predict(self.X_train) 80 | 81 | expected = 'Run fit on the model before predict.' 
82 | self.assertEqual(str(no_fit_error.exception), expected) 83 | 84 | 85 | class LinearRegressionScoreTestCase(LinearRegressionTestCase): 86 | def test_score_matches_sklearn_performance(self): 87 | skLR = skLinearRegression() 88 | skLR.fit(self.X_train, self.Y_train) 89 | skLR_score = skLR.score(self.X_test, self.Y_test) 90 | 91 | score = self.test_LR.score(self.X_test, self.Y_test) 92 | np.testing.assert_almost_equal(skLR_score, score, decimal=1) 93 | 94 | 95 | class LinearRegressionSaveandLoadTestCase(LinearRegressionTestCase): 96 | def test_save_and_load_work_correctly(self): 97 | score1 = self.test_LR.score(self.X_test, self.Y_test) 98 | self.test_LR.save(self.test_dir) 99 | 100 | LR2 = LinearRegression() 101 | 102 | LR2.load(self.test_dir) 103 | 104 | self.assertEqual(self.test_LR.inference_type, LR2.inference_type) 105 | self.assertEqual(self.test_LR.num_pred, LR2.num_pred) 106 | self.assertEqual(self.test_LR.num_training_samples, LR2.num_training_samples) 107 | pd.testing.assert_frame_equal(summary(self.test_LR.trace), summary(LR2.trace)) 108 | 109 | score2 = LR2.score(self.X_test, self.Y_test) 110 | 111 | np.testing.assert_almost_equal(score1, score2, decimal=1) 112 | -------------------------------------------------------------------------------- /tests/models/test_LogisticRegression.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import tempfile 3 | import unittest 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import pymc3 as pm 8 | from pymc3 import summary 9 | from sklearn.linear_model import LogisticRegression as sklearn_LR 10 | from sklearn.model_selection import train_test_split 11 | 12 | from pymc3_models.exc import PyMC3ModelsError 13 | from pymc3_models import LogisticRegression 14 | 15 | 16 | class LogisticRegressionTestCase(unittest.TestCase): 17 | def setUp(self): 18 | def numpy_invlogit(x): 19 | return 1 / (1 + np.exp(-x)) 20 | 21 | self.num_pred = 1 22 | self.num_samples = 10000 23 | 24 | # Set random seed for repeatability 25 | np.random.seed(27) 26 | 27 | self.alphas = np.random.randn(1) 28 | self.betas = np.random.randn(1, self.num_pred) 29 | X = np.random.randn(self.num_samples, self.num_pred) 30 | Y = np.random.binomial(1, numpy_invlogit(self.alphas[0] + np.sum(self.betas * X, 1))) 31 | 32 | self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(X, Y, test_size=0.4) 33 | 34 | self.test_LR = LogisticRegression() 35 | # Fit the model once 36 | inference_args = { 37 | 'n': 60000, 38 | 'callbacks': [pm.callbacks.CheckParametersConvergence()] 39 | } 40 | # Note: print is here so PyMC3 output won't overwrite the test name 41 | print('') 42 | self.test_LR.fit( 43 | self.X_train, 44 | self.Y_train, 45 | num_advi_sample_draws=5000, 46 | minibatch_size=2000, 47 | inference_args=inference_args 48 | ) 49 | 50 | self.test_dir = tempfile.mkdtemp() 51 | 52 | def tearDown(self): 53 | shutil.rmtree(self.test_dir) 54 | 55 | 56 | class LogisticRegressionFitTestCase(LogisticRegressionTestCase): 57 | def test_fit_returns_correct_model(self): 58 | self.assertEqual(self.num_pred, self.test_LR.num_pred) 59 | 60 | np.testing.assert_almost_equal(self.alphas, self.test_LR.trace['alpha'].mean(), decimal=1) 61 | np.testing.assert_almost_equal(self.betas, self.test_LR.trace['betas'].mean(), decimal=1) 62 | 63 | 64 | class LogisticRegressionPredictProbaTestCase(LogisticRegressionTestCase): 65 | def test_predict_proba_returns_probabilities(self): 66 | probs = self.test_LR.predict_proba(self.X_test) 67 | 
self.assertEqual(probs.shape, self.Y_test.shape) 68 | 69 | def test_predict_proba_returns_probabilities_and_std(self): 70 | probs, stds = self.test_LR.predict_proba(self.X_test, return_std=True) 71 | self.assertEqual(probs.shape, self.Y_test.shape) 72 | self.assertEqual(stds.shape, self.Y_test.shape) 73 | 74 | def test_predict_proba_raises_error_if_not_fit(self): 75 | with self.assertRaises(PyMC3ModelsError) as no_fit_error: 76 | test_LR = LogisticRegression() 77 | test_LR.predict_proba(self.X_train) 78 | 79 | expected = 'Run fit on the model before predict.' 80 | self.assertEqual(str(no_fit_error.exception), expected) 81 | 82 | 83 | class LogisticRegressionPredictTestCase(LogisticRegressionTestCase): 84 | def test_predict_returns_predictions(self): 85 | preds = self.test_LR.predict(self.X_test) 86 | self.assertEqual(preds.shape, self.Y_test.shape) 87 | 88 | 89 | class LogisticRegressionScoreTestCase(LogisticRegressionTestCase): 90 | def test_score_scores(self): 91 | score = self.test_LR.score(self.X_test, self.Y_test) 92 | naive_score = np.mean(self.Y_test) 93 | self.assertGreaterEqual(score, naive_score) 94 | 95 | def test_score_matches_sklearn_performance(self): 96 | SLR = sklearn_LR() 97 | SLR.fit(self.X_train, self.Y_train) 98 | SLR_score = SLR.score(self.X_test, self.Y_test) 99 | 100 | self.test_LR.fit(self.X_train, self.Y_train) 101 | test_LR_score = self.test_LR.score(self.X_test, self.Y_test) 102 | 103 | self.assertAlmostEqual(SLR_score, test_LR_score, 1) 104 | 105 | 106 | class LogisticRegressionSaveandLoadTestCase(LogisticRegressionTestCase): 107 | def test_save_and_load_work_correctly(self): 108 | probs1 = self.test_LR.predict_proba(self.X_test) 109 | self.test_LR.save(self.test_dir) 110 | 111 | LR2 = LogisticRegression() 112 | 113 | LR2.load(self.test_dir) 114 | 115 | self.assertEqual(self.test_LR.num_pred, LR2.num_pred) 116 | self.assertEqual(self.test_LR.num_training_samples, LR2.num_training_samples) 117 | pd.testing.assert_frame_equal(summary(self.test_LR.trace), summary(LR2.trace)) 118 | 119 | probs2 = LR2.predict_proba(self.X_test) 120 | 121 | np.testing.assert_almost_equal(probs2, probs1, decimal=1) 122 | -------------------------------------------------------------------------------- /tests/models/test_NaiveBayes.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import tempfile 3 | import unittest 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import pymc3 as pm 8 | from pymc3 import summary 9 | import scipy.stats 10 | from sklearn.model_selection import train_test_split 11 | 12 | from pymc3_models import GaussianNaiveBayes 13 | from pymc3_models.exc import PyMC3ModelsError 14 | 15 | 16 | class GaussianNaiveBayesTestCase(unittest.TestCase): 17 | def setUp(self): 18 | """ 19 | Set up a test case with synthetic data. 
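The data follow the model's own generative story: class proportions are drawn from a Dirichlet prior, per-class means from Normal(0, 100), per-class standard deviations from HalfNormal(100), and each feature vector from the resulting class-conditional Normals.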
20 | """ 21 | 22 | self.num_cats = 3 23 | self.num_pred = 10 24 | self.num_samples = 50000 25 | 26 | # Set random seed for repeatability 27 | np.random.seed(27) 28 | 29 | # Generate priors 30 | self.alpha = np.ones(self.num_cats) 31 | self.pi = np.random.dirichlet(self.alpha) 32 | self.mu = np.random.normal(0, 100, size=(self.num_cats, self.num_pred)) 33 | self.sigma = scipy.stats.halfnorm(loc=0, scale=100).rvs(size=(self.num_cats, self.num_pred)) 34 | # Generate data 35 | Y = np.random.choice(range(self.num_cats), self.num_samples, p=self.pi) 36 | x_vectors = [] 37 | for i in Y: 38 | x_vectors.append(np.random.normal(self.mu[i], self.sigma[i])) 39 | X = np.vstack(x_vectors) 40 | 41 | # Split into train/test sets 42 | self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(X, Y, test_size=0.4) 43 | 44 | self.num_training_samples = self.Y_train.shape[0] 45 | 46 | self.test_GNB = GaussianNaiveBayes() 47 | # Fit the model once 48 | inference_args = { 49 | 'n': 10000, 50 | 'callbacks': [pm.callbacks.CheckParametersConvergence()] 51 | } 52 | # Note: print is here so PyMC3 output won't overwrite the test name 53 | print('') 54 | self.test_GNB.fit( 55 | self.X_train, 56 | self.Y_train, 57 | num_advi_sample_draws=1000, 58 | minibatch_size=2000, 59 | inference_args=inference_args 60 | ) 61 | 62 | self.test_dir = tempfile.mkdtemp() 63 | 64 | def tearDown(self): 65 | """ 66 | Tear down the testing environment. 67 | """ 68 | shutil.rmtree(self.test_dir) 69 | 70 | 71 | class GaussianNaiveBayesFitTestCase(GaussianNaiveBayesTestCase): 72 | def test_fit_returns_correct_model(self): 73 | """ 74 | Test the model initialization and fit. 75 | 76 | Currently, only the sign of inferred parameters is checked 77 | against the sign of the parameters used to generate the data. 78 | 79 | TOOO: Find better strategies to test probabilistic code. 80 | """ 81 | # Check that the model correctly infers dimensions 82 | self.assertEqual(self.num_cats, self.test_GNB.num_cats) 83 | self.assertEqual(self.num_training_samples, self.test_GNB.num_training_samples) 84 | self.assertEqual(self.num_pred, self.test_GNB.num_pred) 85 | 86 | # TODO: How do you write tests for a stochastic model? 87 | # TODO: Diagnose the sampling with a reasonable sampling size? 88 | np.testing.assert_equal( 89 | np.sign(self.pi), 90 | np.sign(self.test_GNB.trace['pi'].mean(axis=0)) 91 | ) 92 | np.testing.assert_equal( 93 | np.sign(self.sigma), 94 | np.sign(self.test_GNB.trace['sigma'].mean(axis=0)) 95 | ) 96 | 97 | 98 | class GaussianNaiveBayesPredictProbaTest(GaussianNaiveBayesTestCase): 99 | def test_predict_proba_returns_probabilities(self): 100 | probs = self.test_GNB.predict_proba(self.X_test) 101 | self.assertEqual(probs.shape[0], self.Y_test.shape[0]) 102 | 103 | def test_predict_proba_raises_error_if_not_fit(self): 104 | with self.assertRaises(PyMC3ModelsError) as no_fit_error: 105 | test_GNB = GaussianNaiveBayes() 106 | test_GNB.predict_proba(self.X_train) 107 | expected = 'Run fit on the model before predict' 108 | self.assertEqual(str(no_fit_error.exception), expected) 109 | 110 | 111 | class GaussianNaiveBayesPredictionTestCase(GaussianNaiveBayesTestCase): 112 | def test_predict_returns_predictions(self): 113 | """ 114 | Test that the predict() function's output has the correct shape. 
115 | """ 116 | preds = self.test_GNB.predict(self.X_test) 117 | self.assertEqual(preds.shape, self.Y_test.shape) 118 | 119 | 120 | @unittest.skip('test not implemented yet') 121 | class GaussianNaiveBayesScoreTestCase(GaussianNaiveBayesTestCase): 122 | def test_score_scores(self): 123 | # TODO: Figure out how to test the score function 124 | score = self.test_GNB.score(self.X_test, self.Y_test) 125 | 126 | 127 | class GaussianNaiveBayesSaveAndLoadTestCase(GaussianNaiveBayesTestCase): 128 | def test_save_and_load_work_correctly(self): 129 | probs1 = self.test_GNB.predict_proba(self.X_test) 130 | self.test_GNB.save(self.test_dir) 131 | 132 | GNB2 = GaussianNaiveBayes() 133 | GNB2.load(self.test_dir) 134 | self.assertEqual(self.test_GNB.num_cats, GNB2.num_cats) 135 | self.assertEqual(self.test_GNB.num_pred, GNB2.num_pred) 136 | self.assertEqual(self.test_GNB.num_training_samples, GNB2.num_training_samples) 137 | pd.testing.assert_frame_equal(summary(self.test_GNB.trace), summary(GNB2.trace)) 138 | 139 | probs2 = GNB2.predict_proba(self.X_test) 140 | np.testing.assert_almost_equal(probs2, probs1, decimal=1) 141 | --------------------------------------------------------------------------------