├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── docs ├── README.md ├── build │ └── .gitignore └── source │ ├── _static │ └── .gitignore │ ├── _templates │ └── .gitignore │ ├── conf.py │ ├── data-templates.rst │ ├── development.rst │ ├── getting-started.rst │ ├── index.rst │ ├── model-steps.rst │ ├── modelmanager.rst │ └── utilities.rst ├── examples ├── UrbanSim-Templates-demo.ipynb ├── configs │ └── README.md └── data │ └── buildings-demo.csv ├── requirements-dev.txt ├── requirements-extras.txt ├── setup.py ├── tests ├── .gitignore ├── README.md ├── configs │ └── README.md ├── data │ └── README.md ├── pytest.ini ├── test_binary_logit.py ├── test_column_expression.py ├── test_data_load.py ├── test_data_save.py ├── test_large_multinomial_logit.py ├── test_regression.py ├── test_segmented_large_multinomial_logit.py ├── test_shared_core.py ├── test_shared_output_column.py ├── test_small_multinomial_logit.py ├── test_utils.py └── test_utils_broadcasts.py └── urbansim_templates ├── .gitignore ├── __init__.py ├── data ├── __init__.py ├── column_from_expression.py ├── load_table.py └── save_table.py ├── modelmanager.py ├── models ├── .gitignore ├── __init__.py ├── binary_logit.py ├── large_multinomial_logit.py ├── regression.py ├── segmented_large_multinomial_logit.py ├── shared.py └── small_multinomial_logit.py ├── shared ├── __init__.py ├── core.py └── output_column.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .cache/* 2 | build/* 3 | dist/* 4 | urbansim_templates.egg-info/* 5 | **/*.pyc 6 | **/.doctrees/* 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - '3.6' 5 | - '3.8' 6 | - '3.9' 7 | 8 | install: 9 | - pip install . 
10 | - pip install -r requirements-extras.txt 11 | - pip install -r requirements-dev.txt 12 | - pip list 13 | - pip show urbansim_templates 14 | 15 | script: 16 | - cd tests 17 | - coverage run --source urbansim_templates --module pytest --verbose 18 | 19 | after_success: 20 | - coverage report --show-missing 21 | - coveralls -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # UrbanSim Templates change log 2 | 3 | ## 0.2 (not yet released) 4 | 5 | #### 0.2.dev9 (2020-05-15) 6 | 7 | - fixes a bug in `BinaryLogitStep` simulation where the output is not updated correctly 8 | - adds a `resid` attribute to fitted `OLSRegressionStep` models, for diagnostics 9 | 10 | #### 0.2.dev8 (2020-04-17) 11 | 12 | - allows segmented large MNL models to be estimated with a `MergedChoiceTable` that's passed in by the user (rather than generated automatically), thus achieving parity with the non-segmented model class 13 | 14 | #### 0.2.dev7 (2019-07-15) 15 | 16 | - fixes a bug with the `out_transform` parameter for `OLSRegressionStep` 17 | 18 | #### 0.2.dev6 (2019-04-04) 19 | 20 | - introduces classes for storing common settings: `shared.CoreTemplateSettings`, `shared.OutputColumnSettings` 21 | - adds new shared functions: `shared.register_column()`, `utils.cols_in_expression()` 22 | - modifies `ColumnFromExpression` template to divide its parameters into three groups 23 | 24 | #### 0.2.dev5 (2019-03-29) 25 | 26 | - adds new template: `data.ColumnFromExpression` 27 | 28 | #### 0.2.dev4 (2019-03-26) 29 | 30 | - adds new data management utilities: `utils.validate_table()`, `utils.validate_all_tables()`, `utils.merge_tables()` 31 | - updates `utils.get_data()` to use the new merge tool 32 | - updates `BinaryLogitStep` and `OLSRegressionStep` to use the shared `utils.get_data()`, removing any reliance on Orca broadcasts 33 | - raises the `pandas` 
requirement to 0.23 34 | 35 | #### 0.2.dev3 (2019-03-21) 36 | 37 | - adds an `mct` argument to `SegmentedLargeMultinomialLogitStep.fit_all()` 38 | - adds an `interaction_terms` argument to `SegmentedLargeMultinomialLogitStep.run_all()` 39 | 40 | #### 0.2.dev2 (2019-03-04) 41 | 42 | - adds template for saving data: `data.SaveTable()` 43 | - renames `io.TableFromDisk()` to `data.LoadTable()` 44 | 45 | #### 0.2.dev1 (2019-02-27) 46 | 47 | - fixes a crash in small MNL simulation 48 | 49 | #### 0.2.dev0 (2019-02-19) 50 | 51 | - adds first data i/o template: `io.TableFromDisk()` 52 | - adds support for `autorun` template property 53 | 54 | 55 | ## 0.1.3 (2019-07-15) 56 | 57 | - patch to incorporate the `out_transform` bug fix for `OLSRegressionStep`, from 0.2.dev7 58 | 59 | 60 | ## 0.1.2 (2019-02-28) 61 | 62 | - patch to incorporate the small MNL bug fix from 0.2.dev1 63 | 64 | 65 | ## 0.1.1 (2019-02-05) 66 | 67 | #### 0.1.1.dev1 (2019-01-30) 68 | 69 | - adds support for passing multiple tables of interaction terms in large MNL 70 | - enables on-the-fly creation of output columns in small MNL 71 | 72 | #### 0.1.1.dev0 (2019-01-20) 73 | 74 | - allows join keys to be used as data filters in MNL simulation 75 | 76 | 77 | ## 0.1 (2019-01-16) 78 | 79 | #### 0.1.dev25 (2019-01-15) 80 | 81 | - fixes an OLS simulation bug that raised an error when the output column didn't exist yet 82 | - implements `out_transform` for OLS simulation 83 | 84 | #### 0.1.dev24 (2018-12-20) 85 | 86 | - fixes a string comparison bug that caused problems with binary logit output in Windows 87 | - adds `model` as an attribute of large MNL model steps, which provides a `choicemodels.MultinomialLogitResults` object and is available any time after a model step is fitted 88 | - enables on-the-fly creation of output columns in large MNL 89 | - fixes a large MNL simulation bug when there are no valid choosers or alternatives after evaluating the filters 90 | - moves unit tests out of the module directory 91 
| 92 | #### 0.1.dev23 (2018-12-13) 93 | 94 | - fixes a bug with interaction terms passed into `LargeMultinomialLogitStep.run()` 95 | 96 | #### 0.1.dev22 (2018-12-13) 97 | 98 | - narrows the output of `utils.get_data()` to include only the columns requested (plus the index of the primary table) -- previously Orca had also provided some extra columns such as join keys 99 | 100 | #### 0.1.dev21 (2018-12-11) 101 | 102 | - adds a new function `utils.get_data()` to assemble data from Orca, automatically detecting columns included in model expressions and filters 103 | 104 | - implements `SegmentedLargeMultinomialLogitStep.run_all()` 105 | 106 | #### 0.1.dev20 (2018-12-11) 107 | 108 | - fixes a model expression persistence bug in the small MNL template 109 | 110 | #### 0.1.dev19 (2018-12-06) 111 | 112 | - fixes a bug to allow large MNL simulation with multiple chooser tables 113 | 114 | #### 0.1.dev18 (2018-11-19) 115 | 116 | - improves installation and testing 117 | 118 | #### 0.1.dev17 (2018-11-15) 119 | 120 | - adds an `interaction_terms` parameter that users can manually pass to `LargeMultinomialLogitStep.run()`, as a temporary solution until interaction terms are fully handled by the templates 121 | - also adds a `chooser_batch_size` parameter in the same place, to reduce memory pressure when there are large numbers of choosers 122 | 123 | #### 0.1.dev16 (2018-11-06) 124 | 125 | - adds a tool for testing template validity 126 | 127 | #### 0.1.dev15 (2018-10-15) 128 | 129 | - adds new `LargeMultinomialLogitStep` parameters related to choice simulation: `constrained_choices`, `alt_capacity`, `chooser_size`, and `max_iter` 130 | - updates `LargeMultinomialLogitStep.run()` to use improved simulation utilities from ChoiceModels 0.2.dev4 131 | 132 | #### 0.1.dev14 (2018-09-25) 133 | 134 | - adds a template for segmented large MNL models: `SegmentedLargeMultinomialLogitStep`, which can automatically generate a set of large MNL models based on segmentation rules 135 | 136 | #### 
0.1.dev13 (2018-09-24) 137 | 138 | - adds a `@modelmanager.template` decorator that makes a class available to the currently running instance of ModelManager 139 | 140 | #### 0.1.dev12 (2018-09-19) 141 | 142 | - moves the `register()` operation to `modelmanager` (previously it was a method implemented by the individual templates) 143 | - adds general ModelManager support for supplemental objects like pickled model results 144 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Thanks for using UrbanSim Templates! 2 | 3 | This is an open source project that's part of the Urban Data Science Toolkit. Development and maintenance is a collaboration between UrbanSim Inc and U.C. Berkeley's Urban Analytics Lab. 4 | 5 | You can contact Sam Maurer, the lead developer, at `maurer@urbansim.com`. 6 | 7 | 8 | ## If you have a problem: 9 | 10 | - Take a look at the [open issues](https://github.com/UDST/urbansim_templates/issues) and [closed issues](https://github.com/UDST/urbansim_templates/issues?q=is%3Aissue+is%3Aclosed) to see if there's already a related discussion 11 | 12 | - Open a new issue describing the problem -- if possible, include any error messages, the operating system and version of python you're using, and versions of any libraries that may be relevant 13 | 14 | 15 | ## Feature proposals: 16 | 17 | - Take a look at the [open issues](https://github.com/UDST/urbansim_templates/issues) and [closed issues](https://github.com/UDST/urbansim_templates/issues?q=is%3Aissue+is%3Aclosed) to see if there's already a related discussion 18 | 19 | - Post your proposal as a new issue, so we can discuss it (some proposals may not be a good fit for the project) 20 | 21 | 22 | ## Contributing code: 23 | 24 | - Create a new branch of `UDST/urbansim_templates`, or fork the repository to your own account 25 | 26 | - Make your changes, following the 
existing styles for code and inline documentation 27 | 28 | - Add [tests](https://github.com/UDST/urbansim_templates/tree/master/tests) if possible! 29 | 30 | - Open a pull request to the `UDST/urbansim_templates` master branch, including a writeup of your changes -- take a look at some of the closed PR's for examples 31 | 32 | - Current maintainers will review the code, suggest changes, and hopefully merge it! 33 | 34 | 35 | ## Updating the version number: 36 | 37 | - Each pull request that changes substantive code should increment the development version number, e.g. from `0.2.dev7` to `0.2.dev8`, so that users know exactly which version they're running 38 | 39 | - It works best to do this just before merging (in case other PR's are merged first, and so you know the release date for the changelog and documentation) 40 | 41 | - There are three places where the version number needs to be changed: 42 | - `setup.py` 43 | - `urbansim_templates/__init__.py` 44 | - `docs/source/index.rst` 45 | 46 | - Please also add a section to `CHANGELOG.md` describing the changes! 
47 | 48 | 49 | ## Updating the documentation: 50 | 51 | - See instructions in `docs/README.md` 52 | 53 | 54 | ## Preparing a production release: 55 | 56 | - Make a new branch for release prep 57 | 58 | - Update the version number and `CHANGELOG.md` 59 | 60 | - Make sure all the tests are passing, and check if updates are needed to `README.md` or to the documentation 61 | 62 | - Open a pull request to the master branch to finalize it 63 | 64 | - After merging, tag the release on Github and follow the distribution procedures below 65 | 66 | 67 | ## Patching an earlier release: 68 | 69 | - We're not maintaining separate code branches for dev/ production/ major releases, but you can easily recreate them from tags if you need to patch an earlier release 70 | 71 | - In Github, create a new branch from the tag for the version you'd like to patch, calling it something like `v1-production` 72 | 73 | - Create a second branch from that one, called something like `v1-patch` 74 | 75 | - Make your changes in the `v1-patch` branch, and open a PR to `v1-production` to finalize it 76 | 77 | - After merging, tag the release on Github and follow the normal distribution procedures 78 | 79 | - After the new release is tagged, you can delete the extra branches -- a branch is just a pointer to the latest commit in a chain, and these commits will still be accessible via the tag 80 | 81 | 82 | ## Distributing a release on PyPI (for pip installation): 83 | 84 | - Register an account at https://pypi.org, ask one of the current maintainers to add you to the project, and `pip install twine` 85 | 86 | - Check out the copy of the code you'd like to release 87 | 88 | - Run `python setup.py sdist bdist_wheel --universal` 89 | 90 | - This should create a `dist` directory containing two package files -- delete any old ones before the next step 91 | 92 | - Run `twine upload dist/*` -- this will prompt you for your pypi.org credentials 93 | 94 | - Check https://pypi.org/project/urbansim-templates/ for 
the new version 95 | 96 | 97 | ## Distributing a release on Conda Forge (for conda installation): 98 | 99 | - Make a fork of the [conda-forge/urbansim_templates-feedstock](https://github.com/conda-forge/urbansim_templates-feedstock) repository -- there may already be a fork in udst 100 | 101 | - Edit `recipe/meta.yaml`: 102 | - update the version number 103 | - paste a new hash matching the tar.gz file that was uploaded to pypi (it's available on the pypi.org project page) 104 | 105 | - Check that the run requirements still match `requirements.txt` 106 | 107 | - Open a pull request to the `conda-forge/urbansim_templates-feedstock` master branch 108 | 109 | - Automated tests will run, and after they pass one of the current project maintainers will be able to merge the PR -- you can add your Github user name to the maintainers list in `meta.yaml` for the next update 110 | 111 | - Check https://anaconda.org/conda-forge/urbansim-templates for the new version (may take a few minutes for it to appear) 112 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, UrbanSim Inc. 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 
15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # files to include in the source distribution on pypi (setup.py and README.md are included automatically) 2 | 3 | include CHANGELOG.md 4 | include LICENSE.txt 5 | include requirements.txt 6 | include requirements-extras.txt 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/UDST/urbansim_templates.svg?branch=master)](https://travis-ci.org/UDST/urbansim_templates) 2 | [![Coverage Status](https://coveralls.io/repos/github/UDST/urbansim_templates/badge.svg?branch=master)](https://coveralls.io/github/UDST/urbansim_templates?branch=master) 3 | 4 | # UrbanSim Templates 5 | 6 | UrbanSim Templates is a Python library that provides building blocks for Orca-based simulation models. It's part of the [Urban Data Science Toolkit](https://docs.udst.org) (UDST). 7 | 8 | The library contains templates for common types of model steps, plus a tool called ModelManager that runs as an extension to the [Orca](https://udst.github.io/orca) task orchestrator. ModelManager can register template-based model steps with the orchestrator, save them to disk, and automatically reload them for future sessions. The package was developed to make it easier to set up new simulation models — model step templates reduce the need for custom code and make settings more portable between models. 9 | 10 | ### Installation 11 | UrbanSim Templates can be installed using the Pip or Conda package managers. 
12 | 13 | ``` 14 | pip install urbansim_templates 15 | ``` 16 | 17 | ``` 18 | conda install urbansim_templates --channel conda-forge 19 | ``` 20 | 21 | ### Documentation 22 | 23 | See the online documentation for much more: https://udst.github.io/urbansim_templates 24 | 25 | Some additional documentation is available within the repo in `CHANGELOG.md`, `CONTRIBUTING.md`, `/docs/README.md`, and `/tests/README.md`. 26 | 27 | There's discussion of current and planned features in the [pull requests](https://github.com/udst/urbansim_templates/pulls?utf8=✓&q=is%3Apr) and [issues](https://github.com/udst/urbansim_templates/issues?utf8=✓&q=is%3Aissue), both open and closed. 28 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | This folder generates the UrbanSim Templates online documentation, hosted at https://udst.github.io/urbansim_templates/. 2 | 3 | ### How it works 4 | 5 | HTML files are generated using [Sphinx](http://sphinx-doc.org) and hosted with GitHub Pages from the `gh-pages` branch of the repository. The online documentation is rendered and updated **manually**. 6 | 7 | ### Editing the documentation 8 | 9 | The files in `docs/source`, along with docstrings in the source code, determine what appears in the rendered documentation. Here's a [good tutorial](https://pythonhosted.org/an_example_pypi_project/sphinx.html) for Sphinx. 10 | 11 | ### Previewing changes locally 12 | 13 | Install the copy of UrbanSim Templates that the documentation is meant to reflect. Install the documentation tools. 14 | 15 | ``` 16 | pip install . 17 | pip install sphinx sphinx_rtd_theme 18 | ``` 19 | 20 | Build the documentation. There should be status messages and warnings, but no errors. 21 | 22 | ``` 23 | cd docs 24 | sphinx-build -b html source build 25 | ``` 26 | 27 | The HTML files will show up in `docs/build/`. 
28 | 29 | ### Uploading changes 30 | 31 | Clone a second copy of the repository and check out the `gh-pages` branch. Copy over the updated HTML files, commit them, and push the changes to GitHub. 32 | 33 | ### Discussion 34 | 35 | There are various discussions about documentation in the issue threads. [Issue #120](https://github.com/UDST/urbansim_templates/issues/120) is a good starting point. 36 | -------------------------------------------------------------------------------- /docs/build/.gitignore: -------------------------------------------------------------------------------- 1 | **/* -------------------------------------------------------------------------------- /docs/source/_static/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UDST/urbansim_templates/723b83b4187da53a50ee03fdba4842a464f68240/docs/source/_static/.gitignore -------------------------------------------------------------------------------- /docs/source/_templates/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UDST/urbansim_templates/723b83b4187da53a50ee03fdba4842a464f68240/docs/source/_templates/.gitignore -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # UrbanSim Templates documentation build configuration file, created by 5 | # sphinx-quickstart on Fri Jan 4 15:26:06 2019. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 
15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | # import os 21 | # import sys 22 | # sys.path.insert(0, os.path.abspath('../..')) 23 | 24 | import sphinx_rtd_theme 25 | 26 | 27 | # -- General configuration ------------------------------------------------ 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | # 31 | # needs_sphinx = '1.0' 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = [ 37 | 'sphinx.ext.autodoc', 38 | 'sphinx.ext.autosummary', 39 | 'sphinx.ext.napoleon', 40 | 'sphinx.ext.viewcode'] 41 | 42 | # Add any paths that contain templates here, relative to this directory. 43 | templates_path = ['_templates'] 44 | 45 | # The suffix(es) of source filenames. 46 | # You can specify multiple suffix as a list of string: 47 | # 48 | # source_suffix = ['.rst', '.md'] 49 | source_suffix = '.rst' 50 | 51 | # The master toctree document. 52 | master_doc = 'index' 53 | 54 | # General information about the project. 55 | project = 'UrbanSim Templates' 56 | copyright = '2021, UDST' 57 | author = 'UDST' 58 | 59 | # The version info for the project you're documenting, acts as replacement for 60 | # |version| and |release|, also used in various other places throughout the 61 | # built documents. 62 | # 63 | # The short X.Y version. 64 | # version = '0.1' 65 | # The full version, including alpha/beta/rc tags. 66 | # release = '0.1' 67 | import urbansim_templates 68 | version = release = urbansim_templates.__version__ 69 | 70 | # The language for content autogenerated by Sphinx. Refer to documentation 71 | # for a list of supported languages. 
72 | # 73 | # This is also used if you do content translation via gettext catalogs. 74 | # Usually you set "language" from the command line for these cases. 75 | language = None 76 | 77 | # List of patterns, relative to source directory, that match files and 78 | # directories to ignore when looking for source files. 79 | # This patterns also effect to html_static_path and html_extra_path 80 | exclude_patterns = [] 81 | 82 | # The name of the Pygments (syntax highlighting) style to use. 83 | pygments_style = 'sphinx' 84 | 85 | # If true, `todo` and `todoList` produce output, else they produce nothing. 86 | todo_include_todos = False 87 | 88 | 89 | # -- Options for HTML output ---------------------------------------------- 90 | 91 | # The theme to use for HTML and HTML Help pages. See the documentation for 92 | # a list of builtin themes. 93 | # 94 | html_theme = 'sphinx_rtd_theme' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | # 100 | # html_theme_options = {} 101 | 102 | # Add any paths that contain custom static files (such as style sheets) here, 103 | # relative to this directory. They are copied after the builtin static files, 104 | # so a file named "default.css" will overwrite the builtin "default.css". 105 | html_static_path = ['_static'] 106 | 107 | 108 | # -- Options for HTMLHelp output ------------------------------------------ 109 | 110 | # Output file base name for HTML help builder. 111 | htmlhelp_basename = 'UrbanSimTemplatesdoc' 112 | 113 | 114 | # -- Options for LaTeX output --------------------------------------------- 115 | 116 | latex_elements = { 117 | # The paper size ('letterpaper' or 'a4paper'). 118 | # 119 | # 'papersize': 'letterpaper', 120 | 121 | # The font size ('10pt', '11pt' or '12pt'). 122 | # 123 | # 'pointsize': '10pt', 124 | 125 | # Additional stuff for the LaTeX preamble. 
126 | # 127 | # 'preamble': '', 128 | 129 | # Latex figure (float) alignment 130 | # 131 | # 'figure_align': 'htbp', 132 | } 133 | 134 | # Grouping the document tree into LaTeX files. List of tuples 135 | # (source start file, target name, title, 136 | # author, documentclass [howto, manual, or own class]). 137 | latex_documents = [ 138 | (master_doc, 'UrbanSimTemplates.tex', 'UrbanSim Templates Documentation', 139 | 'UDST', 'manual'), 140 | ] 141 | 142 | 143 | # -- Options for manual page output --------------------------------------- 144 | 145 | # One entry per manual page. List of tuples 146 | # (source start file, name, description, authors, manual section). 147 | man_pages = [ 148 | (master_doc, 'urbansimtemplates', 'UrbanSim Templates Documentation', 149 | [author], 1) 150 | ] 151 | 152 | 153 | # -- Options for Texinfo output ------------------------------------------- 154 | 155 | # Grouping the document tree into Texinfo files. List of tuples 156 | # (source start file, target name, title, author, 157 | # dir menu entry, description, category) 158 | texinfo_documents = [ 159 | (master_doc, 'UrbanSimTemplates', 'UrbanSim Templates Documentation', 160 | author, 'UrbanSimTemplates', 'One line description of project.', 161 | 'Miscellaneous'), 162 | ] 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /docs/source/data-templates.rst: -------------------------------------------------------------------------------- 1 | Data management templates 2 | ========================= 3 | 4 | Usage 5 | ----- 6 | 7 | Data templates help you load tables into `Orca `__, create columns of derived data, or save tables or subsets of tables to disk. 8 | 9 | .. 
code-block:: python 10 | 11 | from urbansim_templates.data import LoadTable 12 | 13 | t = LoadTable() 14 | t.table = 'buildings' # a name for the Orca table 15 | t.source_type = 'csv' 16 | t.path = 'buildings.csv' 17 | t.csv_index_cols = 'building_id' 18 | t.name = 'load_buildings' # a name for the model step that sets up the table 19 | 20 | You can run this directly using ``t.run()``, or register the configured template to be part of a larger workflow: 21 | 22 | .. code-block:: python 23 | 24 | from urbansim_templates import modelmanager 25 | 26 | modelmanager.register(t) 27 | 28 | Registration does two things: (a) it saves the configured template to disk as a yaml file, and (b) it creates a model step with logic for loading the table. Running the model step is equivalent to running the configured template object: 29 | 30 | .. code-block:: python 31 | 32 | t.run() 33 | 34 | # equivalent: 35 | import orca 36 | orca.run(['load_buildings']) 37 | 38 | Strictly speaking, running the model step doesn't load the data, it just sets up an Orca table with instructions for loading the data when it's needed. (This is called lazy evaluation.) 39 | 40 | .. code-block:: python 41 | 42 | orca.run(['load_buildings']) # now an Orca table named 'buildings' is registered 43 | 44 | orca.get_table('buildings').to_frame() # now the data is read from disk 45 | 46 | Because "running" the table-loading step is costless, it's done automatically when you register a configured template. It's also done automatically when you initialize a ModelManager session and table-loading configs are read from yaml. (If you'd like to disable this for a particular table, you can set ``t.autorun = False``.) 47 | 48 | 49 | Recommended data schemas 50 | ~~~~~~~~~~~~~~~~~~~~~~~~ 51 | 52 | The :mod:`~urbansim_templates.data.LoadTable` template will work with any data that can be loaded into a Pandas DataFrame. But we highly recommend following stricter data schema rules: 53 | 54 | 1. 
Each table should include a unique, named index column (a.k.a. primary key) or set of columns (multi-index, a.k.a composite key). 55 | 56 | 2. If a column is meant to be a join key for another table, it should have the same name as the index of that table. 57 | 58 | 3. Duplication of column names across tables (except for the join keys) is discouraged, for clarity. 59 | 60 | If you follow these rules, tables can be automatically merged on the fly, for example to assemble estimation data or calculate indicators. 61 | 62 | You can use :func:`~urbansim_templates.utils.validate_table()` or :func:`~urbansim_templates.utils.validate_all_tables()` to check whether these expectations are met. When templates merge tables on the fly, they use :func:`~urbansim_templates.utils.merge_tables()`. 63 | 64 | These utility functions work with any Orca table that meets the schema expectations, whether or not it was created with a template. 65 | 66 | 67 | Compatibility with Orca 68 | ~~~~~~~~~~~~~~~~~~~~~~~ 69 | 70 | From Orca's perspective, tables set up using the :mod:`~urbansim_templates.data.LoadTable` template are equivalent to tables that are registered using ``orca.add_table()`` or the ``@orca.table`` decorator. Technically, they are ``orca.TableFuncWrapper`` objects. 71 | 72 | Unlike the templates, Orca relies on user-specified "`broadcast `__" relationships to perform automatic merging of tables. :mod:`~urbansim_templates.data.LoadTable` does not register any broadcasts, because they're not needed if tables follow the schema rules above. So if you use these tables in non-template model steps, you may need to add broadcasts separately. 73 | 74 | 75 | Data loading API 76 | ---------------- 77 | 78 | .. autoclass:: urbansim_templates.data.LoadTable 79 | :members: 80 | 81 | 82 | Column creation API 83 | ------------------- 84 | 85 | .. autoclass:: urbansim_templates.data.ColumnFromExpression 86 | :members: 87 | 88 | .. 
autoclass:: urbansim_templates.data.ExpressionSettings 89 | :members: 90 | 91 | Data output API 92 | --------------- 93 | 94 | .. autoclass:: urbansim_templates.data.SaveTable 95 | :members: 96 | -------------------------------------------------------------------------------- /docs/source/development.rst: -------------------------------------------------------------------------------- 1 | Development guide 2 | ================= 3 | 4 | Below are some strategies we've come up with for the templates. Technical contribution guidelines are in the `Github repo `__. 5 | 6 | 7 | Design patterns for templates 8 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 9 | 10 | A ModelManager-compliant template is a Python class that conforms to the following spec: 11 | 12 | 1. can save itself to a dict using a method named ``to_dict()`` 13 | 2. can rebuild itself from a dict using a method named ``from_dict()`` 14 | 3. can execute a configured version of itself using a method named ``run()`` 15 | 4. accepts parameters ``name`` (str) and ``tags`` (list of str) 16 | 5. uses the ``@modelmanager.template`` decorator 17 | 18 | Running a configured model step executes logic and typically saves output to Orca. 19 | 20 | Templates should try to use parameter names that are consistent or harmonious with other templates. 21 | 22 | Tables and columns of data should be input as named Orca objects. Other inputs that are hard to store as strings (like callables) should probably be input as Orca objects as well; we're still working on a solution for this. 23 | 24 | All template inputs should be accepted either as constructor parameters or object properties, if feasible: 25 | 26 | .. code-block:: python 27 | 28 | m1 = TemplateStep(foo='yes') 29 | m2 = TemplateStep() 30 | m2.foo = 'yes' 31 | 32 | It's fine for templates to require interactive configuration, like fitting a statistical model. Also fine to require these actions to be completed before the model step can be saved or run. 
33 | 34 | Ideally, users should be able to edit object properties and re-run the interactive components whenever they like. Changes will not be saved until an object is re-registered with ModelManager. 35 | 36 | Lightweight intermediate outputs like summary tables and fitted parameters should be saved in an object's dictionary representation if feasible. 37 | 38 | Bigger intermediate outputs, like pickled copies of full fitted models, can be automatically stored to disk by providing an entry named ``supplemental_objects`` in a model's dictionary representation. This should contain a list of dicts, each of which has parameters ``name`` (str), ``content`` (obj), and ``content_type`` (str, e.g. 'pickle'). 39 | 40 | To avoid dependency bloat, the default installation only includes the dependencies required for core model management and the most commonly used templates. Templates using additional libraries should check whether they're installed before fitting or running a model step, and provide helpful error messages if not. 41 | -------------------------------------------------------------------------------- /docs/source/getting-started.rst: -------------------------------------------------------------------------------- 1 | Getting started 2 | =============== 3 | 4 | Intro 5 | ----- 6 | 7 | UrbanSim Templates is a Python library that provides building blocks for Orca-based simulation models. It's part of the `Urban Data Science Toolkit `__ (UDST). 8 | 9 | The library contains templates for common types of model steps, plus a tool called ModelManager that runs as an extension to the `Orca `__ task orchestrator. ModelManager can register template-based model steps with the orchestrator, save them to disk, and automatically reload them for future sessions. The package was developed to make it easier to set up new simulation models — model step templates reduce the need for custom code and make settings more portable between models.
10 | 11 | UrbanSim Templates is `hosted on Github `__ with a BSD 3-Clause open source license. The code repository includes some material not found in this documentation: a `change log `__, a `contributor's guide `__, and instructions for `running the tests `__, `updating the documentation `__, and `creating a new release `__. 12 | 13 | Another useful resource is the `issues `__ and `pull requests `__ on Github, which include detailed feature proposals and other discussions. 14 | 15 | UrbanSim Templates was created in 2018 by Sam Maurer (maurer@urbansim.com), who remains the lead developer, with contributions from Paul Waddell, Max Gardner, Eddie Janowicz, Arezoo Besharati Zadeh, Xavier Gitiaux, and others. 16 | 17 | 18 | Installation 19 | ------------ 20 | 21 | UrbanSim Templates is currently tested with Python versions 3.6, 3.7, 3.8, and 3.9. 22 | 23 | Production releases 24 | ~~~~~~~~~~~~~~~~~~~ 25 | 26 | UrbanSim Templates can be installed using the Pip or Conda package managers. 27 | 28 | .. code-block:: python 29 | 30 | pip install urbansim_templates 31 | 32 | .. code-block:: python 33 | 34 | conda install urbansim_templates --channel conda-forge 35 | 36 | Dependencies include `NumPy `__, `Pandas `__, and `Statsmodels `__, plus two other UDST libraries: `Orca `__ and `ChoiceModels `__. These will be included automatically when you install UrbanSim Templates. 37 | 38 | Certain less-commonly-used templates require additional packages: currently, `PyLogit `__ and `Scikit-learn `__. You'll need to install these separately to use the associated templates. 39 | 40 | When new production releases of UrbanSim Templates come out, you can upgrade like this: 41 | 42 | .. code-block:: python 43 | 44 | pip install urbansim_templates --upgrade 45 | 46 | .. 
code-block:: python 47 | 48 | conda update urbansim_templates --channel conda-forge 49 | 50 | 51 | Developer pre-releases 52 | ~~~~~~~~~~~~~~~~~~~~~~ 53 | 54 | Developer pre-releases of UrbanSim Templates can be installed using the Github URL. These versions sometimes require having a developer release of `ChoiceModels `__ as well. Information about the developer releases can be found in Github `pull requests `__. 55 | 56 | .. code-block:: python 57 | 58 | pip install git+git://github.com/udst/choicemodels.git 59 | pip install git+git://github.com/udst/urbansim_templates.git 60 | 61 | You can use the same command to upgrade. 62 | 63 | 64 | Cloning the repository 65 | ~~~~~~~~~~~~~~~~~~~~~~ 66 | 67 | If you'll be modifying the code, you can install UrbanSim Templates by cloning the Github repository: 68 | 69 | .. code-block:: python 70 | 71 | git clone https://github.com/udst/urbansim_templates.git 72 | cd urbansim_templates 73 | python setup.py develop 74 | 75 | Update it with ``git pull``. 76 | 77 | 78 | Basic usage 79 | ----------- 80 | 81 | Initializing ModelManager 82 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 83 | 84 | To get started, import and initialize ModelManager. This makes sure there's a directory set up to store any template-based model steps that are generated within the script or notebook. 85 | 86 | .. code-block:: python 87 | 88 | from urbansim_templates import modelmanager 89 | 90 | modelmanager.initialize() 91 | 92 | The default file location is a ``configs`` folder located in the current working directory; you can provide an alternate path if needed. If ModelManager finds existing saved objects in the directory, it will load them and register them with Orca. 93 | 94 | .. note:: 95 | It can be helpful to add a cell to your notebook that reports which version of UrbanSim Templates is installed, particularly if you're using development releases! 96 | 97 | .. 
code-block:: python 98 | 99 | In [2]: import urbansim_templates 100 | print(urbansim_templates.__version__) 101 | 102 | Out[2]: '0.2' 103 | 104 | 105 | Creating a model step 106 | ~~~~~~~~~~~~~~~~~~~~~ 107 | 108 | Now we can choose a template and use it to build a model step. The templates are Python classes that contain logic for setting up and running different kinds of model logic — currently focusing on OLS regressions and discrete choice models. 109 | 110 | A template takes a variety of arguments, which can either be passed as parameters or set as object properties after an instance of the template is created. 111 | 112 | .. code-block:: python 113 | 114 | from urbansim_templates.models import OLSRegressionStep 115 | 116 | m = OLSRegressionStep() 117 | m.name = 'price-prediction' 118 | m.tables = 'buildings' 119 | m.model_expression = 'sale_price ~ residential_sqft' 120 | 121 | This sets up ``m`` as an instance of the OLS regression template. The ``tables`` and ``model_expression`` arguments refer to data that needs to be registered separately with Orca. So let's load the data before trying to estimate the model: 122 | 123 | .. code-block:: python 124 | 125 | import orca 126 | import pandas as pd 127 | 128 | url = "https://raw.githubusercontent.com/UDST/urbansim_templates/dev/examples/data/buildings-demo.csv" 129 | df = pd.read_csv(url).dropna() 130 | orca.add_table('buildings', df) 131 | 132 | 133 | Fitting the statistical model 134 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 135 | 136 | Now we can fit the building price model: 137 | 138 | .. code-block:: python 139 | 140 | m.fit() 141 | 142 | This will print a summary table describing the estimation results. 143 | 144 | Now that we have a fitted model, we can use it to predict sale prices for other buildings. UrbanSim forecasting models consist of many interconnected steps like this, iteratively predicting real estate prices, household moves, construction, and other urban dynamics. 
145 | 146 | 147 | Registering the step 148 | ~~~~~~~~~~~~~~~~~~~~ 149 | 150 | Now we can register the model step: 151 | 152 | .. code-block:: python 153 | 154 | modelmanager.register(m) 155 | 156 | ModelManager parses the step, saves a copy to disk, and registers a runnable version of it as a standard Orca step, so that it can be invoked as part of a sequence of other steps: 157 | 158 | .. code-block:: python 159 | 160 | orca.run(['price-prediction', 'household-moves', 'residential-development']) 161 | 162 | In real usage, some additional parameters would be set to specify which data to use for prediction, and where to store the output. 163 | 164 | 165 | Making changes 166 | ~~~~~~~~~~~~~~ 167 | 168 | ModelManager also includes some interactive functionality. Previously registered steps can be retrieved as template objects, which can be modified and re-registered as needed. This also works with model steps loaded from disk. 169 | 170 | .. code-block:: python 171 | 172 | modelmanager.list_steps() 173 | 174 | m2 = modelmanager.get_step('price-prediction') 175 | ... 176 | 177 | m2.name = 'better-price-prediction' 178 | modelmanager.register(m2) 179 | modelmanager.remove_step('price-prediction') 180 | 181 | If you take a look in the ``configs`` folder, you'll see a yaml file representing the saved model step. It includes the settings we provided, plus the fitted coefficients and anything else generated by the internal logic of the template. 182 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. UrbanSim Templates documentation master file, created by 2 | sphinx-quickstart on Fri Jan 4 15:26:06 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 
5 | 6 | UrbanSim Templates 7 | ================== 8 | 9 | UrbanSim Templates provides building blocks for Orca-based simulation models. It's part of the `Urban Data Science Toolkit `__ (UDST). 10 | 11 | The library contains templates for common types of model steps, plus a tool called ModelManager that runs as an extension to the `Orca `__ task orchestrator. ModelManager can register template-based model steps with the orchestrator, save them to disk, and automatically reload them for future sessions. 12 | 13 | v0.2.dev9, released May 15, 2020 14 | 15 | 16 | Contents 17 | -------- 18 | 19 | .. toctree:: 20 | :maxdepth: 2 21 | 22 | getting-started 23 | modelmanager 24 | model-steps 25 | data-templates 26 | utilities 27 | development 28 | -------------------------------------------------------------------------------- /docs/source/model-steps.rst: -------------------------------------------------------------------------------- 1 | Model step template APIs 2 | ======================== 3 | 4 | The following templates are included in the core package. ModelManager can also work with templates defined elsewhere, as long as they follow the specifications described in the design guidelines. 5 | 6 | 7 | OLS Regression 8 | -------------- 9 | 10 | .. autoclass:: urbansim_templates.models.OLSRegressionStep 11 | :members: 12 | 13 | 14 | Binary Logit 15 | ------------ 16 | 17 | .. autoclass:: urbansim_templates.models.BinaryLogitStep 18 | :members: 19 | 20 | 21 | Small Multinomial Logit 22 | ----------------------- 23 | 24 | .. autoclass:: urbansim_templates.models.SmallMultinomialLogitStep 25 | :members: 26 | 27 | 28 | Large Multinomial Logit 29 | ----------------------- 30 | 31 | .. autoclass:: urbansim_templates.models.LargeMultinomialLogitStep 32 | :members: 33 | 34 | 35 | Segmented Large Multinomial Logit 36 | --------------------------------- 37 | 38 | ..
autoclass:: urbansim_templates.models.SegmentedLargeMultinomialLogitStep 39 | :members: 40 | 41 | 42 | Template Step parent class 43 | -------------------------- 44 | 45 | .. autoclass:: urbansim_templates.models.TemplateStep 46 | :members: -------------------------------------------------------------------------------- /docs/source/modelmanager.rst: -------------------------------------------------------------------------------- 1 | ModelManager API 2 | ================ 3 | 4 | ModelManager runs as an extension to the `Orca `__ task orchestrator. ModelManager can register template-based model steps with the orchestrator, save them to disk, and automatically reload them for future sessions. 5 | 6 | The recommended way to load ModelManager is like this:: 7 | 8 | from urbansim_templates import modelmanager 9 | 10 | modelmanager.initialize() 11 | 12 | 13 | Core operations 14 | --------------- 15 | 16 | .. automodule:: urbansim_templates.modelmanager 17 | :members: initialize, register, list_steps, get_step, remove_step 18 | 19 | 20 | Internal functionality 21 | ---------------------- 22 | 23 | These functions are the building blocks of ModelManager. You probably won't need to use 24 | them directly, but they could be useful for debugging or for extending ModelManager's 25 | functionality. 26 | 27 | .. automodule:: urbansim_templates.modelmanager 28 | :members: template, build_step, save_step_to_disk, load_supplemental_object, 29 | save_supplemental_object, remove_supplemental_object, get_config_dir -------------------------------------------------------------------------------- /docs/source/utilities.rst: -------------------------------------------------------------------------------- 1 | Shared utilities 2 | ================ 3 | 4 | The utilities are mainly helper functions for templates. 5 | 6 | 7 | General template tools API 8 | -------------------------- 9 | 10 | .. 
automodule:: urbansim_templates.shared 11 | :members: CoreTemplateSettings 12 | 13 | 14 | Column output tools API 15 | ----------------------- 16 | 17 | .. automodule:: urbansim_templates.shared 18 | :members: OutputColumnSettings, register_column 19 | 20 | 21 | Table schemas and merging API 22 | ----------------------------- 23 | 24 | .. automodule:: urbansim_templates.utils 25 | :members: validate_table, validate_all_tables, merge_tables 26 | 27 | 28 | Other helper functions API 29 | -------------------------- 30 | 31 | .. automodule:: urbansim_templates.utils 32 | :members: all_cols, cols_in_expression, get_data, get_df, trim_cols, to_list, update_column, update_name 33 | 34 | 35 | Spec validation API 36 | ------------------- 37 | 38 | .. automodule:: urbansim_templates.utils 39 | :members: validate_template 40 | 41 | 42 | Version management API 43 | ---------------------- 44 | 45 | .. automodule:: urbansim_templates.utils 46 | :members: parse_version, version_greater_or_equal 47 | -------------------------------------------------------------------------------- /examples/UrbanSim-Templates-demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "fixed-tenant", 6 | "metadata": {}, 7 | "source": [ 8 | "# UrbanSim Templates demo\n", 9 | "\n", 10 | "Sam Maurer, Feb 2021\n", 11 | "\n", 12 | "### Background\n", 13 | "\n", 14 | "[UrbanSim](https://github.com/udst/urbansim) is a platform for modeling land use in cities. It runs in Python and uses the [Orca](https://github.com/udst/orca) task orchestration system. \n", 15 | "\n", 16 | "Orca breaks a model into \"steps\", Python functions that can be assembled on the fly into linear or cyclical pipelines. (Typically each step is a statistical model capturing one aspect of the dynamics being studied.) 
Orca is designed for workflows like city simulation where the data representing a model's state is so large that it needs to be managed outside the task graph. Steps refer to tables and columns of data by name rather than passing the data directly.\n", 17 | "\n", 18 | "UrbanSim [Templates](https://github.com/udst/urbansim_templates) is a library that provides automated building blocks for Orca-based models. The templates were developed to reduce the need for custom code and improve the portability of model components.\n", 19 | "\n", 20 | "Currently we have templates for (a) regression, (b) binary logit, (c) multinomial logit estimated with [PyLogit](https://github.com/timothyb0912/pylogit) (best choice for flexible utility expressions), and (d) multinomial logit estimated with [ChoiceModels](https://github.com/udst/choicemodels) (best choice for sampling of interchangeable alternatives).\n", 21 | "\n", 22 | "### Documentation\n", 23 | "\n", 24 | "Full UrbanSim Templates documentation: https://udst.github.io/urbansim_templates/\n", 25 | "\n", 26 | "### Installation\n", 27 | "\n", 28 | "You can install `orca` and `urbansim_templates` with Pip or from Conda Forge." 
29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "id": "hearing-rescue", 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "1.2.1\n" 42 | ] 43 | } 44 | ], 45 | "source": [ 46 | "import pandas as pd\n", 47 | "\n", 48 | "print(pd.__version__)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "id": "ultimate-durham", 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "1.5.4\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "import orca\n", 67 | "\n", 68 | "print(orca.__version__)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 3, 74 | "id": "taken-membership", 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "0.2.dev9\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "import urbansim_templates\n", 87 | "\n", 88 | "print(urbansim_templates.__version__)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 4, 94 | "id": "independent-macedonia", 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# This makes the notebook output clearer\n", 99 | "import warnings\n", 100 | "warnings.simplefilter(action='ignore', category=FutureWarning)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "id": "ultimate-partner", 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "id": "stylish-problem", 114 | "metadata": {}, 115 | "source": [ 116 | "### Setting up ModelManager\n", 117 | "\n", 118 | "[ModelManager](https://udst.github.io/urbansim_templates/modelmanager.html) is part of the Templates library. It's an extension to Orca for saving and loading template-based model steps. 
\n", 119 | "\n", 120 | "By default it will look for a folder named `configs` in your current working directory, where it will read and save yaml representations of model steps. If there are already model steps there, the corresponding template classes need to be loaded before initializing ModelManager." 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 5, 126 | "id": "fatal-welsh", 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "No yaml files found in path 'configs'\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "from urbansim_templates.models import OLSRegressionStep\n", 139 | "from urbansim_templates import modelmanager\n", 140 | "\n", 141 | "modelmanager.initialize()" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "id": "acute-savings", 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "id": "legitimate-square", 155 | "metadata": {}, 156 | "source": [ 157 | "### Setting up data\n", 158 | "\n", 159 | "We'll load a DataFrame and register it with Orca, so that our statistical models can refer to it." 
160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 6, 165 | "id": "polyphonic-pointer", 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "text/plain": [ 171 | "482" 172 | ] 173 | }, 174 | "execution_count": 6, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "df = pd.read_csv('data/buildings-demo.csv').dropna()\n", 181 | "len(df)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 7, 187 | "id": "quarterly-rugby", 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": [ 193 | "" 194 | ] 195 | }, 196 | "execution_count": 7, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "orca.add_table('buildings', df)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 8, 208 | "id": "broken-manchester", 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "text/html": [ 214 | "
\n", 215 | "\n", 228 | "\n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | "
building_idparcel_iddevelopment_type_idimprovement_valueresidential_unitsresidential_sqftsqft_per_unitnon_residential_sqftbuilding_sqftres_price_per_sqftstoriesyear_builtsale_pricesale_yearbuilding_type_id
3732871210.0113931393.000.00.00000012008670250.02008.01
497426611116580.0110181018.001018.0474.35053411946703000.02007.01
6117166261457526.0136933693.003693.0124.8244321199895000.01996.01
1015742822195050.0111061106.001106.0448.07426111957675000.02005.01
13187434441166000.0113541354.001354.0411.5064011195118500.02006.01
\n", 342 | "
" 343 | ], 344 | "text/plain": [ 345 | " building_id parcel_id development_type_id improvement_value \\\n", 346 | "3 7 328712 1 0.0 \n", 347 | "4 9 742661 1 116580.0 \n", 348 | "6 11 716626 1 457526.0 \n", 349 | "10 15 742822 1 95050.0 \n", 350 | "13 18 743444 1 166000.0 \n", 351 | "\n", 352 | " residential_units residential_sqft sqft_per_unit non_residential_sqft \\\n", 353 | "3 1 1393 1393.0 0 \n", 354 | "4 1 1018 1018.0 0 \n", 355 | "6 1 3693 3693.0 0 \n", 356 | "10 1 1106 1106.0 0 \n", 357 | "13 1 1354 1354.0 0 \n", 358 | "\n", 359 | " building_sqft res_price_per_sqft stories year_built sale_price \\\n", 360 | "3 0.0 0.000000 1 2008 670250.0 \n", 361 | "4 1018.0 474.350534 1 1946 703000.0 \n", 362 | "6 3693.0 124.824432 1 1998 95000.0 \n", 363 | "10 1106.0 448.074261 1 1957 675000.0 \n", 364 | "13 1354.0 411.506401 1 1951 18500.0 \n", 365 | "\n", 366 | " sale_year building_type_id \n", 367 | "3 2008.0 1 \n", 368 | "4 2007.0 1 \n", 369 | "6 1996.0 1 \n", 370 | "10 2005.0 1 \n", 371 | "13 2006.0 1 " 372 | ] 373 | }, 374 | "execution_count": 8, 375 | "metadata": {}, 376 | "output_type": "execute_result" 377 | } 378 | ], 379 | "source": [ 380 | "orca.get_table('buildings').to_frame().head()" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "id": "cheap-sugar", 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "id": "international-ordering", 394 | "metadata": {}, 395 | "source": [ 396 | "### Fitting a model\n", 397 | "\n", 398 | "Now we can choose a [template](https://udst.github.io/urbansim_templates/model-steps.html) and use it to fit a model." 
399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 9, 404 | "id": "studied-federation", 405 | "metadata": {}, 406 | "outputs": [], 407 | "source": [ 408 | "from urbansim_templates.models import OLSRegressionStep\n", 409 | "\n", 410 | "m = OLSRegressionStep()\n", 411 | "m.name = 'price-prediction'\n", 412 | "m.tables = 'buildings'\n", 413 | "m.model_expression = 'np.log1p(res_price_per_sqft) ~ non_residential_sqft>0 + year_built<1960'" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 10, 419 | "id": "checked-addition", 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "name": "stdout", 424 | "output_type": "stream", 425 | "text": [ 426 | " OLS Regression Results \n", 427 | "========================================================================================\n", 428 | "Dep. Variable: np.log1p(res_price_per_sqft) R-squared: 0.398\n", 429 | "Model: OLS Adj. R-squared: 0.395\n", 430 | "Method: Least Squares F-statistic: 158.1\n", 431 | "Date: Tue, 09 Feb 2021 Prob (F-statistic): 1.93e-53\n", 432 | "Time: 12:02:09 Log-Likelihood: -598.98\n", 433 | "No. 
Observations: 482 AIC: 1204.\n", 434 | "Df Residuals: 479 BIC: 1216.\n", 435 | "Df Model: 2 \n", 436 | "Covariance Type: nonrobust \n", 437 | "====================================================================================================\n", 438 | " coef std err t P>|t| [0.025 0.975]\n", 439 | "----------------------------------------------------------------------------------------------------\n", 440 | "Intercept 5.5567 0.047 118.870 0.000 5.465 5.649\n", 441 | "non_residential_sqft > 0[T.True] -5.6513 0.320 -17.642 0.000 -6.281 -5.022\n", 442 | "year_built < 1960[T.True] 0.2206 0.082 2.693 0.007 0.060 0.382\n", 443 | "==============================================================================\n", 444 | "Omnibus: 511.938 Durbin-Watson: 1.611\n", 445 | "Prob(Omnibus): 0.000 Jarque-Bera (JB): 21647.939\n", 446 | "Skew: -4.895 Prob(JB): 0.00\n", 447 | "Kurtosis: 34.338 Cond. No. 8.89\n", 448 | "==============================================================================\n", 449 | "\n", 450 | "Notes:\n", 451 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" 452 | ] 453 | } 454 | ], 455 | "source": [ 456 | "m.fit()" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "id": "concerned-argument", 463 | "metadata": {}, 464 | "outputs": [], 465 | "source": [] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "id": "widespread-cache", 470 | "metadata": {}, 471 | "source": [ 472 | "### Registering the step\n", 473 | "\n", 474 | "When we're happy with the specification, we can \"register\" the step with ModelManager. This saves a copy to disk and also passes it to Orca so it can be run as part of a sequence of other steps for validation or simulation." 
475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 11, 480 | "id": "thick-steam", 481 | "metadata": {}, 482 | "outputs": [ 483 | { 484 | "name": "stdout", 485 | "output_type": "stream", 486 | "text": [ 487 | "Saving 'price-prediction.yaml': /Users/maurer/Dropbox/Git-imac/udst/urbansim_templates/examples/configs\n", 488 | "Registering model step 'price-prediction'\n" 489 | ] 490 | } 491 | ], 492 | "source": [ 493 | "modelmanager.register(m)" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "id": "egyptian-newport", 500 | "metadata": {}, 501 | "outputs": [], 502 | "source": [] 503 | }, 504 | { 505 | "cell_type": "markdown", 506 | "id": "bound-rapid", 507 | "metadata": {}, 508 | "source": [ 509 | "### Making changes\n", 510 | "\n", 511 | "Previously registered steps can be retrieved, modified, and re-registered as needed." 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": 12, 517 | "id": "portable-supplier", 518 | "metadata": {}, 519 | "outputs": [ 520 | { 521 | "data": { 522 | "text/plain": [ 523 | "[{'name': 'price-prediction', 'template': 'OLSRegressionStep', 'tags': []}]" 524 | ] 525 | }, 526 | "execution_count": 12, 527 | "metadata": {}, 528 | "output_type": "execute_result" 529 | } 530 | ], 531 | "source": [ 532 | "modelmanager.list_steps()" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 13, 538 | "id": "married-delay", 539 | "metadata": {}, 540 | "outputs": [], 541 | "source": [ 542 | "m2 = modelmanager.get_step('price-prediction')" 543 | ] 544 | }, 545 | { 546 | "cell_type": "code", 547 | "execution_count": 14, 548 | "id": "responsible-logic", 549 | "metadata": {}, 550 | "outputs": [ 551 | { 552 | "name": "stdout", 553 | "output_type": "stream", 554 | "text": [ 555 | "Saving 'better-price-prediction.yaml': /Users/maurer/Dropbox/Git-imac/udst/urbansim_templates/examples/configs\n", 556 | "Registering model step 'better-price-prediction'\n" 
557 | ] 558 | } 559 | ], 560 | "source": [ 561 | "m2.name = 'better-price-prediction'\n", 562 | "# here you can edit the specification and re-fit, etc.\n", 563 | "\n", 564 | "modelmanager.register(m2)" 565 | ] 566 | }, 567 | { 568 | "cell_type": "code", 569 | "execution_count": 15, 570 | "id": "productive-wyoming", 571 | "metadata": {}, 572 | "outputs": [ 573 | { 574 | "name": "stdout", 575 | "output_type": "stream", 576 | "text": [ 577 | "Removing 'better-price-prediction' and 'better-price-prediction.yaml'\n" 578 | ] 579 | } 580 | ], 581 | "source": [ 582 | "modelmanager.remove_step('better-price-prediction')" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": null, 588 | "id": "inclusive-annual", 589 | "metadata": {}, 590 | "outputs": [], 591 | "source": [] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": null, 596 | "id": "martial-fortune", 597 | "metadata": {}, 598 | "outputs": [], 599 | "source": [] 600 | } 601 | ], 602 | "metadata": { 603 | "kernelspec": { 604 | "display_name": "Python [conda env:template-demo] *", 605 | "language": "python", 606 | "name": "conda-env-template-demo-py" 607 | }, 608 | "language_info": { 609 | "codemirror_mode": { 610 | "name": "ipython", 611 | "version": 3 612 | }, 613 | "file_extension": ".py", 614 | "mimetype": "text/x-python", 615 | "name": "python", 616 | "nbconvert_exporter": "python", 617 | "pygments_lexer": "ipython3", 618 | "version": "3.8.5" 619 | } 620 | }, 621 | "nbformat": 4, 622 | "nbformat_minor": 5 623 | } 624 | -------------------------------------------------------------------------------- /examples/configs/README.md: -------------------------------------------------------------------------------- 1 | This folder stores configs that are generated by the demo notebook. 
-------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # requirements for development and testing 2 | 3 | coverage 4 | coveralls 5 | pytest 6 | sphinx 7 | sphinx_rtd_theme -------------------------------------------------------------------------------- /requirements-extras.txt: -------------------------------------------------------------------------------- 1 | # additional requirements for less-used templates 2 | 3 | pylogit >= 0.2 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='urbansim_templates', 5 | version='0.2.dev9', 6 | description='UrbanSim extension for managing model steps', 7 | author='UrbanSim Inc.', 8 | author_email='info@urbansim.com', 9 | url='https://github.com/udst/urbansim_templates', 10 | classifiers=[ 11 | 'Programming Language :: Python :: 2', 12 | 'Programming Language :: Python :: 2.7', 13 | 'Programming Language :: Python :: 3', 14 | 'Programming Language :: Python :: 3.5', 15 | 'Programming Language :: Python :: 3.6', 16 | 'Programming Language :: Python :: 3.7', 17 | 'Programming Language :: Python :: 3.8', 18 | 'License :: OSI Approved :: BSD License' 19 | ], 20 | packages=find_packages(exclude=['*.tests']), 21 | install_requires=[ 22 | 'choicemodels >= 0.2.2.dev1', 23 | 'numpy >= 1.14', 24 | 'orca >= 1.4', 25 | 'pandas >= 0.23', 26 | 'patsy >= 0.4', 27 | 'statsmodels >= 0.8, <0.11; python_version <"3.6"', 28 | 'statsmodels >= 0.8; python_version >="3.6"', 29 | 'urbansim >= 3.1' 30 | ] 31 | ) 32 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | .cache/* 2 | .coverage 3 | 
__pycache__/* -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | Run tests from this folder using `pytest *.py -s`. -------------------------------------------------------------------------------- /tests/configs/README.md: -------------------------------------------------------------------------------- 1 | This folder stores configs that are temporarily generated during tests. -------------------------------------------------------------------------------- /tests/data/README.md: -------------------------------------------------------------------------------- 1 | This folder stores data that is temporarily generated during tests. -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore:::orca 4 | ignore:::urbansim 5 | ignore:::pandas 6 | ignore:::past 7 | ignore:::prettytable 8 | ignore:::statsmodels 9 | ignore:::yaml -------------------------------------------------------------------------------- /tests/test_binary_logit.py: -------------------------------------------------------------------------------- 1 | import orca 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | from urbansim_templates import modelmanager 7 | from urbansim_templates.models import BinaryLogitStep 8 | from urbansim_templates.utils import validate_template 9 | 10 | 11 | @pytest.fixture 12 | def orca_session(): 13 | d1 = {'a': np.random.random(100), 14 | 'b': np.random.randint(2, size=100)} 15 | 16 | obs = pd.DataFrame(d1) 17 | orca.add_table('obs', obs) 18 | 19 | 20 | def test_template_validity(): 21 | """ 22 | Run the template through the standard validation check. 
23 | 24 | """ 25 | assert validate_template(BinaryLogitStep) 26 | 27 | 28 | def test_binary_logit(orca_session): 29 | """ 30 | For now this just tests that the code runs. 31 | 32 | """ 33 | modelmanager.initialize() 34 | 35 | m = BinaryLogitStep() 36 | m.tables = 'obs' 37 | m.model_expression = 'b ~ a' 38 | 39 | m.fit() 40 | 41 | m.name = 'binary-test' 42 | modelmanager.register(m) 43 | 44 | modelmanager.initialize() 45 | m = modelmanager.get_step('binary-test') 46 | 47 | modelmanager.remove_step('binary-test') -------------------------------------------------------------------------------- /tests/test_column_expression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pytest 4 | 5 | import orca 6 | 7 | from urbansim_templates import modelmanager 8 | from urbansim_templates.data import ColumnFromExpression, ExpressionSettings 9 | from urbansim_templates.utils import validate_template 10 | 11 | 12 | def test_expression_settings_persistence(): 13 | """ 14 | Confirm ExpressionSettings properties persist through the constructor, to_dict(), 15 | and from_dict(). 16 | 17 | """ 18 | d = {'table': 'tab', 'expression': 'a + b + c'} 19 | obj = ExpressionSettings(table = 'tab', expression = 'a + b + c') 20 | 21 | assert(d == obj.to_dict() == ExpressionSettings.from_dict(d).to_dict()) 22 | 23 | 24 | def test_legacy_data_loader(orca_session): 25 | """ 26 | Check that loading a saved dict with the legacy format works. 
27 | 28 | """ 29 | d = { 30 | 'name': 'n', 31 | 'tags': ['a', 'b'], 32 | 'autorun': False, 33 | 'column_name': 'col', 34 | 'table': 'tab', 35 | 'expression': 'abc', 36 | 'data_type': 'int', 37 | 'missing_values': 5, 38 | 'cache': True, 39 | 'cache_scope': 'step'} 40 | 41 | c = ColumnFromExpression.from_dict(d) 42 | assert(c.meta.name == d['name']) 43 | assert(c.meta.tags == d['tags']) 44 | assert(c.meta.autorun == d['autorun']) 45 | assert(c.data.table == d['table']) 46 | assert(c.data.expression == d['expression']) 47 | assert(c.output.column_name == d['column_name']) 48 | assert(c.output.data_type == d['data_type']) 49 | assert(c.output.missing_values == d['missing_values']) 50 | assert(c.output.cache == d['cache']) 51 | assert(c.output.cache_scope == d['cache_scope']) 52 | 53 | 54 | @pytest.fixture 55 | def orca_session(): 56 | """ 57 | Set up a clean Orca and ModelManager session, with a data table. 58 | 59 | """ 60 | orca.clear_all() 61 | modelmanager.initialize() 62 | 63 | d1 = {'id': np.arange(5), 64 | 'a': np.random.random(5), 65 | 'b': np.random.choice(np.arange(20), size=5)} 66 | 67 | df = pd.DataFrame(d1).set_index('id') 68 | orca.add_table('obs', df) 69 | 70 | 71 | # def test_template_validity(): 72 | # """ 73 | # Check template conforms to basic spec. 74 | # 75 | # """ 76 | # assert validate_template(ColumnFromExpression) 77 | 78 | 79 | def test_missing_colname(orca_session): 80 | """ 81 | Missing column_name should raise a ValueError. 82 | 83 | """ 84 | c = ColumnFromExpression() 85 | c.data.table = 'tab' 86 | c.data.expression = 'a' 87 | 88 | try: 89 | c.run() 90 | except ValueError as e: 91 | print(e) 92 | return 93 | 94 | pytest.fail() 95 | 96 | 97 | def test_missing_table(orca_session): 98 | """ 99 | Missing table should raise a ValueError. 
100 | 101 | """ 102 | c = ColumnFromExpression() 103 | c.data.expression = 'a' 104 | c.output.column_name = 'col' 105 | 106 | try: 107 | c.run() 108 | except ValueError as e: 109 | print(e) 110 | return 111 | 112 | pytest.fail() 113 | 114 | 115 | def test_missing_expression(orca_session): 116 | """ 117 | Missing expression should raise a ValueError. 118 | 119 | """ 120 | c = ColumnFromExpression() 121 | c.data.table = 'tab' 122 | c.output.column_name = 'col' 123 | 124 | try: 125 | c.run() 126 | except ValueError as e: 127 | print(e) 128 | return 129 | 130 | pytest.fail() 131 | 132 | 133 | def test_expression(orca_session): 134 | """ 135 | Check that column is created and expression evaluated correctly. 136 | 137 | """ 138 | c = ColumnFromExpression() 139 | c.data.table = 'obs' 140 | c.data.expression = 'a * 5 + sqrt(b)' 141 | c.output.column_name = 'c' 142 | 143 | c.run() 144 | 145 | val1 = orca.get_table('obs').get_column('c') 146 | df = orca.get_table('obs').to_frame() 147 | val2 = df.a * 5 + np.sqrt(df.b) 148 | assert(val1.equals(val2)) 149 | 150 | 151 | def test_modelmanager_registration(orca_session): 152 | """ 153 | Check that modelmanager registration and auto-run work as expected. 154 | 155 | """ 156 | c = ColumnFromExpression() 157 | c.data.table = 'obs' 158 | c.data.expression = 'a + b' 159 | c.output.column_name = 'c' 160 | 161 | modelmanager.register(c) 162 | modelmanager.remove_step(c.meta.name) 163 | assert('c' in orca.get_table('obs').columns) 164 | 165 | 166 | def test_expression_with_standalone_columns(orca_session): 167 | """ 168 | Check that expression can assemble data from stand-alone columns that are not part 169 | of the core DataFrame wrapped by a table. 
170 | 171 | """ 172 | c = ColumnFromExpression() 173 | c.data.table = 'obs' 174 | c.data.expression = 'a + b' 175 | c.output.column_name = 'c' 176 | 177 | modelmanager.register(c) 178 | modelmanager.remove_step(c.meta.name) 179 | 180 | d = ColumnFromExpression() 181 | d.data.table = 'obs' 182 | d.data.expression = 'a + c' 183 | d.output.column_name = 'd' 184 | 185 | d.run() 186 | assert('d' in orca.get_table('obs').columns) 187 | 188 | -------------------------------------------------------------------------------- /tests/test_data_load.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import pytest 6 | 7 | import orca 8 | 9 | from urbansim_templates import modelmanager 10 | from urbansim_templates.data import LoadTable 11 | from urbansim_templates.utils import validate_template 12 | 13 | 14 | @pytest.fixture 15 | def orca_session(): 16 | """ 17 | Set up a clean Orca session and initialize ModelManager. 18 | 19 | """ 20 | orca.clear_all() 21 | modelmanager.initialize() 22 | 23 | 24 | @pytest.fixture 25 | def data(request): 26 | """ 27 | Create some data files on disk. 28 | 29 | """ 30 | d1 = {'building_id': np.arange(10), 31 | 'price': 1e6*np.random.random(10)} 32 | 33 | bldg = pd.DataFrame(d1).set_index('building_id') 34 | bldg.to_csv('data/buildings.csv') 35 | bldg.to_csv('data/buildings.csv.gz', compression='gzip') 36 | bldg.to_hdf('data/buildings.hdf', key='buildings') 37 | 38 | def teardown(): 39 | os.remove('data/buildings.csv') 40 | os.remove('data/buildings.csv.gz') 41 | os.remove('data/buildings.hdf') 42 | 43 | request.addfinalizer(teardown) 44 | 45 | 46 | def test_template_validity(): 47 | """ 48 | Run the templates through the standard validation check. 
49 | 50 | """ 51 | assert validate_template(LoadTable) 52 | 53 | 54 | def test_property_persistence(orca_session): 55 | """ 56 | Test persistence of properties across registration, saving, and reloading. 57 | 58 | """ 59 | t = LoadTable() 60 | t.table = 'buildings' 61 | t.source_type = 'csv' 62 | t.path = 'data/buildings.csv' 63 | t.csv_index_cols = 'building_id' 64 | t.extra_settings = {'make_data_awesome': True} # unfortunately not a valid setting 65 | t.cache = False 66 | t.cache_scope = 'iteration' 67 | t.copy_col = False 68 | t.name = 'buildings-csv' 69 | t.tags = ['awesome', 'data'] 70 | t.autorun = False 71 | 72 | d1 = t.to_dict() 73 | modelmanager.register(t) 74 | modelmanager.initialize() 75 | d2 = modelmanager.get_step(t.name).to_dict() 76 | 77 | assert d1 == d2 78 | modelmanager.remove_step(t.name) 79 | 80 | 81 | def test_csv(orca_session, data): 82 | """ 83 | Test loading data from a CSV file. 84 | 85 | """ 86 | t = LoadTable() 87 | t.table = 'buildings' 88 | t.source_type = 'csv' 89 | t.path = 'data/buildings.csv' 90 | t.csv_index_cols = 'building_id' 91 | 92 | assert 'buildings' not in orca.list_tables() 93 | 94 | modelmanager.register(t) 95 | assert 'buildings' in orca.list_tables() 96 | _ = orca.get_table('buildings').to_frame() 97 | 98 | modelmanager.initialize() 99 | assert 'buildings' in orca.list_tables() 100 | 101 | modelmanager.remove_step(t.name) 102 | 103 | 104 | def test_hdf(orca_session, data): 105 | """ 106 | Test loading data from an HDF file. 
107 | 108 | """ 109 | t = LoadTable() 110 | t.table = 'buildings' 111 | t.source_type = 'hdf' 112 | t.path = 'data/buildings.hdf' 113 | 114 | assert 'buildings' not in orca.list_tables() 115 | 116 | modelmanager.register(t) 117 | assert 'buildings' in orca.list_tables() 118 | _ = orca.get_table('buildings').to_frame() 119 | 120 | modelmanager.initialize() 121 | assert 'buildings' in orca.list_tables() 122 | 123 | modelmanager.remove_step(t.name) 124 | 125 | 126 | def test_extra_settings(orca_session, data): 127 | """ 128 | Test loading data with extra settings, e.g. for compressed files. 129 | 130 | """ 131 | t = LoadTable() 132 | t.table = 'buildings' 133 | t.source_type = 'csv' 134 | t.path = 'data/buildings.csv.gz' 135 | t.csv_index_cols = 'building_id' 136 | t.extra_settings = {'compression': 'gzip'} 137 | 138 | assert 'buildings' not in orca.list_tables() 139 | 140 | modelmanager.register(t) 141 | assert 'buildings' in orca.list_tables() 142 | _ = orca.get_table('buildings').to_frame() 143 | 144 | modelmanager.initialize() 145 | assert 'buildings' in orca.list_tables() 146 | 147 | modelmanager.remove_step(t.name) 148 | 149 | 150 | def test_without_autorun(orca_session, data): 151 | """ 152 | Confirm that disabling autorun works. 
153 | 154 | """ 155 | t = LoadTable() 156 | t.table = 'buildings' 157 | t.source_type = 'csv' 158 | t.path = 'data/buildings.csv' 159 | t.csv_index_cols = 'building_id' 160 | t.autorun = False 161 | 162 | modelmanager.register(t) 163 | assert 'buildings' not in orca.list_tables() 164 | 165 | modelmanager.remove_step(t.name) 166 | 167 | 168 | -------------------------------------------------------------------------------- /tests/test_data_save.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import pytest 6 | 7 | import orca 8 | 9 | from urbansim_templates import modelmanager 10 | from urbansim_templates.data import SaveTable 11 | from urbansim_templates.utils import update_column, validate_template 12 | 13 | 14 | @pytest.fixture 15 | def orca_session(): 16 | """ 17 | Set up a clean Orca session and initialize ModelManager. 18 | 19 | """ 20 | orca.clear_all() 21 | modelmanager.initialize() 22 | 23 | 24 | @pytest.fixture 25 | def data(): 26 | """ 27 | Create a data table. 28 | 29 | """ 30 | d1 = {'building_id': np.arange(10), 31 | 'price': (1e6*np.random.random(10)).astype(int)} 32 | 33 | df = pd.DataFrame(d1).set_index('building_id') 34 | 35 | orca.add_table('buildings', df) 36 | 37 | 38 | def test_template_validity(): 39 | """ 40 | Run the templates through the standard validation check. 41 | 42 | """ 43 | assert validate_template(SaveTable) 44 | 45 | 46 | def test_property_persistence(orca_session): 47 | """ 48 | Test persistence of properties across registration, saving, and reloading. 
49 | 50 | """ 51 | t = SaveTable() 52 | t.table = 'buildings' 53 | t.columns = ['window_panes', 'number_of_chimneys'] 54 | t.filters = 'number_of_chimneys > 15' 55 | t.output_type = 'csv' 56 | t.path = 'data/buildings.csv' 57 | t.extra_settings = {'make_data_awesome': True} 58 | t.name = 'save-buildings-csv' 59 | t.tags = ['awesome', 'chimneys'] 60 | 61 | d1 = t.to_dict() 62 | modelmanager.register(t) 63 | modelmanager.initialize() 64 | d2 = modelmanager.get_step(t.name).to_dict() 65 | 66 | assert d1 == d2 67 | modelmanager.remove_step(t.name) 68 | 69 | 70 | def test_csv(orca_session, data): 71 | """ 72 | Test saving data to a CSV file. 73 | 74 | """ 75 | t = SaveTable() 76 | t.table = 'buildings' 77 | t.output_type = 'csv' 78 | t.path = 'data/buildings.csv' 79 | 80 | t.run() 81 | 82 | df = pd.read_csv(t.path).set_index('building_id') 83 | assert(df.equals(orca.get_table(t.table).to_frame())) 84 | 85 | os.remove(t.path) 86 | 87 | 88 | def test_hdf(orca_session, data): 89 | """ 90 | Test saving data to an HDF file. 91 | 92 | """ 93 | t = SaveTable() 94 | t.table = 'buildings' 95 | t.output_type = 'hdf' 96 | t.path = 'data/buildings.h5' 97 | 98 | t.run() 99 | 100 | df = pd.read_hdf(t.path) 101 | assert(df.equals(orca.get_table(t.table).to_frame())) 102 | 103 | os.remove(t.path) 104 | 105 | 106 | def test_columns(orca_session, data): 107 | """ 108 | Test requesting specific columns. 109 | 110 | """ 111 | update_column(table = 'buildings', 112 | column = 'price2', 113 | data = (1e6*np.random.random(10)).astype(int)) 114 | 115 | t = SaveTable() 116 | t.table = 'buildings' 117 | t.columns = 'price2' 118 | t.output_type = 'csv' 119 | t.path = 'data/buildings.csv' 120 | 121 | t.run() 122 | 123 | df = pd.read_csv(t.path).set_index('building_id') 124 | assert(list(df.columns) == ['price2']) 125 | 126 | 127 | def test_filters(orca_session, data): 128 | """ 129 | Test applying data filters before table is saved. 
130 | 131 | """ 132 | t = SaveTable() 133 | t.table = 'buildings' 134 | t.filters = 'price < 200000' 135 | t.output_type = 'csv' 136 | t.path = 'data/buildings.csv' 137 | 138 | t.run() 139 | 140 | df = pd.read_csv(t.path).set_index('building_id') 141 | assert(len(df) < 10) 142 | 143 | os.remove(t.path) 144 | 145 | 146 | def test_extra_settings(orca_session, data): 147 | """ 148 | """ 149 | pass 150 | 151 | 152 | def test_dynamic_paths(orca_session): 153 | """ 154 | Test inserting run id, model iteration, or timestamp into path. 155 | 156 | """ 157 | t = SaveTable() 158 | t.path = '%RUN%-%ITER%' 159 | 160 | assert(t.get_dynamic_filepath() == '0-0') 161 | 162 | orca.add_injectable('run_id', 5) 163 | orca.add_injectable('iter_var', 3) 164 | 165 | assert(t.get_dynamic_filepath() == '5-3') 166 | 167 | t.path = '%TS%' 168 | s = t.get_dynamic_filepath() 169 | assert(len(s) == 15) 170 | 171 | 172 | -------------------------------------------------------------------------------- /tests/test_large_multinomial_logit.py: -------------------------------------------------------------------------------- 1 | import orca 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | from choicemodels import MultinomialLogitResults 7 | 8 | from urbansim_templates import modelmanager 9 | from urbansim_templates.models import LargeMultinomialLogitStep 10 | from urbansim_templates.utils import validate_template 11 | 12 | 13 | @pytest.fixture 14 | def orca_session(): 15 | d1 = {'oid': np.arange(10), 16 | 'obsval': np.random.random(10), 17 | 'choice': np.random.choice(np.arange(20), size=10)} 18 | 19 | d2 = {'aid': np.arange(20), 20 | 'altval': np.random.random(20)} 21 | 22 | obs = pd.DataFrame(d1).set_index('oid') 23 | orca.add_table('obs', obs) 24 | 25 | alts = pd.DataFrame(d2).set_index('aid') 26 | orca.add_table('alts', alts) 27 | 28 | 29 | def test_template_validity(): 30 | """ 31 | Run the template through the standard validation check. 
32 | 33 | """ 34 | assert validate_template(LargeMultinomialLogitStep) 35 | 36 | 37 | def test_observation_sampling(orca_session): 38 | modelmanager.initialize() 39 | 40 | m = LargeMultinomialLogitStep() 41 | m.choosers = 'obs' 42 | m.alternatives = 'alts' 43 | m.choice_column = 'choice' 44 | m.model_expression = 'obsval + altval' 45 | 46 | m.fit() 47 | assert(len(m.mergedchoicetable.to_frame()) == 200) 48 | 49 | m.chooser_sample_size = 5 50 | m.fit() 51 | assert(len(m.mergedchoicetable.to_frame()) == 100) 52 | 53 | m.name = 'mnl-test' 54 | modelmanager.register(m) 55 | 56 | modelmanager.initialize() 57 | m = modelmanager.get_step('mnl-test') 58 | assert(m.chooser_sample_size == 5) 59 | 60 | modelmanager.remove_step('mnl-test') 61 | 62 | 63 | @pytest.fixture 64 | def data(): 65 | num_obs = 100 66 | num_alts = 120 67 | 68 | d1 = {'oid': np.arange(num_obs), 69 | 'obsval': np.random.random(num_obs), 70 | 'choice': np.random.choice(np.arange(num_alts), size=num_obs)} 71 | 72 | d2 = {'aid': np.arange(num_alts), 73 | 'altval': np.random.random(num_alts)} 74 | 75 | obs = pd.DataFrame(d1).set_index('oid') 76 | orca.add_table('obs', obs) 77 | 78 | alts = pd.DataFrame(d2).set_index('aid') 79 | orca.add_table('alts', alts) 80 | 81 | 82 | @pytest.fixture 83 | def m(data): 84 | """ 85 | Build a fitted model. 86 | 87 | """ 88 | m = LargeMultinomialLogitStep() 89 | m.choosers = 'obs' 90 | m.alternatives = 'alts' 91 | m.choice_column = 'choice' 92 | m.model_expression = 'obsval + altval' 93 | m.alt_sample_size = 10 94 | 95 | m.fit() 96 | return m 97 | 98 | 99 | def test_property_persistence(m): 100 | """ 101 | Test persistence of properties across registration, saving, and reloading. 
102 | 103 | """ 104 | m.fit() 105 | m.name = 'my-model' 106 | m.tags = ['tag1'] 107 | m.chooser_filters = 'filters1' 108 | m.chooser_sample_size = 100 109 | m.alt_filters = 'filter2' 110 | m.out_choosers = 'choosers2' 111 | m.out_alternatives = 'alts2' 112 | m.out_column = 'choices' 113 | m.out_chooser_filters = 'filters3' 114 | m.out_alt_filters = 'filters4' 115 | m.constrained_choices = True 116 | m.alt_capacity = 'cap' 117 | m.chooser_size = 'size' 118 | m.max_iter = 17 119 | 120 | d1 = m.to_dict() 121 | modelmanager.initialize() 122 | modelmanager.register(m) 123 | modelmanager.initialize() 124 | d2 = modelmanager.get_step('my-model').to_dict() 125 | 126 | assert d1 == d2 127 | modelmanager.remove_step('my-model') 128 | 129 | 130 | def test_simulation_unconstrained(m): 131 | """ 132 | Test simulation chooser filters with unconstrained choices. 133 | 134 | """ 135 | obs = orca.get_table('obs').to_frame() 136 | obs.loc[:24, 'choice'] = -1 137 | orca.add_table('obs', obs) 138 | 139 | m.out_chooser_filters = 'choice == -1' 140 | m.run() 141 | 142 | assert len(m.choices) == 25 143 | 144 | obs = orca.get_table('obs').to_frame() 145 | assert sum(obs.choice == -1) == 0 146 | assert obs.loc[:24, 'choice'].equals(m.choices) 147 | 148 | 149 | def test_simulation_single_occupancy(m): 150 | """ 151 | Test simulation of single-occupancy choices. 152 | 153 | """ 154 | m.constrained_choices = True 155 | m.run() 156 | 157 | obs = orca.get_table('obs').to_frame() 158 | assert len(obs) == len(obs.choice.unique()) 159 | 160 | 161 | def test_simulation_constrained(m): 162 | """ 163 | Test simulation of choices with explicit capacities and sizes. 
164 | 165 | """ 166 | obs = orca.get_table('obs').to_frame() 167 | obs.loc[:,'choice'] = -1 168 | obs['size'] = np.random.choice([1,2], size=len(obs)) 169 | orca.add_table('obs', obs) 170 | 171 | alts = orca.get_table('alts').to_frame() 172 | alts['cap'] = np.random.choice([1,2,3], size=len(alts)) 173 | orca.add_table('alts', alts) 174 | 175 | m.constrained_choices = True 176 | m.alt_capacity = 'cap' 177 | m.chooser_size = 'size' 178 | m.run() 179 | 180 | obs = orca.get_table('obs').to_frame() 181 | assert all(~obs.choice.isin([-1])) 182 | 183 | 184 | def test_simulation_no_valid_choosers(m): 185 | """ 186 | If there are no valid choosers after applying filters, simulation should exit. 187 | 188 | """ 189 | m.out_chooser_filters = 'choice == -1' 190 | m.run() 191 | 192 | 193 | def test_simulation_no_valid_alternatives(m): 194 | """ 195 | If there are no valid alternatives after applying filters, simulation should exit. 196 | 197 | """ 198 | m.out_alt_filters = 'altval == -1' 199 | m.run() 200 | 201 | 202 | def test_output_column_autocreation(m): 203 | """ 204 | Test on-the-fly creation of the output column. 205 | 206 | """ 207 | m.out_column = 'potato_chips' 208 | m.run() 209 | 210 | assert('potato_chips' in orca.get_table('obs').columns) 211 | assert(m.choices.equals(orca.get_table('obs').to_frame()['potato_chips'])) 212 | 213 | 214 | def test_diagnostic_attributes(data): 215 | """ 216 | Test that diagnostic attributes are available when expected. 
217 | 218 | """ 219 | m = LargeMultinomialLogitStep() 220 | m.choosers = 'obs' 221 | m.alternatives = 'alts' 222 | m.choice_column = 'choice' 223 | m.model_expression = 'obsval + altval' 224 | m.alt_sample_size = 10 225 | 226 | assert(m.model is None) 227 | assert(m.mergedchoicetable is None) 228 | assert(m.probabilities is None) 229 | assert(m.choices is None) 230 | 231 | m.fit() 232 | 233 | assert(isinstance(m.model, MultinomialLogitResults)) 234 | 235 | len_mct = len(m.mergedchoicetable.to_frame()) 236 | len_obs_alts = len(orca.get_table(m.choosers).to_frame()) * m.alt_sample_size 237 | 238 | assert(len_mct == len_obs_alts) 239 | 240 | name = m.name 241 | modelmanager.register(m) 242 | modelmanager.initialize() 243 | m = modelmanager.get_step(name) 244 | 245 | assert(isinstance(m.model, MultinomialLogitResults)) 246 | 247 | m.run() 248 | 249 | len_mct = len(m.mergedchoicetable.to_frame()) 250 | len_probs = len(m.probabilities) 251 | len_choices = len(m.choices) 252 | len_obs = len(orca.get_table(m.choosers).to_frame()) 253 | len_obs_alts = len_obs * m.alt_sample_size 254 | 255 | assert(len_mct == len_obs_alts) 256 | assert(len_probs == len_obs_alts) 257 | assert(len_choices == len_obs) 258 | 259 | modelmanager.remove_step(name) 260 | 261 | 262 | def test_simulation_join_key_as_filter(m): 263 | """ 264 | This tests that it's possible to use a join key as a both a data filter for one of 265 | the tables, and as a choice column for the model. 266 | 267 | This came up because MergedChoiceTable doesn't allow the observations and 268 | alternatives to have any column names in common -- the rationale is to maintain data 269 | traceability by avoiding any of-the-fly renaming or dropped columns. 270 | 271 | In the templates, in order to support things like using 'households.building_id' as a 272 | filter column and 'buildings.building_id' as a choice column, we apply the filters 273 | and then drop columns that are no longer needed before merging the tables. 
274 | 275 | """ 276 | obs = orca.get_table('obs') 277 | obs['aid'] = obs.get_column('choice') 278 | 279 | m.out_choosers = 'obs' 280 | m.out_chooser_filters = 'aid > 50' 281 | m.out_alternatives = 'alts' 282 | m.out_column = 'aid' 283 | 284 | m.run() 285 | 286 | -------------------------------------------------------------------------------- /tests/test_regression.py: -------------------------------------------------------------------------------- 1 | import orca 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | from urbansim_templates import modelmanager 7 | from urbansim_templates.models import OLSRegressionStep 8 | from urbansim_templates.utils import validate_template 9 | 10 | 11 | @pytest.fixture 12 | def orca_session(): 13 | d1 = {'a': np.random.random(100), 14 | 'b': np.random.random(100)} 15 | 16 | obs = pd.DataFrame(d1) 17 | orca.add_table('obs', obs) 18 | 19 | 20 | def test_template_validity(): 21 | """ 22 | Run the template through the standard validation check. 23 | 24 | """ 25 | assert validate_template(OLSRegressionStep) 26 | 27 | 28 | def test_ols(orca_session): 29 | """ 30 | For now this just tests that the code runs. 31 | 32 | """ 33 | modelmanager.initialize() 34 | 35 | m = OLSRegressionStep() 36 | m.tables = 'obs' 37 | m.model_expression = 'a ~ b' 38 | 39 | m.fit() 40 | 41 | m.name = 'ols-test' 42 | modelmanager.register(m) 43 | 44 | modelmanager.initialize() 45 | m = modelmanager.get_step('ols-test') 46 | 47 | modelmanager.remove_step('ols-test') 48 | 49 | 50 | def test_simulation(orca_session): 51 | """ 52 | Test that predicted values are correctly written to Orca. 
53 | 54 | """ 55 | modelmanager.initialize() 56 | 57 | m = OLSRegressionStep() 58 | m.tables = 'obs' 59 | m.model_expression = 'a ~ b' 60 | m.fit() 61 | 62 | m.out_column = 'a_predicted' 63 | m.run() 64 | 65 | assert orca.get_table('obs').to_frame()['a_predicted'].equals(m.predicted_values) 66 | 67 | 68 | def test_out_transform(orca_session): 69 | """ 70 | Test transformation of the predicted values. 71 | 72 | """ 73 | modelmanager.initialize() 74 | 75 | m = OLSRegressionStep() 76 | m.tables = 'obs' 77 | m.model_expression = 'a ~ b' 78 | m.fit() 79 | 80 | m.out_column = 'a_predicted' 81 | m.out_transform = 'np.exp' 82 | m.run() 83 | 84 | predictions = m.predicted_values.apply(np.exp) 85 | 86 | assert orca.get_table('obs').to_frame()['a_predicted'].equals(predictions) 87 | 88 | -------------------------------------------------------------------------------- /tests/test_segmented_large_multinomial_logit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pytest 4 | 5 | import orca 6 | from urbansim.models.util import apply_filter_query 7 | 8 | from urbansim_templates import modelmanager 9 | from urbansim_templates.models import SegmentedLargeMultinomialLogitStep 10 | from urbansim_templates.utils import get_data, validate_template 11 | 12 | 13 | @pytest.fixture 14 | def orca_session(): 15 | """ 16 | Set up a clean Orca session with a couple of data tables. 
17 | 18 | """ 19 | d1 = {'oid': np.arange(100), 20 | 'group': np.random.choice(['A','B','C'], size=100), 21 | 'int_group': np.random.choice([3,4], size=100), 22 | 'obsval': np.random.random(100), 23 | 'choice': np.random.choice(np.arange(20), size=100)} 24 | 25 | d2 = {'aid': np.arange(20), 26 | 'altval': np.random.random(20)} 27 | 28 | obs = pd.DataFrame(d1).set_index('oid') 29 | orca.add_table('obs', obs) 30 | 31 | alts = pd.DataFrame(d2).set_index('aid') 32 | orca.add_table('alts', alts) 33 | 34 | 35 | @pytest.fixture 36 | def orca_session_alts_as_list(): 37 | """ 38 | Set up a clean Orca session with a couple of data tables. 39 | 40 | """ 41 | d1 = {'oid': np.arange(100), 42 | 'group': np.random.choice(['A', 'B', 'C'], size=100), 43 | 'int_group': np.random.choice([3, 4], size=100), 44 | 'obsval': np.random.random(100), 45 | 'choice': np.random.choice(np.arange(20), size=100)} 46 | 47 | d2 = {'aid': np.arange(20), 48 | 'altval': np.random.random(20)} 49 | 50 | d3 = {'aid': np.arange(20), 51 | 'altval_2': np.random.random(20)} 52 | 53 | obs = pd.DataFrame(d1).set_index('oid') 54 | orca.add_table('obs', obs) 55 | 56 | d2_df = pd.DataFrame(d2).set_index('aid') 57 | orca.add_table('d2', d2_df) 58 | 59 | d3_df = pd.DataFrame(d3).set_index('aid') 60 | orca.add_table('d3', d3_df) 61 | 62 | orca.broadcast('d3', 'd2', cast_index=True, onto_index=True) 63 | 64 | 65 | @pytest.fixture 66 | def m_alts_as_list(orca_session_alts_as_list): 67 | """ 68 | Set up a partially configured model step with multiple 69 | tables of alternatives 70 | """ 71 | m = SegmentedLargeMultinomialLogitStep() 72 | m.defaults.choosers = 'obs' 73 | m.defaults.alternatives = ['d2', 'd3'] 74 | m.defaults.choice_column = 'choice' 75 | m.defaults.model_expression = 'obsval + altval + altval_2' 76 | m.segmentation_column = 'group' 77 | return m 78 | 79 | 80 | @pytest.fixture 81 | def m(orca_session): 82 | """ 83 | Set up a partially configured model step. 
84 | 85 | """ 86 | m = SegmentedLargeMultinomialLogitStep() 87 | m.defaults.choosers = 'obs' 88 | m.defaults.alternatives = 'alts' 89 | m.defaults.choice_column = 'choice' 90 | m.defaults.model_expression = 'obsval + altval' 91 | m.segmentation_column = 'group' 92 | return m 93 | 94 | 95 | def test_template_validity(): 96 | """ 97 | Run the template through the standard validation check. 98 | 99 | """ 100 | assert validate_template(SegmentedLargeMultinomialLogitStep) 101 | 102 | 103 | def test_basic_operation(m): 104 | """ 105 | Test basic operation of the template. 106 | 107 | """ 108 | m.fit_all() 109 | m.to_dict() 110 | assert len(m.submodels) == 3 111 | 112 | def test_basic_operation_alts_as_list(m_alts_as_list): 113 | """ 114 | Test basic operation of the template. 115 | 116 | """ 117 | m = m_alts_as_list 118 | m.fit_all() 119 | m.to_dict() 120 | assert len(m.submodels) == 3 121 | 122 | def test_basic_operation(m): 123 | """ 124 | Test basic operation of the template. 125 | 126 | """ 127 | m.fit_all() 128 | m.to_dict() 129 | assert len(m.submodels) == 3 130 | 131 | 132 | def test_numeric_segments(m): 133 | """ 134 | Test support for using ints as categorical variables. 135 | 136 | """ 137 | m.segmentation_column = 'int_group' 138 | m.build_submodels() 139 | assert len(m.submodels) == 2 140 | 141 | 142 | def test_chooser_filters(m): 143 | """ 144 | Test that the default chooser filters generate the correct data subset. 145 | 146 | """ 147 | m.defaults.chooser_filters = "group != 'A'" 148 | m.build_submodels() 149 | assert len(m.submodels) == 2 150 | 151 | m.defaults.chooser_filters = ["group != 'A'", "group != 'B'"] 152 | m.build_submodels() 153 | assert len(m.submodels) == 1 154 | 155 | 156 | def test_alternative_filters(m): 157 | """ 158 | Test that the default alternative filters generate the correct data subset. 
159 | 160 | """ 161 | m.defaults.alt_filters = 'aid < 5' 162 | 163 | df = orca.get_table(m.defaults.choosers).to_frame() 164 | len1 = len(df.loc[df.choice < 5]) 165 | len2 = len(m.get_segmentation_column()) 166 | 167 | assert len1 == len2 168 | 169 | 170 | def test_alternative_filters_for_alts_as_list(m_alts_as_list): 171 | """ 172 | Test that the default alternative filters generate the correct data subset. 173 | 174 | """ 175 | m = m_alts_as_list 176 | m.defaults.alt_filters = 'altval_2 < 0.5' 177 | 178 | m.build_submodels() 179 | for k, v in m.submodels.items(): 180 | alts = get_data(tables = v.alternatives, filters = v.alt_filters) 181 | assert alts['altval_2'].max() < 0.5 182 | 183 | 184 | def test_submodel_filters(m): 185 | """ 186 | Test that submodel filters generate the correct data subset. 187 | 188 | """ 189 | m.build_submodels() 190 | 191 | df = orca.get_table(m.defaults.choosers).to_frame() 192 | len1 = len(apply_filter_query(df.loc[df.group == 'A'], m.defaults.chooser_filters)) 193 | len2 = len(apply_filter_query(df, m.submodels['A'].chooser_filters)) 194 | 195 | assert len1 == len2 196 | 197 | 198 | def test_property_persistence(m): 199 | """ 200 | Test persistence of properties across registration, saving, and reloading. 201 | 202 | """ 203 | m.name = 'test' 204 | m.tags = ['one','two'] 205 | m.fit_all() 206 | d1 = m.to_dict() 207 | modelmanager.initialize() 208 | modelmanager.register(m) 209 | modelmanager.initialize() 210 | d2 = modelmanager.get_step('test').to_dict() 211 | assert d1 == d2 212 | modelmanager.remove_step('test') 213 | 214 | 215 | def test_filter_generation(m): 216 | """ 217 | Test additional cases of generating submodel filters. 
218 | 219 | """ 220 | m.defaults.chooser_filters = 'obsval > 0.5' 221 | m.build_submodels() 222 | assert m.submodels['A'].chooser_filters == ['obsval > 0.5', "group == 'A'"] 223 | 224 | m.defaults.chooser_filters = ['obsval > 0.5', 'obsval < 0.9'] 225 | m.build_submodels() 226 | assert m.submodels['A'].chooser_filters == \ 227 | ['obsval > 0.5', 'obsval < 0.9', "group == 'A'"] 228 | 229 | 230 | @pytest.fixture 231 | def d(): 232 | d = {'choosers': 'a', 233 | 'alternatives': 'b', 234 | 'model_expression': 'c', 235 | 'choice_column': 'd', 236 | 'chooser_sample_size': 'f', 237 | 'alt_sample_size': 'h', 238 | 'out_choosers': 'i', 239 | 'out_alternatives': 'j', 240 | 'out_column': 'k', 241 | 'out_chooser_filters': 'l', 242 | 'out_alt_filters': 'm'} 243 | return d 244 | 245 | 246 | def test_initial_propagation_of_defaults(m): 247 | """ 248 | Test that submodels receive properties of the defaults object. 249 | 250 | """ 251 | d = m.defaults.to_dict() 252 | 253 | m.build_submodels() 254 | 255 | d2 = m.submodels['A'].to_dict() 256 | for k, v in d.items(): 257 | if k != 'chooser_filters': 258 | assert d2[k] == v 259 | 260 | 261 | def test_subsequent_propagation_of_defaults(m, d): 262 | """ 263 | Test that submodels are updated correctly when the defaults are subsequently changed. 264 | 265 | """ 266 | m.build_submodels() 267 | 268 | for k, v in d.items(): 269 | setattr(m.defaults, k, v) 270 | 271 | d2 = m.submodels['A'].to_dict() 272 | for k, v in d.items(): 273 | assert d2[k] == v 274 | 275 | # these should NOT be passed to the submodels 276 | m.defaults.chooser_filters = 'test' 277 | assert m.submodels['A'].chooser_filters != 'test' 278 | 279 | m.defaults.alt_filters = 'test' 280 | assert m.submodels['A'].alt_filters != 'test' 281 | 282 | 283 | def test_independence_of_submodels(m, d): 284 | """ 285 | Test that updating one submodel does not change others. 
286 | 287 | """ 288 | m.build_submodels() 289 | 290 | for k, v in d.items(): 291 | setattr(m.submodels['A'], k, v) 292 | 293 | d2 = m.submodels['B'].to_dict() 294 | for k, v in d.items(): 295 | assert d2[k] != v 296 | 297 | 298 | -------------------------------------------------------------------------------- /tests/test_shared_core.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import pytest 4 | 5 | from urbansim_templates.shared import CoreTemplateSettings 6 | 7 | 8 | def test_property_persistence(): 9 | """ 10 | Confirm CoreTemplateSettings properties persist through to_dict() and from_dict(). 11 | 12 | """ 13 | obj = CoreTemplateSettings() 14 | obj.name = 'name' 15 | obj.tags = ['tag1', 'tag2'] 16 | obj.notes = 'notes' 17 | obj.autorun = True 18 | obj.template = 'CoolNewTemplate' 19 | obj.template_version = '0.1.dev0' 20 | 21 | d = obj.to_dict() 22 | print(d) 23 | 24 | obj2 = CoreTemplateSettings.from_dict(d) 25 | assert(obj2.to_dict() == d) 26 | 27 | -------------------------------------------------------------------------------- /tests/test_shared_output_column.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import pytest 6 | 7 | import orca 8 | 9 | from urbansim_templates.shared import OutputColumnSettings, register_column 10 | 11 | 12 | def test_property_persistence(): 13 | """ 14 | Confirm OutputColumnSettings properties persist through to_dict() and from_dict(). 
15 | 16 | """ 17 | obj = OutputColumnSettings() 18 | obj.column_name = 'column' 19 | obj.table = 'table' 20 | obj.data_type = 'int32' 21 | obj.missing_values = 5 22 | obj.cache = True 23 | obj.cache_scope = 'iteration' 24 | 25 | d = obj.to_dict() 26 | print(d) 27 | 28 | obj2 = OutputColumnSettings.from_dict(d) 29 | assert(obj2.to_dict() == d) 30 | 31 | 32 | # Tests for register_column().. 33 | 34 | @pytest.fixture 35 | def orca_session(): 36 | """ 37 | Set up a clean Orca session, with a data table. 38 | 39 | """ 40 | orca.clear_all() 41 | 42 | df = pd.DataFrame({'a': [0.1, 1.33, 2.4]}, index=[1,2,3]) 43 | orca.add_table('tab', df) 44 | 45 | 46 | def test_column_registration(orca_session): 47 | """ 48 | Confirm column registration works. 49 | 50 | """ 51 | series = pd.Series([4,5,6], index=[1,2,3]) 52 | 53 | def build_column(): 54 | return series 55 | 56 | settings = OutputColumnSettings(column_name='col', table='tab') 57 | register_column(build_column, settings) 58 | 59 | assert(orca.get_table('tab').get_column('col').equals(series)) 60 | 61 | 62 | def test_filling_missing_values(orca_session): 63 | """ 64 | Confirm that filling missing values works. 65 | 66 | """ 67 | series1 = pd.Series([4.0, np.nan, 6.0], index=[1,2,3]) 68 | series2 = pd.Series([4.0, 5.0, 6.0], index=[1,2,3]) 69 | 70 | def build_column(): 71 | return series1 72 | 73 | settings = OutputColumnSettings(column_name='col', table='tab', missing_values=5) 74 | register_column(build_column, settings) 75 | 76 | assert(orca.get_table('tab').get_column('col').equals(series2)) 77 | 78 | 79 | def test_casting_data_type(orca_session): 80 | """ 81 | Confirm that filling missing values works. 
82 | 83 | """ 84 | series1 = pd.Series([4.0, 5.0, 6.0], index=[1,2,3]) 85 | series2 = pd.Series([4, 5, 6], index=[1,2,3]) 86 | 87 | def build_column(): 88 | return series1 89 | 90 | settings = OutputColumnSettings(column_name='col', table='tab', data_type='int') 91 | register_column(build_column, settings) 92 | 93 | assert(orca.get_table('tab').get_column('col').equals(series2)) 94 | 95 | 96 | -------------------------------------------------------------------------------- /tests/test_small_multinomial_logit.py: -------------------------------------------------------------------------------- 1 | import orca 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | from collections import OrderedDict 6 | 7 | from urbansim_templates import modelmanager 8 | from urbansim_templates.models import SmallMultinomialLogitStep 9 | from urbansim_templates.utils import validate_template 10 | 11 | 12 | @pytest.fixture 13 | def orca_session(): 14 | d1 = {'id': np.arange(100), 15 | 'building_id': np.arange(100), 16 | 'a': np.random.random(100), 17 | 'choice': np.random.randint(3, size=100)} 18 | 19 | d2 = {'building_id': np.arange(100), 20 | 'b': np.random.random(100)} 21 | 22 | households = pd.DataFrame(d1).set_index('id') 23 | orca.add_table('households', households) 24 | 25 | buildings = pd.DataFrame(d2).set_index('building_id') 26 | orca.add_table('buildings', buildings) 27 | 28 | orca.broadcast(cast='buildings', onto='households', 29 | cast_index=True, onto_on='building_id') 30 | 31 | 32 | def test_template_validity(): 33 | """ 34 | Run the template through the standard validation check. 35 | 36 | """ 37 | assert validate_template(SmallMultinomialLogitStep) 38 | 39 | 40 | def test_small_mnl(orca_session): 41 | """ 42 | Test that the code runs, and that the model_expression is always available. 
43 | 44 | """ 45 | modelmanager.initialize() 46 | 47 | m = SmallMultinomialLogitStep() 48 | m.tables = ['households', 'buildings'] 49 | m.choice_column = 'choice' 50 | m.model_expression = OrderedDict([ 51 | ('intercept', [1,2]), ('a', [0,2]), ('b', [0,2])]) 52 | 53 | m.fit() 54 | assert(m.model_expression is not None) 55 | 56 | print(m.model_expression) 57 | 58 | m.name = 'small-mnl-test' 59 | modelmanager.register(m) 60 | assert(m.model_expression is not None) 61 | 62 | print(m.model_expression) 63 | 64 | # TEST SIMULATION 65 | m.out_column = 'simulated_choice' 66 | 67 | m.run() 68 | print(orca.get_table('households').to_frame()) 69 | 70 | modelmanager.initialize() 71 | m = modelmanager.get_step('small-mnl-test') 72 | assert(m.model_expression is not None) 73 | 74 | print(m.model_expression) 75 | 76 | modelmanager.remove_step('small-mnl-test') -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pytest 4 | 5 | import orca 6 | 7 | from urbansim_templates import utils 8 | 9 | 10 | def test_parse_version(): 11 | assert utils.parse_version('0.1.0.dev0') == (0, 1, 0, 0) 12 | assert utils.parse_version('0.115.3') == (0, 115, 3, None) 13 | assert utils.parse_version('3.1.dev7') == (3, 1, 0, 7) 14 | assert utils.parse_version('5.4') == (5, 4, 0, None) 15 | 16 | def test_version_greater_or_equal(): 17 | assert utils.version_greater_or_equal('2.0', '0.1.1') == True 18 | assert utils.version_greater_or_equal('0.1.1', '2.0') == False 19 | assert utils.version_greater_or_equal('2.1', '2.0.1') == True 20 | assert utils.version_greater_or_equal('2.0.1', '2.1') == False 21 | assert utils.version_greater_or_equal('1.1.3', '1.1.2') == True 22 | assert utils.version_greater_or_equal('1.1.2', '1.1.3') == False 23 | assert utils.version_greater_or_equal('1.1.3', '1.1.3') == True 24 | assert 
utils.version_greater_or_equal('1.1.3.dev1', '1.1.3.dev0') == True 25 | assert utils.version_greater_or_equal('1.1.3.dev0', '1.1.3') == False 26 | 27 | 28 | ############################### 29 | ## get_df 30 | 31 | @pytest.fixture 32 | def df(): 33 | d = {'id': [1,2,3], 'val1': [4,5,6], 'val2': [7,8,9]} 34 | return pd.DataFrame(d).set_index('id') 35 | 36 | 37 | def test_get_df_dataframe(df): 38 | """ 39 | Confirm that get_df() works when passed a DataFrame. 40 | 41 | """ 42 | df_out = utils.get_df(df) 43 | pd.testing.assert_frame_equal(df, df_out) 44 | 45 | 46 | def test_get_df_str(df): 47 | """ 48 | Confirm that get_df() works with str input. 49 | 50 | """ 51 | orca.add_table('df', df) 52 | df_out = utils.get_df('df') 53 | pd.testing.assert_frame_equal(df, df_out) 54 | 55 | 56 | def test_get_df_dataframewrapper(df): 57 | """ 58 | Confirm that get_df() works with orca.DataFrameWrapper input. 59 | 60 | """ 61 | dfw = orca.DataFrameWrapper('df', df) 62 | df_out = utils.get_df(dfw) 63 | pd.testing.assert_frame_equal(df, df_out) 64 | 65 | 66 | def test_get_df_tablefuncwrapper(df): 67 | """ 68 | Confirm that get_df() works with orca.TableFuncWrapper input. 69 | 70 | """ 71 | def df_callable(): 72 | return df 73 | 74 | tfw = orca.TableFuncWrapper('df', df_callable) 75 | df_out = utils.get_df(tfw) 76 | pd.testing.assert_frame_equal(df, df_out) 77 | 78 | 79 | def test_get_df_columns(df): 80 | """ 81 | Confirm that get_df() limits columns, and filters out duplicates and invalid ones. 82 | 83 | """ 84 | dfw = orca.DataFrameWrapper('df', df) 85 | df_out = utils.get_df(dfw, ['id', 'val1', 'val1', 'val3']) 86 | pd.testing.assert_frame_equal(df[['val1']], df_out) 87 | 88 | 89 | def test_get_df_unsupported_type(df): 90 | """ 91 | Confirm that get_df() raises an error for an unsupported type. 
92 | 93 | """ 94 | try: 95 | df_out = utils.get_df([df]) 96 | except ValueError as e: 97 | print(e) 98 | return 99 | 100 | pytest.fail() 101 | 102 | 103 | 104 | ############################### 105 | ## all_cols 106 | 107 | def test_all_cols_dataframe(df): 108 | """ 109 | Confirm that all_cols() works with DataFrame input. 110 | 111 | """ 112 | cols = utils.all_cols(df) 113 | assert sorted(cols) == sorted(['id', 'val1', 'val2']) 114 | 115 | 116 | def test_all_cols_orca(df): 117 | """ 118 | Confirm that all_cols() works with Orca input. 119 | 120 | """ 121 | orca.add_table('df', df) 122 | cols = utils.all_cols('df') 123 | assert sorted(cols) == sorted(['id', 'val1', 'val2']) 124 | 125 | 126 | def test_all_cols_extras(df): 127 | """ 128 | Confirm that all_cols() includes columns not part of the Orca core table. 129 | 130 | """ 131 | orca.add_table('df', df) 132 | orca.add_column('df', 'newcol', pd.Series()) 133 | cols = utils.all_cols('df') 134 | assert sorted(cols) == sorted(['id', 'val1', 'val2', 'newcol']) 135 | 136 | 137 | def test_all_cols_unsupported_type(df): 138 | """ 139 | Confirm that all_cols() raises an error for an unsupported type. 
140 | 141 | """ 142 | try: 143 | cols = utils.all_cols([df]) 144 | except ValueError as e: 145 | print(e) 146 | return 147 | 148 | pytest.fail() 149 | 150 | 151 | 152 | 153 | ############################### 154 | ## get_data 155 | 156 | @pytest.fixture 157 | def orca_session(): 158 | d1 = {'id': [1, 2, 3], 159 | 'building_id': [1, 2, 3], 160 | 'tenure': [1, 1, 0], 161 | 'age': [25, 45, 65]} 162 | 163 | d2 = {'building_id': [1, 2, 3], 164 | 'zone_id': [17, 17, 17], 165 | 'pop': [2, 2, 2]} 166 | 167 | d3 = {'zone_id': [17], 168 | 'pop': [500]} 169 | 170 | households = pd.DataFrame(d1).set_index('id') 171 | orca.add_table('households', households) 172 | 173 | buildings = pd.DataFrame(d2).set_index('building_id') 174 | orca.add_table('buildings', buildings) 175 | 176 | zones = pd.DataFrame(d3).set_index('zone_id') 177 | orca.add_table('zones', zones) 178 | 179 | orca.broadcast(cast='buildings', onto='households', 180 | cast_index=True, onto_on='building_id') 181 | 182 | orca.broadcast(cast='zones', onto='buildings', 183 | cast_index=True, onto_on='zone_id') 184 | 185 | 186 | def test_get_data(orca_session): 187 | """ 188 | General test - multiple tables, binding filters, extra columns. 189 | 190 | """ 191 | df = utils.get_data(tables = ['households', 'buildings'], 192 | model_expression = 'tenure ~ pop', 193 | filters = ['age > 20', 'age < 50'], 194 | extra_columns = 'zone_id') 195 | 196 | assert(set(df.columns) == set(['tenure', 'pop', 'age', 'zone_id'])) 197 | assert(len(df) == 2) 198 | 199 | 200 | def test_get_data_single_table(orca_session): 201 | """ 202 | Single table, no other params. 203 | 204 | """ 205 | df = utils.get_data(tables = 'households') 206 | assert(len(df) == 3) 207 | 208 | 209 | def test_get_data_bad_columns(orca_session): 210 | """ 211 | Bad column name, should be ignored. 
212 | 213 | """ 214 | df = utils.get_data(tables = ['households', 'buildings'], 215 | model_expression = 'tenure ~ pop + potato') 216 | 217 | assert(set(df.columns) == set(['tenure', 'pop'])) 218 | 219 | 220 | def test_update_column(orca_session): 221 | """ 222 | General test. 223 | 224 | Additional tests to add: series without index, adding column on the fly. 225 | 226 | """ 227 | table = 'buildings' 228 | column = 'pop' 229 | data = pd.Series([3,3,3], index=[1,2,3]) 230 | 231 | utils.update_column(table, column, data) 232 | assert(orca.get_table(table).to_frame()[column].tolist() == [3,3,3]) 233 | 234 | 235 | def test_update_column_incomplete_series(orca_session): 236 | """ 237 | Update certain values but not others, with non-matching index orders. 238 | 239 | """ 240 | table = 'buildings' 241 | column = 'pop' 242 | data = pd.Series([10,5], index=[3,1]) 243 | 244 | utils.update_column(table, column, data) 245 | assert(orca.get_table(table).to_frame()[column].tolist() == [5,2,10]) 246 | 247 | 248 | def test_add_column_incomplete_series(orca_session): 249 | """ 250 | Add an incomplete column to confirm that it's aligned based on the index. (The ints 251 | will be cast to floats to accommodate the missing values.) 252 | 253 | """ 254 | table = 'buildings' 255 | column = 'pop2' 256 | data = pd.Series([10,5], index=[3,1]) 257 | 258 | utils.update_column(table, column, data) 259 | stored_data = orca.get_table(table).to_frame()[column].tolist() 260 | 261 | np.testing.assert_array_equal(stored_data, [5.0, np.nan, 10.0]) 262 | -------------------------------------------------------------------------------- /tests/test_utils_broadcasts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for the utilities for merging tables using implicit join keys instead of Orca 3 | broadcasts. 
4 | 5 | """ 6 | import pandas as pd 7 | import pytest 8 | 9 | import orca 10 | 11 | from urbansim_templates.utils import validate_table, validate_all_tables, merge_tables 12 | from urbansim_templates.utils import all_cols 13 | 14 | 15 | @pytest.fixture 16 | def orca_session(): 17 | """ 18 | Set up a clean Orca session. 19 | 20 | """ 21 | orca.clear_all() 22 | 23 | 24 | ############################### 25 | ## validate_tables() 26 | 27 | def test_validation_table_not_registered(orca_session): 28 | """ 29 | Table validation should raise a ValueError if the table isn't registered. 30 | 31 | """ 32 | try: 33 | validate_table('tab') 34 | except ValueError as e: 35 | print(e) 36 | return 37 | 38 | pytest.fail() # fail is ValueError wasn't raised 39 | 40 | 41 | def test_validation_index_unnamed(orca_session): 42 | """ 43 | Table validation should raise a ValueError if index is unnamed. 44 | 45 | """ 46 | d = {'id': [1,1,3], 'value': [4,4,4]} 47 | orca.add_table('tab', pd.DataFrame(d)) # generates auto index without a name 48 | 49 | try: 50 | validate_table('tab') 51 | except ValueError as e: 52 | print(e) 53 | return 54 | 55 | pytest.fail() # fail if ValueError wasn't raised 56 | 57 | 58 | def test_validation_duplicate_colnames(orca_session): 59 | """ 60 | Table validation should raise a ValueError if columns share a name with index. 61 | 62 | """ 63 | d = {'id1': [1,1,3], 'id2': [3,3,9], 'value': [4,4,4]} 64 | df = pd.DataFrame(d).set_index(['id1', 'id2']) 65 | df['id2'] = [10,10,10] # column with same name as one of the multi-index levels 66 | orca.add_table('tab', df) 67 | 68 | try: 69 | validate_table('tab') 70 | except ValueError as e: 71 | print(e) 72 | return 73 | 74 | pytest.fail() # fail if ValueError wasn't raised 75 | 76 | 77 | def test_validation_index_unique(orca_session): 78 | """ 79 | Table validation should pass if the index is unique. 
80 | 81 | These tests of the validate() method generate Orca tables directly, which is just a 82 | shortcut for testing -- the intended use is for the method to validate the table 83 | loaded by the TableStep. 84 | 85 | """ 86 | d = {'id': [1,2,3], 'value': [4,4,4]} 87 | orca.add_table('tab', pd.DataFrame(d).set_index('id')) 88 | 89 | validate_table('tab') 90 | 91 | 92 | def test_validation_index_not_unique(orca_session): 93 | """ 94 | Table validation should raise a ValueError if the index is not unique. 95 | 96 | """ 97 | d = {'id': [1,1,3], 'value': [4,4,4]} 98 | orca.add_table('tab', pd.DataFrame(d).set_index('id')) 99 | 100 | try: 101 | validate_table('tab') 102 | except ValueError as e: 103 | print(e) 104 | return 105 | 106 | pytest.fail() # fail if ValueError wasn't raised 107 | 108 | 109 | def test_validation_multiindex_unique(orca_session): 110 | """ 111 | Table validation should pass with a MultiIndex whose combinations are unique. 112 | 113 | """ 114 | d = {'id': [1,1,1], 'sub_id': [1,2,3], 'value': [4,4,4]} 115 | orca.add_table('tab', pd.DataFrame(d).set_index(['id', 'sub_id'])) 116 | 117 | validate_table('tab') 118 | 119 | 120 | def test_validation_multiindex_not_unique(orca_session): 121 | """ 122 | Table validation should raise a ValueError if the MultiIndex combinations are not 123 | unique. 124 | 125 | """ 126 | d = {'id': [1,1,1], 'sub_id': [2,2,3], 'value': [4,4,4]} 127 | orca.add_table('tab', pd.DataFrame(d).set_index(['id', 'sub_id'])) 128 | 129 | try: 130 | validate_table('tab') 131 | except ValueError as e: 132 | print(e) 133 | return 134 | 135 | pytest.fail() # fail if ValueError wasn't raised 136 | 137 | 138 | def test_validation_columns_vs_other_indexes(orca_session): 139 | """ 140 | Table validation should compare the 'households.building_id' column to 141 | 'buildings.build_id'. 
142 | 143 | """ 144 | d = {'household_id': [1,2,3], 'building_id': [2,3,4]} 145 | orca.add_table('households', pd.DataFrame(d).set_index('household_id')) 146 | 147 | d = {'building_id': [1,2,3,4], 'value': [4,4,4,4]} 148 | orca.add_table('buildings', pd.DataFrame(d).set_index('building_id')) 149 | 150 | validate_table('households') 151 | 152 | 153 | def test_validation_index_vs_other_columns(orca_session): 154 | """ 155 | Table validation should compare the 'households.building_id' column to 156 | 'buildings.build_id'. 157 | 158 | """ 159 | d = {'building_id': [1,2,3,4], 'value': [4,4,4,4]} 160 | orca.add_table('buildings', pd.DataFrame(d).set_index('building_id')) 161 | 162 | d = {'household_id': [1,2,3], 'building_id': [2,3,5]} 163 | orca.add_table('households', pd.DataFrame(d).set_index('household_id')) 164 | 165 | validate_table('buildings') 166 | 167 | 168 | def test_validation_reciprocal_false(orca_session): 169 | """ 170 | This combination should not produce any column comparisons. 171 | 172 | """ 173 | d = {'building_id': [1,2,3,4], 'value': [4,4,4,4]} 174 | orca.add_table('buildings', pd.DataFrame(d).set_index('building_id')) 175 | 176 | d = {'household_id': [1,2,3], 'building_id': [2,3,5]} 177 | orca.add_table('households', pd.DataFrame(d).set_index('household_id')) 178 | 179 | print("Begin reciprocal test") 180 | validate_table('buildings', reciprocal=False) 181 | print("End reciprocal test") 182 | 183 | 184 | def test_validation_with_multiindexes(orca_session): 185 | """ 186 | Here, table validation should compare 'choice_table.[home_tract,work_tract]' to 187 | 'distances.[home_tract,work_tract]'. 
188 | 189 | """ 190 | d = {'obs_id': [1,1,1,1], 'alt_id': [1,2,3,4], 191 | 'home_tract': [55,55,55,55], 'work_tract': [17,46,19,55]} 192 | orca.add_table('choice_table', pd.DataFrame(d).set_index(['obs_id','alt_id'])) 193 | 194 | d = {'home_tract': [55,55,55], 'work_tract': [17,18,19], 'dist': [1,1,1]} 195 | orca.add_table('distances', pd.DataFrame(d).set_index(['home_tract','work_tract'])) 196 | 197 | validate_table('choice_table') 198 | 199 | 200 | def test_validate_all_tables(orca_session): 201 | """ 202 | 203 | """ 204 | d = {'building_id': [1,2,3,4], 'value': [4,4,4,4]} 205 | orca.add_table('buildings', pd.DataFrame(d).set_index('building_id')) 206 | 207 | d = {'household_id': [1,2,3], 'building_id': [2,3,5]} 208 | orca.add_table('households', pd.DataFrame(d).set_index('household_id')) 209 | 210 | validate_all_tables() 211 | 212 | 213 | ############################### 214 | ## merge_tables() 215 | 216 | def test_merge_two_tables(): 217 | """ 218 | Merge two tables. 219 | 220 | """ 221 | d = {'building_id': [1,2,3,4], 'value': [4,4,4,4]} 222 | buildings = pd.DataFrame(d).set_index('building_id') 223 | 224 | d = {'household_id': [1,2,3], 'building_id': [2,3,4]} 225 | households = pd.DataFrame(d).set_index('household_id') 226 | 227 | merged = merge_tables([households, buildings]) 228 | assert sorted(all_cols(merged)) == sorted(['household_id', 'building_id', 'value']) 229 | 230 | 231 | def test_merge_three_tables(): 232 | """ 233 | Merge three tables. 
234 | 235 | """ 236 | d = {'zone_id': [1], 'size': [1]} 237 | zones = pd.DataFrame(d).set_index('zone_id') 238 | 239 | d = {'building_id': [1,2,3,4], 'zone_id': [1,1,1,1], 'height': [4,4,4,4]} 240 | buildings = pd.DataFrame(d).set_index('building_id') 241 | 242 | d = {'household_id': [1,2,3], 'building_id': [2,3,4]} 243 | households = pd.DataFrame(d).set_index('household_id') 244 | 245 | merged = merge_tables([households, buildings, zones]) 246 | assert sorted(all_cols(merged)) == sorted( 247 | ['household_id', 'building_id', 'zone_id', 'height', 'size']) 248 | 249 | 250 | def test_merge_three_tables_out_of_order(): 251 | """ 252 | Merge three tables, where the second and third are each merged onto the first. 253 | 254 | """ 255 | d = {'zone_id': [1], 'size': [1]} 256 | zones = pd.DataFrame(d).set_index('zone_id') 257 | 258 | d = {'building_id': [1,2,3,4], 'height': [4,4,4,4]} 259 | buildings = pd.DataFrame(d).set_index('building_id') 260 | 261 | d = {'household_id': [1,2,3], 'building_id': [2,3,4], 'zone_id': [1,1,1]} 262 | households = pd.DataFrame(d).set_index('household_id') 263 | 264 | merged = merge_tables([households, buildings, zones]) 265 | assert sorted(all_cols(merged)) == sorted( 266 | ['household_id', 'building_id', 'zone_id', 'height', 'size']) 267 | 268 | 269 | def test_merge_tables_limit_columns(): 270 | """ 271 | Merge tables and remove some of the columns. 
272 | 273 | """ 274 | d = {'zone_id': [1], 'size': [1]} 275 | zones = pd.DataFrame(d).set_index('zone_id') 276 | 277 | d = {'building_id': [1,2,3,4], 'zone_id': [1,1,1,1], 'height': [4,4,4,4]} 278 | buildings = pd.DataFrame(d).set_index('building_id') 279 | 280 | d = {'household_id': [1,2,3], 'building_id': [2,3,4]} 281 | households = pd.DataFrame(d).set_index('household_id') 282 | 283 | merged = merge_tables([households, buildings, zones], 284 | columns=['zone_id', 'height', 'size']) 285 | assert sorted(all_cols(merged)) == sorted( 286 | ['household_id', 'zone_id', 'height', 'size']) 287 | 288 | 289 | def test_merge_tables_duplicate_column_names(): 290 | """ 291 | Confirm tables can be merged with overlapping column names, as long as they're not 292 | included in the list of columns to retain. 293 | 294 | """ 295 | d = {'building_id': [1,2,3,4], 'value': [4,4,4,4], 'dupe': [1,1,1,1]} 296 | buildings = pd.DataFrame(d).set_index('building_id') 297 | 298 | d = {'household_id': [1,2,3], 'building_id': [2,3,4], 'dupe': [1,1,1]} 299 | households = pd.DataFrame(d).set_index('household_id') 300 | 301 | # Duplicate columns should raise a ValueError 302 | try: 303 | merged = merge_tables([households, buildings]) 304 | pytest.fail() 305 | except ValueError as e: 306 | print(e) 307 | 308 | # Excluding the duplicated name should make things ok 309 | merged = merge_tables([households, buildings], columns=['value']) 310 | assert sorted(all_cols(merged)) == sorted(['household_id', 'value']) 311 | 312 | 313 | def test_merge_tables_multiindex(): 314 | """ 315 | Merge tables where the source table has a multi-index. 
316 | 317 | """ 318 | d = {'building_id': [1,1,2,2], 'unit_id': [1,2,1,2], 'value': [4,4,4,4]} 319 | units = pd.DataFrame(d).set_index(['building_id', 'unit_id']) 320 | 321 | d = {'household_id': [1,2,3], 'building_id': [1,1,2], 'unit_id': [1,2,1]} 322 | households = pd.DataFrame(d).set_index('household_id') 323 | 324 | merged = merge_tables([households, units]) 325 | assert sorted(all_cols(merged)) == sorted( 326 | ['household_id', 'building_id', 'unit_id', 'value']) 327 | 328 | 329 | def test_merge_tables_missing_values(): 330 | """ 331 | If the target table includes identifiers not found in the source table, missing 332 | values should be inserted, changing the data type. 333 | 334 | """ 335 | d = {'building_id': [1,1,2,2], 'unit_id': [1,2,1,2], 'value': [4,4,4,4]} 336 | units = pd.DataFrame(d).set_index(['building_id', 'unit_id']) 337 | 338 | d = {'household_id': [1,2,3], 'building_id': [1,1,3], 'unit_id': [1,2,1]} 339 | households = pd.DataFrame(d).set_index('household_id') 340 | 341 | merged = merge_tables([households, units]) 342 | assert units.value.dtype == 'int64' 343 | assert merged.values.dtype == 'float64' 344 | 345 | 346 | -------------------------------------------------------------------------------- /urbansim_templates/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/* -------------------------------------------------------------------------------- /urbansim_templates/__init__.py: -------------------------------------------------------------------------------- 1 | version = __version__ = '0.2.dev9' 2 | -------------------------------------------------------------------------------- /urbansim_templates/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .column_from_expression import ColumnFromExpression, ExpressionSettings 2 | from .load_table import LoadTable 3 | from .save_table import SaveTable 4 | 
-------------------------------------------------------------------------------- /urbansim_templates/data/column_from_expression.py: -------------------------------------------------------------------------------- 1 | import orca 2 | import pandas as pd 3 | 4 | from urbansim_templates import modelmanager, shared, utils, __version__ 5 | from urbansim_templates.shared import CoreTemplateSettings, OutputColumnSettings 6 | 7 | 8 | class ExpressionSettings(): 9 | """ 10 | Stores custom parameters used by the 11 | :mod:`~urbansim_templates.data.ColumnFromExpression` template. Parameters can be 12 | passed to the constructor or set as attributes. 13 | 14 | Parameters 15 | ---------- 16 | table : str, optional 17 | Name of Orca table the expression will be evaluated on. Required before running 18 | then template. 19 | 20 | expression : str, optional 21 | String describing operations on existing columns of the table, for example 22 | "a/log(b+c)". Required before running. Supports arithmetic and math functions 23 | including sqrt, abs, log, log1p, exp, and expm1 -- see Pandas ``df.eval()`` 24 | documentation for further details. 25 | 26 | """ 27 | def __init__(self, table = None, expression = None): 28 | self.table = table 29 | self.expression = expression 30 | 31 | @classmethod 32 | def from_dict(cls, d): 33 | return cls(table=d['table'], expression=d['expression']) 34 | 35 | def to_dict(self): 36 | return {'table': self.table, 'expression': self.expression} 37 | 38 | 39 | @modelmanager.template 40 | class ColumnFromExpression(): 41 | """ 42 | Template to register a column of derived data with Orca, based on an expression. 43 | Parameters may be passed to the constructor, but they are easier to set as 44 | attributes. The expression can refer to any columns in the same table, and will be 45 | evaluated using ``df.eval()``. Values will be calculated lazily, only when the column 46 | is needed for a specific operation. 
47 | 48 | Parameters 49 | ---------- 50 | meta : :mod:`~urbansim_templates.shared.CoreTemplateSettings`, optional 51 | Standard parameters. This template sets the default value of ``meta.autorun`` 52 | to True. 53 | 54 | data : :mod:`~urbansim_templates.data.ExpressionSettings`, optional 55 | Special parameters for this template. 56 | 57 | output : :mod:`~urbansim_templates.shared.OutputColumnSettings`, optional 58 | Parameters for the column that will be generated. This template uses 59 | ``data.table`` as the default value for ``output.table``. 60 | 61 | """ 62 | def __init__(self, meta=None, data=None, output=None): 63 | 64 | self.meta = CoreTemplateSettings(autorun=True) if meta is None else meta 65 | self.meta.template = self.__class__.__name__ 66 | self.meta.template_version = __version__ 67 | 68 | self.data = ExpressionSettings() if data is None else data 69 | self.output = OutputColumnSettings() if output is None else output 70 | 71 | 72 | @classmethod 73 | def from_dict(cls, d): 74 | """ 75 | Create a class instance from a saved dictionary. 76 | 77 | """ 78 | if 'meta' not in d: 79 | return cls.from_dict_0_2_dev5(d) 80 | 81 | return cls( 82 | meta = CoreTemplateSettings.from_dict(d['meta']), 83 | data = ExpressionSettings.from_dict(d['data']), 84 | output = OutputColumnSettings.from_dict(d['output'])) 85 | 86 | 87 | @classmethod 88 | def from_dict_0_2_dev5(cls, d): 89 | """ 90 | Converter to read saved data from 0.2.dev5 or earlier. Automatically invoked by 91 | ``from_dict()`` as needed. 
92 | 93 | """ 94 | return cls( 95 | meta = CoreTemplateSettings( 96 | name = d['name'], 97 | tags = d['tags'], 98 | autorun = d['autorun']), 99 | data = ExpressionSettings( 100 | table = d['table'], 101 | expression = d['expression']), 102 | output = OutputColumnSettings( 103 | column_name = d['column_name'], 104 | data_type = d['data_type'], 105 | missing_values = d['missing_values'], 106 | cache = d['cache'], 107 | cache_scope = d['cache_scope'])) 108 | 109 | 110 | def to_dict(self): 111 | """ 112 | Create a dictionary representation of the object. 113 | 114 | """ 115 | return { 116 | 'meta': self.meta.to_dict(), 117 | 'data': self.data.to_dict(), 118 | 'output': self.output.to_dict()} 119 | 120 | 121 | def run(self): 122 | """ 123 | Run the template, registering a column of derived data with Orca. Requires values 124 | to be set for ``data.table``, ``data.expression``, and ``output.column_name``. 125 | 126 | """ 127 | if self.data.table is None: 128 | raise ValueError("Please provide a table") 129 | 130 | if self.data.expression is None: 131 | raise ValueError("Please provide an expression") 132 | 133 | if self.output.column_name is None: 134 | raise ValueError("Please provide a column name") 135 | 136 | settings = self.output 137 | 138 | if settings.table is None: 139 | settings.table = self.data.table 140 | 141 | cols = utils.cols_in_expression(self.data.expression) 142 | 143 | def build_column(): 144 | df = utils.get_df(self.data.table, columns=cols) 145 | series = df.eval(self.data.expression) 146 | return series 147 | 148 | shared.register_column(build_column, settings) 149 | 150 | -------------------------------------------------------------------------------- /urbansim_templates/data/load_table.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | try: 4 | import pathlib # Python 3.4+ 5 | except: 6 | pass 7 | 8 | import os 9 | 10 | import orca 11 | import pandas as pd 12 | 13 
# ----------------------------------------------------------------------
# urbansim_templates/data/load_table.py
# ----------------------------------------------------------------------

from __future__ import print_function

try:
    import pathlib  # Python 3.4+
except ImportError:  # narrowed from a bare except: only an import can fail here
    pass

import os

import orca
import pandas as pd

from urbansim_templates import modelmanager, __version__


@modelmanager.template
class LoadTable():
    """
    Template for registering data tables from local CSV or HDF files. Parameters can be
    passed to the constructor or set as attributes.

    An instance of this template class stores *instructions for loading a data table*,
    packaged into an Orca step. Running the instructions registers the table with Orca.

    Parameters
    ----------
    table : str, optional
        Name of the Orca table to be created. Must be provided before running the step.

    source_type : 'csv' or 'hdf', optional
        Source type. Must be provided before running the step.

    path : str, optional
        Local file path to load data from, either absolute or relative to the
        ModelManager config directory. Please provide a Unix-style path (this will work
        on any platform, but a Windows-style path won't, and they're hard to normalize
        automatically).

    url : str, optional - NOT YET IMPLEMENTED
        Remote url to download file from.

    csv_index_cols : str or list of str, optional
        Required for tables loaded from csv.

    extra_settings : dict, optional
        Additional arguments to pass to ``pd.read_csv()`` or ``pd.read_hdf()``. For
        example, you could automatically extract csv data from a gzip file using
        {'compression': 'gzip'}, or specify the table identifier within a multi-object
        hdf store using {'key': 'table-name'}. See Pandas documentation for additional
        settings.

    orca_test_spec : dict, optional - NOT YET IMPLEMENTED
        Data characteristics to be tested when the table is validated.

    cache : bool, default True
        Passed to ``orca.table()``. Note that the default is True, unlike in the
        underlying general-purpose Orca function, because tables read from disk should
        not need to be regenerated during the course of a model run.

    cache_scope : 'step', 'iteration', or 'forever', default 'forever'
        Passed to ``orca.table()``. Default is 'forever', as in Orca.

    copy_col : bool, default True
        Passed to ``orca.table()``. Default is True, as in Orca.

    name : str, optional
        Name of the model step.

    tags : list of str, optional
        Tags, passed to ModelManager.

    autorun : bool, default True
        Automatically run the step whenever it's registered with ModelManager.

    """
    def __init__(self,
            table = None,
            source_type = None,
            path = None,
            csv_index_cols = None,
            extra_settings = None,
            cache = True,
            cache_scope = 'forever',
            copy_col = True,
            name = None,
            tags = None,
            autorun = True):

        # Template-specific params. The dict/list values are normalized from None
        # here so a single mutable default object isn't shared across instances
        # (the previous `extra_settings={}` / `tags=[]` defaults were).
        self.table = table
        self.source_type = source_type
        self.path = path
        self.csv_index_cols = csv_index_cols
        self.extra_settings = {} if extra_settings is None else extra_settings
        self.cache = cache
        self.cache_scope = cache_scope
        self.copy_col = copy_col

        # Standard params
        self.name = name
        self.tags = [] if tags is None else tags
        self.autorun = autorun

        # Automatic params
        self.template = self.__class__.__name__
        self.template_version = __version__


    @classmethod
    def from_dict(cls, d):
        """
        Create an object instance from a saved dictionary representation.

        Parameters
        ----------
        d : dict

        Returns
        -------
        LoadTable

        """
        return cls(
            table = d['table'],
            source_type = d['source_type'],
            path = d['path'],
            csv_index_cols = d['csv_index_cols'],
            extra_settings = d['extra_settings'],
            cache = d['cache'],
            cache_scope = d['cache_scope'],
            copy_col = d['copy_col'],
            name = d['name'],
            tags = d['tags'],
            autorun = d['autorun'])


    def to_dict(self):
        """
        Create a dictionary representation of the object.

        Returns
        -------
        dict

        """
        return {
            'template': self.template,
            'template_version': self.template_version,
            'name': self.name,
            'tags': self.tags,
            'autorun': self.autorun,
            'table': self.table,
            'source_type': self.source_type,
            'path': self.path,
            'csv_index_cols': self.csv_index_cols,
            'extra_settings': self.extra_settings,
            'cache': self.cache,
            'cache_scope': self.cache_scope,
            'copy_col': self.copy_col}


    def run(self):
        """
        Register a data table with Orca.

        Requires values to be set for ``table``, ``source_type``, and ``path``. CSV data
        also requires ``csv_index_cols``.

        Returns
        -------
        None

        """
        if self.table is None:
            raise ValueError("Please provide a table name")

        if self.source_type not in ['csv', 'hdf']:
            raise ValueError("Please provide a source type of 'csv' or 'hdf'")

        if self.path is None:
            raise ValueError("Please provide a file path")

        # NOTE(review): self.path is used as-is below, so relative paths resolve
        # against the current working directory, although the class docstring says
        # relative to the ModelManager config directory -- confirm which is intended.
        kwargs = self.extra_settings

        # Table from CSV file
        if self.source_type == 'csv':
            if self.csv_index_cols is None:
                raise ValueError("Please provide index column name(s) for the csv")

            @orca.table(table_name = self.table,
                        cache = self.cache,
                        cache_scope = self.cache_scope,
                        copy_col = self.copy_col)
            def orca_table():
                # Loaded lazily the first time Orca evaluates the table
                return pd.read_csv(self.path, **kwargs).set_index(self.csv_index_cols)

        # Table from HDF file
        elif self.source_type == 'hdf':
            @orca.table(table_name = self.table,
                        cache = self.cache,
                        cache_scope = self.cache_scope,
                        copy_col = self.copy_col)
            def orca_table():
                return pd.read_hdf(self.path, **kwargs)


# ----------------------------------------------------------------------
# urbansim_templates/data/save_table.py
# ----------------------------------------------------------------------
# (the original `from __future__ import print_function` is omitted here; it
# already appears above and is a no-op on the Python 3 versions this package
# supports per .travis.yml)

import datetime

import orca
import pandas as pd

from urbansim_templates import modelmanager, __version__
from urbansim_templates.utils import get_data


@modelmanager.template
class SaveTable():
    """
    Template for saving Orca tables to local CSV or HDF5 files. Parameters can be passed
    to the constructor or set as attributes.

    Parameters
    ----------
    table : str, optional
        Name of the Orca table. Must be provided before running the step.

    columns : str or list of str, optional
        Names of columns to include. ``None`` will return all columns. Indexes will
        always be included.

    filters : str or list of str, optional
        Filters to apply to the data before saving. Will be passed to
        ``pd.DataFrame.query()``.

    output_type : 'csv' or 'hdf', optional
        Type of file to be created. Must be provided before running the step.

    path : str, optional
        Local file path to save the data to, either absolute or relative to the
        ModelManager config directory. Please provide a Unix-style path (this will work
        on any platform, but a Windows-style path won't, and they're hard to normalize
        automatically). For dynamic file names, you can include the characters "%RUN%",
        "%ITER%", or "%TS%". These will be replaced by the run id, the model iteration
        value, or a timestamp when the output file is created.

    extra_settings : dict, optional
        Additional arguments to pass to ``pd.to_csv()`` or ``pd.to_hdf()``. For example,
        you could automatically compress csv data using {'compression': 'gzip'}, or
        specify a custom table name for an hdf store using {'key': 'table-name'}. See
        Pandas documentation for additional settings.

    name : str, optional
        Name of the model step.

    tags : list of str, optional
        Tags, passed to ModelManager.

    """
    def __init__(self,
            table = None,
            columns = None,
            filters = None,
            output_type = None,
            path = None,
            extra_settings = None,
            name = None,
            tags = None):

        # Template-specific params
        self.table = table
        self.columns = columns
        self.filters = filters
        self.output_type = output_type
        self.path = path
        self.extra_settings = extra_settings

        # Standard params. `tags` is normalized from None so a single mutable
        # default list isn't shared across instances.
        self.name = name
        self.tags = [] if tags is None else tags

        # Automatic params
        self.template = self.__class__.__name__
        self.template_version = __version__


    @classmethod
    def from_dict(cls, d):
        """
        Create an object instance from a saved dictionary representation.

        Parameters
        ----------
        d : dict

        Returns
        -------
        SaveTable

        """
        return cls(
            table = d['table'],
            columns = d['columns'],
            filters = d['filters'],
            output_type = d['output_type'],
            path = d['path'],
            extra_settings = d['extra_settings'],
            name = d['name'],
            tags = d['tags'])


    def to_dict(self):
        """
        Create a dictionary representation of the object.

        Returns
        -------
        dict

        """
        return {
            'template': self.template,
            'template_version': self.template_version,
            'name': self.name,
            'tags': self.tags,
            'table': self.table,
            'columns': self.columns,
            'filters': self.filters,
            'output_type': self.output_type,
            'path': self.path,
            'extra_settings': self.extra_settings}


    def get_dynamic_filepath(self):
        """
        Substitute run id, model iteration, and/or timestamp into the filename.

        For the run id and model iteration, we look for Orca injectables named
        ``run_id`` and ``iter_var``, respectively. If none is found, we use ``0``.

        The timestamp is UTC, formatted as ``YYYYMMDD-HHMMSS``.

        Returns
        -------
        str

        """
        if self.path is None:
            raise ValueError("Please provide a file path")

        run = 0
        if orca.is_injectable('run_id'):
            run = orca.get_injectable('run_id')

        iteration = 0  # renamed from `iter`, which shadowed the builtin
        if orca.is_injectable('iter_var'):
            iteration = orca.get_injectable('iter_var')

        ts = datetime.datetime.utcnow().strftime('%Y%m%d-%H%M%S')

        s = self.path
        s = s.replace('%RUN%', str(run))
        s = s.replace('%ITER%', str(iteration))
        s = s.replace('%TS%', ts)

        return s


    def run(self):
        """
        Save a table to disk.

        Saving a table to an HDF store requires providing a ``key`` that will be used to
        identify the table in the store. We'll use the Orca table name, unless you
        provide a different ``key`` in the ``extra_settings``.

        Returns
        -------
        None

        """
        if self.output_type not in ['csv', 'hdf']:
            raise ValueError("Please provide an output type of 'csv' or 'hdf'")

        if self.table is None:
            raise ValueError("Please provide the table name")

        if self.path is None:
            raise ValueError("Please provide a file path")

        # Shallow-copy so that adding an hdf 'key' below can't mutate the
        # caller's extra_settings dict (the previous code modified it in place)
        kwargs = dict(self.extra_settings) if self.extra_settings else {}

        df = get_data(tables = self.table,
                      filters = self.filters,
                      extra_columns = self.columns)

        if self.output_type == 'csv':
            df.to_csv(self.get_dynamic_filepath(), **kwargs)

        elif self.output_type == 'hdf':
            if 'key' not in kwargs:
                kwargs['key'] = self.table

            df.to_hdf(self.get_dynamic_filepath(), **kwargs)
# ----------------------------------------------------------------------
# urbansim_templates/modelmanager.py
# ----------------------------------------------------------------------

from __future__ import print_function

import os
import copy
import pickle
from collections import OrderedDict

import orca
from urbansim.utils import yamlio

from .__init__ import __version__
from .utils import update_name, version_greater_or_equal


_templates = {}     # global registry of template classes
_steps = {}         # global registry of model steps in memory
_disk_store = None  # path to saved steps on disk


def template(cls):
    """
    Decorator for ModelManager-compliant template classes. Place
    `@modelmanager.template` on the line before a class definition.

    This makes the class available to ModelManager (e.g. for reading saved steps from
    disk) whenever it's imported.

    """
    _templates[cls.__name__] = cls
    return cls


def initialize(path='configs'):
    """
    Load saved model steps from disk. Each file in the directory will be checked for
    compliance with the ModelManager YAML format and then loaded into memory.

    If run multiple times, steps will be cleared from memory and re-loaded.

    Parameters
    ----------
    path : str
        Path to config directory, either absolute or relative to the Python working
        directory

    """
    if not os.path.exists(path):
        print("Path not found: {}".format(os.path.join(os.getcwd(), path)))
        # TO DO - automatically create directory if run again after warning?
        return

    global _steps, _disk_store
    _steps = {}  # clear memory
    _disk_store = path  # save initialization path

    # endswith() is clearer and safer than slicing for the extension check
    files = [os.path.join(path, f) for f in os.listdir(path)
             if f.endswith('.yaml')]

    if len(files) == 0:
        print("No yaml files found in path '{}'".format(path))
        return

    steps = []
    for f in files:
        d = yamlio.yaml_to_dict(str_or_buffer=f)
        if 'modelmanager_version' in d:
            # TO DO - check that file name matches object name in the file?
            if version_greater_or_equal(d['modelmanager_version'], '0.1.dev8'):
                # This is the version that switched from a single file to multiple
                # files with one object stored in each
                steps.append(d)

    if len(steps) == 0:
        print("No files from ModelManager 0.1.dev8 or later found in path '{}'"\
                .format(path))

    for d in steps:
        # TO DO - check for this key, to be safe
        step = build_step(d['saved_object'])
        register(step, save_to_disk=False)


def build_step(d):
    """
    Build a model step object from a saved dictionary. This includes loading
    supplemental objects from disk.

    Parameters
    ----------
    d : dict
        Representation of a model step.

    Returns
    -------
    object

    """
    template = d['meta']['template'] if 'meta' in d else d['template']

    if 'supplemental_objects' in d:
        for i, item in enumerate(d['supplemental_objects']):
            # NOTE(review): the step name is read from the top level of `d` even
            # when 'meta' is present -- confirm that meta-style steps never carry
            # supplemental objects, or read d['meta']['name'] for them.
            content = load_supplemental_object(d['name'], **item)
            d['supplemental_objects'][i]['content'] = content

    return _templates[template].from_dict(d)


def load_supplemental_object(step_name, name, content_type, required=True):
    """
    Load a supplemental object from disk.

    Parameters
    ----------
    step_name : str
        Name of the associated model step.
    name : str
        Name of the supplemental object.
    content_type : str
        Currently supports 'pickle'.
    required : bool, optional
        Whether the supplemental object is required (not yet supported).

    Returns
    -------
    object

    """
    # Unknown content types fall through and return None, matching the prior
    # behavior; only 'pickle' is supported so far
    if content_type == 'pickle':
        with open(os.path.join(_disk_store, step_name+'-'+name+'.pkl'), 'rb') as f:
            return pickle.load(f)


def register(step, save_to_disk=True):
    """
    Register a model step with ModelManager and Orca. This includes saving it to disk,
    optionally, so it can be automatically loaded in the future.

    Registering a step will overwrite any previously loaded step with the same name. If
    a name has not yet been assigned, one will be generated from the template name and a
    timestamp.

    If the model step includes an attribute 'autorun' that's set to True, the step will
    run after being registered.

    Parameters
    ----------
    step : object

    Returns
    -------
    None

    """
    # Currently supporting both step.name and step.meta.name
    if hasattr(step, 'meta'):
        # TO DO: move the name updating to CoreTemplateSettings?
        step.meta.name = update_name(step.meta.template, step.meta.name)
        name = step.meta.name

    else:
        step.name = update_name(step.template, step.name)
        name = step.name

    if save_to_disk:
        save_step_to_disk(step)

    print("Registering model step '{}'".format(name))

    _steps[name] = step

    # Create a callable that runs the model step, and register it with orca
    def run_step():
        return step.run()

    orca.add_step(name, run_step)

    # Collapse the duplicated meta/flat branching into one autorun lookup
    if hasattr(step, 'meta'):
        autorun = step.meta.autorun
    else:
        autorun = getattr(step, 'autorun', False)

    if autorun:
        orca.run([name])


def list_steps():
    """
    Return a list of registered steps, with name, template, and tags for each.

    Returns
    -------
    list of dicts, ordered by name

    """
    steps = []
    for k in sorted(_steps.keys()):
        # Meta-style steps also report a 'notes' field
        if hasattr(_steps[k], 'meta'):
            steps += [{'name': _steps[k].meta.name,
                       'template': _steps[k].meta.template,
                       'tags': _steps[k].meta.tags,
                       'notes': _steps[k].meta.notes}]
        else:
            steps += [{'name': _steps[k].name,
                       'template': _steps[k].template,
                       'tags': _steps[k].tags}]
    return steps


def save_step_to_disk(step):
    """
    Save a model step to disk, over-writing the previous file. The file will be named
    'model-name.yaml' and will be saved to the initialization directory.

    """
    name = step.meta.name if hasattr(step, 'meta') else step.name

    if _disk_store is None:
        print("Please run 'modelmanager.initialize()' before registering new model steps")
        return

    print("Saving '{}.yaml': {}".format(name,
            os.path.join(os.getcwd(), _disk_store)))

    d = step.to_dict()

    # Save supplemental objects, then strip their content from the yaml payload
    if 'supplemental_objects' in d:
        for item in filter(None, d['supplemental_objects']):
            save_supplemental_object(name, **item)
            del item['content']

    # Save main yaml file
    headers = {'modelmanager_version': __version__}

    content = OrderedDict(headers)
    content.update({'saved_object': d})

    yamlio.convert_to_yaml(content, os.path.join(_disk_store, name+'.yaml'))


def save_supplemental_object(step_name, name, content, content_type, required=True):
    """
    Save a supplemental object to disk.

    Parameters
    ----------
    step_name : str
        Name of the associated model step.
    name : str
        Name of the supplemental object.
    content : obj
        Object to save.
    content_type : str
        Currently supports 'pickle'.
    required : bool, optional
        Whether the supplemental object is required (not yet supported).

    """
    if content_type == 'pickle':
        # NOTE(review): assumes `content` exposes to_pickle() (e.g. a pandas
        # object) -- confirm for non-pandas supplemental objects
        content.to_pickle(os.path.join(_disk_store, step_name+'-'+name+'.pkl'))


def get_step(name):
    """
    Return the class representation of a registered step, by name.

    Parameters
    ----------
    name : str

    Returns
    -------
    instance of a template class

    """
    # Deep copy so callers can modify the result without touching the registry
    return copy.deepcopy(_steps[name])


def remove_step(name):
    """
    Remove a model step, by name. It will immediately be removed from ModelManager and
    from disk, but will remain registered in Orca until the current Python process
    terminates.

    Parameters
    ----------
    name : str

    """
    print("Removing '{}' and '{}.yaml'".format(name, name))

    d = _steps[name].to_dict()

    if 'supplemental_objects' in d:
        for item in filter(None, d['supplemental_objects']):
            remove_supplemental_object(name, item['name'], item['content_type'])

    del _steps[name]
    os.remove(os.path.join(_disk_store, name+'.yaml'))


def remove_supplemental_object(step_name, name, content_type):
    """
    Remove a supplemental object from disk.

    Parameters
    ----------
    step_name : str
        Name of the associated model step.
    name : str
        Name of the supplemental object.
    content_type : str
        Currently supports 'pickle'.

    """
    # TO DO - check that the file exists first

    if content_type == 'pickle':
        os.remove(os.path.join(_disk_store, step_name+'-'+name+'.pkl'))


def get_config_dir():
    """
    Return the config directory, for other services that need to interoperate.

    Returns
    -------
    str

    """
    return _disk_store


# ----------------------------------------------------------------------
# urbansim_templates/models/__init__.py
# ----------------------------------------------------------------------

from .binary_logit import BinaryLogitStep
from .large_multinomial_logit import LargeMultinomialLogitStep
from .regression import OLSRegressionStep
from .segmented_large_multinomial_logit import SegmentedLargeMultinomialLogitStep
from .shared import TemplateStep
from .small_multinomial_logit import SmallMultinomialLogitStep
# ----------------------------------------------------------------------
# urbansim_templates/models/binary_logit.py
# ----------------------------------------------------------------------

from __future__ import print_function

import numpy as np
import pandas as pd
import patsy
from datetime import datetime as dt
from statsmodels.api import Logit

import orca

from .. import modelmanager
from ..utils import get_data
from .shared import TemplateStep


@modelmanager.template
class BinaryLogitStep(TemplateStep):
    """
    A class for building binary logit model steps. This extends TemplateStep, where some
    common functionality is defined. Estimation is handled by Statsmodels and simulation
    is handled within this class.

    Expected usage:
    - create a model object
    - specify some parameters
    - run the `fit()` method
    - iterate as needed

    Then, for simulation:
    - specify some simulation parameters
    - use the `run()` method for interactive testing
    - use `modelmanager.register()` to save the model to Orca and disk
    - registered steps can be accessed via ModelManager and Orca

    All parameters listed in the constructor can be set directly on the class object,
    at any time.

    Parameters
    ----------
    tables : str or list of str, optional
        Name(s) of Orca tables to draw data from. The first table is the primary one.
        Any additional tables need to have merge relationships ("broadcasts") specified
        so that they can be merged unambiguously onto the first table. Among them, the
        tables must contain all variables used in the model expression and filters. The
        left-hand-side variable should be in the primary table. The `tables` parameter
        is required for fitting a model, but it does not have to be provided when the
        object is created.

    model_expression : str, optional
        Patsy formula containing both the left- and right-hand sides of the model
        expression: http://patsy.readthedocs.io/en/latest/formulas.html
        This parameter is required for fitting a model, but it does not have to be
        provided when the object is created.

    filters : str or list of str, optional
        Filters to apply to the data before fitting the model. These are passed to
        `pd.DataFrame.query()`. Filters are applied after any additional tables are
        merged onto the primary one. Replaces the `fit_filters` argument in UrbanSim.

    out_tables : str or list of str, optional
        Name(s) of Orca tables to use for simulation. If not provided, the `tables`
        parameter will be used. Same guidance applies: the tables must be able to be
        merged unambiguously, and must include all columns used in the right-hand-side
        of the model expression and in the `out_filters`.

    out_column : str, optional
        Name of the column to write simulated choices to. If it does not already exist
        in the primary output table, it will be created. If not provided, the left-hand-
        side variable from the model expression will be used. Replaces the `out_fname`
        argument in UrbanSim.

        # TO DO - auto-generation not yet working; column must exist in the primary table

    out_filters : str or list of str, optional
        Filters to apply to the data before simulation. If not provided, no filters
        will be applied. Replaces the `predict_filters` argument in UrbanSim.

    out_value_true : numeric or str, optional
        Value to save to the output column corresponding to an affirmative choice.
        Default is 1 (int). Use keyword 'nothing' to leave values unchanged.

    out_value_false : numeric or str, optional
        Value to save to the output column corresponding to a negative choice. Default
        is 0 (int). Use keyword 'nothing' to leave values unchanged.

    name : str, optional
        Name of the model step, passed to ModelManager. If none is provided, a name is
        generated each time the `fit()` method runs.

    tags : list of str, optional
        Tags, passed to ModelManager.

    """
    def __init__(self, tables=None, model_expression=None, filters=None, out_tables=None,
            out_column=None, out_filters=None, out_value_true=1, out_value_false=0,
            name=None, tags=None):

        # Parent class can initialize the standard parameters. `tags` is
        # normalized from None so a single mutable default list isn't shared
        # across instances (the previous `tags=[]` default was).
        TemplateStep.__init__(self, tables=tables, model_expression=model_expression,
                filters=filters, out_tables=out_tables, out_column=out_column,
                out_transform=None, out_filters=out_filters, name=name,
                tags=[] if tags is None else tags)

        # Custom parameters not in parent class
        self.out_value_true = out_value_true
        self.out_value_false = out_value_false

        # Placeholders for model fit data, filled in by fit() or from_dict()
        self.summary_table = None
        self.fitted_parameters = None


    @classmethod
    def from_dict(cls, d):
        """
        Create an object instance from a saved dictionary representation.

        Parameters
        ----------
        d : dict

        Returns
        -------
        BinaryLogitStep

        """
        # Pass values from the dictionary to the __init__() method
        obj = cls(tables=d['tables'], model_expression=d['model_expression'],
                filters=d['filters'], out_tables=d['out_tables'],
                out_column=d['out_column'], out_filters=d['out_filters'],
                out_value_true=d['out_value_true'], out_value_false=d['out_value_false'],
                name=d['name'], tags=d['tags'])

        obj.summary_table = d['summary_table']
        obj.fitted_parameters = d['fitted_parameters']

        return obj


    def to_dict(self):
        """
        Create a dictionary representation of the object.

        Returns
        -------
        dict

        """
        d = TemplateStep.to_dict(self)

        # Add parameters not in parent class
        d.update({
            'out_value_true': self.out_value_true,
            'out_value_false': self.out_value_false,
            'summary_table': self.summary_table,
            'fitted_parameters': self.fitted_parameters
        })
        return d


    def fit(self):
        """
        Fit the model; save and report results. This currently uses the Statsmodels
        Logit class with default estimation settings. (It will shift to ChoiceModels
        once more infrastructure is in place.)

        The `fit()` method can be run as many times as desired. Results will not be
        saved with Orca or ModelManager until the `register()` method is run.

        Parameters
        ----------
        None

        Returns
        -------
        None

        """
        # TO DO - verify that params are in place for estimation

        # Workaround for a temporary statsmodels bug:
        # https://github.com/statsmodels/statsmodels/issues/3931
        from scipy import stats
        stats.chisqprob = lambda chisq, df: stats.chi2.sf(chisq, df)

        df = get_data(tables = self.tables,
                      filters = self.filters,
                      model_expression = self.model_expression)

        m = Logit.from_formula(data=df, formula=self.model_expression)
        results = m.fit()

        # NOTE(review): this runs unconditionally; presumably _generate_name()
        # (defined in TemplateStep) preserves a user-provided name -- confirm
        self.name = self._generate_name()
        self.summary_table = str(results.summary())
        print(self.summary_table)

        # For now, we can just save the summary table and the fitted parameters. Later
        # on we will probably want programmatic access to more details about the fit
        # (e.g. for autospec), but we can add that when it's needed.

        self.fitted_parameters = results.params.tolist()  # params is a pd.Series


    def run(self):
        """
        Run the model step: calculate simulated choices and use them to update a column.

        For binary logit, we calculate predicted probabilities and then perform a
        weighted random draw to determine the simulated binary outcomes. This is done
        directly from the fitted parameters, because we can't conveniently regenerate a
        Statsmodels results object from a dictionary representation.

        The predicted probabilities and simulated choices are saved to the class object
        for interactive use (`probabilities` and `choices`, with type pd.Series) but are
        not persisted in the dictionary representation of the model step.

        Parameters
        ----------
        None

        Returns
        -------
        None

        """
        # TO DO - verify that params are in place for prediction

        df = get_data(tables = self.out_tables,
                      fallback_tables = self.tables,
                      filters = self.out_filters,
                      model_expression = self.model_expression,
                      extra_columns = self.out_column)

        dm = patsy.dmatrices(data=df, formula_like=self.model_expression,
                return_type='dataframe')[1]  # right-hand-side design matrix

        beta_X = np.dot(dm, self.fitted_parameters)

        # Logistic probabilities; the exp(-x) form is algebraically identical to
        # exp(x)/(1+exp(x)) but avoids float overflow for large positive utilities
        probs = 1.0 / (1.0 + np.exp(-beta_X))

        rand = np.random.random(len(probs))
        choices = rand < probs

        # Save results to the class object (via df to include index)
        df['_probs'] = probs
        self.probabilities = df._probs
        df['_choices'] = choices
        self.choices = df._choices

        # TO DO - generate column if it does not exist

        colname = self._get_out_column()
        tabname = self._get_out_table()

        # Boolean masks used directly rather than comparing `== True`/`== False`
        if self.out_value_true != 'nothing':
            df.loc[df._choices, colname] = self.out_value_true

        if self.out_value_false != 'nothing':
            df.loc[~df._choices, colname] = self.out_value_false

        orca.get_table(tabname).update_col_from_series(colname, df[colname], cast=True)
-------------------------------------------------------------------------------- /urbansim_templates/models/regression.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import math 4 | import numpy as np 5 | import pandas as pd 6 | from datetime import datetime as dt 7 | 8 | import orca 9 | from urbansim.models import RegressionModel 10 | from urbansim.utils import yamlio 11 | 12 | from .. import modelmanager 13 | from ..utils import get_data, update_column 14 | from .shared import TemplateStep 15 | 16 | 17 | @modelmanager.template 18 | class OLSRegressionStep(TemplateStep): 19 | """ 20 | A class for building OLS (ordinary least squares) regression model steps. This extends 21 | TemplateStep, where some common functionality is defined. Estimation and simulation 22 | are handled by `urbansim.models.RegressionModel()`. 23 | 24 | Expected usage: 25 | - create a model object 26 | - specify some parameters 27 | - run the `fit()` method 28 | - iterate as needed 29 | 30 | Then, for simulation: 31 | - specify some simulation parameters 32 | - use the `run()` method for interactive testing 33 | - use `modelmanager.register()` to save the model to Orca and disk 34 | - registered steps can be accessed via ModelManager and Orca 35 | 36 | All parameters listed in the constructor can be set directly on the class object, 37 | at any time. 38 | 39 | Parameters 40 | ---------- 41 | tables : str or list of str, optional 42 | Name(s) of Orca tables to draw data from. The first table is the primary one. 43 | Any additional tables need to have merge relationships ("broadcasts") specified 44 | so that they can be merged unambiguously onto the first table. Among them, the 45 | tables must contain all variables used in the model expression and filters. The 46 | left-hand-side variable should be in the primary table. 
The `tables` parameter is 47 | required for fitting a model, but it does not have to be provided when the object 48 | is created. 49 | 50 | model_expression : str, optional 51 | Patsy formula containing both the left- and right-hand sides of the model 52 | expression: http://patsy.readthedocs.io/en/latest/formulas.html 53 | This parameter is required for fitting a model, but it does not have to be 54 | provided when the object is created. 55 | 56 | filters : str or list of str, optional 57 | Filters to apply to the data before fitting the model. These are passed to 58 | `pd.DataFrame.query()`. Filters are applied after any additional tables are merged 59 | onto the primary one. Replaces the `fit_filters` argument in UrbanSim. 60 | 61 | out_tables : str or list of str, optional 62 | Name(s) of Orca tables to use for simulation. If not provided, the `tables` 63 | parameter will be used. Same guidance applies: the tables must be able to be 64 | merged unambiguously, and must include all columns used in the right-hand-side 65 | of the model expression and in the `out_filters`. 66 | 67 | out_column : str, optional 68 | Name of the column to write predicted values to. If it does not already exist 69 | in the primary output table, it will be created. If not provided, the left-hand- 70 | side variable from the model expression will be used. Replaces the `out_fname` 71 | argument in UrbanSim. 72 | 73 | out_transform : str, optional 74 | Element-wise transformation to apply to the predicted values, for example to 75 | reverse a transformation of the left-hand-side variable in the model expression. 76 | This should be provided as a string containing a function name. Supports anything 77 | from NumPy or Python's built-in math library, for example 'np.exp' or 78 | 'math.floor'. Replaces the `ytransform` argument in UrbanSim. 79 | 80 | out_filters : str or list of str, optional 81 | Filters to apply to the data before simulation. If not provided, no filters will 82 | be applied. 
Replaces the `predict_filters` argument in UrbanSim. 83 | 84 | name : str, optional 85 | Name of the model step, passed to ModelManager. If none is provided, a name is 86 | generated each time the `fit()` method runs. 87 | 88 | tags : list of str, optional 89 | Tags, passed to ModelManager. 90 | 91 | """ 92 | def __init__(self, tables=None, model_expression=None, filters=None, out_tables=None, 93 | out_column=None, out_transform=None, out_filters=None, name=None, tags=[]): 94 | 95 | # Parent class can initialize the standard parameters 96 | TemplateStep.__init__(self, tables=tables, model_expression=model_expression, 97 | filters=filters, out_tables=out_tables, out_column=out_column, 98 | out_transform=out_transform, out_filters=out_filters, name=name, 99 | tags=tags) 100 | 101 | # Placeholders for model fit data, filled in by fit() or from_dict() 102 | self.summary_table = None 103 | self.fitted_parameters = None 104 | self.residuals = None 105 | self.model = None 106 | 107 | 108 | @classmethod 109 | def from_dict(cls, d): 110 | """ 111 | Create an object instance from a saved dictionary representation. 
112 | 113 | Parameters 114 | ---------- 115 | d : dict 116 | 117 | Returns 118 | ------- 119 | OLSRegressionStep 120 | 121 | """ 122 | # Pass values from the dictionary to the __init__() method 123 | obj = cls(tables=d['tables'], model_expression=d['model_expression'], 124 | filters=d['filters'], out_tables=d['out_tables'], 125 | out_column=d['out_column'], out_transform=d['out_transform'], 126 | out_filters=d['out_filters'], name=d['name'], tags=d['tags']) 127 | 128 | obj.summary_table = d['summary_table'] 129 | obj.fitted_parameters = d['fitted_parameters'] 130 | obj.model = None 131 | 132 | # Unpack the urbansim.models.RegressionModel() sub-object and resuscitate it 133 | if d['model'] is not None: 134 | model_config = yamlio.convert_to_yaml(d['model'], None) 135 | obj.model = RegressionModel.from_yaml(model_config) 136 | 137 | return obj 138 | 139 | 140 | def to_dict(self): 141 | """ 142 | Create a dictionary representation of the object. 143 | 144 | Returns 145 | ------- 146 | dict 147 | 148 | """ 149 | d = TemplateStep.to_dict(self) 150 | 151 | # Add parameters not in parent class 152 | d.update({ 153 | 'summary_table': self.summary_table, 154 | 'fitted_parameters': self.fitted_parameters, 155 | 'model': self.model.to_dict() if self.model else None 156 | }) 157 | return d 158 | 159 | 160 | def fit(self): 161 | """ 162 | Fit the model; save and report results. 163 | 164 | This currently uses the `RegressionModel` class from core UrbanSim. We save the 165 | model object for prediction and interactive use (`model`, with type 166 | `urbansim.models.regression.RegressionModel`). 167 | 168 | For example, you can use this to get a latex version of the summary table using 169 | `m.model.model_fit.summary().as_latex()`. This may change in the future if we 170 | refactor the template to use StatsModels directly. 
171 | 172 | """ 173 | self.model = RegressionModel(model_expression=self.model_expression, 174 | fit_filters=self.filters, predict_filters=self.out_filters, 175 | ytransform=None, name=self.name) 176 | 177 | df = get_data(tables = self.tables, 178 | filters = self.filters, 179 | model_expression = self.model_expression) 180 | 181 | results = self.model.fit(df) 182 | 183 | self.name = self._generate_name() 184 | self.summary_table = str(results.summary()) 185 | print(self.summary_table) 186 | 187 | # We don't strictly need to save the fitted parameters, because they are also 188 | # contained in the urbansim.models.RegressionModel() sub-object. But maintaining 189 | # a parallel data structure to other templates will make it easier to refactor the 190 | # code later on to not rely on RegressionModel any more. 191 | 192 | self.fitted_parameters = results.params.tolist() 193 | self.residuals = results.resid 194 | 195 | def run(self): 196 | """ 197 | Run the model step: calculate predicted values, transform them as specified, and 198 | use them to update a column. 199 | 200 | The pre-transformation predicted values are saved to the class object for 201 | diagnostic use (`predicted_values` with type pd.Series). The post-transformation 202 | predicted values are written to Orca. 
203 | 204 | """ 205 | df = get_data(tables = self.out_tables, 206 | fallback_tables = self.tables, 207 | filters = self.out_filters, 208 | model_expression = self.model_expression) 209 | 210 | values = self.model.predict(df) 211 | self.predicted_values = values 212 | 213 | if self.out_transform is not None: 214 | values = values.apply(eval(self.out_transform)) 215 | 216 | colname = self._get_out_column() 217 | tabname = self._get_out_table() 218 | 219 | update_column(table = tabname, 220 | column = colname, 221 | data = values) 222 | 223 | -------------------------------------------------------------------------------- /urbansim_templates/models/segmented_large_multinomial_logit.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import copy 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | import orca 9 | from urbansim.models.util import apply_filter_query 10 | 11 | from ..__init__ import __version__ 12 | from ..utils import get_data, update_name 13 | from .. import modelmanager 14 | from . import LargeMultinomialLogitStep 15 | from .shared import TemplateStep 16 | 17 | 18 | @modelmanager.template 19 | class SegmentedLargeMultinomialLogitStep(TemplateStep): 20 | """ 21 | This template automatically generates a set of LargeMultinomialLogitStep submodels 22 | corresponding to "segments" or categories of choosers. The submodels can be directly 23 | accessed and edited. 24 | 25 | Running 'build_submodels()' will create a submodel for each category of choosers 26 | identified in the segmentation column. The submodels are implemented using filter 27 | queries. 28 | 29 | Once they are generated, the 'submodels' property contains a dict of 30 | LargeMultinomialLogitStep objects, identified by category name. You can edit their 31 | properties as needed, fit them individually, etc. 
32 | 33 | Editing a property in the 'defaults' object will update all the submodels at once, 34 | while leaving customizations to other properties intact. 35 | 36 | Parameters 37 | ---------- 38 | defaults : LargeMultinomialLogitStep, optional 39 | Object containing initial parameter values for the submodels. Values for 40 | 'choosers', 'alternatives', and 'choice_column' are required to generate 41 | submodels, but do not have to be provided when the object is created. 42 | 43 | segmentation_column : str, optional 44 | Name of a column of categorical values in the 'defaults.choosers' table. Any data 45 | that can be interpreted by Pandas as categorical is valid. This is required to 46 | generate submodels, but does not have to be provided when the object is created. 47 | 48 | name : str, optional 49 | Name of the model step. 50 | 51 | tags : list of str, optional 52 | Tags associated with the model step. 53 | 54 | """ 55 | def __init__(self, defaults=None, segmentation_column=None, name=None, tags=[]): 56 | 57 | if defaults is None: 58 | defaults = LargeMultinomialLogitStep() 59 | 60 | self.defaults = defaults 61 | self.defaults.bind_to(self.update_submodels) 62 | 63 | self.segmentation_column = segmentation_column 64 | 65 | self.name = name 66 | self.tags = tags 67 | 68 | self.template = self.__class__.__name__ 69 | self.template_version = __version__ 70 | 71 | # Properties to be filled in by build_submodels() or from_dict() 72 | self.submodels = {} 73 | 74 | 75 | @classmethod 76 | def from_dict(cls, d): 77 | """ 78 | Create an object instance from a saved dictionary representation. 
79 | 80 | Parameters 81 | ---------- 82 | d : dict 83 | 84 | Returns 85 | ------- 86 | SegmentedLargeMultinomialLogitStep 87 | 88 | """ 89 | mnl_step = LargeMultinomialLogitStep.from_dict 90 | 91 | obj = cls( 92 | defaults = mnl_step(d['defaults']), 93 | segmentation_column = d['segmentation_column'], 94 | name = d['name'], 95 | tags = d['tags']) 96 | 97 | obj.submodels = {k: mnl_step(m) for k, m in d['submodels'].items()} 98 | 99 | return obj 100 | 101 | 102 | def to_dict(self): 103 | """ 104 | Create a dictionary representation of the object. 105 | 106 | Returns 107 | ------- 108 | dict 109 | 110 | """ 111 | d = { 112 | 'template': self.template, 113 | 'template_version': self.template_version, 114 | 'name': self.name, 115 | 'tags': self.tags, 116 | 'defaults': self.defaults.to_dict(), 117 | 'segmentation_column': self.segmentation_column, 118 | 'submodels': {k: m.to_dict() for k, m in self.submodels.items()} 119 | } 120 | return d 121 | 122 | 123 | def get_segmentation_column(self, mct=None): 124 | """ 125 | Get the column of segmentation values from Orca. Chooser and alternative filters 126 | are applied to identify valid observations. 127 | 128 | Parameters 129 | ---------- 130 | mct : choicemodels.tools.MergedChoiceTable 131 | This parameter is a temporary backdoor allowing us to pass in a more 132 | complicated choice table than can be generated within the template, for 133 | example including sampling weights or interaction terms. 
134 | 135 | Returns 136 | ------- 137 | pd.Series 138 | 139 | """ 140 | if mct is not None: 141 | df = mct.to_frame() 142 | else: 143 | obs = get_data(tables = self.defaults.choosers, 144 | filters = self.defaults.chooser_filters, 145 | extra_columns = [self.defaults.choice_column, 146 | self.segmentation_column]) 147 | 148 | alts = get_data(tables = self.defaults.alternatives, 149 | filters = self.defaults.alt_filters) 150 | 151 | df = pd.merge(obs, alts, how='inner', 152 | left_on=self.defaults.choice_column, right_index=True) 153 | 154 | return df[self.segmentation_column] 155 | 156 | 157 | def build_submodels(self, mct=None): 158 | """ 159 | Create a submodel for each category of choosers identified in the segmentation 160 | column. Only categories with at least one observation remaining after applying 161 | chooser and alternative filters will be included. 162 | 163 | Running this method will overwrite any previous submodels. 164 | 165 | Parameters 166 | ---------- 167 | mct : choicemodels.tools.MergedChoiceTable 168 | This parameter is a temporary backdoor allowing us to pass in a more 169 | complicated choice table than can be generated within the template, for 170 | example including sampling weights or interaction terms. 
171 | 172 | """ 173 | self.submodels = {} 174 | submodel = LargeMultinomialLogitStep.from_dict(self.defaults.to_dict()) 175 | 176 | col = self.get_segmentation_column(mct=mct) 177 | 178 | if (len(col) == 0): 179 | print("Warning: No valid observations after applying the chooser and "+ 180 | "alternative filters") 181 | return 182 | 183 | cats = col.astype('category').cat.categories.values 184 | 185 | print("Building submodels for {} categories: {}".format(len(cats), cats)) 186 | 187 | for cat in cats: 188 | m = copy.deepcopy(submodel) 189 | seg_filter = "{} == '{}'".format(self.segmentation_column, cat) 190 | 191 | if isinstance(m.chooser_filters, list): 192 | m.chooser_filters += [seg_filter] 193 | 194 | elif isinstance(m.chooser_filters, str): 195 | m.chooser_filters = [m.chooser_filters, seg_filter] 196 | 197 | else: 198 | m.chooser_filters = seg_filter 199 | 200 | # TO DO - same for out_chooser_filters, once we handle simulation 201 | self.submodels[cat] = m 202 | 203 | 204 | def update_submodels(self, param, value): 205 | """ 206 | Updates a property across all the submodels. This method is bound to the 207 | `defaults` object and runs automatically when one of its properties is changed. 208 | 209 | Note that the `chooser_filters` and `alt_filters` properties cannot currently be 210 | updated this way, because they can affect the model segmentation. If you are 211 | confident the changes are valid, you can edit the submodels directly. Otherwise, 212 | you can regenerate them using updated defaults by running `build_submodels()`. 213 | 214 | Parameters 215 | ---------- 216 | param : str 217 | Property name. 218 | value : anything 219 | 220 | """ 221 | if (param in ['chooser_filters', 'alt_filters']) & (len(self.submodels) > 0): 222 | print("Warning: Changing '{}' can affect the model segmentation. Changes " + 223 | "have been saved to 'defaults' but not to the submodels. 
To " + 224 | "regenerate them using the new defaults, run 'build_submodels()'."\ 225 | .format(param)) 226 | return 227 | 228 | for k, m in self.submodels.items(): 229 | setattr(m, param, value) 230 | 231 | 232 | def fit_all(self, mct=None): 233 | """ 234 | Fit all the submodels. Build the submodels first, if they don't exist yet. This 235 | method can be run as many times as desired. 236 | 237 | Parameters 238 | ---------- 239 | mct : choicemodels.tools.MergedChoiceTable 240 | This parameter is a temporary backdoor allowing us to pass in a more 241 | complicated choice table than can be generated within the template, for 242 | example including sampling weights or interaction terms. 243 | 244 | 245 | """ 246 | if (len(self.submodels) == 0): 247 | self.build_submodels(mct=mct) 248 | 249 | for k, m in self.submodels.items(): 250 | print(' SEGMENT: {0} = {1} '.format( 251 | self.segmentation_column, str(k)).center(70, '#')) 252 | m.fit(mct=mct) 253 | 254 | self.name = update_name(self.template, self.name) 255 | 256 | 257 | def run(self): 258 | """ 259 | Convenience method (requied by template spec) that invokes `run_all()`. 260 | 261 | """ 262 | self.run_all() 263 | 264 | 265 | def run_all(self, interaction_terms=None): 266 | """ 267 | Run all the submodels. 268 | 269 | Parameters 270 | ---------- 271 | interaction_terms : pandas.Series, pandas.DataFrame, or list of either, optional 272 | Additional column(s) of interaction terms whose values depend on the 273 | combination of observation and alternative, to be merged onto the final data 274 | table. If passed as a Series or DataFrame, it should include a two-level 275 | MultiIndex. One level's name and values should match an index or column from 276 | the observations table, and the other should match an index or column from the 277 | alternatives table. 
278 | 279 | """ 280 | for k, m in self.submodels.items(): 281 | print(' SEGMENT: {0} = {1} '.format( 282 | self.segmentation_column, str(k)).center(70, '#')) 283 | m.run(interaction_terms=interaction_terms) 284 | -------------------------------------------------------------------------------- /urbansim_templates/models/shared.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from collections import OrderedDict 6 | from datetime import datetime as dt 7 | 8 | import orca 9 | from urbansim.models import util 10 | 11 | from ..__init__ import __version__ 12 | 13 | 14 | class TemplateStep(object): 15 | """ 16 | Shared functionality for the template classes. 17 | 18 | Parameters 19 | ---------- 20 | tables : str or list of str, optional 21 | Required to fit a model, but doesn't have to be provided at initialization. 22 | model_expression : str, optional 23 | Required to fit a model, but doesn't have to be provided at initialization. 24 | filters : str or list of str ?, optional 25 | Replaces `fit_filters` argument. 26 | out_tables : str or list of str, optional 27 | out_column : str, optional 28 | Replaces `out_fname` argument. 29 | out_transform : callable, optional 30 | Replaces `ytransform` argument. 31 | out_filters : str or list of str ?, optional 32 | Replaces `predict_filters` argument. 33 | name : str, optional 34 | For ModelManager. 35 | tags : list of str, optional 36 | For ModelManager. 37 | 38 | """ 39 | def __init__(self, tables=None, model_expression=None, filters=None, out_tables=None, 40 | out_column=None, out_transform=None, out_filters=None, name=None, tags=[]): 41 | 42 | self.tables = tables 43 | self.model_expression = model_expression 44 | self.filters = filters 45 | 46 | # TO DO - out_transform might not belong here - is it only used for OLS? 
47 | 48 | self.out_tables = out_tables 49 | self.out_column = out_column 50 | self.out_transform = out_transform 51 | self.out_filters = out_filters 52 | 53 | self.name = name 54 | self.tags = tags 55 | 56 | self.template = type(self).__name__ # class name 57 | self.template_version = __version__ 58 | 59 | 60 | @classmethod 61 | def from_dict(cls, d): 62 | """ 63 | Create an object instance from a saved dictionary representation. 64 | 65 | Child classes will need to override this method to implement loading of custom 66 | parameters and estimation results. 67 | 68 | Parameters 69 | ---------- 70 | d : dict 71 | 72 | Returns 73 | ------- 74 | TemplateStep 75 | 76 | """ 77 | # Pass values from the dictionary to the __init__() method 78 | return cls(d['tables'], d['model_expression'], d['filters'], d['out_tables'], 79 | d['out_column'], d['out_transform'], d['out_filters'], d['name'], 80 | d['tags']) 81 | 82 | 83 | def to_dict(self): 84 | """ 85 | Create a dictionary representation of the object. 86 | 87 | Child classes will need to override this method to implement saving of custom 88 | parameters and estimation results. 89 | 90 | Returns 91 | ------- 92 | dict 93 | 94 | """ 95 | d = { 96 | 'template': self.template, 97 | 'template_version': self.template_version, 98 | 'name': self.name, 99 | 'tags': self.tags, 100 | 'tables': self.tables, 101 | 'model_expression': self.model_expression, 102 | 'filters': self.filters, 103 | 'out_tables': self.out_tables, 104 | 'out_column': self.out_column, 105 | 'out_transform': self.out_transform, 106 | 'out_filters': self.out_filters 107 | } 108 | return d 109 | 110 | 111 | def _normalize_table_param(self, tables): 112 | """ 113 | Normalize table parameter input. 
TO DO - add more type validation 114 | 115 | """ 116 | if isinstance(tables, list): 117 | # Normalize [] to None 118 | if len(tables) == 0: 119 | return None 120 | 121 | # Normalize [str] to str 122 | if len(tables) == 1: 123 | return tables[0] 124 | 125 | return tables 126 | 127 | 128 | @property 129 | def tables(self): 130 | return self.__tables 131 | 132 | @tables.setter 133 | def tables(self, tables): 134 | self.__tables = self._normalize_table_param(tables) 135 | 136 | @property 137 | def out_tables(self): 138 | return self.__out_tables 139 | 140 | @out_tables.setter 141 | def out_tables(self, out_tables): 142 | self.__out_tables = self._normalize_table_param(out_tables) 143 | 144 | 145 | def _get_out_column(self): 146 | """ 147 | Return name of the column to save data to. This is 'out_column' if it exsits, 148 | otherwise the left-hand-side column name from the model expression. 149 | 150 | Returns 151 | ------- 152 | str 153 | 154 | """ 155 | if self.out_column is not None: 156 | return self.out_column 157 | 158 | else: 159 | # TO DO - there must be a cleaner way to get LHS column name 160 | return self.model_expression.split('~')[0].split(' ')[0] 161 | 162 | 163 | def _get_out_table(self): 164 | """ 165 | Return name of the table to save data to. This is 'out_tables' or its first 166 | element, if it exists, otherwise 'tables' or its first element. 167 | 168 | Returns 169 | ------- 170 | str 171 | 172 | """ 173 | if self.out_tables is not None: 174 | tables = self.out_tables 175 | else: 176 | tables = self.tables 177 | 178 | if isinstance(tables, str): 179 | return tables 180 | else: 181 | return tables[0] 182 | 183 | 184 | def _generate_name(self): 185 | """ 186 | THIS METHOD IS DEPRECATED, AND SHOULD BE REPLACED BY UTILS.UPDATE_NAME(). 187 | 188 | Generate a name for the class instance, based on its type and the current 189 | timestamp. But if a custom name has already been provided, return that instead. 
190 | 191 | (We can't tell with certainty whether an existing name was auto-generated or 192 | customized, and it doesn't seem worth keeping track. A name is judged to be custom 193 | if it does not contain the class type.) 194 | 195 | Returns 196 | ------- 197 | str 198 | 199 | """ 200 | if (self.name is None) or (self.template in self.name): 201 | return self.template + '-' + dt.now().strftime('%Y%m%d-%H%M%S') 202 | else: 203 | return self.name 204 | 205 | -------------------------------------------------------------------------------- /urbansim_templates/models/small_multinomial_logit.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from collections import OrderedDict 4 | import os 5 | import pickle 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from choicemodels import MultinomialLogit 11 | import orca 12 | 13 | from urbansim_templates import modelmanager 14 | from urbansim_templates.models import TemplateStep 15 | from urbansim_templates.utils import get_data, update_column 16 | 17 | 18 | @modelmanager.template 19 | class SmallMultinomialLogitStep(TemplateStep): 20 | """ 21 | A class for building multinomial logit model steps where the number of alternatives is 22 | "small". Estimation is handled by PyLogit via the ChoiceModels API. Simulation is 23 | handled by PyLogit (probabilities) and ChoiceModels (simulation draws). 24 | 25 | Multinomial logit models can involve a range of different specification and estimation 26 | mechanics. For now these are separated into two templates. What's the difference? 
27 | 28 | "Small" MNL: 29 | - data is in a single table (choosers) 30 | - each alternative can have a different model expression 31 | - all the alternatives are available to all choosers 32 | - estimation and simulation use the PyLogit engine (via ChoiceModels) 33 | 34 | "Large" MNL: 35 | - data is in two tables (choosers and alternatives) 36 | - each alternative has the same model expression 37 | - N alternatives are sampled for each chooser 38 | - estimation and simulation use the ChoiceModels engine (formerly UrbanSim MNL) 39 | 40 | TO DO: 41 | - Add support for specifying availability of alternatives 42 | - Add support for sampling weights 43 | - Add support for on-the-fly interaction calculations (e.g. distance) 44 | 45 | Parameters 46 | ---------- 47 | tables : str or list of str, optional 48 | Name(s) of Orca tables to draw data from. The first table is the primary one. 49 | Any additional tables need to have merge relationships ("broadcasts") specified 50 | so that they can be merged unambiguously onto the first table. Among them, the 51 | tables must contain all variables used in the model expression and filters. The 52 | index of the primary table should be a unique ID. The `tables` parameter is 53 | required for fitting a model, but it does not have to be provided when the object 54 | is created. Reserved column names: '_obs_id', '_alt_id', '_chosen'. 55 | 56 | model_expression : OrderedDict, optional 57 | PyLogit model expression. This parameter is required for fitting a model, but it 58 | does not have to be provided when the object is created. 59 | 60 | model_labels : OrderedDict, optional 61 | PyLogit model labels. 62 | 63 | choice_column : str, optional 64 | Name of the column indicating observed choices, for model estimation. The column 65 | should contain integers matching the alternatives in the model expression. This 66 | parameter is required for fitting a model, but it does not have to be provided 67 | when the object is created. 
68 | 69 | initial_coefs : list of numerics, optional 70 | Starting values for the parameter estimation algorithm, passed to PyLogit. Length 71 | must be equal to the number of parameters being estimated. If this is not 72 | provided, zeros will be used. 73 | 74 | filters : str or list of str, optional 75 | Filters to apply to the data before fitting the model. These are passed to 76 | `pd.DataFrame.query()`. Filters are applied after any additional tables are merged 77 | onto the primary one. Replaces the `fit_filters` argument in UrbanSim. 78 | 79 | out_tables : str or list of str, optional 80 | Name(s) of Orca tables to use for simulation. If not provided, the `tables` 81 | parameter will be used. Same guidance applies: the tables must be able to be 82 | merged unambiguously, and must include all columns used in the model expression 83 | and in the `out_filters`. 84 | 85 | out_column : str, optional 86 | Name of the column to write simulated choices to. If it does not already exist 87 | in the primary output table, it will be created. If not provided, the 88 | `choice_column` will be used. Replaces the `out_fname` argument in UrbanSim. 89 | 90 | out_filters : str or list of str, optional 91 | Filters to apply to the data before simulation. If not provided, no filters will 92 | be applied. Replaces the `predict_filters` argument in UrbanSim. 93 | 94 | name : str, optional 95 | Name of the model step, passed to ModelManager. If none is provided, a name is 96 | generated each time the `fit()` method runs. 97 | 98 | tags : list of str, optional 99 | Tags, passed to ModelManager. 
100 | 101 | """ 102 | def __init__(self, tables=None, model_expression=None, model_labels=None, 103 | choice_column=None, initial_coefs=None, filters=None, out_tables=None, 104 | out_column=None, out_filters=None, name=None, tags=[]): 105 | 106 | # Parent class can initialize the standard parameters 107 | TemplateStep.__init__(self, tables=tables, model_expression=model_expression, 108 | filters=filters, out_tables=out_tables, out_column=out_column, 109 | out_transform=None, out_filters=out_filters, name=name, tags=tags) 110 | 111 | # Custom parameters not in parent class 112 | self.model_labels = model_labels 113 | self.choice_column = choice_column 114 | self.initial_coefs = initial_coefs 115 | 116 | # Placeholders for model fit data, filled in by fit() or from_dict() 117 | self.summary_table = None 118 | self.model = None 119 | 120 | 121 | @classmethod 122 | def from_dict(cls, d): 123 | """ 124 | Create an object instance from a saved dictionary representation. 125 | 126 | Parameters 127 | ---------- 128 | d : dict 129 | 130 | Returns 131 | ------- 132 | SmallMultinomialLogitStep 133 | 134 | """ 135 | # Pass values from the dictionary to the __init__() method 136 | obj = cls(tables=d['tables'], model_expression=None, model_labels=None, 137 | choice_column=d['choice_column'], initial_coefs=d['initial_coefs'], 138 | filters=d['filters'], out_tables=d['out_tables'], 139 | out_column=d['out_column'], out_filters=d['out_filters'], name=d['name'], 140 | tags=d['tags']) 141 | 142 | # Load non-strings and model fit parameters 143 | # TO DO - handle non-existence cases more carefully than 'except pass'! 
144 | try: 145 | k = d['model_expression_keys'] 146 | v = d['model_expression_values'] 147 | obj.model_expression = OrderedDict([(k[i], v[i]) for i in range(len(k))]) 148 | except: 149 | pass 150 | 151 | try: 152 | k = d['model_label_keys'] 153 | v = d['model_label_values'] 154 | obj.model_labels = OrderedDict([(k[i], v[i]) for i in range(len(k))]) 155 | except: 156 | pass 157 | 158 | obj.summary_table = d['summary_table'] 159 | 160 | if 'supplemental_objects' in d: 161 | for item in filter(None, d['supplemental_objects']): 162 | if (item['name'] == 'model-object'): 163 | obj.model = item['content'] 164 | 165 | return obj 166 | 167 | 168 | def to_dict(self): 169 | """ 170 | Create a dictionary representation of the object. 171 | 172 | Returns 173 | ------- 174 | dict 175 | 176 | """ 177 | tmp_model_expression = self.model_expression 178 | self.model_expression = None 179 | 180 | d = TemplateStep.to_dict(self) 181 | self.model_expression = tmp_model_expression 182 | 183 | # Can't store OrderedDicts in YAML, so convert them 184 | if tmp_model_expression is not None: 185 | d.update({ 186 | 'model_expression_keys': [k for (k,v) in tmp_model_expression.items()], 187 | 'model_expression_values': [v for (k,v) in tmp_model_expression.items()], 188 | }) 189 | 190 | if self.model_labels is not None: 191 | d.update({ 192 | 'model_label_keys': [k for (k,v) in self.model_labels.items()], 193 | 'model_label_values': [v for (k,v) in self.model_labels.items()] 194 | }) 195 | 196 | # Add parameters not in parent class 197 | d.update({ 198 | 'model_labels': None, 199 | 'choice_column': self.choice_column, 200 | 'initial_coefs': self.initial_coefs, 201 | 'summary_table': self.summary_table 202 | }) 203 | 204 | # Add supplemental objects 205 | objects = [] 206 | if self.model is not None: 207 | objects.append({'name': 'model-object', 208 | 'content': self.model, 209 | 'content_type': 'pickle', 210 | 'required': True}) 211 | 212 | d.update({'supplemental_objects': objects}) 213 | 214 | 
return d 215 | 216 | 217 | def _get_alts(self): 218 | """ 219 | Get a unique, sorted list of alternative id's included in the model expression. 220 | 221 | Returns 222 | ------- 223 | list 224 | 225 | """ 226 | ids = [] 227 | for k, v in self.model_expression.items(): 228 | # TO DO - check if PyLogit supports v being a non-list (single numeric) 229 | for elem in v: 230 | if isinstance(elem, list): 231 | ids += elem 232 | else: 233 | ids += [elem] 234 | 235 | return np.unique(ids) 236 | 237 | 238 | def _get_param_count(self): 239 | """ 240 | Count the number of parameters implied by the model expression. 241 | 242 | Returns 243 | ------- 244 | int 245 | 246 | """ 247 | count = 0 248 | for k, v in self.model_expression.items(): 249 | # TO DO - check if PyLogit supports v being a non-list (single numeric) 250 | for elem in v: 251 | count += 1 252 | 253 | return count 254 | 255 | 256 | def _to_long(self, df, task='fit'): 257 | """ 258 | Convert a data table from wide format to long format. Currently handles the case 259 | where there are attributes of choosers but not of alternatives, and no 260 | availability or interaction terms. (This is not supported in the PyLogit 261 | conversion utility.) 262 | 263 | TO DO 264 | - extend to handle characteristics of alternatives? 265 | - move to ChoiceModels 266 | 267 | Parameters 268 | ---------- 269 | df : pd.DataFrame 270 | One row per observation. The observation id should be in the index. Reserved 271 | column names: '_obs_id', '_alt_id', '_chosen'. 272 | 273 | task : 'fit' or 'predict', optional 274 | If 'fit' (default), a column named '_chosen' is generated with binary 275 | indicator of observed choices. 276 | 277 | Returns 278 | ------- 279 | pd.DataFrame 280 | One row per combination of observation and alternative. The observation is in 281 | '_obs_id'. The alternative is in 'alt_id'. Table is sorted by observation and 282 | alternative. 
If task is 'fit', a column named '_chosen' is generated with 283 | binary indicator of observed choices. Remaining columns are retained from the 284 | input data. 285 | 286 | """ 287 | # Get lists of obs and alts 288 | obs = df.index.sort_values().unique().tolist() 289 | alts = self._get_alts() 290 | 291 | # Long df is cartesian product of alts and obs 292 | obs_prod, alts_prod = pd.core.reshape.util.cartesian_product([obs, alts]) 293 | 294 | long_df = pd.DataFrame({'_obs_id': obs_prod, '_alt_id': alts_prod}) 295 | long_df = long_df.merge(df, left_on='_obs_id', right_index=True) 296 | 297 | if (task == 'fit'): 298 | # Add binary indicator of chosen rows 299 | long_df['_chosen'] = 0 300 | long_df.loc[long_df._alt_id == long_df[self.choice_column], '_chosen'] = 1 301 | 302 | return long_df 303 | 304 | 305 | def fit(self): 306 | """ 307 | Fit the model; save and report results. This uses PyLogit via ChoiceModels. 308 | 309 | The `fit()` method can be run as many times as desired. Results will not be saved 310 | with Orca or ModelManager until the `register()` method is run. 
311 | 312 | """ 313 | expr_cols = [t[0] for t in list(self.model_expression.items()) \ 314 | if t[0] != 'intercept'] 315 | 316 | df = get_data(tables = self.tables, 317 | filters = self.filters, 318 | extra_columns = expr_cols + [self.choice_column]) 319 | 320 | long_df = self._to_long(df) 321 | 322 | # Set initial coefs to 0 if none provided 323 | pc = self._get_param_count() 324 | if (self.initial_coefs is None) or (len(self.initial_coefs) != pc): 325 | self.initial_coefs = np.zeros(pc).tolist() 326 | 327 | model = MultinomialLogit(data=long_df, 328 | observation_id_col='_obs_id', 329 | choice_col='_chosen', 330 | model_expression=self.model_expression, 331 | model_labels=self.model_labels, 332 | alternative_id_col='_alt_id', 333 | initial_coefs=self.initial_coefs) 334 | 335 | results = model.fit() 336 | 337 | self.name = self._generate_name() 338 | self.summary_table = str(results.report_fit()) 339 | print(self.summary_table) 340 | 341 | # We need the PyLogit fitted model object for prediction, so save it directly 342 | self.model = results.get_raw_results() 343 | 344 | 345 | def run(self): 346 | """ 347 | Run the model step: calculate simulated choices and use them to update a column. 348 | 349 | Alternatives that appear in the estimation data but not in the model expression 350 | will not be available for simulation. 351 | 352 | Predicted probabilities come from PyLogit. Monte Carlo simulation of choices is 353 | performed directly. (This functionality will move to ChoiceModels.) 354 | 355 | The predicted probabilities and simulated choices are saved to the class object 356 | for interactive use (`probabilities` with type pd.DataFrame, and `choices` with 357 | type pd.Series) but are not persisted in the dictionary representation of the 358 | model step. 
359 | 360 | """ 361 | expr_cols = [t[0] for t in list(self.model_expression.items()) \ 362 | if t[0] != 'intercept'] 363 | 364 | df = get_data(tables = self.out_tables, 365 | fallback_tables = self.tables, 366 | filters = self.out_filters, 367 | extra_columns = expr_cols) 368 | 369 | long_df = self._to_long(df, 'predict') 370 | 371 | num_obs = len(df) 372 | num_alts = len(self._get_alts()) 373 | 374 | # Get predictions from underlying model - this is an ndarray with the same length 375 | # as the long-format df, representing choice probability for each alternative 376 | probs = self.model.predict(long_df) 377 | 378 | # Generate choices by adapting an approach from UrbanSim MNL 379 | # https://github.com/UDST/choicemodels/blob/master/choicemodels/mnl.py#L578-L583 380 | cumprobs = probs.reshape((num_obs, num_alts)).cumsum(axis=1) 381 | rands = np.random.random(num_obs) 382 | diff = np.subtract(cumprobs.transpose(), rands).transpose() 383 | 384 | # The diff conversion replaces negative values with 0 and positive values with 1, 385 | # so that argmax can return the position of the first positive value 386 | choice_ix = np.argmax((diff + 1.0).astype('i4'), axis=1) 387 | choice_ix_1d = choice_ix + (np.arange(num_obs) * num_alts) 388 | 389 | choices = long_df._alt_id.values.take(choice_ix_1d) 390 | 391 | # Save results to the class object (via df to include indexes) 392 | long_df['_probability'] = probs 393 | self.probabilities = long_df[['_obs_id', '_alt_id', '_probability']] 394 | df['_choices'] = choices 395 | self.choices = df._choices 396 | 397 | # Save to Orca 398 | update_column(table=self.out_tables, 399 | fallback_table=self.tables, 400 | column=self.out_column, 401 | fallback_column=self.choice_column, 402 | data=self.choices) 403 | -------------------------------------------------------------------------------- /urbansim_templates/shared/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import 
from .core import CoreTemplateSettings
from .output_column import OutputColumnSettings, register_column


# --- urbansim_templates/shared/core.py ---

from urbansim_templates import __version__


class CoreTemplateSettings():
    """
    Stores standard parameters and logic used by all templates. Parameters can be
    passed to the constructor or set as attributes.

    Parameters
    ----------
    name : str, optional
        Name of the configured template instance.

    tags : list of str, optional
        Tags associated with the configured template instance.

    notes : str, optional
        Notes associated with the configured template instance.

    autorun : bool, optional
        Whether to run the configured template instance automatically when it's
        registered or loaded by ModelManager. The overall default is False, but the
        default can be overridden at the template level.

    template : str
        Name of the template class associated with a configured instance.

    template_version : str
        Version of the template class package.

    """
    def __init__(self,
            name = None,
            tags = None,
            notes = None,
            autorun = False,
            template = None,
            template_version = None):

        self.name = name
        # Avoid a mutable default argument: a literal `[]` default would be a
        # single shared list, silently reused across instances
        self.tags = tags if tags is not None else []
        self.notes = notes
        self.autorun = autorun
        self.template = template
        self.template_version = template_version

        # automatic attributes
        self.modelmanager_version = __version__


    @classmethod
    def from_dict(cls, d):
        """
        Create a class instance from a saved dictionary representation.

        Parameters
        ----------
        d : dict

        Returns
        -------
        obj : CoreTemplateSettings

        """
        obj = cls(
            name = d['name'],
            tags = d['tags'],
            notes = d['notes'],
            autorun = d['autorun'],
            template = d['template'],
            template_version = d['template_version'],
        )
        return obj


    def to_dict(self):
        """
        Create a dictionary representation of the object.

        Returns
        -------
        d : dict

        """
        d = {
            'name': self.name,
            'tags': self.tags,
            'notes': self.notes,
            'autorun': self.autorun,
            'template': self.template,
            'template_version': self.template_version,
            'modelmanager_version': self.modelmanager_version,
        }
        return d


# --- urbansim_templates/shared/output_column.py ---

import orca

from urbansim_templates import __version__


class OutputColumnSettings():
    """
    Stores standard parameters used by templates that generate or modify columns.
    Parameters can be passed to the constructor or set as attributes.

    Parameters
    ----------
    column_name : str, optional
        Name of the Orca column to be created or modified. Generally required
        before running a configured template.

    table : str, optional
        Name of Orca table the column will be associated with. Generally required
        before running the configured template.

    data_type : str, optional
        Python type or ``numpy.dtype`` to cast the column's values to.

    missing_values : str or numeric, optional
        Value to use for rows that would otherwise be missing.

    cache : bool, default False
        Whether to cache column values after they are calculated.

    cache_scope : 'step', 'iteration', or 'forever', default 'forever'
        How long to cache column values for (ignored if ``cache`` is False).

    """
    # TO DO: say something about Orca defaults and about core vs. computed columns.

    def __init__(self,
            column_name = None,
            table = None,
            data_type = None,
            missing_values = None,
            cache = False,
            cache_scope = 'forever'):

        self.column_name = column_name
        self.table = table
        self.data_type = data_type
        self.missing_values = missing_values
        self.cache = cache
        self.cache_scope = cache_scope

        # automatic attributes
        self.modelmanager_version = __version__


    @classmethod
    def from_dict(cls, d):
        """
        Create a class instance from a saved dictionary representation.

        Parameters
        ----------
        d : dict

        Returns
        -------
        obj : OutputColumnSettings

        """
        return cls(
            column_name = d['column_name'],
            table = d['table'],
            data_type = d['data_type'],
            missing_values = d['missing_values'],
            cache = d['cache'],
            cache_scope = d['cache_scope'])


    def to_dict(self):
        """
        Create a dictionary representation of the object.

        Returns
        -------
        d : dict

        """
        return {
            'column_name': self.column_name,
            'table': self.table,
            'data_type': self.data_type,
            'missing_values': self.missing_values,
            'cache': self.cache,
            'cache_scope': self.cache_scope,
            'modelmanager_version': self.modelmanager_version}


######################################
######################################


def register_column(build_column, settings):
    """
    Register a callable as an Orca column. Nothing is returned; the column is
    registered with Orca as a side effect, using the table name, column name,
    and caching behavior from ``settings``.

    Parameters
    ----------
    build_column : callable
        Callable should return a ``pd.Series``.

    settings : OutputColumnSettings

    """
    @orca.column(table_name = settings.table,
                 column_name = settings.column_name,
                 cache = settings.cache,
                 cache_scope = settings.cache_scope)
    def orca_column():
        series = build_column()

        # Post-process the series per the settings: fill missing values first,
        # then cast, so the cast also applies to the fill value
        if settings.missing_values is not None:
            series = series.fillna(settings.missing_values)

        if settings.data_type is not None:
            series = series.astype(settings.data_type)

        return series