├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── docs ├── README.md ├── build │ └── .gitignore └── source │ ├── _static │ └── .gitignore │ ├── _templates │ └── .gitignore │ ├── conf.py │ ├── data-templates.rst │ ├── development.rst │ ├── getting-started.rst │ ├── index.rst │ ├── model-steps.rst │ ├── modelmanager.rst │ └── utilities.rst ├── examples ├── UrbanSim-Templates-demo.ipynb ├── configs │ └── README.md └── data │ └── buildings-demo.csv ├── requirements-dev.txt ├── requirements-extras.txt ├── setup.py ├── tests ├── .gitignore ├── README.md ├── configs │ └── README.md ├── data │ └── README.md ├── pytest.ini ├── test_binary_logit.py ├── test_column_expression.py ├── test_data_load.py ├── test_data_save.py ├── test_large_multinomial_logit.py ├── test_regression.py ├── test_segmented_large_multinomial_logit.py ├── test_shared_core.py ├── test_shared_output_column.py ├── test_small_multinomial_logit.py ├── test_utils.py └── test_utils_broadcasts.py └── urbansim_templates ├── .gitignore ├── __init__.py ├── data ├── __init__.py ├── column_from_expression.py ├── load_table.py └── save_table.py ├── modelmanager.py ├── models ├── .gitignore ├── __init__.py ├── binary_logit.py ├── large_multinomial_logit.py ├── regression.py ├── segmented_large_multinomial_logit.py ├── shared.py └── small_multinomial_logit.py ├── shared ├── __init__.py ├── core.py └── output_column.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .cache/* 2 | build/* 3 | dist/* 4 | urbansim_templates.egg-info/* 5 | **/*.pyc 6 | **/.doctrees/* 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - '3.6' 5 | - '3.8' 6 | - '3.9' 7 | 8 | install: 9 | - pip install . 
10 | - pip install -r requirements-extras.txt 11 | - pip install -r requirements-dev.txt 12 | - pip list 13 | - pip show urbansim_templates 14 | 15 | script: 16 | - cd tests 17 | - coverage run --source urbansim_templates --module pytest --verbose 18 | 19 | after_success: 20 | - coverage report --show-missing 21 | - coveralls -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # UrbanSim Templates change log 2 | 3 | ## 0.2 (not yet released) 4 | 5 | #### 0.2.dev9 (2020-05-15) 6 | 7 | - fixes a bug in `BinaryLogitStep` simulation where the output is not updated correctly 8 | - adds a `resid` attribute to fitted `OLSRegressionStep` models, for diagnostics 9 | 10 | #### 0.2.dev8 (2020-04-17) 11 | 12 | - allows segmented large MNL models to be estimated with a `MergedChoiceTable` that's passed in by the user (rather than generated automatically), thus achieving parity with the non-segmented model class 13 | 14 | #### 0.2.dev7 (2019-07-15) 15 | 16 | - fixes a bug with the `out_transform` parameter for `OLSRegressionStep` 17 | 18 | #### 0.2.dev6 (2019-04-04) 19 | 20 | - introduces classes for storing common settings: `shared.CoreTemplateSettings`, `shared.OutputColumnSettings` 21 | - adds new shared functions: `shared.register_column()`, `utils.cols_in_expression()` 22 | - modifies `ColumnFromExpression` template to divide its parameters into three groups 23 | 24 | #### 0.2.dev5 (2019-03-29) 25 | 26 | - adds new template: `data.ColumnFromExpression` 27 | 28 | #### 0.2.dev4 (2019-03-26) 29 | 30 | - adds new data management utilities: `utils.validate_table()`, `utils.validate_all_tables()`, `utils.merge_tables()` 31 | - updates `utils.get_data()` to use the new merge tool 32 | - updates `BinaryLogitStep` and `OLSRegressionStep` to use the shared `utils.get_data()`, removing any reliance on Orca broadcasts 33 | - raises the `pandas` 
requirement to 0.23 34 | 35 | #### 0.2.dev3 (2019-03-21) 36 | 37 | - adds an `mct` argument to `SegmentedLargeMultinomialLogitStep.fit_all()` 38 | - adds an `interaction_terms` argument to `SegmentedLargeMultinomialLogitStep.run_all()` 39 | 40 | #### 0.2.dev2 (2019-03-04) 41 | 42 | - adds template for saving data: `data.SaveTable()` 43 | - renames `io.TableFromDisk()` to `data.LoadTable()` 44 | 45 | #### 0.2.dev1 (2019-02-27) 46 | 47 | - fixes a crash in small MNL simulation 48 | 49 | #### 0.2.dev0 (2019-02-19) 50 | 51 | - adds first data i/o template: `io.TableFromDisk()` 52 | - adds support for `autorun` template property 53 | 54 | 55 | ## 0.1.3 (2019-07-15) 56 | 57 | - patch to incorporate the `out_transform` bug fix for `OLSRegressionStep`, from 0.2.dev7 58 | 59 | 60 | ## 0.1.2 (2019-02-28) 61 | 62 | - patch to incorporate the small MNL bug fix from 0.2.dev1 63 | 64 | 65 | ## 0.1.1 (2019-02-05) 66 | 67 | #### 0.1.1.dev1 (2019-01-30) 68 | 69 | - adds support for passing multiple tables of interaction terms in large MNL 70 | - enables on-the-fly creation of output columns in small MNL 71 | 72 | #### 0.1.1.dev0 (2019-01-20) 73 | 74 | - allows join keys to be used as data filters in MNL simulation 75 | 76 | 77 | ## 0.1 (2019-01-16) 78 | 79 | #### 0.1.dev25 (2019-01-15) 80 | 81 | - fixes an OLS simulation bug that raised an error when the output column didn't exist yet 82 | - implements `out_transform` for OLS simulation 83 | 84 | #### 0.1.dev24 (2018-12-20) 85 | 86 | - fixes a string comparison bug that caused problems with binary logit output in Windows 87 | - adds `model` as an attribute of large MNL model steps, which provides a `choicemodels.MultinomialLogitResults` object and is available any time after a model step is fitted 88 | - enables on-the-fly creation of output columns in large MNL 89 | - fixes a large MNL simulation bug when there are no valid choosers or alternatives after evaluating the filters 90 | - moves unit tests out of the module directory 91 
| 92 | #### 0.1.dev23 (2018-12-13) 93 | 94 | - fixes a bug with interaction terms passed into `LargeMultinomialLogitStep.run()` 95 | 96 | #### 0.1.dev22 (2018-12-13) 97 | 98 | - narrows the output of `utils.get_data()` to include only the columns requested (plus the index of the primary table) -- previously Orca had also provided some extra columns such as join keys 99 | 100 | #### 0.1.dev21 (2018-12-11) 101 | 102 | - adds a new function `utils.get_data()` to assemble data from Orca, automatically detecting columns included in model expressions and filters 103 | 104 | - implements `SegmentedLargeMultinomialLogitStep.run_all()` 105 | 106 | #### 0.1.dev20 (2018-12-11) 107 | 108 | - fixes a model expression persistence bug in the small MNL template 109 | 110 | #### 0.1.dev19 (2018-12-06) 111 | 112 | - fixes a bug to allow large MNL simulation with multiple chooser tables 113 | 114 | #### 0.1.dev18 (2018-11-19) 115 | 116 | - improves installation and testing 117 | 118 | #### 0.1.dev17 (2018-11-15) 119 | 120 | - adds an `interaction_terms` parameter that users can manually pass to `LargeMultinomialLogitStep.run()`, as a temporary solution until interaction terms are fully handled by the templates 121 | - also adds a `chooser_batch_size` parameter in the same place, to reduce memory pressure when there are large numbers of choosers 122 | 123 | #### 0.1.dev16 (2018-11-06) 124 | 125 | - adds a tool for testing template validity 126 | 127 | #### 0.1.dev15 (2018-10-15) 128 | 129 | - adds new `LargeMultinomialLogitStep` parameters related to choice simulation: `constrained_choices`, `alt_capacity`, `chooser_size`, and `max_iter` 130 | - updates `LargeMultinomialLogitStep.run()` to use improved simulation utilities from ChoiceModels 0.2.dev4 131 | 132 | #### 0.1.dev14 (2018-09-25) 133 | 134 | - adds a template for segmented large MNL models: `SegmentedLargeMultinomialLogitStep`, which can automatically generate a set of large MNL models based on segmentation rules 135 | 136 | #### 
0.1.dev13 (2018-09-24) 137 | 138 | - adds a `@modelmanager.template` decorator that makes a class available to the currently running instance of ModelManager 139 | 140 | #### 0.1.dev12 (2018-09-19) 141 | 142 | - moves the `register()` operation to `modelmanager` (previously it was a method implemented by the individual templates) 143 | - adds general ModelManager support for supplemental objects like pickled model results 144 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Thanks for using UrbanSim Templates! 2 | 3 | This is an open source project that's part of the Urban Data Science Toolkit. Development and maintenance is a collaboration between UrbanSim Inc and U.C. Berkeley's Urban Analytics Lab. 4 | 5 | You can contact Sam Maurer, the lead developer, at `maurer@urbansim.com`. 6 | 7 | 8 | ## If you have a problem: 9 | 10 | - Take a look at the [open issues](https://github.com/UDST/urbansim_templates/issues) and [closed issues](https://github.com/UDST/urbansim_templates/issues?q=is%3Aissue+is%3Aclosed) to see if there's already a related discussion 11 | 12 | - Open a new issue describing the problem -- if possible, include any error messages, the operating system and version of python you're using, and versions of any libraries that may be relevant 13 | 14 | 15 | ## Feature proposals: 16 | 17 | - Take a look at the [open issues](https://github.com/UDST/urbansim_templates/issues) and [closed issues](https://github.com/UDST/urbansim_templates/issues?q=is%3Aissue+is%3Aclosed) to see if there's already a related discussion 18 | 19 | - Post your proposal as a new issue, so we can discuss it (some proposals may not be a good fit for the project) 20 | 21 | 22 | ## Contributing code: 23 | 24 | - Create a new branch of `UDST/urbansim_templates`, or fork the repository to your own account 25 | 26 | - Make your changes, following the 
existing styles for code and inline documentation 27 | 28 | - Add [tests](https://github.com/UDST/urbansim_templates/tree/master/tests) if possible! 29 | 30 | - Open a pull request to the `UDST/urbansim_templates` master branch, including a writeup of your changes -- take a look at some of the closed PR's for examples 31 | 32 | - Current maintainers will review the code, suggest changes, and hopefully merge it! 33 | 34 | 35 | ## Updating the version number: 36 | 37 | - Each pull request that changes substantive code should increment the development version number, e.g. from `0.2.dev7` to `0.2.dev8`, so that users know exactly which version they're running 38 | 39 | - It works best to do this just before merging (in case other PR's are merged first, and so you know the release date for the changelog and documentation) 40 | 41 | - There are three places where the version number needs to be changed: 42 | - `setup.py` 43 | - `urbansim_templates/__init__.py` 44 | - `docs/source/index.rst` 45 | 46 | - Please also add a section to `CHANGELOG.md` describing the changes! 
47 | 48 | 49 | ## Updating the documentation: 50 | 51 | - See instructions in `docs/README.md` 52 | 53 | 54 | ## Preparing a production release: 55 | 56 | - Make a new branch for release prep 57 | 58 | - Update the version number and `CHANGELOG.md` 59 | 60 | - Make sure all the tests are passing, and check if updates are needed to `README.md` or to the documentation 61 | 62 | - Open a pull request to the master branch to finalize it 63 | 64 | - After merging, tag the release on Github and follow the distribution procedures below 65 | 66 | 67 | ## Patching an earlier release: 68 | 69 | - We're not maintaining separate code branches for dev/ production/ major releases, but you can easily recreate them from tags if you need to patch an earlier release 70 | 71 | - In Github, create a new branch from the tag for the version you'd like to patch, calling it something like `v1-production` 72 | 73 | - Create a second branch from that one, called something like `v1-patch` 74 | 75 | - Make your changes in the `v1-patch` branch, and open a PR to `v1-production` to finalize it 76 | 77 | - After merging, tag the release on Github and follow the normal distribution procedures 78 | 79 | - After the new release is tagged, you can delete the extra branches -- a branch is just a pointer to the latest commit in a chain, and these commits will still be accessible via the tag 80 | 81 | 82 | ## Distributing a release on PyPI (for pip installation): 83 | 84 | - Register an account at https://pypi.org, ask one of the current maintainers to add you to the project, and `pip install twine` 85 | 86 | - Check out the copy of the code you'd like to release 87 | 88 | - Run `python setup.py sdist bdist_wheel --universal` 89 | 90 | - This should create a `dist` directory containing two package files -- delete any old ones before the next step 91 | 92 | - Run `twine upload dist/*` -- this will prompt you for your pypi.org credentials 93 | 94 | - Check https://pypi.org/project/urbansim-templates/ for 
the new version 95 | 96 | 97 | ## Distributing a release on Conda Forge (for conda installation): 98 | 99 | - Make a fork of the [conda-forge/urbansim_templates-feedstock](https://github.com/conda-forge/urbansim_templates-feedstock) repository -- there may already be a fork in udst 100 | 101 | - Edit `recipe/meta.yaml`: 102 | - update the version number 103 | - paste a new hash matching the tar.gz file that was uploaded to pypi (it's available on the pypi.org project page) 104 | 105 | - Check that the run requirements still match `requirements.txt` 106 | 107 | - Open a pull request to the `conda-forge/urbansim_templates-feedstock` master branch 108 | 109 | - Automated tests will run, and after they pass one of the current project maintainers will be able to merge the PR -- you can add your Github user name to the maintainers list in `meta.yaml` for the next update 110 | 111 | - Check https://anaconda.org/conda-forge/urbansim-templates for the new version (may take a few minutes for it to appear) 112 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, UrbanSim Inc. 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 
15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # files to include in the source distribution on pypi (setup.py and README.md are included automatically) 2 | 3 | include CHANGELOG.md 4 | include LICENSE.txt 5 | include requirements.txt 6 | include requirements-extras.txt 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/UDST/urbansim_templates.svg?branch=master)](https://travis-ci.org/UDST/urbansim_templates) 2 | [![Coverage Status](https://coveralls.io/repos/github/UDST/urbansim_templates/badge.svg?branch=master)](https://coveralls.io/github/UDST/urbansim_templates?branch=master) 3 | 4 | # UrbanSim Templates 5 | 6 | UrbanSim Templates is a Python library that provides building blocks for Orca-based simulation models. It's part of the [Urban Data Science Toolkit](https://docs.udst.org) (UDST). 7 | 8 | The library contains templates for common types of model steps, plus a tool called ModelManager that runs as an extension to the [Orca](https://udst.github.io/orca) task orchestrator. ModelManager can register template-based model steps with the orchestrator, save them to disk, and automatically reload them for future sessions. The package was developed to make it easier to set up new simulation models — model step templates reduce the need for custom code and make settings more portable between models. 9 | 10 | ### Installation 11 | UrbanSim Templates can be installed using the Pip or Conda package managers. 
12 | 13 | ``` 14 | pip install urbansim_templates 15 | ``` 16 | 17 | ``` 18 | conda install urbansim_templates --channel conda-forge 19 | ``` 20 | 21 | ### Documentation 22 | 23 | See the online documentation for much more: https://udst.github.io/urbansim_templates 24 | 25 | Some additional documentation is available within the repo in `CHANGELOG.md`, `CONTRIBUTING.md`, `/docs/README.md`, and `/tests/README.md`. 26 | 27 | There's discussion of current and planned features in the [pull requests](https://github.com/udst/urbansim_templates/pulls?utf8=✓&q=is%3Apr) and [issues](https://github.com/udst/urbansim_templates/issues?utf8=✓&q=is%3Aissue), both open and closed. 28 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | This folder generates the UrbanSim Templates online documentation, hosted at https://udst.github.io/urbansim_templates/. 2 | 3 | ### How it works 4 | 5 | HTML files are generated using [Sphinx](http://sphinx-doc.org) and hosted with GitHub Pages from the `gh-pages` branch of the repository. The online documentation is rendered and updated **manually**. 6 | 7 | ### Editing the documentation 8 | 9 | The files in `docs/source`, along with docstrings in the source code, determine what appears in the rendered documentation. Here's a [good tutorial](https://pythonhosted.org/an_example_pypi_project/sphinx.html) for Sphinx. 10 | 11 | ### Previewing changes locally 12 | 13 | Install the copy of UrbanSim Templates that the documentation is meant to reflect. Install the documentation tools. 14 | 15 | ``` 16 | pip install . 17 | pip install sphinx sphinx_rtd_theme 18 | ``` 19 | 20 | Build the documentation. There should be status messages and warnings, but no errors. 21 | 22 | ``` 23 | cd docs 24 | sphinx-build -b html source build 25 | ``` 26 | 27 | The HTML files will show up in `docs/build/`. 
28 | 29 | ### Uploading changes 30 | 31 | Clone a second copy of the repository and check out the `gh-pages` branch. Copy over the updated HTML files, commit them, and push the changes to GitHub. 32 | 33 | ### Discussion 34 | 35 | There are various discussions about documentation in the issue threads. [Issue #120](https://github.com/UDST/urbansim_templates/issues/120) is a good starting point. 36 | -------------------------------------------------------------------------------- /docs/build/.gitignore: -------------------------------------------------------------------------------- 1 | **/* -------------------------------------------------------------------------------- /docs/source/_static/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UDST/urbansim_templates/723b83b4187da53a50ee03fdba4842a464f68240/docs/source/_static/.gitignore -------------------------------------------------------------------------------- /docs/source/_templates/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UDST/urbansim_templates/723b83b4187da53a50ee03fdba4842a464f68240/docs/source/_templates/.gitignore -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # UrbanSim Templates documentation build configuration file, created by 5 | # sphinx-quickstart on Fri Jan 4 15:26:06 2019. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 
15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | # import os 21 | # import sys 22 | # sys.path.insert(0, os.path.abspath('../..')) 23 | 24 | import sphinx_rtd_theme 25 | 26 | 27 | # -- General configuration ------------------------------------------------ 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | # 31 | # needs_sphinx = '1.0' 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = [ 37 | 'sphinx.ext.autodoc', 38 | 'sphinx.ext.autosummary', 39 | 'sphinx.ext.napoleon', 40 | 'sphinx.ext.viewcode'] 41 | 42 | # Add any paths that contain templates here, relative to this directory. 43 | templates_path = ['_templates'] 44 | 45 | # The suffix(es) of source filenames. 46 | # You can specify multiple suffix as a list of string: 47 | # 48 | # source_suffix = ['.rst', '.md'] 49 | source_suffix = '.rst' 50 | 51 | # The master toctree document. 52 | master_doc = 'index' 53 | 54 | # General information about the project. 55 | project = 'UrbanSim Templates' 56 | copyright = '2021, UDST' 57 | author = 'UDST' 58 | 59 | # The version info for the project you're documenting, acts as replacement for 60 | # |version| and |release|, also used in various other places throughout the 61 | # built documents. 62 | # 63 | # The short X.Y version. 64 | # version = '0.1' 65 | # The full version, including alpha/beta/rc tags. 66 | # release = '0.1' 67 | import urbansim_templates 68 | version = release = urbansim_templates.__version__ 69 | 70 | # The language for content autogenerated by Sphinx. Refer to documentation 71 | # for a list of supported languages. 
72 | # 73 | # This is also used if you do content translation via gettext catalogs. 74 | # Usually you set "language" from the command line for these cases. 75 | language = None 76 | 77 | # List of patterns, relative to source directory, that match files and 78 | # directories to ignore when looking for source files. 79 | # This patterns also effect to html_static_path and html_extra_path 80 | exclude_patterns = [] 81 | 82 | # The name of the Pygments (syntax highlighting) style to use. 83 | pygments_style = 'sphinx' 84 | 85 | # If true, `todo` and `todoList` produce output, else they produce nothing. 86 | todo_include_todos = False 87 | 88 | 89 | # -- Options for HTML output ---------------------------------------------- 90 | 91 | # The theme to use for HTML and HTML Help pages. See the documentation for 92 | # a list of builtin themes. 93 | # 94 | html_theme = 'sphinx_rtd_theme' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | # 100 | # html_theme_options = {} 101 | 102 | # Add any paths that contain custom static files (such as style sheets) here, 103 | # relative to this directory. They are copied after the builtin static files, 104 | # so a file named "default.css" will overwrite the builtin "default.css". 105 | html_static_path = ['_static'] 106 | 107 | 108 | # -- Options for HTMLHelp output ------------------------------------------ 109 | 110 | # Output file base name for HTML help builder. 111 | htmlhelp_basename = 'UrbanSimTemplatesdoc' 112 | 113 | 114 | # -- Options for LaTeX output --------------------------------------------- 115 | 116 | latex_elements = { 117 | # The paper size ('letterpaper' or 'a4paper'). 118 | # 119 | # 'papersize': 'letterpaper', 120 | 121 | # The font size ('10pt', '11pt' or '12pt'). 122 | # 123 | # 'pointsize': '10pt', 124 | 125 | # Additional stuff for the LaTeX preamble. 
126 | # 127 | # 'preamble': '', 128 | 129 | # Latex figure (float) alignment 130 | # 131 | # 'figure_align': 'htbp', 132 | } 133 | 134 | # Grouping the document tree into LaTeX files. List of tuples 135 | # (source start file, target name, title, 136 | # author, documentclass [howto, manual, or own class]). 137 | latex_documents = [ 138 | (master_doc, 'UrbanSimTemplates.tex', 'UrbanSim Templates Documentation', 139 | 'UDST', 'manual'), 140 | ] 141 | 142 | 143 | # -- Options for manual page output --------------------------------------- 144 | 145 | # One entry per manual page. List of tuples 146 | # (source start file, name, description, authors, manual section). 147 | man_pages = [ 148 | (master_doc, 'urbansimtemplates', 'UrbanSim Templates Documentation', 149 | [author], 1) 150 | ] 151 | 152 | 153 | # -- Options for Texinfo output ------------------------------------------- 154 | 155 | # Grouping the document tree into Texinfo files. List of tuples 156 | # (source start file, target name, title, author, 157 | # dir menu entry, description, category) 158 | texinfo_documents = [ 159 | (master_doc, 'UrbanSimTemplates', 'UrbanSim Templates Documentation', 160 | author, 'UrbanSimTemplates', 'One line description of project.', 161 | 'Miscellaneous'), 162 | ] 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /docs/source/data-templates.rst: -------------------------------------------------------------------------------- 1 | Data management templates 2 | ========================= 3 | 4 | Usage 5 | ----- 6 | 7 | Data templates help you load tables into `Orca `__, create columns of derived data, or save tables or subsets of tables to disk. 8 | 9 | .. 
code-block:: python 10 | 11 | from urbansim_templates.data import LoadTable 12 | 13 | t = LoadTable() 14 | t.table = 'buildings' # a name for the Orca table 15 | t.source_type = 'csv' 16 | t.path = 'buildings.csv' 17 | t.csv_index_cols = 'building_id' 18 | t.name = 'load_buildings' # a name for the model step that sets up the table 19 | 20 | You can run this directly using ``t.run()``, or register the configured template to be part of a larger workflow: 21 | 22 | .. code-block:: python 23 | 24 | from urbansim_templates import modelmanager 25 | 26 | modelmanager.register(t) 27 | 28 | Registration does two things: (a) it saves the configured template to disk as a yaml file, and (b) it creates a model step with logic for loading the table. Running the model step is equivalent to running the configured template object: 29 | 30 | .. code-block:: python 31 | 32 | t.run() 33 | 34 | # equivalent: 35 | import orca 36 | orca.run(['load_buildings']) 37 | 38 | Strictly speaking, running the model step doesn't load the data, it just sets up an Orca table with instructions for loading the data when it's needed. (This is called lazy evaluation.) 39 | 40 | .. code-block:: python 41 | 42 | orca.run(['load_buildings']) # now an Orca table named 'buildings' is registered 43 | 44 | orca.get_table('buildings').to_frame() # now the data is read from disk 45 | 46 | Because "running" the table-loading step is costless, it's done automatically when you register a configured template. It's also done automatically when you initialize a ModelManager session and table-loading configs are read from yaml. (If you'd like to disable this for a particular table, you can set ``t.autorun = False``.) 47 | 48 | 49 | Recommended data schemas 50 | ~~~~~~~~~~~~~~~~~~~~~~~~ 51 | 52 | The :mod:`~urbansim_templates.data.LoadTable` template will work with any data that can be loaded into a Pandas DataFrame. But we highly recommend following stricter data schema rules: 53 | 54 | 1. 
Each table should include a unique, named index column (a.k.a. primary key) or set of columns (multi-index, a.k.a composite key). 55 | 56 | 2. If a column is meant to be a join key for another table, it should have the same name as the index of that table. 57 | 58 | 3. Duplication of column names across tables (except for the join keys) is discouraged, for clarity. 59 | 60 | If you follow these rules, tables can be automatically merged on the fly, for example to assemble estimation data or calculate indicators. 61 | 62 | You can use :func:`~urbansim_templates.utils.validate_table()` or :func:`~urbansim_templates.utils.validate_all_tables()` to check whether these expectations are met. When templates merge tables on the fly, they use :func:`~urbansim_templates.utils.merge_tables()`. 63 | 64 | These utility functions work with any Orca table that meets the schema expectations, whether or not it was created with a template. 65 | 66 | 67 | Compatibility with Orca 68 | ~~~~~~~~~~~~~~~~~~~~~~~ 69 | 70 | From Orca's perspective, tables set up using the :mod:`~urbansim_templates.data.LoadTable` template are equivalent to tables that are registered using ``orca.add_table()`` or the ``@orca.table`` decorator. Technically, they are ``orca.TableFuncWrapper`` objects. 71 | 72 | Unlike the templates, Orca relies on user-specified "`broadcast `__" relationships to perform automatic merging of tables. :mod:`~urbansim_templates.data.LoadTable` does not register any broadcasts, because they're not needed if tables follow the schema rules above. So if you use these tables in non-template model steps, you may need to add broadcasts separately. 73 | 74 | 75 | Data loading API 76 | ---------------- 77 | 78 | .. autoclass:: urbansim_templates.data.LoadTable 79 | :members: 80 | 81 | 82 | Column creation API 83 | ------------------- 84 | 85 | .. autoclass:: urbansim_templates.data.ColumnFromExpression 86 | :members: 87 | 88 | .. 
autoclass:: urbansim_templates.data.ExpressionSettings 89 | :members: 90 | 91 | Data output API 92 | --------------- 93 | 94 | .. autoclass:: urbansim_templates.data.SaveTable 95 | :members: 96 | -------------------------------------------------------------------------------- /docs/source/development.rst: -------------------------------------------------------------------------------- 1 | Development guide 2 | ================= 3 | 4 | Below are some strategies we've come up with for the templates. Technical contribution guidelines are in the `Github repo `__. 5 | 6 | 7 | Design patterns for templates 8 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 9 | 10 | A ModelManager-compliant template is a Python class that conforms to the following spec: 11 | 12 | 1. can save itself to a dict using a method named ``to_dict()`` 13 | 2. can rebuild itself from a dict using a method named ``from_dict()`` 14 | 3. can execute a configured version of itself using a method named ``run()`` 15 | 4. accepts parameters ``name`` (str) and ``tags`` (list of str) 16 | 5. uses the ``@modelmanager.template`` decorator 17 | 18 | Running a configured model step executes logic and typically saves output to Orca. 19 | 20 | Templates should try to use parameter names that are consistent or harmonious with other templates. 21 | 22 | Tables and columns of data should be input as named Orca objects. Other inputs that are hard to store as strings (like callables) should probably be input as Orca objects as well; we're still working on a solution for this. 23 | 24 | All template inputs should be accepted either as constructor parameters or object properties, if feasible: 25 | 26 | .. code-block:: python 27 | 28 | m1 = TemplateStep(foo='yes') 29 | m2 = TemplateStep() 30 | m2.foo = 'yes' 31 | 32 | It's fine for templates to require interactive configuration, like fitting a statistical model. Also fine to require these actions to be completed before the model step can be saved or run. 
33 | 34 | Ideally, users should be able to edit object properties and re-run the interactive components whenever they like. Changes will not be saved until an object is re-registered with ModelManager. 35 | 36 | Lightweight intermediate outputs like summary tables and fitted parameters should be saved in an object's dictionary representation if feasible. 37 | 38 | Bigger intermediate outputs, like pickled copies of full fitted models, can be automatically stored to disk by providing an entry named ``supplemental_objects`` in a model's dictionary representation. This should contain a list of dicts, each of which has parameters ``name`` (str), ``content`` (obj), and ``content_type`` (str, e.g. 'pickle'). 39 | 40 | To avoid dependency bloat, the default installation only includes the dependencies required for core model management and the most commonly used templates. Templates using additional libraries should check whether they're installed before fitting or running a model step, and provide helpful error messages if not. 41 | -------------------------------------------------------------------------------- /docs/source/getting-started.rst: -------------------------------------------------------------------------------- 1 | Getting started 2 | =============== 3 | 4 | Intro 5 | ----- 6 | 7 | UrbanSim Templates is a Python library that provides building blocks for Orca-based simulation models. It's part of the `Urban Data Science Toolkit `__ (UDST). 8 | 9 | The library contains templates for common types of model steps, plus a tool called ModelManager that runs as an extension to the `Orca `__ task orchestrator. ModelManager can register template-based model steps with the orchestrator, save them to disk, and automatically reload them for future sessions. The package was developed to make it easier to set up new simulation models — model step templates reduce the need for custom code and make settings more portable between models.
10 | 11 | UrbanSim Templates is `hosted on Github `__ with a BSD 3-Clause open source license. The code repository includes some material not found in this documentation: a `change log `__, a `contributor's guide `__, and instructions for `running the tests `__, `updating the documentation `__, and `creating a new release `__. 12 | 13 | Another useful resource is the `issues `__ and `pull requests `__ on Github, which include detailed feature proposals and other discussions. 14 | 15 | UrbanSim Templates was created in 2018 by Sam Maurer (maurer@urbansim.com), who remains the lead developer, with contributions from Paul Waddell, Max Gardner, Eddie Janowicz, Arezoo Besharati Zadeh, Xavier Gitiaux, and others. 16 | 17 | 18 | Installation 19 | ------------ 20 | 21 | UrbanSim Templates is currently tested with Python versions 3.6, 3.7, 3.8, and 3.9. 22 | 23 | Production releases 24 | ~~~~~~~~~~~~~~~~~~~ 25 | 26 | UrbanSim Templates can be installed using the Pip or Conda package managers. 27 | 28 | .. code-block:: python 29 | 30 | pip install urbansim_templates 31 | 32 | .. code-block:: python 33 | 34 | conda install urbansim_templates --channel conda-forge 35 | 36 | Dependencies include `NumPy `__, `Pandas `__, and `Statsmodels `__, plus two other UDST libraries: `Orca `__ and `ChoiceModels `__. These will be included automatically when you install UrbanSim Templates. 37 | 38 | Certain less-commonly-used templates require additional packages: currently, `PyLogit `__ and `Scikit-learn `__. You'll need to install these separately to use the associated templates. 39 | 40 | When new production releases of UrbanSim Templates come out, you can upgrade like this: 41 | 42 | .. code-block:: python 43 | 44 | pip install urbansim_templates --upgrade 45 | 46 | .. 
code-block:: python 47 | 48 | conda update urbansim_templates --channel conda-forge 49 | 50 | 51 | Developer pre-releases 52 | ~~~~~~~~~~~~~~~~~~~~~~ 53 | 54 | Developer pre-releases of UrbanSim Templates can be installed using the Github URL. These versions sometimes require having a developer release of `ChoiceModels `__ as well. Information about the developer releases can be found in Github `pull requests `__. 55 | 56 | .. code-block:: python 57 | 58 | pip install git+git://github.com/udst/choicemodels.git 59 | pip install git+git://github.com/udst/urbansim_templates.git 60 | 61 | You can use the same command to upgrade. 62 | 63 | 64 | Cloning the repository 65 | ~~~~~~~~~~~~~~~~~~~~~~ 66 | 67 | If you'll be modifying the code, you can install UrbanSim Templates by cloning the Github repository: 68 | 69 | .. code-block:: python 70 | 71 | git clone https://github.com/udst/urbansim_templates.git 72 | cd urbansim_templates 73 | python setup.py develop 74 | 75 | Update it with ``git pull``. 76 | 77 | 78 | Basic usage 79 | ----------- 80 | 81 | Initializing ModelManager 82 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 83 | 84 | To get started, import and initialize ModelManager. This makes sure there's a directory set up to store any template-based model steps that are generated within the script or notebook. 85 | 86 | .. code-block:: python 87 | 88 | from urbansim_templates import modelmanager 89 | 90 | modelmanager.initialize() 91 | 92 | The default file location is a ``configs`` folder located in the current working directory; you can provide an alternate path if needed. If ModelManager finds existing saved objects in the directory, it will load them and register them with Orca. 93 | 94 | .. note:: 95 | It can be helpful to add a cell to your notebook that reports which version of UrbanSim Templates is installed, particularly if you're using development releases! 96 | 97 | .. 
code-block:: python 98 | 99 | In [2]: import urbansim_templates 100 | print(urbansim_templates.__version__) 101 | 102 | Out[2]: '0.2' 103 | 104 | 105 | Creating a model step 106 | ~~~~~~~~~~~~~~~~~~~~~ 107 | 108 | Now we can choose a template and use it to build a model step. The templates are Python classes that contain logic for setting up and running different kinds of model logic — currently focusing on OLS regressions and discrete choice models. 109 | 110 | A template takes a variety of arguments, which can either be passed as parameters or set as object properties after an instance of the template is created. 111 | 112 | .. code-block:: python 113 | 114 | from urbansim_templates.models import OLSRegressionStep 115 | 116 | m = OLSRegressionStep() 117 | m.name = 'price-prediction' 118 | m.tables = 'buildings' 119 | m.model_expression = 'sale_price ~ residential_sqft' 120 | 121 | This sets up ``m`` as an instance of the OLS regression template. The ``tables`` and ``model_expression`` arguments refer to data that needs to be registered separately with Orca. So let's load the data before trying to estimate the model: 122 | 123 | .. code-block:: python 124 | 125 | import orca 126 | import pandas as pd 127 | 128 | url = "https://raw.githubusercontent.com/UDST/urbansim_templates/dev/examples/data/buildings-demo.csv" 129 | df = pd.read_csv(url).dropna() 130 | orca.add_table('buildings', df) 131 | 132 | 133 | Fitting the statistical model 134 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 135 | 136 | Now we can fit the building price model: 137 | 138 | .. code-block:: python 139 | 140 | m.fit() 141 | 142 | This will print a summary table describing the estimation results. 143 | 144 | Now that we have a fitted model, we can use it to predict sale prices for other buildings. UrbanSim forecasting models consist of many interconnected steps like this, iteratively predicting real estate prices, household moves, construction, and other urban dynamics. 
145 | 146 | 147 | Registering the step 148 | ~~~~~~~~~~~~~~~~~~~~ 149 | 150 | Now we can register the model step: 151 | 152 | .. code-block:: python 153 | 154 | modelmanager.register(m) 155 | 156 | ModelManager parses the step, saves a copy to disk, and registers a runnable version of it as a standard Orca step, so that it can be invoked as part of a sequence of other steps: 157 | 158 | .. code-block:: python 159 | 160 | orca.run(['price-prediction', 'household-moves', 'residential-development']) 161 | 162 | In real usage, some additional parameters would be set to specify which data to use for prediction, and where to store the output. 163 | 164 | 165 | Making changes 166 | ~~~~~~~~~~~~~~ 167 | 168 | ModelManager also includes some interactive functionality. Previously registered steps can be retrieved as template objects, which can be modified and re-registered as needed. This also works with model steps loaded from disk. 169 | 170 | .. code-block:: python 171 | 172 | modelmanager.list_steps() 173 | 174 | m2 = modelmanager.get_step('price-prediction') 175 | ... 176 | 177 | m2.name = 'better-price-prediction' 178 | modelmanager.register(m2) 179 | modelmanager.remove_step('price-prediction') 180 | 181 | If you take a look in the ``configs`` folder, you'll see a yaml file representing the saved model step. It includes the settings we provided, plus the fitted coefficients and anything else generated by the internal logic of the template. 182 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. UrbanSim Templates documentation master file, created by 2 | sphinx-quickstart on Fri Jan 4 15:26:06 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 
5 | 6 | UrbanSim Templates 7 | ================== 8 | 9 | UrbanSim Templates provides building blocks for Orca-based simulation models. It's part of the `Urban Data Science Toolkit `__ (UDST). 10 | 11 | The library contains templates for common types of model steps, plus a tool called ModelManager that runs as an extension to the `Orca `__ task orchestrator. ModelManager can register template-based model steps with the orchestrator, save them to disk, and automatically reload them for future sessions. 12 | 13 | v0.2.dev9, released May 15, 2020 14 | 15 | 16 | Contents 17 | -------- 18 | 19 | .. toctree:: 20 | :maxdepth: 2 21 | 22 | getting-started 23 | modelmanager 24 | model-steps 25 | data-templates 26 | utilities 27 | development 28 | -------------------------------------------------------------------------------- /docs/source/model-steps.rst: -------------------------------------------------------------------------------- 1 | Model step template APIs 2 | ======================== 3 | 4 | The following templates are included in the core package. ModelManager can also work with templates defined elsewhere, as long as they follow the specifications described in the design guidelines. 5 | 6 | 7 | OLS Regression 8 | -------------- 9 | 10 | .. autoclass:: urbansim_templates.models.OLSRegressionStep 11 | :members: 12 | 13 | 14 | Binary Logit 15 | ------------ 16 | 17 | .. autoclass:: urbansim_templates.models.BinaryLogitStep 18 | :members: 19 | 20 | 21 | Small Multinomial Logit 22 | ----------------------- 23 | 24 | .. autoclass:: urbansim_templates.models.SmallMultinomialLogitStep 25 | :members: 26 | 27 | 28 | Large Multinomial Logit 29 | ----------------------- 30 | 31 | .. autoclass:: urbansim_templates.models.LargeMultinomialLogitStep 32 | :members: 33 | 34 | 35 | Segmented Large Multinomial Logit 36 | --------------------------------- 37 | 38 | ..
autoclass:: urbansim_templates.models.SegmentedLargeMultinomialLogitStep 39 | :members: 40 | 41 | 42 | Template Step parent class 43 | -------------------------- 44 | 45 | .. autoclass:: urbansim_templates.models.TemplateStep 46 | :members: -------------------------------------------------------------------------------- /docs/source/modelmanager.rst: -------------------------------------------------------------------------------- 1 | ModelManager API 2 | ================ 3 | 4 | ModelManager runs as an extension to the `Orca `__ task orchestrator. ModelManager can register template-based model steps with the orchestrator, save them to disk, and automatically reload them for future sessions. 5 | 6 | The recommended way to load ModelManager is like this:: 7 | 8 | from urbansim_templates import modelmanager 9 | 10 | modelmanager.initialize() 11 | 12 | 13 | Core operations 14 | --------------- 15 | 16 | .. automodule:: urbansim_templates.modelmanager 17 | :members: initialize, register, list_steps, get_step, remove_step 18 | 19 | 20 | Internal functionality 21 | ---------------------- 22 | 23 | These functions are the building blocks of ModelManager. You probably won't need to use 24 | them directly, but they could be useful for debugging or for extending ModelManager's 25 | functionality. 26 | 27 | .. automodule:: urbansim_templates.modelmanager 28 | :members: template, build_step, save_step_to_disk, load_supplemental_object, 29 | save_supplemental_object, remove_supplemental_object, get_config_dir -------------------------------------------------------------------------------- /docs/source/utilities.rst: -------------------------------------------------------------------------------- 1 | Shared utilities 2 | ================ 3 | 4 | The utilities are mainly helper functions for templates. 5 | 6 | 7 | General template tools API 8 | -------------------------- 9 | 10 | .. 
automodule:: urbansim_templates.shared 11 | :members: CoreTemplateSettings 12 | 13 | 14 | Column output tools API 15 | ----------------------- 16 | 17 | .. automodule:: urbansim_templates.shared 18 | :members: OutputColumnSettings, register_column 19 | 20 | 21 | Table schemas and merging API 22 | ----------------------------- 23 | 24 | .. automodule:: urbansim_templates.utils 25 | :members: validate_table, validate_all_tables, merge_tables 26 | 27 | 28 | Other helper functions API 29 | -------------------------- 30 | 31 | .. automodule:: urbansim_templates.utils 32 | :members: all_cols, cols_in_expression, get_data, get_df, trim_cols, to_list, update_column, update_name 33 | 34 | 35 | Spec validation API 36 | ------------------- 37 | 38 | .. automodule:: urbansim_templates.utils 39 | :members: validate_template 40 | 41 | 42 | Version management API 43 | ---------------------- 44 | 45 | .. automodule:: urbansim_templates.utils 46 | :members: parse_version, version_greater_or_equal 47 | -------------------------------------------------------------------------------- /examples/UrbanSim-Templates-demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "fixed-tenant", 6 | "metadata": {}, 7 | "source": [ 8 | "# UrbanSim Templates demo\n", 9 | "\n", 10 | "Sam Maurer, Feb 2021\n", 11 | "\n", 12 | "### Background\n", 13 | "\n", 14 | "[UrbanSim](https://github.com/udst/urbansim) is a platform for modeling land use in cities. It runs in Python and uses the [Orca](https://github.com/udst/orca) task orchestration system. \n", 15 | "\n", 16 | "Orca breaks a model into \"steps\", Python functions that can be assembled on the fly into linear or cyclical pipelines. (Typically each step is a statistical model capturing one aspect of the dynamics being studied.) 
Orca is designed for workflows like city simulation where the data representing a model's state is so large that it needs to be managed outside the task graph. Steps refer to tables and columns of data by name rather than passing the data directly.\n", 17 | "\n", 18 | "UrbanSim [Templates](https://github.com/udst/urbansim_templates) is a library that provides automated building blocks for Orca-based models. The templates were developed to reduce the need for custom code and improve the portability of model components.\n", 19 | "\n", 20 | "Currently we have templates for (a) regression, (b) binary logit, (c) multinomial logit estimated with [PyLogit](https://github.com/timothyb0912/pylogit) (best choice for flexible utility expressions), and (d) multinomial logit estimated with [ChoiceModels](https://github.com/udst/choicemodels) (best choice for sampling of interchangeable alternatives).\n", 21 | "\n", 22 | "### Documentation\n", 23 | "\n", 24 | "Full UrbanSim Templates documentation: https://udst.github.io/urbansim_templates/\n", 25 | "\n", 26 | "### Installation\n", 27 | "\n", 28 | "You can install `orca` and `urbansim_templates` with Pip or from Conda Forge." 
29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "id": "hearing-rescue", 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "1.2.1\n" 42 | ] 43 | } 44 | ], 45 | "source": [ 46 | "import pandas as pd\n", 47 | "\n", 48 | "print(pd.__version__)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "id": "ultimate-durham", 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "1.5.4\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "import orca\n", 67 | "\n", 68 | "print(orca.__version__)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 3, 74 | "id": "taken-membership", 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "0.2.dev9\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "import urbansim_templates\n", 87 | "\n", 88 | "print(urbansim_templates.__version__)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 4, 94 | "id": "independent-macedonia", 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# This makes the notebook output clearer\n", 99 | "import warnings\n", 100 | "warnings.simplefilter(action='ignore', category=FutureWarning)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "id": "ultimate-partner", 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "id": "stylish-problem", 114 | "metadata": {}, 115 | "source": [ 116 | "### Setting up ModelManager\n", 117 | "\n", 118 | "[ModelManager](https://udst.github.io/urbansim_templates/modelmanager.html) is part of the Templates library. It's an extension to Orca for saving and loading template-based model steps. 
\n", 119 | "\n", 120 | "By default it will look for a folder named `configs` in your current working directory, where it will read and save yaml representations of model steps. If there are already model steps there, the corresponding template classes need to be loaded before initializing ModelManager." 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 5, 126 | "id": "fatal-welsh", 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "No yaml files found in path 'configs'\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "from urbansim_templates.models import OLSRegressionStep\n", 139 | "from urbansim_templates import modelmanager\n", 140 | "\n", 141 | "modelmanager.initialize()" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "id": "acute-savings", 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "id": "legitimate-square", 155 | "metadata": {}, 156 | "source": [ 157 | "### Setting up data\n", 158 | "\n", 159 | "We'll load a DataFrame and register it with Orca, so that our statistical models can refer to it." 
160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 6, 165 | "id": "polyphonic-pointer", 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "text/plain": [ 171 | "482" 172 | ] 173 | }, 174 | "execution_count": 6, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "df = pd.read_csv('data/buildings-demo.csv').dropna()\n", 181 | "len(df)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 7, 187 | "id": "quarterly-rugby", 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": [ 193 | "" 194 | ] 195 | }, 196 | "execution_count": 7, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "orca.add_table('buildings', df)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 8, 208 | "id": "broken-manchester", 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "text/html": [ 214 | "
\n", 215 | "\n", 228 | "\n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | "
building_idparcel_iddevelopment_type_idimprovement_valueresidential_unitsresidential_sqftsqft_per_unitnon_residential_sqftbuilding_sqftres_price_per_sqftstoriesyear_builtsale_pricesale_yearbuilding_type_id
3732871210.0113931393.000.00.00000012008670250.02008.01
497426611116580.0110181018.001018.0474.35053411946703000.02007.01
6117166261457526.0136933693.003693.0124.8244321199895000.01996.01
1015742822195050.0111061106.001106.0448.07426111957675000.02005.01
13187434441166000.0113541354.001354.0411.5064011195118500.02006.01
\n", 342 | "
" 343 | ], 344 | "text/plain": [ 345 | " building_id parcel_id development_type_id improvement_value \\\n", 346 | "3 7 328712 1 0.0 \n", 347 | "4 9 742661 1 116580.0 \n", 348 | "6 11 716626 1 457526.0 \n", 349 | "10 15 742822 1 95050.0 \n", 350 | "13 18 743444 1 166000.0 \n", 351 | "\n", 352 | " residential_units residential_sqft sqft_per_unit non_residential_sqft \\\n", 353 | "3 1 1393 1393.0 0 \n", 354 | "4 1 1018 1018.0 0 \n", 355 | "6 1 3693 3693.0 0 \n", 356 | "10 1 1106 1106.0 0 \n", 357 | "13 1 1354 1354.0 0 \n", 358 | "\n", 359 | " building_sqft res_price_per_sqft stories year_built sale_price \\\n", 360 | "3 0.0 0.000000 1 2008 670250.0 \n", 361 | "4 1018.0 474.350534 1 1946 703000.0 \n", 362 | "6 3693.0 124.824432 1 1998 95000.0 \n", 363 | "10 1106.0 448.074261 1 1957 675000.0 \n", 364 | "13 1354.0 411.506401 1 1951 18500.0 \n", 365 | "\n", 366 | " sale_year building_type_id \n", 367 | "3 2008.0 1 \n", 368 | "4 2007.0 1 \n", 369 | "6 1996.0 1 \n", 370 | "10 2005.0 1 \n", 371 | "13 2006.0 1 " 372 | ] 373 | }, 374 | "execution_count": 8, 375 | "metadata": {}, 376 | "output_type": "execute_result" 377 | } 378 | ], 379 | "source": [ 380 | "orca.get_table('buildings').to_frame().head()" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "id": "cheap-sugar", 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "id": "international-ordering", 394 | "metadata": {}, 395 | "source": [ 396 | "### Fitting a model\n", 397 | "\n", 398 | "Now we can choose a [template](https://udst.github.io/urbansim_templates/model-steps.html) and use it to fit a model." 
399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 9, 404 | "id": "studied-federation", 405 | "metadata": {}, 406 | "outputs": [], 407 | "source": [ 408 | "from urbansim_templates.models import OLSRegressionStep\n", 409 | "\n", 410 | "m = OLSRegressionStep()\n", 411 | "m.name = 'price-prediction'\n", 412 | "m.tables = 'buildings'\n", 413 | "m.model_expression = 'np.log1p(res_price_per_sqft) ~ non_residential_sqft>0 + year_built<1960'" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 10, 419 | "id": "checked-addition", 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "name": "stdout", 424 | "output_type": "stream", 425 | "text": [ 426 | " OLS Regression Results \n", 427 | "========================================================================================\n", 428 | "Dep. Variable: np.log1p(res_price_per_sqft) R-squared: 0.398\n", 429 | "Model: OLS Adj. R-squared: 0.395\n", 430 | "Method: Least Squares F-statistic: 158.1\n", 431 | "Date: Tue, 09 Feb 2021 Prob (F-statistic): 1.93e-53\n", 432 | "Time: 12:02:09 Log-Likelihood: -598.98\n", 433 | "No. 
Observations: 482 AIC: 1204.\n", 434 | "Df Residuals: 479 BIC: 1216.\n", 435 | "Df Model: 2 \n", 436 | "Covariance Type: nonrobust \n", 437 | "====================================================================================================\n", 438 | " coef std err t P>|t| [0.025 0.975]\n", 439 | "----------------------------------------------------------------------------------------------------\n", 440 | "Intercept 5.5567 0.047 118.870 0.000 5.465 5.649\n", 441 | "non_residential_sqft > 0[T.True] -5.6513 0.320 -17.642 0.000 -6.281 -5.022\n", 442 | "year_built < 1960[T.True] 0.2206 0.082 2.693 0.007 0.060 0.382\n", 443 | "==============================================================================\n", 444 | "Omnibus: 511.938 Durbin-Watson: 1.611\n", 445 | "Prob(Omnibus): 0.000 Jarque-Bera (JB): 21647.939\n", 446 | "Skew: -4.895 Prob(JB): 0.00\n", 447 | "Kurtosis: 34.338 Cond. No. 8.89\n", 448 | "==============================================================================\n", 449 | "\n", 450 | "Notes:\n", 451 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" 452 | ] 453 | } 454 | ], 455 | "source": [ 456 | "m.fit()" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "id": "concerned-argument", 463 | "metadata": {}, 464 | "outputs": [], 465 | "source": [] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "id": "widespread-cache", 470 | "metadata": {}, 471 | "source": [ 472 | "### Registering the step\n", 473 | "\n", 474 | "When we're happy with the specification, we can \"register\" the step with ModelManager. This saves a copy to disk and also passes it to Orca so it can be run as part of a sequence of other steps for validation or simulation." 
475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 11, 480 | "id": "thick-steam", 481 | "metadata": {}, 482 | "outputs": [ 483 | { 484 | "name": "stdout", 485 | "output_type": "stream", 486 | "text": [ 487 | "Saving 'price-prediction.yaml': /Users/maurer/Dropbox/Git-imac/udst/urbansim_templates/examples/configs\n", 488 | "Registering model step 'price-prediction'\n" 489 | ] 490 | } 491 | ], 492 | "source": [ 493 | "modelmanager.register(m)" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "id": "egyptian-newport", 500 | "metadata": {}, 501 | "outputs": [], 502 | "source": [] 503 | }, 504 | { 505 | "cell_type": "markdown", 506 | "id": "bound-rapid", 507 | "metadata": {}, 508 | "source": [ 509 | "### Making changes\n", 510 | "\n", 511 | "Previously registered steps can be retrieved, modified, and re-registered as needed." 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": 12, 517 | "id": "portable-supplier", 518 | "metadata": {}, 519 | "outputs": [ 520 | { 521 | "data": { 522 | "text/plain": [ 523 | "[{'name': 'price-prediction', 'template': 'OLSRegressionStep', 'tags': []}]" 524 | ] 525 | }, 526 | "execution_count": 12, 527 | "metadata": {}, 528 | "output_type": "execute_result" 529 | } 530 | ], 531 | "source": [ 532 | "modelmanager.list_steps()" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 13, 538 | "id": "married-delay", 539 | "metadata": {}, 540 | "outputs": [], 541 | "source": [ 542 | "m2 = modelmanager.get_step('price-prediction')" 543 | ] 544 | }, 545 | { 546 | "cell_type": "code", 547 | "execution_count": 14, 548 | "id": "responsible-logic", 549 | "metadata": {}, 550 | "outputs": [ 551 | { 552 | "name": "stdout", 553 | "output_type": "stream", 554 | "text": [ 555 | "Saving 'better-price-prediction.yaml': /Users/maurer/Dropbox/Git-imac/udst/urbansim_templates/examples/configs\n", 556 | "Registering model step 'better-price-prediction'\n" 
557 | ] 558 | } 559 | ], 560 | "source": [ 561 | "m2.name = 'better-price-prediction'\n", 562 | "# here you can edit the specification and re-fit, etc.\n", 563 | "\n", 564 | "modelmanager.register(m2)" 565 | ] 566 | }, 567 | { 568 | "cell_type": "code", 569 | "execution_count": 15, 570 | "id": "productive-wyoming", 571 | "metadata": {}, 572 | "outputs": [ 573 | { 574 | "name": "stdout", 575 | "output_type": "stream", 576 | "text": [ 577 | "Removing 'better-price-prediction' and 'better-price-prediction.yaml'\n" 578 | ] 579 | } 580 | ], 581 | "source": [ 582 | "modelmanager.remove_step('better-price-prediction')" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": null, 588 | "id": "inclusive-annual", 589 | "metadata": {}, 590 | "outputs": [], 591 | "source": [] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": null, 596 | "id": "martial-fortune", 597 | "metadata": {}, 598 | "outputs": [], 599 | "source": [] 600 | } 601 | ], 602 | "metadata": { 603 | "kernelspec": { 604 | "display_name": "Python [conda env:template-demo] *", 605 | "language": "python", 606 | "name": "conda-env-template-demo-py" 607 | }, 608 | "language_info": { 609 | "codemirror_mode": { 610 | "name": "ipython", 611 | "version": 3 612 | }, 613 | "file_extension": ".py", 614 | "mimetype": "text/x-python", 615 | "name": "python", 616 | "nbconvert_exporter": "python", 617 | "pygments_lexer": "ipython3", 618 | "version": "3.8.5" 619 | } 620 | }, 621 | "nbformat": 4, 622 | "nbformat_minor": 5 623 | } 624 | -------------------------------------------------------------------------------- /examples/configs/README.md: -------------------------------------------------------------------------------- 1 | This folder stores configs that are generated by the demo notebook. 
-------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # requirements for development and testing 2 | 3 | coverage 4 | coveralls 5 | pytest 6 | sphinx 7 | sphinx_rtd_theme -------------------------------------------------------------------------------- /requirements-extras.txt: -------------------------------------------------------------------------------- 1 | # additional requirements for less-used templates 2 | 3 | pylogit >= 0.2 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='urbansim_templates', 5 | version='0.2.dev9', 6 | description='UrbanSim extension for managing model steps', 7 | author='UrbanSim Inc.', 8 | author_email='info@urbansim.com', 9 | url='https://github.com/udst/urbansim_templates', 10 | classifiers=[ 11 | 'Programming Language :: Python :: 2', 12 | 'Programming Language :: Python :: 2.7', 13 | 'Programming Language :: Python :: 3', 14 | 'Programming Language :: Python :: 3.5', 15 | 'Programming Language :: Python :: 3.6', 16 | 'Programming Language :: Python :: 3.7', 17 | 'Programming Language :: Python :: 3.8', 18 | 'License :: OSI Approved :: BSD License' 19 | ], 20 | packages=find_packages(exclude=['*.tests']), 21 | install_requires=[ 22 | 'choicemodels >= 0.2.2.dev1', 23 | 'numpy >= 1.14', 24 | 'orca >= 1.4', 25 | 'pandas >= 0.23', 26 | 'patsy >= 0.4', 27 | 'statsmodels >= 0.8, <0.11; python_version <"3.6"', 28 | 'statsmodels >= 0.8; python_version >="3.6"', 29 | 'urbansim >= 3.1' 30 | ] 31 | ) 32 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | .cache/* 2 | .coverage 3 | 
__pycache__/* -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | Run tests from this folder using `pytest *.py -s`. -------------------------------------------------------------------------------- /tests/configs/README.md: -------------------------------------------------------------------------------- 1 | This folder stores configs that are temporarily generated during tests. -------------------------------------------------------------------------------- /tests/data/README.md: -------------------------------------------------------------------------------- 1 | This folder stores data that is temporarily generated during tests. -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore:::orca 4 | ignore:::urbansim 5 | ignore:::pandas 6 | ignore:::past 7 | ignore:::prettytable 8 | ignore:::statsmodels 9 | ignore:::yaml -------------------------------------------------------------------------------- /tests/test_binary_logit.py: -------------------------------------------------------------------------------- 1 | import orca 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | from urbansim_templates import modelmanager 7 | from urbansim_templates.models import BinaryLogitStep 8 | from urbansim_templates.utils import validate_template 9 | 10 | 11 | @pytest.fixture 12 | def orca_session(): 13 | d1 = {'a': np.random.random(100), 14 | 'b': np.random.randint(2, size=100)} 15 | 16 | obs = pd.DataFrame(d1) 17 | orca.add_table('obs', obs) 18 | 19 | 20 | def test_template_validity(): 21 | """ 22 | Run the template through the standard validation check. 
23 | 24 | """ 25 | assert validate_template(BinaryLogitStep) 26 | 27 | 28 | def test_binary_logit(orca_session): 29 | """ 30 | For now this just tests that the code runs. 31 | 32 | """ 33 | modelmanager.initialize() 34 | 35 | m = BinaryLogitStep() 36 | m.tables = 'obs' 37 | m.model_expression = 'b ~ a' 38 | 39 | m.fit() 40 | 41 | m.name = 'binary-test' 42 | modelmanager.register(m) 43 | 44 | modelmanager.initialize() 45 | m = modelmanager.get_step('binary-test') 46 | 47 | modelmanager.remove_step('binary-test') -------------------------------------------------------------------------------- /tests/test_column_expression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pytest 4 | 5 | import orca 6 | 7 | from urbansim_templates import modelmanager 8 | from urbansim_templates.data import ColumnFromExpression, ExpressionSettings 9 | from urbansim_templates.utils import validate_template 10 | 11 | 12 | def test_expression_settings_persistence(): 13 | """ 14 | Confirm ExpressionSettings properties persist through the constructor, to_dict(), 15 | and from_dict(). 16 | 17 | """ 18 | d = {'table': 'tab', 'expression': 'a + b + c'} 19 | obj = ExpressionSettings(table = 'tab', expression = 'a + b + c') 20 | 21 | assert(d == obj.to_dict() == ExpressionSettings.from_dict(d).to_dict()) 22 | 23 | 24 | def test_legacy_data_loader(orca_session): 25 | """ 26 | Check that loading a saved dict with the legacy format works. 
27 | 28 | """ 29 | d = { 30 | 'name': 'n', 31 | 'tags': ['a', 'b'], 32 | 'autorun': False, 33 | 'column_name': 'col', 34 | 'table': 'tab', 35 | 'expression': 'abc', 36 | 'data_type': 'int', 37 | 'missing_values': 5, 38 | 'cache': True, 39 | 'cache_scope': 'step'} 40 | 41 | c = ColumnFromExpression.from_dict(d) 42 | assert(c.meta.name == d['name']) 43 | assert(c.meta.tags == d['tags']) 44 | assert(c.meta.autorun == d['autorun']) 45 | assert(c.data.table == d['table']) 46 | assert(c.data.expression == d['expression']) 47 | assert(c.output.column_name == d['column_name']) 48 | assert(c.output.data_type == d['data_type']) 49 | assert(c.output.missing_values == d['missing_values']) 50 | assert(c.output.cache == d['cache']) 51 | assert(c.output.cache_scope == d['cache_scope']) 52 | 53 | 54 | @pytest.fixture 55 | def orca_session(): 56 | """ 57 | Set up a clean Orca and ModelManager session, with a data table. 58 | 59 | """ 60 | orca.clear_all() 61 | modelmanager.initialize() 62 | 63 | d1 = {'id': np.arange(5), 64 | 'a': np.random.random(5), 65 | 'b': np.random.choice(np.arange(20), size=5)} 66 | 67 | df = pd.DataFrame(d1).set_index('id') 68 | orca.add_table('obs', df) 69 | 70 | 71 | # def test_template_validity(): 72 | # """ 73 | # Check template conforms to basic spec. 74 | # 75 | # """ 76 | # assert validate_template(ColumnFromExpression) 77 | 78 | 79 | def test_missing_colname(orca_session): 80 | """ 81 | Missing column_name should raise a ValueError. 82 | 83 | """ 84 | c = ColumnFromExpression() 85 | c.data.table = 'tab' 86 | c.data.expression = 'a' 87 | 88 | try: 89 | c.run() 90 | except ValueError as e: 91 | print(e) 92 | return 93 | 94 | pytest.fail() 95 | 96 | 97 | def test_missing_table(orca_session): 98 | """ 99 | Missing table should raise a ValueError. 
100 | 101 | """ 102 | c = ColumnFromExpression() 103 | c.data.expression = 'a' 104 | c.output.column_name = 'col' 105 | 106 | try: 107 | c.run() 108 | except ValueError as e: 109 | print(e) 110 | return 111 | 112 | pytest.fail() 113 | 114 | 115 | def test_missing_expression(orca_session): 116 | """ 117 | Missing expression should raise a ValueError. 118 | 119 | """ 120 | c = ColumnFromExpression() 121 | c.data.table = 'tab' 122 | c.output.column_name = 'col' 123 | 124 | try: 125 | c.run() 126 | except ValueError as e: 127 | print(e) 128 | return 129 | 130 | pytest.fail() 131 | 132 | 133 | def test_expression(orca_session): 134 | """ 135 | Check that column is created and expression evaluated correctly. 136 | 137 | """ 138 | c = ColumnFromExpression() 139 | c.data.table = 'obs' 140 | c.data.expression = 'a * 5 + sqrt(b)' 141 | c.output.column_name = 'c' 142 | 143 | c.run() 144 | 145 | val1 = orca.get_table('obs').get_column('c') 146 | df = orca.get_table('obs').to_frame() 147 | val2 = df.a * 5 + np.sqrt(df.b) 148 | assert(val1.equals(val2)) 149 | 150 | 151 | def test_modelmanager_registration(orca_session): 152 | """ 153 | Check that modelmanager registration and auto-run work as expected. 154 | 155 | """ 156 | c = ColumnFromExpression() 157 | c.data.table = 'obs' 158 | c.data.expression = 'a + b' 159 | c.output.column_name = 'c' 160 | 161 | modelmanager.register(c) 162 | modelmanager.remove_step(c.meta.name) 163 | assert('c' in orca.get_table('obs').columns) 164 | 165 | 166 | def test_expression_with_standalone_columns(orca_session): 167 | """ 168 | Check that expression can assemble data from stand-alone columns that are not part 169 | of the core DataFrame wrapped by a table. 
170 | 171 | """ 172 | c = ColumnFromExpression() 173 | c.data.table = 'obs' 174 | c.data.expression = 'a + b' 175 | c.output.column_name = 'c' 176 | 177 | modelmanager.register(c) 178 | modelmanager.remove_step(c.meta.name) 179 | 180 | d = ColumnFromExpression() 181 | d.data.table = 'obs' 182 | d.data.expression = 'a + c' 183 | d.output.column_name = 'd' 184 | 185 | d.run() 186 | assert('d' in orca.get_table('obs').columns) 187 | 188 | -------------------------------------------------------------------------------- /tests/test_data_load.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import pytest 6 | 7 | import orca 8 | 9 | from urbansim_templates import modelmanager 10 | from urbansim_templates.data import LoadTable 11 | from urbansim_templates.utils import validate_template 12 | 13 | 14 | @pytest.fixture 15 | def orca_session(): 16 | """ 17 | Set up a clean Orca session and initialize ModelManager. 18 | 19 | """ 20 | orca.clear_all() 21 | modelmanager.initialize() 22 | 23 | 24 | @pytest.fixture 25 | def data(request): 26 | """ 27 | Create some data files on disk. 28 | 29 | """ 30 | d1 = {'building_id': np.arange(10), 31 | 'price': 1e6*np.random.random(10)} 32 | 33 | bldg = pd.DataFrame(d1).set_index('building_id') 34 | bldg.to_csv('data/buildings.csv') 35 | bldg.to_csv('data/buildings.csv.gz', compression='gzip') 36 | bldg.to_hdf('data/buildings.hdf', key='buildings') 37 | 38 | def teardown(): 39 | os.remove('data/buildings.csv') 40 | os.remove('data/buildings.csv.gz') 41 | os.remove('data/buildings.hdf') 42 | 43 | request.addfinalizer(teardown) 44 | 45 | 46 | def test_template_validity(): 47 | """ 48 | Run the templates through the standard validation check. 
49 | 50 | """ 51 | assert validate_template(LoadTable) 52 | 53 | 54 | def test_property_persistence(orca_session): 55 | """ 56 | Test persistence of properties across registration, saving, and reloading. 57 | 58 | """ 59 | t = LoadTable() 60 | t.table = 'buildings' 61 | t.source_type = 'csv' 62 | t.path = 'data/buildings.csv' 63 | t.csv_index_cols = 'building_id' 64 | t.extra_settings = {'make_data_awesome': True} # unfortunately not a valid setting 65 | t.cache = False 66 | t.cache_scope = 'iteration' 67 | t.copy_col = False 68 | t.name = 'buildings-csv' 69 | t.tags = ['awesome', 'data'] 70 | t.autorun = False 71 | 72 | d1 = t.to_dict() 73 | modelmanager.register(t) 74 | modelmanager.initialize() 75 | d2 = modelmanager.get_step(t.name).to_dict() 76 | 77 | assert d1 == d2 78 | modelmanager.remove_step(t.name) 79 | 80 | 81 | def test_csv(orca_session, data): 82 | """ 83 | Test loading data from a CSV file. 84 | 85 | """ 86 | t = LoadTable() 87 | t.table = 'buildings' 88 | t.source_type = 'csv' 89 | t.path = 'data/buildings.csv' 90 | t.csv_index_cols = 'building_id' 91 | 92 | assert 'buildings' not in orca.list_tables() 93 | 94 | modelmanager.register(t) 95 | assert 'buildings' in orca.list_tables() 96 | _ = orca.get_table('buildings').to_frame() 97 | 98 | modelmanager.initialize() 99 | assert 'buildings' in orca.list_tables() 100 | 101 | modelmanager.remove_step(t.name) 102 | 103 | 104 | def test_hdf(orca_session, data): 105 | """ 106 | Test loading data from an HDF file. 
107 | 108 | """ 109 | t = LoadTable() 110 | t.table = 'buildings' 111 | t.source_type = 'hdf' 112 | t.path = 'data/buildings.hdf' 113 | 114 | assert 'buildings' not in orca.list_tables() 115 | 116 | modelmanager.register(t) 117 | assert 'buildings' in orca.list_tables() 118 | _ = orca.get_table('buildings').to_frame() 119 | 120 | modelmanager.initialize() 121 | assert 'buildings' in orca.list_tables() 122 | 123 | modelmanager.remove_step(t.name) 124 | 125 | 126 | def test_extra_settings(orca_session, data): 127 | """ 128 | Test loading data with extra settings, e.g. for compressed files. 129 | 130 | """ 131 | t = LoadTable() 132 | t.table = 'buildings' 133 | t.source_type = 'csv' 134 | t.path = 'data/buildings.csv.gz' 135 | t.csv_index_cols = 'building_id' 136 | t.extra_settings = {'compression': 'gzip'} 137 | 138 | assert 'buildings' not in orca.list_tables() 139 | 140 | modelmanager.register(t) 141 | assert 'buildings' in orca.list_tables() 142 | _ = orca.get_table('buildings').to_frame() 143 | 144 | modelmanager.initialize() 145 | assert 'buildings' in orca.list_tables() 146 | 147 | modelmanager.remove_step(t.name) 148 | 149 | 150 | def test_without_autorun(orca_session, data): 151 | """ 152 | Confirm that disabling autorun works. 
153 | 154 | """ 155 | t = LoadTable() 156 | t.table = 'buildings' 157 | t.source_type = 'csv' 158 | t.path = 'data/buildings.csv' 159 | t.csv_index_cols = 'building_id' 160 | t.autorun = False 161 | 162 | modelmanager.register(t) 163 | assert 'buildings' not in orca.list_tables() 164 | 165 | modelmanager.remove_step(t.name) 166 | 167 | 168 | -------------------------------------------------------------------------------- /tests/test_data_save.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import pytest 6 | 7 | import orca 8 | 9 | from urbansim_templates import modelmanager 10 | from urbansim_templates.data import SaveTable 11 | from urbansim_templates.utils import update_column, validate_template 12 | 13 | 14 | @pytest.fixture 15 | def orca_session(): 16 | """ 17 | Set up a clean Orca session and initialize ModelManager. 18 | 19 | """ 20 | orca.clear_all() 21 | modelmanager.initialize() 22 | 23 | 24 | @pytest.fixture 25 | def data(): 26 | """ 27 | Create a data table. 28 | 29 | """ 30 | d1 = {'building_id': np.arange(10), 31 | 'price': (1e6*np.random.random(10)).astype(int)} 32 | 33 | df = pd.DataFrame(d1).set_index('building_id') 34 | 35 | orca.add_table('buildings', df) 36 | 37 | 38 | def test_template_validity(): 39 | """ 40 | Run the templates through the standard validation check. 41 | 42 | """ 43 | assert validate_template(SaveTable) 44 | 45 | 46 | def test_property_persistence(orca_session): 47 | """ 48 | Test persistence of properties across registration, saving, and reloading. 
49 | 50 | """ 51 | t = SaveTable() 52 | t.table = 'buildings' 53 | t.columns = ['window_panes', 'number_of_chimneys'] 54 | t.filters = 'number_of_chimneys > 15' 55 | t.output_type = 'csv' 56 | t.path = 'data/buildings.csv' 57 | t.extra_settings = {'make_data_awesome': True} 58 | t.name = 'save-buildings-csv' 59 | t.tags = ['awesome', 'chimneys'] 60 | 61 | d1 = t.to_dict() 62 | modelmanager.register(t) 63 | modelmanager.initialize() 64 | d2 = modelmanager.get_step(t.name).to_dict() 65 | 66 | assert d1 == d2 67 | modelmanager.remove_step(t.name) 68 | 69 | 70 | def test_csv(orca_session, data): 71 | """ 72 | Test saving data to a CSV file. 73 | 74 | """ 75 | t = SaveTable() 76 | t.table = 'buildings' 77 | t.output_type = 'csv' 78 | t.path = 'data/buildings.csv' 79 | 80 | t.run() 81 | 82 | df = pd.read_csv(t.path).set_index('building_id') 83 | assert(df.equals(orca.get_table(t.table).to_frame())) 84 | 85 | os.remove(t.path) 86 | 87 | 88 | def test_hdf(orca_session, data): 89 | """ 90 | Test saving data to an HDF file. 91 | 92 | """ 93 | t = SaveTable() 94 | t.table = 'buildings' 95 | t.output_type = 'hdf' 96 | t.path = 'data/buildings.h5' 97 | 98 | t.run() 99 | 100 | df = pd.read_hdf(t.path) 101 | assert(df.equals(orca.get_table(t.table).to_frame())) 102 | 103 | os.remove(t.path) 104 | 105 | 106 | def test_columns(orca_session, data): 107 | """ 108 | Test requesting specific columns. 109 | 110 | """ 111 | update_column(table = 'buildings', 112 | column = 'price2', 113 | data = (1e6*np.random.random(10)).astype(int)) 114 | 115 | t = SaveTable() 116 | t.table = 'buildings' 117 | t.columns = 'price2' 118 | t.output_type = 'csv' 119 | t.path = 'data/buildings.csv' 120 | 121 | t.run() 122 | 123 | df = pd.read_csv(t.path).set_index('building_id') 124 | assert(list(df.columns) == ['price2']) 125 | 126 | 127 | def test_filters(orca_session, data): 128 | """ 129 | Test applying data filters before table is saved. 
130 | 131 | """ 132 | t = SaveTable() 133 | t.table = 'buildings' 134 | t.filters = 'price < 200000' 135 | t.output_type = 'csv' 136 | t.path = 'data/buildings.csv' 137 | 138 | t.run() 139 | 140 | df = pd.read_csv(t.path).set_index('building_id') 141 | assert(len(df) < 10) 142 | 143 | os.remove(t.path) 144 | 145 | 146 | def test_extra_settings(orca_session, data): 147 | """ 148 | """ 149 | pass 150 | 151 | 152 | def test_dynamic_paths(orca_session): 153 | """ 154 | Test inserting run id, model iteration, or timestamp into path. 155 | 156 | """ 157 | t = SaveTable() 158 | t.path = '%RUN%-%ITER%' 159 | 160 | assert(t.get_dynamic_filepath() == '0-0') 161 | 162 | orca.add_injectable('run_id', 5) 163 | orca.add_injectable('iter_var', 3) 164 | 165 | assert(t.get_dynamic_filepath() == '5-3') 166 | 167 | t.path = '%TS%' 168 | s = t.get_dynamic_filepath() 169 | assert(len(s) == 15) 170 | 171 | 172 | -------------------------------------------------------------------------------- /tests/test_large_multinomial_logit.py: -------------------------------------------------------------------------------- 1 | import orca 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | from choicemodels import MultinomialLogitResults 7 | 8 | from urbansim_templates import modelmanager 9 | from urbansim_templates.models import LargeMultinomialLogitStep 10 | from urbansim_templates.utils import validate_template 11 | 12 | 13 | @pytest.fixture 14 | def orca_session(): 15 | d1 = {'oid': np.arange(10), 16 | 'obsval': np.random.random(10), 17 | 'choice': np.random.choice(np.arange(20), size=10)} 18 | 19 | d2 = {'aid': np.arange(20), 20 | 'altval': np.random.random(20)} 21 | 22 | obs = pd.DataFrame(d1).set_index('oid') 23 | orca.add_table('obs', obs) 24 | 25 | alts = pd.DataFrame(d2).set_index('aid') 26 | orca.add_table('alts', alts) 27 | 28 | 29 | def test_template_validity(): 30 | """ 31 | Run the template through the standard validation check. 
32 | 33 | """ 34 | assert validate_template(LargeMultinomialLogitStep) 35 | 36 | 37 | def test_observation_sampling(orca_session): 38 | modelmanager.initialize() 39 | 40 | m = LargeMultinomialLogitStep() 41 | m.choosers = 'obs' 42 | m.alternatives = 'alts' 43 | m.choice_column = 'choice' 44 | m.model_expression = 'obsval + altval' 45 | 46 | m.fit() 47 | assert(len(m.mergedchoicetable.to_frame()) == 200) 48 | 49 | m.chooser_sample_size = 5 50 | m.fit() 51 | assert(len(m.mergedchoicetable.to_frame()) == 100) 52 | 53 | m.name = 'mnl-test' 54 | modelmanager.register(m) 55 | 56 | modelmanager.initialize() 57 | m = modelmanager.get_step('mnl-test') 58 | assert(m.chooser_sample_size == 5) 59 | 60 | modelmanager.remove_step('mnl-test') 61 | 62 | 63 | @pytest.fixture 64 | def data(): 65 | num_obs = 100 66 | num_alts = 120 67 | 68 | d1 = {'oid': np.arange(num_obs), 69 | 'obsval': np.random.random(num_obs), 70 | 'choice': np.random.choice(np.arange(num_alts), size=num_obs)} 71 | 72 | d2 = {'aid': np.arange(num_alts), 73 | 'altval': np.random.random(num_alts)} 74 | 75 | obs = pd.DataFrame(d1).set_index('oid') 76 | orca.add_table('obs', obs) 77 | 78 | alts = pd.DataFrame(d2).set_index('aid') 79 | orca.add_table('alts', alts) 80 | 81 | 82 | @pytest.fixture 83 | def m(data): 84 | """ 85 | Build a fitted model. 86 | 87 | """ 88 | m = LargeMultinomialLogitStep() 89 | m.choosers = 'obs' 90 | m.alternatives = 'alts' 91 | m.choice_column = 'choice' 92 | m.model_expression = 'obsval + altval' 93 | m.alt_sample_size = 10 94 | 95 | m.fit() 96 | return m 97 | 98 | 99 | def test_property_persistence(m): 100 | """ 101 | Test persistence of properties across registration, saving, and reloading. 
102 | 103 | """ 104 | m.fit() 105 | m.name = 'my-model' 106 | m.tags = ['tag1'] 107 | m.chooser_filters = 'filters1' 108 | m.chooser_sample_size = 100 109 | m.alt_filters = 'filter2' 110 | m.out_choosers = 'choosers2' 111 | m.out_alternatives = 'alts2' 112 | m.out_column = 'choices' 113 | m.out_chooser_filters = 'filters3' 114 | m.out_alt_filters = 'filters4' 115 | m.constrained_choices = True 116 | m.alt_capacity = 'cap' 117 | m.chooser_size = 'size' 118 | m.max_iter = 17 119 | 120 | d1 = m.to_dict() 121 | modelmanager.initialize() 122 | modelmanager.register(m) 123 | modelmanager.initialize() 124 | d2 = modelmanager.get_step('my-model').to_dict() 125 | 126 | assert d1 == d2 127 | modelmanager.remove_step('my-model') 128 | 129 | 130 | def test_simulation_unconstrained(m): 131 | """ 132 | Test simulation chooser filters with unconstrained choices. 133 | 134 | """ 135 | obs = orca.get_table('obs').to_frame() 136 | obs.loc[:24, 'choice'] = -1 137 | orca.add_table('obs', obs) 138 | 139 | m.out_chooser_filters = 'choice == -1' 140 | m.run() 141 | 142 | assert len(m.choices) == 25 143 | 144 | obs = orca.get_table('obs').to_frame() 145 | assert sum(obs.choice == -1) == 0 146 | assert obs.loc[:24, 'choice'].equals(m.choices) 147 | 148 | 149 | def test_simulation_single_occupancy(m): 150 | """ 151 | Test simulation of single-occupancy choices. 152 | 153 | """ 154 | m.constrained_choices = True 155 | m.run() 156 | 157 | obs = orca.get_table('obs').to_frame() 158 | assert len(obs) == len(obs.choice.unique()) 159 | 160 | 161 | def test_simulation_constrained(m): 162 | """ 163 | Test simulation of choices with explicit capacities and sizes. 
164 | 165 | """ 166 | obs = orca.get_table('obs').to_frame() 167 | obs.loc[:,'choice'] = -1 168 | obs['size'] = np.random.choice([1,2], size=len(obs)) 169 | orca.add_table('obs', obs) 170 | 171 | alts = orca.get_table('alts').to_frame() 172 | alts['cap'] = np.random.choice([1,2,3], size=len(alts)) 173 | orca.add_table('alts', alts) 174 | 175 | m.constrained_choices = True 176 | m.alt_capacity = 'cap' 177 | m.chooser_size = 'size' 178 | m.run() 179 | 180 | obs = orca.get_table('obs').to_frame() 181 | assert all(~obs.choice.isin([-1])) 182 | 183 | 184 | def test_simulation_no_valid_choosers(m): 185 | """ 186 | If there are no valid choosers after applying filters, simulation should exit. 187 | 188 | """ 189 | m.out_chooser_filters = 'choice == -1' 190 | m.run() 191 | 192 | 193 | def test_simulation_no_valid_alternatives(m): 194 | """ 195 | If there are no valid alternatives after applying filters, simulation should exit. 196 | 197 | """ 198 | m.out_alt_filters = 'altval == -1' 199 | m.run() 200 | 201 | 202 | def test_output_column_autocreation(m): 203 | """ 204 | Test on-the-fly creation of the output column. 205 | 206 | """ 207 | m.out_column = 'potato_chips' 208 | m.run() 209 | 210 | assert('potato_chips' in orca.get_table('obs').columns) 211 | assert(m.choices.equals(orca.get_table('obs').to_frame()['potato_chips'])) 212 | 213 | 214 | def test_diagnostic_attributes(data): 215 | """ 216 | Test that diagnostic attributes are available when expected. 
217 | 218 | """ 219 | m = LargeMultinomialLogitStep() 220 | m.choosers = 'obs' 221 | m.alternatives = 'alts' 222 | m.choice_column = 'choice' 223 | m.model_expression = 'obsval + altval' 224 | m.alt_sample_size = 10 225 | 226 | assert(m.model is None) 227 | assert(m.mergedchoicetable is None) 228 | assert(m.probabilities is None) 229 | assert(m.choices is None) 230 | 231 | m.fit() 232 | 233 | assert(isinstance(m.model, MultinomialLogitResults)) 234 | 235 | len_mct = len(m.mergedchoicetable.to_frame()) 236 | len_obs_alts = len(orca.get_table(m.choosers).to_frame()) * m.alt_sample_size 237 | 238 | assert(len_mct == len_obs_alts) 239 | 240 | name = m.name 241 | modelmanager.register(m) 242 | modelmanager.initialize() 243 | m = modelmanager.get_step(name) 244 | 245 | assert(isinstance(m.model, MultinomialLogitResults)) 246 | 247 | m.run() 248 | 249 | len_mct = len(m.mergedchoicetable.to_frame()) 250 | len_probs = len(m.probabilities) 251 | len_choices = len(m.choices) 252 | len_obs = len(orca.get_table(m.choosers).to_frame()) 253 | len_obs_alts = len_obs * m.alt_sample_size 254 | 255 | assert(len_mct == len_obs_alts) 256 | assert(len_probs == len_obs_alts) 257 | assert(len_choices == len_obs) 258 | 259 | modelmanager.remove_step(name) 260 | 261 | 262 | def test_simulation_join_key_as_filter(m): 263 | """ 264 | This tests that it's possible to use a join key as a both a data filter for one of 265 | the tables, and as a choice column for the model. 266 | 267 | This came up because MergedChoiceTable doesn't allow the observations and 268 | alternatives to have any column names in common -- the rationale is to maintain data 269 | traceability by avoiding any of-the-fly renaming or dropped columns. 270 | 271 | In the templates, in order to support things like using 'households.building_id' as a 272 | filter column and 'buildings.building_id' as a choice column, we apply the filters 273 | and then drop columns that are no longer needed before merging the tables. 
274 | 275 | """ 276 | obs = orca.get_table('obs') 277 | obs['aid'] = obs.get_column('choice') 278 | 279 | m.out_choosers = 'obs' 280 | m.out_chooser_filters = 'aid > 50' 281 | m.out_alternatives = 'alts' 282 | m.out_column = 'aid' 283 | 284 | m.run() 285 | 286 | -------------------------------------------------------------------------------- /tests/test_regression.py: -------------------------------------------------------------------------------- 1 | import orca 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | from urbansim_templates import modelmanager 7 | from urbansim_templates.models import OLSRegressionStep 8 | from urbansim_templates.utils import validate_template 9 | 10 | 11 | @pytest.fixture 12 | def orca_session(): 13 | d1 = {'a': np.random.random(100), 14 | 'b': np.random.random(100)} 15 | 16 | obs = pd.DataFrame(d1) 17 | orca.add_table('obs', obs) 18 | 19 | 20 | def test_template_validity(): 21 | """ 22 | Run the template through the standard validation check. 23 | 24 | """ 25 | assert validate_template(OLSRegressionStep) 26 | 27 | 28 | def test_ols(orca_session): 29 | """ 30 | For now this just tests that the code runs. 31 | 32 | """ 33 | modelmanager.initialize() 34 | 35 | m = OLSRegressionStep() 36 | m.tables = 'obs' 37 | m.model_expression = 'a ~ b' 38 | 39 | m.fit() 40 | 41 | m.name = 'ols-test' 42 | modelmanager.register(m) 43 | 44 | modelmanager.initialize() 45 | m = modelmanager.get_step('ols-test') 46 | 47 | modelmanager.remove_step('ols-test') 48 | 49 | 50 | def test_simulation(orca_session): 51 | """ 52 | Test that predicted values are correctly written to Orca. 
53 | 54 | """ 55 | modelmanager.initialize() 56 | 57 | m = OLSRegressionStep() 58 | m.tables = 'obs' 59 | m.model_expression = 'a ~ b' 60 | m.fit() 61 | 62 | m.out_column = 'a_predicted' 63 | m.run() 64 | 65 | assert orca.get_table('obs').to_frame()['a_predicted'].equals(m.predicted_values) 66 | 67 | 68 | def test_out_transform(orca_session): 69 | """ 70 | Test transformation of the predicted values. 71 | 72 | """ 73 | modelmanager.initialize() 74 | 75 | m = OLSRegressionStep() 76 | m.tables = 'obs' 77 | m.model_expression = 'a ~ b' 78 | m.fit() 79 | 80 | m.out_column = 'a_predicted' 81 | m.out_transform = 'np.exp' 82 | m.run() 83 | 84 | predictions = m.predicted_values.apply(np.exp) 85 | 86 | assert orca.get_table('obs').to_frame()['a_predicted'].equals(predictions) 87 | 88 | -------------------------------------------------------------------------------- /tests/test_segmented_large_multinomial_logit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pytest 4 | 5 | import orca 6 | from urbansim.models.util import apply_filter_query 7 | 8 | from urbansim_templates import modelmanager 9 | from urbansim_templates.models import SegmentedLargeMultinomialLogitStep 10 | from urbansim_templates.utils import get_data, validate_template 11 | 12 | 13 | @pytest.fixture 14 | def orca_session(): 15 | """ 16 | Set up a clean Orca session with a couple of data tables. 
17 | 18 | """ 19 | d1 = {'oid': np.arange(100), 20 | 'group': np.random.choice(['A','B','C'], size=100), 21 | 'int_group': np.random.choice([3,4], size=100), 22 | 'obsval': np.random.random(100), 23 | 'choice': np.random.choice(np.arange(20), size=100)} 24 | 25 | d2 = {'aid': np.arange(20), 26 | 'altval': np.random.random(20)} 27 | 28 | obs = pd.DataFrame(d1).set_index('oid') 29 | orca.add_table('obs', obs) 30 | 31 | alts = pd.DataFrame(d2).set_index('aid') 32 | orca.add_table('alts', alts) 33 | 34 | 35 | @pytest.fixture 36 | def orca_session_alts_as_list(): 37 | """ 38 | Set up a clean Orca session with a couple of data tables. 39 | 40 | """ 41 | d1 = {'oid': np.arange(100), 42 | 'group': np.random.choice(['A', 'B', 'C'], size=100), 43 | 'int_group': np.random.choice([3, 4], size=100), 44 | 'obsval': np.random.random(100), 45 | 'choice': np.random.choice(np.arange(20), size=100)} 46 | 47 | d2 = {'aid': np.arange(20), 48 | 'altval': np.random.random(20)} 49 | 50 | d3 = {'aid': np.arange(20), 51 | 'altval_2': np.random.random(20)} 52 | 53 | obs = pd.DataFrame(d1).set_index('oid') 54 | orca.add_table('obs', obs) 55 | 56 | d2_df = pd.DataFrame(d2).set_index('aid') 57 | orca.add_table('d2', d2_df) 58 | 59 | d3_df = pd.DataFrame(d3).set_index('aid') 60 | orca.add_table('d3', d3_df) 61 | 62 | orca.broadcast('d3', 'd2', cast_index=True, onto_index=True) 63 | 64 | 65 | @pytest.fixture 66 | def m_alts_as_list(orca_session_alts_as_list): 67 | """ 68 | Set up a partially configured model step with multiple 69 | tables of alternatives 70 | """ 71 | m = SegmentedLargeMultinomialLogitStep() 72 | m.defaults.choosers = 'obs' 73 | m.defaults.alternatives = ['d2', 'd3'] 74 | m.defaults.choice_column = 'choice' 75 | m.defaults.model_expression = 'obsval + altval + altval_2' 76 | m.segmentation_column = 'group' 77 | return m 78 | 79 | 80 | @pytest.fixture 81 | def m(orca_session): 82 | """ 83 | Set up a partially configured model step. 
84 | 85 | """ 86 | m = SegmentedLargeMultinomialLogitStep() 87 | m.defaults.choosers = 'obs' 88 | m.defaults.alternatives = 'alts' 89 | m.defaults.choice_column = 'choice' 90 | m.defaults.model_expression = 'obsval + altval' 91 | m.segmentation_column = 'group' 92 | return m 93 | 94 | 95 | def test_template_validity(): 96 | """ 97 | Run the template through the standard validation check. 98 | 99 | """ 100 | assert validate_template(SegmentedLargeMultinomialLogitStep) 101 | 102 | 103 | def test_basic_operation(m): 104 | """ 105 | Test basic operation of the template. 106 | 107 | """ 108 | m.fit_all() 109 | m.to_dict() 110 | assert len(m.submodels) == 3 111 | 112 | def test_basic_operation_alts_as_list(m_alts_as_list): 113 | """ 114 | Test basic operation of the template. 115 | 116 | """ 117 | m = m_alts_as_list 118 | m.fit_all() 119 | m.to_dict() 120 | assert len(m.submodels) == 3 121 | 122 | def test_basic_operation(m): 123 | """ 124 | Test basic operation of the template. 125 | 126 | """ 127 | m.fit_all() 128 | m.to_dict() 129 | assert len(m.submodels) == 3 130 | 131 | 132 | def test_numeric_segments(m): 133 | """ 134 | Test support for using ints as categorical variables. 135 | 136 | """ 137 | m.segmentation_column = 'int_group' 138 | m.build_submodels() 139 | assert len(m.submodels) == 2 140 | 141 | 142 | def test_chooser_filters(m): 143 | """ 144 | Test that the default chooser filters generate the correct data subset. 145 | 146 | """ 147 | m.defaults.chooser_filters = "group != 'A'" 148 | m.build_submodels() 149 | assert len(m.submodels) == 2 150 | 151 | m.defaults.chooser_filters = ["group != 'A'", "group != 'B'"] 152 | m.build_submodels() 153 | assert len(m.submodels) == 1 154 | 155 | 156 | def test_alternative_filters(m): 157 | """ 158 | Test that the default alternative filters generate the correct data subset. 
159 | 160 | """ 161 | m.defaults.alt_filters = 'aid < 5' 162 | 163 | df = orca.get_table(m.defaults.choosers).to_frame() 164 | len1 = len(df.loc[df.choice < 5]) 165 | len2 = len(m.get_segmentation_column()) 166 | 167 | assert len1 == len2 168 | 169 | 170 | def test_alternative_filters_for_alts_as_list(m_alts_as_list): 171 | """ 172 | Test that the default alternative filters generate the correct data subset. 173 | 174 | """ 175 | m = m_alts_as_list 176 | m.defaults.alt_filters = 'altval_2 < 0.5' 177 | 178 | m.build_submodels() 179 | for k, v in m.submodels.items(): 180 | alts = get_data(tables = v.alternatives, filters = v.alt_filters) 181 | assert alts['altval_2'].max() < 0.5 182 | 183 | 184 | def test_submodel_filters(m): 185 | """ 186 | Test that submodel filters generate the correct data subset. 187 | 188 | """ 189 | m.build_submodels() 190 | 191 | df = orca.get_table(m.defaults.choosers).to_frame() 192 | len1 = len(apply_filter_query(df.loc[df.group == 'A'], m.defaults.chooser_filters)) 193 | len2 = len(apply_filter_query(df, m.submodels['A'].chooser_filters)) 194 | 195 | assert len1 == len2 196 | 197 | 198 | def test_property_persistence(m): 199 | """ 200 | Test persistence of properties across registration, saving, and reloading. 201 | 202 | """ 203 | m.name = 'test' 204 | m.tags = ['one','two'] 205 | m.fit_all() 206 | d1 = m.to_dict() 207 | modelmanager.initialize() 208 | modelmanager.register(m) 209 | modelmanager.initialize() 210 | d2 = modelmanager.get_step('test').to_dict() 211 | assert d1 == d2 212 | modelmanager.remove_step('test') 213 | 214 | 215 | def test_filter_generation(m): 216 | """ 217 | Test additional cases of generating submodel filters. 
218 | 219 | """ 220 | m.defaults.chooser_filters = 'obsval > 0.5' 221 | m.build_submodels() 222 | assert m.submodels['A'].chooser_filters == ['obsval > 0.5', "group == 'A'"] 223 | 224 | m.defaults.chooser_filters = ['obsval > 0.5', 'obsval < 0.9'] 225 | m.build_submodels() 226 | assert m.submodels['A'].chooser_filters == \ 227 | ['obsval > 0.5', 'obsval < 0.9', "group == 'A'"] 228 | 229 | 230 | @pytest.fixture 231 | def d(): 232 | d = {'choosers': 'a', 233 | 'alternatives': 'b', 234 | 'model_expression': 'c', 235 | 'choice_column': 'd', 236 | 'chooser_sample_size': 'f', 237 | 'alt_sample_size': 'h', 238 | 'out_choosers': 'i', 239 | 'out_alternatives': 'j', 240 | 'out_column': 'k', 241 | 'out_chooser_filters': 'l', 242 | 'out_alt_filters': 'm'} 243 | return d 244 | 245 | 246 | def test_initial_propagation_of_defaults(m): 247 | """ 248 | Test that submodels receive properties of the defaults object. 249 | 250 | """ 251 | d = m.defaults.to_dict() 252 | 253 | m.build_submodels() 254 | 255 | d2 = m.submodels['A'].to_dict() 256 | for k, v in d.items(): 257 | if k != 'chooser_filters': 258 | assert d2[k] == v 259 | 260 | 261 | def test_subsequent_propagation_of_defaults(m, d): 262 | """ 263 | Test that submodels are updated correctly when the defaults are subsequently changed. 264 | 265 | """ 266 | m.build_submodels() 267 | 268 | for k, v in d.items(): 269 | setattr(m.defaults, k, v) 270 | 271 | d2 = m.submodels['A'].to_dict() 272 | for k, v in d.items(): 273 | assert d2[k] == v 274 | 275 | # these should NOT be passed to the submodels 276 | m.defaults.chooser_filters = 'test' 277 | assert m.submodels['A'].chooser_filters != 'test' 278 | 279 | m.defaults.alt_filters = 'test' 280 | assert m.submodels['A'].alt_filters != 'test' 281 | 282 | 283 | def test_independence_of_submodels(m, d): 284 | """ 285 | Test that updating one submodel does not change others. 
286 | 287 | """ 288 | m.build_submodels() 289 | 290 | for k, v in d.items(): 291 | setattr(m.submodels['A'], k, v) 292 | 293 | d2 = m.submodels['B'].to_dict() 294 | for k, v in d.items(): 295 | assert d2[k] != v 296 | 297 | 298 | -------------------------------------------------------------------------------- /tests/test_shared_core.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import pytest 4 | 5 | from urbansim_templates.shared import CoreTemplateSettings 6 | 7 | 8 | def test_property_persistence(): 9 | """ 10 | Confirm CoreTemplateSettings properties persist through to_dict() and from_dict(). 11 | 12 | """ 13 | obj = CoreTemplateSettings() 14 | obj.name = 'name' 15 | obj.tags = ['tag1', 'tag2'] 16 | obj.notes = 'notes' 17 | obj.autorun = True 18 | obj.template = 'CoolNewTemplate' 19 | obj.template_version = '0.1.dev0' 20 | 21 | d = obj.to_dict() 22 | print(d) 23 | 24 | obj2 = CoreTemplateSettings.from_dict(d) 25 | assert(obj2.to_dict() == d) 26 | 27 | -------------------------------------------------------------------------------- /tests/test_shared_output_column.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import pytest 6 | 7 | import orca 8 | 9 | from urbansim_templates.shared import OutputColumnSettings, register_column 10 | 11 | 12 | def test_property_persistence(): 13 | """ 14 | Confirm OutputColumnSettings properties persist through to_dict() and from_dict(). 
15 | 16 | """ 17 | obj = OutputColumnSettings() 18 | obj.column_name = 'column' 19 | obj.table = 'table' 20 | obj.data_type = 'int32' 21 | obj.missing_values = 5 22 | obj.cache = True 23 | obj.cache_scope = 'iteration' 24 | 25 | d = obj.to_dict() 26 | print(d) 27 | 28 | obj2 = OutputColumnSettings.from_dict(d) 29 | assert(obj2.to_dict() == d) 30 | 31 | 32 | # Tests for register_column().. 33 | 34 | @pytest.fixture 35 | def orca_session(): 36 | """ 37 | Set up a clean Orca session, with a data table. 38 | 39 | """ 40 | orca.clear_all() 41 | 42 | df = pd.DataFrame({'a': [0.1, 1.33, 2.4]}, index=[1,2,3]) 43 | orca.add_table('tab', df) 44 | 45 | 46 | def test_column_registration(orca_session): 47 | """ 48 | Confirm column registration works. 49 | 50 | """ 51 | series = pd.Series([4,5,6], index=[1,2,3]) 52 | 53 | def build_column(): 54 | return series 55 | 56 | settings = OutputColumnSettings(column_name='col', table='tab') 57 | register_column(build_column, settings) 58 | 59 | assert(orca.get_table('tab').get_column('col').equals(series)) 60 | 61 | 62 | def test_filling_missing_values(orca_session): 63 | """ 64 | Confirm that filling missing values works. 65 | 66 | """ 67 | series1 = pd.Series([4.0, np.nan, 6.0], index=[1,2,3]) 68 | series2 = pd.Series([4.0, 5.0, 6.0], index=[1,2,3]) 69 | 70 | def build_column(): 71 | return series1 72 | 73 | settings = OutputColumnSettings(column_name='col', table='tab', missing_values=5) 74 | register_column(build_column, settings) 75 | 76 | assert(orca.get_table('tab').get_column('col').equals(series2)) 77 | 78 | 79 | def test_casting_data_type(orca_session): 80 | """ 81 | Confirm that filling missing values works. 
82 | 83 | """ 84 | series1 = pd.Series([4.0, 5.0, 6.0], index=[1,2,3]) 85 | series2 = pd.Series([4, 5, 6], index=[1,2,3]) 86 | 87 | def build_column(): 88 | return series1 89 | 90 | settings = OutputColumnSettings(column_name='col', table='tab', data_type='int') 91 | register_column(build_column, settings) 92 | 93 | assert(orca.get_table('tab').get_column('col').equals(series2)) 94 | 95 | 96 | -------------------------------------------------------------------------------- /tests/test_small_multinomial_logit.py: -------------------------------------------------------------------------------- 1 | import orca 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | from collections import OrderedDict 6 | 7 | from urbansim_templates import modelmanager 8 | from urbansim_templates.models import SmallMultinomialLogitStep 9 | from urbansim_templates.utils import validate_template 10 | 11 | 12 | @pytest.fixture 13 | def orca_session(): 14 | d1 = {'id': np.arange(100), 15 | 'building_id': np.arange(100), 16 | 'a': np.random.random(100), 17 | 'choice': np.random.randint(3, size=100)} 18 | 19 | d2 = {'building_id': np.arange(100), 20 | 'b': np.random.random(100)} 21 | 22 | households = pd.DataFrame(d1).set_index('id') 23 | orca.add_table('households', households) 24 | 25 | buildings = pd.DataFrame(d2).set_index('building_id') 26 | orca.add_table('buildings', buildings) 27 | 28 | orca.broadcast(cast='buildings', onto='households', 29 | cast_index=True, onto_on='building_id') 30 | 31 | 32 | def test_template_validity(): 33 | """ 34 | Run the template through the standard validation check. 35 | 36 | """ 37 | assert validate_template(SmallMultinomialLogitStep) 38 | 39 | 40 | def test_small_mnl(orca_session): 41 | """ 42 | Test that the code runs, and that the model_expression is always available. 
43 | 44 | """ 45 | modelmanager.initialize() 46 | 47 | m = SmallMultinomialLogitStep() 48 | m.tables = ['households', 'buildings'] 49 | m.choice_column = 'choice' 50 | m.model_expression = OrderedDict([ 51 | ('intercept', [1,2]), ('a', [0,2]), ('b', [0,2])]) 52 | 53 | m.fit() 54 | assert(m.model_expression is not None) 55 | 56 | print(m.model_expression) 57 | 58 | m.name = 'small-mnl-test' 59 | modelmanager.register(m) 60 | assert(m.model_expression is not None) 61 | 62 | print(m.model_expression) 63 | 64 | # TEST SIMULATION 65 | m.out_column = 'simulated_choice' 66 | 67 | m.run() 68 | print(orca.get_table('households').to_frame()) 69 | 70 | modelmanager.initialize() 71 | m = modelmanager.get_step('small-mnl-test') 72 | assert(m.model_expression is not None) 73 | 74 | print(m.model_expression) 75 | 76 | modelmanager.remove_step('small-mnl-test') -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pytest 4 | 5 | import orca 6 | 7 | from urbansim_templates import utils 8 | 9 | 10 | def test_parse_version(): 11 | assert utils.parse_version('0.1.0.dev0') == (0, 1, 0, 0) 12 | assert utils.parse_version('0.115.3') == (0, 115, 3, None) 13 | assert utils.parse_version('3.1.dev7') == (3, 1, 0, 7) 14 | assert utils.parse_version('5.4') == (5, 4, 0, None) 15 | 16 | def test_version_greater_or_equal(): 17 | assert utils.version_greater_or_equal('2.0', '0.1.1') == True 18 | assert utils.version_greater_or_equal('0.1.1', '2.0') == False 19 | assert utils.version_greater_or_equal('2.1', '2.0.1') == True 20 | assert utils.version_greater_or_equal('2.0.1', '2.1') == False 21 | assert utils.version_greater_or_equal('1.1.3', '1.1.2') == True 22 | assert utils.version_greater_or_equal('1.1.2', '1.1.3') == False 23 | assert utils.version_greater_or_equal('1.1.3', '1.1.3') == True 24 | assert 
utils.version_greater_or_equal('1.1.3.dev1', '1.1.3.dev0') == True 25 | assert utils.version_greater_or_equal('1.1.3.dev0', '1.1.3') == False 26 | 27 | 28 | ############################### 29 | ## get_df 30 | 31 | @pytest.fixture 32 | def df(): 33 | d = {'id': [1,2,3], 'val1': [4,5,6], 'val2': [7,8,9]} 34 | return pd.DataFrame(d).set_index('id') 35 | 36 | 37 | def test_get_df_dataframe(df): 38 | """ 39 | Confirm that get_df() works when passed a DataFrame. 40 | 41 | """ 42 | df_out = utils.get_df(df) 43 | pd.testing.assert_frame_equal(df, df_out) 44 | 45 | 46 | def test_get_df_str(df): 47 | """ 48 | Confirm that get_df() works with str input. 49 | 50 | """ 51 | orca.add_table('df', df) 52 | df_out = utils.get_df('df') 53 | pd.testing.assert_frame_equal(df, df_out) 54 | 55 | 56 | def test_get_df_dataframewrapper(df): 57 | """ 58 | Confirm that get_df() works with orca.DataFrameWrapper input. 59 | 60 | """ 61 | dfw = orca.DataFrameWrapper('df', df) 62 | df_out = utils.get_df(dfw) 63 | pd.testing.assert_frame_equal(df, df_out) 64 | 65 | 66 | def test_get_df_tablefuncwrapper(df): 67 | """ 68 | Confirm that get_df() works with orca.TableFuncWrapper input. 69 | 70 | """ 71 | def df_callable(): 72 | return df 73 | 74 | tfw = orca.TableFuncWrapper('df', df_callable) 75 | df_out = utils.get_df(tfw) 76 | pd.testing.assert_frame_equal(df, df_out) 77 | 78 | 79 | def test_get_df_columns(df): 80 | """ 81 | Confirm that get_df() limits columns, and filters out duplicates and invalid ones. 82 | 83 | """ 84 | dfw = orca.DataFrameWrapper('df', df) 85 | df_out = utils.get_df(dfw, ['id', 'val1', 'val1', 'val3']) 86 | pd.testing.assert_frame_equal(df[['val1']], df_out) 87 | 88 | 89 | def test_get_df_unsupported_type(df): 90 | """ 91 | Confirm that get_df() raises an error for an unsupported type. 
92 | 93 | """ 94 | try: 95 | df_out = utils.get_df([df]) 96 | except ValueError as e: 97 | print(e) 98 | return 99 | 100 | pytest.fail() 101 | 102 | 103 | 104 | ############################### 105 | ## all_cols 106 | 107 | def test_all_cols_dataframe(df): 108 | """ 109 | Confirm that all_cols() works with DataFrame input. 110 | 111 | """ 112 | cols = utils.all_cols(df) 113 | assert sorted(cols) == sorted(['id', 'val1', 'val2']) 114 | 115 | 116 | def test_all_cols_orca(df): 117 | """ 118 | Confirm that all_cols() works with Orca input. 119 | 120 | """ 121 | orca.add_table('df', df) 122 | cols = utils.all_cols('df') 123 | assert sorted(cols) == sorted(['id', 'val1', 'val2']) 124 | 125 | 126 | def test_all_cols_extras(df): 127 | """ 128 | Confirm that all_cols() includes columns not part of the Orca core table. 129 | 130 | """ 131 | orca.add_table('df', df) 132 | orca.add_column('df', 'newcol', pd.Series()) 133 | cols = utils.all_cols('df') 134 | assert sorted(cols) == sorted(['id', 'val1', 'val2', 'newcol']) 135 | 136 | 137 | def test_all_cols_unsupported_type(df): 138 | """ 139 | Confirm that all_cols() raises an error for an unsupported type. 
140 | 141 | """ 142 | try: 143 | cols = utils.all_cols([df]) 144 | except ValueError as e: 145 | print(e) 146 | return 147 | 148 | pytest.fail() 149 | 150 | 151 | 152 | 153 | ############################### 154 | ## get_data 155 | 156 | @pytest.fixture 157 | def orca_session(): 158 | d1 = {'id': [1, 2, 3], 159 | 'building_id': [1, 2, 3], 160 | 'tenure': [1, 1, 0], 161 | 'age': [25, 45, 65]} 162 | 163 | d2 = {'building_id': [1, 2, 3], 164 | 'zone_id': [17, 17, 17], 165 | 'pop': [2, 2, 2]} 166 | 167 | d3 = {'zone_id': [17], 168 | 'pop': [500]} 169 | 170 | households = pd.DataFrame(d1).set_index('id') 171 | orca.add_table('households', households) 172 | 173 | buildings = pd.DataFrame(d2).set_index('building_id') 174 | orca.add_table('buildings', buildings) 175 | 176 | zones = pd.DataFrame(d3).set_index('zone_id') 177 | orca.add_table('zones', zones) 178 | 179 | orca.broadcast(cast='buildings', onto='households', 180 | cast_index=True, onto_on='building_id') 181 | 182 | orca.broadcast(cast='zones', onto='buildings', 183 | cast_index=True, onto_on='zone_id') 184 | 185 | 186 | def test_get_data(orca_session): 187 | """ 188 | General test - multiple tables, binding filters, extra columns. 189 | 190 | """ 191 | df = utils.get_data(tables = ['households', 'buildings'], 192 | model_expression = 'tenure ~ pop', 193 | filters = ['age > 20', 'age < 50'], 194 | extra_columns = 'zone_id') 195 | 196 | assert(set(df.columns) == set(['tenure', 'pop', 'age', 'zone_id'])) 197 | assert(len(df) == 2) 198 | 199 | 200 | def test_get_data_single_table(orca_session): 201 | """ 202 | Single table, no other params. 203 | 204 | """ 205 | df = utils.get_data(tables = 'households') 206 | assert(len(df) == 3) 207 | 208 | 209 | def test_get_data_bad_columns(orca_session): 210 | """ 211 | Bad column name, should be ignored. 
212 | 213 | """ 214 | df = utils.get_data(tables = ['households', 'buildings'], 215 | model_expression = 'tenure ~ pop + potato') 216 | 217 | assert(set(df.columns) == set(['tenure', 'pop'])) 218 | 219 | 220 | def test_update_column(orca_session): 221 | """ 222 | General test. 223 | 224 | Additional tests to add: series without index, adding column on the fly. 225 | 226 | """ 227 | table = 'buildings' 228 | column = 'pop' 229 | data = pd.Series([3,3,3], index=[1,2,3]) 230 | 231 | utils.update_column(table, column, data) 232 | assert(orca.get_table(table).to_frame()[column].tolist() == [3,3,3]) 233 | 234 | 235 | def test_update_column_incomplete_series(orca_session): 236 | """ 237 | Update certain values but not others, with non-matching index orders. 238 | 239 | """ 240 | table = 'buildings' 241 | column = 'pop' 242 | data = pd.Series([10,5], index=[3,1]) 243 | 244 | utils.update_column(table, column, data) 245 | assert(orca.get_table(table).to_frame()[column].tolist() == [5,2,10]) 246 | 247 | 248 | def test_add_column_incomplete_series(orca_session): 249 | """ 250 | Add an incomplete column to confirm that it's aligned based on the index. (The ints 251 | will be cast to floats to accommodate the missing values.) 252 | 253 | """ 254 | table = 'buildings' 255 | column = 'pop2' 256 | data = pd.Series([10,5], index=[3,1]) 257 | 258 | utils.update_column(table, column, data) 259 | stored_data = orca.get_table(table).to_frame()[column].tolist() 260 | 261 | np.testing.assert_array_equal(stored_data, [5.0, np.nan, 10.0]) 262 | -------------------------------------------------------------------------------- /tests/test_utils_broadcasts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for the utilities for merging tables using implicit join keys instead of Orca 3 | broadcasts. 
4 | 5 | """ 6 | import pandas as pd 7 | import pytest 8 | 9 | import orca 10 | 11 | from urbansim_templates.utils import validate_table, validate_all_tables, merge_tables 12 | from urbansim_templates.utils import all_cols 13 | 14 | 15 | @pytest.fixture 16 | def orca_session(): 17 | """ 18 | Set up a clean Orca session. 19 | 20 | """ 21 | orca.clear_all() 22 | 23 | 24 | ############################### 25 | ## validate_tables() 26 | 27 | def test_validation_table_not_registered(orca_session): 28 | """ 29 | Table validation should raise a ValueError if the table isn't registered. 30 | 31 | """ 32 | try: 33 | validate_table('tab') 34 | except ValueError as e: 35 | print(e) 36 | return 37 | 38 | pytest.fail() # fail is ValueError wasn't raised 39 | 40 | 41 | def test_validation_index_unnamed(orca_session): 42 | """ 43 | Table validation should raise a ValueError if index is unnamed. 44 | 45 | """ 46 | d = {'id': [1,1,3], 'value': [4,4,4]} 47 | orca.add_table('tab', pd.DataFrame(d)) # generates auto index without a name 48 | 49 | try: 50 | validate_table('tab') 51 | except ValueError as e: 52 | print(e) 53 | return 54 | 55 | pytest.fail() # fail if ValueError wasn't raised 56 | 57 | 58 | def test_validation_duplicate_colnames(orca_session): 59 | """ 60 | Table validation should raise a ValueError if columns share a name with index. 61 | 62 | """ 63 | d = {'id1': [1,1,3], 'id2': [3,3,9], 'value': [4,4,4]} 64 | df = pd.DataFrame(d).set_index(['id1', 'id2']) 65 | df['id2'] = [10,10,10] # column with same name as one of the multi-index levels 66 | orca.add_table('tab', df) 67 | 68 | try: 69 | validate_table('tab') 70 | except ValueError as e: 71 | print(e) 72 | return 73 | 74 | pytest.fail() # fail if ValueError wasn't raised 75 | 76 | 77 | def test_validation_index_unique(orca_session): 78 | """ 79 | Table validation should pass if the index is unique. 
80 | 81 | These tests of the validate() method generate Orca tables directly, which is just a 82 | shortcut for testing -- the intended use is for the method to validate the table 83 | loaded by the TableStep. 84 | 85 | """ 86 | d = {'id': [1,2,3], 'value': [4,4,4]} 87 | orca.add_table('tab', pd.DataFrame(d).set_index('id')) 88 | 89 | validate_table('tab') 90 | 91 | 92 | def test_validation_index_not_unique(orca_session): 93 | """ 94 | Table validation should raise a ValueError if the index is not unique. 95 | 96 | """ 97 | d = {'id': [1,1,3], 'value': [4,4,4]} 98 | orca.add_table('tab', pd.DataFrame(d).set_index('id')) 99 | 100 | try: 101 | validate_table('tab') 102 | except ValueError as e: 103 | print(e) 104 | return 105 | 106 | pytest.fail() # fail if ValueError wasn't raised 107 | 108 | 109 | def test_validation_multiindex_unique(orca_session): 110 | """ 111 | Table validation should pass with a MultiIndex whose combinations are unique. 112 | 113 | """ 114 | d = {'id': [1,1,1], 'sub_id': [1,2,3], 'value': [4,4,4]} 115 | orca.add_table('tab', pd.DataFrame(d).set_index(['id', 'sub_id'])) 116 | 117 | validate_table('tab') 118 | 119 | 120 | def test_validation_multiindex_not_unique(orca_session): 121 | """ 122 | Table validation should raise a ValueError if the MultiIndex combinations are not 123 | unique. 124 | 125 | """ 126 | d = {'id': [1,1,1], 'sub_id': [2,2,3], 'value': [4,4,4]} 127 | orca.add_table('tab', pd.DataFrame(d).set_index(['id', 'sub_id'])) 128 | 129 | try: 130 | validate_table('tab') 131 | except ValueError as e: 132 | print(e) 133 | return 134 | 135 | pytest.fail() # fail if ValueError wasn't raised 136 | 137 | 138 | def test_validation_columns_vs_other_indexes(orca_session): 139 | """ 140 | Table validation should compare the 'households.building_id' column to 141 | 'buildings.build_id'. 
142 | 143 | """ 144 | d = {'household_id': [1,2,3], 'building_id': [2,3,4]} 145 | orca.add_table('households', pd.DataFrame(d).set_index('household_id')) 146 | 147 | d = {'building_id': [1,2,3,4], 'value': [4,4,4,4]} 148 | orca.add_table('buildings', pd.DataFrame(d).set_index('building_id')) 149 | 150 | validate_table('households') 151 | 152 | 153 | def test_validation_index_vs_other_columns(orca_session): 154 | """ 155 | Table validation should compare the 'households.building_id' column to 156 | 'buildings.build_id'. 157 | 158 | """ 159 | d = {'building_id': [1,2,3,4], 'value': [4,4,4,4]} 160 | orca.add_table('buildings', pd.DataFrame(d).set_index('building_id')) 161 | 162 | d = {'household_id': [1,2,3], 'building_id': [2,3,5]} 163 | orca.add_table('households', pd.DataFrame(d).set_index('household_id')) 164 | 165 | validate_table('buildings') 166 | 167 | 168 | def test_validation_reciprocal_false(orca_session): 169 | """ 170 | This combination should not produce any column comparisons. 171 | 172 | """ 173 | d = {'building_id': [1,2,3,4], 'value': [4,4,4,4]} 174 | orca.add_table('buildings', pd.DataFrame(d).set_index('building_id')) 175 | 176 | d = {'household_id': [1,2,3], 'building_id': [2,3,5]} 177 | orca.add_table('households', pd.DataFrame(d).set_index('household_id')) 178 | 179 | print("Begin reciprocal test") 180 | validate_table('buildings', reciprocal=False) 181 | print("End reciprocal test") 182 | 183 | 184 | def test_validation_with_multiindexes(orca_session): 185 | """ 186 | Here, table validation should compare 'choice_table.[home_tract,work_tract]' to 187 | 'distances.[home_tract,work_tract]'. 
188 | 189 | """ 190 | d = {'obs_id': [1,1,1,1], 'alt_id': [1,2,3,4], 191 | 'home_tract': [55,55,55,55], 'work_tract': [17,46,19,55]} 192 | orca.add_table('choice_table', pd.DataFrame(d).set_index(['obs_id','alt_id'])) 193 | 194 | d = {'home_tract': [55,55,55], 'work_tract': [17,18,19], 'dist': [1,1,1]} 195 | orca.add_table('distances', pd.DataFrame(d).set_index(['home_tract','work_tract'])) 196 | 197 | validate_table('choice_table') 198 | 199 | 200 | def test_validate_all_tables(orca_session): 201 | """ 202 | 203 | """ 204 | d = {'building_id': [1,2,3,4], 'value': [4,4,4,4]} 205 | orca.add_table('buildings', pd.DataFrame(d).set_index('building_id')) 206 | 207 | d = {'household_id': [1,2,3], 'building_id': [2,3,5]} 208 | orca.add_table('households', pd.DataFrame(d).set_index('household_id')) 209 | 210 | validate_all_tables() 211 | 212 | 213 | ############################### 214 | ## merge_tables() 215 | 216 | def test_merge_two_tables(): 217 | """ 218 | Merge two tables. 219 | 220 | """ 221 | d = {'building_id': [1,2,3,4], 'value': [4,4,4,4]} 222 | buildings = pd.DataFrame(d).set_index('building_id') 223 | 224 | d = {'household_id': [1,2,3], 'building_id': [2,3,4]} 225 | households = pd.DataFrame(d).set_index('household_id') 226 | 227 | merged = merge_tables([households, buildings]) 228 | assert sorted(all_cols(merged)) == sorted(['household_id', 'building_id', 'value']) 229 | 230 | 231 | def test_merge_three_tables(): 232 | """ 233 | Merge three tables. 
234 | 235 | """ 236 | d = {'zone_id': [1], 'size': [1]} 237 | zones = pd.DataFrame(d).set_index('zone_id') 238 | 239 | d = {'building_id': [1,2,3,4], 'zone_id': [1,1,1,1], 'height': [4,4,4,4]} 240 | buildings = pd.DataFrame(d).set_index('building_id') 241 | 242 | d = {'household_id': [1,2,3], 'building_id': [2,3,4]} 243 | households = pd.DataFrame(d).set_index('household_id') 244 | 245 | merged = merge_tables([households, buildings, zones]) 246 | assert sorted(all_cols(merged)) == sorted( 247 | ['household_id', 'building_id', 'zone_id', 'height', 'size']) 248 | 249 | 250 | def test_merge_three_tables_out_of_order(): 251 | """ 252 | Merge three tables, where the second and third are each merged onto the first. 253 | 254 | """ 255 | d = {'zone_id': [1], 'size': [1]} 256 | zones = pd.DataFrame(d).set_index('zone_id') 257 | 258 | d = {'building_id': [1,2,3,4], 'height': [4,4,4,4]} 259 | buildings = pd.DataFrame(d).set_index('building_id') 260 | 261 | d = {'household_id': [1,2,3], 'building_id': [2,3,4], 'zone_id': [1,1,1]} 262 | households = pd.DataFrame(d).set_index('household_id') 263 | 264 | merged = merge_tables([households, buildings, zones]) 265 | assert sorted(all_cols(merged)) == sorted( 266 | ['household_id', 'building_id', 'zone_id', 'height', 'size']) 267 | 268 | 269 | def test_merge_tables_limit_columns(): 270 | """ 271 | Merge tables and remove some of the columns. 
272 | 273 | """ 274 | d = {'zone_id': [1], 'size': [1]} 275 | zones = pd.DataFrame(d).set_index('zone_id') 276 | 277 | d = {'building_id': [1,2,3,4], 'zone_id': [1,1,1,1], 'height': [4,4,4,4]} 278 | buildings = pd.DataFrame(d).set_index('building_id') 279 | 280 | d = {'household_id': [1,2,3], 'building_id': [2,3,4]} 281 | households = pd.DataFrame(d).set_index('household_id') 282 | 283 | merged = merge_tables([households, buildings, zones], 284 | columns=['zone_id', 'height', 'size']) 285 | assert sorted(all_cols(merged)) == sorted( 286 | ['household_id', 'zone_id', 'height', 'size']) 287 | 288 | 289 | def test_merge_tables_duplicate_column_names(): 290 | """ 291 | Confirm tables can be merged with overlapping column names, as long as they're not 292 | included in the list of columns to retain. 293 | 294 | """ 295 | d = {'building_id': [1,2,3,4], 'value': [4,4,4,4], 'dupe': [1,1,1,1]} 296 | buildings = pd.DataFrame(d).set_index('building_id') 297 | 298 | d = {'household_id': [1,2,3], 'building_id': [2,3,4], 'dupe': [1,1,1]} 299 | households = pd.DataFrame(d).set_index('household_id') 300 | 301 | # Duplicate columns should raise a ValueError 302 | try: 303 | merged = merge_tables([households, buildings]) 304 | pytest.fail() 305 | except ValueError as e: 306 | print(e) 307 | 308 | # Excluding the duplicated name should make things ok 309 | merged = merge_tables([households, buildings], columns=['value']) 310 | assert sorted(all_cols(merged)) == sorted(['household_id', 'value']) 311 | 312 | 313 | def test_merge_tables_multiindex(): 314 | """ 315 | Merge tables where the source table has a multi-index. 
316 | 317 | """ 318 | d = {'building_id': [1,1,2,2], 'unit_id': [1,2,1,2], 'value': [4,4,4,4]} 319 | units = pd.DataFrame(d).set_index(['building_id', 'unit_id']) 320 | 321 | d = {'household_id': [1,2,3], 'building_id': [1,1,2], 'unit_id': [1,2,1]} 322 | households = pd.DataFrame(d).set_index('household_id') 323 | 324 | merged = merge_tables([households, units]) 325 | assert sorted(all_cols(merged)) == sorted( 326 | ['household_id', 'building_id', 'unit_id', 'value']) 327 | 328 | 329 | def test_merge_tables_missing_values(): 330 | """ 331 | If the target table includes identifiers not found in the source table, missing 332 | values should be inserted, changing the data type. 333 | 334 | """ 335 | d = {'building_id': [1,1,2,2], 'unit_id': [1,2,1,2], 'value': [4,4,4,4]} 336 | units = pd.DataFrame(d).set_index(['building_id', 'unit_id']) 337 | 338 | d = {'household_id': [1,2,3], 'building_id': [1,1,3], 'unit_id': [1,2,1]} 339 | households = pd.DataFrame(d).set_index('household_id') 340 | 341 | merged = merge_tables([households, units]) 342 | assert units.value.dtype == 'int64' 343 | assert merged.values.dtype == 'float64' 344 | 345 | 346 | -------------------------------------------------------------------------------- /urbansim_templates/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/* -------------------------------------------------------------------------------- /urbansim_templates/__init__.py: -------------------------------------------------------------------------------- 1 | version = __version__ = '0.2.dev9' 2 | -------------------------------------------------------------------------------- /urbansim_templates/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .column_from_expression import ColumnFromExpression, ExpressionSettings 2 | from .load_table import LoadTable 3 | from .save_table import SaveTable 4 | 
-------------------------------------------------------------------------------- /urbansim_templates/data/column_from_expression.py: -------------------------------------------------------------------------------- 1 | import orca 2 | import pandas as pd 3 | 4 | from urbansim_templates import modelmanager, shared, utils, __version__ 5 | from urbansim_templates.shared import CoreTemplateSettings, OutputColumnSettings 6 | 7 | 8 | class ExpressionSettings(): 9 | """ 10 | Stores custom parameters used by the 11 | :mod:`~urbansim_templates.data.ColumnFromExpression` template. Parameters can be 12 | passed to the constructor or set as attributes. 13 | 14 | Parameters 15 | ---------- 16 | table : str, optional 17 | Name of Orca table the expression will be evaluated on. Required before running 18 | then template. 19 | 20 | expression : str, optional 21 | String describing operations on existing columns of the table, for example 22 | "a/log(b+c)". Required before running. Supports arithmetic and math functions 23 | including sqrt, abs, log, log1p, exp, and expm1 -- see Pandas ``df.eval()`` 24 | documentation for further details. 25 | 26 | """ 27 | def __init__(self, table = None, expression = None): 28 | self.table = table 29 | self.expression = expression 30 | 31 | @classmethod 32 | def from_dict(cls, d): 33 | return cls(table=d['table'], expression=d['expression']) 34 | 35 | def to_dict(self): 36 | return {'table': self.table, 'expression': self.expression} 37 | 38 | 39 | @modelmanager.template 40 | class ColumnFromExpression(): 41 | """ 42 | Template to register a column of derived data with Orca, based on an expression. 43 | Parameters may be passed to the constructor, but they are easier to set as 44 | attributes. The expression can refer to any columns in the same table, and will be 45 | evaluated using ``df.eval()``. Values will be calculated lazily, only when the column 46 | is needed for a specific operation. 
47 | 48 | Parameters 49 | ---------- 50 | meta : :mod:`~urbansim_templates.shared.CoreTemplateSettings`, optional 51 | Standard parameters. This template sets the default value of ``meta.autorun`` 52 | to True. 53 | 54 | data : :mod:`~urbansim_templates.data.ExpressionSettings`, optional 55 | Special parameters for this template. 56 | 57 | output : :mod:`~urbansim_templates.shared.OutputColumnSettings`, optional 58 | Parameters for the column that will be generated. This template uses 59 | ``data.table`` as the default value for ``output.table``. 60 | 61 | """ 62 | def __init__(self, meta=None, data=None, output=None): 63 | 64 | self.meta = CoreTemplateSettings(autorun=True) if meta is None else meta 65 | self.meta.template = self.__class__.__name__ 66 | self.meta.template_version = __version__ 67 | 68 | self.data = ExpressionSettings() if data is None else data 69 | self.output = OutputColumnSettings() if output is None else output 70 | 71 | 72 | @classmethod 73 | def from_dict(cls, d): 74 | """ 75 | Create a class instance from a saved dictionary. 76 | 77 | """ 78 | if 'meta' not in d: 79 | return cls.from_dict_0_2_dev5(d) 80 | 81 | return cls( 82 | meta = CoreTemplateSettings.from_dict(d['meta']), 83 | data = ExpressionSettings.from_dict(d['data']), 84 | output = OutputColumnSettings.from_dict(d['output'])) 85 | 86 | 87 | @classmethod 88 | def from_dict_0_2_dev5(cls, d): 89 | """ 90 | Converter to read saved data from 0.2.dev5 or earlier. Automatically invoked by 91 | ``from_dict()`` as needed. 
92 | 93 | """ 94 | return cls( 95 | meta = CoreTemplateSettings( 96 | name = d['name'], 97 | tags = d['tags'], 98 | autorun = d['autorun']), 99 | data = ExpressionSettings( 100 | table = d['table'], 101 | expression = d['expression']), 102 | output = OutputColumnSettings( 103 | column_name = d['column_name'], 104 | data_type = d['data_type'], 105 | missing_values = d['missing_values'], 106 | cache = d['cache'], 107 | cache_scope = d['cache_scope'])) 108 | 109 | 110 | def to_dict(self): 111 | """ 112 | Create a dictionary representation of the object. 113 | 114 | """ 115 | return { 116 | 'meta': self.meta.to_dict(), 117 | 'data': self.data.to_dict(), 118 | 'output': self.output.to_dict()} 119 | 120 | 121 | def run(self): 122 | """ 123 | Run the template, registering a column of derived data with Orca. Requires values 124 | to be set for ``data.table``, ``data.expression``, and ``output.column_name``. 125 | 126 | """ 127 | if self.data.table is None: 128 | raise ValueError("Please provide a table") 129 | 130 | if self.data.expression is None: 131 | raise ValueError("Please provide an expression") 132 | 133 | if self.output.column_name is None: 134 | raise ValueError("Please provide a column name") 135 | 136 | settings = self.output 137 | 138 | if settings.table is None: 139 | settings.table = self.data.table 140 | 141 | cols = utils.cols_in_expression(self.data.expression) 142 | 143 | def build_column(): 144 | df = utils.get_df(self.data.table, columns=cols) 145 | series = df.eval(self.data.expression) 146 | return series 147 | 148 | shared.register_column(build_column, settings) 149 | 150 | -------------------------------------------------------------------------------- /urbansim_templates/data/load_table.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | try: 4 | import pathlib # Python 3.4+ 5 | except: 6 | pass 7 | 8 | import os 9 | 10 | import orca 11 | import pandas as pd 12 | 13 
# ----------------------------------------------------------------------
# urbansim_templates/data/load_table.py
# ----------------------------------------------------------------------

from __future__ import print_function

try:
    import pathlib  # Python 3.4+
except ImportError:  # narrowed from a bare except: only an import can fail here
    pass

import os

import orca
import pandas as pd

from urbansim_templates import modelmanager, __version__


@modelmanager.template
class LoadTable():
    """
    Template for registering data tables from local CSV or HDF files. Parameters can be
    passed to the constructor or set as attributes.

    An instance of this template class stores *instructions for loading a data table*,
    packaged into an Orca step. Running the instructions registers the table with Orca.

    Parameters
    ----------
    table : str, optional
        Name of the Orca table to be created. Must be provided before running the step.

    source_type : 'csv' or 'hdf', optional
        Source type. Must be provided before running the step.

    path : str, optional
        Local file path to load data from, either absolute or relative to the
        ModelManager config directory. Please provide a Unix-style path (this will work
        on any platform, but a Windows-style path won't, and they're hard to normalize
        automatically).

    url : str, optional - NOT YET IMPLEMENTED
        Remote url to download file from.

    csv_index_cols : str or list of str, optional
        Required for tables loaded from csv.

    extra_settings : dict, optional
        Additional arguments to pass to ``pd.read_csv()`` or ``pd.read_hdf()``. For
        example, you could automatically extract csv data from a gzip file using
        {'compression': 'gzip'}, or specify the table identifier within a multi-object
        hdf store using {'key': 'table-name'}. See Pandas documentation for additional
        settings.

    orca_test_spec : dict, optional - NOT YET IMPLEMENTED
        Data characteristics to be tested when the table is validated.

    cache : bool, default True
        Passed to ``orca.table()``. Note that the default is True, unlike in the
        underlying general-purpose Orca function, because tables read from disk should
        not need to be regenerated during the course of a model run.

    cache_scope : 'step', 'iteration', or 'forever', default 'forever'
        Passed to ``orca.table()``. Default is 'forever', as in Orca.

    copy_col : bool, default True
        Passed to ``orca.table()``. Default is True, as in Orca.

    name : str, optional
        Name of the model step.

    tags : list of str, optional
        Tags, passed to ModelManager.

    autorun : bool, default True
        Automatically run the step whenever it's registered with ModelManager.

    """
    def __init__(self,
            table = None,
            source_type = None,
            path = None,
            csv_index_cols = None,
            extra_settings = None,
            cache = True,
            cache_scope = 'forever',
            copy_col = True,
            name = None,
            tags = None,
            autorun = True):

        # Template-specific params. The dict/list values are normalized from None
        # here so a single mutable default object isn't shared across instances
        # (the previous `extra_settings={}` / `tags=[]` defaults were).
        self.table = table
        self.source_type = source_type
        self.path = path
        self.csv_index_cols = csv_index_cols
        self.extra_settings = {} if extra_settings is None else extra_settings
        self.cache = cache
        self.cache_scope = cache_scope
        self.copy_col = copy_col

        # Standard params
        self.name = name
        self.tags = [] if tags is None else tags
        self.autorun = autorun

        # Automatic params
        self.template = self.__class__.__name__
        self.template_version = __version__


    @classmethod
    def from_dict(cls, d):
        """
        Create an object instance from a saved dictionary representation.

        Parameters
        ----------
        d : dict

        Returns
        -------
        LoadTable

        """
        return cls(
            table = d['table'],
            source_type = d['source_type'],
            path = d['path'],
            csv_index_cols = d['csv_index_cols'],
            extra_settings = d['extra_settings'],
            cache = d['cache'],
            cache_scope = d['cache_scope'],
            copy_col = d['copy_col'],
            name = d['name'],
            tags = d['tags'],
            autorun = d['autorun'])


    def to_dict(self):
        """
        Create a dictionary representation of the object.

        Returns
        -------
        dict

        """
        return {
            'template': self.template,
            'template_version': self.template_version,
            'name': self.name,
            'tags': self.tags,
            'autorun': self.autorun,
            'table': self.table,
            'source_type': self.source_type,
            'path': self.path,
            'csv_index_cols': self.csv_index_cols,
            'extra_settings': self.extra_settings,
            'cache': self.cache,
            'cache_scope': self.cache_scope,
            'copy_col': self.copy_col}


    def run(self):
        """
        Register a data table with Orca.

        Requires values to be set for ``table``, ``source_type``, and ``path``. CSV data
        also requires ``csv_index_cols``.

        Returns
        -------
        None

        """
        if self.table is None:
            raise ValueError("Please provide a table name")

        if self.source_type not in ['csv', 'hdf']:
            raise ValueError("Please provide a source type of 'csv' or 'hdf'")

        if self.path is None:
            raise ValueError("Please provide a file path")

        # NOTE(review): self.path is used as-is below, so relative paths resolve
        # against the current working directory, although the class docstring says
        # relative to the ModelManager config directory -- confirm which is intended.
        kwargs = self.extra_settings

        # Table from CSV file
        if self.source_type == 'csv':
            if self.csv_index_cols is None:
                raise ValueError("Please provide index column name(s) for the csv")

            @orca.table(table_name = self.table,
                        cache = self.cache,
                        cache_scope = self.cache_scope,
                        copy_col = self.copy_col)
            def orca_table():
                # Loaded lazily the first time Orca evaluates the table
                return pd.read_csv(self.path, **kwargs).set_index(self.csv_index_cols)

        # Table from HDF file
        elif self.source_type == 'hdf':
            @orca.table(table_name = self.table,
                        cache = self.cache,
                        cache_scope = self.cache_scope,
                        copy_col = self.copy_col)
            def orca_table():
                return pd.read_hdf(self.path, **kwargs)


# ----------------------------------------------------------------------
# urbansim_templates/data/save_table.py
# ----------------------------------------------------------------------
# (the original `from __future__ import print_function` is omitted here; it
# already appears above and is a no-op on the Python 3 versions this package
# supports per .travis.yml)

import datetime

import orca
import pandas as pd

from urbansim_templates import modelmanager, __version__
from urbansim_templates.utils import get_data


@modelmanager.template
class SaveTable():
    """
    Template for saving Orca tables to local CSV or HDF5 files. Parameters can be passed
    to the constructor or set as attributes.

    Parameters
    ----------
    table : str, optional
        Name of the Orca table. Must be provided before running the step.

    columns : str or list of str, optional
        Names of columns to include. ``None`` will return all columns. Indexes will
        always be included.

    filters : str or list of str, optional
        Filters to apply to the data before saving. Will be passed to
        ``pd.DataFrame.query()``.

    output_type : 'csv' or 'hdf', optional
        Type of file to be created. Must be provided before running the step.

    path : str, optional
        Local file path to save the data to, either absolute or relative to the
        ModelManager config directory. Please provide a Unix-style path (this will work
        on any platform, but a Windows-style path won't, and they're hard to normalize
        automatically). For dynamic file names, you can include the characters "%RUN%",
        "%ITER%", or "%TS%". These will be replaced by the run id, the model iteration
        value, or a timestamp when the output file is created.

    extra_settings : dict, optional
        Additional arguments to pass to ``pd.to_csv()`` or ``pd.to_hdf()``. For example,
        you could automatically compress csv data using {'compression': 'gzip'}, or
        specify a custom table name for an hdf store using {'key': 'table-name'}. See
        Pandas documentation for additional settings.

    name : str, optional
        Name of the model step.

    tags : list of str, optional
        Tags, passed to ModelManager.

    """
    def __init__(self,
            table = None,
            columns = None,
            filters = None,
            output_type = None,
            path = None,
            extra_settings = None,
            name = None,
            tags = None):

        # Template-specific params
        self.table = table
        self.columns = columns
        self.filters = filters
        self.output_type = output_type
        self.path = path
        self.extra_settings = extra_settings

        # Standard params. `tags` is normalized from None so a single mutable
        # default list isn't shared across instances.
        self.name = name
        self.tags = [] if tags is None else tags

        # Automatic params
        self.template = self.__class__.__name__
        self.template_version = __version__


    @classmethod
    def from_dict(cls, d):
        """
        Create an object instance from a saved dictionary representation.

        Parameters
        ----------
        d : dict

        Returns
        -------
        SaveTable

        """
        return cls(
            table = d['table'],
            columns = d['columns'],
            filters = d['filters'],
            output_type = d['output_type'],
            path = d['path'],
            extra_settings = d['extra_settings'],
            name = d['name'],
            tags = d['tags'])


    def to_dict(self):
        """
        Create a dictionary representation of the object.

        Returns
        -------
        dict

        """
        return {
            'template': self.template,
            'template_version': self.template_version,
            'name': self.name,
            'tags': self.tags,
            'table': self.table,
            'columns': self.columns,
            'filters': self.filters,
            'output_type': self.output_type,
            'path': self.path,
            'extra_settings': self.extra_settings}


    def get_dynamic_filepath(self):
        """
        Substitute run id, model iteration, and/or timestamp into the filename.

        For the run id and model iteration, we look for Orca injectables named
        ``run_id`` and ``iter_var``, respectively. If none is found, we use ``0``.

        The timestamp is UTC, formatted as ``YYYYMMDD-HHMMSS``.

        Returns
        -------
        str

        """
        if self.path is None:
            raise ValueError("Please provide a file path")

        run = 0
        if orca.is_injectable('run_id'):
            run = orca.get_injectable('run_id')

        iteration = 0  # renamed from `iter`, which shadowed the builtin
        if orca.is_injectable('iter_var'):
            iteration = orca.get_injectable('iter_var')

        ts = datetime.datetime.utcnow().strftime('%Y%m%d-%H%M%S')

        s = self.path
        s = s.replace('%RUN%', str(run))
        s = s.replace('%ITER%', str(iteration))
        s = s.replace('%TS%', ts)

        return s


    def run(self):
        """
        Save a table to disk.

        Saving a table to an HDF store requires providing a ``key`` that will be used to
        identify the table in the store. We'll use the Orca table name, unless you
        provide a different ``key`` in the ``extra_settings``.

        Returns
        -------
        None

        """
        if self.output_type not in ['csv', 'hdf']:
            raise ValueError("Please provide an output type of 'csv' or 'hdf'")

        if self.table is None:
            raise ValueError("Please provide the table name")

        if self.path is None:
            raise ValueError("Please provide a file path")

        # Shallow-copy so that adding an hdf 'key' below can't mutate the
        # caller's extra_settings dict (the previous code modified it in place)
        kwargs = dict(self.extra_settings) if self.extra_settings else {}

        df = get_data(tables = self.table,
                      filters = self.filters,
                      extra_columns = self.columns)

        if self.output_type == 'csv':
            df.to_csv(self.get_dynamic_filepath(), **kwargs)

        elif self.output_type == 'hdf':
            if 'key' not in kwargs:
                kwargs['key'] = self.table

            df.to_hdf(self.get_dynamic_filepath(), **kwargs)
# ----------------------------------------------------------------------
# urbansim_templates/modelmanager.py
# ----------------------------------------------------------------------

from __future__ import print_function

import os
import copy
import pickle
from collections import OrderedDict

import orca
from urbansim.utils import yamlio

from .__init__ import __version__
from .utils import update_name, version_greater_or_equal


_templates = {}     # global registry of template classes
_steps = {}         # global registry of model steps in memory
_disk_store = None  # path to saved steps on disk


def template(cls):
    """
    Decorator for ModelManager-compliant template classes. Place
    `@modelmanager.template` on the line before a class definition.

    This makes the class available to ModelManager (e.g. for reading saved steps from
    disk) whenever it's imported.

    """
    _templates[cls.__name__] = cls
    return cls


def initialize(path='configs'):
    """
    Load saved model steps from disk. Each file in the directory will be checked for
    compliance with the ModelManager YAML format and then loaded into memory.

    If run multiple times, steps will be cleared from memory and re-loaded.

    Parameters
    ----------
    path : str
        Path to config directory, either absolute or relative to the Python working
        directory

    """
    if not os.path.exists(path):
        print("Path not found: {}".format(os.path.join(os.getcwd(), path)))
        # TO DO - automatically create directory if run again after warning?
        return

    global _steps, _disk_store
    _steps = {}  # clear memory
    _disk_store = path  # save initialization path

    # endswith() is clearer and safer than slicing for the extension check
    files = [os.path.join(path, f) for f in os.listdir(path)
             if f.endswith('.yaml')]

    if len(files) == 0:
        print("No yaml files found in path '{}'".format(path))
        return

    steps = []
    for f in files:
        d = yamlio.yaml_to_dict(str_or_buffer=f)
        if 'modelmanager_version' in d:
            # TO DO - check that file name matches object name in the file?
            if version_greater_or_equal(d['modelmanager_version'], '0.1.dev8'):
                # This is the version that switched from a single file to multiple
                # files with one object stored in each
                steps.append(d)

    if len(steps) == 0:
        print("No files from ModelManager 0.1.dev8 or later found in path '{}'"\
                .format(path))

    for d in steps:
        # TO DO - check for this key, to be safe
        step = build_step(d['saved_object'])
        register(step, save_to_disk=False)


def build_step(d):
    """
    Build a model step object from a saved dictionary. This includes loading
    supplemental objects from disk.

    Parameters
    ----------
    d : dict
        Representation of a model step.

    Returns
    -------
    object

    """
    template = d['meta']['template'] if 'meta' in d else d['template']

    if 'supplemental_objects' in d:
        for i, item in enumerate(d['supplemental_objects']):
            # NOTE(review): the step name is read from the top level of `d` even
            # when 'meta' is present -- confirm that meta-style steps never carry
            # supplemental objects, or read d['meta']['name'] for them.
            content = load_supplemental_object(d['name'], **item)
            d['supplemental_objects'][i]['content'] = content

    return _templates[template].from_dict(d)


def load_supplemental_object(step_name, name, content_type, required=True):
    """
    Load a supplemental object from disk.

    Parameters
    ----------
    step_name : str
        Name of the associated model step.
    name : str
        Name of the supplemental object.
    content_type : str
        Currently supports 'pickle'.
    required : bool, optional
        Whether the supplemental object is required (not yet supported).

    Returns
    -------
    object

    """
    # Unknown content types fall through and return None, matching the prior
    # behavior; only 'pickle' is supported so far
    if content_type == 'pickle':
        with open(os.path.join(_disk_store, step_name+'-'+name+'.pkl'), 'rb') as f:
            return pickle.load(f)


def register(step, save_to_disk=True):
    """
    Register a model step with ModelManager and Orca. This includes saving it to disk,
    optionally, so it can be automatically loaded in the future.

    Registering a step will overwrite any previously loaded step with the same name. If
    a name has not yet been assigned, one will be generated from the template name and a
    timestamp.

    If the model step includes an attribute 'autorun' that's set to True, the step will
    run after being registered.

    Parameters
    ----------
    step : object

    Returns
    -------
    None

    """
    # Currently supporting both step.name and step.meta.name
    if hasattr(step, 'meta'):
        # TO DO: move the name updating to CoreTemplateSettings?
        step.meta.name = update_name(step.meta.template, step.meta.name)
        name = step.meta.name

    else:
        step.name = update_name(step.template, step.name)
        name = step.name

    if save_to_disk:
        save_step_to_disk(step)

    print("Registering model step '{}'".format(name))

    _steps[name] = step

    # Create a callable that runs the model step, and register it with orca
    def run_step():
        return step.run()

    orca.add_step(name, run_step)

    # Collapse the duplicated meta/flat branching into one autorun lookup
    if hasattr(step, 'meta'):
        autorun = step.meta.autorun
    else:
        autorun = getattr(step, 'autorun', False)

    if autorun:
        orca.run([name])


def list_steps():
    """
    Return a list of registered steps, with name, template, and tags for each.

    Returns
    -------
    list of dicts, ordered by name

    """
    steps = []
    for k in sorted(_steps.keys()):
        # Meta-style steps also report a 'notes' field
        if hasattr(_steps[k], 'meta'):
            steps += [{'name': _steps[k].meta.name,
                       'template': _steps[k].meta.template,
                       'tags': _steps[k].meta.tags,
                       'notes': _steps[k].meta.notes}]
        else:
            steps += [{'name': _steps[k].name,
                       'template': _steps[k].template,
                       'tags': _steps[k].tags}]
    return steps


def save_step_to_disk(step):
    """
    Save a model step to disk, over-writing the previous file. The file will be named
    'model-name.yaml' and will be saved to the initialization directory.

    """
    name = step.meta.name if hasattr(step, 'meta') else step.name

    if _disk_store is None:
        print("Please run 'modelmanager.initialize()' before registering new model steps")
        return

    print("Saving '{}.yaml': {}".format(name,
            os.path.join(os.getcwd(), _disk_store)))

    d = step.to_dict()

    # Save supplemental objects, then strip their content from the yaml payload
    if 'supplemental_objects' in d:
        for item in filter(None, d['supplemental_objects']):
            save_supplemental_object(name, **item)
            del item['content']

    # Save main yaml file
    headers = {'modelmanager_version': __version__}

    content = OrderedDict(headers)
    content.update({'saved_object': d})

    yamlio.convert_to_yaml(content, os.path.join(_disk_store, name+'.yaml'))


def save_supplemental_object(step_name, name, content, content_type, required=True):
    """
    Save a supplemental object to disk.

    Parameters
    ----------
    step_name : str
        Name of the associated model step.
    name : str
        Name of the supplemental object.
    content : obj
        Object to save.
    content_type : str
        Currently supports 'pickle'.
    required : bool, optional
        Whether the supplemental object is required (not yet supported).

    """
    if content_type == 'pickle':
        # NOTE(review): assumes `content` exposes to_pickle() (e.g. a pandas
        # object) -- confirm for non-pandas supplemental objects
        content.to_pickle(os.path.join(_disk_store, step_name+'-'+name+'.pkl'))


def get_step(name):
    """
    Return the class representation of a registered step, by name.

    Parameters
    ----------
    name : str

    Returns
    -------
    instance of a template class

    """
    # Deep copy so callers can modify the result without touching the registry
    return copy.deepcopy(_steps[name])


def remove_step(name):
    """
    Remove a model step, by name. It will immediately be removed from ModelManager and
    from disk, but will remain registered in Orca until the current Python process
    terminates.

    Parameters
    ----------
    name : str

    """
    print("Removing '{}' and '{}.yaml'".format(name, name))

    d = _steps[name].to_dict()

    if 'supplemental_objects' in d:
        for item in filter(None, d['supplemental_objects']):
            remove_supplemental_object(name, item['name'], item['content_type'])

    del _steps[name]
    os.remove(os.path.join(_disk_store, name+'.yaml'))


def remove_supplemental_object(step_name, name, content_type):
    """
    Remove a supplemental object from disk.

    Parameters
    ----------
    step_name : str
        Name of the associated model step.
    name : str
        Name of the supplemental object.
    content_type : str
        Currently supports 'pickle'.

    """
    # TO DO - check that the file exists first

    if content_type == 'pickle':
        os.remove(os.path.join(_disk_store, step_name+'-'+name+'.pkl'))


def get_config_dir():
    """
    Return the config directory, for other services that need to interoperate.

    Returns
    -------
    str

    """
    return _disk_store


# ----------------------------------------------------------------------
# urbansim_templates/models/__init__.py
# ----------------------------------------------------------------------

from .binary_logit import BinaryLogitStep
from .large_multinomial_logit import LargeMultinomialLogitStep
from .regression import OLSRegressionStep
from .segmented_large_multinomial_logit import SegmentedLargeMultinomialLogitStep
from .shared import TemplateStep
from .small_multinomial_logit import SmallMultinomialLogitStep
# ----------------------------------------------------------------------
# urbansim_templates/models/binary_logit.py
# ----------------------------------------------------------------------

from __future__ import print_function

import numpy as np
import pandas as pd
import patsy
from datetime import datetime as dt
from statsmodels.api import Logit

import orca

from .. import modelmanager
from ..utils import get_data
from .shared import TemplateStep


@modelmanager.template
class BinaryLogitStep(TemplateStep):
    """
    A class for building binary logit model steps. This extends TemplateStep, where some
    common functionality is defined. Estimation is handled by Statsmodels and simulation
    is handled within this class.

    Expected usage:
    - create a model object
    - specify some parameters
    - run the `fit()` method
    - iterate as needed

    Then, for simulation:
    - specify some simulation parameters
    - use the `run()` method for interactive testing
    - use `modelmanager.register()` to save the model to Orca and disk
    - registered steps can be accessed via ModelManager and Orca

    All parameters listed in the constructor can be set directly on the class object,
    at any time.

    Parameters
    ----------
    tables : str or list of str, optional
        Name(s) of Orca tables to draw data from. The first table is the primary one.
        Any additional tables need to have merge relationships ("broadcasts") specified
        so that they can be merged unambiguously onto the first table. Among them, the
        tables must contain all variables used in the model expression and filters. The
        left-hand-side variable should be in the primary table. The `tables` parameter
        is required for fitting a model, but it does not have to be provided when the
        object is created.

    model_expression : str, optional
        Patsy formula containing both the left- and right-hand sides of the model
        expression: http://patsy.readthedocs.io/en/latest/formulas.html
        This parameter is required for fitting a model, but it does not have to be
        provided when the object is created.

    filters : str or list of str, optional
        Filters to apply to the data before fitting the model. These are passed to
        `pd.DataFrame.query()`. Filters are applied after any additional tables are
        merged onto the primary one. Replaces the `fit_filters` argument in UrbanSim.

    out_tables : str or list of str, optional
        Name(s) of Orca tables to use for simulation. If not provided, the `tables`
        parameter will be used. Same guidance applies: the tables must be able to be
        merged unambiguously, and must include all columns used in the right-hand-side
        of the model expression and in the `out_filters`.

    out_column : str, optional
        Name of the column to write simulated choices to. If it does not already exist
        in the primary output table, it will be created. If not provided, the left-hand-
        side variable from the model expression will be used. Replaces the `out_fname`
        argument in UrbanSim.

        # TO DO - auto-generation not yet working; column must exist in the primary table

    out_filters : str or list of str, optional
        Filters to apply to the data before simulation. If not provided, no filters
        will be applied. Replaces the `predict_filters` argument in UrbanSim.

    out_value_true : numeric or str, optional
        Value to save to the output column corresponding to an affirmative choice.
        Default is 1 (int). Use keyword 'nothing' to leave values unchanged.

    out_value_false : numeric or str, optional
        Value to save to the output column corresponding to a negative choice. Default
        is 0 (int). Use keyword 'nothing' to leave values unchanged.

    name : str, optional
        Name of the model step, passed to ModelManager. If none is provided, a name is
        generated each time the `fit()` method runs.

    tags : list of str, optional
        Tags, passed to ModelManager.

    """
    def __init__(self, tables=None, model_expression=None, filters=None, out_tables=None,
            out_column=None, out_filters=None, out_value_true=1, out_value_false=0,
            name=None, tags=None):

        # Parent class can initialize the standard parameters. `tags` is
        # normalized from None so a single mutable default list isn't shared
        # across instances (the previous `tags=[]` default was).
        TemplateStep.__init__(self, tables=tables, model_expression=model_expression,
                filters=filters, out_tables=out_tables, out_column=out_column,
                out_transform=None, out_filters=out_filters, name=name,
                tags=[] if tags is None else tags)

        # Custom parameters not in parent class
        self.out_value_true = out_value_true
        self.out_value_false = out_value_false

        # Placeholders for model fit data, filled in by fit() or from_dict()
        self.summary_table = None
        self.fitted_parameters = None


    @classmethod
    def from_dict(cls, d):
        """
        Create an object instance from a saved dictionary representation.

        Parameters
        ----------
        d : dict

        Returns
        -------
        BinaryLogitStep

        """
        # Pass values from the dictionary to the __init__() method
        obj = cls(tables=d['tables'], model_expression=d['model_expression'],
                filters=d['filters'], out_tables=d['out_tables'],
                out_column=d['out_column'], out_filters=d['out_filters'],
                out_value_true=d['out_value_true'], out_value_false=d['out_value_false'],
                name=d['name'], tags=d['tags'])

        obj.summary_table = d['summary_table']
        obj.fitted_parameters = d['fitted_parameters']

        return obj


    def to_dict(self):
        """
        Create a dictionary representation of the object.

        Returns
        -------
        dict

        """
        d = TemplateStep.to_dict(self)

        # Add parameters not in parent class
        d.update({
            'out_value_true': self.out_value_true,
            'out_value_false': self.out_value_false,
            'summary_table': self.summary_table,
            'fitted_parameters': self.fitted_parameters
        })
        return d


    def fit(self):
        """
        Fit the model; save and report results. This currently uses the Statsmodels
        Logit class with default estimation settings. (It will shift to ChoiceModels
        once more infrastructure is in place.)

        The `fit()` method can be run as many times as desired. Results will not be
        saved with Orca or ModelManager until the `register()` method is run.

        Parameters
        ----------
        None

        Returns
        -------
        None

        """
        # TO DO - verify that params are in place for estimation

        # Workaround for a temporary statsmodels bug:
        # https://github.com/statsmodels/statsmodels/issues/3931
        from scipy import stats
        stats.chisqprob = lambda chisq, df: stats.chi2.sf(chisq, df)

        df = get_data(tables = self.tables,
                      filters = self.filters,
                      model_expression = self.model_expression)

        m = Logit.from_formula(data=df, formula=self.model_expression)
        results = m.fit()

        # NOTE(review): this runs unconditionally; presumably _generate_name()
        # (defined in TemplateStep) preserves a user-provided name -- confirm
        self.name = self._generate_name()
        self.summary_table = str(results.summary())
        print(self.summary_table)

        # For now, we can just save the summary table and the fitted parameters. Later
        # on we will probably want programmatic access to more details about the fit
        # (e.g. for autospec), but we can add that when it's needed.

        self.fitted_parameters = results.params.tolist()  # params is a pd.Series


    def run(self):
        """
        Run the model step: calculate simulated choices and use them to update a column.

        For binary logit, we calculate predicted probabilities and then perform a
        weighted random draw to determine the simulated binary outcomes. This is done
        directly from the fitted parameters, because we can't conveniently regenerate a
        Statsmodels results object from a dictionary representation.

        The predicted probabilities and simulated choices are saved to the class object
        for interactive use (`probabilities` and `choices`, with type pd.Series) but are
        not persisted in the dictionary representation of the model step.

        Parameters
        ----------
        None

        Returns
        -------
        None

        """
        # TO DO - verify that params are in place for prediction

        df = get_data(tables = self.out_tables,
                      fallback_tables = self.tables,
                      filters = self.out_filters,
                      model_expression = self.model_expression,
                      extra_columns = self.out_column)

        dm = patsy.dmatrices(data=df, formula_like=self.model_expression,
                return_type='dataframe')[1]  # right-hand-side design matrix

        beta_X = np.dot(dm, self.fitted_parameters)

        # Logistic probabilities; the exp(-x) form is algebraically identical to
        # exp(x)/(1+exp(x)) but avoids float overflow for large positive utilities
        probs = 1.0 / (1.0 + np.exp(-beta_X))

        rand = np.random.random(len(probs))
        choices = rand < probs

        # Save results to the class object (via df to include index)
        df['_probs'] = probs
        self.probabilities = df._probs
        df['_choices'] = choices
        self.choices = df._choices

        # TO DO - generate column if it does not exist

        colname = self._get_out_column()
        tabname = self._get_out_table()

        # Boolean masks used directly rather than comparing `== True`/`== False`
        if self.out_value_true != 'nothing':
            df.loc[df._choices, colname] = self.out_value_true

        if self.out_value_false != 'nothing':
            df.loc[~df._choices, colname] = self.out_value_false

        orca.get_table(tabname).update_col_from_series(colname, df[colname], cast=True)
-------------------------------------------------------------------------------- /urbansim_templates/models/regression.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import math 4 | import numpy as np 5 | import pandas as pd 6 | from datetime import datetime as dt 7 | 8 | import orca 9 | from urbansim.models import RegressionModel 10 | from urbansim.utils import yamlio 11 | 12 | from .. import modelmanager 13 | from ..utils import get_data, update_column 14 | from .shared import TemplateStep 15 | 16 | 17 | @modelmanager.template 18 | class OLSRegressionStep(TemplateStep): 19 | """ 20 | A class for building OLS (ordinary least squares) regression model steps. This extends 21 | TemplateStep, where some common functionality is defined. Estimation and simulation 22 | are handled by `urbansim.models.RegressionModel()`. 23 | 24 | Expected usage: 25 | - create a model object 26 | - specify some parameters 27 | - run the `fit()` method 28 | - iterate as needed 29 | 30 | Then, for simulation: 31 | - specify some simulation parameters 32 | - use the `run()` method for interactive testing 33 | - use `modelmanager.register()` to save the model to Orca and disk 34 | - registered steps can be accessed via ModelManager and Orca 35 | 36 | All parameters listed in the constructor can be set directly on the class object, 37 | at any time. 38 | 39 | Parameters 40 | ---------- 41 | tables : str or list of str, optional 42 | Name(s) of Orca tables to draw data from. The first table is the primary one. 43 | Any additional tables need to have merge relationships ("broadcasts") specified 44 | so that they can be merged unambiguously onto the first table. Among them, the 45 | tables must contain all variables used in the model expression and filters. The 46 | left-hand-side variable should be in the primary table. 
The `tables` parameter is 47 | required for fitting a model, but it does not have to be provided when the object 48 | is created. 49 | 50 | model_expression : str, optional 51 | Patsy formula containing both the left- and right-hand sides of the model 52 | expression: http://patsy.readthedocs.io/en/latest/formulas.html 53 | This parameter is required for fitting a model, but it does not have to be 54 | provided when the object is created. 55 | 56 | filters : str or list of str, optional 57 | Filters to apply to the data before fitting the model. These are passed to 58 | `pd.DataFrame.query()`. Filters are applied after any additional tables are merged 59 | onto the primary one. Replaces the `fit_filters` argument in UrbanSim. 60 | 61 | out_tables : str or list of str, optional 62 | Name(s) of Orca tables to use for simulation. If not provided, the `tables` 63 | parameter will be used. Same guidance applies: the tables must be able to be 64 | merged unambiguously, and must include all columns used in the right-hand-side 65 | of the model expression and in the `out_filters`. 66 | 67 | out_column : str, optional 68 | Name of the column to write predicted values to. If it does not already exist 69 | in the primary output table, it will be created. If not provided, the left-hand- 70 | side variable from the model expression will be used. Replaces the `out_fname` 71 | argument in UrbanSim. 72 | 73 | out_transform : str, optional 74 | Element-wise transformation to apply to the predicted values, for example to 75 | reverse a transformation of the left-hand-side variable in the model expression. 76 | This should be provided as a string containing a function name. Supports anything 77 | from NumPy or Python's built-in math library, for example 'np.exp' or 78 | 'math.floor'. Replaces the `ytransform` argument in UrbanSim. 79 | 80 | out_filters : str or list of str, optional 81 | Filters to apply to the data before simulation. If not provided, no filters will 82 | be applied. 
Replaces the `predict_filters` argument in UrbanSim. 83 | 84 | name : str, optional 85 | Name of the model step, passed to ModelManager. If none is provided, a name is 86 | generated each time the `fit()` method runs. 87 | 88 | tags : list of str, optional 89 | Tags, passed to ModelManager. 90 | 91 | """ 92 | def __init__(self, tables=None, model_expression=None, filters=None, out_tables=None, 93 | out_column=None, out_transform=None, out_filters=None, name=None, tags=[]): 94 | 95 | # Parent class can initialize the standard parameters 96 | TemplateStep.__init__(self, tables=tables, model_expression=model_expression, 97 | filters=filters, out_tables=out_tables, out_column=out_column, 98 | out_transform=out_transform, out_filters=out_filters, name=name, 99 | tags=tags) 100 | 101 | # Placeholders for model fit data, filled in by fit() or from_dict() 102 | self.summary_table = None 103 | self.fitted_parameters = None 104 | self.residuals = None 105 | self.model = None 106 | 107 | 108 | @classmethod 109 | def from_dict(cls, d): 110 | """ 111 | Create an object instance from a saved dictionary representation. 
112 | 113 | Parameters 114 | ---------- 115 | d : dict 116 | 117 | Returns 118 | ------- 119 | OLSRegressionStep 120 | 121 | """ 122 | # Pass values from the dictionary to the __init__() method 123 | obj = cls(tables=d['tables'], model_expression=d['model_expression'], 124 | filters=d['filters'], out_tables=d['out_tables'], 125 | out_column=d['out_column'], out_transform=d['out_transform'], 126 | out_filters=d['out_filters'], name=d['name'], tags=d['tags']) 127 | 128 | obj.summary_table = d['summary_table'] 129 | obj.fitted_parameters = d['fitted_parameters'] 130 | obj.model = None 131 | 132 | # Unpack the urbansim.models.RegressionModel() sub-object and resuscitate it 133 | if d['model'] is not None: 134 | model_config = yamlio.convert_to_yaml(d['model'], None) 135 | obj.model = RegressionModel.from_yaml(model_config) 136 | 137 | return obj 138 | 139 | 140 | def to_dict(self): 141 | """ 142 | Create a dictionary representation of the object. 143 | 144 | Returns 145 | ------- 146 | dict 147 | 148 | """ 149 | d = TemplateStep.to_dict(self) 150 | 151 | # Add parameters not in parent class 152 | d.update({ 153 | 'summary_table': self.summary_table, 154 | 'fitted_parameters': self.fitted_parameters, 155 | 'model': self.model.to_dict() if self.model else None 156 | }) 157 | return d 158 | 159 | 160 | def fit(self): 161 | """ 162 | Fit the model; save and report results. 163 | 164 | This currently uses the `RegressionModel` class from core UrbanSim. We save the 165 | model object for prediction and interactive use (`model`, with type 166 | `urbansim.models.regression.RegressionModel`). 167 | 168 | For example, you can use this to get a latex version of the summary table using 169 | `m.model.model_fit.summary().as_latex()`. This may change in the future if we 170 | refactor the template to use StatsModels directly. 
171 | 172 | """ 173 | self.model = RegressionModel(model_expression=self.model_expression, 174 | fit_filters=self.filters, predict_filters=self.out_filters, 175 | ytransform=None, name=self.name) 176 | 177 | df = get_data(tables = self.tables, 178 | filters = self.filters, 179 | model_expression = self.model_expression) 180 | 181 | results = self.model.fit(df) 182 | 183 | self.name = self._generate_name() 184 | self.summary_table = str(results.summary()) 185 | print(self.summary_table) 186 | 187 | # We don't strictly need to save the fitted parameters, because they are also 188 | # contained in the urbansim.models.RegressionModel() sub-object. But maintaining 189 | # a parallel data structure to other templates will make it easier to refactor the 190 | # code later on to not rely on RegressionModel any more. 191 | 192 | self.fitted_parameters = results.params.tolist() 193 | self.residuals = results.resid 194 | 195 | def run(self): 196 | """ 197 | Run the model step: calculate predicted values, transform them as specified, and 198 | use them to update a column. 199 | 200 | The pre-transformation predicted values are saved to the class object for 201 | diagnostic use (`predicted_values` with type pd.Series). The post-transformation 202 | predicted values are written to Orca. 
203 | 204 | """ 205 | df = get_data(tables = self.out_tables, 206 | fallback_tables = self.tables, 207 | filters = self.out_filters, 208 | model_expression = self.model_expression) 209 | 210 | values = self.model.predict(df) 211 | self.predicted_values = values 212 | 213 | if self.out_transform is not None: 214 | values = values.apply(eval(self.out_transform)) 215 | 216 | colname = self._get_out_column() 217 | tabname = self._get_out_table() 218 | 219 | update_column(table = tabname, 220 | column = colname, 221 | data = values) 222 | 223 | -------------------------------------------------------------------------------- /urbansim_templates/models/segmented_large_multinomial_logit.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import copy 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | import orca 9 | from urbansim.models.util import apply_filter_query 10 | 11 | from ..__init__ import __version__ 12 | from ..utils import get_data, update_name 13 | from .. import modelmanager 14 | from . import LargeMultinomialLogitStep 15 | from .shared import TemplateStep 16 | 17 | 18 | @modelmanager.template 19 | class SegmentedLargeMultinomialLogitStep(TemplateStep): 20 | """ 21 | This template automatically generates a set of LargeMultinomialLogitStep submodels 22 | corresponding to "segments" or categories of choosers. The submodels can be directly 23 | accessed and edited. 24 | 25 | Running 'build_submodels()' will create a submodel for each category of choosers 26 | identified in the segmentation column. The submodels are implemented using filter 27 | queries. 28 | 29 | Once they are generated, the 'submodels' property contains a dict of 30 | LargeMultinomialLogitStep objects, identified by category name. You can edit their 31 | properties as needed, fit them individually, etc. 
32 | 33 | Editing a property in the 'defaults' object will update all the submodels at once, 34 | while leaving customizations to other properties intact. 35 | 36 | Parameters 37 | ---------- 38 | defaults : LargeMultinomialLogitStep, optional 39 | Object containing initial parameter values for the submodels. Values for 40 | 'choosers', 'alternatives', and 'choice_column' are required to generate 41 | submodels, but do not have to be provided when the object is created. 42 | 43 | segmentation_column : str, optional 44 | Name of a column of categorical values in the 'defaults.choosers' table. Any data 45 | that can be interpreted by Pandas as categorical is valid. This is required to 46 | generate submodels, but does not have to be provided when the object is created. 47 | 48 | name : str, optional 49 | Name of the model step. 50 | 51 | tags : list of str, optional 52 | Tags associated with the model step. 53 | 54 | """ 55 | def __init__(self, defaults=None, segmentation_column=None, name=None, tags=[]): 56 | 57 | if defaults is None: 58 | defaults = LargeMultinomialLogitStep() 59 | 60 | self.defaults = defaults 61 | self.defaults.bind_to(self.update_submodels) 62 | 63 | self.segmentation_column = segmentation_column 64 | 65 | self.name = name 66 | self.tags = tags 67 | 68 | self.template = self.__class__.__name__ 69 | self.template_version = __version__ 70 | 71 | # Properties to be filled in by build_submodels() or from_dict() 72 | self.submodels = {} 73 | 74 | 75 | @classmethod 76 | def from_dict(cls, d): 77 | """ 78 | Create an object instance from a saved dictionary representation. 
79 | 80 | Parameters 81 | ---------- 82 | d : dict 83 | 84 | Returns 85 | ------- 86 | SegmentedLargeMultinomialLogitStep 87 | 88 | """ 89 | mnl_step = LargeMultinomialLogitStep.from_dict 90 | 91 | obj = cls( 92 | defaults = mnl_step(d['defaults']), 93 | segmentation_column = d['segmentation_column'], 94 | name = d['name'], 95 | tags = d['tags']) 96 | 97 | obj.submodels = {k: mnl_step(m) for k, m in d['submodels'].items()} 98 | 99 | return obj 100 | 101 | 102 | def to_dict(self): 103 | """ 104 | Create a dictionary representation of the object. 105 | 106 | Returns 107 | ------- 108 | dict 109 | 110 | """ 111 | d = { 112 | 'template': self.template, 113 | 'template_version': self.template_version, 114 | 'name': self.name, 115 | 'tags': self.tags, 116 | 'defaults': self.defaults.to_dict(), 117 | 'segmentation_column': self.segmentation_column, 118 | 'submodels': {k: m.to_dict() for k, m in self.submodels.items()} 119 | } 120 | return d 121 | 122 | 123 | def get_segmentation_column(self, mct=None): 124 | """ 125 | Get the column of segmentation values from Orca. Chooser and alternative filters 126 | are applied to identify valid observations. 127 | 128 | Parameters 129 | ---------- 130 | mct : choicemodels.tools.MergedChoiceTable 131 | This parameter is a temporary backdoor allowing us to pass in a more 132 | complicated choice table than can be generated within the template, for 133 | example including sampling weights or interaction terms. 
134 | 135 | Returns 136 | ------- 137 | pd.Series 138 | 139 | """ 140 | if mct is not None: 141 | df = mct.to_frame() 142 | else: 143 | obs = get_data(tables = self.defaults.choosers, 144 | filters = self.defaults.chooser_filters, 145 | extra_columns = [self.defaults.choice_column, 146 | self.segmentation_column]) 147 | 148 | alts = get_data(tables = self.defaults.alternatives, 149 | filters = self.defaults.alt_filters) 150 | 151 | df = pd.merge(obs, alts, how='inner', 152 | left_on=self.defaults.choice_column, right_index=True) 153 | 154 | return df[self.segmentation_column] 155 | 156 | 157 | def build_submodels(self, mct=None): 158 | """ 159 | Create a submodel for each category of choosers identified in the segmentation 160 | column. Only categories with at least one observation remaining after applying 161 | chooser and alternative filters will be included. 162 | 163 | Running this method will overwrite any previous submodels. 164 | 165 | Parameters 166 | ---------- 167 | mct : choicemodels.tools.MergedChoiceTable 168 | This parameter is a temporary backdoor allowing us to pass in a more 169 | complicated choice table than can be generated within the template, for 170 | example including sampling weights or interaction terms. 
171 | 172 | """ 173 | self.submodels = {} 174 | submodel = LargeMultinomialLogitStep.from_dict(self.defaults.to_dict()) 175 | 176 | col = self.get_segmentation_column(mct=mct) 177 | 178 | if (len(col) == 0): 179 | print("Warning: No valid observations after applying the chooser and "+ 180 | "alternative filters") 181 | return 182 | 183 | cats = col.astype('category').cat.categories.values 184 | 185 | print("Building submodels for {} categories: {}".format(len(cats), cats)) 186 | 187 | for cat in cats: 188 | m = copy.deepcopy(submodel) 189 | seg_filter = "{} == '{}'".format(self.segmentation_column, cat) 190 | 191 | if isinstance(m.chooser_filters, list): 192 | m.chooser_filters += [seg_filter] 193 | 194 | elif isinstance(m.chooser_filters, str): 195 | m.chooser_filters = [m.chooser_filters, seg_filter] 196 | 197 | else: 198 | m.chooser_filters = seg_filter 199 | 200 | # TO DO - same for out_chooser_filters, once we handle simulation 201 | self.submodels[cat] = m 202 | 203 | 204 | def update_submodels(self, param, value): 205 | """ 206 | Updates a property across all the submodels. This method is bound to the 207 | `defaults` object and runs automatically when one of its properties is changed. 208 | 209 | Note that the `chooser_filters` and `alt_filters` properties cannot currently be 210 | updated this way, because they can affect the model segmentation. If you are 211 | confident the changes are valid, you can edit the submodels directly. Otherwise, 212 | you can regenerate them using updated defaults by running `build_submodels()`. 213 | 214 | Parameters 215 | ---------- 216 | param : str 217 | Property name. 218 | value : anything 219 | 220 | """ 221 | if (param in ['chooser_filters', 'alt_filters']) & (len(self.submodels) > 0): 222 | print("Warning: Changing '{}' can affect the model segmentation. Changes " + 223 | "have been saved to 'defaults' but not to the submodels. 
To " + 224 | "regenerate them using the new defaults, run 'build_submodels()'."\ 225 | .format(param)) 226 | return 227 | 228 | for k, m in self.submodels.items(): 229 | setattr(m, param, value) 230 | 231 | 232 | def fit_all(self, mct=None): 233 | """ 234 | Fit all the submodels. Build the submodels first, if they don't exist yet. This 235 | method can be run as many times as desired. 236 | 237 | Parameters 238 | ---------- 239 | mct : choicemodels.tools.MergedChoiceTable 240 | This parameter is a temporary backdoor allowing us to pass in a more 241 | complicated choice table than can be generated within the template, for 242 | example including sampling weights or interaction terms. 243 | 244 | 245 | """ 246 | if (len(self.submodels) == 0): 247 | self.build_submodels(mct=mct) 248 | 249 | for k, m in self.submodels.items(): 250 | print(' SEGMENT: {0} = {1} '.format( 251 | self.segmentation_column, str(k)).center(70, '#')) 252 | m.fit(mct=mct) 253 | 254 | self.name = update_name(self.template, self.name) 255 | 256 | 257 | def run(self): 258 | """ 259 | Convenience method (requied by template spec) that invokes `run_all()`. 260 | 261 | """ 262 | self.run_all() 263 | 264 | 265 | def run_all(self, interaction_terms=None): 266 | """ 267 | Run all the submodels. 268 | 269 | Parameters 270 | ---------- 271 | interaction_terms : pandas.Series, pandas.DataFrame, or list of either, optional 272 | Additional column(s) of interaction terms whose values depend on the 273 | combination of observation and alternative, to be merged onto the final data 274 | table. If passed as a Series or DataFrame, it should include a two-level 275 | MultiIndex. One level's name and values should match an index or column from 276 | the observations table, and the other should match an index or column from the 277 | alternatives table. 
278 | 279 | """ 280 | for k, m in self.submodels.items(): 281 | print(' SEGMENT: {0} = {1} '.format( 282 | self.segmentation_column, str(k)).center(70, '#')) 283 | m.run(interaction_terms=interaction_terms) 284 | -------------------------------------------------------------------------------- /urbansim_templates/models/shared.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from collections import OrderedDict 6 | from datetime import datetime as dt 7 | 8 | import orca 9 | from urbansim.models import util 10 | 11 | from ..__init__ import __version__ 12 | 13 | 14 | class TemplateStep(object): 15 | """ 16 | Shared functionality for the template classes. 17 | 18 | Parameters 19 | ---------- 20 | tables : str or list of str, optional 21 | Required to fit a model, but doesn't have to be provided at initialization. 22 | model_expression : str, optional 23 | Required to fit a model, but doesn't have to be provided at initialization. 24 | filters : str or list of str ?, optional 25 | Replaces `fit_filters` argument. 26 | out_tables : str or list of str, optional 27 | out_column : str, optional 28 | Replaces `out_fname` argument. 29 | out_transform : callable, optional 30 | Replaces `ytransform` argument. 31 | out_filters : str or list of str ?, optional 32 | Replaces `predict_filters` argument. 33 | name : str, optional 34 | For ModelManager. 35 | tags : list of str, optional 36 | For ModelManager. 37 | 38 | """ 39 | def __init__(self, tables=None, model_expression=None, filters=None, out_tables=None, 40 | out_column=None, out_transform=None, out_filters=None, name=None, tags=[]): 41 | 42 | self.tables = tables 43 | self.model_expression = model_expression 44 | self.filters = filters 45 | 46 | # TO DO - out_transform might not belong here - is it only used for OLS? 
47 | 48 | self.out_tables = out_tables 49 | self.out_column = out_column 50 | self.out_transform = out_transform 51 | self.out_filters = out_filters 52 | 53 | self.name = name 54 | self.tags = tags 55 | 56 | self.template = type(self).__name__ # class name 57 | self.template_version = __version__ 58 | 59 | 60 | @classmethod 61 | def from_dict(cls, d): 62 | """ 63 | Create an object instance from a saved dictionary representation. 64 | 65 | Child classes will need to override this method to implement loading of custom 66 | parameters and estimation results. 67 | 68 | Parameters 69 | ---------- 70 | d : dict 71 | 72 | Returns 73 | ------- 74 | TemplateStep 75 | 76 | """ 77 | # Pass values from the dictionary to the __init__() method 78 | return cls(d['tables'], d['model_expression'], d['filters'], d['out_tables'], 79 | d['out_column'], d['out_transform'], d['out_filters'], d['name'], 80 | d['tags']) 81 | 82 | 83 | def to_dict(self): 84 | """ 85 | Create a dictionary representation of the object. 86 | 87 | Child classes will need to override this method to implement saving of custom 88 | parameters and estimation results. 89 | 90 | Returns 91 | ------- 92 | dict 93 | 94 | """ 95 | d = { 96 | 'template': self.template, 97 | 'template_version': self.template_version, 98 | 'name': self.name, 99 | 'tags': self.tags, 100 | 'tables': self.tables, 101 | 'model_expression': self.model_expression, 102 | 'filters': self.filters, 103 | 'out_tables': self.out_tables, 104 | 'out_column': self.out_column, 105 | 'out_transform': self.out_transform, 106 | 'out_filters': self.out_filters 107 | } 108 | return d 109 | 110 | 111 | def _normalize_table_param(self, tables): 112 | """ 113 | Normalize table parameter input. 
TO DO - add more type validation 114 | 115 | """ 116 | if isinstance(tables, list): 117 | # Normalize [] to None 118 | if len(tables) == 0: 119 | return None 120 | 121 | # Normalize [str] to str 122 | if len(tables) == 1: 123 | return tables[0] 124 | 125 | return tables 126 | 127 | 128 | @property 129 | def tables(self): 130 | return self.__tables 131 | 132 | @tables.setter 133 | def tables(self, tables): 134 | self.__tables = self._normalize_table_param(tables) 135 | 136 | @property 137 | def out_tables(self): 138 | return self.__out_tables 139 | 140 | @out_tables.setter 141 | def out_tables(self, out_tables): 142 | self.__out_tables = self._normalize_table_param(out_tables) 143 | 144 | 145 | def _get_out_column(self): 146 | """ 147 | Return name of the column to save data to. This is 'out_column' if it exsits, 148 | otherwise the left-hand-side column name from the model expression. 149 | 150 | Returns 151 | ------- 152 | str 153 | 154 | """ 155 | if self.out_column is not None: 156 | return self.out_column 157 | 158 | else: 159 | # TO DO - there must be a cleaner way to get LHS column name 160 | return self.model_expression.split('~')[0].split(' ')[0] 161 | 162 | 163 | def _get_out_table(self): 164 | """ 165 | Return name of the table to save data to. This is 'out_tables' or its first 166 | element, if it exists, otherwise 'tables' or its first element. 167 | 168 | Returns 169 | ------- 170 | str 171 | 172 | """ 173 | if self.out_tables is not None: 174 | tables = self.out_tables 175 | else: 176 | tables = self.tables 177 | 178 | if isinstance(tables, str): 179 | return tables 180 | else: 181 | return tables[0] 182 | 183 | 184 | def _generate_name(self): 185 | """ 186 | THIS METHOD IS DEPRECATED, AND SHOULD BE REPLACED BY UTILS.UPDATE_NAME(). 187 | 188 | Generate a name for the class instance, based on its type and the current 189 | timestamp. But if a custom name has already been provided, return that instead. 
190 | 191 | (We can't tell with certainty whether an existing name was auto-generated or 192 | customized, and it doesn't seem worth keeping track. A name is judged to be custom 193 | if it does not contain the class type.) 194 | 195 | Returns 196 | ------- 197 | str 198 | 199 | """ 200 | if (self.name is None) or (self.template in self.name): 201 | return self.template + '-' + dt.now().strftime('%Y%m%d-%H%M%S') 202 | else: 203 | return self.name 204 | 205 | -------------------------------------------------------------------------------- /urbansim_templates/models/small_multinomial_logit.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from collections import OrderedDict 4 | import os 5 | import pickle 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from choicemodels import MultinomialLogit 11 | import orca 12 | 13 | from urbansim_templates import modelmanager 14 | from urbansim_templates.models import TemplateStep 15 | from urbansim_templates.utils import get_data, update_column 16 | 17 | 18 | @modelmanager.template 19 | class SmallMultinomialLogitStep(TemplateStep): 20 | """ 21 | A class for building multinomial logit model steps where the number of alternatives is 22 | "small". Estimation is handled by PyLogit via the ChoiceModels API. Simulation is 23 | handled by PyLogit (probabilities) and ChoiceModels (simulation draws). 24 | 25 | Multinomial logit models can involve a range of different specification and estimation 26 | mechanics. For now these are separated into two templates. What's the difference? 
27 | 28 | "Small" MNL: 29 | - data is in a single table (choosers) 30 | - each alternative can have a different model expression 31 | - all the alternatives are available to all choosers 32 | - estimation and simulation use the PyLogit engine (via ChoiceModels) 33 | 34 | "Large" MNL: 35 | - data is in two tables (choosers and alternatives) 36 | - each alternative has the same model expression 37 | - N alternatives are sampled for each chooser 38 | - estimation and simulation use the ChoiceModels engine (formerly UrbanSim MNL) 39 | 40 | TO DO: 41 | - Add support for specifying availability of alternatives 42 | - Add support for sampling weights 43 | - Add support for on-the-fly interaction calculations (e.g. distance) 44 | 45 | Parameters 46 | ---------- 47 | tables : str or list of str, optional 48 | Name(s) of Orca tables to draw data from. The first table is the primary one. 49 | Any additional tables need to have merge relationships ("broadcasts") specified 50 | so that they can be merged unambiguously onto the first table. Among them, the 51 | tables must contain all variables used in the model expression and filters. The 52 | index of the primary table should be a unique ID. The `tables` parameter is 53 | required for fitting a model, but it does not have to be provided when the object 54 | is created. Reserved column names: '_obs_id', '_alt_id', '_chosen'. 55 | 56 | model_expression : OrderedDict, optional 57 | PyLogit model expression. This parameter is required for fitting a model, but it 58 | does not have to be provided when the object is created. 59 | 60 | model_labels : OrderedDict, optional 61 | PyLogit model labels. 62 | 63 | choice_column : str, optional 64 | Name of the column indicating observed choices, for model estimation. The column 65 | should contain integers matching the alternatives in the model expression. This 66 | parameter is required for fitting a model, but it does not have to be provided 67 | when the object is created. 
68 | 69 | initial_coefs : list of numerics, optional 70 | Starting values for the parameter estimation algorithm, passed to PyLogit. Length 71 | must be equal to the number of parameters being estimated. If this is not 72 | provided, zeros will be used. 73 | 74 | filters : str or list of str, optional 75 | Filters to apply to the data before fitting the model. These are passed to 76 | `pd.DataFrame.query()`. Filters are applied after any additional tables are merged 77 | onto the primary one. Replaces the `fit_filters` argument in UrbanSim. 78 | 79 | out_tables : str or list of str, optional 80 | Name(s) of Orca tables to use for simulation. If not provided, the `tables` 81 | parameter will be used. Same guidance applies: the tables must be able to be 82 | merged unambiguously, and must include all columns used in the model expression 83 | and in the `out_filters`. 84 | 85 | out_column : str, optional 86 | Name of the column to write simulated choices to. If it does not already exist 87 | in the primary output table, it will be created. If not provided, the 88 | `choice_column` will be used. Replaces the `out_fname` argument in UrbanSim. 89 | 90 | out_filters : str or list of str, optional 91 | Filters to apply to the data before simulation. If not provided, no filters will 92 | be applied. Replaces the `predict_filters` argument in UrbanSim. 93 | 94 | name : str, optional 95 | Name of the model step, passed to ModelManager. If none is provided, a name is 96 | generated each time the `fit()` method runs. 97 | 98 | tags : list of str, optional 99 | Tags, passed to ModelManager. 
100 | 101 | """ 102 | def __init__(self, tables=None, model_expression=None, model_labels=None, 103 | choice_column=None, initial_coefs=None, filters=None, out_tables=None, 104 | out_column=None, out_filters=None, name=None, tags=[]): 105 | 106 | # Parent class can initialize the standard parameters 107 | TemplateStep.__init__(self, tables=tables, model_expression=model_expression, 108 | filters=filters, out_tables=out_tables, out_column=out_column, 109 | out_transform=None, out_filters=out_filters, name=name, tags=tags) 110 | 111 | # Custom parameters not in parent class 112 | self.model_labels = model_labels 113 | self.choice_column = choice_column 114 | self.initial_coefs = initial_coefs 115 | 116 | # Placeholders for model fit data, filled in by fit() or from_dict() 117 | self.summary_table = None 118 | self.model = None 119 | 120 | 121 | @classmethod 122 | def from_dict(cls, d): 123 | """ 124 | Create an object instance from a saved dictionary representation. 125 | 126 | Parameters 127 | ---------- 128 | d : dict 129 | 130 | Returns 131 | ------- 132 | SmallMultinomialLogitStep 133 | 134 | """ 135 | # Pass values from the dictionary to the __init__() method 136 | obj = cls(tables=d['tables'], model_expression=None, model_labels=None, 137 | choice_column=d['choice_column'], initial_coefs=d['initial_coefs'], 138 | filters=d['filters'], out_tables=d['out_tables'], 139 | out_column=d['out_column'], out_filters=d['out_filters'], name=d['name'], 140 | tags=d['tags']) 141 | 142 | # Load non-strings and model fit parameters 143 | # TO DO - handle non-existence cases more carefully than 'except pass'! 
144 | try: 145 | k = d['model_expression_keys'] 146 | v = d['model_expression_values'] 147 | obj.model_expression = OrderedDict([(k[i], v[i]) for i in range(len(k))]) 148 | except: 149 | pass 150 | 151 | try: 152 | k = d['model_label_keys'] 153 | v = d['model_label_values'] 154 | obj.model_labels = OrderedDict([(k[i], v[i]) for i in range(len(k))]) 155 | except: 156 | pass 157 | 158 | obj.summary_table = d['summary_table'] 159 | 160 | if 'supplemental_objects' in d: 161 | for item in filter(None, d['supplemental_objects']): 162 | if (item['name'] == 'model-object'): 163 | obj.model = item['content'] 164 | 165 | return obj 166 | 167 | 168 | def to_dict(self): 169 | """ 170 | Create a dictionary representation of the object. 171 | 172 | Returns 173 | ------- 174 | dict 175 | 176 | """ 177 | tmp_model_expression = self.model_expression 178 | self.model_expression = None 179 | 180 | d = TemplateStep.to_dict(self) 181 | self.model_expression = tmp_model_expression 182 | 183 | # Can't store OrderedDicts in YAML, so convert them 184 | if tmp_model_expression is not None: 185 | d.update({ 186 | 'model_expression_keys': [k for (k,v) in tmp_model_expression.items()], 187 | 'model_expression_values': [v for (k,v) in tmp_model_expression.items()], 188 | }) 189 | 190 | if self.model_labels is not None: 191 | d.update({ 192 | 'model_label_keys': [k for (k,v) in self.model_labels.items()], 193 | 'model_label_values': [v for (k,v) in self.model_labels.items()] 194 | }) 195 | 196 | # Add parameters not in parent class 197 | d.update({ 198 | 'model_labels': None, 199 | 'choice_column': self.choice_column, 200 | 'initial_coefs': self.initial_coefs, 201 | 'summary_table': self.summary_table 202 | }) 203 | 204 | # Add supplemental objects 205 | objects = [] 206 | if self.model is not None: 207 | objects.append({'name': 'model-object', 208 | 'content': self.model, 209 | 'content_type': 'pickle', 210 | 'required': True}) 211 | 212 | d.update({'supplemental_objects': objects}) 213 | 214 | 
return d 215 | 216 | 217 | def _get_alts(self): 218 | """ 219 | Get a unique, sorted list of alternative id's included in the model expression. 220 | 221 | Returns 222 | ------- 223 | list 224 | 225 | """ 226 | ids = [] 227 | for k, v in self.model_expression.items(): 228 | # TO DO - check if PyLogit supports v being a non-list (single numeric) 229 | for elem in v: 230 | if isinstance(elem, list): 231 | ids += elem 232 | else: 233 | ids += [elem] 234 | 235 | return np.unique(ids) 236 | 237 | 238 | def _get_param_count(self): 239 | """ 240 | Count the number of parameters implied by the model expression. 241 | 242 | Returns 243 | ------- 244 | int 245 | 246 | """ 247 | count = 0 248 | for k, v in self.model_expression.items(): 249 | # TO DO - check if PyLogit supports v being a non-list (single numeric) 250 | for elem in v: 251 | count += 1 252 | 253 | return count 254 | 255 | 256 | def _to_long(self, df, task='fit'): 257 | """ 258 | Convert a data table from wide format to long format. Currently handles the case 259 | where there are attributes of choosers but not of alternatives, and no 260 | availability or interaction terms. (This is not supported in the PyLogit 261 | conversion utility.) 262 | 263 | TO DO 264 | - extend to handle characteristics of alternatives? 265 | - move to ChoiceModels 266 | 267 | Parameters 268 | ---------- 269 | df : pd.DataFrame 270 | One row per observation. The observation id should be in the index. Reserved 271 | column names: '_obs_id', '_alt_id', '_chosen'. 272 | 273 | task : 'fit' or 'predict', optional 274 | If 'fit' (default), a column named '_chosen' is generated with binary 275 | indicator of observed choices. 276 | 277 | Returns 278 | ------- 279 | pd.DataFrame 280 | One row per combination of observation and alternative. The observation is in 281 | '_obs_id'. The alternative is in 'alt_id'. Table is sorted by observation and 282 | alternative. 
If task is 'fit', a column named '_chosen' is generated with 283 | binary indicator of observed choices. Remaining columns are retained from the 284 | input data. 285 | 286 | """ 287 | # Get lists of obs and alts 288 | obs = df.index.sort_values().unique().tolist() 289 | alts = self._get_alts() 290 | 291 | # Long df is cartesian product of alts and obs 292 | obs_prod, alts_prod = pd.core.reshape.util.cartesian_product([obs, alts]) 293 | 294 | long_df = pd.DataFrame({'_obs_id': obs_prod, '_alt_id': alts_prod}) 295 | long_df = long_df.merge(df, left_on='_obs_id', right_index=True) 296 | 297 | if (task == 'fit'): 298 | # Add binary indicator of chosen rows 299 | long_df['_chosen'] = 0 300 | long_df.loc[long_df._alt_id == long_df[self.choice_column], '_chosen'] = 1 301 | 302 | return long_df 303 | 304 | 305 | def fit(self): 306 | """ 307 | Fit the model; save and report results. This uses PyLogit via ChoiceModels. 308 | 309 | The `fit()` method can be run as many times as desired. Results will not be saved 310 | with Orca or ModelManager until the `register()` method is run. 
311 | 312 | """ 313 | expr_cols = [t[0] for t in list(self.model_expression.items()) \ 314 | if t[0] != 'intercept'] 315 | 316 | df = get_data(tables = self.tables, 317 | filters = self.filters, 318 | extra_columns = expr_cols + [self.choice_column]) 319 | 320 | long_df = self._to_long(df) 321 | 322 | # Set initial coefs to 0 if none provided 323 | pc = self._get_param_count() 324 | if (self.initial_coefs is None) or (len(self.initial_coefs) != pc): 325 | self.initial_coefs = np.zeros(pc).tolist() 326 | 327 | model = MultinomialLogit(data=long_df, 328 | observation_id_col='_obs_id', 329 | choice_col='_chosen', 330 | model_expression=self.model_expression, 331 | model_labels=self.model_labels, 332 | alternative_id_col='_alt_id', 333 | initial_coefs=self.initial_coefs) 334 | 335 | results = model.fit() 336 | 337 | self.name = self._generate_name() 338 | self.summary_table = str(results.report_fit()) 339 | print(self.summary_table) 340 | 341 | # We need the PyLogit fitted model object for prediction, so save it directly 342 | self.model = results.get_raw_results() 343 | 344 | 345 | def run(self): 346 | """ 347 | Run the model step: calculate simulated choices and use them to update a column. 348 | 349 | Alternatives that appear in the estimation data but not in the model expression 350 | will not be available for simulation. 351 | 352 | Predicted probabilities come from PyLogit. Monte Carlo simulation of choices is 353 | performed directly. (This functionality will move to ChoiceModels.) 354 | 355 | The predicted probabilities and simulated choices are saved to the class object 356 | for interactive use (`probabilities` with type pd.DataFrame, and `choices` with 357 | type pd.Series) but are not persisted in the dictionary representation of the 358 | model step. 
359 | 360 | """ 361 | expr_cols = [t[0] for t in list(self.model_expression.items()) \ 362 | if t[0] != 'intercept'] 363 | 364 | df = get_data(tables = self.out_tables, 365 | fallback_tables = self.tables, 366 | filters = self.out_filters, 367 | extra_columns = expr_cols) 368 | 369 | long_df = self._to_long(df, 'predict') 370 | 371 | num_obs = len(df) 372 | num_alts = len(self._get_alts()) 373 | 374 | # Get predictions from underlying model - this is an ndarray with the same length 375 | # as the long-format df, representing choice probability for each alternative 376 | probs = self.model.predict(long_df) 377 | 378 | # Generate choices by adapting an approach from UrbanSim MNL 379 | # https://github.com/UDST/choicemodels/blob/master/choicemodels/mnl.py#L578-L583 380 | cumprobs = probs.reshape((num_obs, num_alts)).cumsum(axis=1) 381 | rands = np.random.random(num_obs) 382 | diff = np.subtract(cumprobs.transpose(), rands).transpose() 383 | 384 | # The diff conversion replaces negative values with 0 and positive values with 1, 385 | # so that argmax can return the position of the first positive value 386 | choice_ix = np.argmax((diff + 1.0).astype('i4'), axis=1) 387 | choice_ix_1d = choice_ix + (np.arange(num_obs) * num_alts) 388 | 389 | choices = long_df._alt_id.values.take(choice_ix_1d) 390 | 391 | # Save results to the class object (via df to include indexes) 392 | long_df['_probability'] = probs 393 | self.probabilities = long_df[['_obs_id', '_alt_id', '_probability']] 394 | df['_choices'] = choices 395 | self.choices = df._choices 396 | 397 | # Save to Orca 398 | update_column(table=self.out_tables, 399 | fallback_table=self.tables, 400 | column=self.out_column, 401 | fallback_column=self.choice_column, 402 | data=self.choices) 403 | -------------------------------------------------------------------------------- /urbansim_templates/shared/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import 
from .core import CoreTemplateSettings
from .output_column import OutputColumnSettings, register_column


# --- urbansim_templates/shared/core.py ---

from urbansim_templates import __version__


class CoreTemplateSettings():
    """
    Stores standard parameters and logic used by all templates. Parameters can be
    passed to the constructor or set as attributes.

    Parameters
    ----------
    name : str, optional
        Name of the configured template instance.

    tags : list of str, optional
        Tags associated with the configured template instance.

    notes : str, optional
        Notes associated with the configured template instance.

    autorun : bool, optional
        Whether to run the configured template instance automatically when it's
        registered or loaded by ModelManager. The overall default is False, but the
        default can be overridden at the template level.

    template : str
        Name of the template class associated with a configured instance.

    template_version : str
        Version of the template class package.

    """
    def __init__(self,
            name = None,
            tags = None,
            notes = None,
            autorun = False,
            template = None,
            template_version = None):

        self.name = name
        # Avoid a mutable default argument: a literal `[]` default would be a
        # single shared list, silently reused across instances
        self.tags = tags if tags is not None else []
        self.notes = notes
        self.autorun = autorun
        self.template = template
        self.template_version = template_version

        # automatic attributes
        self.modelmanager_version = __version__


    @classmethod
    def from_dict(cls, d):
        """
        Create a class instance from a saved dictionary representation.

        Parameters
        ----------
        d : dict

        Returns
        -------
        obj : CoreTemplateSettings

        """
        obj = cls(
            name = d['name'],
            tags = d['tags'],
            notes = d['notes'],
            autorun = d['autorun'],
            template = d['template'],
            template_version = d['template_version'],
        )
        return obj


    def to_dict(self):
        """
        Create a dictionary representation of the object.

        Returns
        -------
        d : dict

        """
        d = {
            'name': self.name,
            'tags': self.tags,
            'notes': self.notes,
            'autorun': self.autorun,
            'template': self.template,
            'template_version': self.template_version,
            'modelmanager_version': self.modelmanager_version,
        }
        return d


# --- urbansim_templates/shared/output_column.py ---

import orca

from urbansim_templates import __version__


class OutputColumnSettings():
    """
    Stores standard parameters used by templates that generate or modify columns.
    Parameters can be passed to the constructor or set as attributes.

    Parameters
    ----------
    column_name : str, optional
        Name of the Orca column to be created or modified. Generally required
        before running a configured template.

    table : str, optional
        Name of Orca table the column will be associated with. Generally required
        before running the configured template.

    data_type : str, optional
        Python type or ``numpy.dtype`` to cast the column's values to.

    missing_values : str or numeric, optional
        Value to use for rows that would otherwise be missing.

    cache : bool, default False
        Whether to cache column values after they are calculated.

    cache_scope : 'step', 'iteration', or 'forever', default 'forever'
        How long to cache column values for (ignored if ``cache`` is False).

    """
    # TO DO: say something about Orca defaults and about core vs. computed columns.

    def __init__(self,
            column_name = None,
            table = None,
            data_type = None,
            missing_values = None,
            cache = False,
            cache_scope = 'forever'):

        self.column_name = column_name
        self.table = table
        self.data_type = data_type
        self.missing_values = missing_values
        self.cache = cache
        self.cache_scope = cache_scope

        # automatic attributes
        self.modelmanager_version = __version__


    @classmethod
    def from_dict(cls, d):
        """
        Create a class instance from a saved dictionary representation.

        Parameters
        ----------
        d : dict

        Returns
        -------
        obj : OutputColumnSettings

        """
        return cls(
            column_name = d['column_name'],
            table = d['table'],
            data_type = d['data_type'],
            missing_values = d['missing_values'],
            cache = d['cache'],
            cache_scope = d['cache_scope'])


    def to_dict(self):
        """
        Create a dictionary representation of the object.

        Returns
        -------
        d : dict

        """
        return {
            'column_name': self.column_name,
            'table': self.table,
            'data_type': self.data_type,
            'missing_values': self.missing_values,
            'cache': self.cache,
            'cache_scope': self.cache_scope,
            'modelmanager_version': self.modelmanager_version}


######################################
######################################


def register_column(build_column, settings):
    """
    Register a callable as an Orca column. Nothing is returned; the column is
    registered with Orca as a side effect, using the table name, column name,
    and caching behavior from ``settings``.

    Parameters
    ----------
    build_column : callable
        Callable should return a ``pd.Series``.

    settings : OutputColumnSettings

    """
    @orca.column(table_name = settings.table,
                 column_name = settings.column_name,
                 cache = settings.cache,
                 cache_scope = settings.cache_scope)
    def orca_column():
        series = build_column()

        # Post-process the series per the settings: fill missing values first,
        # then cast, so the cast also applies to the fill value
        if settings.missing_values is not None:
            series = series.fillna(settings.missing_values)

        if settings.data_type is not None:
            series = series.astype(settings.data_type)

        return series