├── .gitignore ├── .pytest_cache └── v │ └── cache │ ├── lastfailed │ └── nodeids ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── demos ├── .ipynb_checkpoints │ ├── census_api-checkpoint.ipynb │ ├── non_census_synthesis-checkpoint.ipynb │ ├── simple_synthesis-checkpoint.ipynb │ └── synthesize-checkpoint.ipynb ├── census_api.ipynb ├── input_data │ ├── Puma_Tract_Crosswalk_500.csv │ ├── hh_marginals.csv │ ├── hhld_marginals_500.csv │ ├── hhld_pums_500.csv │ ├── household_sample.csv │ ├── person_marginals.csv │ ├── person_sample.csv │ ├── pop_marginals_500.csv │ └── pop_pums_500.csv ├── non_census_synthesis.ipynb ├── simple_synthesis.ipynb ├── synthesize.ipynb └── synthesize.py ├── requirements-dev.txt ├── scripts ├── dl_and_slice_pums.py ├── synth_example.py └── tract_to_puma00_xref.py ├── setup.cfg ├── setup.py └── synthpop ├── __init__.py ├── categorizer.py ├── census_helpers.py ├── config.py ├── draw.py ├── ipf ├── __init__.py ├── ipf.py └── test │ ├── __init__.py │ └── test_ipf.py ├── ipu ├── __init__.py ├── ipu.py └── test │ ├── __init__.py │ └── test_ipu.py ├── recipes ├── __init__.py ├── starter.py ├── starter2.py └── tests │ ├── __init__.py │ └── test_starter.py ├── synthesizer.py ├── test ├── __init__.py ├── test_categorizer.py ├── test_censushelpers.py ├── test_data │ ├── hh_marginals.csv │ ├── household_sample.csv │ ├── person_marginals.csv │ └── person_sample.csv ├── test_draw.py └── test_zone_synthesizer.py └── zone_synthesizer.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | 
pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | -------------------------------------------------------------------------------- /.pytest_cache/v/cache/lastfailed: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /.pytest_cache/v/cache/nodeids: -------------------------------------------------------------------------------- 1 | [ 2 | "synthpop/recipes/tests/test_starter.py::test_starter", 3 | "synthpop/recipes/tests/test_starter2.py::test_starter2" 4 | ] -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - '3.5' 5 | - '3.6' 6 | - '3.7' 7 | - '3.8' 8 | 9 | install: 10 | - pip install . 11 | - pip install -r requirements-dev.txt 12 | - pip list 13 | - pip show synthpop 14 | 15 | script: 16 | - pycodestyle synthpop 17 | - py.test --cov synthpop --cov-report term-missing 18 | 19 | after_success: 20 | - coveralls 21 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing to SynthPop 2 | ======================== 3 | 4 | Style 5 | ----- 6 | 7 | - Python code should follow the [PEP 8 Style Guide][pep8]. 8 | - Python docstrings should follow the [NumPy documentation format][numpydoc]. 9 | 10 | ### Imports 11 | 12 | Imports should be one per line. 
13 | Imports should be grouped into standard library, third-party, 14 | and intra-library imports. `from` import should follow "regular" `imports`. 15 | Within each group the imports should be alphabetized. 16 | Here's an example: 17 | 18 | ```python 19 | import sys 20 | from glob import glob 21 | 22 | import numpy as np 23 | 24 | import urbansim.urbansim.modelcompile as modelcompile 25 | from urbansim.util import misc 26 | ``` 27 | 28 | Imports of scientific Python libraries should follow these conventions: 29 | 30 | ```python 31 | import matplotlib.pyplot as plt 32 | import numpy as np 33 | import pandas as pd 34 | import scipy as sp 35 | ``` 36 | 37 | 38 | Thanks! 39 | 40 | [pep8]: http://legacy.python.org/dev/peps/pep-0008/ 41 | [numpydoc]: https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt 42 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, UrbanSim Inc. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors 14 | may be used to endorse or promote products derived from this software without 15 | specific prior written permission. 
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # files to include in the source distribution on pypi (setup and README are included automatically) 2 | 3 | include LICENSE.txt 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | SynthPop 2 | ======== 3 | 4 | .. image:: https://travis-ci.org/UDST/synthpop.svg?branch=master 5 | :alt: Build Status 6 | :target: https://travis-ci.org/UDST/synthpop 7 | 8 | .. image:: https://coveralls.io/repos/UDST/synthpop/badge.svg?branch=master 9 | :alt: Test Coverage 10 | :target: https://coveralls.io/r/UDST/synthpop?branch=master 11 | 12 | SynthPop is a reimplementation of `PopGen`_ using the modern scientific Python 13 | stack, with a focus on performance and code reusability. 
14 | 15 | The SynthPop code is a completely new implementation of the algorithms 16 | described in this reference, and the paper as well as this repository should be 17 | cited if you use SynthPop: 18 | 19 | Ye, Xin, Karthik Konduri, Ram Pendyala, Bhargava Sana and Paul Waddell. A Methodology to Match Distributions of Both Households and Person Attributes in the Generation of Synthetic Populations. Transportation Research Board 88th Annual Meeting Compendium of Papers DVD. January 11-15, 2009 20 | 21 | The paper is available here: 22 | http://www.scag.ca.gov/Documents/PopulationSynthesizerPaper_TRB.pdf 23 | 24 | .. _PopGen: http://urbanmodel.asu.edu/popgen.html 25 | 26 | # Installation 27 | 28 | ``` 29 | virtualenv venv --python=python3.7 30 | source venv/bin/activate 31 | pip install -r requirements.txt 32 | cd synthpop/ 33 | python setup.py develop 34 | ``` 35 | To run `Synthpop` you need a Census API key, which you can get from [here](https://api.census.gov/data/key_signup.html). After you get and validate the API key you can add it as an environment variable to your environment by adding to `/venv/bin/activate` the following line: 36 | `export CENSUS='yourApiKey'` 37 | -------------------------------------------------------------------------------- /demos/.ipynb_checkpoints/simple_synthesis-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from synthpop.synthesizer import synthesize, enable_logging\n", 10 | "import synthpop.categorizer as cat\n", 11 | "\n", 12 | "import multiprocessing\n", 13 | "import os\n", 14 | "import seaborn as sns\n", 15 | "from functools import partial\n", 16 | "import pandas as pd\n", 17 | "\n", 18 | "import synthpop.zone_synthesizer as zs" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": 
[], 26 | "source": [ 27 | "hh_marginal_file = 'input_data/hh_marginals.csv'\n", 28 | "person_marginal_file = 'input_data/person_marginals.csv'\n", 29 | "hh_sample_file = 'input_data/household_sample.csv'\n", 30 | "person_sample_file = 'input_data/person_sample.csv'" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "hh_marg, p_marg, hh_sample, p_sample, xwalk = zs.load_data(hh_marginal_file, person_marginal_file, hh_sample_file, person_sample_file)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stderr", 49 | "output_type": "stream", 50 | "text": [ 51 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 52 | " adj = constraint / (column * weights).sum()\n" 53 | ] 54 | }, 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "Drawing 254 households\n" 60 | ] 61 | }, 62 | { 63 | "name": "stderr", 64 | "output_type": "stream", 65 | "text": [ 66 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 67 | " adj = constraint / (column * weights).sum()\n" 68 | ] 69 | }, 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "Drawing 306 households\n" 75 | ] 76 | }, 77 | { 78 | "name": "stderr", 79 | "output_type": "stream", 80 | "text": [ 81 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 82 | " adj = constraint / (column * weights).sum()\n" 83 | ] 84 | }, 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "Drawing 356 households\n" 90 | ] 91 | }, 92 | { 93 | "name": "stderr", 94 | "output_type": "stream", 95 | "text": [ 96 | 
"c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 97 | " adj = constraint / (column * weights).sum()\n" 98 | ] 99 | }, 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "Drawing 390 households\n" 105 | ] 106 | }, 107 | { 108 | "name": "stderr", 109 | "output_type": "stream", 110 | "text": [ 111 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 112 | " adj = constraint / (column * weights).sum()\n" 113 | ] 114 | }, 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "Drawing 601 households\n" 120 | ] 121 | }, 122 | { 123 | "name": "stderr", 124 | "output_type": "stream", 125 | "text": [ 126 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 127 | " adj = constraint / (column * weights).sum()\n" 128 | ] 129 | }, 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "Drawing 324 households\n" 135 | ] 136 | }, 137 | { 138 | "name": "stderr", 139 | "output_type": "stream", 140 | "text": [ 141 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 142 | " adj = constraint / (column * weights).sum()\n" 143 | ] 144 | }, 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "Drawing 556 households\n" 150 | ] 151 | }, 152 | { 153 | "name": "stderr", 154 | "output_type": "stream", 155 | "text": [ 156 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 157 | " adj = constraint / (column * weights).sum()\n" 158 | ] 159 | }, 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | 
"Drawing 342 households\n" 165 | ] 166 | }, 167 | { 168 | "name": "stderr", 169 | "output_type": "stream", 170 | "text": [ 171 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 172 | " adj = constraint / (column * weights).sum()\n" 173 | ] 174 | }, 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "Drawing 273 households\n" 180 | ] 181 | }, 182 | { 183 | "name": "stderr", 184 | "output_type": "stream", 185 | "text": [ 186 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 187 | " adj = constraint / (column * weights).sum()\n" 188 | ] 189 | }, 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "Drawing 228 households\n", 195 | "Drawing 857 households\n", 196 | "Drawing 748 households\n" 197 | ] 198 | }, 199 | { 200 | "name": "stderr", 201 | "output_type": "stream", 202 | "text": [ 203 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 204 | " adj = constraint / (column * weights).sum()\n" 205 | ] 206 | }, 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "Drawing 744 households\n" 212 | ] 213 | }, 214 | { 215 | "name": "stderr", 216 | "output_type": "stream", 217 | "text": [ 218 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 219 | " adj = constraint / (column * weights).sum()\n" 220 | ] 221 | }, 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "Drawing 953 households\n" 227 | ] 228 | }, 229 | { 230 | "name": "stderr", 231 | "output_type": "stream", 232 | "text": [ 233 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by 
zero encountered in double_scalars\n", 234 | " adj = constraint / (column * weights).sum()\n" 235 | ] 236 | }, 237 | { 238 | "name": "stdout", 239 | "output_type": "stream", 240 | "text": [ 241 | "Drawing 719 households\n" 242 | ] 243 | }, 244 | { 245 | "name": "stderr", 246 | "output_type": "stream", 247 | "text": [ 248 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 249 | " adj = constraint / (column * weights).sum()\n" 250 | ] 251 | }, 252 | { 253 | "name": "stdout", 254 | "output_type": "stream", 255 | "text": [ 256 | "Drawing 185 households\n" 257 | ] 258 | }, 259 | { 260 | "name": "stderr", 261 | "output_type": "stream", 262 | "text": [ 263 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 264 | " adj = constraint / (column * weights).sum()\n" 265 | ] 266 | }, 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "Drawing 183 households\n" 272 | ] 273 | }, 274 | { 275 | "name": "stderr", 276 | "output_type": "stream", 277 | "text": [ 278 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 279 | " adj = constraint / (column * weights).sum()\n" 280 | ] 281 | }, 282 | { 283 | "name": "stdout", 284 | "output_type": "stream", 285 | "text": [ 286 | "Drawing 286 households\n" 287 | ] 288 | }, 289 | { 290 | "name": "stderr", 291 | "output_type": "stream", 292 | "text": [ 293 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 294 | " adj = constraint / (column * weights).sum()\n" 295 | ] 296 | }, 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "Drawing 317 households\n" 302 | ] 303 | }, 304 | { 305 | "name": "stderr", 306 | "output_type": 
"stream", 307 | "text": [ 308 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 309 | " adj = constraint / (column * weights).sum()\n" 310 | ] 311 | }, 312 | { 313 | "name": "stdout", 314 | "output_type": "stream", 315 | "text": [ 316 | "Drawing 711 households\n" 317 | ] 318 | }, 319 | { 320 | "name": "stderr", 321 | "output_type": "stream", 322 | "text": [ 323 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 324 | " adj = constraint / (column * weights).sum()\n" 325 | ] 326 | }, 327 | { 328 | "name": "stdout", 329 | "output_type": "stream", 330 | "text": [ 331 | "Drawing 345 households\n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "all_households, all_persons, all_stats = zs.synthesize_all_zones(hh_marg, p_marg, hh_sample, p_sample, xwalk)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 5, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "all_persons, all_households, all_stats = zs.multiprocess_synthesize(hh_marg, p_marg, hh_sample, p_sample, xwalk)" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 6, 351 | "metadata": {}, 352 | "outputs": [ 353 | { 354 | "name": "stderr", 355 | "output_type": "stream", 356 | "text": [ 357 | "C:\\Users\\Juan\\Anaconda3\\envs\\synpop_py3\\lib\\site-packages\\matplotlib\\axes\\_axes.py:6448: UserWarning: The 'normed' kwarg is deprecated, and has been replaced by the 'density' kwarg.\n", 358 | " warnings.warn(\"The 'normed' kwarg is deprecated, and has been \"\n" 359 | ] 360 | }, 361 | { 362 | "data": { 363 | "text/plain": [ 364 | "" 365 | ] 366 | }, 367 | "execution_count": 6, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | }, 371 | { 372 | "data": { 373 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYQAAAELCAYAAADZW/HeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvFvnyVgAAIABJREFUeJzt3Xl8nGd56P3fNdr33bJsyZbkfYnjxLKdPSEhIaFpAiUhCSkE2hKgzSkc2p6GnsKBFN4D5+1bDj3NCaRsYclGKAfn4BDIZrLZsU3syLZsS5ZlW7L2zdqXmev9Y54x44lkjaTZfX0/H300era55/F4rrm36xZVxRhjjHFFuwDGGGNigwUEY4wxgAUEY4wxDgsIxhhjAAsIxhhjHBYQjDHGABYQjDHGOCwgGGOMASwgGGOMcSRHuwCzUVxcrJWVldEuhjHGxJW9e/d2qWrJTMfFVUCorKxkz5490S6GMcbEFRE5Ecxx1mRkjDEGsIBgjDHGYQHBGGMMYAHBGGOMwwKCMcYYIMiAICI3i8gREWkQkQen2H+NiPxeRCZF5A6/7e8RkX1+P6Mi8gFn3w9F5Ljfvo2he1nGGGNma8ZhpyKSBDwM3Ag0A7tFZJuqHvI77CTwceBv/c9V1ZeBjc51CoEG4Dd+h/ydqj4znxdgjDEmNIKZh7AFaFDVRgAReRK4HTgbEFS1ydnnOc917gCeU9XhOZfWGGNM2ATTZLQYOOX3d7OzbbbuBp4I2PY1EXlHRL4pImlzuKYxxpgQCaaGIFNs09k8iYiUARcBz/tt/gLQBqQCjwJ/Dzw0xbn3A/cDLFmyZDZPa4wJwuO7Tk677yNb7f/chSSYGkIzUOH3dzlwepbP82HgF6o64dugqq3qNQb8AG/T1Luo6qOqWqOqNSUlM6biMMYYM0fBBITdwAoRqRKRVLxNP9tm+Tz3ENBc5NQaEBEBPgAcmOU1jTHGhNCMAUFVJ4EH8Db31AFPq+pBEXlIRG4DEJHNItIM3Al8R0QO+s4XkUq8NYwdAZf+qYjUArVAMfDV+b8cY4wxcxVUtlNV3Q5sD9j2Jb/Hu/E2JU11bhNTdEKr6vWzKagxxpjwspnKxhhjAAsIxhhjHBYQjDHGABYQjDHGOCwgGGOMASwgGGOMcVhAMMYYA1hAMMYY47CAYIwxBrCAYIwxxmEBwRhjDGABwRhjjMMCgjHGGMACgjHGGIcFBGOMMYAFBGOMMQ4LCMYYYwALCMYYYxwWEIwxxgAWEIwxxjgsIBhjjAEgOZiDRORm4FtAEvBdVf16wP5rgP8JbADuVtVn/Pa5gVrnz5OqepuzvQp4EigEfg98VFXH5/dyjDGh9Piuk+fd/5GtSyJUEhMJM9YQRCQJeBi4BVgL3CMiawMOOwl8HHh8ikuMqOpG5+c2v+3fAL6pqiuAXuDP51B+Y4wxIRJMk9EWoEFVG51v8E8Ct/sfoKpNqvoO4AnmSUVEgOsBX03iMeADQZfaGGNMyAUTEBYDp/z+bna2BStdRPaIyE4R8X3oFwF9qjo5x2saY4wJsWD6EGSKbTqL51iiqqdFpBp4SURqgTPBXlNE7gfuB1iyxNorjTEmXIKpITQDFX5/lwOng30CVT3t/G4EXgEuAbqAfBHxBaRpr6mqj6pqjarWlJSUBPu0xhhjZimYgLAbWCEiVSKSCtwNbAvm4iJSICJpzuNi4ErgkKoq8DJwh3PofcAvZ1t4Y4wxoTNjQHDa+R8AngfqgKdV9aCIPCQiviGkm0WkGbgT+I6IHHROXwPsEZH9eAPA11X1kLPv74HPi0gD3j6F74XyhRljjJmdoOYhqOp2YHvAti/5Pd6Nt9kn8Lw3gIumuWYj3hFMJo7YuHRjEpfNVDbGGANYQDDGGOOwgGCMMQawgGCMMcZhAcEYYwxgAcEYY4zDAoIxxhjAAoIxxhiHBQRjjDGABQRjjDGOoFJXmAvLTOkpjDGJyWoIxhhjAAsIxhh
jHBYQjDHGABYQjDHGOCwgGGOMASwgGGOMcVhAMMYYA1hAMMYY47CAYIwxBggyIIjIzSJyREQaROTBKfZfIyK/F5FJEbnDb/tGEXlTRA6KyDsicpffvh+KyHER2ef8bAzNSzLGGDMXM6auEJEk4GHgRqAZ2C0i21T1kN9hJ4GPA38bcPow8DFVrReRRcBeEXleVfuc/X+nqs/M90UYY4yZv2ByGW0BGlS1EUBEngRuB84GBFVtcvZ5/E9U1aN+j0+LSAdQAvRhjDEmpgTTZLQYOOX3d7OzbVZEZAuQChzz2/w1pynpmyKSNttrGmOMCZ1gAoJMsU1n8yQiUgb8GPiEqvpqEV8AVgObgULg76c5934R2SMiezo7O2fztMYYY2YhmIDQDFT4/V0OnA72CUQkF/gV8I+qutO3XVVb1WsM+AHepql3UdVHVbVGVWtKSkqCfVpjjDGzFExA2A2sEJEqEUkF7ga2BXNx5/hfAD9S1Z8F7CtzfgvwAeDAbApujDEmtGYMCKo6CTwAPA/UAU+r6kEReUhEbgMQkc0i0gzcCXxHRA46p38YuAb4+BTDS38qIrVALVAMfDWkr8wYY8ysBLVimqpuB7YHbPuS3+PdeJuSAs/7CfCTaa55/axKaowxJqxsprIxxhjAAoIxxhiHBQRjjDGABQRjjDEOCwjGGGMACwjGGGMcFhCMMcYAFhCMMcY4LCAYY4wBLCAYY4xxWEAwxhgDWEAwxhjjsIBgjDEGsIBgjDHGEVT6a2OMSTSP7zo57b6PbF0SwZLEDqshGGOMASwgGGOMcVhAMMYYA1hAMMYY47CAYIwxBrCAYIwxxhFUQBCRm0XkiIg0iMiDU+y/RkR+LyKTInJHwL77RKTe+bnPb/smEal1rvmvIiLzfznGGGPmasaAICJJwMPALcBa4B4RWRtw2Eng48DjAecWAv8N2ApsAf6biBQ4ux8B7gdWOD83z/lVGGOMmbdgaghbgAZVbVTVceBJ4Hb/A1S1SVXfATwB574P+K2q9qhqL/Bb4GYRKQNyVfVNVVXgR8AH5vtijDHGzF0wAWExcMrv72ZnWzCmO3ex83gu1zTGGBMGwQSEqdr2NcjrT3du0NcUkftFZI+I7Ons7AzyaY0xodI3PM6u492MTwY2AJhEE0wuo2agwu/vcuB0kNdvBq4LOPcVZ3t5MNdU1UeBRwFqamqCDUTGmHnqGRrn1wfbOHS6H4/C4OgkN6wpjXaxTBgFU0PYDawQkSoRSQXuBrYFef3ngZtEpMDpTL4JeF5VW4EBEbnMGV30MeCXcyi/MSZMtu1v4Wj7AFctL2b5gmxeP9bF6IQ72sUyYTRjQFDVSeABvB/udcDTqnpQRB4SkdsARGSziDQDdwLfEZGDzrk9wD/hDSq7gYecbQCfAb4LNADHgOdC+sqMMXM2ODZJQ8cgl1cXcfP6Mt63diGjEx52NnZHu2gmjIJKf62q24HtAdu+5Pd4N+c2Afkf933g+1Ns3wOsn01hjTGRUdvibSa6uCIfgMUFGawqzeHV+i4ury4iLSUpyiU04WAzlY0x77L/VB8Lc9NZmJt+dtv1qxcwMuFm1/Ge85xp4pkFBGPMOXqGxjnZM3y2duBTUZjJ0sJMalv6o1QyE24WEIwx53inuQ+ADeV579pXXZLF6b4RxqxzOSFZQDDGnGPfqT6WFmVSkJn6rn2VRVkocLJnOPIFM2FnAcEYc9aZ0Qk6BsZYV5Y75f4lhZkI0NQ9FNmCmYiwgGCMOauldwTw9hdMJS0liUX5GTR1Ww0hEVlAMMac1dw7jEugLC9j2mMqizI51TPMpNtSWSSaoOYhGHOhe3zXyfPu/8jWJREqSXg1946wICed1OTpvysuLcri9WPdtPSNRLBkkXWh/HsHshqCMQYAVaW5d4TygulrBwBLi7zNSdZslHgsIBhjAOgdnmBkws3iGQJCTnoKxdmpnLCO5YRjTUbmvA63nuFQ6xkqCjOpLs6iKDst2kUyYdL
c6/3GX14wdYeyv8qiLA6ePoPHo7hctvptorCAYKY1NuHm579vZnjczZ4TvQjwiSurWL4gO9pFM2HQ3DtCskvOSVcxnSWFmew50UtT9xDVJfZ+SBQWEMy0Xm3oYmjczWeuXUZ6ShLfe62RV+s7EzYgzNSRmOiae0coy0snKYhv/AvzvEHjaPuABYQEYn0IZkoDoxO8Vt/F+sV5VBRmUpKTxtbqIuo7Buk4Mxrt4pkQ86hyum+ExUE0FwEsyElHgCNtg+EtmIkoCwhmSi8d7mDS4+GmtX9YIWtzZSHJLuENy4mfcDoHxhh3e2YcYeSTmuyiICuVo+0DYS6ZiSQLCOZdRsbd7G7qoaaykGK/TuTstGQursjn7ZO9jIxbcrNE4ptTsDg/uIAAUJqbzhELCAnFAoJ5l4bOQTwKlwSkPwa4YlkRE25ld5PlxE8kHWfGSBI55wvATEpz0zjeNcTYpH05SBQWEMy71LcPkJbsmnL4YVleBhUFGRw8bTnxE0nn4BhF2alBdSj7lOam4/YojZ02HyFRWEAw51BVGjoGWVaSPe2HQ3VJNi19I/bNMIF0DoxRkjO7OSalzvDUI23WbJQoLCCYcxzrHKJvZIIVpdMPJawqzsKjlhM/Ubg9Ss/QGCWznHRYnJ1KSpJYP0ICsYBgzvG7o50ArFiQM+0xSwszcQkc77KmgkTQMzSOR6F4ljWEZJeL6uJsjloNIWEEFRBE5GYROSIiDSLy4BT700TkKWf/LhGpdLbfKyL7/H48IrLR2feKc03fvgWhfGFmbn5X30lRViqFWe9eLcvHlxPfAkJi6BwYA5h1DQFg5cIcqyEkkBkDgogkAQ8DtwBrgXtEZG3AYX8O9KrqcuCbwDcAVPWnqrpRVTcCHwWaVHWf33n3+varakcIXo+Zh9EJNzsbu1lROn3twKeqKIvm3hEmLCd+3OscdALCLGsIAKtKs2nuHWFwbDLUxTJREEwNYQvQoKqNqjoOPAncHnDM7cBjzuNngBtEJLBH8h7gifkU1oTXnqZeRic8rAwiNUVVcRZuj3LK+hHiXufAGDnpyaSnJM363JXOl4d6qyUkhGACwmLglN/fzc62KY9R1UmgHygKOOYu3h0QfuA0F31xigACgIjcLyJ7RGRPZ2dnEMU1c/VWUw8u8X7Yz2RpURaC9SMkgq7BsVnNP/C3aqE3INhIo8QQTECY6oNaZ3OMiGwFhlX1gN/+e1X1IuBq5+ejUz25qj6qqjWqWlNSUhJEcc1cvdPcx/IF2aQF8U0xIzWJhXnpFhDinKrOacipT0VBJmnJLho6LKdRIggmIDQDFX5/lwOnpztGRJKBPMB/KuvdBNQOVLXF+T0API63acpEiapS29zPhvJ3z06eTlVxFid7hpn0WD9CvOoeGmdkwj2nDmUAl0uoKs6yLwYJIpiAsBtYISJVIpKK98N9W8Ax24D7nMd3AC+pqgKIiAu4E2/fA862ZBEpdh6nALcCBzBR09I3QvfQOBeX5wV9zpLCTCY9SseZsTCWzITTMeeb/VxrCADVJVk0WkBICDMGBKdP4AHgeaAOeFpVD4rIQyJym3PY94AiEWkAPg/4D029BmhW1Ua/bWnA8yLyDrAPaAH+fd6vxsxZbbM3FcVFs6ghlOV5E6G19ifuYuuJzvdBPtcaAvyhpmgjzuJfUAvkqOp2YHvAti/5PR7FWwuY6txXgMsCtg0Bm2ZZVhNG+5v7SUkS1pTlcOj0maDOKXJmqp7uH7V/zDh1rGOQZJeQl5ky52tUF2efHXFmi+XEN5upbABvh/LqhbmkJQc/9NAlQlleBq19tmBOvDrWOUhJThquqQf5BaWqxDsqzZLcxT8LCAaPR6lt6eeiWfQf+JTlpdPaP4LTZWTizLHOoTkPOfWpdoYpW8dy/LOAYGjqHmJgdHJWHco+ZXkZjE166B2eCEPJTDiNTrhp7h2eV4cyQH6mN9VJY5cNPY13FhAMtS3eDuXZDDn1KXMWW7eO5fhzons
Yj86vQ9mnqjjLmowSgAUEw/5T/aSnuFgRRMqKQKW53sXWT1s/Qtw51jn/Iac+1cU29DQRWEAw1Lb0sW5RHslJs387pCa7KMlJsxpCHPLNQZhvHwJ4O5Y7B8YYGLWmw3hmAeEC5/Eoda0DrFuUO+dreDuWrYYQbxq7hliUl05q8vw/Bnwdy01dluwwnllAuMD5UhevKZtPQMigf2SCYUuBHFeOdQ6ybA7NhFPxzT+wjuX4ZgHhAneo1TsJbV4BId/pWD5jtYR4oaocc9bODoUlhZmI2FyEeGcB4QJ3uO0MIrDyPGsoz+RsCos+60eIF+1nxhgad7OsZOZU58FIT0mivCDDOpbjnAWEC1xd6xmqirLITA0qi8mUstOSyUlLpt2S3MWNRmeEUShTTVQVZ3PcmozimgWEC1xd68C8mot8SnPTaR+wJqN44RtyGqomI/B2LB/vHEqoWes7G7vZcaSDUz3DuD2J87qmM/evhSbuDYxOcLJnmA/XlM/7WqW5abzV1IPHo7hcc8+LYyLjWOcQWalJlObOf8ipT3VJFkPjbjoGxijNTQ/ZdaNlT1MP2/b7ln5ppzArlb+6bjkZqbNfajReWA3hAnbUWQd39cLQ1BAm3Epzr/UjxAPfCKNpVq6dE9/Sq4nQsdzcO8y2/adZXpLNg7es5kOXltM7NM7LRzqiXbSwsoBwATvU6g0Ia+YxB8FngfON8Igtth4XGjuHQtpcBIkz9HR4fJLHd50kOy2ZuzZXkJuewqalBWxaWsAbx7roHEjcvjILCBewutYz5KYnsyhv/tX7BU76g6MWEGLe8PgkLX0jZyeThUpZbjppyS6Ox3kNYXdTL30jE9yzZQlZaX9oVb9xbSkpSS6217ZGsXThZQHhAna49Qyry3JD0myQnpJEfmYKR9osIMQ6X5NOqCal+fjWV47noaeqyt4TPVQWZVJRmHnOvpz0FK5fvYAj7QO8Vt8VpRKGlwWEC5THoxxuG2BtCEYY+ZTmpFsNIQ6EY4SRT3VJVlyvi3Cie5iuwXFqlhZOuf/y6iIyUpL42d5TES5ZZFhAuECd7BlmeNzNmrKckF2zNDedY52DtrZujGvsHMIlsLQoc+aDZyne11fee6KX1GQX6xdPvTZIcpKLdYtyeeFQO6MT7giXLvwsIFygDrfNP2VFoNLcNCbcSlMcf0OcypG2M5zsSZykbcc6BykvyCQ9JfTDJ33rK8fj/RqbcFPb0s+GxXnnTfi3oTyfoXE3ryTgiKOgAoKI3CwiR0SkQUQenGJ/mog85ezfJSKVzvZKERkRkX3Oz7f9ztkkIrXOOf8qoRz/ZmZ0qHUAl8DK0tDWECBxRhp5VNle28pjb57g2zuO8Z0dx6hPgNd2rHMoZCkrAvnWV47HjuXaln7G3R5qKqduLvKpKs6iKCuVZ99JvM7lGQOCiCQBDwO3AGuBe0RkbcBhfw70qupy4JvAN/z2HVPVjc7Pp/22PwLcD6xwfm6e+8sws1XXeoaq4qyQfkv0LtYORxOgY3nC7eGnO0/wWkMXl1UXcuuGMs6MTvDDN5rieu0Hj0dp7AxdUrtAvpFL8Tj0dF9zH8XZaVQUZJz3uCSXcPP6hbxU18HweGJl+A2mhrAFaFDVRlUdB54Ebg845nbgMefxM8AN5/vGLyJlQK6qvqneee4/Aj4w69KbOatrPRPS5iKAlCQXlcVZCVFDeK2hi7q2Af54Qxm3XbyYK5YV81fv8c5SfXZ/a9ymZ2jpG2Fs0hPyEUY+vvWV461jeWzSzYmuYdaU5QQ16u7WDYsYmXDz0uHEajYKJiAsBvy71JudbVMeo6qTQD9Q5OyrEpG3RWSHiFztd3zzDNc0YXJmdILm3pGQBwSAVaU5HG2Pv2+H/kYn3LxW38XqhTlcvqz47PbM1GRuXFtKU/fQ2XWo441vSGio5yD4qyrO4licNRkd7xzCrRp0E+qWqkJKctL4VYI1GwUTEKYKl4Ffj6Y7phVYoqqXAJ8HHheR3CC
v6b2wyP0iskdE9nR2dgZRXDMT31yBUI4w8llZmkNT91Bcj8B4s7GbkQk3169e8K59mysLWZSXznMH2hifjL+RNL5lM8NVQwAnyV2c1RCOdgyQkiQsLQxu5FWSS7hxbSmv1ncxGacjqqYSTEBoBir8/i4HTk93jIgkA3lAj6qOqWo3gKruBY4BK53j/TOqTXVNnPMeVdUaVa0pKSkJorhmJnUhWBRnOqsW5qAKDR3xWUvwrx2UF7z7w8Elwh9fvIj+kQl2N/VEoYTzc6xzkLyMFIqyUsP2HPG4vvLRdm+/ymzWFb9qeTGDY5Psb47P2uJUgnn1u4EVIlIlIqnA3cC2gGO2Afc5j+8AXlJVFZESp1MaEanG23ncqKqtwICIXOb0NXwM+GUIXo8JQl3rAHkZKSwMQ0ZKX5U7Xmcs7zxP7cBnaVEWi/MzePtkbwRLFhrHOgdZVpIV0qR2gXzNUfFSS+geHKNnaJwVsxxxd3l1ESLwekPizFqeMSA4fQIPAM8DdcDTqnpQRB4Skducw74HFIlIA96mId/Q1GuAd0RkP97O5k+rqu9r1WeA7wINeGsOz4XoNZkZeDuUg+s8m63KokxSk1xxOWNZVXmrqYflJdlT1g78XbIkn9P9o7T1x9caEI2dQyFdFGcqvhFMvhnRse6oU5tdOctmtIKsVNYvyuO1BAoIQa2HoKrbge0B277k93gUuHOK834O/Hyaa+4B1s+msGb+3B7lSNsAd2+pmPngOUhOcrFsQXZcjjQ62TNM3/AE711TOuOxG8rz2V7bytunerklrywCpZu/M6MTdAyMhW3Iqc/SoiySXEJ9nAwuqG8foDArlaLs2a8NceXyYr73WiNDY5PnJMKLVzZT+QJzsmeYkQl3WPoPfFaVZsfNh4G//c39JLskqPxO2WnJrFqYy75TfXGzktbZpHZhmpTmk5rsorIoMy76kSY9Hho7h+a8pvhVy4uZcHtrlonAAsIF5myHcggWxZnOyoU5tPSNxFWn4qTbQ21LP6sX5gQ9We+SinwGRifjpmkkEiOMfFYsyImLgNDcM8K428PyOdaaaioLSE128XqCZD+1gHCBqWs9Q5JLWDHHb0TBWOV0zsXTfIQ3jnUzNDbJhvL8oM9ZvTCHjJSkuOlcbuwaJNklLAlyaOV8LF+QzYmeYcYmY3v48fFub62psmhutab0lCQ2VxYkTD+CBYQLzMHTZ1hWEtqUFYFWng0I8dOPsG3/adKSXaxaGPxIE1/my8NtA3GR3fNo+yBLizJJmcXQyrlaUepNctfUFdtJ7o53DbEwN53MebT/X7m8mMNtAwmxkpoFhAtMbUv/tKl9Q2VxfgaZqUlxM/R0bNLN8wfaWLcod9YflqsX5jI26YmLOQlH2gZCsn52MHwd17HcbOT2KCe7h6mc56ztK5zZ7Dsbu0NRrKiygHABaT8zSufAGBeFOSC4XMKK0py4qSHsbOxhYGxyToFy2YIskl3CS3WxndNmeHySkz3Ds6oBzceykmxEoL4jdt8DLX3e/oOqeQaE9YtyyUpNYtdxCwgmjtQ6MyrDHRDAO9IoXgLCS3XtpKe45jQcMy05iarirJhPcubrzwlluvPzyUhNorwgg/oYriH4Js7NNyAkJ7moqSxkV2Ps1xJnYgHhAlLb0o9LYO2i8DcbrCzNoWtwnK7B2G5XVVVeOtLBlcuK59y2vnphDo1dQzTG8GgjX0ry1RGqIYB3pNGxmA4Ig5TkpJEdgvkDW6sLqe8YjPn3+0wsIFxAalv6WVaSTWZq+CfQ+JomYr0foaFjkFM9I1y/ZvpUFTNZ5bTLx3It4XDbAOkprnctHB9Oyxdk09g5FJPJ3ybdHk50D1M1x9FFgbZWeZM7v3U8vmsJFhAuILUt/RFpLgLOTu46dPpMRJ5vrnwf4u9ZNfeAUJiVysrS7JgOCEfbB1hZmkOSK3ILEy5fkM2428Op3thbUKiudYCxyfn3H/hsKM8jIyWJXXHesWwB4QLh61A
O9wgjn6LsNMry0jlwOrYzQb54uIM1Zbksyj//KlkzuX51KW8d7+FMjE7GO9w2ELH+A58VzgS4WFx21NcBHKqAkJLkoqaygF1WQzDx4GyHcnlkAgLAukW5HIzhGkL/8AR7T/Ry/er5p1V/z6oSJj3KGzE4Qal7cIyuwbGI9h/AH2ZEN8Rg38rOxh6KslLJzUgJ2TW3VhVyuG2A3qHxkF0z0iwgXCBqW/oRIag8PaGyblEexzoHY3bd2R31nbg9yvWrZ05mN5NLlxaQk5bMjqOxt4iTL9FgpGsIueneFOuxltfK41F2N/WErHbgs7Xa248Qz7UECwgXiANOh3IkMzKuW5SLqre9Nha9fLiDwqxUNlYEn65iOilJLq5cXswrRzpjbr3laIww8lm1MOds/qxYcbhtgP6RiZAHhA3leaQlu+J6PoIFhAtEJDuUfXz9FYdisB/B7VFeOdLBdStLQtbReu2qElr7R2Nu7P2R9gHyM1MoyZl9euf5Wrcol/qOwZhaUjXU/Qc+aclJXLqkIK7nI1hAuAC09I3QMTDGhgj2HwCU5aVTkJnCgZbY+oYIsO9UL73DE7znPCujzda1K719ETuOxFaz0ZG2AVaVhmdBpJmsX5yH26MxNUlxV2MP5QUZ5GeGfhnRy6qLqGs7Q/9wbA4umIkFhAvAHifPzubKwog+r4iwfnEeB1tjr4bw0uEOklzCNStDt073ovwMVpZmx1Q/gsejHG0fjFjKikDrnEmQsTK4wLcqnm/eQKhtrS5ElbjIbTUVCwgXgN1NPWSnJUelDXntolyOtA0wPhlbk5NerOugZmkBeSEcZQLeWsJbx3sYGouNjvTGriEG55inKRQqCjLJSUvmQEtsfCmo7xikZ2icrdXh+XK0sSKf1GRX3Ca6s4BwAdh9vJdLluSTHIG0x4HWLcpjwq0xleTsdN8Ih9sGuGEes5Onc92qBYy7PTHzgbD/VB9ASDrO58LlEtbE0PBj38SxrVXhCQjpKUlsrMiP25FGFhASXP/wBEfaB9gS4eYin/W+JoMY6kfwzSi+PoT9Bz41lQVkpCTFTLPR/uY+slKTwr6O8vl414w4ExNLje483sPC3PSwLhJ0WVUhB0/3x+zyudYUAAAZkElEQVQkxfOxgJDg9p70flOpiVJAqCzKIis1KaZmLL98uIMlhZlh+ZBMS07iimVFsRMQTvWxoTw/oikrAq1flMfohCfqyf9UlV2NPWytLgxrB/tl1UV4FPY2xcdKev6CCggicrOIHBGRBhF5cIr9aSLylLN/l4hUOttvFJG9IlLr/L7e75xXnGvuc35C/3XNsLupl2SXRLXJ4KLyPN4+2ReV5w80PD7Jaw1dXL96Qdg+FK5dVcKJ7uGz6ZWjZXTCzaHWM1wcpX97n3WLY6Nj+XDbAF2DY1y5vDisz3PJkgJSkiRmmg1nY8aAICJJwMPALcBa4B4RWRtw2J8Dvaq6HPgm8A1nexfwx6p6EXAf8OOA8+5V1Y3OT+xmBotje5p6WL84j4zU8C2ZOZPNld4q9GAMdLT+7mgXY5Meblo7/9nJ07lupfe7zY4j0X1L17WeYcKtbKyIToeyz7KSbFKTXVHvWH6t3ptW5OoV4Q0IGalJXFyez8447EcIpoawBWhQ1UZVHQeeBG4POOZ24DHn8TPADSIiqvq2qp52th8E0kUk8rNjLlCjE272n+pnS5g60IJVU1mIR2FfDNQSfnOojbyMFDaH8Z4sKcqkqjgr6s1Gf+hQLohqOVKSXKxemBP1GsJrDV0sK8miLG9+iQyDcVl1EQda4q8fIZiAsBg45fd3s7NtymNUdRLoBwIH+n4IeFtV/VeQ+IHTXPRFmab+LiL3i8geEdnT2Rkb7bLxoraln3G3h5ql0f1AuHRJPi6J/tjsSbeHF+s6uGH1grAvNH/tyhLebOyO6gzdfaf6KM1NY2FeetTK4ONNdNgftbQeY5Nudh3v5uoVoZt3cj7XrCzBHaPJDs8nmP8VU31
QB/6rnvcYEVmHtxnpU37773Wakq52fj461ZOr6qOqWqOqNSUlkfnHTBRvNHQjEr0OZZ+c9BRWL8xlz4noBoS3mnroH5ngpnXhay7yuXZVCaMTnqgumLK/uZ+Ly6Pbf+Bz0eJ8zoxORq1fZe+JXkYnPFwV5v4Dn0uW5MdsssPzCSYgNAMVfn+XA6enO0ZEkoE8oMf5uxz4BfAxVT3mO0FVW5zfA8DjeJumTAi9dLidjRX5FGaFfor+bG2uLODtk31MRHH1rN8cbCct2RXS2cnTuayqiNRkV9Q+EPqGxzneNcTGJbERELZUeWup0QqQr9V3kewSLlsWnhnKgXzJDnfEYLLD8wkmIOwGVohIlYikAncD2wKO2Ya30xjgDuAlVVURyQd+BXxBVV/3HSwiySJS7DxOAW4FDszvpRh/HQOj7G/u54YwjLWfi5rKQobH3VHLfKmq/PZQO1evKI7IEqIZqUlsrSrklSh1LO931r/YGCM1hGUl2RRnp0ZtwtZrDV1csiQ/JOsnB+vaVSWc7h+lIcaSHZ7PjAHB6RN4AHgeqAOeVtWDIvKQiNzmHPY9oEhEGoDPA76hqQ8Ay4EvBgwvTQOeF5F3gH1AC/DvoXxhF7pXDnu/mYYi138o1FR6vyHujtLY7IOnz9DSN8JNaxdG7DmvX72AY51DHIvC+Pudjd0ku4QNUR5y6iMibKkqZFdjd8S/MfcOjVPb0s9VyyPb5OyricZTs1FQPWuqul1VV6rqMlX9mrPtS6q6zXk8qqp3qupyVd2iqo3O9q+qapbf0NKNqtqhqkOquklVN6jqOlX9rKrGTn7cBPDS4Q7K8tJZUxadpGaByvIyKC/IOJtoL9K27T9Nskt4bxiHmwa6eb03+DxX2xqx5/TZcaSTTUsLIvqNeCZbq4o43T9Kc4TXWP5dfSeqcFWYh5sGWpyfwYoFsZXscCY2UzkBjU26ebW+k/eEcfLVXGyuLGR3U2/EvyG6Pcr/ebuF61YtiGh/SlleBpuWFrC9ti1izwne5sJDrWci0lcyG76EcpGesPX8wTZKctK4JAq1pWtXlrCrsSdmVw0MZAEhAb11vIehcXfM9B/4XFZdSNfgGIfbIpvo7vWGLjoGxviTSwNHS4ffLesXcqj1DE0RHF3z6lHvUMdrYywgrFyQQ35mSkT7EUbG3bx8uJP3rSvFFYX0HdeuKompZIczsYCQgF6s6yAt2cUVyyJbRZ6Jt8YCvz3UHtHn/cXbLeSmJ4clmd1MbrmoDIDnDkSulrDjaCfF2akRXT87GC6XsKWyMKJLTO442snIhJtb1pdF7Dn9bakqJCctOeK1xLmygJBg3B7l+YNtXLW8OKrpKqayICedjRX5EQ0IQ2OT/PpAG3+0YRHpKZG/H4vzM9hYkc9zByLTj+D2KK/Wd3LNipKofCOeydbqIk71jHC6LzL9CM8fbCM/MyVqs/XTkpO4ad1Cnj/Yxthk7HeTWkBIMDuOdtDaP8odm8qjXZQp3bi2lNqWflr7I/eBMDLhjkpzkc/7L1rIO839nOoZDvtzHWjpp3d4Iub6D3x86xBEopYwPunhhbp2blxTGvaZ6edz68VlDIxO8rujsT9r2QJCgnnirVMUZ6dyw5rYGG4ayJdU7oUI1RKe2n2KisKMqKbv8DVX/OLtlrA/1++cES2RHlETrDVluRRnp0aklvjGsS4GRifPjvaKlquWF5OfmcKz+wPn88YeCwgJpOPMKC8d7uBDm8pJTY7Nf9plJdlUFWfxmwh8IOw/1ceu4z3cd3llVEdbVRRmcvWKYp546ySTYZ6p/cLhDtYvzqU4OzZzSCa5hFvWl/HS4Y6wLzO6vbaV7LTksKe7nklKkotb1pfxQl07I+Ox3WwUm58aZk5+trcZt0e5q6Zi5oOjRES4cW0pOxu7w54J8tFXG8lJT+buLUvC+jzBuHfrUlr7R3n5SPjGpB9tH2D/qT4+sDF6zWPBuHVDGaMTHl48HL5Z3P0jEzy7v5X3X7Q
wKn1Hgf54QxnD4+6zq/XFKgsICcLjUZ7afYqtVYVUR3G5xGDcuLaUCbfychj/c5zqGea52lY+snVJTEzOeu+aBZTmpvHTXSfC9hxPvnWKlCThg5fEdkCoqSxkQU4a/zeMTSg/39vMyISbj11eGbbnmI2t1UUUZ6fxy33hbzacDwsICeK3de2c7Bnmnhj4NjyTS5cUsDg/g8d3nQzbc3zvteMkuYRPXFEVtueYjeQkF3dvXsKOo51h6VwenXDzH283c9O6hRTFaHORT5JLeP9FZbxytJOBMNQSPR7lJztPsLEin/WLo7s4kE+SS/jQpsW8UNcekcEFc2UBIQFMuj1849eHqS7J4tYN0RlvPRtJLuHjV1Sy63hPWFbR6hgY5andp7jt4sUxsRaAzz1bluAS4adhCIS/OdRO3/AEd2+O3eZCf398cRnjk56wdC6/caybxq4hPnb50pBfez4+fkUlLhF+8HpTtIsyLQsICeCpPado7BziwZtXkxzF4XWz8eHNFWSmJvH9146H/Nr/z6/qcHuUB65fHvJrz8fCvHTet66Un+w8Qdfg2MwnzMKTb52kvCCDK2NsMuJ0LqkoYFFeelhG3vzozSYKs1J5/0Wx9eWoLC+DWzeU8dTuk/SPxOZKavHx6WGmNTQ2yTd/W8/mygJujGDitvnKy0jhwzUVPPvOaTrOjIbsum8e6+b/7DvNp66tpqo4K2TXDZW/uWkVIxNu/vXF+pBd82j7AG8c6+aumoqYnIw2FZdL+JNLy3nlaCf17aFLZdLQMcALde3ctbkiJjqTA/3F1dUMjbt54q3wNZfOR/R728y8/K+XGugaHOM7H90U9NDKcLbdz8YnrqzksTeb+PHOE/zNTavmfb0Jt4cv/fIA5QUZ/OV1sVU78FlWks09Wyp4fNdJPn5F5bwHAKgqX3n2ILnpyXxka+z3H/n7s6uq+MHrx/nWi/X820cunff1vPfiEFlpyfzFVbHRdxRo/eI8Lq8u4oevN/FnV1bF3PBwCwhx7MW6dr694xgfrilnk9/Eq1j5wJ/J0qIsblxTyg9eb+KuzRWUF2TO63rfeqGe+o5BvndfTcyl7fD32RtW8ovft/CNXx/mOx+tmde1fn2gjdcbuvnKbetivjM5UGFWKvddUckjO47x1+0DrCydX6r2F+s6eLW+iy/eujam78Wnr1vGfd9/i+++1hhzX1xiKzyZoDV1DfG5p/axfnEuD92+PtrFmbMv3roWVeXvfvYOHs/c02L/fG8z//ZyA3fVVMTsLG2fkpw0Pn3tMp4/2M72eayVMDLu5qu/qmP1whzujbPagc8nr64mKzWZb70wvya0sUk3//SrQyxfkB1zncmBrl1Zws3rFvKtF+ojmgU3GBYQ4lDHwCif+vFeklzCI/duism20mBVFGbyj7eu5c3Gbh57s2lO19jZ2M2D//EOVywr4qsfjI/geP+11WxaWsDfPL1/TsuKqipff66Olr4RvnzburgZTBCoICuVj19Rya9qW9l7Yu6r6f3Lb45yonuYL966Nqp5i4L1ldvXkZrk4h9+URtTay7H/p0z56hvH+CDD7/ByZ5hHv7IpVQUzq+ZJRbcvbmC61aV8PXnDs96EfZn95/mz364myWFmTxy76a4+DAAbxbMR+69lNyMZD75oz30DI3P6vz/+UI9j715gj+7sorLqiOzcHy4fPKaaioKM/jMT/bSPocBBo/vOsl3ftfIvVuXxNwaENMpzU3n729ZzRvHusMyDHmu4uN/j0FV+eW+Fv7kkTcYd3t4+lOXRz1HS6iICP/jQxtYnJ/Bvd/dyc/2nJrxnOHxSR569hD/6Ym3WVOWy+OfvIy8zJQIlDZ0FuSm852P1tAxMMYdj7wRVE1BVfnfrzTwrRfruXNTOf/4R2siUNLwystI4d8/VsPg2CSf+vFeRieCz/fz8pEOvvjLA1y3qoSv3LYujKUMvY9sWcLVK4r50i8PzKvpMJSsUzkO7D/Vx9e21/HW8R4
2lOfxv++9dN4dsLFmQW46v/jLK/nMT/fyd8+8w6v1XfzpZUvZXFlwdvSUqtLSN8Ize5t57I0meocn+MSVlfzD+9eQkuSasTM9FkfhbKzI50d/toW/fuJtPvDw63zhltXcWVNB1hTpNg609PPlbQfZc6KXP9pQxtc/tAGXS+JmEMH5rF6Yy798+GI+/ZPfc/+P9/LPd2xgQe70kwon3R4eeeUY33qxnlWlOfzbRy6Nu2Yzl0v49p9u4mPff4u/fuJt0pJdUe//kmDar0TkZuBbQBLwXVX9esD+NOBHwCagG7hLVZucfV8A/hxwA3+tqs8Hc82p1NTU6J49e4J+cfHsdN8ILx7u4Ondp6ht6acgM4W/e99q7tpcQdIMY82j+QEx3w/dCbeHf/7NEX668ySDY5OU5qZRkpNGZkoyjV2DdA16m1beu2YBn752GTWVf1j4JJ5fd+fAGP/5qX281tBFZmoS71u3kMqiLLLSkmjtH+Wt4z0cON1PYWYq/+XmVdy56Q9zDuL5dQf66a4TPPTsIdJTkviH96/m/ReVkZP+h5rf6ISbV4508J3fNfL2yT5uu3gR/3T7+jnVDudz30L5us+MTnDvv+/i4Ol+Pnl1NZ9778qQj5ITkb2qOuOQthkDgogkAUeBG4FmYDdwj6oe8jvmL4ENqvppEbkb+KCq3iUia4EngC3AIuAFYKVz2nmvOZVEDAijE25O941wqneE+vYB6loHePtkL43O6IPVC3O4Z8sSPnDJYvIygnvTJ8IHxPD4JNtr23i9oYv+kQkGxyapKMjk4oo8rlhWxPIF7x6iGO+vW1XZc6KXn+9t5tcH2+gb9s5mTUt2ccmSfK5YVsx9V1S+630Q7687UGPnIH/7s/38/mQfSS7h4vI8stKSGR53c6RtgMGxSYqz0/jirWu4fR6ZXWMlIIA3KPz37XU88dYplhZl8qlrlvFHG8qC/j8/k1AGhMuBL6vq+5y/vwCgqv/d75jnnWPeFJFkoA0oAR70P9Z3nHPaea85lbkGhIHRCUSE9GTXnKqVqsqkRxmf9Hh/3N7fY35/j064GRl3MzzuZnh8kpEJN0NjbkbGJ73bJtwMj03SNzJB79A4vcPe3wMBOeFLctK4aLH3Q+/qFSWsLM1+14SzWG4iiGazTKJ9ME66PQyNuclITTrvBKZEe93gXQp0d1MPr9V3sbOxm0mPkpmaxJLCTP5oQxmXVxfx9J7msDx3MML1ut841sVDzx7icNsAqckurlxWxEXl+WxYnMdly4rmnLk32IAQzNUXA/69fM3A1umOUdVJEekHipztOwPO9YX0ma4ZMp99ct/ZPORJLm9gSEtJwiWgCh5VFG+WRMW7TZ1tbo8y7vYw15FhgneBjJRkF2nJLjJSkshKS6IwK5Xyggyy0pLJz0ghPzOVkpy0c/7B957onddQvGiI5WAVTvPtv7hQ79t0klzCZdVFcT+CarauWFbMc5+9mtqWfn6+t5k3G7vZcbQTj8ILn79myppxKAUTEKZqsA78eJzumOm2T/V1Z8qPXBG5H7jf+XNQRI5MU85AxUAsL2Jq5ZufuCrfvVEsyDRCcv/C+Lpi+t/33iiUb8U3ZnV4YPmCmq0XTEBoBvxz6pYDgSkKfcc0O01GeUDPDOfOdE0AVPVR4NEgynkOEdkTTBUpWqx882Plmx8r3/wkavmCaVDfDawQkSoRSQXuBrYFHLMNuM95fAfwkno7J7YBd4tImohUASuAt4K8pjHGmAiasYbg9Ak8ADyPd4jo91X1oIg8BOxR1W3A94Afi0gD3prB3c65B0XkaeAQMAn8laq6Aaa6ZuhfnjHGmGAF1WWtqtuB7QHbvuT3eBS4c5pzvwZ8LZhrhtism5kizMo3P1a++bHyzU9Cli+oiWnGGGMSX3zN9TbGGBM2CRMQRKRJRGpFZJ+I7HG2FYrIb0Wk3vldMNN1Ily+L4tIi7Ntn4i8P4rlyxeRZ0TksIjUicjlMXb/pipfTNw/EVn
lV4Z9InJGRD4XK/fvPOWLifvnlPE/i8hBETkgIk+ISLoz6GSXc/+ecgagxFL5figix/3u38Yolu+zTtkOisjnnG2zfv8lTJORiDQBNara5bftfwA9qvp1EXkQKFDVv4+h8n0ZGFTVf45GmfyJyGPAq6r6Xec/XibwD8TO/ZuqfJ8jRu6fj3hTvbTgnWj5V8TI/ZumfJ8gBu6fiCwGXgPWquqIMxBlO/B+4D9U9UkR+TawX1UfiaHyXQf8X1V9JtJlCijfeuBJvCmCxoFfA58BPsks338JU0OYxu3AY87jx4APRLEsMUtEcoFr8I4WQ1XHVbWPGLl/5ylfLLoBOKaqJ4iR+xfAv3yxJBnIEO88pkygFbge8H3YRvv+BZZvynlTUbIG2Kmqw6o6CewAPsgc3n+JFBAU+I2I7BXv7GaAUlVtBXB+L4ha6aYuH8ADIvKOiHw/ik0y1UAn8AMReVtEvisiWcTO/ZuufBAb98/f3XgTOkLs3D9//uWDGLh/qtoC/DNwEm8g6Af2An3OBxycm/Ym6uVT1d84u7/m3L9vijfrczQcAK4RkSIRycRbs6pgDu+/RAoIV6rqpcAtwF+JyDXRLlCAqcr3CLAM2Ij3jfb/RalsycClwCOqegkwhJOYMEZMV75YuX8AOE1ZtwE/i2Y5pjNF+WLi/jmB6HagCm9W5Cy8/08CRaV9e6ryicifAl8AVgObgUIgKs2BqloHfAP4Ld7mov14533NWsIEBFU97fzuAH6Btz2tXUTKAJzfHbFUPlVtV1W3qnqAf3fKHA3NQLOq7nL+fgbvB3Cs3L8pyxdD98/nFuD3qtru/B0r98/nnPLF0P17L3BcVTtVdQL4D+AKIN9pooHzpLeJVvlUtVW9xoAfEMX3n6p+T1UvVdVr8E4OrmcO77+ECAgikiUiOb7HwE14q1H+KTXuA34ZS+Xz/WM5Poi3zBGnqm3AKRFZ5Wy6Ae/s8pi4f9OVL1bun597OLc5Jibun59zyhdD9+8kcJmIZIqI8If338t4U+FAdO/fVOWr8/uwFbzt81F7/4nIAuf3EuBP8P47z/r9lxCjjESkGu+3bvA2Lzyuql8TkSLgaWAJ3n/UO1V1dqu4h7d8P8ZbXVegCfiUr80vCmXcCHwXSAUa8Y5AcRED9+885ftXYuf+ZeJN6V6tqv3Otph4/52nfLH0/vsKcBfepo63gb/A22fwJN7mmLeBP3W+jcdK+Z7Du+6LAPuAT6vqYJTK9yreJQcmgM+r6otzef8lREAwxhgzfwnRZGSMMWb+LCAYY4wBLCAYY4xxWEAwxhgDWEAwxhjjsIBgjDEGsIBgzJyJyHUicsUMx3xaRD42xfZKEYn2RDpjzhHUEprGmCldBwwCb0x3gKp+O2KlMWaeLCAYE8D5Rv+3eGfwvoN3tuc/4p0l3Q3cC2QAnwbcTqKz/6Sqr05xrS/jrDkgIpuA7wPDePPrGxNTLCAY40dE1gH/FW922i4RKcQbGC5TVRWRvwD+i6r+jbNoy2wWmPkB3sCxQ0T+3/C8AmPmzgKCMee6HnjGt7KdqvaIyEXAU04ys1Tg+GwvKiJ5QL6q7nA2/ZipUzwbEzXWqWzMuYR3593/X8C/qepFwKeA9BBd15iYYgHBmHO9CHzYyRSJ02SUh3cdYvhDOmGAASAnmIs6S372i8hVzqZ7Q1NcY0LHAoIxflT1IPA1YIeI7Af+Bfgy8DMnxXCX3+HPAh8UkX0icnUQl/8E8LCIvAmMhLbkxsyfpb82xhgDWA3BGGOMw0YZGRMCIvJfgTsDNv9MVb8WjfIYMxfWZGSMMQawJiNjjDEOCwjGGGMACwjGGGMcFhCMMcYAFhCMMcY4/n96stpJgf0UigAAAABJRU5ErkJggg==\n", 374 | "text/plain": [ 375 | "
" 376 | ] 377 | }, 378 | "metadata": {}, 379 | "output_type": "display_data" 380 | } 381 | ], 382 | "source": [ 383 | "sns.distplot(all_persons.cat_id)" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [] 392 | } 393 | ], 394 | "metadata": { 395 | "kernelspec": { 396 | "display_name": "Python 3", 397 | "language": "python", 398 | "name": "python3" 399 | }, 400 | "language_info": { 401 | "codemirror_mode": { 402 | "name": "ipython", 403 | "version": 3 404 | }, 405 | "file_extension": ".py", 406 | "mimetype": "text/x-python", 407 | "name": "python", 408 | "nbconvert_exporter": "python", 409 | "pygments_lexer": "ipython3", 410 | "version": "3.6.4" 411 | } 412 | }, 413 | "nbformat": 4, 414 | "nbformat_minor": 1 415 | } 416 | -------------------------------------------------------------------------------- /demos/input_data/Puma_Tract_Crosswalk_500.csv: -------------------------------------------------------------------------------- 1 | state,county,tract,bg,pumano 2 | 35,61,971400,0,500 3 | 35,61,971300,0,500 4 | 35,61,971100,0,500 5 | 35,61,971000,0,500 -------------------------------------------------------------------------------- /demos/input_data/hh_marginals.csv: -------------------------------------------------------------------------------- 1 | zone_id,sample_geog,cars,cars,cars,children,children,income,income,income,workers,workers,workers 2 | ,,none,one,two or more,no,yes,gt100,gt35-lt100,lt35,none,one,two or more 3 | 1,1,7,49,197,41,215,57,125,74,72,77,105 4 | 2,1,9,59,237,68,239,83,126,98,87,93,125 5 | 3,1,10,69,275,79,279,74,170,114,102,108,146 6 | 4,1,11,76,302,167,224,42,105,244,111,118,160 7 | 5,1,18,117,466,86,517,50,261,292,171,182,247 8 | 6,1,9,63,252,65,261,80,139,107,92,98,133 9 | 7,1,19,159,377,160,397,96,186,275,199,194,162 10 | 8,1,11,98,231,86,257,30,99,214,123,119,100 11 | 9,1,9,78,186,49,226,22,164,89,98,95,79 12 | 
10,1,7,65,155,55,175,21,143,66,82,80,66 13 | 11,1,17,297,542,289,570,118,407,334,303,279,274 14 | 12,1,15,258,474,201,548,76,371,302,264,244,240 15 | 13,1,40,217,486,251,495,121,314,311,269,259,216 16 | 14,1,51,278,622,472,482,53,320,581,344,332,277 17 | 15,1,38,210,470,220,501,68,350,303,259,251,209 18 | 16,1,23,79,83,45,142,0,60,127,87,54,43 19 | 17,1,23,78,81,47,137,0,49,135,86,54,43 20 | 18,1,36,122,127,103,184,0,134,153,134,84,67 21 | 19,1,40,135,141,66,252,23,190,105,149,93,75 22 | 20,1,89,303,318,442,271,19,167,527,334,209,168 23 | 21,1,43,147,154,108,238,0,161,185,162,101,81 -------------------------------------------------------------------------------- /demos/input_data/hhld_marginals_500.csv: -------------------------------------------------------------------------------- 1 | state,county,tract,bg,inc1,inc2,inc3,inc4,inc5,wkr1,wkr2,wkr3,wkr4,veh1,veh2,veh3,veh4,veh5,child1,child2,hhlds 2 | 35,61,971400,0,115,261,200,245,340,311,406,404,40,52,320,530,194,65,504,657,1161 3 | 35,61,971300,0,194,147,94,168,174,214,240,282,41,0,309,274,101,93,254,523,777 4 | 35,61,971100,0,151,176,65,151,27,208,240,109,13,0,149,242,122,57,215,355,570 5 | 35,61,971000,0,512,495,349,375,265,815,706,374,101,76,643,770,374,133,558,1438,1996 -------------------------------------------------------------------------------- /demos/input_data/person_marginals.csv: -------------------------------------------------------------------------------- 1 | zone_id,age,age,age,age,race,race,race,race,sex,sex 2 | ,19 and under,20 to 35,35 to 60,above 60,asian,black,other,white,female,male 3 | 1,312,108,223,177,64,0,0,756,440,380 4 | 2,235,143,296,181,0,0,0,855,452,403 5 | 3,303,229,445,174,0,0,24,1127,565,586 6 | 4,215,77,356,189,0,0,29,808,389,448 7 | 5,506,539,619,262,0,0,0,1926,981,945 8 | 6,377,171,285,102,0,0,47,888,476,459 9 | 7,312,150,488,382,0,0,14,1318,681,651 10 | 8,246,100,229,242,0,0,0,817,337,480 11 | 9,218,182,203,185,0,0,6,782,411,377 12 | 10,52,75,150,227,0,0,22,482,206,298 13 | 
11,490,314,617,721,21,82,14,2025,1062,1080 14 | 12,639,356,721,381,7,4,46,2040,1162,935 15 | 13,345,341,647,564,0,21,179,1697,895,1002 16 | 14,372,363,708,638,0,6,89,1986,1044,1037 17 | 15,361,281,624,528,6,0,141,1647,871,923 18 | 16,149,92,67,157,0,0,0,465,212,253 19 | 17,287,69,196,81,0,0,0,633,366,267 20 | 18,160,128,265,93,0,0,20,626,366,280 21 | 19,418,158,313,198,0,0,0,1087,546,541 22 | 20,238,151,495,327,0,0,132,1079,748,463 23 | 21,272,133,203,279,0,0,0,887,401,486 -------------------------------------------------------------------------------- /demos/input_data/pop_marginals_500.csv: -------------------------------------------------------------------------------- 1 | state,county,tract,bg,sex1,sex2,age1,age2,age3,race1,race2,race3,pop 2 | 35,61,971400,0,1632,1666,983,1997,318,1738,1289,271,3298 3 | 35,61,971300,0,1052,1021,518,1285,270,975,895,203,2073 4 | 35,61,971100,0,822,773,442,970,183,912,622,61,1595 5 | 35,61,971000,0,2232,2491,1085,2528,1110,2183,2313,227,4723 -------------------------------------------------------------------------------- /demos/simple_synthesis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from synthpop.synthesizer import synthesize, enable_logging\n", 10 | "import synthpop.categorizer as cat\n", 11 | "\n", 12 | "import multiprocessing\n", 13 | "import os\n", 14 | "import seaborn as sns\n", 15 | "from functools import partial\n", 16 | "import pandas as pd\n", 17 | "\n", 18 | "import synthpop.zone_synthesizer as zs" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "hh_marginal_file = 'input_data/hh_marginals.csv'\n", 28 | "person_marginal_file = 'input_data/person_marginals.csv'\n", 29 | "hh_sample_file = 'input_data/household_sample.csv'\n", 30 | "person_sample_file 
= 'input_data/person_sample.csv'" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "hh_marg, p_marg, hh_sample, p_sample, xwalk = zs.load_data(hh_marginal_file, person_marginal_file, hh_sample_file, person_sample_file)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stderr", 49 | "output_type": "stream", 50 | "text": [ 51 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 52 | " adj = constraint / (column * weights).sum()\n" 53 | ] 54 | }, 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "Drawing 254 households\n" 60 | ] 61 | }, 62 | { 63 | "name": "stderr", 64 | "output_type": "stream", 65 | "text": [ 66 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 67 | " adj = constraint / (column * weights).sum()\n" 68 | ] 69 | }, 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "Drawing 306 households\n" 75 | ] 76 | }, 77 | { 78 | "name": "stderr", 79 | "output_type": "stream", 80 | "text": [ 81 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 82 | " adj = constraint / (column * weights).sum()\n" 83 | ] 84 | }, 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "Drawing 356 households\n" 90 | ] 91 | }, 92 | { 93 | "name": "stderr", 94 | "output_type": "stream", 95 | "text": [ 96 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 97 | " adj = constraint / (column * weights).sum()\n" 98 | ] 99 | }, 100 | { 101 | "name": "stdout", 102 | "output_type": 
"stream", 103 | "text": [ 104 | "Drawing 390 households\n" 105 | ] 106 | }, 107 | { 108 | "name": "stderr", 109 | "output_type": "stream", 110 | "text": [ 111 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 112 | " adj = constraint / (column * weights).sum()\n" 113 | ] 114 | }, 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "Drawing 601 households\n" 120 | ] 121 | }, 122 | { 123 | "name": "stderr", 124 | "output_type": "stream", 125 | "text": [ 126 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 127 | " adj = constraint / (column * weights).sum()\n" 128 | ] 129 | }, 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "Drawing 324 households\n" 135 | ] 136 | }, 137 | { 138 | "name": "stderr", 139 | "output_type": "stream", 140 | "text": [ 141 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 142 | " adj = constraint / (column * weights).sum()\n" 143 | ] 144 | }, 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "Drawing 556 households\n" 150 | ] 151 | }, 152 | { 153 | "name": "stderr", 154 | "output_type": "stream", 155 | "text": [ 156 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 157 | " adj = constraint / (column * weights).sum()\n" 158 | ] 159 | }, 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "Drawing 342 households\n" 165 | ] 166 | }, 167 | { 168 | "name": "stderr", 169 | "output_type": "stream", 170 | "text": [ 171 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 
172 | " adj = constraint / (column * weights).sum()\n" 173 | ] 174 | }, 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "Drawing 273 households\n" 180 | ] 181 | }, 182 | { 183 | "name": "stderr", 184 | "output_type": "stream", 185 | "text": [ 186 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 187 | " adj = constraint / (column * weights).sum()\n" 188 | ] 189 | }, 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "Drawing 228 households\n", 195 | "Drawing 857 households\n", 196 | "Drawing 748 households\n" 197 | ] 198 | }, 199 | { 200 | "name": "stderr", 201 | "output_type": "stream", 202 | "text": [ 203 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 204 | " adj = constraint / (column * weights).sum()\n" 205 | ] 206 | }, 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "Drawing 744 households\n" 212 | ] 213 | }, 214 | { 215 | "name": "stderr", 216 | "output_type": "stream", 217 | "text": [ 218 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 219 | " adj = constraint / (column * weights).sum()\n" 220 | ] 221 | }, 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "Drawing 953 households\n" 227 | ] 228 | }, 229 | { 230 | "name": "stderr", 231 | "output_type": "stream", 232 | "text": [ 233 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 234 | " adj = constraint / (column * weights).sum()\n" 235 | ] 236 | }, 237 | { 238 | "name": "stdout", 239 | "output_type": "stream", 240 | "text": [ 241 | "Drawing 719 households\n" 242 | ] 243 | }, 244 | { 245 | "name": 
"stderr", 246 | "output_type": "stream", 247 | "text": [ 248 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 249 | " adj = constraint / (column * weights).sum()\n" 250 | ] 251 | }, 252 | { 253 | "name": "stdout", 254 | "output_type": "stream", 255 | "text": [ 256 | "Drawing 185 households\n" 257 | ] 258 | }, 259 | { 260 | "name": "stderr", 261 | "output_type": "stream", 262 | "text": [ 263 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 264 | " adj = constraint / (column * weights).sum()\n" 265 | ] 266 | }, 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "Drawing 183 households\n" 272 | ] 273 | }, 274 | { 275 | "name": "stderr", 276 | "output_type": "stream", 277 | "text": [ 278 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 279 | " adj = constraint / (column * weights).sum()\n" 280 | ] 281 | }, 282 | { 283 | "name": "stdout", 284 | "output_type": "stream", 285 | "text": [ 286 | "Drawing 286 households\n" 287 | ] 288 | }, 289 | { 290 | "name": "stderr", 291 | "output_type": "stream", 292 | "text": [ 293 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 294 | " adj = constraint / (column * weights).sum()\n" 295 | ] 296 | }, 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "Drawing 317 households\n" 302 | ] 303 | }, 304 | { 305 | "name": "stderr", 306 | "output_type": "stream", 307 | "text": [ 308 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 309 | " adj = constraint / (column * weights).sum()\n" 310 | ] 311 | }, 312 | { 313 | "name": 
"stdout", 314 | "output_type": "stream", 315 | "text": [ 316 | "Drawing 711 households\n" 317 | ] 318 | }, 319 | { 320 | "name": "stderr", 321 | "output_type": "stream", 322 | "text": [ 323 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 324 | " adj = constraint / (column * weights).sum()\n" 325 | ] 326 | }, 327 | { 328 | "name": "stdout", 329 | "output_type": "stream", 330 | "text": [ 331 | "Drawing 345 households\n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "all_households, all_persons, all_stats = zs.synthesize_all_zones(hh_marg, p_marg, hh_sample, p_sample, xwalk)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 5, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "all_persons, all_households, all_stats = zs.multiprocess_synthesize(hh_marg, p_marg, hh_sample, p_sample, xwalk)" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 6, 351 | "metadata": {}, 352 | "outputs": [ 353 | { 354 | "name": "stderr", 355 | "output_type": "stream", 356 | "text": [ 357 | "C:\\Users\\Juan\\Anaconda3\\envs\\synpop_py3\\lib\\site-packages\\matplotlib\\axes\\_axes.py:6448: UserWarning: The 'normed' kwarg is deprecated, and has been replaced by the 'density' kwarg.\n", 358 | " warnings.warn(\"The 'normed' kwarg is deprecated, and has been \"\n" 359 | ] 360 | }, 361 | { 362 | "data": { 363 | "text/plain": [ 364 | "" 365 | ] 366 | }, 367 | "execution_count": 6, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | }, 371 | { 372 | "data": { 373 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYQAAAELCAYAAADZW/HeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvFvnyVgAAIABJREFUeJzt3Xl8nGd56P3fNdr33bJsyZbkfYnjxLKdPSEhIaFpAiUhCSkE2hKgzSkc2p6GnsKBFN4D5+1bDj3NCaRsYclGKAfn4BDIZrLZsU3syLZsS5ZlW7L2zdqXmev9Y54x44lkjaTZfX0/H300era55/F4rrm36xZVxRhjjHFFuwDGGGNigwUEY4wxgAUEY4wxDgsIxhhjAAsIxhhjHBYQjDHGABYQjDHGOCwgGGOMASwgGGOMcSRHuwCzUVxcrJWVldEuhjHGxJW9e/d2qWrJTMfFVUCorKxkz5490S6GMcbEFRE5Ecxx1mRkjDEGsIBgjDHGYQHBGGMMYAHBGGOMwwKCMcYYIMiAICI3i8gREWkQkQen2H+NiPxeRCZF5A6/7e8RkX1+P6Mi8gFn3w9F5Ljfvo2he1nGGGNma8ZhpyKSBDwM3Ag0A7tFZJuqHvI77CTwceBv/c9V1ZeBjc51CoEG4Dd+h/ydqj4znxdgjDEmNIKZh7AFaFDVRgAReRK4HTgbEFS1ydnnOc917gCeU9XhOZfWGGNM2ATTZLQYOOX3d7OzbbbuBp4I2PY1EXlHRL4pImlzuKYxxpgQCaaGIFNs09k8iYiUARcBz/tt/gLQBqQCjwJ/Dzw0xbn3A/cDLFmyZDZPa4wJwuO7Tk677yNb7f/chSSYGkIzUOH3dzlwepbP82HgF6o64dugqq3qNQb8AG/T1Luo6qOqWqOqNSUlM6biMMYYM0fBBITdwAoRqRKRVLxNP9tm+Tz3ENBc5NQaEBEBPgAcmOU1jTHGhNCMAUFVJ4EH8Db31AFPq+pBEXlIRG4DEJHNItIM3Al8R0QO+s4XkUq8NYwdAZf+qYjUArVAMfDV+b8cY4wxcxVUtlNV3Q5sD9j2Jb/Hu/E2JU11bhNTdEKr6vWzKagxxpjwspnKxhhjAAsIxhhjHBYQjDHGABYQjDHGOCwgGGOMASwgGGOMcVhAMMYYA1hAMMYY47CAYIwxBrCAYIwxxmEBwRhjDGABwRhjjMMCgjHGGMACgjHGGIcFBGOMMYAFBGOMMQ4LCMYYYwALCMYYYxwWEIwxxgAWEIwxxjgsIBhjjAEgOZiDRORm4FtAEvBdVf16wP5rgP8JbADuVtVn/Pa5gVrnz5OqepuzvQp4EigEfg98VFXH5/dyjDGh9Piuk+fd/5GtSyJUEhMJM9YQRCQJeBi4BVgL3CMiawMOOwl8HHh8ikuMqOpG5+c2v+3fAL6pqiuAXuDP51B+Y4wxIRJMk9EWoEFVG51v8E8Ct/sfoKpNqvoO4AnmSUVEgOsBX03iMeADQZfaGGNMyAUTEBYDp/z+bna2BStdRPaIyE4R8X3oFwF9qjo5x2saY4wJsWD6EGSKbTqL51iiqqdFpBp4SURqgTPBXlNE7gfuB1iyxNorjTEmXIKpITQDFX5/lwOng30CVT3t/G4EXgEuAbqAfBHxBaRpr6mqj6pqjarWlJSUBPu0xhhjZimYgLAbWCEiVSKSCtwNbAvm4iJSICJpzuNi4ErgkKoq8DJwh3PofcAvZ1t4Y4wxoTNjQHDa+R8AngfqgKdV9aCIPCQiviGkm0WkGbgT+I6IHHROXwPsEZH9eAPA11X1kLPv74HPi0gD3j6F74XyhRljjJmdoOYhqOp2YHvAti/5Pd6Nt9kn8Lw3gIumuWYj3hFMJo7YuHRjEpfNVDbGGANYQDDGGOOwgGCMMQawgGCMMcZhAcEYYwxgAcEYY4zDAoIxxhjAAoIxxhiHBQRjjDGABQRjjDGOoFJXmAvLTOkpjDGJyWoIxhhjAAsIxhh
jHBYQjDHGABYQjDHGOCwgGGOMASwgGGOMcVhAMMYYA1hAMMYY47CAYIwxBggyIIjIzSJyREQaROTBKfZfIyK/F5FJEbnDb/tGEXlTRA6KyDsicpffvh+KyHER2ef8bAzNSzLGGDMXM6auEJEk4GHgRqAZ2C0i21T1kN9hJ4GPA38bcPow8DFVrReRRcBeEXleVfuc/X+nqs/M90UYY4yZv2ByGW0BGlS1EUBEngRuB84GBFVtcvZ5/E9U1aN+j0+LSAdQAvRhjDEmpgTTZLQYOOX3d7OzbVZEZAuQChzz2/w1pynpmyKSNttrGmOMCZ1gAoJMsU1n8yQiUgb8GPiEqvpqEV8AVgObgULg76c5934R2SMiezo7O2fztMYYY2YhmIDQDFT4/V0OnA72CUQkF/gV8I+qutO3XVVb1WsM+AHepql3UdVHVbVGVWtKSkqCfVpjjDGzFExA2A2sEJEqEUkF7ga2BXNx5/hfAD9S1Z8F7CtzfgvwAeDAbApujDEmtGYMCKo6CTwAPA/UAU+r6kEReUhEbgMQkc0i0gzcCXxHRA46p38YuAb4+BTDS38qIrVALVAMfDWkr8wYY8ysBLVimqpuB7YHbPuS3+PdeJuSAs/7CfCTaa55/axKaowxJqxsprIxxhjAAoIxxhiHBQRjjDGABQRjjDEOCwjGGGMACwjGGGMcFhCMMcYAFhCMMcY4LCAYY4wBLCAYY4xxWEAwxhgDWEAwxhjjsIBgjDEGsIBgjDHGEVT6a2OMSTSP7zo57b6PbF0SwZLEDqshGGOMASwgGGOMcVhAMMYYA1hAMMYY47CAYIwxBrCAYIwxxhFUQBCRm0XkiIg0iMiDU+y/RkR+LyKTInJHwL77RKTe+bnPb/smEal1rvmvIiLzfznGGGPmasaAICJJwMPALcBa4B4RWRtw2Eng48DjAecWAv8N2ApsAf6biBQ4ux8B7gdWOD83z/lVGGOMmbdgaghbgAZVbVTVceBJ4Hb/A1S1SVXfATwB574P+K2q9qhqL/Bb4GYRKQNyVfVNVVXgR8AH5vtijDHGzF0wAWExcMrv72ZnWzCmO3ex83gu1zTGGBMGwQSEqdr2NcjrT3du0NcUkftFZI+I7Ons7AzyaY0xodI3PM6u492MTwY2AJhEE0wuo2agwu/vcuB0kNdvBq4LOPcVZ3t5MNdU1UeBRwFqamqCDUTGmHnqGRrn1wfbOHS6H4/C4OgkN6wpjXaxTBgFU0PYDawQkSoRSQXuBrYFef3ngZtEpMDpTL4JeF5VW4EBEbnMGV30MeCXcyi/MSZMtu1v4Wj7AFctL2b5gmxeP9bF6IQ72sUyYTRjQFDVSeABvB/udcDTqnpQRB4SkdsARGSziDQDdwLfEZGDzrk9wD/hDSq7gYecbQCfAb4LNADHgOdC+sqMMXM2ODZJQ8cgl1cXcfP6Mt63diGjEx52NnZHu2gmjIJKf62q24HtAdu+5Pd4N+c2Afkf933g+1Ns3wOsn01hjTGRUdvibSa6uCIfgMUFGawqzeHV+i4ury4iLSUpyiU04WAzlY0x77L/VB8Lc9NZmJt+dtv1qxcwMuFm1/Ge85xp4pkFBGPMOXqGxjnZM3y2duBTUZjJ0sJMalv6o1QyE24WEIwx53inuQ+ADeV579pXXZLF6b4RxqxzOSFZQDDGnGPfqT6WFmVSkJn6rn2VRVkocLJnOPIFM2FnAcEYc9aZ0Qk6BsZYV5Y75f4lhZkI0NQ9FNmCmYiwgGCMOauldwTw9hdMJS0liUX5GTR1Ww0hEVlAMMac1dw7jEugLC9j2mMqizI51TPMpNtSWSSaoOYhGHOhe3zXyfPu/8jWJREqSXg1946wICed1OTpvysuLcri9WPdtPSNRLBkkXWh/HsHshqCMQYAVaW5d4TygulrBwBLi7zNSdZslHgsIBhjAOgdnmBkws3iGQJCTnoKxdmpnLCO5YRjTUbmvA63nuFQ6xkqCjOpLs6iKDst2kUyYdL
c6/3GX14wdYeyv8qiLA6ePoPHo7hctvptorCAYKY1NuHm579vZnjczZ4TvQjwiSurWL4gO9pFM2HQ3DtCskvOSVcxnSWFmew50UtT9xDVJfZ+SBQWEMy0Xm3oYmjczWeuXUZ6ShLfe62RV+s7EzYgzNSRmOiae0coy0snKYhv/AvzvEHjaPuABYQEYn0IZkoDoxO8Vt/F+sV5VBRmUpKTxtbqIuo7Buk4Mxrt4pkQ86hyum+ExUE0FwEsyElHgCNtg+EtmIkoCwhmSi8d7mDS4+GmtX9YIWtzZSHJLuENy4mfcDoHxhh3e2YcYeSTmuyiICuVo+0DYS6ZiSQLCOZdRsbd7G7qoaaykGK/TuTstGQursjn7ZO9jIxbcrNE4ptTsDg/uIAAUJqbzhELCAnFAoJ5l4bOQTwKlwSkPwa4YlkRE25ld5PlxE8kHWfGSBI55wvATEpz0zjeNcTYpH05SBQWEMy71LcPkJbsmnL4YVleBhUFGRw8bTnxE0nn4BhF2alBdSj7lOam4/YojZ02HyFRWEAw51BVGjoGWVaSPe2HQ3VJNi19I/bNMIF0DoxRkjO7OSalzvDUI23WbJQoLCCYcxzrHKJvZIIVpdMPJawqzsKjlhM/Ubg9Ss/QGCWznHRYnJ1KSpJYP0ICsYBgzvG7o50ArFiQM+0xSwszcQkc77KmgkTQMzSOR6F4ljWEZJeL6uJsjloNIWEEFRBE5GYROSIiDSLy4BT700TkKWf/LhGpdLbfKyL7/H48IrLR2feKc03fvgWhfGFmbn5X30lRViqFWe9eLcvHlxPfAkJi6BwYA5h1DQFg5cIcqyEkkBkDgogkAQ8DtwBrgXtEZG3AYX8O9KrqcuCbwDcAVPWnqrpRVTcCHwWaVHWf33n3+varakcIXo+Zh9EJNzsbu1lROn3twKeqKIvm3hEmLCd+3OscdALCLGsIAKtKs2nuHWFwbDLUxTJREEwNYQvQoKqNqjoOPAncHnDM7cBjzuNngBtEJLBH8h7gifkU1oTXnqZeRic8rAwiNUVVcRZuj3LK+hHiXufAGDnpyaSnJM363JXOl4d6qyUkhGACwmLglN/fzc62KY9R1UmgHygKOOYu3h0QfuA0F31xigACgIjcLyJ7RGRPZ2dnEMU1c/VWUw8u8X7Yz2RpURaC9SMkgq7BsVnNP/C3aqE3INhIo8QQTECY6oNaZ3OMiGwFhlX1gN/+e1X1IuBq5+ejUz25qj6qqjWqWlNSUhJEcc1cvdPcx/IF2aQF8U0xIzWJhXnpFhDinKrOacipT0VBJmnJLho6LKdRIggmIDQDFX5/lwOnpztGRJKBPMB/KuvdBNQOVLXF+T0API63acpEiapS29zPhvJ3z06eTlVxFid7hpn0WD9CvOoeGmdkwj2nDmUAl0uoKs6yLwYJIpiAsBtYISJVIpKK98N9W8Ax24D7nMd3AC+pqgKIiAu4E2/fA862ZBEpdh6nALcCBzBR09I3QvfQOBeX5wV9zpLCTCY9SseZsTCWzITTMeeb/VxrCADVJVk0WkBICDMGBKdP4AHgeaAOeFpVD4rIQyJym3PY94AiEWkAPg/4D029BmhW1Ua/bWnA8yLyDrAPaAH+fd6vxsxZbbM3FcVFs6ghlOV5E6G19ifuYuuJzvdBPtcaAvyhpmgjzuJfUAvkqOp2YHvAti/5PR7FWwuY6txXgMsCtg0Bm2ZZVhNG+5v7SUkS1pTlcOj0maDOKXJmqp7uH7V/zDh1rGOQZJeQl5ky52tUF2efHXFmi+XEN5upbABvh/LqhbmkJQc/9NAlQlleBq19tmBOvDrWOUhJThquqQf5BaWqxDsqzZLcxT8LCAaPR6lt6eeiWfQf+JTlpdPaP4LTZWTizLHOoTkPOfWpdoYpW8dy/LOAYGjqHmJgdHJWHco+ZXkZjE166B2eCEPJTDiNTrhp7h2eV4cyQH6mN9VJY5cNPY13FhAMtS3eDuXZDDn1KXMWW7eO5fhzons
Yj86vQ9mnqjjLmowSgAUEw/5T/aSnuFgRRMqKQKW53sXWT1s/Qtw51jn/Iac+1cU29DQRWEAw1Lb0sW5RHslJs387pCa7KMlJsxpCHPLNQZhvHwJ4O5Y7B8YYGLWmw3hmAeEC5/Eoda0DrFuUO+dreDuWrYYQbxq7hliUl05q8vw/Bnwdy01dluwwnllAuMD5UhevKZtPQMigf2SCYUuBHFeOdQ6ybA7NhFPxzT+wjuX4ZgHhAneo1TsJbV4BId/pWD5jtYR4oaocc9bODoUlhZmI2FyEeGcB4QJ3uO0MIrDyPGsoz+RsCos+60eIF+1nxhgad7OsZOZU58FIT0mivCDDOpbjnAWEC1xd6xmqirLITA0qi8mUstOSyUlLpt2S3MWNRmeEUShTTVQVZ3PcmozimgWEC1xd68C8mot8SnPTaR+wJqN44RtyGqomI/B2LB/vHEqoWes7G7vZcaSDUz3DuD2J87qmM/evhSbuDYxOcLJnmA/XlM/7WqW5abzV1IPHo7hcc8+LYyLjWOcQWalJlObOf8ipT3VJFkPjbjoGxijNTQ/ZdaNlT1MP2/b7ln5ppzArlb+6bjkZqbNfajReWA3hAnbUWQd39cLQ1BAm3Epzr/UjxAPfCKNpVq6dE9/Sq4nQsdzcO8y2/adZXpLNg7es5kOXltM7NM7LRzqiXbSwsoBwATvU6g0Ia+YxB8FngfON8Igtth4XGjuHQtpcBIkz9HR4fJLHd50kOy2ZuzZXkJuewqalBWxaWsAbx7roHEjcvjILCBewutYz5KYnsyhv/tX7BU76g6MWEGLe8PgkLX0jZyeThUpZbjppyS6Ox3kNYXdTL30jE9yzZQlZaX9oVb9xbSkpSS6217ZGsXThZQHhAna49Qyry3JD0myQnpJEfmYKR9osIMQ6X5NOqCal+fjWV47noaeqyt4TPVQWZVJRmHnOvpz0FK5fvYAj7QO8Vt8VpRKGlwWEC5THoxxuG2BtCEYY+ZTmpFsNIQ6EY4SRT3VJVlyvi3Cie5iuwXFqlhZOuf/y6iIyUpL42d5TES5ZZFhAuECd7BlmeNzNmrKckF2zNDedY52DtrZujGvsHMIlsLQoc+aDZyne11fee6KX1GQX6xdPvTZIcpKLdYtyeeFQO6MT7giXLvwsIFygDrfNP2VFoNLcNCbcSlMcf0OcypG2M5zsSZykbcc6BykvyCQ9JfTDJ33rK8fj/RqbcFPb0s+GxXnnTfi3oTyfoXE3ryTgiKOgAoKI3CwiR0SkQUQenGJ/mog85ezfJSKVzvZKERkRkX3Oz7f9ztkkIrXOOf8qoRz/ZmZ0qHUAl8DK0tDWECBxRhp5VNle28pjb57g2zuO8Z0dx6hPgNd2rHMoZCkrAvnWV47HjuXaln7G3R5qKqduLvKpKs6iKCuVZ99JvM7lGQOCiCQBDwO3AGuBe0RkbcBhfw70qupy4JvAN/z2HVPVjc7Pp/22PwLcD6xwfm6e+8sws1XXeoaq4qyQfkv0LtYORxOgY3nC7eGnO0/wWkMXl1UXcuuGMs6MTvDDN5rieu0Hj0dp7AxdUrtAvpFL8Tj0dF9zH8XZaVQUZJz3uCSXcPP6hbxU18HweGJl+A2mhrAFaFDVRlUdB54Ebg845nbgMefxM8AN5/vGLyJlQK6qvqneee4/Aj4w69KbOatrPRPS5iKAlCQXlcVZCVFDeK2hi7q2Af54Qxm3XbyYK5YV81fv8c5SfXZ/a9ymZ2jpG2Fs0hPyEUY+vvWV461jeWzSzYmuYdaU5QQ16u7WDYsYmXDz0uHEajYKJiAsBvy71JudbVMeo6qTQD9Q5OyrEpG3RWSHiFztd3zzDNc0YXJmdILm3pGQBwSAVaU5HG2Pv2+H/kYn3LxW38XqhTlcvqz47PbM1GRuXFtKU/fQ2XWo441vSGio5yD4qyrO4licNRkd7xzCrRp0E+qWqkJKctL4VYI1GwUTEKYKl4Ffj6Y7phVYoqqXAJ8HHheR3CC
v6b2wyP0iskdE9nR2dgZRXDMT31yBUI4w8llZmkNT91Bcj8B4s7GbkQk3169e8K59mysLWZSXznMH2hifjL+RNL5lM8NVQwAnyV2c1RCOdgyQkiQsLQxu5FWSS7hxbSmv1ncxGacjqqYSTEBoBir8/i4HTk93jIgkA3lAj6qOqWo3gKruBY4BK53j/TOqTXVNnPMeVdUaVa0pKSkJorhmJnUhWBRnOqsW5qAKDR3xWUvwrx2UF7z7w8Elwh9fvIj+kQl2N/VEoYTzc6xzkLyMFIqyUsP2HPG4vvLRdm+/ymzWFb9qeTGDY5Psb47P2uJUgnn1u4EVIlIlIqnA3cC2gGO2Afc5j+8AXlJVFZESp1MaEanG23ncqKqtwICIXOb0NXwM+GUIXo8JQl3rAHkZKSwMQ0ZKX5U7Xmcs7zxP7cBnaVEWi/MzePtkbwRLFhrHOgdZVpIV0qR2gXzNUfFSS+geHKNnaJwVsxxxd3l1ESLwekPizFqeMSA4fQIPAM8DdcDTqnpQRB4Skducw74HFIlIA96mId/Q1GuAd0RkP97O5k+rqu9r1WeA7wINeGsOz4XoNZkZeDuUg+s8m63KokxSk1xxOWNZVXmrqYflJdlT1g78XbIkn9P9o7T1x9caEI2dQyFdFGcqvhFMvhnRse6oU5tdOctmtIKsVNYvyuO1BAoIQa2HoKrbge0B277k93gUuHOK834O/Hyaa+4B1s+msGb+3B7lSNsAd2+pmPngOUhOcrFsQXZcjjQ62TNM3/AE711TOuOxG8rz2V7bytunerklrywCpZu/M6MTdAyMhW3Iqc/SoiySXEJ9nAwuqG8foDArlaLs2a8NceXyYr73WiNDY5PnJMKLVzZT+QJzsmeYkQl3WPoPfFaVZsfNh4G//c39JLskqPxO2WnJrFqYy75TfXGzktbZpHZhmpTmk5rsorIoMy76kSY9Hho7h+a8pvhVy4uZcHtrlonAAsIF5myHcggWxZnOyoU5tPSNxFWn4qTbQ21LP6sX5gQ9We+SinwGRifjpmkkEiOMfFYsyImLgNDcM8K428PyOdaaaioLSE128XqCZD+1gHCBqWs9Q5JLWDHHb0TBWOV0zsXTfIQ3jnUzNDbJhvL8oM9ZvTCHjJSkuOlcbuwaJNklLAlyaOV8LF+QzYmeYcYmY3v48fFub62psmhutab0lCQ2VxYkTD+CBYQLzMHTZ1hWEtqUFYFWng0I8dOPsG3/adKSXaxaGPxIE1/my8NtA3GR3fNo+yBLizJJmcXQyrlaUepNctfUFdtJ7o53DbEwN53MebT/X7m8mMNtAwmxkpoFhAtMbUv/tKl9Q2VxfgaZqUlxM/R0bNLN8wfaWLcod9YflqsX5jI26YmLOQlH2gZCsn52MHwd17HcbOT2KCe7h6mc56ztK5zZ7Dsbu0NRrKiygHABaT8zSufAGBeFOSC4XMKK0py4qSHsbOxhYGxyToFy2YIskl3CS3WxndNmeHySkz3Ds6oBzceykmxEoL4jdt8DLX3e/oOqeQaE9YtyyUpNYtdxCwgmjtQ6MyrDHRDAO9IoXgLCS3XtpKe45jQcMy05iarirJhPcubrzwlluvPzyUhNorwgg/oYriH4Js7NNyAkJ7moqSxkV2Ps1xJnYgHhAlLb0o9LYO2i8DcbrCzNoWtwnK7B2G5XVVVeOtLBlcuK59y2vnphDo1dQzTG8GgjX0ry1RGqIYB3pNGxmA4Ig5TkpJEdgvkDW6sLqe8YjPn3+0wsIFxAalv6WVaSTWZq+CfQ+JomYr0foaFjkFM9I1y/ZvpUFTNZ5bTLx3It4XDbAOkprnctHB9Oyxdk09g5FJPJ3ybdHk50D1M1x9FFgbZWeZM7v3U8vmsJFhAuILUt/RFpLgLOTu46dPpMRJ5vrnwf4u9ZNfeAUJiVysrS7JgOCEfbB1hZmkOSK3ILEy5fkM2428Op3thbUKiudYCxyfn3H/hsKM8jIyWJXXHesWwB4QLh61A
O9wgjn6LsNMry0jlwOrYzQb54uIM1Zbksyj//KlkzuX51KW8d7+FMjE7GO9w2ELH+A58VzgS4WFx21NcBHKqAkJLkoqaygF1WQzDx4GyHcnlkAgLAukW5HIzhGkL/8AR7T/Ry/er5p1V/z6oSJj3KGzE4Qal7cIyuwbGI9h/AH2ZEN8Rg38rOxh6KslLJzUgJ2TW3VhVyuG2A3qHxkF0z0iwgXCBqW/oRIag8PaGyblEexzoHY3bd2R31nbg9yvWrZ05mN5NLlxaQk5bMjqOxt4iTL9FgpGsIueneFOuxltfK41F2N/WErHbgs7Xa248Qz7UECwgXiANOh3IkMzKuW5SLqre9Nha9fLiDwqxUNlYEn65iOilJLq5cXswrRzpjbr3laIww8lm1MOds/qxYcbhtgP6RiZAHhA3leaQlu+J6PoIFhAtEJDuUfXz9FYdisB/B7VFeOdLBdStLQtbReu2qElr7R2Nu7P2R9gHyM1MoyZl9euf5Wrcol/qOwZhaUjXU/Qc+aclJXLqkIK7nI1hAuAC09I3QMTDGhgj2HwCU5aVTkJnCgZbY+oYIsO9UL73DE7znPCujzda1K719ETuOxFaz0ZG2AVaVhmdBpJmsX5yH26MxNUlxV2MP5QUZ5GeGfhnRy6qLqGs7Q/9wbA4umIkFhAvAHifPzubKwog+r4iwfnEeB1tjr4bw0uEOklzCNStDt073ovwMVpZmx1Q/gsejHG0fjFjKikDrnEmQsTK4wLcqnm/eQKhtrS5ElbjIbTUVCwgXgN1NPWSnJUelDXntolyOtA0wPhlbk5NerOugZmkBeSEcZQLeWsJbx3sYGouNjvTGriEG55inKRQqCjLJSUvmQEtsfCmo7xikZ2icrdXh+XK0sSKf1GRX3Ca6s4BwAdh9vJdLluSTHIG0x4HWLcpjwq0xleTsdN8Ih9sGuGEes5Onc92qBYy7PTHzgbD/VB9ASDrO58LlEtbE0PBj38SxrVXhCQjpKUlsrMiP25FGFhASXP/wBEfaB9gS4eYin/W+JoMY6kfwzSi+PoT9Bz41lQVkpCTFTLPR/uY+slKTwr6O8vl414w4ExNLje483sPC3PSwLhJ0WVUhB0/3x+zyudYUAAAZkElEQVQkxfOxgJDg9p70flOpiVJAqCzKIis1KaZmLL98uIMlhZlh+ZBMS07iimVFsRMQTvWxoTw/oikrAq1flMfohCfqyf9UlV2NPWytLgxrB/tl1UV4FPY2xcdKev6CCggicrOIHBGRBhF5cIr9aSLylLN/l4hUOttvFJG9IlLr/L7e75xXnGvuc35C/3XNsLupl2SXRLXJ4KLyPN4+2ReV5w80PD7Jaw1dXL96Qdg+FK5dVcKJ7uGz6ZWjZXTCzaHWM1wcpX97n3WLY6Nj+XDbAF2DY1y5vDisz3PJkgJSkiRmmg1nY8aAICJJwMPALcBa4B4RWRtw2J8Dvaq6HPgm8A1nexfwx6p6EXAf8OOA8+5V1Y3OT+xmBotje5p6WL84j4zU8C2ZOZPNld4q9GAMdLT+7mgXY5Meblo7/9nJ07lupfe7zY4j0X1L17WeYcKtbKyIToeyz7KSbFKTXVHvWH6t3ptW5OoV4Q0IGalJXFyez8447EcIpoawBWhQ1UZVHQeeBG4POOZ24DHn8TPADSIiqvq2qp52th8E0kUk8rNjLlCjE272n+pnS5g60IJVU1mIR2FfDNQSfnOojbyMFDaH8Z4sKcqkqjgr6s1Gf+hQLohqOVKSXKxemBP1GsJrDV0sK8miLG9+iQyDcVl1EQda4q8fIZiAsBg45fd3s7NtymNUdRLoBwIH+n4IeFtV/VeQ+IHTXPRFmab+LiL3i8geEdnT2Rkb7bLxoraln3G3h5ql0f1AuHRJPi6J/tjsSbeHF+s6uGH1grAvNH/tyhLebOyO6gzdfaf6KM1NY2FeetTK4ONNdNgftbQeY5Nudh3v5uoVoZt3cj7XrCzBHaPJDs8nmP8VU31
QB/6rnvcYEVmHtxnpU37773Wakq52fj461ZOr6qOqWqOqNSUlkfnHTBRvNHQjEr0OZZ+c9BRWL8xlz4noBoS3mnroH5ngpnXhay7yuXZVCaMTnqgumLK/uZ+Ly6Pbf+Bz0eJ8zoxORq1fZe+JXkYnPFwV5v4Dn0uW5MdsssPzCSYgNAMVfn+XA6enO0ZEkoE8oMf5uxz4BfAxVT3mO0FVW5zfA8DjeJumTAi9dLidjRX5FGaFfor+bG2uLODtk31MRHH1rN8cbCct2RXS2cnTuayqiNRkV9Q+EPqGxzneNcTGJbERELZUeWup0QqQr9V3kewSLlsWnhnKgXzJDnfEYLLD8wkmIOwGVohIlYikAncD2wKO2Ya30xjgDuAlVVURyQd+BXxBVV/3HSwiySJS7DxOAW4FDszvpRh/HQOj7G/u54YwjLWfi5rKQobH3VHLfKmq/PZQO1evKI7IEqIZqUlsrSrklSh1LO931r/YGCM1hGUl2RRnp0ZtwtZrDV1csiQ/JOsnB+vaVSWc7h+lIcaSHZ7PjAHB6RN4AHgeqAOeVtWDIvKQiNzmHPY9oEhEGoDPA76hqQ8Ay4EvBgwvTQOeF5F3gH1AC/DvoXxhF7pXDnu/mYYi138o1FR6vyHujtLY7IOnz9DSN8JNaxdG7DmvX72AY51DHIvC+Pudjd0ku4QNUR5y6iMibKkqZFdjd8S/MfcOjVPb0s9VyyPb5OyricZTs1FQPWuqul1VV6rqMlX9mrPtS6q6zXk8qqp3qupyVd2iqo3O9q+qapbf0NKNqtqhqkOquklVN6jqOlX9rKrGTn7cBPDS4Q7K8tJZUxadpGaByvIyKC/IOJtoL9K27T9Nskt4bxiHmwa6eb03+DxX2xqx5/TZcaSTTUsLIvqNeCZbq4o43T9Kc4TXWP5dfSeqcFWYh5sGWpyfwYoFsZXscCY2UzkBjU26ebW+k/eEcfLVXGyuLGR3U2/EvyG6Pcr/ebuF61YtiGh/SlleBpuWFrC9ti1izwne5sJDrWci0lcyG76EcpGesPX8wTZKctK4JAq1pWtXlrCrsSdmVw0MZAEhAb11vIehcXfM9B/4XFZdSNfgGIfbIpvo7vWGLjoGxviTSwNHS4ffLesXcqj1DE0RHF3z6lHvUMdrYywgrFyQQ35mSkT7EUbG3bx8uJP3rSvFFYX0HdeuKompZIczsYCQgF6s6yAt2cUVyyJbRZ6Jt8YCvz3UHtHn/cXbLeSmJ4clmd1MbrmoDIDnDkSulrDjaCfF2akRXT87GC6XsKWyMKJLTO442snIhJtb1pdF7Dn9bakqJCctOeK1xLmygJBg3B7l+YNtXLW8OKrpKqayICedjRX5EQ0IQ2OT/PpAG3+0YRHpKZG/H4vzM9hYkc9zByLTj+D2KK/Wd3LNipKofCOeydbqIk71jHC6LzL9CM8fbCM/MyVqs/XTkpO4ad1Cnj/Yxthk7HeTWkBIMDuOdtDaP8odm8qjXZQp3bi2lNqWflr7I/eBMDLhjkpzkc/7L1rIO839nOoZDvtzHWjpp3d4Iub6D3x86xBEopYwPunhhbp2blxTGvaZ6edz68VlDIxO8rujsT9r2QJCgnnirVMUZ6dyw5rYGG4ayJdU7oUI1RKe2n2KisKMqKbv8DVX/OLtlrA/1++cES2RHlETrDVluRRnp0aklvjGsS4GRifPjvaKlquWF5OfmcKz+wPn88YeCwgJpOPMKC8d7uBDm8pJTY7Nf9plJdlUFWfxmwh8IOw/1ceu4z3cd3llVEdbVRRmcvWKYp546ySTYZ6p/cLhDtYvzqU4OzZzSCa5hFvWl/HS4Y6wLzO6vbaV7LTksKe7nklKkotb1pfxQl07I+Ox3WwUm58aZk5+trcZt0e5q6Zi5oOjRES4cW0pOxu7w54J8tFXG8lJT+buLUvC+jzBuHfrUlr7R3n5SPjGpB9tH2D/qT4+sDF6zWPBuHVDGaMTHl48HL5Z3P0jEzy7v5X3X7Q
wKn1Hgf54QxnD4+6zq/XFKgsICcLjUZ7afYqtVYVUR3G5xGDcuLaUCbfychj/c5zqGea52lY+snVJTEzOeu+aBZTmpvHTXSfC9hxPvnWKlCThg5fEdkCoqSxkQU4a/zeMTSg/39vMyISbj11eGbbnmI2t1UUUZ6fxy33hbzacDwsICeK3de2c7Bnmnhj4NjyTS5cUsDg/g8d3nQzbc3zvteMkuYRPXFEVtueYjeQkF3dvXsKOo51h6VwenXDzH283c9O6hRTFaHORT5JLeP9FZbxytJOBMNQSPR7lJztPsLEin/WLo7s4kE+SS/jQpsW8UNcekcEFc2UBIQFMuj1849eHqS7J4tYN0RlvPRtJLuHjV1Sy63hPWFbR6hgY5andp7jt4sUxsRaAzz1bluAS4adhCIS/OdRO3/AEd2+O3eZCf398cRnjk56wdC6/caybxq4hPnb50pBfez4+fkUlLhF+8HpTtIsyLQsICeCpPado7BziwZtXkxzF4XWz8eHNFWSmJvH9146H/Nr/z6/qcHuUB65fHvJrz8fCvHTet66Un+w8Qdfg2MwnzMKTb52kvCCDK2NsMuJ0LqkoYFFeelhG3vzozSYKs1J5/0Wx9eWoLC+DWzeU8dTuk/SPxOZKavHx6WGmNTQ2yTd/W8/mygJujGDitvnKy0jhwzUVPPvOaTrOjIbsum8e6+b/7DvNp66tpqo4K2TXDZW/uWkVIxNu/vXF+pBd82j7AG8c6+aumoqYnIw2FZdL+JNLy3nlaCf17aFLZdLQMcALde3ctbkiJjqTA/3F1dUMjbt54q3wNZfOR/R728y8/K+XGugaHOM7H90U9NDKcLbdz8YnrqzksTeb+PHOE/zNTavmfb0Jt4cv/fIA5QUZ/OV1sVU78FlWks09Wyp4fNdJPn5F5bwHAKgqX3n2ILnpyXxka+z3H/n7s6uq+MHrx/nWi/X820cunff1vPfiEFlpyfzFVbHRdxRo/eI8Lq8u4oevN/FnV1bF3PBwCwhx7MW6dr694xgfrilnk9/Eq1j5wJ/J0qIsblxTyg9eb+KuzRWUF2TO63rfeqGe+o5BvndfTcyl7fD32RtW8ovft/CNXx/mOx+tmde1fn2gjdcbuvnKbetivjM5UGFWKvddUckjO47x1+0DrCydX6r2F+s6eLW+iy/eujam78Wnr1vGfd9/i+++1hhzX1xiKzyZoDV1DfG5p/axfnEuD92+PtrFmbMv3roWVeXvfvYOHs/c02L/fG8z//ZyA3fVVMTsLG2fkpw0Pn3tMp4/2M72eayVMDLu5qu/qmP1whzujbPagc8nr64mKzWZb70wvya0sUk3//SrQyxfkB1zncmBrl1Zws3rFvKtF+ojmgU3GBYQ4lDHwCif+vFeklzCI/duism20mBVFGbyj7eu5c3Gbh57s2lO19jZ2M2D//EOVywr4qsfjI/geP+11WxaWsDfPL1/TsuKqipff66Olr4RvnzburgZTBCoICuVj19Rya9qW9l7Yu6r6f3Lb45yonuYL966Nqp5i4L1ldvXkZrk4h9+URtTay7H/p0z56hvH+CDD7/ByZ5hHv7IpVQUzq+ZJRbcvbmC61aV8PXnDs96EfZn95/mz364myWFmTxy76a4+DAAbxbMR+69lNyMZD75oz30DI3P6vz/+UI9j715gj+7sorLqiOzcHy4fPKaaioKM/jMT/bSPocBBo/vOsl3ftfIvVuXxNwaENMpzU3n729ZzRvHusMyDHmu4uN/j0FV+eW+Fv7kkTcYd3t4+lOXRz1HS6iICP/jQxtYnJ/Bvd/dyc/2nJrxnOHxSR569hD/6Ym3WVOWy+OfvIy8zJQIlDZ0FuSm852P1tAxMMYdj7wRVE1BVfnfrzTwrRfruXNTOf/4R2siUNLwystI4d8/VsPg2CSf+vFeRieCz/fz8pEOvvjLA1y3qoSv3LYujKUMvY9sWcLVK4r50i8PzKvpMJSsUzkO7D/Vx9e21/HW8R4
2lOfxv++9dN4dsLFmQW46v/jLK/nMT/fyd8+8w6v1XfzpZUvZXFlwdvSUqtLSN8Ize5t57I0meocn+MSVlfzD+9eQkuSasTM9FkfhbKzI50d/toW/fuJtPvDw63zhltXcWVNB1hTpNg609PPlbQfZc6KXP9pQxtc/tAGXS+JmEMH5rF6Yy798+GI+/ZPfc/+P9/LPd2xgQe70kwon3R4eeeUY33qxnlWlOfzbRy6Nu2Yzl0v49p9u4mPff4u/fuJt0pJdUe//kmDar0TkZuBbQBLwXVX9esD+NOBHwCagG7hLVZucfV8A/hxwA3+tqs8Hc82p1NTU6J49e4J+cfHsdN8ILx7u4Ondp6ht6acgM4W/e99q7tpcQdIMY82j+QEx3w/dCbeHf/7NEX668ySDY5OU5qZRkpNGZkoyjV2DdA16m1beu2YBn752GTWVf1j4JJ5fd+fAGP/5qX281tBFZmoS71u3kMqiLLLSkmjtH+Wt4z0cON1PYWYq/+XmVdy56Q9zDuL5dQf66a4TPPTsIdJTkviH96/m/ReVkZP+h5rf6ISbV4508J3fNfL2yT5uu3gR/3T7+jnVDudz30L5us+MTnDvv+/i4Ol+Pnl1NZ9778qQj5ITkb2qOuOQthkDgogkAUeBG4FmYDdwj6oe8jvmL4ENqvppEbkb+KCq3iUia4EngC3AIuAFYKVz2nmvOZVEDAijE25O941wqneE+vYB6loHePtkL43O6IPVC3O4Z8sSPnDJYvIygnvTJ8IHxPD4JNtr23i9oYv+kQkGxyapKMjk4oo8rlhWxPIF7x6iGO+vW1XZc6KXn+9t5tcH2+gb9s5mTUt2ccmSfK5YVsx9V1S+630Q7687UGPnIH/7s/38/mQfSS7h4vI8stKSGR53c6RtgMGxSYqz0/jirWu4fR6ZXWMlIIA3KPz37XU88dYplhZl8qlrlvFHG8qC/j8/k1AGhMuBL6vq+5y/vwCgqv/d75jnnWPeFJFkoA0oAR70P9Z3nHPaea85lbkGhIHRCUSE9GTXnKqVqsqkRxmf9Hh/3N7fY35/j064GRl3MzzuZnh8kpEJN0NjbkbGJ73bJtwMj03SNzJB79A4vcPe3wMBOeFLctK4aLH3Q+/qFSWsLM1+14SzWG4iiGazTKJ9ME66PQyNuclITTrvBKZEe93gXQp0d1MPr9V3sbOxm0mPkpmaxJLCTP5oQxmXVxfx9J7msDx3MML1ut841sVDzx7icNsAqckurlxWxEXl+WxYnMdly4rmnLk32IAQzNUXA/69fM3A1umOUdVJEekHipztOwPO9YX0ma4ZMp99ct/ZPORJLm9gSEtJwiWgCh5VFG+WRMW7TZ1tbo8y7vYw15FhgneBjJRkF2nJLjJSkshKS6IwK5Xyggyy0pLJz0ghPzOVkpy0c/7B957onddQvGiI5WAVTvPtv7hQ79t0klzCZdVFcT+CarauWFbMc5+9mtqWfn6+t5k3G7vZcbQTj8ILn79myppxKAUTEKZqsA78eJzumOm2T/V1Z8qPXBG5H7jf+XNQRI5MU85AxUAsL2Jq5ZufuCrfvVEsyDRCcv/C+Lpi+t/33iiUb8U3ZnV4YPmCmq0XTEBoBvxz6pYDgSkKfcc0O01GeUDPDOfOdE0AVPVR4NEgynkOEdkTTBUpWqx882Plmx8r3/wkavmCaVDfDawQkSoRSQXuBrYFHLMNuM95fAfwkno7J7YBd4tImohUASuAt4K8pjHGmAiasYbg9Ak8ADyPd4jo91X1oIg8BOxR1W3A94Afi0gD3prB3c65B0XkaeAQMAn8laq6Aaa6ZuhfnjHGmGAF1WWtqtuB7QHbvuT3eBS4c5pzvwZ8LZhrhtism5kizMo3P1a++bHyzU9Cli+oiWnGGGMSX3zN9TbGGBM2CRMQRKRJRGpFZJ+I7HG2FYrIb0Wk3vldMNN1Ily+L4tIi7Ntn4i8P4rlyxeRZ0TksIjUicjlMXb/pipfTNw/EVn
lV4Z9InJGRD4XK/fvPOWLifvnlPE/i8hBETkgIk+ISLoz6GSXc/+ecgagxFL5figix/3u38Yolu+zTtkOisjnnG2zfv8lTJORiDQBNara5bftfwA9qvp1EXkQKFDVv4+h8n0ZGFTVf45GmfyJyGPAq6r6Xec/XibwD8TO/ZuqfJ8jRu6fj3hTvbTgnWj5V8TI/ZumfJ8gBu6fiCwGXgPWquqIMxBlO/B+4D9U9UkR+TawX1UfiaHyXQf8X1V9JtJlCijfeuBJvCmCxoFfA58BPsks338JU0OYxu3AY87jx4APRLEsMUtEcoFr8I4WQ1XHVbWPGLl/5ylfLLoBOKaqJ4iR+xfAv3yxJBnIEO88pkygFbge8H3YRvv+BZZvynlTUbIG2Kmqw6o6CewAPsgc3n+JFBAU+I2I7BXv7GaAUlVtBXB+L4ha6aYuH8ADIvKOiHw/ik0y1UAn8AMReVtEvisiWcTO/ZuufBAb98/f3XgTOkLs3D9//uWDGLh/qtoC/DNwEm8g6Af2An3OBxycm/Ym6uVT1d84u7/m3L9vijfrczQcAK4RkSIRycRbs6pgDu+/RAoIV6rqpcAtwF+JyDXRLlCAqcr3CLAM2Ij3jfb/RalsycClwCOqegkwhJOYMEZMV75YuX8AOE1ZtwE/i2Y5pjNF+WLi/jmB6HagCm9W5Cy8/08CRaV9e6ryicifAl8AVgObgUIgKs2BqloHfAP4Ld7mov14533NWsIEBFU97fzuAH6Btz2tXUTKAJzfHbFUPlVtV1W3qnqAf3fKHA3NQLOq7nL+fgbvB3Cs3L8pyxdD98/nFuD3qtru/B0r98/nnPLF0P17L3BcVTtVdQL4D+AKIN9pooHzpLeJVvlUtVW9xoAfEMX3n6p+T1UvVdVr8E4OrmcO77+ECAgikiUiOb7HwE14q1H+KTXuA34ZS+Xz/WM5Poi3zBGnqm3AKRFZ5Wy6Ae/s8pi4f9OVL1bun597OLc5Jibun59zyhdD9+8kcJmIZIqI8If338t4U+FAdO/fVOWr8/uwFbzt81F7/4nIAuf3EuBP8P47z/r9lxCjjESkGu+3bvA2Lzyuql8TkSLgaWAJ3n/UO1V1dqu4h7d8P8ZbXVegCfiUr80vCmXcCHwXSAUa8Y5AcRED9+885ftXYuf+ZeJN6V6tqv3Otph4/52nfLH0/vsKcBfepo63gb/A22fwJN7mmLeBP3W+jcdK+Z7Du+6LAPuAT6vqYJTK9yreJQcmgM+r6otzef8lREAwxhgzfwnRZGSMMWb+LCAYY4wBLCAYY4xxWEAwxhgDWEAwxhjjsIBgjDEGsIBgzJyJyHUicsUMx3xaRD42xfZKEYn2RDpjzhHUEprGmCldBwwCb0x3gKp+O2KlMWaeLCAYE8D5Rv+3eGfwvoN3tuc/4p0l3Q3cC2QAnwbcTqKz/6Sqr05xrS/jrDkgIpuA7wPDePPrGxNTLCAY40dE1gH/FW922i4RKcQbGC5TVRWRvwD+i6r+jbNoy2wWmPkB3sCxQ0T+3/C8AmPmzgKCMee6HnjGt7KdqvaIyEXAU04ys1Tg+GwvKiJ5QL6q7nA2/ZipUzwbEzXWqWzMuYR3593/X8C/qepFwKeA9BBd15iYYgHBmHO9CHzYyRSJ02SUh3cdYvhDOmGAASAnmIs6S372i8hVzqZ7Q1NcY0LHAoIxflT1IPA1YIeI7Af+Bfgy8DMnxXCX3+HPAh8UkX0icnUQl/8E8LCIvAmMhLbkxsyfpb82xhgDWA3BGGOMw0YZGRMCIvJfgTsDNv9MVb8WjfIYMxfWZGSMMQawJiNjjDEOCwjGGGMACwjGGGMcFhCMMcYAFhCMMcY4/n96stpJgf0UigAAAABJRU5ErkJggg==\n", 374 | "text/plain": [ 375 | "
" 376 | ] 377 | }, 378 | "metadata": {}, 379 | "output_type": "display_data" 380 | } 381 | ], 382 | "source": [ 383 | "sns.distplot(all_persons.cat_id)" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [] 392 | } 393 | ], 394 | "metadata": { 395 | "kernelspec": { 396 | "display_name": "Python 3", 397 | "language": "python", 398 | "name": "python3" 399 | }, 400 | "language_info": { 401 | "codemirror_mode": { 402 | "name": "ipython", 403 | "version": 3 404 | }, 405 | "file_extension": ".py", 406 | "mimetype": "text/x-python", 407 | "name": "python", 408 | "nbconvert_exporter": "python", 409 | "pygments_lexer": "ipython3", 410 | "version": "3.6.4" 411 | } 412 | }, 413 | "nbformat": 4, 414 | "nbformat_minor": 1 415 | } 416 | -------------------------------------------------------------------------------- /demos/synthesize.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | from synthpop.recipes.starter2 import Starter 5 | from synthpop.synthesizer import synthesize_all 6 | import pandas as pd 7 | import os 8 | import sys 9 | 10 | state_abbr = sys.argv[1] 11 | county_name = sys.argv[2] 12 | 13 | starter = Starter(os.environ["CENSUS"], state_abbr, county_name) 14 | 15 | if len(sys.argv) > 3: 16 | state, county, tract, block_group = sys.argv[3:] 17 | 18 | indexes = [pd.Series( 19 | [state, county, tract, block_group], 20 | index=["state", "county", "tract", "block group"])] 21 | else: 22 | indexes = None 23 | 24 | households, people, fit_quality = synthesize_all(starter, indexes=indexes) 25 | 26 | for geo, qual in fit_quality.items(): 27 | print ('Geography: {} {} {} {}'.format( 28 | geo.state, geo.county, geo.tract, geo.block_group)) 29 | # print ' household chisq: {}'.format(qual.household_chisq) 30 | # print ' household p: {}'.format(qual.household_p) 31 | print (' people chisq: {}'.format(qual.people_chisq)) 32 | 
print (' people p: {}'.format(qual.people_p)) 33 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # Additional requirements for development and testing 2 | 3 | # testing 4 | coveralls 5 | pytest 6 | pytest-cov<2.10 # 2.10 raised errors in Travis 7 | pycodestyle 8 | 9 | # building documentation 10 | numpydoc 11 | sphinx 12 | sphinx_rtd_theme 13 | -------------------------------------------------------------------------------- /scripts/dl_and_slice_pums.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import zipfile 4 | import urllib, urllib2 5 | import pandas as pd, numpy as np 6 | from bs4 import BeautifulSoup 7 | from spandex import TableLoader 8 | 9 | loader = TableLoader() 10 | 11 | soup = BeautifulSoup(urllib2.urlopen("http://www2.census.gov/acs2013_5yr/pums/")) 12 | 13 | tags = soup.find_all(href=re.compile("csv_h..\.zip")) 14 | hpums_links = [] 15 | for t in tags: 16 | hpums_links.append(t['href']) 17 | 18 | tags = soup.find_all(href=re.compile("csv_p..\.zip")) 19 | ppums_links = [] 20 | for t in tags: 21 | ppums_links.append(t['href']) 22 | 23 | pums_links = hpums_links + ppums_links 24 | for pums_file in pums_links: 25 | print pums_file 26 | pums_file_dl = urllib.URLopener() 27 | pums_file_dl.retrieve("http://www2.census.gov/acs2013_5yr/pums/%s" % pums_file, 28 | os.path.join(loader.get_path('pums'), pums_file)) 29 | 30 | for pums_file in pums_links: 31 | filepath = os.path.join(loader.get_path('pums'), pums_file) 32 | 33 | if os.path.exists(filepath): 34 | print 'Unzipping %s' % pums_file 35 | 36 | with zipfile.ZipFile(filepath, "r") as z: 37 | z.extractall(loader.get_path('pums')) 38 | 39 | for pums_file in ['ss13husa.csv', 'ss13husb.csv', 40 | 'ss13husc.csv', 'ss13husd.csv', 41 | 'ss13pusa.csv', 'ss13pusb.csv', 42 | 'ss13pusc.csv', 'ss13pusd.csv']: 
43 | print 'Processing %s' % pums_file 44 | pums = pd.read_csv(os.path.join(loader.get_path('pums'), pums_file)) 45 | 46 | for state_id in np.unique(pums['ST']): 47 | print ' Processing pums for state %s' % state_id 48 | pum_state = pums[pums['ST'] == state_id] 49 | state_id = '{:>02}'.format(state_id) 50 | if pums_file[4] == 'h': 51 | pums_state_filename = 'puma_h_%s.csv' % (state_id) 52 | elif pums_file[4] == 'p': 53 | pums_state_filename = 'puma_p_%s.csv' % (state_id) 54 | pum_state.to_csv(os.path.join(loader.get_path('pums'), pums_state_filename), index = False) 55 | 56 | print ' Slicing up pums files by 2000 pumas' 57 | for puma00 in np.unique(pum_state['PUMA00']): 58 | if puma00 != -9: 59 | print puma00 60 | df = pum_state[pum_state['PUMA00'] == puma00] 61 | puma00 = '{:>05}'.format(puma00) 62 | if pums_file[4] == 'h': 63 | output_filename = 'puma00_h_%s_%s.csv' % (state_id, puma00) 64 | elif pums_file[4] == 'p': 65 | output_filename = 'puma00_p_%s_%s.csv' % (state_id, puma00) 66 | df.to_csv(os.path.join(loader.get_path('pums'), output_filename), index = False) 67 | 68 | print ' Slicing up pums files by 2010 pumas' 69 | for puma10 in np.unique(pum_state['PUMA10']): 70 | if puma10 != -9: 71 | print puma10 72 | df = pum_state[pum_state['PUMA10'] == puma10] 73 | puma10 = '{:>05}'.format(puma10) 74 | if pums_file[4] == 'h': 75 | output_filename = 'puma10_h_%s_%s.csv' % (state_id, puma10) 76 | elif pums_file[4] == 'p': 77 | output_filename = 'puma10_p_%s_%s.csv' % (state_id, puma10) 78 | df.to_csv(os.path.join(loader.get_path('pums'), output_filename), index = False) -------------------------------------------------------------------------------- /scripts/synth_example.py: -------------------------------------------------------------------------------- 1 | from synthpop.recipes.starter2 import Starter 2 | from synthpop.synthesizer import synthesize_all, enable_logging 3 | import os 4 | 5 | def synthesize_county(county): 6 | starter = Starter(os.environ["CENSUS"], 
"CO", county) 7 | synthetic_population = synthesize_all(starter) 8 | return synthetic_population 9 | 10 | synthesize_county('Gilpin County') 11 | -------------------------------------------------------------------------------- /scripts/tract_to_puma00_xref.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib 3 | import zipfile 4 | from spandex import TableLoader 5 | from spandex.io import exec_sql 6 | from spandex.spatialtoolz import conform_srids, tag 7 | import pandas as pd, numpy as np 8 | 9 | import pandas.io.sql as sql 10 | def db_to_df(query): 11 | """Executes SQL query and returns DataFrame.""" 12 | conn = loader.database._connection 13 | return sql.read_frame(query, conn) 14 | 15 | loader = TableLoader() 16 | 17 | # Download puma 2000 geometry zip files 18 | for i in range(73): 19 | if i < 10: 20 | filename = 'p50%s_d00_shp.zip' % i 21 | else: 22 | filename = 'p5%s_d00_shp.zip' % i 23 | 24 | try: 25 | pumageom_file = urllib.URLopener() 26 | pumageom_file.retrieve("http://www2.census.gov/geo/tiger/PREVGENZ/pu/p500shp/%s" % filename, 27 | os.path.join(loader.get_path('puma_geom'), filename)) 28 | print 'Downloading %s' % filename 29 | except: 30 | continue 31 | 32 | # Unzip and add prj file to puma 2000 geometry 33 | for i in range(73): 34 | if i < 10: 35 | filename = 'p50%s_d00_shp.zip' % i 36 | else: 37 | filename = 'p5%s_d00_shp.zip' % i 38 | filepath = os.path.join(loader.get_path('puma_geom'), filename) 39 | 40 | if os.path.exists(filepath): 41 | print 'Unzipping and adding prj to %s' % filename 42 | 43 | with zipfile.ZipFile(filepath, "r") as z: 44 | z.extractall(loader.get_path('puma_geom')) 45 | 46 | # PUMA 2000 shapefile doesn't come with .prj file - create one 47 | shape_prjname = filename[:8] + '.prj' 48 | prj_filepath = os.path.join(loader.get_path('puma_geom'), shape_prjname) 49 | text_file = open(prj_filepath, "w") 50 | 
text_file.write('GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]') 51 | text_file.close() 52 | 53 | ##Next step- do the same for tracts 54 | tract_file = urllib.URLopener() 55 | tract_file.retrieve("http://www2.census.gov/geo/tiger/TIGER2010DP1/Tract_2010Census_DP1.zip", 56 | os.path.join(loader.get_path('tract2010_geom'), "Tract_2010Census_DP1.zip")) 57 | 58 | 59 | with zipfile.ZipFile(os.path.join(loader.get_path('tract2010_geom'), "Tract_2010Census_DP1.zip"), "r") as z: 60 | z.extractall(loader.get_path('tract2010_geom')) 61 | 62 | with loader.database.cursor() as cur: 63 | cur.execute(""" 64 | CREATE EXTENSION IF NOT EXISTS postgis; 65 | CREATE SCHEMA IF NOT EXISTS staging; 66 | """) 67 | loader.database.refresh() 68 | 69 | 70 | shapefiles = { 71 | 'staging.tracts10': 72 | 'tract2010_geom/Tract_2010Census_DP1.shp', 73 | } 74 | 75 | loader.load_shp_map(shapefiles) 76 | 77 | 78 | shapefiles = {} 79 | for i in range(73): 80 | if i < 10: 81 | filename = 'p50%s_d00.shp' % i 82 | else: 83 | filename = 'p5%s_d00.shp' % i 84 | filepath = os.path.join(loader.get_path('puma_geom'), filename) 85 | 86 | if os.path.exists(filepath): 87 | subfile_name = filename[:-4] 88 | shapefiles['staging.%s' % subfile_name] = 'puma_geom/%s' % filename 89 | 90 | loader.load_shp_map(shapefiles) 91 | 92 | 93 | conform_srids(loader.srid, schema=loader.tables.staging, fix=True) 94 | 95 | exec_sql("DROP table if exists staging.puma00;") 96 | 97 | sql_str = "" 98 | for i in range(73): 99 | if i < 10: 100 | filename = 'p50%s_d00.shp' % i 101 | else: 102 | filename = 'p5%s_d00.shp' % i 103 | filepath = os.path.join(loader.get_path('puma_geom'), filename) 104 | 105 | if os.path.exists(filepath): 106 | subfile_name = filename[:-4] 107 | sql_str = sql_str + 'select area, perimeter, puma5, name, geom from staging.%s' % subfile_name 108 | if i < 72: 109 | sql_str = sql_str + ' UNION ALL ' 110 | 111 | 
sql_str = 'with a as (' + sql_str + ') select * into staging.puma00 from a' 112 | exec_sql(sql_str) 113 | 114 | exec_sql('ALTER TABLE staging.puma00 ADD COLUMN gid BIGSERIAL PRIMARY KEY') 115 | 116 | exec_sql(""" 117 | CREATE INDEX puma00_gist ON staging.puma00 118 | USING gist (geom); 119 | """) 120 | 121 | loader.database.refresh() 122 | 123 | # Tag tracts with a parcel_id 124 | tag(loader.tables.staging.tracts10, 'puma00_id', loader.tables.staging.puma00, 'puma5') 125 | 126 | tract10_puma10_rel_file = urllib.URLopener() 127 | tract10_puma10_rel_file.retrieve("http://www2.census.gov/geo/docs/maps-data/data/rel/2010_Census_Tract_to_2010_PUMA.txt", 128 | os.path.join(loader.get_path('tract2010_geom'), 'tract10_puma10_rel_file.csv')) 129 | 130 | tract10_puma10_rel = pd.read_csv(os.path.join(loader.get_path('tract2010_geom'), 'tract10_puma10_rel_file.csv'), 131 | dtype={ 132 | "STATEFP": "object", 133 | "COUNTYFP": "object", 134 | "TRACTCE": "object", 135 | "PUMA5CE": "object" 136 | }) 137 | tract10_puma00 = db_to_df('select geoid10, namelsad10, puma00_id from staging.tracts10;') 138 | 139 | ##Need statefp/countyfp/tractce columns on tracts (split from geoid) 140 | tract10_puma00['STATEFP'] = tract10_puma00.geoid10.str.slice(0,2) 141 | tract10_puma00['COUNTYFP'] = tract10_puma00.geoid10.str.slice(2,5) 142 | tract10_puma00['TRACTCE'] = tract10_puma00.geoid10.str.slice(5,) 143 | 144 | print len(tract10_puma00) 145 | print len(tract10_puma10_rel) 146 | 147 | tract_puma_xref = pd.merge(tract10_puma10_rel, tract10_puma00, 148 | left_on = ['STATEFP', 'COUNTYFP', 'TRACTCE'], right_on = ['STATEFP', 'COUNTYFP', 'TRACTCE']) 149 | 150 | tract_puma_xref = tract_puma_xref.rename(columns = {'STATEFP':'statefp', 'COUNTYFP':'countyfp', 'TRACTCE':'tractce', 151 | 'PUMA5CE':'puma10_id'}) 152 | 153 | tract_puma_xref = tract_puma_xref[['statefp', 'countyfp', 'tractce', 'puma10_id', 'puma00_id']] 154 | 155 | tract_puma_xref.to_csv('tract10_to_puma.csv', index = False) 156 | 157 | 
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | # these are the standard ignores plus E402, E722, and E741, which weren't enforced when 3 | # the codebase was first written 4 | ignore = E121,E123,E126,E133,E226,E241,E242,E704,W503,W504,W505,E402,E722,E741 5 | max-line-length = 100 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='synthpop', 5 | version='0.1.1', 6 | description='Population Synthesis', 7 | author='UrbanSim Inc.', 8 | author_email='udst@urbansim.com', 9 | license='BSD', 10 | url='https://github.com/udst/synthpop', 11 | classifiers=[ 12 | 'Development Status :: 4 - Beta', 13 | 'Programming Language :: Python :: 2.7', 14 | 'Programming Language :: Python :: 3.5', 15 | 'Programming Language :: Python :: 3.6', 16 | 'Programming Language :: Python :: 3.7' 17 | ], 18 | packages=find_packages(exclude=['*.tests']), 19 | install_requires=[ 20 | 'census>=0.5', 21 | 'numexpr>=2.3.1', 22 | 'numpy>=1.16.5 ', 23 | 'pandas>=0.15.0', 24 | 'scipy>=0.13.3', 25 | 'us>=0.8' 26 | ] 27 | ) 28 | -------------------------------------------------------------------------------- /synthpop/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.1' 2 | version = __version__ 3 | -------------------------------------------------------------------------------- /synthpop/categorizer.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | # TODO DOCSTRINGS!! 
def categorize(df, eval_d, index_cols=None):
    """
    Evaluate a dict of category expressions against ``df``.

    ``eval_d`` maps ``(cat_name, cat_value)`` tuples to pandas ``eval``
    expressions; the result is a DataFrame with a two-level column
    MultiIndex (``cat_name`` / ``cat_value``), sorted by column, one
    evaluated column per expression.  If ``index_cols`` is given those
    columns are copied from ``df`` and used as the result's index.
    """
    result = pd.DataFrame(index=df.index)

    # evaluate each expression; the tuple key becomes the column label
    for col_key, expression in eval_d.items():
        result[col_key] = df.eval(expression)

    if index_cols is not None:
        result[index_cols] = df[index_cols]
        result = result.set_index(index_cols)

    result.columns = pd.MultiIndex.from_tuples(
        result.columns, names=['cat_name', 'cat_value'])

    return result.sort_index(axis=1)


def sum_accross_category(df, subtract_mean=True):
    """
    Sum the categorical values within each category.

    The mean across each category is then subtracted (when
    ``subtract_mean`` is True) so all cells in the table should be close
    to zero; they are not exactly zero because of rounding errors when
    scaling tract variables down to block-group variables.
    """
    totals = df.stack(level=1).fillna(0).groupby(level=0).sum()
    if not subtract_mean:
        return totals
    return totals.sub(totals.mean(axis=1), axis="rows")
46 | """ 47 | d = {} 48 | for cat_name, cat_value in index: 49 | d.setdefault(cat_name, []) 50 | d[cat_name].append(cat_value) 51 | for cat_name in list(d): 52 | if len(d[cat_name]) == 1: 53 | del d[cat_name] 54 | df = pd.DataFrame(list(itertools.product(*list(d.values())))) 55 | df.columns = cols = list(d.keys()) 56 | df.index.name = "cat_id" 57 | df = df.reset_index().set_index(cols) 58 | return df 59 | 60 | 61 | def joint_distribution(sample_df, category_df, mapping_functions=None): 62 | 63 | # set counts to zero 64 | category_df["frequency"] = 0 65 | 66 | category_names = list(category_df.index.names) 67 | if mapping_functions: 68 | for name in category_names: 69 | assert name in mapping_functions, "Every category needs to have " \ 70 | "mapping function with the same a " \ 71 | "name to define that category for " \ 72 | "the pums sample records" 73 | sample_df[name] = sample_df.apply(mapping_functions[name], 74 | axis=1).astype('category') 75 | 76 | category_df["frequency"] = sample_df.groupby(category_names).size() 77 | category_df["frequency"] = category_df["frequency"].fillna(0) 78 | 79 | # do the merge to add the category id 80 | sample_df = pd.merge(sample_df, category_df[["cat_id"]], 81 | left_on=category_names, right_index=True) 82 | 83 | return sample_df, category_df 84 | 85 | 86 | def _frequency_table(sample_df, category_ids): 87 | """ 88 | Take the result that comes out of the method above and turn it in to the 89 | frequencytable format used by the ipu 90 | """ 91 | df = sample_df.groupby(['hh_id', 'cat_id']).size().unstack().fillna(0) 92 | 93 | # need to manually add in case we missed a whole cat_id in the sample 94 | missing_ids = list(set(category_ids) - set(df.columns)) 95 | if missing_ids: 96 | missing_df = pd.DataFrame( 97 | data=np.zeros((len(df), len(missing_ids))), 98 | index=df.index, 99 | columns=missing_ids) 100 | df = df.merge(missing_df, left_index=True, right_index=True) 101 | 102 | assert len(df.columns) == len(category_ids) 103 | 
def frequency_tables(persons_sample_df, households_sample_df,
                     person_cat_ids, household_cat_ids):
    """
    Build the household and person frequency tables used by the IPU.

    Households are re-keyed by a positional ``hh_id`` (their original
    index) with ``serialno`` as the lookup index; persons are joined to
    their household's ``hh_id`` via ``serialno``.  Both tables share the
    same row index and are returned with columns sorted by ``cat_id``.
    """
    # expose the household index as an hh_id column, key rows by serialno
    households_sample_df.index.name = "hh_id"
    households_sample_df = (households_sample_df
                            .reset_index()
                            .set_index("serialno"))

    hh_freq = _frequency_table(households_sample_df, household_cat_ids)

    # tag each person with its household's hh_id
    persons_sample_df = pd.merge(persons_sample_df,
                                 households_sample_df[["hh_id"]],
                                 left_on=["serialno"], right_index=True)

    person_freq = _frequency_table(persons_sample_df, person_cat_ids)
    # align person rows to households; households without persons get 0s
    person_freq = person_freq.reindex(hh_freq.index).fillna(0)
    assert len(hh_freq) == len(person_freq)

    return hh_freq.sort_index(axis=1), person_freq.sort_index(axis=1)
    def __init__(self, key, acsyear=2016):
        """Wrap the Census API client and remote PUMS file locations.

        Parameters
        ----------
        key : str
            Census API key.
        acsyear : int, optional
            ACS 5-year release year; selects which hosted PUMS storage
            bucket and support files are used.
        """
        # API client reuses the retrying session created at module level
        self.c = census.Census(key, session=sess)
        # remote storage roots: PUMS csvs and geography-change support files
        self.base_url = synthpop_config(acsyear).pums_storage()
        self.support_files = geog_changes_path(acsyear).geog_change_storage()
        self.acsyear_files = acsyear
        # tract -> puma crosswalk; lazily loaded into pums_relationship_df
        self.pums_relationship_file_url = self.support_files + "tract10_to_puma.csv"
        self.pums_relationship_df = None
        # per-puma csv name templates, filled with (state fips, puma id)
        self.pums10_population_base_url = \
            self.base_url + "puma10_p_%s_%s.csv"
        self.pums10_household_base_url = \
            self.base_url + "puma10_h_%s_%s.csv"
        self.pums00_population_base_url = \
            self.base_url + "puma00_p_%s_%s.csv"
        self.pums00_household_base_url = \
            self.base_url + "puma00_h_%s_%s.csv"
        # statewide csv name templates, filled with (state fips,)
        self.pums_population_state_base_url = \
            self.base_url + "puma_p_%s.csv"
        self.pums_household_state_base_url = \
            self.base_url + "puma_h_%s.csv"
        # state/county name -> fips lookup; lazily loaded into fips_df
        self.fips_url = self.base_url + "national_county.txt"
        self.fips_df = None
        # cache of already-downloaded PUMS frames, keyed by csv location
        self.pums_cache = {}
58 | df[col] = df[col].fillna(0).astype('int') 59 | return df 60 | 61 | def block_group_query(self, census_columns, state, county, year=2016, 62 | tract=None, id=None): 63 | if id is None: 64 | id = "*" 65 | return self._query(census_columns, state, county, 66 | forstr="block group:%s" % id, 67 | tract=tract, year=year) 68 | 69 | def tract_query(self, census_columns, state, county, year=2016, 70 | tract=None): 71 | if tract is None: 72 | tract = "*" 73 | return self._query(census_columns, state, county, 74 | forstr="tract:%s" % tract, 75 | year=year) 76 | 77 | def _query(self, census_columns, state, county, forstr, 78 | year, tract=None): 79 | c = self.c 80 | 81 | state, county = self.try_fips_lookup(state, county) 82 | 83 | if tract is None: 84 | in_str = 'state:%s county:%s' % (state, county) 85 | else: 86 | in_str = 'state:%s county:%s tract:%s' % (state, county, tract) 87 | 88 | dfs = [] 89 | 90 | # unfortunately the api only queries 50 columns at a time 91 | # leave room for a few extra id columns 92 | def chunks(l, n): 93 | """ Yield successive n-sized chunks from l. 
94 | """ 95 | for i in range(0, len(l), n): 96 | yield l[i:i+n] 97 | 98 | for census_column_batch in chunks(census_columns, 45): 99 | census_column_batch = list(census_column_batch) 100 | d = c.acs5.get(['NAME'] + census_column_batch, 101 | geo={'for': forstr, 102 | 'in': in_str}, year=year) 103 | df = pd.DataFrame(d) 104 | df[census_column_batch] = df[census_column_batch].astype('int') 105 | dfs.append(df) 106 | 107 | assert len(dfs) >= 1 108 | df = dfs[0] 109 | for mdf in dfs[1:]: 110 | df = pd.merge(df, mdf, on="NAME", suffixes=("", "_ignore")) 111 | drop_cols = list(filter(lambda x: "_ignore" in x, df.columns)) 112 | df = df.drop(drop_cols, axis=1) 113 | 114 | return df 115 | 116 | def block_group_and_tract_query(self, block_group_columns, 117 | tract_columns, state, county, 118 | merge_columns, block_group_size_attr, 119 | tract_size_attr, year=2016, tract=None): 120 | df2 = self.tract_query(tract_columns, state, county, tract=tract, 121 | year=year) 122 | df1 = self.block_group_query(block_group_columns, state, county, 123 | tract=tract, year=year) 124 | 125 | df = self._scale_and_merge(df1, block_group_size_attr, df2, 126 | tract_size_attr, tract_columns, 127 | merge_columns, suffixes=("", "_ignore")) 128 | drop_cols = list(filter(lambda x: "_ignore" in x, df.columns)) 129 | df = df.drop(drop_cols, axis=1) 130 | 131 | return df 132 | 133 | def update_geographies(self, df): 134 | acsyear = self.acsyear_files 135 | changes = pd.read_csv(self.support_files + 'geog_changes.csv', 136 | dtype={'new_geog': 'str', 'old_geog': 'str'}) 137 | for year in range(2011, acsyear): 138 | year_change = changes[changes['year'] == year].copy() 139 | import pdb 140 | if len(year_change) > 0: 141 | for index, row in year_change.iterrows(): 142 | new = row['new_geog'] 143 | old = row['old_geog'] 144 | state_new = new[:2] 145 | state_old = old[:2] 146 | county_new = new[2:5] 147 | county_old = old[2:5] 148 | if len(new) > 5: 149 | tract_new = new[5:] 150 | tract_old = old[5:] 151 | 
idx = df.index.max() + 1 152 | df.loc[idx, 'statefp'] = state_new 153 | df.loc[idx, 'countyfp'] = county_new 154 | df.loc[idx, 'tractce'] = tract_new 155 | old_puma10 = df[(df['statefp'] == state_old) & 156 | (df['countyfp'] == county_old) & 157 | (df['tractce'] == tract_old)]['puma10_id'].values[0] 158 | old_puma00 = df[(df['statefp'] == state_old) & 159 | (df['countyfp'] == county_old) & 160 | (df['tractce'] == tract_old)]['puma00_id'].values[0] 161 | df.loc[idx, 'puma10_id'] = old_puma10 162 | df.loc[idx, 'puma00_id'] = old_puma00 163 | else: 164 | df_change = df[(df['statefp'] == state_old) & 165 | (df['countyfp'] == county_old)].copy() 166 | df_change.loc[:, 'countyfp'] = county_new 167 | df = pd.concat([df, df_change]) 168 | return df 169 | 170 | def _get_pums_relationship(self): 171 | if self.pums_relationship_df is None: 172 | self.pums_relationship_df = \ 173 | pd.read_csv(self.pums_relationship_file_url, dtype={ 174 | "statefp": "object", 175 | "countyfp": "object", 176 | "tractce": "object", 177 | "puma10_id": "object", 178 | "puma00_id": "object", 179 | }) 180 | self.pums_relationship_df = self.update_geographies(self.pums_relationship_df) 181 | return self.pums_relationship_df 182 | 183 | def _get_fips_lookup(self): 184 | if self.fips_df is None: 185 | self.fips_df = pd.read_csv( 186 | self.fips_url, 187 | dtype={ 188 | "State ANSI": "object", 189 | "County ANSI": "object" 190 | }, 191 | index_col=["State", 192 | "County Name"] 193 | ) 194 | del self.fips_df["ANSI Cl"] 195 | return self.fips_df 196 | 197 | def tract_to_puma(self, state, county, tract): 198 | 199 | state, county = self.try_fips_lookup(state, county) 200 | 201 | df = self._get_pums_relationship() 202 | q = "statefp == '%s' and countyfp == '%s' and tractce == '%s'" % (state, county, tract) 203 | r = df.query(q) 204 | return r["puma10_id"].values[0], r["puma00_id"].values[0] 205 | 206 | def _read_csv(self, loc, **kargs): 207 | if loc not in self.pums_cache: 208 | pums_df = pd.read_csv(loc, 
dtype={ 209 | "PUMA10": "object", 210 | "PUMA00": "object", 211 | "ST": "object", 212 | "SERIALNO": 'str', 213 | "serialno": 'str', 214 | }, **kargs) 215 | pums_df = pums_df.rename(columns={ 216 | 'PUMA10': 'puma10', 217 | 'PUMA00': 'puma00', 218 | 'SERIALNO': 'serialno' 219 | }) 220 | self.pums_cache[loc] = pums_df 221 | return self.pums_cache[loc] 222 | 223 | def download_population_pums(self, state, puma10=None, puma00=None, **kargs): 224 | state = self.try_fips_lookup(state) 225 | if (puma10 is None) & (puma00 is None): 226 | return self._read_csv(self.pums_population_state_base_url % (state), **kargs) 227 | pums = self._read_csv(self.pums10_population_base_url % (state, puma10), **kargs) 228 | if (puma00 is not None) & (self.acsyear_files < 2018): 229 | pums00 = self._read_csv(self.pums00_population_base_url % (state, puma00), **kargs) 230 | pums = pd.concat([pums, pums00], ignore_index=True) 231 | return pums 232 | 233 | def download_household_pums(self, state, puma10=None, puma00=None, **kargs): 234 | state = self.try_fips_lookup(state) 235 | if (puma10 is None) & (puma00 is None): 236 | return self._read_csv(self.pums_household_state_base_url % (state), **kargs) 237 | pums = self._read_csv(self.pums10_household_base_url % (state, puma10), **kargs) 238 | if (puma00 is not None) & (self.acsyear_files < 2018): 239 | pums00 = self._read_csv(self.pums00_household_base_url % (state, puma00), **kargs) 240 | pums = pd.concat([pums, pums00], ignore_index=True) 241 | 242 | # filter out gq and empty units (non-hh records) 243 | pums = pums[(pums.RT == 'H') & (pums.NP > 0) & (pums.TYPE == 1)] 244 | 245 | return pums 246 | 247 | def try_fips_lookup(self, state, county=None): 248 | df = self._get_fips_lookup() 249 | 250 | if county is None: 251 | try: 252 | return getattr(us.states, state).fips 253 | except: 254 | pass 255 | return state 256 | 257 | try: 258 | return df.loc[(state, county)] 259 | except: 260 | pass 261 | return state, county 262 | 
-------------------------------------------------------------------------------- /synthpop/config.py: -------------------------------------------------------------------------------- 1 | class synthpop_config: 2 | 3 | def __init__(self, acsyear=2013): 4 | self.acsyear = acsyear 5 | 6 | def pums_storage(self): 7 | if self.acsyear >= 2018: 8 | storage = "https://storage.googleapis.com/synthpop-public/PUMS2018/pums_2018_acs5/" 9 | else: 10 | storage = "https://s3-us-west-1.amazonaws.com/synthpop-data2/" 11 | return storage 12 | 13 | def __call__(self): 14 | return self.pums_storage() 15 | 16 | 17 | class geog_changes_path: 18 | def __init__(self, acsyear): 19 | self.acsyear = acsyear 20 | 21 | def geog_change_storage(self): 22 | storage = "https://storage.googleapis.com/synthpop-public/support_files/" 23 | return storage 24 | 25 | def __call__(self): 26 | return self.geog_change_storage() 27 | -------------------------------------------------------------------------------- /synthpop/draw.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from scipy.stats import chisquare 6 | 7 | from .ipu.ipu import _FrequencyAndConstraints 8 | 9 | 10 | def simple_draw(num, weights, index): 11 | """ 12 | Choose among indexes based on weights using a simple random draw. 13 | 14 | Parameters 15 | ---------- 16 | num : int 17 | Number of items to draw from `index`. 18 | weights : array 19 | Array of weights corresponding to each value in `index`. 20 | Must be the same length as `index`. 21 | index : array 22 | Array of values from which to draw. Must be the same 23 | length as `weights`. 24 | 25 | Returns 26 | ------- 27 | draw : array 28 | Array of indexes drawn based on weights. 
29 | 30 | """ 31 | p = weights / weights.sum() 32 | return np.random.choice(index, size=num, p=p, replace=True) 33 | 34 | 35 | def _draw_indexes(num, fac, weights): 36 | """ 37 | Construct a set of indexes that can be used to index a complete 38 | set of synthetic households. 39 | 40 | Parameters 41 | ---------- 42 | num : int 43 | The total number of households to draw. 44 | fac : _FrequencyAndConstraints 45 | weights : pandas.Series 46 | 47 | Returns 48 | ------- 49 | idx : pandas.Index 50 | Will be drawn from the index of `weights`. 51 | 52 | """ 53 | idx = [] 54 | constraint_diffs = [] 55 | 56 | for col_name, _, constraint, nz in fac.iter_columns(): 57 | if len(nz) == 0: 58 | continue 59 | 60 | flr_constraint = int(np.floor(constraint)) 61 | constraint_diffs.append((col_name, constraint - flr_constraint)) 62 | 63 | if flr_constraint > 0: 64 | wts = weights.values[nz] 65 | idx.extend( 66 | simple_draw(flr_constraint, wts, weights.index.values[nz])) 67 | 68 | if len(idx) < num: 69 | num_to_add = num - len(idx) 70 | 71 | if num_to_add > len(weights): 72 | raise RuntimeError( 73 | 'There is a mismatch between the constraints and the total ' 74 | 'number of households to draw. The total to draw appears ' 75 | 'to be higher than indicated by the constraints.') 76 | 77 | constraint_diffs = sorted( 78 | constraint_diffs, key=lambda x: x[1], reverse=True)[:num_to_add] 79 | 80 | for col_name, _ in constraint_diffs: 81 | _, _, _, nz = fac.get_column(col_name) 82 | wts = weights.values[nz] 83 | idx.extend(simple_draw(1, wts, weights.index.values[nz])) 84 | 85 | return pd.Index(idx) 86 | 87 | 88 | def execute_draw(indexes, h_pums, p_pums, hh_index_start=0): 89 | """ 90 | Take new household indexes and create new household and persons tables 91 | with updated indexes and relations. 92 | 93 | Parameters 94 | ---------- 95 | indexes : array 96 | Will be used to index `h_pums` into a new table. 97 | h_pums : pandas.DataFrame 98 | Table of household data. 
Expected to have a "serialno" column 99 | that matches `p_pums`. 100 | p_pums : pandas.DataFrame 101 | Table of person data. Expected to have a "serialno" columns 102 | that matches `h_pums`. 103 | hh_index_start : int, optional 104 | The starting point for new indexes on the synthesized 105 | households table. 106 | 107 | Returns 108 | ------- 109 | synth_hh : pandas.DataFrame 110 | Index will match the ``hh_id`` column in `synth_people`. 111 | synth_people : pandas.DataFrame 112 | Will be related to `synth_hh` by the ``hh_id`` column. 113 | 114 | """ 115 | synth_hh = h_pums.loc[indexes].reset_index(drop=True) 116 | synth_hh.index += hh_index_start 117 | 118 | mrg_tbl = pd.DataFrame( 119 | {'serialno': synth_hh.serialno.values, 120 | 'hh_id': synth_hh.index.values}) 121 | synth_people = pd.merge( 122 | p_pums, mrg_tbl, left_on='serialno', right_on='serialno') 123 | 124 | return synth_hh, synth_people 125 | 126 | 127 | def compare_to_constraints(synth, constraints): 128 | """ 129 | Compare the results of a synthesis draw to the target constraints. 130 | 131 | This comparison performs chi square test between the synthesized 132 | category counts and the target constraints used as inputs for the IPU. 133 | 134 | Parameters 135 | ---------- 136 | synth : pandas.Series 137 | Series of category IDs from synthesized table. 138 | constraints : pandas.Series 139 | Target constraints used in IPU step. 140 | 141 | Returns 142 | ------- 143 | chisq : float 144 | The chi squared test statistic. 145 | p : float 146 | The p-value of the test. 147 | 148 | See Also 149 | -------- 150 | scipy.stats.chisquare : Calculates a one-way chi square test. 
151 | 152 | """ 153 | counts = synth.value_counts() 154 | 155 | # need to add zeros to counts for any categories that are 156 | # in the constraints but not in the counts 157 | diff = constraints.index.difference(counts.index) 158 | counts = counts.combine_first( 159 | pd.Series(np.zeros(len(diff), dtype='int'), index=diff)) 160 | 161 | counts, constraints = counts.align(constraints) 162 | 163 | # remove any items that are zero in the constraints 164 | w = constraints >= 1 165 | counts, constraints = counts[w], constraints[w] 166 | 167 | return chisquare(counts.values, constraints.values) 168 | 169 | 170 | def draw_households( 171 | num, h_pums, p_pums, household_freq, household_constraints, 172 | person_constraints, weights, hh_index_start=0): 173 | """ 174 | Draw households and persons according to weights from the IPU. 175 | 176 | Parameters 177 | ---------- 178 | num : int 179 | The total number of households to draw. 180 | h_pums : pandas.DataFrame 181 | Table of household data. Expected to have a "serialno" column 182 | that matches `p_pums`. 183 | p_pums : pandas.DataFrame 184 | Table of person data. Expected to have a "serialno" columns 185 | that matches `h_pums`. 186 | household_freq : pandas.DataFrame 187 | Frequency table for household attributes. Columns should be 188 | a MultiIndex matching the index of `household_constraints` and 189 | index should be household IDs matching the index `h_pums` 190 | and `weights`. 191 | household_constraints : pandas.Series 192 | Target marginal constraints for household classes. 193 | Index must be the same as the columns of `household_freq`. 194 | person_constraints : pandas.Series 195 | Target marginal constraints for person classes. 196 | Index must be the same as the columns of `person_freq`. 197 | weights : pandas.Series 198 | Weights from IPU. Index should match `h_pums` and `household_freq`. 199 | hh_index_start : int, optional 200 | Index at which to start the indexing of returned households. 
201 | 202 | Returns 203 | ------- 204 | best_households : pandas.DataFrame 205 | Index will match the ``hh_id`` column in `synth_people`. 206 | best_people : pandas.DataFrame 207 | Will be related to `best_households` by the ``hh_id`` column. 208 | people_chisq : float 209 | people_p : float 210 | 211 | """ 212 | if num == 0: 213 | return ( 214 | pd.DataFrame(columns=h_pums.columns), 215 | pd.DataFrame(columns=p_pums.columns.append(pd.Index(['hh_id']))), 216 | 0, 1) 217 | 218 | fac = _FrequencyAndConstraints(household_freq, household_constraints) 219 | 220 | best_chisq = np.inf 221 | 222 | for _ in range(20): 223 | indexes = _draw_indexes(num, fac, weights) 224 | synth_hh, synth_people = execute_draw( 225 | indexes, h_pums, p_pums, hh_index_start=hh_index_start) 226 | people_chisq, people_p = compare_to_constraints( 227 | synth_people.cat_id, person_constraints) 228 | 229 | if people_chisq < best_chisq: 230 | best_chisq = people_chisq 231 | best_p = people_p 232 | best_households, best_people = synth_hh, synth_people 233 | 234 | return best_households, best_people, best_chisq, best_p 235 | -------------------------------------------------------------------------------- /synthpop/ipf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UDST/synthpop/6fb13991c9d3ede2d8cf80512bd1102e37b98971/synthpop/ipf/__init__.py -------------------------------------------------------------------------------- /synthpop/ipf/ipf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def calculate_constraints( 6 | marginals, joint_dist, tolerance=1e-3, max_iterations=1000): 7 | """ 8 | Calculate constraints on household or person classes using 9 | single category marginals and the observed class proportions 10 | in a population sample. 
11 | 12 | Constraints are calculated via an iterative proportional fitting 13 | procedure. 14 | 15 | Parameters 16 | ---------- 17 | marginals : pandas.Series 18 | The total count of each observed subcategory tracked. 19 | This should have a pandas.MultiIndex with the outer level containing 20 | high-level category descriptions and the inner level containing 21 | the individual subcategory breakdowns. 22 | joint_dist : pandas.Series 23 | The observed counts of each household or person class in some sample. 24 | The index will be a pandas.MultiIndex with a level for each observed 25 | class in the sample. The levels should be named for ease of 26 | introspection. 27 | tolerance : float, optional 28 | The condition for stopping the IPF procedure. If the change in 29 | constraints is less than or equal to this value after an iteration 30 | the calculations are stopped. 31 | max_iterations : int, optional 32 | Maximum number of iterations to do before stopping and raising 33 | an exception. 34 | 35 | Returns 36 | ------- 37 | constraints : pandas.Series 38 | Will have the index of `joint_dist` and contain the desired 39 | totals for each class. 40 | iterations : int 41 | Number of iterations performed. 
42 | 43 | """ 44 | flat_joint_dist = joint_dist.reset_index() 45 | 46 | constraints = joint_dist.values.copy().astype('float') 47 | prev_constraints = constraints.copy() 48 | prev_constraints += tolerance # ensure we run at least one iteration 49 | 50 | def calc_diff(x, y): 51 | return np.abs(x - y).sum() 52 | 53 | iterations = 0 54 | 55 | list_of_loc = [ 56 | ((flat_joint_dist[idx[0]] == idx[1]).values, marginals[idx]) 57 | for idx in marginals.index 58 | ] 59 | 60 | while calc_diff(constraints, prev_constraints) > tolerance: 61 | prev_constraints[:] = constraints 62 | 63 | for loc, target in list_of_loc: 64 | constraints[loc] *= target / constraints[loc].sum() 65 | 66 | iterations += 1 67 | 68 | if iterations > max_iterations: 69 | raise RuntimeError( 70 | 'Maximum number of iterations reached during IPF: {}'.format( 71 | max_iterations)) 72 | 73 | return pd.Series(constraints, index=joint_dist.index), iterations 74 | -------------------------------------------------------------------------------- /synthpop/ipf/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UDST/synthpop/6fb13991c9d3ede2d8cf80512bd1102e37b98971/synthpop/ipf/test/__init__.py -------------------------------------------------------------------------------- /synthpop/ipf/test/test_ipf.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from pandas.util import testing as pdt 4 | 5 | from .. import ipf 6 | 7 | 8 | def test_trivial_ipf(): 9 | # Test IPF in a situation where the desired totals and observed 10 | # sample have the same proportion and there is only one super-category. 
11 | midx = pd.MultiIndex.from_product([('cat_owner',), ('yes', 'no')]) 12 | marginals = pd.Series([60, 40], index=midx) 13 | joint_dist = pd.Series( 14 | [6, 4], index=pd.Series(['yes', 'no'], name='cat_owner')) 15 | 16 | expected = pd.Series(marginals.values, index=joint_dist.index) 17 | constraints, iterations = ipf.calculate_constraints(marginals, joint_dist) 18 | 19 | pdt.assert_series_equal(constraints, expected, check_dtype=False) 20 | assert iterations == 2 21 | 22 | 23 | def test_larger_ipf(): 24 | # Test IPF with some data that's slightly more meaningful, 25 | # but for which it's harder to know the actual correct answer. 26 | marginal_midx = pd.MultiIndex.from_tuples( 27 | [('cat_owner', 'yes'), 28 | ('cat_owner', 'no'), 29 | ('car_color', 'blue'), 30 | ('car_color', 'red'), 31 | ('car_color', 'green')]) 32 | marginals = pd.Series([60, 40, 50, 30, 20], index=marginal_midx) 33 | joint_dist_midx = pd.MultiIndex.from_product( 34 | [('yes', 'no'), ('blue', 'red', 'green')], 35 | names=['cat_owner', 'car_color']) 36 | joint_dist = pd.Series([8, 4, 2, 5, 3, 2], index=joint_dist_midx) 37 | 38 | expected = pd.Series( 39 | [31.78776824, 17.77758309, 10.43464846, 40 | 18.21223176, 12.22241691, 9.56535154], 41 | index=joint_dist.index) 42 | constraints, _ = ipf.calculate_constraints(marginals, joint_dist) 43 | 44 | pdt.assert_series_equal(constraints, expected, check_dtype=False) 45 | 46 | with pytest.raises(RuntimeError): 47 | ipf.calculate_constraints(marginals, joint_dist, max_iterations=2) 48 | -------------------------------------------------------------------------------- /synthpop/ipu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UDST/synthpop/6fb13991c9d3ede2d8cf80512bd1102e37b98971/synthpop/ipu/__init__.py -------------------------------------------------------------------------------- /synthpop/ipu/ipu.py: 
-------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | from __future__ import division 4 | 5 | import itertools 6 | from collections import OrderedDict 7 | import warnings 8 | 9 | import numpy as np 10 | import pandas as pd 11 | 12 | 13 | def _drop_zeros(df): 14 | """ 15 | Drop zeros from a DataFrame, returning an iterator over the columns 16 | in the DataFrame. 17 | 18 | Yields tuples of (column name, non-zero column values, non-zero indexes). 19 | 20 | Parameters 21 | ---------- 22 | df : pandas.DataFrame 23 | 24 | """ 25 | def for_each_col(col): 26 | nz = col.values.nonzero()[0] 27 | return col.iloc[nz], nz 28 | 29 | for (col_idx, (col, nz)) in df.apply(for_each_col, axis=0, raw=False).items(): 30 | yield (col_idx, col.values, nz) 31 | 32 | 33 | class _FrequencyAndConstraints(object): 34 | """ 35 | Wrap frequency tables and constraints for both household and 36 | person classes for easy iteration over all of them. 37 | 38 | Also tracks the locations of non-zero elements in each column 39 | of the frequency tables. If including person classes, both 40 | `person_freq` and `person_constraints` are required. 41 | 42 | Parameters 43 | ---------- 44 | household_freq : pandas.DataFrame 45 | Frequency table for household attributes. Columns should be 46 | a MultiIndex matching the index of `household_constraints` and 47 | index should be household IDs matching the index of 48 | `person_freq`. 49 | household_constraints : pandas.Series 50 | Target marginal constraints for household classes. 51 | Index must be the same as the columns of `household_freq`. 52 | person_freq : pandas.DataFrame, optional 53 | Frequency table for household person. Columns should be 54 | a MultiIndex matching the index of `person_constraints` and 55 | index should be household IDs matching the index of 56 | `household_freq`. 57 | person_constraints : pandas.Series, optional 58 | Target marginal constraints for person classes. 
59 | Index must be the same as the columns of `person_freq`. 60 | 61 | Attributes 62 | ---------- 63 | ncols : int 64 | Total number household_wof columns across household and person classes. 65 | 66 | """ 67 | 68 | def __init__(self, household_freq, household_constraints, person_freq=None, 69 | person_constraints=None): 70 | 71 | hh_cols = ((key, col, household_constraints[key], nz) 72 | for key, col, nz in _drop_zeros(household_freq)) 73 | 74 | has_pers = person_freq is not None and person_constraints is not None 75 | if has_pers: 76 | p_cols = ((key, col, person_constraints[key], nz) 77 | for key, col, nz in _drop_zeros(person_freq)) 78 | else: 79 | p_cols = [] 80 | 81 | self._everything = OrderedDict( 82 | (t[0], t) for t in itertools.chain(hh_cols, p_cols)) 83 | self.ncols = len(self._everything) 84 | 85 | """ 86 | Check for problems in the resulting keys. 87 | These typically arise when column names are shared accross 88 | households and persons. 89 | """ 90 | keys = set([c[0] for c in self.iter_columns()]) 91 | assert len(set(household_freq.columns) - keys) == 0 92 | if has_pers: 93 | assert len(set(person_freq.columns) - keys) == 0 94 | assert self.ncols == len(household_freq.columns) + len(person_freq.columns) 95 | 96 | def iter_columns(self): 97 | """ 98 | Iterate over columns of both household and frequency tables AND 99 | the corresponding constraints for each column AND non-zero indexes 100 | applicable to each column. 101 | Yields tuples of (column name, column, constraint, nonzero). 102 | The returned column contains only the non-zero elements. 103 | 104 | """ 105 | return list(self._everything.values()) 106 | 107 | def get_column(self, key): 108 | """ 109 | Return a specific column's info by its name. 110 | 111 | Parameters 112 | ---------- 113 | key : object 114 | Column name or tuple required to index a MultiIndex column. 115 | 116 | Returns 117 | ------- 118 | col_name : object 119 | Same as `key`. 
120 | column : pandas.Series 121 | Has only the non-zero elements. 122 | constraint : float 123 | The target constraint for this type. 124 | nonzero : array 125 | The location of the non-zero items in the column. 126 | 127 | """ 128 | return self._everything[key] 129 | 130 | 131 | def _fit_quality(column, weights, constraint): 132 | """ 133 | Calculate quality of fit metric for a column of the frequency table. 134 | (The 𝛿 parameter described in the IPU paper.) 135 | 136 | Parameters 137 | ---------- 138 | column : 1D array 139 | Non-zero elements of a column of a frequency table. 140 | Must have the same length as `weights`. 141 | weights : 1D array 142 | Weights corresponding to the household rows in `column`. 143 | Must have the same length as `column`. 144 | constraint : float 145 | Target marginal constraint for this column. 146 | 147 | Returns 148 | ------- 149 | quality : float 150 | 151 | """ 152 | return abs((column * weights).sum() - constraint) / constraint 153 | 154 | 155 | def _average_fit_quality(freq_wrap, weights): 156 | """ 157 | Parameters 158 | ---------- 159 | freq_wrap : `_FrequencyAndConstraints` 160 | weights : ndarray 161 | Array of weights for all households. 162 | 163 | """ 164 | return sum( 165 | _fit_quality(col, weights[nz], constraint) 166 | for _, col, constraint, nz in freq_wrap.iter_columns() 167 | ) / freq_wrap.ncols 168 | 169 | 170 | def _update_weights(column, weights, constraint): 171 | """ 172 | Update household weights based on a single column. 173 | 174 | The update will be applied to all weights, so make sure only the 175 | non-zero elements of `column` and the corresponding weights are given. 176 | 177 | Parameters 178 | ---------- 179 | column : 1D array 180 | Non-zero elements of a column of a frequency table. 181 | Must have the same length as `weights`. 182 | weights : 1D array 183 | Weights corresponding to the household rows in `column`. 184 | Must have the same length as `column`. 
185 | constraint : float 186 | Target marginal constraint for this column. 187 | 188 | Returns 189 | ------- 190 | new_weights : ndarray 191 | 192 | """ 193 | adj = constraint / float((column * weights).sum()) 194 | return weights * adj 195 | 196 | 197 | def household_weights( 198 | household_freq, person_freq, household_constraints, 199 | person_constraints, geography, ignore_max_iters, 200 | convergence=1e-4, max_iterations=20000): 201 | """ 202 | Calculate the household weights that best match household and 203 | person level attributes. 204 | 205 | Parameters 206 | ---------- 207 | household_freq : pandas.DataFrame 208 | Frequency table for household attributes. Columns should be 209 | a MultiIndex matching the index of `household_constraints` and 210 | index should be household IDs matching the index of 211 | `person_freq`. 212 | person_Freq : pandas.DataFrame 213 | Frequency table for household person. Columns should be 214 | a MultiIndex matching the index of `person_constraints` and 215 | index should be household IDs matching the index of 216 | `household_freq`. 217 | household_constraints : pandas.Series 218 | Target marginal constraints for household classes. 219 | Index must be the same as the columns of `household_freq`. 220 | person_constraints : pandas.Series 221 | Target marginal constraints for person classes. 222 | Index must be the same as the columns of `person_freq`. 223 | convergence : float, optional 224 | When the average fit quality metric changes by less than this value 225 | after an iteration we declare done and send back the weights 226 | from the best fit. 227 | max_iterations, int, optional 228 | Maximum number of iterations to do before stopping and raising 229 | an exception. 230 | 231 | Returns 232 | ------- 233 | weights : pandas.Series 234 | fit_qual : float 235 | The final average fit quality metric. 236 | iterations : int 237 | Number of iterations made. 
238 | 239 | """ 240 | weights = np.ones(len(household_freq), dtype='float') 241 | best_weights = weights.copy() 242 | 243 | freq_wrap = _FrequencyAndConstraints( 244 | household_freq, household_constraints, person_freq, person_constraints) 245 | 246 | fit_qual = _average_fit_quality(freq_wrap, weights) 247 | best_fit_qual = fit_qual 248 | fit_change = np.inf 249 | iterations = 0 250 | 251 | while fit_change > convergence: 252 | for _, col, constraint, nz in freq_wrap.iter_columns(): 253 | weights[nz] = _update_weights(col, weights[nz], constraint) 254 | 255 | new_fit_qual = _average_fit_quality(freq_wrap, weights) 256 | fit_change = abs(new_fit_qual - fit_qual) 257 | 258 | if new_fit_qual < fit_qual: 259 | best_fit_qual = new_fit_qual 260 | best_weights = weights.copy() 261 | 262 | fit_qual = new_fit_qual 263 | iterations += 1 264 | 265 | if iterations > max_iterations: 266 | if ignore_max_iters: 267 | fitting_tolerance = fit_change - convergence 268 | print('Fitting tolerance before 20000 iterations: %s' % str(fitting_tolerance)) 269 | ipu_dict = {'best_fit_qual': best_fit_qual, 270 | 'fit_change': fit_change, 271 | 'fitting_tolerance': fitting_tolerance, 272 | 'geog_id': geography} 273 | if isinstance(geography, pd.Series): 274 | state, county = geography['state'], geography['county'] 275 | tract, bgroup = geography['tract'], geography['block group'] 276 | np.save('max_iter_{}_{}_{}_{}.npy'.format(state, county, 277 | tract, bgroup), ipu_dict) 278 | elif isinstance(geography, list): 279 | np.save('max_iter_{}_{}.npy'.format(geography[0], geography[1]), ipu_dict) 280 | else: 281 | np.save('max_iter_{}.npy'.format(str(geography)), ipu_dict) 282 | 283 | warnings.warn( 284 | 'Maximum number of iterations reached ' 285 | 'during IPU: {}'.format(max_iterations), UserWarning) 286 | return ( 287 | pd.Series(best_weights, index=household_freq.index), 288 | best_fit_qual, iterations) 289 | else: 290 | raise RuntimeError( 291 | 'Maximum number of iterations reached ' 292 | 
'during IPU: {}'.format(max_iterations)) 293 | 294 | return ( 295 | pd.Series(best_weights, index=household_freq.index), 296 | best_fit_qual, iterations) 297 | -------------------------------------------------------------------------------- /synthpop/ipu/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UDST/synthpop/6fb13991c9d3ede2d8cf80512bd1102e37b98971/synthpop/ipu/test/__init__.py -------------------------------------------------------------------------------- /synthpop/ipu/test/test_ipu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.testing as npt 3 | import pandas as pd 4 | import random 5 | import pytest 6 | from pandas.util import testing as pdt 7 | 8 | from .. import ipu 9 | 10 | 11 | @pytest.fixture(scope='module') 12 | def household_columns(): 13 | return pd.MultiIndex.from_product( 14 | [('yes',), ('blue', 'red')], 15 | names=['cat_owner', 'car_color']) 16 | 17 | 18 | @pytest.fixture(scope='module') 19 | def person_columns(): 20 | return pd.MultiIndex.from_product( 21 | [(7, 8, 9), ('pink',)], names=['shoe_size', 'shirt_color']) 22 | 23 | 24 | @pytest.fixture(scope='module') 25 | def household_freqs(household_columns): 26 | return pd.DataFrame( 27 | [(1, 0), 28 | (1, 0), 29 | (1, 0), 30 | (0, 1), 31 | (0, 1), 32 | (0, 1), 33 | (0, 1), 34 | (0, 1)], 35 | index=range(1, 9), 36 | columns=household_columns) 37 | 38 | 39 | @pytest.fixture(scope='module') 40 | def person_freqs(person_columns): 41 | return pd.DataFrame( 42 | [(1, 1, 1), 43 | (1, 0, 1), 44 | (2, 1, 0), 45 | (1, 0, 2), 46 | (0, 2, 1), 47 | (1, 1, 0), 48 | (2, 1, 2), 49 | (1, 1, 0)], 50 | index=range(1, 9), 51 | columns=person_columns) 52 | 53 | 54 | @pytest.fixture(scope='module') 55 | def household_constraints(household_columns): 56 | return pd.Series([35, 65], index=household_columns) 57 | 58 | 59 | @pytest.fixture(scope='module') 60 
| def person_constraints(person_columns): 61 | return pd.Series([91, 65, 104], index=person_columns) 62 | 63 | 64 | @pytest.fixture(scope='module') 65 | def geography(): 66 | dtypes = ['serie', 'list'] 67 | dtype = random.choice(dtypes) 68 | 69 | if dtype == 'serie': 70 | geography = pd.Series({'state': '02', 71 | 'county': '270', 72 | 'tract': '000100', 73 | 'block group': '1'}) 74 | else: 75 | geography = ['02', '270'] 76 | 77 | return geography 78 | 79 | 80 | @pytest.fixture 81 | def freq_wrap( 82 | household_freqs, person_freqs, household_constraints, 83 | person_constraints): 84 | return ipu._FrequencyAndConstraints( 85 | household_freqs, household_constraints, person_freqs, 86 | person_constraints) 87 | 88 | 89 | def test_drop_zeros_households(household_freqs): 90 | df = list(ipu._drop_zeros(household_freqs)) 91 | 92 | assert len(df) == 2 93 | assert df[0][0] == ('yes', 'blue') 94 | npt.assert_array_equal(df[0][1], [1, 1, 1]) 95 | npt.assert_array_equal(df[0][2], [0, 1, 2]) 96 | assert df[1][0] == ('yes', 'red') 97 | npt.assert_array_equal(df[1][1], [1, 1, 1, 1, 1]) 98 | npt.assert_array_equal(df[1][2], [3, 4, 5, 6, 7]) 99 | 100 | 101 | def test_drop_zeros_person(person_freqs): 102 | df = list(ipu._drop_zeros(person_freqs)) 103 | 104 | assert len(df) == 3 105 | assert df[0][0] == (7, 'pink') 106 | npt.assert_array_equal(df[0][1], [1, 1, 2, 1, 1, 2, 1]) 107 | npt.assert_array_equal(df[0][2], [0, 1, 2, 3, 5, 6, 7]) 108 | 109 | 110 | def test_fit_quality( 111 | household_freqs, person_freqs, household_constraints, 112 | person_constraints): 113 | weights = np.ones(len(household_freqs), dtype='float') 114 | column = household_freqs[('yes', 'blue')] 115 | constraint = household_constraints[('yes', 'blue')] 116 | 117 | npt.assert_allclose( 118 | ipu._fit_quality(column, weights, constraint), 0.9143, 119 | atol=0.0001) 120 | 121 | weights = np.array([12.37, 14.61, 8.05, 16.28, 16.91, 8.97, 13.78, 8.97]) 122 | column = person_freqs[(8, 'pink')] 123 | constraint = 
person_constraints[(8, 'pink')] 124 | 125 | npt.assert_allclose( 126 | ipu._fit_quality(column, weights, constraint), 0.3222, 127 | atol=0.0003) 128 | 129 | 130 | def test_average_fit_quality(household_freqs, freq_wrap): 131 | weights = np.ones(len(household_freqs), dtype='float') 132 | npt.assert_allclose( 133 | ipu._average_fit_quality(freq_wrap, weights), 134 | 0.9127, 135 | atol=0.0001) 136 | 137 | weights = np.array([12.37, 14.61, 8.05, 16.28, 16.91, 8.97, 13.78, 8.97]) 138 | npt.assert_allclose( 139 | ipu._average_fit_quality(freq_wrap, weights), 140 | 0.0954, 141 | atol=0.0001) 142 | 143 | 144 | def test_update_weights( 145 | household_freqs, person_freqs, household_constraints, 146 | person_constraints): 147 | column = household_freqs[('yes', 'blue')] 148 | column = column.iloc[column.values.nonzero()[0]] 149 | constraint = household_constraints[('yes', 'blue')] 150 | weights = pd.Series( 151 | np.ones(len(column)), 152 | index=column.index) 153 | 154 | npt.assert_allclose( 155 | ipu._update_weights(column, weights, constraint), 156 | [11.67, 11.67, 11.67], 157 | atol=0.01) 158 | 159 | column = person_freqs[(9, 'pink')] 160 | column = column.iloc[column.values.nonzero()[0]] 161 | constraint = person_constraints[(9, 'pink')] 162 | weights = pd.Series( 163 | [8.05, 9.51, 8.05, 10.59, 11.0, 8.97, 8.97, 8.97], 164 | index=range(1, 9)).loc[column.index] 165 | 166 | npt.assert_allclose( 167 | ipu._update_weights(column, weights, constraint), 168 | [12.37, 14.61, 16.28, 16.91, 13.78], 169 | atol=0.01) 170 | 171 | 172 | def test_household_weights( 173 | household_freqs, person_freqs, household_constraints, 174 | person_constraints, geography, ignore_max_iters=False): 175 | weights, fit_qual, iterations = ipu.household_weights( 176 | household_freqs, person_freqs, household_constraints, 177 | person_constraints, geography, ignore_max_iters, convergence=1e-7) 178 | npt.assert_allclose( 179 | weights.values, 180 | [1.36, 25.66, 7.98, 27.79, 18.45, 8.64, 1.47, 8.64], 
181 | atol=0.02) 182 | npt.assert_allclose(fit_qual, 8.51e-6, atol=1e-8) 183 | npt.assert_allclose(iterations, 637, atol=5) 184 | 185 | 186 | def test_household_weights_max_iter( 187 | household_freqs, person_freqs, household_constraints, 188 | person_constraints, geography, ignore_max_iters=False): 189 | with pytest.raises(RuntimeError): 190 | ipu.household_weights( 191 | household_freqs, person_freqs, household_constraints, 192 | person_constraints, geography, ignore_max_iters, convergence=1e-7, max_iterations=10) 193 | 194 | 195 | def test_FrequencyAndConstraints(freq_wrap): 196 | assert freq_wrap.ncols == 5 197 | assert len(list(freq_wrap.iter_columns())) == 5 198 | 199 | iter_cols = iter(freq_wrap.iter_columns()) 200 | 201 | key, col, constraint, nz = next(iter_cols) 202 | assert key == ('yes', 'blue') 203 | npt.assert_array_equal(col, [1, 1, 1]) 204 | assert constraint == 35 205 | npt.assert_array_equal(nz, [0, 1, 2]) 206 | 207 | key, col, constraint, nz = next(iter_cols) 208 | assert key == ('yes', 'red') 209 | npt.assert_array_equal(col, [1, 1, 1, 1, 1]) 210 | assert constraint == 65 211 | npt.assert_array_equal(nz, [3, 4, 5, 6, 7]) 212 | 213 | # should be into person cols now 214 | key, col, constraint, nz = next(iter_cols) 215 | assert key == (7, 'pink') 216 | npt.assert_array_equal(col, [1, 1, 2, 1, 1, 2, 1]) 217 | assert constraint == 91 218 | npt.assert_array_equal(nz, [0, 1, 2, 3, 5, 6, 7]) 219 | 220 | # test getting a column by name 221 | key, col, constraint, nz = freq_wrap.get_column((7, 'pink')) 222 | assert key == (7, 'pink') 223 | npt.assert_array_equal(col, [1, 1, 2, 1, 1, 2, 1]) 224 | assert constraint == 91 225 | npt.assert_array_equal(nz, [0, 1, 2, 3, 5, 6, 7]) 226 | -------------------------------------------------------------------------------- /synthpop/recipes/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UDST/synthpop/6fb13991c9d3ede2d8cf80512bd1102e37b98971/synthpop/recipes/__init__.py -------------------------------------------------------------------------------- /synthpop/recipes/starter.py: -------------------------------------------------------------------------------- 1 | from .. import categorizer as cat 2 | from ..census_helpers import Census 3 | import pandas as pd 4 | import numpy as np 5 | 6 | 7 | # TODO DOCSTRINGS!! 8 | class Starter: 9 | """ 10 | This is a recipe for getting the marginals and joint distributions to use 11 | to pass to the synthesizer using simple categories - population, age, 12 | race, and sex for people, and children, income, cars, and workers for 13 | households. This module is responsible for 14 | 15 | Parameters 16 | ---------- 17 | c : object 18 | census_helpers.Census object 19 | state : string 20 | FIPS code the state 21 | county : string 22 | FIPS code for the county 23 | tract : string, optional 24 | FIPS code for a specific track or None for all tracts in the county 25 | 26 | Returns 27 | ------- 28 | household_marginals : DataFrame 29 | Marginals per block group for the household data (from ACS) 30 | person_marginals : DataFrame 31 | Marginals per block group for the person data (from ACS) 32 | household_jointdist : DataFrame 33 | joint distributions for the households (from PUMS), one joint 34 | distribution for each PUMA (one row per PUMA) 35 | person_jointdist : DataFrame 36 | joint distributions for the persons (from PUMS), one joint 37 | distribution for each PUMA (one row per PUMA) 38 | tract_to_puma_map : dictionary 39 | keys are tract ids and pumas are puma ids 40 | """ 41 | 42 | def __init__(self, key, state, county, tract=None, acsyear=2016): 43 | self.c = c = Census(key, acsyear) 44 | self.state = state 45 | self.county = county 46 | self.tract = tract 47 | self.acsyear = acsyear 48 | 49 | income_columns = ['B19001_0%02dE' % i for i in range(1, 18)] 50 | vehicle_columns = 
['B08201_0%02dE' % i for i in range(1, 7)] 51 | workers_columns = ['B08202_0%02dE' % i for i in range(1, 6)] 52 | families_columns = ['B11001_001E', 'B11001_002E'] 53 | block_group_columns = income_columns + families_columns 54 | tract_columns = vehicle_columns + workers_columns 55 | h_acs = c.block_group_and_tract_query( 56 | block_group_columns, tract_columns, state, county, 57 | merge_columns=['tract', 'county', 'state'], 58 | block_group_size_attr="B11001_001E", 59 | tract_size_attr="B08201_001E", 60 | tract=tract, year=acsyear) 61 | self.h_acs = h_acs 62 | 63 | self.h_acs_cat = cat.categorize(h_acs, { 64 | ("children", "yes"): "B11001_002E", 65 | ("children", "no"): "B11001_001E - B11001_002E", 66 | ("income", "lt35"): "B19001_002E + B19001_003E + B19001_004E + " 67 | "B19001_005E + B19001_006E + B19001_007E", 68 | ("income", "gt35-lt100"): "B19001_008E + B19001_009E + " 69 | "B19001_010E + B19001_011E + B19001_012E" 70 | "+ B19001_013E", 71 | ("income", "gt100"): "B19001_014E + B19001_015E + B19001_016E" 72 | "+ B19001_017E", 73 | ("cars", "none"): "B08201_002E", 74 | ("cars", "one"): "B08201_003E", 75 | ("cars", "two or more"): "B08201_004E + B08201_005E + B08201_006E", 76 | ("workers", "none"): "B08202_002E", 77 | ("workers", "one"): "B08202_003E", 78 | ("workers", "two or more"): "B08202_004E + B08202_005E" 79 | }, index_cols=['state', 'county', 'tract', 'block group']) 80 | 81 | population = ['B01001_001E'] 82 | sex = ['B01001_002E', 'B01001_026E'] 83 | race = ['B02001_0%02dE' % i for i in range(1, 11)] 84 | male_age_columns = ['B01001_0%02dE' % i for i in range(3, 26)] 85 | female_age_columns = ['B01001_0%02dE' % i for i in range(27, 50)] 86 | all_columns = population + sex + race + male_age_columns + \ 87 | female_age_columns 88 | p_acs = c.block_group_query(all_columns, state, county, tract=tract, year=acsyear) 89 | self.p_acs = p_acs 90 | self.p_acs_cat = cat.categorize(p_acs, { 91 | ("age", "19 and under"): ( 92 | "B01001_003E + B01001_004E + 
B01001_005E + " 93 | "B01001_006E + B01001_007E + B01001_027E + " 94 | "B01001_028E + B01001_029E + B01001_030E + " 95 | "B01001_031E"), 96 | ("age", "20 to 35"): "B01001_008E + B01001_009E + B01001_010E + " 97 | "B01001_011E + B01001_012E + B01001_032E + " 98 | "B01001_033E + B01001_034E + B01001_035E + " 99 | "B01001_036E", 100 | ("age", "35 to 60"): "B01001_013E + B01001_014E + B01001_015E + " 101 | "B01001_016E + B01001_017E + B01001_037E + " 102 | "B01001_038E + B01001_039E + B01001_040E + " 103 | "B01001_041E", 104 | ("age", "above 60"): "B01001_018E + B01001_019E + B01001_020E + " 105 | "B01001_021E + B01001_022E + B01001_023E + " 106 | "B01001_024E + B01001_025E + B01001_042E + " 107 | "B01001_043E + B01001_044E + B01001_045E + " 108 | "B01001_046E + B01001_047E + B01001_048E + " 109 | "B01001_049E", 110 | ("race", "white"): "B02001_002E", 111 | ("race", "black"): "B02001_003E", 112 | ("race", "asian"): "B02001_005E", 113 | ("race", "other"): "B02001_004E + B02001_006E + B02001_007E + " 114 | "B02001_008E", 115 | ("sex", "male"): "B01001_002E", 116 | ("sex", "female"): "B01001_026E" 117 | }, index_cols=['state', 'county', 'tract', 'block group']) 118 | 119 | # Put the needed PUMS variables here. 
These are also the PUMS variables 120 | # that will be in the outputted synthetic population 121 | self.h_pums_cols = ('serialno', 'PUMA10', 'RT', 'NP', 122 | 'TYPE', 'VEH', 'WIF', 'NOC', 'FINCP') 123 | self.p_pums_cols = ('serialno', 'PUMA10', 'AGEP', 'RAC1P', 'SEX') 124 | 125 | if self.acsyear < 2018: 126 | self.h_pums_cols = list(self.h_pums_cols) 127 | self.h_pums_cols.insert(1, 'PUMA00') 128 | self.h_pums_cols = tuple(self.h_pums_cols) 129 | self.p_pums_cols = list(self.p_pums_cols) 130 | self.p_pums_cols.insert(1, 'PUMA00') 131 | self.p_pums_cols = tuple(self.p_pums_cols) 132 | 133 | def get_geography_name(self): 134 | # this synthesis is at the block group level for most variables 135 | return "block_group" 136 | 137 | def get_num_geographies(self): 138 | return len(self.p_acs_cat) 139 | 140 | def get_available_geography_ids(self): 141 | # return the ids of the geographies, in this case a state, county, 142 | # tract, block_group id tuple 143 | for tup in self.p_acs_cat.index: 144 | yield pd.Series(tup, index=self.p_acs_cat.index.names) 145 | 146 | def get_household_marginal_for_geography(self, ind): 147 | return self.h_acs_cat.loc[tuple(ind.values)] 148 | 149 | def get_person_marginal_for_geography(self, ind): 150 | return self.p_acs_cat.loc[tuple(ind.values)] 151 | 152 | def get_household_joint_dist_for_geography(self, ind): 153 | c = self.c 154 | 155 | puma10, puma00 = c.tract_to_puma(ind.state, ind.county, ind.tract) 156 | # this is cached so won't download more than once 157 | if type(puma00) == str: 158 | h_pums = self.c.download_household_pums(ind.state, puma10, puma00, 159 | usecols=self.h_pums_cols) 160 | elif np.isnan(puma00): # only puma10 available 161 | h_pums = self.c.download_household_pums(ind.state, puma10, None, 162 | usecols=self.h_pums_cols) 163 | 164 | def cars_cat(r): 165 | if r.VEH == 0: 166 | return "none" 167 | elif r.VEH == 1: 168 | return "one" 169 | return "two or more" 170 | 171 | def children_cat(r): 172 | if r.NOC > 0: 173 | 
return "yes" 174 | return "no" 175 | 176 | def income_cat(r): 177 | if r.FINCP > 100000: 178 | return "gt100" 179 | elif r.FINCP > 35000: 180 | return "gt35-lt100" 181 | return "lt35" 182 | 183 | def workers_cat(r): 184 | if r.WIF == 3: 185 | return "two or more" 186 | elif r.WIF == 2: 187 | return "two or more" 188 | elif r.WIF == 1: 189 | return "one" 190 | return "none" 191 | 192 | h_pums, jd_households = cat.joint_distribution( 193 | h_pums, 194 | cat.category_combinations(self.h_acs_cat.columns), 195 | {"cars": cars_cat, "children": children_cat, 196 | "income": income_cat, "workers": workers_cat} 197 | ) 198 | return h_pums, jd_households 199 | 200 | def get_person_joint_dist_for_geography(self, ind): 201 | c = self.c 202 | 203 | puma10, puma00 = c.tract_to_puma(ind.state, ind.county, ind.tract) 204 | # this is cached so won't download more than once 205 | if type(puma00) == str: 206 | p_pums = self.c.download_population_pums(ind.state, puma10, puma00, 207 | usecols=self.p_pums_cols) 208 | elif np.isnan(puma00): # only puma10 available 209 | p_pums = self.c.download_population_pums(ind.state, puma10, None, 210 | usecols=self.p_pums_cols) 211 | 212 | def age_cat(r): 213 | if r.AGEP <= 19: 214 | return "19 and under" 215 | elif r.AGEP <= 35: 216 | return "20 to 35" 217 | elif r.AGEP <= 60: 218 | return "35 to 60" 219 | return "above 60" 220 | 221 | def race_cat(r): 222 | if r.RAC1P == 1: 223 | return "white" 224 | elif r.RAC1P == 2: 225 | return "black" 226 | elif r.RAC1P == 6: 227 | return "asian" 228 | return "other" 229 | 230 | def sex_cat(r): 231 | if r.SEX == 1: 232 | return "male" 233 | return "female" 234 | 235 | p_pums, jd_persons = cat.joint_distribution( 236 | p_pums, 237 | cat.category_combinations(self.p_acs_cat.columns), 238 | {"age": age_cat, "race": race_cat, "sex": sex_cat} 239 | ) 240 | return p_pums, jd_persons 241 | -------------------------------------------------------------------------------- /synthpop/recipes/starter2.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from .. import categorizer as cat 5 | from ..census_helpers import Census 6 | 7 | 8 | # TODO DOCSTRINGS!! 9 | class Starter: 10 | """ 11 | This is a recipe for getting the marginals and joint distributions to use 12 | to pass to the synthesizer using simple categories - population, age, 13 | race, and sex for people, and children, income, cars, and workers for 14 | households. This module is responsible for 15 | 16 | Parameters 17 | ---------- 18 | c : object 19 | census_helpers.Census object 20 | state : string 21 | FIPS code the state 22 | county : string 23 | FIPS code for the county 24 | tract : string, optional 25 | FIPS code for a specific track or None for all tracts in the county 26 | acsyear : integer, optional 27 | Final year in the 5-year estimates ACS dataset. 28 | Default: 2016, which corresponds to 2011-2016 ACS dataset 29 | 30 | Returns 31 | ------- 32 | household_marginals : DataFrame 33 | Marginals per block group for the household data (from ACS 5-year estimates) 34 | person_marginals : DataFrame 35 | Marginals per block group for the person data (from ACS 5-year estimates) 36 | household_jointdist : DataFrame 37 | joint distributions for the households (from PUMS 2010-2000), one joint 38 | distribution for each PUMA (one row per PUMA) 39 | person_jointdist : DataFrame 40 | joint distributions for the persons (from PUMS 2010-2000), one joint 41 | distribution for each PUMA (one row per PUMA) 42 | tract_to_puma_map : dictionary 43 | keys are tract ids and pumas are puma ids 44 | """ 45 | 46 | def __init__(self, key, state, county, tract=None, acsyear=2016): 47 | self.c = c = Census(key, acsyear) 48 | self.state = state 49 | self.county = county 50 | self.tract = tract 51 | self.acsyear = acsyear 52 | 53 | structure_size_columns = ['B25032_0%02dE' % i for i in range(1, 24)] 54 | age_of_head_columns = 
['B25007_0%02dE' % i for i in range(1, 22)] 55 | race_of_head_columns = ['B25006_0%02dE' % i for i in range(1, 11)] 56 | hispanic_head_columns = ['B25003I_0%02dE' % i for i in range(1, 4)] 57 | hh_size_columns = ['B25009_0%02dE' % i for i in range(1, 18)] 58 | income_columns = ['B19001_0%02dE' % i for i in range(1, 18)] 59 | vehicle_columns = ['B08201_0%02dE' % i for i in range(1, 7)] 60 | workers_columns = ['B08202_0%02dE' % i for i in range(1, 6)] 61 | presence_of_children_columns = ['B11005_001E', 'B11005_002E', 'B11005_011E'] 62 | presence_of_seniors_columns = ['B11007_002E', 'B11007_007E'] 63 | tenure_mover_columns = ['B25038_0%02dE' % i for i in range(1, 16)] 64 | block_group_columns = ( 65 | income_columns + presence_of_children_columns + 66 | presence_of_seniors_columns + tenure_mover_columns + 67 | hh_size_columns + age_of_head_columns + structure_size_columns + 68 | race_of_head_columns + hispanic_head_columns) 69 | tract_columns = vehicle_columns + workers_columns 70 | h_acs = c.block_group_and_tract_query( 71 | block_group_columns, 72 | tract_columns, state, county, 73 | merge_columns=['tract', 'county', 'state'], 74 | block_group_size_attr="B11005_001E", 75 | tract_size_attr="B08201_001E", 76 | tract=tract, year=acsyear) 77 | self.h_acs = h_acs 78 | 79 | self.h_acs_cat = cat.categorize(h_acs, { 80 | ("sf_detached", "yes"): "B25032_003E + B25032_014E", 81 | ("sf_detached", "no"): "B25032_001E - B25032_003E - B25032_014E", 82 | ("hh_age_of_head", "lt35"): 83 | "B25007_003E + B25007_004E + B25007_013E + B25007_014E", 84 | ("hh_age_of_head", "gt35-lt65"): 85 | "B25007_005E + B25007_006E + B25007_007E + B25007_008E + " 86 | "B25007_015E + B25007_016E + B25007_017E + B25007_018E", 87 | ("hh_age_of_head", "gt65"): 88 | "B25007_009E + B25007_010E + B25007_011E + " 89 | "B25007_019E + B25007_020E + B25007_021E", 90 | ("hh_race_of_head", "black"): "B25006_003E", 91 | ("hh_race_of_head", "white"): "B25006_002E", 92 | ("hh_race_of_head", "asian"): "B25006_005E", 
93 | ("hh_race_of_head", "other"): 94 | "B25006_004E + B25006_006E + B25006_007E + B25006_008E ", 95 | ("hispanic_head", "yes"): "B25003I_001E", 96 | ("hispanic_head", "no"): "B11005_001E - B25003I_001E", 97 | ("hh_children", "yes"): "B11005_002E", 98 | ("hh_children", "no"): "B11005_011E", 99 | ("seniors", "yes"): "B11007_002E", 100 | ("seniors", "no"): "B11007_007E", 101 | ("hh_income", "lt30"): 102 | "B19001_002E + B19001_003E + B19001_004E + " 103 | "B19001_005E + B19001_006E", 104 | ("hh_income", "gt30-lt60"): 105 | "B19001_007E + B19001_008E + B19001_009E + " 106 | "B19001_010E + B19001_011E", 107 | ("hh_income", "gt60-lt100"): "B19001_012E + B19001_013E", 108 | ("hh_income", "gt100-lt150"): "B19001_014E + B19001_015E", 109 | ("hh_income", "gt150"): "B19001_016E + B19001_017E", 110 | ("hh_cars", "none"): "B08201_002E", 111 | ("hh_cars", "one"): "B08201_003E", 112 | ("hh_cars", "two or more"): 113 | "B08201_004E + B08201_005E + B08201_006E", 114 | ("hh_workers", "none"): "B08202_002E", 115 | ("hh_workers", "one"): "B08202_003E", 116 | ("hh_workers", "two or more"): "B08202_004E + B08202_005E", 117 | ("tenure_mover", "own recent"): "B25038_003E", 118 | ("tenure_mover", "own not recent"): "B25038_002E - B25038_003E", 119 | ("tenure_mover", "rent recent"): "B25038_010E", 120 | ("tenure_mover", "rent not recent"): "B25038_009E - B25038_010E", 121 | ("hh_size", "one"): "B25009_003E + B25009_011E", 122 | ("hh_size", "two"): "B25009_004E + B25009_012E", 123 | ("hh_size", "three"): "B25009_005E + B25009_013E", 124 | ("hh_size", "four or more"): "B25009_006E + B25009_014E + " 125 | "B25009_007E + B25009_015E + " 126 | "B25009_008E + B25009_016E + " 127 | "B25009_009E + B25009_017E", 128 | }, index_cols=['state', 'county', 'tract', 'block group']) 129 | 130 | # gq_population = ['B26001_001E'] 131 | # HH population, for the hhpop/totalpop adjustment 132 | hh_population = ['B11002_001E'] 133 | population = ['B01001_001E'] # This includes GQ 134 | hispanic = 
['B03003_002E', 'B03003_003E'] 135 | sex = ['B01001_002E', 'B01001_026E'] 136 | race = ['B02001_0%02dE' % i for i in range(1, 11)] 137 | male_age_columns = ['B01001_0%02dE' % i for i in range(3, 26)] 138 | female_age_columns = ['B01001_0%02dE' % i for i in range(27, 50)] 139 | all_columns = population + sex + race + male_age_columns + \ 140 | female_age_columns + hh_population + hispanic 141 | p_acs = c.block_group_query(all_columns, state, county, tract=tract, year=acsyear) 142 | self.p_acs = p_acs 143 | self.p_acs_cat = cat.categorize(p_acs, { 144 | ("person_age", "19 and under"): 145 | "(B01001_003E + B01001_004E + B01001_005E + " 146 | "B01001_006E + B01001_007E + B01001_027E + " 147 | "B01001_028E + B01001_029E + B01001_030E + " 148 | "B01001_031E) * B11002_001E*1.0/B01001_001E", 149 | ("person_age", "20 to 35"): 150 | "(B01001_008E + B01001_009E + B01001_010E + " 151 | "B01001_011E + B01001_012E + B01001_032E + " 152 | "B01001_033E + B01001_034E + B01001_035E + " 153 | "B01001_036E) * B11002_001E*1.0/B01001_001E", 154 | ("person_age", "35 to 60"): 155 | "(B01001_013E + B01001_014E + B01001_015E + " 156 | "B01001_016E + B01001_017E + B01001_037E + " 157 | "B01001_038E + B01001_039E + B01001_040E + " 158 | "B01001_041E) * B11002_001E*1.0/B01001_001E", 159 | ("person_age", "above 60"): 160 | "(B01001_018E + B01001_019E + B01001_020E + " 161 | "B01001_021E + B01001_022E + B01001_023E + " 162 | "B01001_024E + B01001_025E + B01001_042E + " 163 | "B01001_043E + B01001_044E + B01001_045E + " 164 | "B01001_046E + B01001_047E + B01001_048E + " 165 | "B01001_049E) * B11002_001E*1.0/B01001_001E", 166 | ("race", "white"): "(B02001_002E) * B11002_001E*1.0/B01001_001E", 167 | ("race", "black"): "(B02001_003E) * B11002_001E*1.0/B01001_001E", 168 | ("race", "asian"): "(B02001_005E) * B11002_001E*1.0/B01001_001E", 169 | ("race", "other"): "(B02001_004E + B02001_006E + B02001_007E + " 170 | "B02001_008E) * B11002_001E*1.0/B01001_001E", 171 | ("person_sex", "male"): 172 | 
"(B01001_002E) * B11002_001E*1.0/B01001_001E", 173 | ("person_sex", "female"): 174 | "(B01001_026E) * B11002_001E*1.0/B01001_001E", 175 | ("hispanic", "yes"): 176 | "(B03003_003E) * B11002_001E*1.0/B01001_001E", 177 | ("hispanic", "no"): 178 | "(B03003_002E) * B11002_001E*1.0/B01001_001E", 179 | }, index_cols=['state', 'county', 'tract', 'block group']) 180 | 181 | # Put the needed PUMS variables here. These are also the PUMS variables 182 | # that will be in the outputted synthetic population 183 | self.h_pums_cols = ('serialno', 'PUMA10', 'RT', 'NP', 'TYPE', 184 | 'R65', 'HINCP', 'VEH', 'MV', 'TEN', 'BLD', 'R18') 185 | self.p_pums_cols = ('serialno', 'PUMA10', 'RELP', 'AGEP', 186 | 'ESR', 'RAC1P', 'HISP', 'SEX', 'SPORDER', 187 | 'PERNP', 'SCHL', 'WKHP', 'JWTR', 'SCH') 188 | if self.acsyear < 2018: 189 | self.h_pums_cols = list(self.h_pums_cols) 190 | self.h_pums_cols.insert(1, 'PUMA00') 191 | self.h_pums_cols = tuple(self.h_pums_cols) 192 | self.p_pums_cols = list(self.p_pums_cols) 193 | self.p_pums_cols.insert(1, 'PUMA00') 194 | self.p_pums_cols = tuple(self.p_pums_cols) 195 | 196 | def get_geography_name(self): 197 | # this synthesis is at the block group level for most variables 198 | return "block_group" 199 | 200 | def get_num_geographies(self): 201 | return len(self.p_acs_cat) 202 | 203 | def get_available_geography_ids(self): 204 | # return the ids of the geographies, in this case a state, county, 205 | # tract, block_group id tuple 206 | for tup in self.p_acs_cat.index: 207 | yield pd.Series(tup, index=self.p_acs_cat.index.names) 208 | 209 | def get_household_marginal_for_geography(self, ind): 210 | return self.h_acs_cat.loc[tuple(ind.values)] 211 | 212 | def get_person_marginal_for_geography(self, ind): 213 | return self.p_acs_cat.loc[tuple(ind.values)] 214 | 215 | def get_household_joint_dist_for_geography(self, ind): 216 | c = self.c 217 | 218 | puma10, puma00 = c.tract_to_puma(ind.state, ind.county, ind.tract) 219 | 220 | # this is cached so won't 
download more than once 221 | if type(puma00) == str: 222 | h_pums = self.c.download_household_pums(ind.state, puma10, puma00, 223 | usecols=self.h_pums_cols) 224 | p_pums = self.c.download_population_pums(ind.state, puma10, puma00, 225 | usecols=self.p_pums_cols) 226 | elif np.isnan(puma00): # only puma10 available 227 | h_pums = self.c.download_household_pums(ind.state, puma10, None, 228 | usecols=self.h_pums_cols) 229 | p_pums = self.c.download_population_pums(ind.state, puma10, None, 230 | usecols=self.p_pums_cols) 231 | 232 | h_pums = h_pums.set_index('serialno') 233 | 234 | # join persons to households, 235 | # calculate needed household-level variables 236 | age_of_head = p_pums[p_pums.RELP == 0].groupby('serialno').AGEP.max() 237 | num_workers = p_pums[p_pums.ESR.isin([1, 2, 4, 5])].groupby( 238 | 'serialno').size() 239 | h_pums['race_of_head'] = p_pums[p_pums.RELP == 0].groupby( 240 | 'serialno').RAC1P.max() 241 | h_pums['hispanic_head'] = p_pums[p_pums.RELP == 0].groupby( 242 | 'serialno').HISP.max() 243 | h_pums['age_of_head'] = age_of_head 244 | h_pums['workers'] = num_workers 245 | h_pums.workers = h_pums.workers.fillna(0) 246 | h_pums = h_pums.reset_index() 247 | 248 | def sf_detached_cat(r): 249 | if r.BLD == 2: 250 | return "yes" 251 | return "no" 252 | 253 | def age_of_head_cat(r): 254 | if r.age_of_head < 35: 255 | return "lt35" 256 | elif r.age_of_head >= 65: 257 | return "gt65" 258 | return "gt35-lt65" 259 | 260 | def race_of_head_cat(r): 261 | if r.race_of_head == 1: 262 | return "white" 263 | elif r.race_of_head == 2: 264 | return "black" 265 | elif r.race_of_head == 6: 266 | return "asian" 267 | return "other" 268 | 269 | def hispanic_head_cat(r): 270 | if r.hispanic_head == 1: 271 | return "no" 272 | return "yes" 273 | 274 | def hh_size_cat(r): 275 | if r.NP == 1: 276 | return "one" 277 | elif r.NP == 2: 278 | return "two" 279 | elif r.NP == 3: 280 | return "three" 281 | return "four or more" 282 | 283 | def cars_cat(r): 284 | if r.VEH == 0: 
285 | return "none" 286 | elif r.VEH == 1: 287 | return "one" 288 | return "two or more" 289 | 290 | def children_cat(r): 291 | if r.R18 == 1: 292 | return "yes" 293 | return "no" 294 | 295 | def seniors_cat(r): 296 | if r.R65 > 0: 297 | return "yes" 298 | return "no" 299 | 300 | def income_cat(r): 301 | if r.HINCP >= 150000: 302 | return "gt150" 303 | elif (r.HINCP >= 100000) & (r.HINCP < 150000): 304 | return "gt100-lt150" 305 | elif (r.HINCP >= 60000) & (r.HINCP < 100000): 306 | return "gt60-lt100" 307 | elif (r.HINCP >= 30000) & (r.HINCP < 60000): 308 | return "gt30-lt60" 309 | return "lt30" 310 | 311 | def workers_cat(r): 312 | if r.workers >= 2: 313 | return "two or more" 314 | elif r.workers == 1: 315 | return "one" 316 | return "none" 317 | 318 | def tenure_mover_cat(r): 319 | if (r.MV < 4) & (r.TEN < 3): 320 | return "own recent" 321 | elif (r.MV >= 4) & (r.TEN < 3): 322 | return "own not recent" 323 | elif (r.MV < 4) & (r.TEN >= 3): 324 | return "rent recent" 325 | return "rent not recent" 326 | 327 | h_pums, jd_households = cat.joint_distribution( 328 | h_pums, 329 | cat.category_combinations(self.h_acs_cat.columns), 330 | {"hh_cars": cars_cat, 331 | "hh_children": children_cat, 332 | "hh_income": income_cat, 333 | "hh_workers": workers_cat, 334 | "tenure_mover": tenure_mover_cat, 335 | "seniors": seniors_cat, 336 | "hh_size": hh_size_cat, 337 | "hh_age_of_head": age_of_head_cat, 338 | "sf_detached": sf_detached_cat, 339 | "hh_race_of_head": race_of_head_cat, 340 | "hispanic_head": hispanic_head_cat} 341 | ) 342 | return h_pums, jd_households 343 | 344 | def get_person_joint_dist_for_geography(self, ind): 345 | c = self.c 346 | 347 | puma10, puma00 = c.tract_to_puma(ind.state, ind.county, ind.tract) 348 | # this is cached so won't download more than once 349 | if type(puma00) == str: 350 | p_pums = self.c.download_population_pums(ind.state, puma10, puma00, 351 | usecols=self.p_pums_cols) 352 | elif np.isnan(puma00): # only puma10 available 353 | p_pums = 
self.c.download_population_pums(ind.state, puma10, None, 354 | usecols=self.p_pums_cols) 355 | 356 | def age_cat(r): 357 | if r.AGEP <= 19: 358 | return "19 and under" 359 | elif r.AGEP <= 35: 360 | return "20 to 35" 361 | elif r.AGEP <= 60: 362 | return "35 to 60" 363 | return "above 60" 364 | 365 | def race_cat(r): 366 | if r.RAC1P == 1: 367 | return "white" 368 | elif r.RAC1P == 2: 369 | return "black" 370 | elif r.RAC1P == 6: 371 | return "asian" 372 | return "other" 373 | 374 | def sex_cat(r): 375 | if r.SEX == 1: 376 | return "male" 377 | return "female" 378 | 379 | def hispanic_cat(r): 380 | if r.HISP == 1: 381 | return "no" 382 | return "yes" 383 | 384 | p_pums, jd_persons = cat.joint_distribution( 385 | p_pums, 386 | cat.category_combinations(self.p_acs_cat.columns), 387 | {"person_age": age_cat, "race": race_cat, "person_sex": sex_cat, 388 | "hispanic": hispanic_cat} 389 | ) 390 | return p_pums, jd_persons 391 | -------------------------------------------------------------------------------- /synthpop/recipes/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UDST/synthpop/6fb13991c9d3ede2d8cf80512bd1102e37b98971/synthpop/recipes/tests/__init__.py -------------------------------------------------------------------------------- /synthpop/recipes/tests/test_starter.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ...synthesizer import * 3 | from ..starter import Starter 4 | 5 | 6 | @pytest.fixture 7 | def key(): 8 | return "827402c2958dcf515e4480b7b2bb93d1025f9389" 9 | 10 | 11 | def test_starter(key): 12 | st = Starter(key, "CA", "Napa County") 13 | synthesize_all(st, num_geogs=1) 14 | -------------------------------------------------------------------------------- /synthpop/synthesizer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from 
collections import namedtuple 4 | 5 | import numpy as np 6 | import pandas as pd 7 | from scipy.stats import chisquare 8 | 9 | from . import categorizer as cat 10 | from . import draw 11 | from .ipf.ipf import calculate_constraints 12 | from .ipu.ipu import household_weights 13 | 14 | logger = logging.getLogger("synthpop") 15 | FitQuality = namedtuple( 16 | 'FitQuality', 17 | ('people_chisq', 'people_p')) 18 | BlockGroupID = namedtuple( 19 | 'BlockGroupID', ('state', 'county', 'tract', 'block_group')) 20 | 21 | 22 | def enable_logging(): 23 | handler = logging.StreamHandler(stream=sys.stdout) 24 | logger.addHandler(handler) 25 | logger.setLevel(logging.DEBUG) 26 | 27 | 28 | def synthesize(h_marg, p_marg, h_jd, p_jd, h_pums, p_pums, geography, ignore_max_iters, 29 | marginal_zero_sub=.01, jd_zero_sub=.001, hh_index_start=0): 30 | 31 | # this is the zero marginal problem 32 | h_marg = h_marg.replace(0, marginal_zero_sub) 33 | p_marg = p_marg.replace(0, marginal_zero_sub) 34 | 35 | # zero cell problem 36 | h_jd.frequency = h_jd.frequency.replace(0, jd_zero_sub) 37 | p_jd.frequency = p_jd.frequency.replace(0, jd_zero_sub) 38 | 39 | # ipf for households 40 | logger.info("Running ipf for households") 41 | h_constraint, _ = calculate_constraints(h_marg, h_jd.frequency) 42 | h_constraint.index = h_jd.cat_id 43 | 44 | logger.debug("Household constraint") 45 | logger.debug(h_constraint) 46 | 47 | # ipf for persons 48 | logger.info("Running ipf for persons") 49 | p_constraint, _ = calculate_constraints(p_marg, p_jd.frequency) 50 | # p_constraint.index = p_jd.cat_id 51 | 52 | logger.debug("Person constraint") 53 | logger.debug(p_constraint) 54 | 55 | # modify person cat ids so they are unique when combined with households 56 | p_starting_cat_id = h_jd['cat_id'].max() + 1 57 | p_jd['cat_id'] += p_starting_cat_id 58 | p_pums['cat_id'] += p_starting_cat_id 59 | p_constraint.index = p_jd.cat_id 60 | 61 | # make frequency tables that the ipu expects 62 | household_freq, person_freq 
= cat.frequency_tables(p_pums, h_pums, 63 | p_jd.cat_id, 64 | h_jd.cat_id) 65 | 66 | # do the ipu to match person marginals 67 | logger.info("Running ipu") 68 | import time 69 | t1 = time.time() 70 | best_weights, fit_quality, iterations = household_weights(household_freq, 71 | person_freq, 72 | h_constraint, 73 | p_constraint, 74 | geography, 75 | ignore_max_iters) 76 | logger.info("Time to run ipu: %.3fs" % (time.time()-t1)) 77 | 78 | logger.debug("IPU weights:") 79 | logger.debug(best_weights.describe()) 80 | logger.debug("Fit quality:") 81 | logger.debug(fit_quality) 82 | logger.debug("Number of iterations:") 83 | logger.debug(iterations) 84 | 85 | num_households = int(h_marg.groupby(level=0).sum().mean()) 86 | print("Drawing %d households" % num_households) 87 | 88 | best_chisq = np.inf 89 | 90 | return draw.draw_households( 91 | num_households, h_pums, p_pums, household_freq, h_constraint, 92 | p_constraint, best_weights, hh_index_start=hh_index_start) 93 | 94 | 95 | def synthesize_all(recipe, num_geogs=None, indexes=None, ignore_max_iters=False, 96 | marginal_zero_sub=.01, jd_zero_sub=.001): 97 | """ 98 | Returns 99 | ------- 100 | households, people : pandas.DataFrame 101 | fit_quality : dict of FitQuality 102 | Keys are geographic IDs, values are namedtuples with attributes 103 | ``.household_chisq``, ``household_p``, ``people_chisq``, 104 | and ``people_p``. 105 | 106 | """ 107 | print("Synthesizing at geog level: '{}' (number of geographies is {})" 108 | .format(recipe.get_geography_name(), recipe.get_num_geographies())) 109 | 110 | if indexes is None: 111 | indexes = recipe.get_available_geography_ids() 112 | 113 | hh_list = [] 114 | people_list = [] 115 | cnt = 0 116 | fit_quality = {} 117 | hh_index_start = 0 118 | 119 | # TODO will parallelization work here? 
120 | for geog_id in indexes: 121 | print("Synthesizing geog id:\n", geog_id) 122 | 123 | h_marg = recipe.get_household_marginal_for_geography(geog_id) 124 | logger.debug("Household marginal") 125 | logger.debug(h_marg) 126 | 127 | p_marg = recipe.get_person_marginal_for_geography(geog_id) 128 | logger.debug("Person marginal") 129 | logger.debug(p_marg) 130 | 131 | h_pums, h_jd = recipe.\ 132 | get_household_joint_dist_for_geography(geog_id) 133 | logger.debug("Household joint distribution") 134 | logger.debug(h_jd) 135 | 136 | p_pums, p_jd = recipe.get_person_joint_dist_for_geography(geog_id) 137 | logger.debug("Person joint distribution") 138 | logger.debug(p_jd) 139 | 140 | households, people, people_chisq, people_p = \ 141 | synthesize( 142 | h_marg, p_marg, h_jd, p_jd, h_pums, p_pums, geog_id, ignore_max_iters, 143 | marginal_zero_sub=marginal_zero_sub, jd_zero_sub=jd_zero_sub, 144 | hh_index_start=hh_index_start) 145 | 146 | # Append location identifiers to the synthesized households 147 | for geog_cat in geog_id.keys(): 148 | households[geog_cat] = geog_id[geog_cat] 149 | 150 | hh_list.append(households) 151 | people_list.append(people) 152 | key = BlockGroupID( 153 | geog_id['state'], geog_id['county'], geog_id['tract'], 154 | geog_id['block group']) 155 | fit_quality[key] = FitQuality(people_chisq, people_p) 156 | 157 | cnt += 1 158 | if len(households) > 0: 159 | hh_index_start = households.index.values[-1] + 1 160 | 161 | if num_geogs is not None and cnt >= num_geogs: 162 | break 163 | 164 | # TODO might want to write this to disk as we go? 
@pytest.fixture
def c():
    # Census API client used by every test in this module.
    return Census('bfa6b4e541243011fab6307a31aed9e91015ba90')


@pytest.fixture
def acs_data(c):
    # Pull the raw ACS block-group table for one SF tract:
    # total population, sex, race, and the male/female age bins.
    population = ['B01001_001E']
    sex = ['B01001_002E', 'B01001_026E']
    race = ['B02001_0%02dE' % i for i in range(1, 11)]
    male_age_columns = ['B01001_0%02dE' % i for i in range(3, 26)]
    female_age_columns = ['B01001_0%02dE' % i for i in range(27, 50)]
    all_columns = (population + sex + race + male_age_columns +
                   female_age_columns)
    return c.block_group_query(all_columns, "06", "075", tract="030600")


@pytest.fixture
def pums_data(c):
    return c.download_population_pums("06", "07506")


def test_categorize(acs_data, pums_data):
    """Categorize ACS columns and build a matching PUMS joint distribution."""
    p_acs_cat = cat.categorize(acs_data, {
        ("population", "total"): "B01001_001E",
        ("age", "19 and under"): "B01001_003E + B01001_004E + B01001_005E + "
                                 "B01001_006E + B01001_007E + B01001_027E + "
                                 "B01001_028E + B01001_029E + B01001_030E + "
                                 "B01001_031E",
        ("age", "20 to 35"): "B01001_008E + B01001_009E + B01001_010E + "
                             "B01001_011E + B01001_012E + B01001_032E + "
                             "B01001_033E + B01001_034E + B01001_035E + "
                             "B01001_036E",
        ("age", "35 to 60"): "B01001_013E + B01001_014E + B01001_015E + "
                             "B01001_016E + B01001_017E + B01001_037E + "
                             "B01001_038E + B01001_039E + B01001_040E + "
                             "B01001_041E",
        ("age", "above 60"): "B01001_018E + B01001_019E + B01001_020E + "
                             "B01001_021E + B01001_022E + B01001_023E + "
                             "B01001_024E + B01001_025E + B01001_042E + "
                             "B01001_043E + B01001_044E + B01001_045E + "
                             "B01001_046E + B01001_047E + B01001_048E + "
                             "B01001_049E",
        ("race", "white"): "B02001_002E",
        ("race", "black"): "B02001_003E",
        ("race", "asian"): "B02001_005E",
        ("race", "other"): "B02001_004E + B02001_006E + B02001_007E + "
                           "B02001_008E",
        ("sex", "male"): "B01001_002E",
        ("sex", "female"): "B01001_026E"
    }, index_cols=['NAME'])

    # Three block groups in the tract, 11 categories, 2-level columns.
    assert len(p_acs_cat) == 3
    assert len(p_acs_cat.columns) == 11
    assert len(p_acs_cat.columns.names) == 2
    assert p_acs_cat.columns[0][0] == "age"

    assert np.all(cat.sum_accross_category(p_acs_cat) < 2)

    def age_cat(r):
        # First bucket whose upper bound the person's age does not exceed.
        for bound, label in ((19, "19 and under"), (35, "20 to 35"),
                             (60, "35 to 60")):
            if r.AGEP <= bound:
                return label
        return "above 60"

    def race_cat(r):
        # PUMS RAC1P codes: 1=white, 2=black, 6=asian; everything else pooled.
        return {1: "white", 2: "black", 6: "asian"}.get(r.RAC1P, "other")

    def sex_cat(r):
        return "male" if r.SEX == 1 else "female"

    pums_data, jd_persons = cat.joint_distribution(
        pums_data,
        cat.category_combinations(p_acs_cat.columns),
        {"age": age_cat, "race": race_cat, "sex": sex_cat}
    )
3 | import numpy as np 4 | from pandas.util.testing import assert_series_equal 5 | import os 6 | 7 | 8 | @pytest.fixture 9 | def c(): 10 | return Census('bfa6b4e541243011fab6307a31aed9e91015ba90') 11 | 12 | 13 | def test_block_group_and_tract_query(c): 14 | income_columns = ['B19001_0%02dE' % i for i in range(1, 18)] 15 | vehicle_columns = ['B08201_0%02dE' % i for i in range(1, 7)] 16 | workers_columns = ['B08202_0%02dE' % i for i in range(1, 6)] 17 | families_columns = ['B11001_001E', 'B11001_002E'] 18 | block_group_columns = income_columns + families_columns 19 | tract_columns = vehicle_columns + workers_columns 20 | df = c.block_group_and_tract_query(block_group_columns, 21 | tract_columns, "06", "075", 22 | merge_columns=['tract', 'county', 23 | 'state'], 24 | block_group_size_attr="B11001_001E", 25 | tract_size_attr="B08201_001E", 26 | tract="030600") 27 | 28 | assert len(df) == 3 29 | assert_series_equal( 30 | df["B11001_001E"], df["B08201_001E"], check_names=False) 31 | assert np.all(df.state == "06") 32 | assert np.all(df.county == "075") 33 | 34 | df = c.block_group_and_tract_query(block_group_columns, 35 | tract_columns, "06", "075", 36 | merge_columns=['tract', 'county', 37 | 'state'], 38 | block_group_size_attr="B11001_001E", 39 | tract_size_attr="B08201_001E", 40 | tract=None) 41 | 42 | # number of block groups in San Francisco 43 | assert len(df) == 581 44 | assert_series_equal( 45 | df["B11001_001E"], df["B08201_001E"], check_names=False) 46 | assert np.all(df.state == "06") 47 | assert np.all(df.county == "075") 48 | 49 | 50 | def test_wide_block_group_query(c): 51 | population = ['B01001_001E'] 52 | sex = ['B01001_002E', 'B01001_026E'] 53 | race = ['B02001_0%02dE' % i for i in range(1, 11)] 54 | male_age_columns = ['B01001_0%02dE' % i for i in range(3, 26)] 55 | female_age_columns = ['B01001_0%02dE' % i for i in range(27, 50)] 56 | all_columns = population + sex + race + male_age_columns + \ 57 | female_age_columns 58 | df = 
c.block_group_query(all_columns, "06", "075", tract="030600") 59 | 60 | assert len(df) == 3 61 | assert np.all(df.state == "06") 62 | assert np.all(df.county == "075") 63 | assert len(df.columns) > 50 64 | 65 | 66 | def test_tract_to_puma(c): 67 | puma = c.tract_to_puma("06", "075", "030600")[0] 68 | assert puma == "07506" 69 | 70 | 71 | def test_download_pums(c): 72 | puma = "07506" 73 | c.download_population_pums("06", puma) 74 | c.download_household_pums("06", puma) 75 | c.download_population_pums("10") 76 | c.download_household_pums("10") 77 | -------------------------------------------------------------------------------- /synthpop/test/test_data/hh_marginals.csv: -------------------------------------------------------------------------------- 1 | zone_id,sample_geog,cars,cars,cars,children,children,income,income,income,workers,workers,workers 2 | ,,none,one,two or more,no,yes,gt100,gt35-lt100,lt35,none,one,two or more 3 | 1,1,7,49,197,41,215,57,125,74,72,77,105 4 | 2,1,9,59,237,68,239,83,126,98,87,93,125 5 | 3,1,10,69,275,79,279,74,170,114,102,108,146 6 | 4,1,11,76,302,167,224,42,105,244,111,118,160 7 | 5,1,18,117,466,86,517,50,261,292,171,182,247 8 | 6,1,9,63,252,65,261,80,139,107,92,98,133 9 | 7,1,19,159,377,160,397,96,186,275,199,194,162 10 | 8,1,11,98,231,86,257,30,99,214,123,119,100 11 | 9,1,9,78,186,49,226,22,164,89,98,95,79 12 | 10,1,7,65,155,55,175,21,143,66,82,80,66 13 | 11,1,17,297,542,289,570,118,407,334,303,279,274 14 | 12,1,15,258,474,201,548,76,371,302,264,244,240 15 | 13,1,40,217,486,251,495,121,314,311,269,259,216 16 | 14,1,51,278,622,472,482,53,320,581,344,332,277 17 | 15,1,38,210,470,220,501,68,350,303,259,251,209 18 | 16,1,23,79,83,45,142,0,60,127,87,54,43 19 | 17,1,23,78,81,47,137,0,49,135,86,54,43 20 | 18,1,36,122,127,103,184,0,134,153,134,84,67 21 | 19,1,40,135,141,66,252,23,190,105,149,93,75 22 | 20,1,89,303,318,442,271,19,167,527,334,209,168 23 | 21,1,43,147,154,108,238,0,161,185,162,101,81 
-------------------------------------------------------------------------------- /synthpop/test/test_data/person_marginals.csv: -------------------------------------------------------------------------------- 1 | zone_id,age,age,age,age,race,race,race,race,sex,sex 2 | ,19 and under,20 to 35,35 to 60,above 60,asian,black,other,white,female,male 3 | 1,312,108,223,177,64,0,0,756,440,380 4 | 2,235,143,296,181,0,0,0,855,452,403 5 | 3,303,229,445,174,0,0,24,1127,565,586 6 | 4,215,77,356,189,0,0,29,808,389,448 7 | 5,506,539,619,262,0,0,0,1926,981,945 8 | 6,377,171,285,102,0,0,47,888,476,459 9 | 7,312,150,488,382,0,0,14,1318,681,651 10 | 8,246,100,229,242,0,0,0,817,337,480 11 | 9,218,182,203,185,0,0,6,782,411,377 12 | 10,52,75,150,227,0,0,22,482,206,298 13 | 11,490,314,617,721,21,82,14,2025,1062,1080 14 | 12,639,356,721,381,7,4,46,2040,1162,935 15 | 13,345,341,647,564,0,21,179,1697,895,1002 16 | 14,372,363,708,638,0,6,89,1986,1044,1037 17 | 15,361,281,624,528,6,0,141,1647,871,923 18 | 16,149,92,67,157,0,0,0,465,212,253 19 | 17,287,69,196,81,0,0,0,633,366,267 20 | 18,160,128,265,93,0,0,20,626,366,280 21 | 19,418,158,313,198,0,0,0,1087,546,541 22 | 20,238,151,495,327,0,0,132,1079,748,463 23 | 21,272,133,203,279,0,0,0,887,401,486 -------------------------------------------------------------------------------- /synthpop/test/test_draw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.testing as npt 3 | import pandas as pd 4 | import pytest 5 | from pandas.util import testing as pdt 6 | 7 | from .. 
import draw 8 | from ..ipu.ipu import _FrequencyAndConstraints 9 | 10 | 11 | @pytest.fixture 12 | def seed(request): 13 | current = np.random.get_state() 14 | 15 | def fin(): 16 | np.random.set_state(current) 17 | request.addfinalizer(fin) 18 | 19 | np.random.seed(0) 20 | 21 | 22 | @pytest.fixture 23 | def index(): 24 | return np.array(['v', 'w', 'x', 'y', 'z'], dtype=np.str_) 25 | 26 | 27 | @pytest.fixture 28 | def weights(): 29 | return np.array([1, 2, 3, 4, 5]) 30 | 31 | 32 | @pytest.fixture 33 | def num(): 34 | return 10 35 | 36 | 37 | def test_simple_draw(index, weights, num, seed): 38 | drawn_indexes = draw.simple_draw(num, weights, index) 39 | 40 | npt.assert_array_equal( 41 | drawn_indexes, ['y', 'z', 'y', 'y', 'y', 'y', 'y', 'z', 'z', 'x']) 42 | 43 | 44 | def test_execute_draw(): 45 | hh_df = pd.DataFrame( 46 | {'a': range(5), 47 | 'b': range(5, 10), 48 | 'serialno': [11, 22, 33, 44, 55]}, 49 | index=pd.Index(['a', 'b', 'c', 'd', 'e'], name='hh_id')) 50 | 51 | pp_df = pd.DataFrame( 52 | {'x': range(100, 110), 53 | 'y': range(110, 120), 54 | 'serialno': [22, 33, 11, 55, 22, 33, 44, 55, 11, 33]}, 55 | index=pd.Index(['q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'])) 56 | 57 | indexes = ['c', 'a', 'd', 'e', 'a', 'c', 'e', 'e', 'a', 'c', 'e'] 58 | 59 | synth_hh, synth_pp = draw.execute_draw( 60 | indexes, hh_df, pp_df, hh_index_start=1000) 61 | 62 | expected_index = pd.Index(range(1000, 1011)) 63 | pdt.assert_index_equal(synth_hh.index, expected_index) 64 | pdt.assert_series_equal( 65 | synth_hh.serialno, 66 | pd.Series( 67 | [33, 11, 44, 55, 11, 33, 55, 55, 11, 33, 55], 68 | index=expected_index, name='serialno')) 69 | assert list(synth_hh.columns) == ['a', 'b', 'serialno'] 70 | 71 | pdt.assert_index_equal(synth_pp.index, pd.Index(range(24))) 72 | pdt.assert_series_equal( 73 | synth_pp.serialno, 74 | pd.Series( 75 | ([33] * 9) + ([11] * 6) + ([55] * 8) + [44], name='serialno')) 76 | pdt.assert_series_equal( 77 | synth_pp.hh_id, 78 | pd.Series( 79 | ([1000, 
1005, 1009] * 3) + ([1001, 1004, 1008] * 2) + 80 | ([1003, 1006, 1007, 1010] * 2) + [1002], 81 | name='hh_id')) 82 | 83 | 84 | def test_compare_to_constraints_exact(): 85 | constraints = pd.Series([1, 3, 2], index=['a', 'b', 'c']) 86 | synth = pd.Series(['a', 'c', 'b', 'c', 'b', 'b']) 87 | 88 | chisq, p = draw.compare_to_constraints(synth, constraints) 89 | 90 | assert chisq == 0 91 | assert p == 1 92 | 93 | 94 | def test_compare_to_constraints(): 95 | constraints = pd.Series([1, 1, 2, 1, 3], index=['a', 'b', 'c', 'd', 'e']) 96 | synth = pd.Series(['e', 'a', 'e', 'e', 'c', 'e']) 97 | 98 | chisq, p = draw.compare_to_constraints(synth, constraints) 99 | 100 | 101 | @pytest.fixture 102 | def freqs(): 103 | return pd.DataFrame( 104 | {'a': [1, 1, 0, 0, 0, 0, 0, 0, 0, 0], 105 | 'b': [0, 0, 1, 1, 1, 0, 0, 0, 0, 0], 106 | 'c': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], 107 | 'd': [0, 0, 0, 0, 0, 0, 1, 1, 1, 1]}) 108 | 109 | 110 | def test_draw_indexes_easy(freqs, seed): 111 | # constraints are integers, add up to the total we want 112 | constraints = pd.Series([6, 4, 3, 9], index=freqs.columns) 113 | 114 | fac = _FrequencyAndConstraints(freqs, constraints) 115 | weights = pd.Series(np.ones(10)) 116 | 117 | idx = draw._draw_indexes(constraints.sum(), fac, weights) 118 | 119 | assert isinstance(idx, pd.Index) 120 | assert len(idx) == constraints.sum() 121 | assert idx.isin(weights.index).all() 122 | 123 | with pytest.raises(RuntimeError): 124 | draw._draw_indexes(100, fac, weights) 125 | 126 | 127 | def test_draw_indexes(freqs, seed): 128 | num = 22 129 | constraints = pd.Series([6.1, 3.2, 2.5, 8.9], index=freqs.columns) 130 | fac = _FrequencyAndConstraints(freqs, constraints) 131 | weights = pd.Series( 132 | [0.1012815, 0.11915142, 0.0369963, 0.20165698, 0.14132664, 133 | 0.02791166, 0.06182466, 0.17389766, 0.11982733, 0.01612583]) 134 | 135 | idx = draw._draw_indexes(num, fac, weights) 136 | 137 | assert isinstance(idx, pd.Index) 138 | assert len(idx) == num 139 | assert 
idx.isin(weights.index).all() 140 | 141 | assert idx.isin({0, 1}).sum() == 6 142 | assert idx.isin({2, 3, 4}).sum() == 4 143 | assert idx.isin({5}).sum() == 3 144 | assert idx.isin({6, 7, 8, 9}).sum() == 9 145 | -------------------------------------------------------------------------------- /synthpop/test/test_zone_synthesizer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import pandas as pd 4 | 5 | import synthpop.zone_synthesizer as zs 6 | 7 | 8 | @pytest.fixture 9 | def hh_marg(): 10 | fname = os.path.join(os.path.dirname(__file__), 11 | 'test_data/hh_marginals.csv') 12 | return fname 13 | 14 | 15 | @pytest.fixture 16 | def p_marg(): 17 | fname = os.path.join(os.path.dirname(__file__), 18 | 'test_data/person_marginals.csv') 19 | return fname 20 | 21 | 22 | @pytest.fixture 23 | def hh_sample(): 24 | fname = os.path.join(os.path.dirname(__file__), 25 | 'test_data/household_sample.csv') 26 | return fname 27 | 28 | 29 | @pytest.fixture 30 | def p_sample(): 31 | fname = os.path.join(os.path.dirname(__file__), 32 | 'test_data/person_sample.csv') 33 | return fname 34 | 35 | 36 | def test_run(hh_marg, p_marg, hh_sample, p_sample): 37 | hh_marg, p_marg, hh_sample, p_sample, xwalk = zs.load_data(hh_marg, 38 | p_marg, 39 | hh_sample, 40 | p_sample) 41 | all_households, all_persons, all_stats = zs.synthesize_all_zones(hh_marg, 42 | p_marg, 43 | hh_sample, 44 | p_sample, 45 | xwalk) 46 | 47 | 48 | def test_run_multi(hh_marg, p_marg, hh_sample, p_sample): 49 | hhm, pm, hhs, ps, xwalk = zs.load_data(hh_marg, p_marg, 50 | hh_sample, p_sample) 51 | all_persons, all_households, all_stats = zs.multiprocess_synthesize(hhm, pm, 52 | hhs, ps, 53 | xwalk) 54 | -------------------------------------------------------------------------------- /synthpop/zone_synthesizer.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import multiprocessing 
3 | 4 | import pandas as pd 5 | 6 | from .synthesizer import synthesize, enable_logging 7 | from . import categorizer as cat 8 | 9 | 10 | def load_data(hh_marginal_file, person_marginal_file, 11 | hh_sample_file, person_sample_file): 12 | """ 13 | Load and process data inputs from .csv files on disk 14 | 15 | Parameters 16 | ---------- 17 | hh_marginal_file : string 18 | path to a csv file of household marginals 19 | person_marginal_file : string 20 | path to a csv file of person marginals 21 | hh_sample_file : string 22 | path to a csv file of sample household records to be drawn from 23 | person_sample_file : string 24 | path to a csv file of sample person records 25 | Returns 26 | ------- 27 | hh_marg : pandas.DataFrame 28 | processed and properly indexed household marginals table 29 | p_marg : pandas.DataFrame 30 | processed and properly indexed person marginals table 31 | hh_sample : pandas.DataFrame 32 | household sample table 33 | p_sample : pandas.DataFrame 34 | person sample table 35 | xwalk : list of tuples 36 | list of marginal-to-sample geography crosswalks to iterate over 37 | """ 38 | hh_sample = pd.read_csv(hh_sample_file) 39 | p_sample = pd.read_csv(person_sample_file) 40 | 41 | hh_marg = pd.read_csv(hh_marginal_file, header=[0, 1], index_col=0) 42 | hh_marg.columns.levels[0].set_names('cat_name', inplace=True) 43 | hh_marg.columns.levels[1].set_names('cat_values', inplace=True) 44 | 45 | xwalk = list(zip(hh_marg.index, hh_marg.sample_geog.unstack().values)) 46 | hh_marg = hh_marg.drop('sample_geog', axis=1, level=0) 47 | 48 | p_marg = pd.read_csv(person_marginal_file, header=[0, 1], index_col=0) 49 | p_marg.columns.levels[0].set_names('cat_name', inplace=True) 50 | p_marg.columns.levels[1].set_names('cat_values', inplace=True) 51 | 52 | return hh_marg, p_marg, hh_sample, p_sample, xwalk 53 | 54 | 55 | def synthesize_all_zones(hh_marg, p_marg, hh_sample, p_sample, xwalk): 56 | """ 57 | Iterate over a geography crosswalk list and synthesize in-line 
58 | 59 | Parameters 60 | ---------- 61 | hh_marg : pandas.DataFrame 62 | processed and properly indexed household marginals table 63 | p_marg : pandas.DataFrame 64 | processed and properly indexed person marginals table 65 | hh_sample : pandas.DataFrame 66 | household sample table 67 | p_sample : pandas.DataFrame 68 | person sample table 69 | xwalk : list of tuples 70 | list of marginal-to-sample geography crosswalks to iterate over 71 | Returns 72 | ------- 73 | all_households : pandas.DataFrame 74 | synthesized household records 75 | all_persons : pandas.DataFrame 76 | synthesized person records 77 | all_stats : pandas.DataFrame 78 | chi-square and p-score values for each marginal geography drawn 79 | """ 80 | hh_list = [] 81 | people_list = [] 82 | stats_list = [] 83 | hh_index_start = 1 84 | for geogs in xwalk: 85 | households, people, stats = synthesize_zone(hh_marg, p_marg, 86 | hh_sample, p_sample, geogs) 87 | stats_list.append(stats) 88 | hh_list.append(households) 89 | people_list.append(people) 90 | 91 | if len(households) > 0: 92 | hh_index_start = households.index.values[-1] + 1 93 | all_households = pd.concat(hh_list) 94 | all_persons = pd.concat(people_list) 95 | all_households, all_persons = synch_hhids(all_households, all_persons) 96 | all_stats = pd.DataFrame(stats_list) 97 | return all_households, all_persons, all_stats 98 | 99 | 100 | def synch_hhids(households, persons): 101 | """ 102 | Synchronize household ids with corresponding person records 103 | 104 | Parameters 105 | ---------- 106 | households : pandas.DataFrame 107 | full households table with id values sequential by geog 108 | persons : pandas.DataFrame 109 | full persons table with id values sequential by geog 110 | Returns 111 | ------- 112 | households : pandas.DataFrame 113 | households table with reindexed sequential household ids 114 | persons : pandas.DataFrame 115 | persons table synchronized with updated household ids 116 | """ 117 | households['hh_id'] = households.index 118 
| households['household_id'] = range(1, len(households.index)+1) 119 | persons = pd.merge( 120 | persons, households[['household_id', 'geog', 'hh_id']], 121 | how='left', left_on=['geog', 'hh_id'], right_on=['geog', 'hh_id'], 122 | suffixes=('', '_x')).drop('hh_id', axis=1) 123 | households.set_index('household_id', inplace=True) 124 | households.drop('hh_id', axis=1, inplace=True) 125 | return households, persons 126 | 127 | 128 | def synthesize_zone(hh_marg, p_marg, hh_sample, p_sample, xwalk): 129 | """ 130 | Synthesize a single zone (Used within multiprocessing synthesis) 131 | 132 | Parameters 133 | ---------- 134 | hh_marg : pandas.DataFrame 135 | processed and properly indexed household marginals table 136 | p_marg : pandas.DataFrame 137 | processed and properly indexed person marginals table 138 | hh_sample : pandas.DataFrame 139 | household sample table 140 | p_sample : pandas.DataFrame 141 | person sample table 142 | xwalk : tuple 143 | tuple of marginal-to-sample geography crosswalk 144 | Returns 145 | ------- 146 | households : pandas.DataFrame 147 | synthesized household records 148 | people : pandas.DataFrame 149 | synthesized person records 150 | stats : pandas.DataFrame 151 | chi-square and p-score values for marginal geography drawn 152 | """ 153 | hhs, hh_jd = cat.joint_distribution( 154 | hh_sample[hh_sample.sample_geog == xwalk[1]], 155 | cat.category_combinations(hh_marg.columns)) 156 | ps, p_jd = cat.joint_distribution( 157 | p_sample[p_sample.sample_geog == xwalk[1]], 158 | cat.category_combinations(p_marg.columns)) 159 | households, people, people_chisq, people_p = synthesize( 160 | hh_marg.loc[xwalk[0]], p_marg.loc[xwalk[0]], hh_jd, p_jd, hhs, ps, xwalk[0], 161 | ignore_max_iters=False, hh_index_start=1) 162 | households['geog'] = xwalk[0] 163 | people['geog'] = xwalk[0] 164 | stats = {'geog': xwalk[0], 'chi-square': people_chisq, 'p-score': people_p} 165 | return households, people, stats 166 | 167 | 168 | def 
multiprocess_synthesize(hh_marg, p_marg, hh_sample, 169 | p_sample, xwalk, cores=False): 170 | """ 171 | Synthesize for a set of marginal geographies via multiprocessing 172 | 173 | Parameters 174 | ---------- 175 | hh_marg : pandas.DataFrame 176 | processed and properly indexed household marginals table 177 | p_marg : pandas.DataFrame 178 | processed and properly indexed person marginals table 179 | hh_sample : pandas.DataFrame 180 | household sample table 181 | p_sample : pandas.DataFrame 182 | person sample table 183 | xwalk : list of tuples 184 | list of marginal-to-sample geography crosswalks to iterate over 185 | cores : integer, optional 186 | number of cores to use in the multiprocessing pool. defaults to 187 | multiprocessing.cpu_count() - 1 188 | Returns 189 | ------- 190 | all_households : pandas.DataFrame 191 | synthesized household records 192 | all_persons : pandas.DataFrame 193 | synthesized person records 194 | all_stats : pandas.DataFrame 195 | chi-square and p-score values for each marginal geography drawn 196 | """ 197 | cores = cores if cores else (multiprocessing.cpu_count()-1) 198 | part = partial(synthesize_zone, hh_marg, p_marg, hh_sample, p_sample) 199 | p = multiprocessing.Pool(cores) 200 | results = p.map(part, list(xwalk)) 201 | p.close() 202 | p.join() 203 | 204 | hh_list = [result[0] for result in results] 205 | people_list = [result[1] for result in results] 206 | all_stats = pd.DataFrame([result[2] for result in results]) 207 | all_households = pd.concat(hh_list) 208 | all_persons = pd.concat(people_list) 209 | all_households, all_persons = synch_hhids(all_households, all_persons) 210 | return all_households, all_persons, all_stats 211 | --------------------------------------------------------------------------------