├── .gitignore ├── .pytest_cache └── v │ └── cache │ ├── lastfailed │ └── nodeids ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── demos ├── .ipynb_checkpoints │ ├── census_api-checkpoint.ipynb │ ├── non_census_synthesis-checkpoint.ipynb │ ├── simple_synthesis-checkpoint.ipynb │ └── synthesize-checkpoint.ipynb ├── census_api.ipynb ├── input_data │ ├── Puma_Tract_Crosswalk_500.csv │ ├── hh_marginals.csv │ ├── hhld_marginals_500.csv │ ├── hhld_pums_500.csv │ ├── household_sample.csv │ ├── person_marginals.csv │ ├── person_sample.csv │ ├── pop_marginals_500.csv │ └── pop_pums_500.csv ├── non_census_synthesis.ipynb ├── simple_synthesis.ipynb ├── synthesize.ipynb └── synthesize.py ├── requirements-dev.txt ├── scripts ├── dl_and_slice_pums.py ├── synth_example.py └── tract_to_puma00_xref.py ├── setup.cfg ├── setup.py └── synthpop ├── __init__.py ├── categorizer.py ├── census_helpers.py ├── config.py ├── draw.py ├── ipf ├── __init__.py ├── ipf.py └── test │ ├── __init__.py │ └── test_ipf.py ├── ipu ├── __init__.py ├── ipu.py └── test │ ├── __init__.py │ └── test_ipu.py ├── recipes ├── __init__.py ├── starter.py ├── starter2.py └── tests │ ├── __init__.py │ └── test_starter.py ├── synthesizer.py ├── test ├── __init__.py ├── test_categorizer.py ├── test_censushelpers.py ├── test_data │ ├── hh_marginals.csv │ ├── household_sample.csv │ ├── person_marginals.csv │ └── person_sample.csv ├── test_draw.py └── test_zone_synthesizer.py └── zone_synthesizer.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | 
pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | -------------------------------------------------------------------------------- /.pytest_cache/v/cache/lastfailed: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /.pytest_cache/v/cache/nodeids: -------------------------------------------------------------------------------- 1 | [ 2 | "synthpop/recipes/tests/test_starter.py::test_starter", 3 | "synthpop/recipes/tests/test_starter2.py::test_starter2" 4 | ] -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - '3.5' 5 | - '3.6' 6 | - '3.7' 7 | - '3.8' 8 | 9 | install: 10 | - pip install . 11 | - pip install -r requirements-dev.txt 12 | - pip list 13 | - pip show synthpop 14 | 15 | script: 16 | - pycodestyle synthpop 17 | - py.test --cov synthpop --cov-report term-missing 18 | 19 | after_success: 20 | - coveralls 21 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing to SynthPop 2 | ======================== 3 | 4 | Style 5 | ----- 6 | 7 | - Python code should follow the [PEP 8 Style Guide][pep8]. 8 | - Python docstrings should follow the [NumPy documentation format][numpydoc]. 9 | 10 | ### Imports 11 | 12 | Imports should be one per line. 
13 | Imports should be grouped into standard library, third-party, 14 | and intra-library imports. `from` import should follow "regular" `imports`. 15 | Within each group the imports should be alphabetized. 16 | Here's an example: 17 | 18 | ```python 19 | import sys 20 | from glob import glob 21 | 22 | import numpy as np 23 | 24 | import urbansim.urbansim.modelcompile as modelcompile 25 | from urbansim.util import misc 26 | ``` 27 | 28 | Imports of scientific Python libraries should follow these conventions: 29 | 30 | ```python 31 | import matplotlib.pyplot as plt 32 | import numpy as np 33 | import pandas as pd 34 | import scipy as sp 35 | ``` 36 | 37 | 38 | Thanks! 39 | 40 | [pep8]: http://legacy.python.org/dev/peps/pep-0008/ 41 | [numpydoc]: https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt 42 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, UrbanSim Inc. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors 14 | may be used to endorse or promote products derived from this software without 15 | specific prior written permission. 
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # files to include in the source distribution on pypi (setup and README are included automatically) 2 | 3 | include LICENSE.txt 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | SynthPop 2 | ======== 3 | 4 | .. image:: https://travis-ci.org/UDST/synthpop.svg?branch=master 5 | :alt: Build Status 6 | :target: https://travis-ci.org/UDST/synthpop 7 | 8 | .. image:: https://coveralls.io/repos/UDST/synthpop/badge.svg?branch=master 9 | :alt: Test Coverage 10 | :target: https://coveralls.io/r/UDST/synthpop?branch=master 11 | 12 | SynthPop is a reimplementation of `PopGen`_ using the modern scientific Python 13 | stack, with a focus on performance and code reusability. 
14 | 15 | The SynthPop code is a completely new implementation of the algorithms 16 | described in this reference, and the paper as well as this repository should be 17 | cited if you use SynthPop: 18 | 19 | Ye, Xin, Karthik Konduri, Ram Pendyala, Bhargava Sana and Paul Waddell. A Methodology to Match Distributions of Both Households and Person Attributes in the Generation of Synthetic Populations. Transportation Research Board 88th Annual Meeting Compendium of Papers DVD. January 11-15, 2009 20 | 21 | The paper is available here: 22 | http://www.scag.ca.gov/Documents/PopulationSynthesizerPaper_TRB.pdf 23 | 24 | .. _PopGen: http://urbanmodel.asu.edu/popgen.html 25 | 26 | # Installation 27 | 28 | ``` 29 | virtualenv venv --python=python3.7 30 | source venv/bin/activate 31 | pip install -r requirements.txt 32 | cd synthpop/ 33 | python setup.py develop 34 | ``` 35 | To run `Synthpop` you need a Census API key, which you can get from [here](https://api.census.gov/data/key_signup.html). After you get and validate the API key you can add it as an environment variable to your environment by adding to `/venv/bin/activate` the following line: 36 | `export CENSUS='yourApiKey'` 37 | -------------------------------------------------------------------------------- /demos/.ipynb_checkpoints/simple_synthesis-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from synthpop.synthesizer import synthesize, enable_logging\n", 10 | "import synthpop.categorizer as cat\n", 11 | "\n", 12 | "import multiprocessing\n", 13 | "import os\n", 14 | "import seaborn as sns\n", 15 | "from functools import partial\n", 16 | "import pandas as pd\n", 17 | "\n", 18 | "import synthpop.zone_synthesizer as zs" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": 
[], 26 | "source": [ 27 | "hh_marginal_file = 'input_data/hh_marginals.csv'\n", 28 | "person_marginal_file = 'input_data/person_marginals.csv'\n", 29 | "hh_sample_file = 'input_data/household_sample.csv'\n", 30 | "person_sample_file = 'input_data/person_sample.csv'" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "hh_marg, p_marg, hh_sample, p_sample, xwalk = zs.load_data(hh_marginal_file, person_marginal_file, hh_sample_file, person_sample_file)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stderr", 49 | "output_type": "stream", 50 | "text": [ 51 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 52 | " adj = constraint / (column * weights).sum()\n" 53 | ] 54 | }, 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "Drawing 254 households\n" 60 | ] 61 | }, 62 | { 63 | "name": "stderr", 64 | "output_type": "stream", 65 | "text": [ 66 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 67 | " adj = constraint / (column * weights).sum()\n" 68 | ] 69 | }, 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "Drawing 306 households\n" 75 | ] 76 | }, 77 | { 78 | "name": "stderr", 79 | "output_type": "stream", 80 | "text": [ 81 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 82 | " adj = constraint / (column * weights).sum()\n" 83 | ] 84 | }, 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "Drawing 356 households\n" 90 | ] 91 | }, 92 | { 93 | "name": "stderr", 94 | "output_type": "stream", 95 | "text": [ 96 | 
"c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 97 | " adj = constraint / (column * weights).sum()\n" 98 | ] 99 | }, 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "Drawing 390 households\n" 105 | ] 106 | }, 107 | { 108 | "name": "stderr", 109 | "output_type": "stream", 110 | "text": [ 111 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 112 | " adj = constraint / (column * weights).sum()\n" 113 | ] 114 | }, 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "Drawing 601 households\n" 120 | ] 121 | }, 122 | { 123 | "name": "stderr", 124 | "output_type": "stream", 125 | "text": [ 126 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 127 | " adj = constraint / (column * weights).sum()\n" 128 | ] 129 | }, 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "Drawing 324 households\n" 135 | ] 136 | }, 137 | { 138 | "name": "stderr", 139 | "output_type": "stream", 140 | "text": [ 141 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 142 | " adj = constraint / (column * weights).sum()\n" 143 | ] 144 | }, 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "Drawing 556 households\n" 150 | ] 151 | }, 152 | { 153 | "name": "stderr", 154 | "output_type": "stream", 155 | "text": [ 156 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 157 | " adj = constraint / (column * weights).sum()\n" 158 | ] 159 | }, 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | 
"Drawing 342 households\n" 165 | ] 166 | }, 167 | { 168 | "name": "stderr", 169 | "output_type": "stream", 170 | "text": [ 171 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 172 | " adj = constraint / (column * weights).sum()\n" 173 | ] 174 | }, 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "Drawing 273 households\n" 180 | ] 181 | }, 182 | { 183 | "name": "stderr", 184 | "output_type": "stream", 185 | "text": [ 186 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 187 | " adj = constraint / (column * weights).sum()\n" 188 | ] 189 | }, 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "Drawing 228 households\n", 195 | "Drawing 857 households\n", 196 | "Drawing 748 households\n" 197 | ] 198 | }, 199 | { 200 | "name": "stderr", 201 | "output_type": "stream", 202 | "text": [ 203 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 204 | " adj = constraint / (column * weights).sum()\n" 205 | ] 206 | }, 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "Drawing 744 households\n" 212 | ] 213 | }, 214 | { 215 | "name": "stderr", 216 | "output_type": "stream", 217 | "text": [ 218 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 219 | " adj = constraint / (column * weights).sum()\n" 220 | ] 221 | }, 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "Drawing 953 households\n" 227 | ] 228 | }, 229 | { 230 | "name": "stderr", 231 | "output_type": "stream", 232 | "text": [ 233 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by 
zero encountered in double_scalars\n", 234 | " adj = constraint / (column * weights).sum()\n" 235 | ] 236 | }, 237 | { 238 | "name": "stdout", 239 | "output_type": "stream", 240 | "text": [ 241 | "Drawing 719 households\n" 242 | ] 243 | }, 244 | { 245 | "name": "stderr", 246 | "output_type": "stream", 247 | "text": [ 248 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 249 | " adj = constraint / (column * weights).sum()\n" 250 | ] 251 | }, 252 | { 253 | "name": "stdout", 254 | "output_type": "stream", 255 | "text": [ 256 | "Drawing 185 households\n" 257 | ] 258 | }, 259 | { 260 | "name": "stderr", 261 | "output_type": "stream", 262 | "text": [ 263 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 264 | " adj = constraint / (column * weights).sum()\n" 265 | ] 266 | }, 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "Drawing 183 households\n" 272 | ] 273 | }, 274 | { 275 | "name": "stderr", 276 | "output_type": "stream", 277 | "text": [ 278 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 279 | " adj = constraint / (column * weights).sum()\n" 280 | ] 281 | }, 282 | { 283 | "name": "stdout", 284 | "output_type": "stream", 285 | "text": [ 286 | "Drawing 286 households\n" 287 | ] 288 | }, 289 | { 290 | "name": "stderr", 291 | "output_type": "stream", 292 | "text": [ 293 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 294 | " adj = constraint / (column * weights).sum()\n" 295 | ] 296 | }, 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "Drawing 317 households\n" 302 | ] 303 | }, 304 | { 305 | "name": "stderr", 306 | "output_type": 
"stream", 307 | "text": [ 308 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 309 | " adj = constraint / (column * weights).sum()\n" 310 | ] 311 | }, 312 | { 313 | "name": "stdout", 314 | "output_type": "stream", 315 | "text": [ 316 | "Drawing 711 households\n" 317 | ] 318 | }, 319 | { 320 | "name": "stderr", 321 | "output_type": "stream", 322 | "text": [ 323 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 324 | " adj = constraint / (column * weights).sum()\n" 325 | ] 326 | }, 327 | { 328 | "name": "stdout", 329 | "output_type": "stream", 330 | "text": [ 331 | "Drawing 345 households\n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "all_households, all_persons, all_stats = zs.synthesize_all_zones(hh_marg, p_marg, hh_sample, p_sample, xwalk)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 5, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "all_persons, all_households, all_stats = zs.multiprocess_synthesize(hh_marg, p_marg, hh_sample, p_sample, xwalk)" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 6, 351 | "metadata": {}, 352 | "outputs": [ 353 | { 354 | "name": "stderr", 355 | "output_type": "stream", 356 | "text": [ 357 | "C:\\Users\\Juan\\Anaconda3\\envs\\synpop_py3\\lib\\site-packages\\matplotlib\\axes\\_axes.py:6448: UserWarning: The 'normed' kwarg is deprecated, and has been replaced by the 'density' kwarg.\n", 358 | " warnings.warn(\"The 'normed' kwarg is deprecated, and has been \"\n" 359 | ] 360 | }, 361 | { 362 | "data": { 363 | "text/plain": [ 364 | "" 365 | ] 366 | }, 367 | "execution_count": 6, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | }, 371 | { 372 | "data": { 373 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYQAAAELCAYAAADZW/HeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvFvnyVgAAIABJREFUeJzt3Xl8nGd56P3fNdr33bJsyZbkfYnjxLKdPSEhIaFpAiUhCSkE2hKgzSkc2p6GnsKBFN4D5+1bDj3NCaRsYclGKAfn4BDIZrLZsU3syLZsS5ZlW7L2zdqXmev9Y54x44lkjaTZfX0/H300era55/F4rrm36xZVxRhjjHFFuwDGGGNigwUEY4wxgAUEY4wxDgsIxhhjAAsIxhhjHBYQjDHGABYQjDHGOCwgGGOMASwgGGOMcSRHuwCzUVxcrJWVldEuhjHGxJW9e/d2qWrJTMfFVUCorKxkz5490S6GMcbEFRE5Ecxx1mRkjDEGsIBgjDHGYQHBGGMMYAHBGGOMwwKCMcYYIMiAICI3i8gREWkQkQen2H+NiPxeRCZF5A6/7e8RkX1+P6Mi8gFn3w9F5Ljfvo2he1nGGGNma8ZhpyKSBDwM3Ag0A7tFZJuqHvI77CTwceBv/c9V1ZeBjc51CoEG4Dd+h/ydqj4znxdgjDEmNIKZh7AFaFDVRgAReRK4HTgbEFS1ydnnOc917gCeU9XhOZfWGGNM2ATTZLQYOOX3d7OzbbbuBp4I2PY1EXlHRL4pImlzuKYxxpgQCaaGIFNs09k8iYiUARcBz/tt/gLQBqQCjwJ/Dzw0xbn3A/cDLFmyZDZPa4wJwuO7Tk677yNb7f/chSSYGkIzUOH3dzlwepbP82HgF6o64dugqq3qNQb8AG/T1Luo6qOqWqOqNSUlM6biMMYYM0fBBITdwAoRqRKRVLxNP9tm+Tz3ENBc5NQaEBEBPgAcmOU1jTHGhNCMAUFVJ4EH8Db31AFPq+pBEXlIRG4DEJHNItIM3Al8R0QO+s4XkUq8NYwdAZf+qYjUArVAMfDV+b8cY4wxcxVUtlNV3Q5sD9j2Jb/Hu/E2JU11bhNTdEKr6vWzKagxxpjwspnKxhhjAAsIxhhjHBYQjDHGABYQjDHGOCwgGGOMASwgGGOMcVhAMMYYA1hAMMYY47CAYIwxBrCAYIwxxmEBwRhjDGABwRhjjMMCgjHGGMACgjHGGIcFBGOMMYAFBGOMMQ4LCMYYYwALCMYYYxwWEIwxxgAWEIwxxjgsIBhjjAEgOZiDRORm4FtAEvBdVf16wP5rgP8JbADuVtVn/Pa5gVrnz5OqepuzvQp4EigEfg98VFXH5/dyjDGh9Piuk+fd/5GtSyJUEhMJM9YQRCQJeBi4BVgL3CMiawMOOwl8HHh8ikuMqOpG5+c2v+3fAL6pqiuAXuDP51B+Y4wxIRJMk9EWoEFVG51v8E8Ct/sfoKpNqvoO4AnmSUVEgOsBX03iMeADQZfaGGNMyAUTEBYDp/z+bna2BStdRPaIyE4R8X3oFwF9qjo5x2saY4wJsWD6EGSKbTqL51iiqqdFpBp4SURqgTPBXlNE7gfuB1iyxNorjTEmXIKpITQDFX5/lwOng30CVT3t/G4EXgEuAbqAfBHxBaRpr6mqj6pqjarWlJSUBPu0xhhjZimYgLAbWCEiVSKSCtwNbAvm4iJSICJpzuNi4ErgkKoq8DJwh3PofcAvZ1t4Y4wxoTNjQHDa+R8AngfqgKdV9aCIPCQiviGkm0WkGbgT+I6IHHROXwPsEZH9eAPA11X1kLPv74HPi0gD3j6F74XyhRljjJmdoOYhqOp2YHvAti/5Pd6Nt9kn8Lw3gIumuWYj3hFMJo7YuHRjEpfNVDbGGANYQDDGGOOwgGCMMQawgGCMMcZhAcEYYwxgAcEYY4zDAoIxxhjAAoIxxhiHBQRjjDGABQRjjDGOoFJXmAvLTOkpjDGJyWoIxhhjAAsIxhh
jHBYQjDHGABYQjDHGOCwgGGOMASwgGGOMcVhAMMYYA1hAMMYY47CAYIwxBggyIIjIzSJyREQaROTBKfZfIyK/F5FJEbnDb/tGEXlTRA6KyDsicpffvh+KyHER2ef8bAzNSzLGGDMXM6auEJEk4GHgRqAZ2C0i21T1kN9hJ4GPA38bcPow8DFVrReRRcBeEXleVfuc/X+nqs/M90UYY4yZv2ByGW0BGlS1EUBEngRuB84GBFVtcvZ5/E9U1aN+j0+LSAdQAvRhjDEmpgTTZLQYOOX3d7OzbVZEZAuQChzz2/w1pynpmyKSNttrGmOMCZ1gAoJMsU1n8yQiUgb8GPiEqvpqEV8AVgObgULg76c5934R2SMiezo7O2fztMYYY2YhmIDQDFT4/V0OnA72CUQkF/gV8I+qutO3XVVb1WsM+AHepql3UdVHVbVGVWtKSkqCfVpjjDGzFExA2A2sEJEqEUkF7ga2BXNx5/hfAD9S1Z8F7CtzfgvwAeDAbApujDEmtGYMCKo6CTwAPA/UAU+r6kEReUhEbgMQkc0i0gzcCXxHRA46p38YuAb4+BTDS38qIrVALVAMfDWkr8wYY8ysBLVimqpuB7YHbPuS3+PdeJuSAs/7CfCTaa55/axKaowxJqxsprIxxhjAAoIxxhiHBQRjjDGABQRjjDEOCwjGGGMACwjGGGMcFhCMMcYAFhCMMcY4LCAYY4wBLCAYY4xxWEAwxhgDWEAwxhjjsIBgjDEGsIBgjDHGEVT6a2OMSTSP7zo57b6PbF0SwZLEDqshGGOMASwgGGOMcVhAMMYYA1hAMMYY47CAYIwxBrCAYIwxxhFUQBCRm0XkiIg0iMiDU+y/RkR+LyKTInJHwL77RKTe+bnPb/smEal1rvmvIiLzfznGGGPmasaAICJJwMPALcBa4B4RWRtw2Eng48DjAecWAv8N2ApsAf6biBQ4ux8B7gdWOD83z/lVGGOMmbdgaghbgAZVbVTVceBJ4Hb/A1S1SVXfATwB574P+K2q9qhqL/Bb4GYRKQNyVfVNVVXgR8AH5vtijDHGzF0wAWExcMrv72ZnWzCmO3ex83gu1zTGGBMGwQSEqdr2NcjrT3du0NcUkftFZI+I7Ons7AzyaY0xodI3PM6u492MTwY2AJhEE0wuo2agwu/vcuB0kNdvBq4LOPcVZ3t5MNdU1UeBRwFqamqCDUTGmHnqGRrn1wfbOHS6H4/C4OgkN6wpjXaxTBgFU0PYDawQkSoRSQXuBrYFef3ngZtEpMDpTL4JeF5VW4EBEbnMGV30MeCXcyi/MSZMtu1v4Wj7AFctL2b5gmxeP9bF6IQ72sUyYTRjQFDVSeABvB/udcDTqnpQRB4SkdsARGSziDQDdwLfEZGDzrk9wD/hDSq7gYecbQCfAb4LNADHgOdC+sqMMXM2ODZJQ8cgl1cXcfP6Mt63diGjEx52NnZHu2gmjIJKf62q24HtAdu+5Pd4N+c2Afkf933g+1Ns3wOsn01hjTGRUdvibSa6uCIfgMUFGawqzeHV+i4ury4iLSUpyiU04WAzlY0x77L/VB8Lc9NZmJt+dtv1qxcwMuFm1/Ge85xp4pkFBGPMOXqGxjnZM3y2duBTUZjJ0sJMalv6o1QyE24WEIwx53inuQ+ADeV579pXXZLF6b4RxqxzOSFZQDDGnGPfqT6WFmVSkJn6rn2VRVkocLJnOPIFM2FnAcEYc9aZ0Qk6BsZYV5Y75f4lhZkI0NQ9FNmCmYiwgGCMOauldwTw9hdMJS0liUX5GTR1Ww0hEVlAMMac1dw7jEugLC9j2mMqizI51TPMpNtSWSSaoOYhGHOhe3zXyfPu/8jWJREqSXg1946wICed1OTpvysuLcri9WPdtPSNRLBkkXWh/HsHshqCMQYAVaW5d4TygulrBwBLi7zNSdZslHgsIBhjAOgdnmBkws3iGQJCTnoKxdmpnLCO5YRjTUbmvA63nuFQ6xkqCjOpLs6iKDst2kUyYdL
c6/3GX14wdYeyv8qiLA6ePoPHo7hctvptorCAYKY1NuHm579vZnjczZ4TvQjwiSurWL4gO9pFM2HQ3DtCskvOSVcxnSWFmew50UtT9xDVJfZ+SBQWEMy0Xm3oYmjczWeuXUZ6ShLfe62RV+s7EzYgzNSRmOiae0coy0snKYhv/AvzvEHjaPuABYQEYn0IZkoDoxO8Vt/F+sV5VBRmUpKTxtbqIuo7Buk4Mxrt4pkQ86hyum+ExUE0FwEsyElHgCNtg+EtmIkoCwhmSi8d7mDS4+GmtX9YIWtzZSHJLuENy4mfcDoHxhh3e2YcYeSTmuyiICuVo+0DYS6ZiSQLCOZdRsbd7G7qoaaykGK/TuTstGQursjn7ZO9jIxbcrNE4ptTsDg/uIAAUJqbzhELCAnFAoJ5l4bOQTwKlwSkPwa4YlkRE25ld5PlxE8kHWfGSBI55wvATEpz0zjeNcTYpH05SBQWEMy71LcPkJbsmnL4YVleBhUFGRw8bTnxE0nn4BhF2alBdSj7lOam4/YojZ02HyFRWEAw51BVGjoGWVaSPe2HQ3VJNi19I/bNMIF0DoxRkjO7OSalzvDUI23WbJQoLCCYcxzrHKJvZIIVpdMPJawqzsKjlhM/Ubg9Ss/QGCWznHRYnJ1KSpJYP0ICsYBgzvG7o50ArFiQM+0xSwszcQkc77KmgkTQMzSOR6F4ljWEZJeL6uJsjloNIWEEFRBE5GYROSIiDSLy4BT700TkKWf/LhGpdLbfKyL7/H48IrLR2feKc03fvgWhfGFmbn5X30lRViqFWe9eLcvHlxPfAkJi6BwYA5h1DQFg5cIcqyEkkBkDgogkAQ8DtwBrgXtEZG3AYX8O9KrqcuCbwDcAVPWnqrpRVTcCHwWaVHWf33n3+varakcIXo+Zh9EJNzsbu1lROn3twKeqKIvm3hEmLCd+3OscdALCLGsIAKtKs2nuHWFwbDLUxTJREEwNYQvQoKqNqjoOPAncHnDM7cBjzuNngBtEJLBH8h7gifkU1oTXnqZeRic8rAwiNUVVcRZuj3LK+hHiXufAGDnpyaSnJM363JXOl4d6qyUkhGACwmLglN/fzc62KY9R1UmgHygKOOYu3h0QfuA0F31xigACgIjcLyJ7RGRPZ2dnEMU1c/VWUw8u8X7Yz2RpURaC9SMkgq7BsVnNP/C3aqE3INhIo8QQTECY6oNaZ3OMiGwFhlX1gN/+e1X1IuBq5+ejUz25qj6qqjWqWlNSUhJEcc1cvdPcx/IF2aQF8U0xIzWJhXnpFhDinKrOacipT0VBJmnJLho6LKdRIggmIDQDFX5/lwOnpztGRJKBPMB/KuvdBNQOVLXF+T0API63acpEiapS29zPhvJ3z06eTlVxFid7hpn0WD9CvOoeGmdkwj2nDmUAl0uoKs6yLwYJIpiAsBtYISJVIpKK98N9W8Ax24D7nMd3AC+pqgKIiAu4E2/fA862ZBEpdh6nALcCBzBR09I3QvfQOBeX5wV9zpLCTCY9SseZsTCWzITTMeeb/VxrCADVJVk0WkBICDMGBKdP4AHgeaAOeFpVD4rIQyJym3PY94AiEWkAPg/4D029BmhW1Ua/bWnA8yLyDrAPaAH+fd6vxsxZbbM3FcVFs6ghlOV5E6G19ifuYuuJzvdBPtcaAvyhpmgjzuJfUAvkqOp2YHvAti/5PR7FWwuY6txXgMsCtg0Bm2ZZVhNG+5v7SUkS1pTlcOj0maDOKXJmqp7uH7V/zDh1rGOQZJeQl5ky52tUF2efHXFmi+XEN5upbABvh/LqhbmkJQc/9NAlQlleBq19tmBOvDrWOUhJThquqQf5BaWqxDsqzZLcxT8LCAaPR6lt6eeiWfQf+JTlpdPaP4LTZWTizLHOoTkPOfWpdoYpW8dy/LOAYGjqHmJgdHJWHco+ZXkZjE166B2eCEPJTDiNTrhp7h2eV4cyQH6mN9VJY5cNPY13FhAMtS3eDuXZDDn1KXMWW7eO5fhzons
Yj86vQ9mnqjjLmowSgAUEw/5T/aSnuFgRRMqKQKW53sXWT1s/Qtw51jn/Iac+1cU29DQRWEAw1Lb0sW5RHslJs387pCa7KMlJsxpCHPLNQZhvHwJ4O5Y7B8YYGLWmw3hmAeEC5/Eoda0DrFuUO+dreDuWrYYQbxq7hliUl05q8vw/Bnwdy01dluwwnllAuMD5UhevKZtPQMigf2SCYUuBHFeOdQ6ybA7NhFPxzT+wjuX4ZgHhAneo1TsJbV4BId/pWD5jtYR4oaocc9bODoUlhZmI2FyEeGcB4QJ3uO0MIrDyPGsoz+RsCos+60eIF+1nxhgad7OsZOZU58FIT0mivCDDOpbjnAWEC1xd6xmqirLITA0qi8mUstOSyUlLpt2S3MWNRmeEUShTTVQVZ3PcmozimgWEC1xd68C8mot8SnPTaR+wJqN44RtyGqomI/B2LB/vHEqoWes7G7vZcaSDUz3DuD2J87qmM/evhSbuDYxOcLJnmA/XlM/7WqW5abzV1IPHo7hcc8+LYyLjWOcQWalJlObOf8ipT3VJFkPjbjoGxijNTQ/ZdaNlT1MP2/b7ln5ppzArlb+6bjkZqbNfajReWA3hAnbUWQd39cLQ1BAm3Epzr/UjxAPfCKNpVq6dE9/Sq4nQsdzcO8y2/adZXpLNg7es5kOXltM7NM7LRzqiXbSwsoBwATvU6g0Ia+YxB8FngfON8Igtth4XGjuHQtpcBIkz9HR4fJLHd50kOy2ZuzZXkJuewqalBWxaWsAbx7roHEjcvjILCBewutYz5KYnsyhv/tX7BU76g6MWEGLe8PgkLX0jZyeThUpZbjppyS6Ox3kNYXdTL30jE9yzZQlZaX9oVb9xbSkpSS6217ZGsXThZQHhAna49Qyry3JD0myQnpJEfmYKR9osIMQ6X5NOqCal+fjWV47noaeqyt4TPVQWZVJRmHnOvpz0FK5fvYAj7QO8Vt8VpRKGlwWEC5THoxxuG2BtCEYY+ZTmpFsNIQ6EY4SRT3VJVlyvi3Cie5iuwXFqlhZOuf/y6iIyUpL42d5TES5ZZFhAuECd7BlmeNzNmrKckF2zNDedY52DtrZujGvsHMIlsLQoc+aDZyne11fee6KX1GQX6xdPvTZIcpKLdYtyeeFQO6MT7giXLvwsIFygDrfNP2VFoNLcNCbcSlMcf0OcypG2M5zsSZykbcc6BykvyCQ9JfTDJ33rK8fj/RqbcFPb0s+GxXnnTfi3oTyfoXE3ryTgiKOgAoKI3CwiR0SkQUQenGJ/mog85ezfJSKVzvZKERkRkX3Oz7f9ztkkIrXOOf8qoRz/ZmZ0qHUAl8DK0tDWECBxRhp5VNle28pjb57g2zuO8Z0dx6hPgNd2rHMoZCkrAvnWV47HjuXaln7G3R5qKqduLvKpKs6iKCuVZ99JvM7lGQOCiCQBDwO3AGuBe0RkbcBhfw70qupy4JvAN/z2HVPVjc7Pp/22PwLcD6xwfm6e+8sws1XXeoaq4qyQfkv0LtYORxOgY3nC7eGnO0/wWkMXl1UXcuuGMs6MTvDDN5rieu0Hj0dp7AxdUrtAvpFL8Tj0dF9zH8XZaVQUZJz3uCSXcPP6hbxU18HweGJl+A2mhrAFaFDVRlUdB54Ebg845nbgMefxM8AN5/vGLyJlQK6qvqneee4/Aj4w69KbOatrPRPS5iKAlCQXlcVZCVFDeK2hi7q2Af54Qxm3XbyYK5YV81fv8c5SfXZ/a9ymZ2jpG2Fs0hPyEUY+vvWV461jeWzSzYmuYdaU5QQ16u7WDYsYmXDz0uHEajYKJiAsBvy71JudbVMeo6qTQD9Q5OyrEpG3RWSHiFztd3zzDNc0YXJmdILm3pGQBwSAVaU5HG2Pv2+H/kYn3LxW38XqhTlcvqz47PbM1GRuXFtKU/fQ2XWo441vSGio5yD4qyrO4licNRkd7xzCrRp0E+qWqkJKctL4VYI1GwUTEKYKl4Ffj6Y7phVYoqqXAJ8HHheR3CC
v6b2wyP0iskdE9nR2dgZRXDMT31yBUI4w8llZmkNT91Bcj8B4s7GbkQk3169e8K59mysLWZSXznMH2hifjL+RNL5lM8NVQwAnyV2c1RCOdgyQkiQsLQxu5FWSS7hxbSmv1ncxGacjqqYSTEBoBir8/i4HTk93jIgkA3lAj6qOqWo3gKruBY4BK53j/TOqTXVNnPMeVdUaVa0pKSkJorhmJnUhWBRnOqsW5qAKDR3xWUvwrx2UF7z7w8Elwh9fvIj+kQl2N/VEoYTzc6xzkLyMFIqyUsP2HPG4vvLRdm+/ymzWFb9qeTGDY5Psb47P2uJUgnn1u4EVIlIlIqnA3cC2gGO2Afc5j+8AXlJVFZESp1MaEanG23ncqKqtwICIXOb0NXwM+GUIXo8JQl3rAHkZKSwMQ0ZKX5U7Xmcs7zxP7cBnaVEWi/MzePtkbwRLFhrHOgdZVpIV0qR2gXzNUfFSS+geHKNnaJwVsxxxd3l1ESLwekPizFqeMSA4fQIPAM8DdcDTqnpQRB4Skducw74HFIlIA96mId/Q1GuAd0RkP97O5k+rqu9r1WeA7wINeGsOz4XoNZkZeDuUg+s8m63KokxSk1xxOWNZVXmrqYflJdlT1g78XbIkn9P9o7T1x9caEI2dQyFdFGcqvhFMvhnRse6oU5tdOctmtIKsVNYvyuO1BAoIQa2HoKrbge0B277k93gUuHOK834O/Hyaa+4B1s+msGb+3B7lSNsAd2+pmPngOUhOcrFsQXZcjjQ62TNM3/AE711TOuOxG8rz2V7bytunerklrywCpZu/M6MTdAyMhW3Iqc/SoiySXEJ9nAwuqG8foDArlaLs2a8NceXyYr73WiNDY5PnJMKLVzZT+QJzsmeYkQl3WPoPfFaVZsfNh4G//c39JLskqPxO2WnJrFqYy75TfXGzktbZpHZhmpTmk5rsorIoMy76kSY9Hho7h+a8pvhVy4uZcHtrlonAAsIF5myHcggWxZnOyoU5tPSNxFWn4qTbQ21LP6sX5gQ9We+SinwGRifjpmkkEiOMfFYsyImLgNDcM8K428PyOdaaaioLSE128XqCZD+1gHCBqWs9Q5JLWDHHb0TBWOV0zsXTfIQ3jnUzNDbJhvL8oM9ZvTCHjJSkuOlcbuwaJNklLAlyaOV8LF+QzYmeYcYmY3v48fFub62psmhutab0lCQ2VxYkTD+CBYQLzMHTZ1hWEtqUFYFWng0I8dOPsG3/adKSXaxaGPxIE1/my8NtA3GR3fNo+yBLizJJmcXQyrlaUepNctfUFdtJ7o53DbEwN53MebT/X7m8mMNtAwmxkpoFhAtMbUv/tKl9Q2VxfgaZqUlxM/R0bNLN8wfaWLcod9YflqsX5jI26YmLOQlH2gZCsn52MHwd17HcbOT2KCe7h6mc56ztK5zZ7Dsbu0NRrKiygHABaT8zSufAGBeFOSC4XMKK0py4qSHsbOxhYGxyToFy2YIskl3CS3WxndNmeHySkz3Ds6oBzceykmxEoL4jdt8DLX3e/oOqeQaE9YtyyUpNYtdxCwgmjtQ6MyrDHRDAO9IoXgLCS3XtpKe45jQcMy05iarirJhPcubrzwlluvPzyUhNorwgg/oYriH4Js7NNyAkJ7moqSxkV2Ps1xJnYgHhAlLb0o9LYO2i8DcbrCzNoWtwnK7B2G5XVVVeOtLBlcuK59y2vnphDo1dQzTG8GgjX0ry1RGqIYB3pNGxmA4Ig5TkpJEdgvkDW6sLqe8YjPn3+0wsIFxAalv6WVaSTWZq+CfQ+JomYr0foaFjkFM9I1y/ZvpUFTNZ5bTLx3It4XDbAOkprnctHB9Oyxdk09g5FJPJ3ybdHk50D1M1x9FFgbZWeZM7v3U8vmsJFhAuILUt/RFpLgLOTu46dPpMRJ5vrnwf4u9ZNfeAUJiVysrS7JgOCEfbB1hZmkOSK3ILEy5fkM2428Op3thbUKiudYCxyfn3H/hsKM8jIyWJXXHesWwB4QLh61A
O9wgjn6LsNMry0jlwOrYzQb54uIM1Zbksyj//KlkzuX51KW8d7+FMjE7GO9w2ELH+A58VzgS4WFx21NcBHKqAkJLkoqaygF1WQzDx4GyHcnlkAgLAukW5HIzhGkL/8AR7T/Ry/er5p1V/z6oSJj3KGzE4Qal7cIyuwbGI9h/AH2ZEN8Rg38rOxh6KslLJzUgJ2TW3VhVyuG2A3qHxkF0z0iwgXCBqW/oRIag8PaGyblEexzoHY3bd2R31nbg9yvWrZ05mN5NLlxaQk5bMjqOxt4iTL9FgpGsIueneFOuxltfK41F2N/WErHbgs7Xa248Qz7UECwgXiANOh3IkMzKuW5SLqre9Nha9fLiDwqxUNlYEn65iOilJLq5cXswrRzpjbr3laIww8lm1MOds/qxYcbhtgP6RiZAHhA3leaQlu+J6PoIFhAtEJDuUfXz9FYdisB/B7VFeOdLBdStLQtbReu2qElr7R2Nu7P2R9gHyM1MoyZl9euf5Wrcol/qOwZhaUjXU/Qc+aclJXLqkIK7nI1hAuAC09I3QMTDGhgj2HwCU5aVTkJnCgZbY+oYIsO9UL73DE7znPCujzda1K719ETuOxFaz0ZG2AVaVhmdBpJmsX5yH26MxNUlxV2MP5QUZ5GeGfhnRy6qLqGs7Q/9wbA4umIkFhAvAHifPzubKwog+r4iwfnEeB1tjr4bw0uEOklzCNStDt073ovwMVpZmx1Q/gsejHG0fjFjKikDrnEmQsTK4wLcqnm/eQKhtrS5ElbjIbTUVCwgXgN1NPWSnJUelDXntolyOtA0wPhlbk5NerOugZmkBeSEcZQLeWsJbx3sYGouNjvTGriEG55inKRQqCjLJSUvmQEtsfCmo7xikZ2icrdXh+XK0sSKf1GRX3Ca6s4BwAdh9vJdLluSTHIG0x4HWLcpjwq0xleTsdN8Ih9sGuGEes5Onc92qBYy7PTHzgbD/VB9ASDrO58LlEtbE0PBj38SxrVXhCQjpKUlsrMiP25FGFhASXP/wBEfaB9gS4eYin/W+JoMY6kfwzSi+PoT9Bz41lQVkpCTFTLPR/uY+slKTwr6O8vl414w4ExNLje483sPC3PSwLhJ0WVUhB0/3x+zyudYUAAAZkElEQVQkxfOxgJDg9p70flOpiVJAqCzKIis1KaZmLL98uIMlhZlh+ZBMS07iimVFsRMQTvWxoTw/oikrAq1flMfohCfqyf9UlV2NPWytLgxrB/tl1UV4FPY2xcdKev6CCggicrOIHBGRBhF5cIr9aSLylLN/l4hUOttvFJG9IlLr/L7e75xXnGvuc35C/3XNsLupl2SXRLXJ4KLyPN4+2ReV5w80PD7Jaw1dXL96Qdg+FK5dVcKJ7uGz6ZWjZXTCzaHWM1wcpX97n3WLY6Nj+XDbAF2DY1y5vDisz3PJkgJSkiRmmg1nY8aAICJJwMPALcBa4B4RWRtw2J8Dvaq6HPgm8A1nexfwx6p6EXAf8OOA8+5V1Y3OT+xmBotje5p6WL84j4zU8C2ZOZPNld4q9GAMdLT+7mgXY5Meblo7/9nJ07lupfe7zY4j0X1L17WeYcKtbKyIToeyz7KSbFKTXVHvWH6t3ptW5OoV4Q0IGalJXFyez8447EcIpoawBWhQ1UZVHQeeBG4POOZ24DHn8TPADSIiqvq2qp52th8E0kUk8rNjLlCjE272n+pnS5g60IJVU1mIR2FfDNQSfnOojbyMFDaH8Z4sKcqkqjgr6s1Gf+hQLohqOVKSXKxemBP1GsJrDV0sK8miLG9+iQyDcVl1EQda4q8fIZiAsBg45fd3s7NtymNUdRLoBwIH+n4IeFtV/VeQ+IHTXPRFmab+LiL3i8geEdnT2Rkb7bLxoraln3G3h5ql0f1AuHRJPi6J/tjsSbeHF+s6uGH1grAvNH/tyhLebOyO6gzdfaf6KM1NY2FeetTK4ONNdNgftbQeY5Nudh3v5uoVoZt3cj7XrCzBHaPJDs8nmP8VU31
QB/6rnvcYEVmHtxnpU37773Wakq52fj461ZOr6qOqWqOqNSUlkfnHTBRvNHQjEr0OZZ+c9BRWL8xlz4noBoS3mnroH5ngpnXhay7yuXZVCaMTnqgumLK/uZ+Ly6Pbf+Bz0eJ8zoxORq1fZe+JXkYnPFwV5v4Dn0uW5MdsssPzCSYgNAMVfn+XA6enO0ZEkoE8oMf5uxz4BfAxVT3mO0FVW5zfA8DjeJumTAi9dLidjRX5FGaFfor+bG2uLODtk31MRHH1rN8cbCct2RXS2cnTuayqiNRkV9Q+EPqGxzneNcTGJbERELZUeWup0QqQr9V3kewSLlsWnhnKgXzJDnfEYLLD8wkmIOwGVohIlYikAncD2wKO2Ya30xjgDuAlVVURyQd+BXxBVV/3HSwiySJS7DxOAW4FDszvpRh/HQOj7G/u54YwjLWfi5rKQobH3VHLfKmq/PZQO1evKI7IEqIZqUlsrSrklSh1LO931r/YGCM1hGUl2RRnp0ZtwtZrDV1csiQ/JOsnB+vaVSWc7h+lIcaSHZ7PjAHB6RN4AHgeqAOeVtWDIvKQiNzmHPY9oEhEGoDPA76hqQ8Ay4EvBgwvTQOeF5F3gH1AC/DvoXxhF7pXDnu/mYYi138o1FR6vyHujtLY7IOnz9DSN8JNaxdG7DmvX72AY51DHIvC+Pudjd0ku4QNUR5y6iMibKkqZFdjd8S/MfcOjVPb0s9VyyPb5OyricZTs1FQPWuqul1VV6rqMlX9mrPtS6q6zXk8qqp3qupyVd2iqo3O9q+qapbf0NKNqtqhqkOquklVN6jqOlX9rKrGTn7cBPDS4Q7K8tJZUxadpGaByvIyKC/IOJtoL9K27T9Nskt4bxiHmwa6eb03+DxX2xqx5/TZcaSTTUsLIvqNeCZbq4o43T9Kc4TXWP5dfSeqcFWYh5sGWpyfwYoFsZXscCY2UzkBjU26ebW+k/eEcfLVXGyuLGR3U2/EvyG6Pcr/ebuF61YtiGh/SlleBpuWFrC9ti1izwne5sJDrWci0lcyG76EcpGesPX8wTZKctK4JAq1pWtXlrCrsSdmVw0MZAEhAb11vIehcXfM9B/4XFZdSNfgGIfbIpvo7vWGLjoGxviTSwNHS4ffLesXcqj1DE0RHF3z6lHvUMdrYywgrFyQQ35mSkT7EUbG3bx8uJP3rSvFFYX0HdeuKompZIczsYCQgF6s6yAt2cUVyyJbRZ6Jt8YCvz3UHtHn/cXbLeSmJ4clmd1MbrmoDIDnDkSulrDjaCfF2akRXT87GC6XsKWyMKJLTO442snIhJtb1pdF7Dn9bakqJCctOeK1xLmygJBg3B7l+YNtXLW8OKrpKqayICedjRX5EQ0IQ2OT/PpAG3+0YRHpKZG/H4vzM9hYkc9zByLTj+D2KK/Wd3LNipKofCOeydbqIk71jHC6LzL9CM8fbCM/MyVqs/XTkpO4ad1Cnj/Yxthk7HeTWkBIMDuOdtDaP8odm8qjXZQp3bi2lNqWflr7I/eBMDLhjkpzkc/7L1rIO839nOoZDvtzHWjpp3d4Iub6D3x86xBEopYwPunhhbp2blxTGvaZ6edz68VlDIxO8rujsT9r2QJCgnnirVMUZ6dyw5rYGG4ayJdU7oUI1RKe2n2KisKMqKbv8DVX/OLtlrA/1++cES2RHlETrDVluRRnp0aklvjGsS4GRifPjvaKlquWF5OfmcKz+wPn88YeCwgJpOPMKC8d7uBDm8pJTY7Nf9plJdlUFWfxmwh8IOw/1ceu4z3cd3llVEdbVRRmcvWKYp546ySTYZ6p/cLhDtYvzqU4OzZzSCa5hFvWl/HS4Y6wLzO6vbaV7LTksKe7nklKkotb1pfxQl07I+Ox3WwUm58aZk5+trcZt0e5q6Zi5oOjRES4cW0pOxu7w54J8tFXG8lJT+buLUvC+jzBuHfrUlr7R3n5SPjGpB9tH2D/qT4+sDF6zWPBuHVDGaMTHl48HL5Z3P0jEzy7v5X3X7Q
wKn1Hgf54QxnD4+6zq/XFKgsICcLjUZ7afYqtVYVUR3G5xGDcuLaUCbfychj/c5zqGea52lY+snVJTEzOeu+aBZTmpvHTXSfC9hxPvnWKlCThg5fEdkCoqSxkQU4a/zeMTSg/39vMyISbj11eGbbnmI2t1UUUZ6fxy33hbzacDwsICeK3de2c7Bnmnhj4NjyTS5cUsDg/g8d3nQzbc3zvteMkuYRPXFEVtueYjeQkF3dvXsKOo51h6VwenXDzH283c9O6hRTFaHORT5JLeP9FZbxytJOBMNQSPR7lJztPsLEin/WLo7s4kE+SS/jQpsW8UNcekcEFc2UBIQFMuj1849eHqS7J4tYN0RlvPRtJLuHjV1Sy63hPWFbR6hgY5andp7jt4sUxsRaAzz1bluAS4adhCIS/OdRO3/AEd2+O3eZCf398cRnjk56wdC6/caybxq4hPnb50pBfez4+fkUlLhF+8HpTtIsyLQsICeCpPado7BziwZtXkxzF4XWz8eHNFWSmJvH9146H/Nr/z6/qcHuUB65fHvJrz8fCvHTet66Un+w8Qdfg2MwnzMKTb52kvCCDK2NsMuJ0LqkoYFFeelhG3vzozSYKs1J5/0Wx9eWoLC+DWzeU8dTuk/SPxOZKavHx6WGmNTQ2yTd/W8/mygJujGDitvnKy0jhwzUVPPvOaTrOjIbsum8e6+b/7DvNp66tpqo4K2TXDZW/uWkVIxNu/vXF+pBd82j7AG8c6+aumoqYnIw2FZdL+JNLy3nlaCf17aFLZdLQMcALde3ctbkiJjqTA/3F1dUMjbt54q3wNZfOR/R728y8/K+XGugaHOM7H90U9NDKcLbdz8YnrqzksTeb+PHOE/zNTavmfb0Jt4cv/fIA5QUZ/OV1sVU78FlWks09Wyp4fNdJPn5F5bwHAKgqX3n2ILnpyXxka+z3H/n7s6uq+MHrx/nWi/X820cunff1vPfiEFlpyfzFVbHRdxRo/eI8Lq8u4oevN/FnV1bF3PBwCwhx7MW6dr694xgfrilnk9/Eq1j5wJ/J0qIsblxTyg9eb+KuzRWUF2TO63rfeqGe+o5BvndfTcyl7fD32RtW8ovft/CNXx/mOx+tmde1fn2gjdcbuvnKbetivjM5UGFWKvddUckjO47x1+0DrCydX6r2F+s6eLW+iy/eujam78Wnr1vGfd9/i+++1hhzX1xiKzyZoDV1DfG5p/axfnEuD92+PtrFmbMv3roWVeXvfvYOHs/c02L/fG8z//ZyA3fVVMTsLG2fkpw0Pn3tMp4/2M72eayVMDLu5qu/qmP1whzujbPagc8nr64mKzWZb70wvya0sUk3//SrQyxfkB1zncmBrl1Zws3rFvKtF+ojmgU3GBYQ4lDHwCif+vFeklzCI/duism20mBVFGbyj7eu5c3Gbh57s2lO19jZ2M2D//EOVywr4qsfjI/geP+11WxaWsDfPL1/TsuKqipff66Olr4RvnzburgZTBCoICuVj19Rya9qW9l7Yu6r6f3Lb45yonuYL966Nqp5i4L1ldvXkZrk4h9+URtTay7H/p0z56hvH+CDD7/ByZ5hHv7IpVQUzq+ZJRbcvbmC61aV8PXnDs96EfZn95/mz364myWFmTxy76a4+DAAbxbMR+69lNyMZD75oz30DI3P6vz/+UI9j715gj+7sorLqiOzcHy4fPKaaioKM/jMT/bSPocBBo/vOsl3ftfIvVuXxNwaENMpzU3n729ZzRvHusMyDHmu4uN/j0FV+eW+Fv7kkTcYd3t4+lOXRz1HS6iICP/jQxtYnJ/Bvd/dyc/2nJrxnOHxSR569hD/6Ym3WVOWy+OfvIy8zJQIlDZ0FuSm852P1tAxMMYdj7wRVE1BVfnfrzTwrRfruXNTOf/4R2siUNLwystI4d8/VsPg2CSf+vFeRieCz/fz8pEOvvjLA1y3qoSv3LYujKUMvY9sWcLVK4r50i8PzKvpMJSsUzkO7D/Vx9e21/HW8R4
2lOfxv++9dN4dsLFmQW46v/jLK/nMT/fyd8+8w6v1XfzpZUvZXFlwdvSUqtLSN8Ize5t57I0meocn+MSVlfzD+9eQkuSasTM9FkfhbKzI50d/toW/fuJtPvDw63zhltXcWVNB1hTpNg609PPlbQfZc6KXP9pQxtc/tAGXS+JmEMH5rF6Yy798+GI+/ZPfc/+P9/LPd2xgQe70kwon3R4eeeUY33qxnlWlOfzbRy6Nu2Yzl0v49p9u4mPff4u/fuJt0pJdUe//kmDar0TkZuBbQBLwXVX9esD+NOBHwCagG7hLVZucfV8A/hxwA3+tqs8Hc82p1NTU6J49e4J+cfHsdN8ILx7u4Ondp6ht6acgM4W/e99q7tpcQdIMY82j+QEx3w/dCbeHf/7NEX668ySDY5OU5qZRkpNGZkoyjV2DdA16m1beu2YBn752GTWVf1j4JJ5fd+fAGP/5qX281tBFZmoS71u3kMqiLLLSkmjtH+Wt4z0cON1PYWYq/+XmVdy56Q9zDuL5dQf66a4TPPTsIdJTkviH96/m/ReVkZP+h5rf6ISbV4508J3fNfL2yT5uu3gR/3T7+jnVDudz30L5us+MTnDvv+/i4Ol+Pnl1NZ9778qQj5ITkb2qOuOQthkDgogkAUeBG4FmYDdwj6oe8jvmL4ENqvppEbkb+KCq3iUia4EngC3AIuAFYKVz2nmvOZVEDAijE25O941wqneE+vYB6loHePtkL43O6IPVC3O4Z8sSPnDJYvIygnvTJ8IHxPD4JNtr23i9oYv+kQkGxyapKMjk4oo8rlhWxPIF7x6iGO+vW1XZc6KXn+9t5tcH2+gb9s5mTUt2ccmSfK5YVsx9V1S+630Q7687UGPnIH/7s/38/mQfSS7h4vI8stKSGR53c6RtgMGxSYqz0/jirWu4fR6ZXWMlIIA3KPz37XU88dYplhZl8qlrlvFHG8qC/j8/k1AGhMuBL6vq+5y/vwCgqv/d75jnnWPeFJFkoA0oAR70P9Z3nHPaea85lbkGhIHRCUSE9GTXnKqVqsqkRxmf9Hh/3N7fY35/j064GRl3MzzuZnh8kpEJN0NjbkbGJ73bJtwMj03SNzJB79A4vcPe3wMBOeFLctK4aLH3Q+/qFSWsLM1+14SzWG4iiGazTKJ9ME66PQyNuclITTrvBKZEe93gXQp0d1MPr9V3sbOxm0mPkpmaxJLCTP5oQxmXVxfx9J7msDx3MML1ut841sVDzx7icNsAqckurlxWxEXl+WxYnMdly4rmnLk32IAQzNUXA/69fM3A1umOUdVJEekHipztOwPO9YX0ma4ZMp99ct/ZPORJLm9gSEtJwiWgCh5VFG+WRMW7TZ1tbo8y7vYw15FhgneBjJRkF2nJLjJSkshKS6IwK5Xyggyy0pLJz0ghPzOVkpy0c/7B957onddQvGiI5WAVTvPtv7hQ79t0klzCZdVFcT+CarauWFbMc5+9mtqWfn6+t5k3G7vZcbQTj8ILn79myppxKAUTEKZqsA78eJzumOm2T/V1Z8qPXBG5H7jf+XNQRI5MU85AxUAsL2Jq5ZufuCrfvVEsyDRCcv/C+Lpi+t/33iiUb8U3ZnV4YPmCmq0XTEBoBvxz6pYDgSkKfcc0O01GeUDPDOfOdE0AVPVR4NEgynkOEdkTTBUpWqx882Plmx8r3/wkavmCaVDfDawQkSoRSQXuBrYFHLMNuM95fAfwkno7J7YBd4tImohUASuAt4K8pjHGmAiasYbg9Ak8ADyPd4jo91X1oIg8BOxR1W3A94Afi0gD3prB3c65B0XkaeAQMAn8laq6Aaa6ZuhfnjHGmGAF1WWtqtuB7QHbvuT3eBS4c5pzvwZ8LZhrhtism5kizMo3P1a++bHyzU9Cli+oiWnGGGMSX3zN9TbGGBM2CRMQRKRJRGpFZJ+I7HG2FYrIb0Wk3vldMNN1Ily+L4tIi7Ntn4i8P4rlyxeRZ0TksIjUicjlMXb/pipfTNw/EVn
lV4Z9InJGRD4XK/fvPOWLifvnlPE/i8hBETkgIk+ISLoz6GSXc/+ecgagxFL5figix/3u38Yolu+zTtkOisjnnG2zfv8lTJORiDQBNara5bftfwA9qvp1EXkQKFDVv4+h8n0ZGFTVf45GmfyJyGPAq6r6Xec/XibwD8TO/ZuqfJ8jRu6fj3hTvbTgnWj5V8TI/ZumfJ8gBu6fiCwGXgPWquqIMxBlO/B+4D9U9UkR+TawX1UfiaHyXQf8X1V9JtJlCijfeuBJvCmCxoFfA58BPsks338JU0OYxu3AY87jx4APRLEsMUtEcoFr8I4WQ1XHVbWPGLl/5ylfLLoBOKaqJ4iR+xfAv3yxJBnIEO88pkygFbge8H3YRvv+BZZvynlTUbIG2Kmqw6o6CewAPsgc3n+JFBAU+I2I7BXv7GaAUlVtBXB+L4ha6aYuH8ADIvKOiHw/ik0y1UAn8AMReVtEvisiWcTO/ZuufBAb98/f3XgTOkLs3D9//uWDGLh/qtoC/DNwEm8g6Af2An3OBxycm/Ym6uVT1d84u7/m3L9vijfrczQcAK4RkSIRycRbs6pgDu+/RAoIV6rqpcAtwF+JyDXRLlCAqcr3CLAM2Ij3jfb/RalsycClwCOqegkwhJOYMEZMV75YuX8AOE1ZtwE/i2Y5pjNF+WLi/jmB6HagCm9W5Cy8/08CRaV9e6ryicifAl8AVgObgUIgKs2BqloHfAP4Ld7mov14533NWsIEBFU97fzuAH6Btz2tXUTKAJzfHbFUPlVtV1W3qnqAf3fKHA3NQLOq7nL+fgbvB3Cs3L8pyxdD98/nFuD3qtru/B0r98/nnPLF0P17L3BcVTtVdQL4D+AKIN9pooHzpLeJVvlUtVW9xoAfEMX3n6p+T1UvVdVr8E4OrmcO77+ECAgikiUiOb7HwE14q1H+KTXuA34ZS+Xz/WM5Poi3zBGnqm3AKRFZ5Wy6Ae/s8pi4f9OVL1bun597OLc5Jibun59zyhdD9+8kcJmIZIqI8If338t4U+FAdO/fVOWr8/uwFbzt81F7/4nIAuf3EuBP8P47z/r9lxCjjESkGu+3bvA2Lzyuql8TkSLgaWAJ3n/UO1V1dqu4h7d8P8ZbXVegCfiUr80vCmXcCHwXSAUa8Y5AcRED9+885ftXYuf+ZeJN6V6tqv3Otph4/52nfLH0/vsKcBfepo63gb/A22fwJN7mmLeBP3W+jcdK+Z7Du+6LAPuAT6vqYJTK9yreJQcmgM+r6otzef8lREAwxhgzfwnRZGSMMWb+LCAYY4wBLCAYY4xxWEAwxhgDWEAwxhjjsIBgjDEGsIBgzJyJyHUicsUMx3xaRD42xfZKEYn2RDpjzhHUEprGmCldBwwCb0x3gKp+O2KlMWaeLCAYE8D5Rv+3eGfwvoN3tuc/4p0l3Q3cC2QAnwbcTqKz/6Sqr05xrS/jrDkgIpuA7wPDePPrGxNTLCAY40dE1gH/FW922i4RKcQbGC5TVRWRvwD+i6r+jbNoy2wWmPkB3sCxQ0T+3/C8AmPmzgKCMee6HnjGt7KdqvaIyEXAU04ys1Tg+GwvKiJ5QL6q7nA2/ZipUzwbEzXWqWzMuYR3593/X8C/qepFwKeA9BBd15iYYgHBmHO9CHzYyRSJ02SUh3cdYvhDOmGAASAnmIs6S372i8hVzqZ7Q1NcY0LHAoIxflT1IPA1YIeI7Af+Bfgy8DMnxXCX3+HPAh8UkX0icnUQl/8E8LCIvAmMhLbkxsyfpb82xhgDWA3BGGOMw0YZGRMCIvJfgTsDNv9MVb8WjfIYMxfWZGSMMQawJiNjjDEOCwjGGGMACwjGGGMcFhCMMcYAFhCMMcY4/n96stpJgf0UigAAAABJRU5ErkJggg==\n", 374 | "text/plain": [ 375 | "
" 376 | ] 377 | }, 378 | "metadata": {}, 379 | "output_type": "display_data" 380 | } 381 | ], 382 | "source": [ 383 | "sns.distplot(all_persons.cat_id)" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [] 392 | } 393 | ], 394 | "metadata": { 395 | "kernelspec": { 396 | "display_name": "Python 3", 397 | "language": "python", 398 | "name": "python3" 399 | }, 400 | "language_info": { 401 | "codemirror_mode": { 402 | "name": "ipython", 403 | "version": 3 404 | }, 405 | "file_extension": ".py", 406 | "mimetype": "text/x-python", 407 | "name": "python", 408 | "nbconvert_exporter": "python", 409 | "pygments_lexer": "ipython3", 410 | "version": "3.6.4" 411 | } 412 | }, 413 | "nbformat": 4, 414 | "nbformat_minor": 1 415 | } 416 | -------------------------------------------------------------------------------- /demos/input_data/Puma_Tract_Crosswalk_500.csv: -------------------------------------------------------------------------------- 1 | state,county,tract,bg,pumano 2 | 35,61,971400,0,500 3 | 35,61,971300,0,500 4 | 35,61,971100,0,500 5 | 35,61,971000,0,500 -------------------------------------------------------------------------------- /demos/input_data/hh_marginals.csv: -------------------------------------------------------------------------------- 1 | zone_id,sample_geog,cars,cars,cars,children,children,income,income,income,workers,workers,workers 2 | ,,none,one,two or more,no,yes,gt100,gt35-lt100,lt35,none,one,two or more 3 | 1,1,7,49,197,41,215,57,125,74,72,77,105 4 | 2,1,9,59,237,68,239,83,126,98,87,93,125 5 | 3,1,10,69,275,79,279,74,170,114,102,108,146 6 | 4,1,11,76,302,167,224,42,105,244,111,118,160 7 | 5,1,18,117,466,86,517,50,261,292,171,182,247 8 | 6,1,9,63,252,65,261,80,139,107,92,98,133 9 | 7,1,19,159,377,160,397,96,186,275,199,194,162 10 | 8,1,11,98,231,86,257,30,99,214,123,119,100 11 | 9,1,9,78,186,49,226,22,164,89,98,95,79 12 | 
10,1,7,65,155,55,175,21,143,66,82,80,66 13 | 11,1,17,297,542,289,570,118,407,334,303,279,274 14 | 12,1,15,258,474,201,548,76,371,302,264,244,240 15 | 13,1,40,217,486,251,495,121,314,311,269,259,216 16 | 14,1,51,278,622,472,482,53,320,581,344,332,277 17 | 15,1,38,210,470,220,501,68,350,303,259,251,209 18 | 16,1,23,79,83,45,142,0,60,127,87,54,43 19 | 17,1,23,78,81,47,137,0,49,135,86,54,43 20 | 18,1,36,122,127,103,184,0,134,153,134,84,67 21 | 19,1,40,135,141,66,252,23,190,105,149,93,75 22 | 20,1,89,303,318,442,271,19,167,527,334,209,168 23 | 21,1,43,147,154,108,238,0,161,185,162,101,81 -------------------------------------------------------------------------------- /demos/input_data/hhld_marginals_500.csv: -------------------------------------------------------------------------------- 1 | state,county,tract,bg,inc1,inc2,inc3,inc4,inc5,wkr1,wkr2,wkr3,wkr4,veh1,veh2,veh3,veh4,veh5,child1,child2,hhlds 2 | 35,61,971400,0,115,261,200,245,340,311,406,404,40,52,320,530,194,65,504,657,1161 3 | 35,61,971300,0,194,147,94,168,174,214,240,282,41,0,309,274,101,93,254,523,777 4 | 35,61,971100,0,151,176,65,151,27,208,240,109,13,0,149,242,122,57,215,355,570 5 | 35,61,971000,0,512,495,349,375,265,815,706,374,101,76,643,770,374,133,558,1438,1996 -------------------------------------------------------------------------------- /demos/input_data/person_marginals.csv: -------------------------------------------------------------------------------- 1 | zone_id,age,age,age,age,race,race,race,race,sex,sex 2 | ,19 and under,20 to 35,35 to 60,above 60,asian,black,other,white,female,male 3 | 1,312,108,223,177,64,0,0,756,440,380 4 | 2,235,143,296,181,0,0,0,855,452,403 5 | 3,303,229,445,174,0,0,24,1127,565,586 6 | 4,215,77,356,189,0,0,29,808,389,448 7 | 5,506,539,619,262,0,0,0,1926,981,945 8 | 6,377,171,285,102,0,0,47,888,476,459 9 | 7,312,150,488,382,0,0,14,1318,681,651 10 | 8,246,100,229,242,0,0,0,817,337,480 11 | 9,218,182,203,185,0,0,6,782,411,377 12 | 10,52,75,150,227,0,0,22,482,206,298 13 | 
11,490,314,617,721,21,82,14,2025,1062,1080 14 | 12,639,356,721,381,7,4,46,2040,1162,935 15 | 13,345,341,647,564,0,21,179,1697,895,1002 16 | 14,372,363,708,638,0,6,89,1986,1044,1037 17 | 15,361,281,624,528,6,0,141,1647,871,923 18 | 16,149,92,67,157,0,0,0,465,212,253 19 | 17,287,69,196,81,0,0,0,633,366,267 20 | 18,160,128,265,93,0,0,20,626,366,280 21 | 19,418,158,313,198,0,0,0,1087,546,541 22 | 20,238,151,495,327,0,0,132,1079,748,463 23 | 21,272,133,203,279,0,0,0,887,401,486 -------------------------------------------------------------------------------- /demos/input_data/pop_marginals_500.csv: -------------------------------------------------------------------------------- 1 | state,county,tract,bg,sex1,sex2,age1,age2,age3,race1,race2,race3,pop 2 | 35,61,971400,0,1632,1666,983,1997,318,1738,1289,271,3298 3 | 35,61,971300,0,1052,1021,518,1285,270,975,895,203,2073 4 | 35,61,971100,0,822,773,442,970,183,912,622,61,1595 5 | 35,61,971000,0,2232,2491,1085,2528,1110,2183,2313,227,4723 -------------------------------------------------------------------------------- /demos/simple_synthesis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from synthpop.synthesizer import synthesize, enable_logging\n", 10 | "import synthpop.categorizer as cat\n", 11 | "\n", 12 | "import multiprocessing\n", 13 | "import os\n", 14 | "import seaborn as sns\n", 15 | "from functools import partial\n", 16 | "import pandas as pd\n", 17 | "\n", 18 | "import synthpop.zone_synthesizer as zs" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "hh_marginal_file = 'input_data/hh_marginals.csv'\n", 28 | "person_marginal_file = 'input_data/person_marginals.csv'\n", 29 | "hh_sample_file = 'input_data/household_sample.csv'\n", 30 | "person_sample_file 
= 'input_data/person_sample.csv'" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "hh_marg, p_marg, hh_sample, p_sample, xwalk = zs.load_data(hh_marginal_file, person_marginal_file, hh_sample_file, person_sample_file)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stderr", 49 | "output_type": "stream", 50 | "text": [ 51 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 52 | " adj = constraint / (column * weights).sum()\n" 53 | ] 54 | }, 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "Drawing 254 households\n" 60 | ] 61 | }, 62 | { 63 | "name": "stderr", 64 | "output_type": "stream", 65 | "text": [ 66 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 67 | " adj = constraint / (column * weights).sum()\n" 68 | ] 69 | }, 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "Drawing 306 households\n" 75 | ] 76 | }, 77 | { 78 | "name": "stderr", 79 | "output_type": "stream", 80 | "text": [ 81 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 82 | " adj = constraint / (column * weights).sum()\n" 83 | ] 84 | }, 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "Drawing 356 households\n" 90 | ] 91 | }, 92 | { 93 | "name": "stderr", 94 | "output_type": "stream", 95 | "text": [ 96 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 97 | " adj = constraint / (column * weights).sum()\n" 98 | ] 99 | }, 100 | { 101 | "name": "stdout", 102 | "output_type": 
"stream", 103 | "text": [ 104 | "Drawing 390 households\n" 105 | ] 106 | }, 107 | { 108 | "name": "stderr", 109 | "output_type": "stream", 110 | "text": [ 111 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 112 | " adj = constraint / (column * weights).sum()\n" 113 | ] 114 | }, 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "Drawing 601 households\n" 120 | ] 121 | }, 122 | { 123 | "name": "stderr", 124 | "output_type": "stream", 125 | "text": [ 126 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 127 | " adj = constraint / (column * weights).sum()\n" 128 | ] 129 | }, 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "Drawing 324 households\n" 135 | ] 136 | }, 137 | { 138 | "name": "stderr", 139 | "output_type": "stream", 140 | "text": [ 141 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 142 | " adj = constraint / (column * weights).sum()\n" 143 | ] 144 | }, 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "Drawing 556 households\n" 150 | ] 151 | }, 152 | { 153 | "name": "stderr", 154 | "output_type": "stream", 155 | "text": [ 156 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 157 | " adj = constraint / (column * weights).sum()\n" 158 | ] 159 | }, 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "Drawing 342 households\n" 165 | ] 166 | }, 167 | { 168 | "name": "stderr", 169 | "output_type": "stream", 170 | "text": [ 171 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 
172 | " adj = constraint / (column * weights).sum()\n" 173 | ] 174 | }, 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "Drawing 273 households\n" 180 | ] 181 | }, 182 | { 183 | "name": "stderr", 184 | "output_type": "stream", 185 | "text": [ 186 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 187 | " adj = constraint / (column * weights).sum()\n" 188 | ] 189 | }, 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "Drawing 228 households\n", 195 | "Drawing 857 households\n", 196 | "Drawing 748 households\n" 197 | ] 198 | }, 199 | { 200 | "name": "stderr", 201 | "output_type": "stream", 202 | "text": [ 203 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 204 | " adj = constraint / (column * weights).sum()\n" 205 | ] 206 | }, 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "Drawing 744 households\n" 212 | ] 213 | }, 214 | { 215 | "name": "stderr", 216 | "output_type": "stream", 217 | "text": [ 218 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 219 | " adj = constraint / (column * weights).sum()\n" 220 | ] 221 | }, 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "Drawing 953 households\n" 227 | ] 228 | }, 229 | { 230 | "name": "stderr", 231 | "output_type": "stream", 232 | "text": [ 233 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 234 | " adj = constraint / (column * weights).sum()\n" 235 | ] 236 | }, 237 | { 238 | "name": "stdout", 239 | "output_type": "stream", 240 | "text": [ 241 | "Drawing 719 households\n" 242 | ] 243 | }, 244 | { 245 | "name": 
"stderr", 246 | "output_type": "stream", 247 | "text": [ 248 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 249 | " adj = constraint / (column * weights).sum()\n" 250 | ] 251 | }, 252 | { 253 | "name": "stdout", 254 | "output_type": "stream", 255 | "text": [ 256 | "Drawing 185 households\n" 257 | ] 258 | }, 259 | { 260 | "name": "stderr", 261 | "output_type": "stream", 262 | "text": [ 263 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 264 | " adj = constraint / (column * weights).sum()\n" 265 | ] 266 | }, 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "Drawing 183 households\n" 272 | ] 273 | }, 274 | { 275 | "name": "stderr", 276 | "output_type": "stream", 277 | "text": [ 278 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 279 | " adj = constraint / (column * weights).sum()\n" 280 | ] 281 | }, 282 | { 283 | "name": "stdout", 284 | "output_type": "stream", 285 | "text": [ 286 | "Drawing 286 households\n" 287 | ] 288 | }, 289 | { 290 | "name": "stderr", 291 | "output_type": "stream", 292 | "text": [ 293 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 294 | " adj = constraint / (column * weights).sum()\n" 295 | ] 296 | }, 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "Drawing 317 households\n" 302 | ] 303 | }, 304 | { 305 | "name": "stderr", 306 | "output_type": "stream", 307 | "text": [ 308 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 309 | " adj = constraint / (column * weights).sum()\n" 310 | ] 311 | }, 312 | { 313 | "name": 
"stdout", 314 | "output_type": "stream", 315 | "text": [ 316 | "Drawing 711 households\n" 317 | ] 318 | }, 319 | { 320 | "name": "stderr", 321 | "output_type": "stream", 322 | "text": [ 323 | "c:\\users\\juan\\documents\\github\\synthpop\\synthpop\\ipu\\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars\n", 324 | " adj = constraint / (column * weights).sum()\n" 325 | ] 326 | }, 327 | { 328 | "name": "stdout", 329 | "output_type": "stream", 330 | "text": [ 331 | "Drawing 345 households\n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "all_households, all_persons, all_stats = zs.synthesize_all_zones(hh_marg, p_marg, hh_sample, p_sample, xwalk)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 5, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "all_persons, all_households, all_stats = zs.multiprocess_synthesize(hh_marg, p_marg, hh_sample, p_sample, xwalk)" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 6, 351 | "metadata": {}, 352 | "outputs": [ 353 | { 354 | "name": "stderr", 355 | "output_type": "stream", 356 | "text": [ 357 | "C:\\Users\\Juan\\Anaconda3\\envs\\synpop_py3\\lib\\site-packages\\matplotlib\\axes\\_axes.py:6448: UserWarning: The 'normed' kwarg is deprecated, and has been replaced by the 'density' kwarg.\n", 358 | " warnings.warn(\"The 'normed' kwarg is deprecated, and has been \"\n" 359 | ] 360 | }, 361 | { 362 | "data": { 363 | "text/plain": [ 364 | "" 365 | ] 366 | }, 367 | "execution_count": 6, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | }, 371 | { 372 | "data": { 373 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYQAAAELCAYAAADZW/HeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvFvnyVgAAIABJREFUeJzt3Xl8nGd56P3fNdr33bJsyZbkfYnjxLKdPSEhIaFpAiUhCSkE2hKgzSkc2p6GnsKBFN4D5+1bDj3NCaRsYclGKAfn4BDIZrLZsU3syLZsS5ZlW7L2zdqXmev9Y54x44lkjaTZfX0/H300era55/F4rrm36xZVxRhjjHFFuwDGGGNigwUEY4wxgAUEY4wxDgsIxhhjAAsIxhhjHBYQjDHGABYQjDHGOCwgGGOMASwgGGOMcSRHuwCzUVxcrJWVldEuhjHGxJW9e/d2qWrJTMfFVUCorKxkz5490S6GMcbEFRE5Ecxx1mRkjDEGsIBgjDHGYQHBGGMMYAHBGGOMwwKCMcYYIMiAICI3i8gREWkQkQen2H+NiPxeRCZF5A6/7e8RkX1+P6Mi8gFn3w9F5Ljfvo2he1nGGGNma8ZhpyKSBDwM3Ag0A7tFZJuqHvI77CTwceBv/c9V1ZeBjc51CoEG4Dd+h/ydqj4znxdgjDEmNIKZh7AFaFDVRgAReRK4HTgbEFS1ydnnOc917gCeU9XhOZfWGGNM2ATTZLQYOOX3d7OzbbbuBp4I2PY1EXlHRL4pImlzuKYxxpgQCaaGIFNs09k8iYiUARcBz/tt/gLQBqQCjwJ/Dzw0xbn3A/cDLFmyZDZPa4wJwuO7Tk677yNb7f/chSSYGkIzUOH3dzlwepbP82HgF6o64dugqq3qNQb8AG/T1Luo6qOqWqOqNSUlM6biMMYYM0fBBITdwAoRqRKRVLxNP9tm+Tz3ENBc5NQaEBEBPgAcmOU1jTHGhNCMAUFVJ4EH8Db31AFPq+pBEXlIRG4DEJHNItIM3Al8R0QO+s4XkUq8NYwdAZf+qYjUArVAMfDV+b8cY4wxcxVUtlNV3Q5sD9j2Jb/Hu/E2JU11bhNTdEKr6vWzKagxxpjwspnKxhhjAAsIxhhjHBYQjDHGABYQjDHGOCwgGGOMASwgGGOMcVhAMMYYA1hAMMYY47CAYIwxBrCAYIwxxmEBwRhjDGABwRhjjMMCgjHGGMACgjHGGIcFBGOMMYAFBGOMMQ4LCMYYYwALCMYYYxwWEIwxxgAWEIwxxjgsIBhjjAEgOZiDRORm4FtAEvBdVf16wP5rgP8JbADuVtVn/Pa5gVrnz5OqepuzvQp4EigEfg98VFXH5/dyjDGh9Piuk+fd/5GtSyJUEhMJM9YQRCQJeBi4BVgL3CMiawMOOwl8HHh8ikuMqOpG5+c2v+3fAL6pqiuAXuDP51B+Y4wxIRJMk9EWoEFVG51v8E8Ct/sfoKpNqvoO4AnmSUVEgOsBX03iMeADQZfaGGNMyAUTEBYDp/z+bna2BStdRPaIyE4R8X3oFwF9qjo5x2saY4wJsWD6EGSKbTqL51iiqqdFpBp4SURqgTPBXlNE7gfuB1iyxNorjTEmXIKpITQDFX5/lwOng30CVT3t/G4EXgEuAbqAfBHxBaRpr6mqj6pqjarWlJSUBPu0xhhjZimYgLAbWCEiVSKSCtwNbAvm4iJSICJpzuNi4ErgkKoq8DJwh3PofcAvZ1t4Y4wxoTNjQHDa+R8AngfqgKdV9aCIPCQiviGkm0WkGbgT+I6IHHROXwPsEZH9eAPA11X1kLPv74HPi0gD3j6F74XyhRljjJmdoOYhqOp2YHvAti/5Pd6Nt9kn8Lw3gIumuWYj3hFMJo7YuHRjEpfNVDbGGANYQDDGGOOwgGCMMQawgGCMMcZhAcEYYwxgAcEYY4zDAoIxxhjAAoIxxhiHBQRjjDGABQRjjDGOoFJXmAvLTOkpjDGJyWoIxhhjAAsIxhh
jHBYQjDHGABYQjDHGOCwgGGOMASwgGGOMcVhAMMYYA1hAMMYY47CAYIwxBggyIIjIzSJyREQaROTBKfZfIyK/F5FJEbnDb/tGEXlTRA6KyDsicpffvh+KyHER2ef8bAzNSzLGGDMXM6auEJEk4GHgRqAZ2C0i21T1kN9hJ4GPA38bcPow8DFVrReRRcBeEXleVfuc/X+nqs/M90UYY4yZv2ByGW0BGlS1EUBEngRuB84GBFVtcvZ5/E9U1aN+j0+LSAdQAvRhjDEmpgTTZLQYOOX3d7OzbVZEZAuQChzz2/w1pynpmyKSNttrGmOMCZ1gAoJMsU1n8yQiUgb8GPiEqvpqEV8AVgObgULg76c5934R2SMiezo7O2fztMYYY2YhmIDQDFT4/V0OnA72CUQkF/gV8I+qutO3XVVb1WsM+AHepql3UdVHVbVGVWtKSkqCfVpjjDGzFExA2A2sEJEqEUkF7ga2BXNx5/hfAD9S1Z8F7CtzfgvwAeDAbApujDEmtGYMCKo6CTwAPA/UAU+r6kEReUhEbgMQkc0i0gzcCXxHRA46p38YuAb4+BTDS38qIrVALVAMfDWkr8wYY8ysBLVimqpuB7YHbPuS3+PdeJuSAs/7CfCTaa55/axKaowxJqxsprIxxhjAAoIxxhiHBQRjjDGABQRjjDEOCwjGGGMACwjGGGMcFhCMMcYAFhCMMcY4LCAYY4wBLCAYY4xxWEAwxhgDWEAwxhjjsIBgjDEGsIBgjDHGEVT6a2OMSTSP7zo57b6PbF0SwZLEDqshGGOMASwgGGOMcVhAMMYYA1hAMMYY47CAYIwxBrCAYIwxxhFUQBCRm0XkiIg0iMiDU+y/RkR+LyKTInJHwL77RKTe+bnPb/smEal1rvmvIiLzfznGGGPmasaAICJJwMPALcBa4B4RWRtw2Eng48DjAecWAv8N2ApsAf6biBQ4ux8B7gdWOD83z/lVGGOMmbdgaghbgAZVbVTVceBJ4Hb/A1S1SVXfATwB574P+K2q9qhqL/Bb4GYRKQNyVfVNVVXgR8AH5vtijDHGzF0wAWExcMrv72ZnWzCmO3ex83gu1zTGGBMGwQSEqdr2NcjrT3du0NcUkftFZI+I7Ons7AzyaY0xodI3PM6u492MTwY2AJhEE0wuo2agwu/vcuB0kNdvBq4LOPcVZ3t5MNdU1UeBRwFqamqCDUTGmHnqGRrn1wfbOHS6H4/C4OgkN6wpjXaxTBgFU0PYDawQkSoRSQXuBrYFef3ngZtEpMDpTL4JeF5VW4EBEbnMGV30MeCXcyi/MSZMtu1v4Wj7AFctL2b5gmxeP9bF6IQ72sUyYTRjQFDVSeABvB/udcDTqnpQRB4SkdsARGSziDQDdwLfEZGDzrk9wD/hDSq7gYecbQCfAb4LNADHgOdC+sqMMXM2ODZJQ8cgl1cXcfP6Mt63diGjEx52NnZHu2gmjIJKf62q24HtAdu+5Pd4N+c2Afkf933g+1Ns3wOsn01hjTGRUdvibSa6uCIfgMUFGawqzeHV+i4ury4iLSUpyiU04WAzlY0x77L/VB8Lc9NZmJt+dtv1qxcwMuFm1/Ge85xp4pkFBGPMOXqGxjnZM3y2duBTUZjJ0sJMalv6o1QyE24WEIwx53inuQ+ADeV579pXXZLF6b4RxqxzOSFZQDDGnGPfqT6WFmVSkJn6rn2VRVkocLJnOPIFM2FnAcEYc9aZ0Qk6BsZYV5Y75f4lhZkI0NQ9FNmCmYiwgGCMOauldwTw9hdMJS0liUX5GTR1Ww0hEVlAMMac1dw7jEugLC9j2mMqizI51TPMpNtSWSSaoOYhGHOhe3zXyfPu/8jWJREqSXg1946wICed1OTpvysuLcri9WPdtPSNRLBkkXWh/HsHshqCMQYAVaW5d4TygulrBwBLi7zNSdZslHgsIBhjAOgdnmBkws3iGQJCTnoKxdmpnLCO5YRjTUbmvA63nuFQ6xkqCjOpLs6iKDst2kUyYdL
c6/3GX14wdYeyv8qiLA6ePoPHo7hctvptorCAYKY1NuHm579vZnjczZ4TvQjwiSurWL4gO9pFM2HQ3DtCskvOSVcxnSWFmew50UtT9xDVJfZ+SBQWEMy0Xm3oYmjczWeuXUZ6ShLfe62RV+s7EzYgzNSRmOiae0coy0snKYhv/AvzvEHjaPuABYQEYn0IZkoDoxO8Vt/F+sV5VBRmUpKTxtbqIuo7Buk4Mxrt4pkQ86hyum+ExUE0FwEsyElHgCNtg+EtmIkoCwhmSi8d7mDS4+GmtX9YIWtzZSHJLuENy4mfcDoHxhh3e2YcYeSTmuyiICuVo+0DYS6ZiSQLCOZdRsbd7G7qoaaykGK/TuTstGQursjn7ZO9jIxbcrNE4ptTsDg/uIAAUJqbzhELCAnFAoJ5l4bOQTwKlwSkPwa4YlkRE25ld5PlxE8kHWfGSBI55wvATEpz0zjeNcTYpH05SBQWEMy71LcPkJbsmnL4YVleBhUFGRw8bTnxE0nn4BhF2alBdSj7lOam4/YojZ02HyFRWEAw51BVGjoGWVaSPe2HQ3VJNi19I/bNMIF0DoxRkjO7OSalzvDUI23WbJQoLCCYcxzrHKJvZIIVpdMPJawqzsKjlhM/Ubg9Ss/QGCWznHRYnJ1KSpJYP0ICsYBgzvG7o50ArFiQM+0xSwszcQkc77KmgkTQMzSOR6F4ljWEZJeL6uJsjloNIWEEFRBE5GYROSIiDSLy4BT700TkKWf/LhGpdLbfKyL7/H48IrLR2feKc03fvgWhfGFmbn5X30lRViqFWe9eLcvHlxPfAkJi6BwYA5h1DQFg5cIcqyEkkBkDgogkAQ8DtwBrgXtEZG3AYX8O9KrqcuCbwDcAVPWnqrpRVTcCHwWaVHWf33n3+varakcIXo+Zh9EJNzsbu1lROn3twKeqKIvm3hEmLCd+3OscdALCLGsIAKtKs2nuHWFwbDLUxTJREEwNYQvQoKqNqjoOPAncHnDM7cBjzuNngBtEJLBH8h7gifkU1oTXnqZeRic8rAwiNUVVcRZuj3LK+hHiXufAGDnpyaSnJM363JXOl4d6qyUkhGACwmLglN/fzc62KY9R1UmgHygKOOYu3h0QfuA0F31xigACgIjcLyJ7RGRPZ2dnEMU1c/VWUw8u8X7Yz2RpURaC9SMkgq7BsVnNP/C3aqE3INhIo8QQTECY6oNaZ3OMiGwFhlX1gN/+e1X1IuBq5+ejUz25qj6qqjWqWlNSUhJEcc1cvdPcx/IF2aQF8U0xIzWJhXnpFhDinKrOacipT0VBJmnJLho6LKdRIggmIDQDFX5/lwOnpztGRJKBPMB/KuvdBNQOVLXF+T0API63acpEiapS29zPhvJ3z06eTlVxFid7hpn0WD9CvOoeGmdkwj2nDmUAl0uoKs6yLwYJIpiAsBtYISJVIpKK98N9W8Ax24D7nMd3AC+pqgKIiAu4E2/fA862ZBEpdh6nALcCBzBR09I3QvfQOBeX5wV9zpLCTCY9SseZsTCWzITTMeeb/VxrCADVJVk0WkBICDMGBKdP4AHgeaAOeFpVD4rIQyJym3PY94AiEWkAPg/4D029BmhW1Ua/bWnA8yLyDrAPaAH+fd6vxsxZbbM3FcVFs6ghlOV5E6G19ifuYuuJzvdBPtcaAvyhpmgjzuJfUAvkqOp2YHvAti/5PR7FWwuY6txXgMsCtg0Bm2ZZVhNG+5v7SUkS1pTlcOj0maDOKXJmqp7uH7V/zDh1rGOQZJeQl5ky52tUF2efHXFmi+XEN5upbABvh/LqhbmkJQc/9NAlQlleBq19tmBOvDrWOUhJThquqQf5BaWqxDsqzZLcxT8LCAaPR6lt6eeiWfQf+JTlpdPaP4LTZWTizLHOoTkPOfWpdoYpW8dy/LOAYGjqHmJgdHJWHco+ZXkZjE166B2eCEPJTDiNTrhp7h2eV4cyQH6mN9VJY5cNPY13FhAMtS3eDuXZDDn1KXMWW7eO5fhzons
Yj86vQ9mnqjjLmowSgAUEw/5T/aSnuFgRRMqKQKW53sXWT1s/Qtw51jn/Iac+1cU29DQRWEAw1Lb0sW5RHslJs387pCa7KMlJsxpCHPLNQZhvHwJ4O5Y7B8YYGLWmw3hmAeEC5/Eoda0DrFuUO+dreDuWrYYQbxq7hliUl05q8vw/Bnwdy01dluwwnllAuMD5UhevKZtPQMigf2SCYUuBHFeOdQ6ybA7NhFPxzT+wjuX4ZgHhAneo1TsJbV4BId/pWD5jtYR4oaocc9bODoUlhZmI2FyEeGcB4QJ3uO0MIrDyPGsoz+RsCos+60eIF+1nxhgad7OsZOZU58FIT0mivCDDOpbjnAWEC1xd6xmqirLITA0qi8mUstOSyUlLpt2S3MWNRmeEUShTTVQVZ3PcmozimgWEC1xd68C8mot8SnPTaR+wJqN44RtyGqomI/B2LB/vHEqoWes7G7vZcaSDUz3DuD2J87qmM/evhSbuDYxOcLJnmA/XlM/7WqW5abzV1IPHo7hcc8+LYyLjWOcQWalJlObOf8ipT3VJFkPjbjoGxijNTQ/ZdaNlT1MP2/b7ln5ppzArlb+6bjkZqbNfajReWA3hAnbUWQd39cLQ1BAm3Epzr/UjxAPfCKNpVq6dE9/Sq4nQsdzcO8y2/adZXpLNg7es5kOXltM7NM7LRzqiXbSwsoBwATvU6g0Ia+YxB8FngfON8Igtth4XGjuHQtpcBIkz9HR4fJLHd50kOy2ZuzZXkJuewqalBWxaWsAbx7roHEjcvjILCBewutYz5KYnsyhv/tX7BU76g6MWEGLe8PgkLX0jZyeThUpZbjppyS6Ox3kNYXdTL30jE9yzZQlZaX9oVb9xbSkpSS6217ZGsXThZQHhAna49Qyry3JD0myQnpJEfmYKR9osIMQ6X5NOqCal+fjWV47noaeqyt4TPVQWZVJRmHnOvpz0FK5fvYAj7QO8Vt8VpRKGlwWEC5THoxxuG2BtCEYY+ZTmpFsNIQ6EY4SRT3VJVlyvi3Cie5iuwXFqlhZOuf/y6iIyUpL42d5TES5ZZFhAuECd7BlmeNzNmrKckF2zNDedY52DtrZujGvsHMIlsLQoc+aDZyne11fee6KX1GQX6xdPvTZIcpKLdYtyeeFQO6MT7giXLvwsIFygDrfNP2VFoNLcNCbcSlMcf0OcypG2M5zsSZykbcc6BykvyCQ9JfTDJ33rK8fj/RqbcFPb0s+GxXnnTfi3oTyfoXE3ryTgiKOgAoKI3CwiR0SkQUQenGJ/mog85ezfJSKVzvZKERkRkX3Oz7f9ztkkIrXOOf8qoRz/ZmZ0qHUAl8DK0tDWECBxRhp5VNle28pjb57g2zuO8Z0dx6hPgNd2rHMoZCkrAvnWV47HjuXaln7G3R5qKqduLvKpKs6iKCuVZ99JvM7lGQOCiCQBDwO3AGuBe0RkbcBhfw70qupy4JvAN/z2HVPVjc7Pp/22PwLcD6xwfm6e+8sws1XXeoaq4qyQfkv0LtYORxOgY3nC7eGnO0/wWkMXl1UXcuuGMs6MTvDDN5rieu0Hj0dp7AxdUrtAvpFL8Tj0dF9zH8XZaVQUZJz3uCSXcPP6hbxU18HweGJl+A2mhrAFaFDVRlUdB54Ebg845nbgMefxM8AN5/vGLyJlQK6qvqneee4/Aj4w69KbOatrPRPS5iKAlCQXlcVZCVFDeK2hi7q2Af54Qxm3XbyYK5YV81fv8c5SfXZ/a9ymZ2jpG2Fs0hPyEUY+vvWV461jeWzSzYmuYdaU5QQ16u7WDYsYmXDz0uHEajYKJiAsBvy71JudbVMeo6qTQD9Q5OyrEpG3RWSHiFztd3zzDNc0YXJmdILm3pGQBwSAVaU5HG2Pv2+H/kYn3LxW38XqhTlcvqz47PbM1GRuXFtKU/fQ2XWo441vSGio5yD4qyrO4licNRkd7xzCrRp0E+qWqkJKctL4VYI1GwUTEKYKl4Ffj6Y7phVYoqqXAJ8HHheR3CC
v6b2wyP0iskdE9nR2dgZRXDMT31yBUI4w8llZmkNT91Bcj8B4s7GbkQk3169e8K59mysLWZSXznMH2hifjL+RNL5lM8NVQwAnyV2c1RCOdgyQkiQsLQxu5FWSS7hxbSmv1ncxGacjqqYSTEBoBir8/i4HTk93jIgkA3lAj6qOqWo3gKruBY4BK53j/TOqTXVNnPMeVdUaVa0pKSkJorhmJnUhWBRnOqsW5qAKDR3xWUvwrx2UF7z7w8Elwh9fvIj+kQl2N/VEoYTzc6xzkLyMFIqyUsP2HPG4vvLRdm+/ymzWFb9qeTGDY5Psb47P2uJUgnn1u4EVIlIlIqnA3cC2gGO2Afc5j+8AXlJVFZESp1MaEanG23ncqKqtwICIXOb0NXwM+GUIXo8JQl3rAHkZKSwMQ0ZKX5U7Xmcs7zxP7cBnaVEWi/MzePtkbwRLFhrHOgdZVpIV0qR2gXzNUfFSS+geHKNnaJwVsxxxd3l1ESLwekPizFqeMSA4fQIPAM8DdcDTqnpQRB4Skducw74HFIlIA96mId/Q1GuAd0RkP97O5k+rqu9r1WeA7wINeGsOz4XoNZkZeDuUg+s8m63KokxSk1xxOWNZVXmrqYflJdlT1g78XbIkn9P9o7T1x9caEI2dQyFdFGcqvhFMvhnRse6oU5tdOctmtIKsVNYvyuO1BAoIQa2HoKrbge0B277k93gUuHOK834O/Hyaa+4B1s+msGb+3B7lSNsAd2+pmPngOUhOcrFsQXZcjjQ62TNM3/AE711TOuOxG8rz2V7bytunerklrywCpZu/M6MTdAyMhW3Iqc/SoiySXEJ9nAwuqG8foDArlaLs2a8NceXyYr73WiNDY5PnJMKLVzZT+QJzsmeYkQl3WPoPfFaVZsfNh4G//c39JLskqPxO2WnJrFqYy75TfXGzktbZpHZhmpTmk5rsorIoMy76kSY9Hho7h+a8pvhVy4uZcHtrlonAAsIF5myHcggWxZnOyoU5tPSNxFWn4qTbQ21LP6sX5gQ9We+SinwGRifjpmkkEiOMfFYsyImLgNDcM8K428PyOdaaaioLSE128XqCZD+1gHCBqWs9Q5JLWDHHb0TBWOV0zsXTfIQ3jnUzNDbJhvL8oM9ZvTCHjJSkuOlcbuwaJNklLAlyaOV8LF+QzYmeYcYmY3v48fFub62psmhutab0lCQ2VxYkTD+CBYQLzMHTZ1hWEtqUFYFWng0I8dOPsG3/adKSXaxaGPxIE1/my8NtA3GR3fNo+yBLizJJmcXQyrlaUepNctfUFdtJ7o53DbEwN53MebT/X7m8mMNtAwmxkpoFhAtMbUv/tKl9Q2VxfgaZqUlxM/R0bNLN8wfaWLcod9YflqsX5jI26YmLOQlH2gZCsn52MHwd17HcbOT2KCe7h6mc56ztK5zZ7Dsbu0NRrKiygHABaT8zSufAGBeFOSC4XMKK0py4qSHsbOxhYGxyToFy2YIskl3CS3WxndNmeHySkz3Ds6oBzceykmxEoL4jdt8DLX3e/oOqeQaE9YtyyUpNYtdxCwgmjtQ6MyrDHRDAO9IoXgLCS3XtpKe45jQcMy05iarirJhPcubrzwlluvPzyUhNorwgg/oYriH4Js7NNyAkJ7moqSxkV2Ps1xJnYgHhAlLb0o9LYO2i8DcbrCzNoWtwnK7B2G5XVVVeOtLBlcuK59y2vnphDo1dQzTG8GgjX0ry1RGqIYB3pNGxmA4Ig5TkpJEdgvkDW6sLqe8YjPn3+0wsIFxAalv6WVaSTWZq+CfQ+JomYr0foaFjkFM9I1y/ZvpUFTNZ5bTLx3It4XDbAOkprnctHB9Oyxdk09g5FJPJ3ybdHk50D1M1x9FFgbZWeZM7v3U8vmsJFhAuILUt/RFpLgLOTu46dPpMRJ5vrnwf4u9ZNfeAUJiVysrS7JgOCEfbB1hZmkOSK3ILEy5fkM2428Op3thbUKiudYCxyfn3H/hsKM8jIyWJXXHesWwB4QLh61A
O9wgjn6LsNMry0jlwOrYzQb54uIM1Zbksyj//KlkzuX51KW8d7+FMjE7GO9w2ELH+A58VzgS4WFx21NcBHKqAkJLkoqaygF1WQzDx4GyHcnlkAgLAukW5HIzhGkL/8AR7T/Ry/er5p1V/z6oSJj3KGzE4Qal7cIyuwbGI9h/AH2ZEN8Rg38rOxh6KslLJzUgJ2TW3VhVyuG2A3qHxkF0z0iwgXCBqW/oRIag8PaGyblEexzoHY3bd2R31nbg9yvWrZ05mN5NLlxaQk5bMjqOxt4iTL9FgpGsIueneFOuxltfK41F2N/WErHbgs7Xa248Qz7UECwgXiANOh3IkMzKuW5SLqre9Nha9fLiDwqxUNlYEn65iOilJLq5cXswrRzpjbr3laIww8lm1MOds/qxYcbhtgP6RiZAHhA3leaQlu+J6PoIFhAtEJDuUfXz9FYdisB/B7VFeOdLBdStLQtbReu2qElr7R2Nu7P2R9gHyM1MoyZl9euf5Wrcol/qOwZhaUjXU/Qc+aclJXLqkIK7nI1hAuAC09I3QMTDGhgj2HwCU5aVTkJnCgZbY+oYIsO9UL73DE7znPCujzda1K719ETuOxFaz0ZG2AVaVhmdBpJmsX5yH26MxNUlxV2MP5QUZ5GeGfhnRy6qLqGs7Q/9wbA4umIkFhAvAHifPzubKwog+r4iwfnEeB1tjr4bw0uEOklzCNStDt073ovwMVpZmx1Q/gsejHG0fjFjKikDrnEmQsTK4wLcqnm/eQKhtrS5ElbjIbTUVCwgXgN1NPWSnJUelDXntolyOtA0wPhlbk5NerOugZmkBeSEcZQLeWsJbx3sYGouNjvTGriEG55inKRQqCjLJSUvmQEtsfCmo7xikZ2icrdXh+XK0sSKf1GRX3Ca6s4BwAdh9vJdLluSTHIG0x4HWLcpjwq0xleTsdN8Ih9sGuGEes5Onc92qBYy7PTHzgbD/VB9ASDrO58LlEtbE0PBj38SxrVXhCQjpKUlsrMiP25FGFhASXP/wBEfaB9gS4eYin/W+JoMY6kfwzSi+PoT9Bz41lQVkpCTFTLPR/uY+slKTwr6O8vl414w4ExNLje483sPC3PSwLhJ0WVUhB0/3x+zyudYUAAAZkElEQVQkxfOxgJDg9p70flOpiVJAqCzKIis1KaZmLL98uIMlhZlh+ZBMS07iimVFsRMQTvWxoTw/oikrAq1flMfohCfqyf9UlV2NPWytLgxrB/tl1UV4FPY2xcdKev6CCggicrOIHBGRBhF5cIr9aSLylLN/l4hUOttvFJG9IlLr/L7e75xXnGvuc35C/3XNsLupl2SXRLXJ4KLyPN4+2ReV5w80PD7Jaw1dXL96Qdg+FK5dVcKJ7uGz6ZWjZXTCzaHWM1wcpX97n3WLY6Nj+XDbAF2DY1y5vDisz3PJkgJSkiRmmg1nY8aAICJJwMPALcBa4B4RWRtw2J8Dvaq6HPgm8A1nexfwx6p6EXAf8OOA8+5V1Y3OT+xmBotje5p6WL84j4zU8C2ZOZPNld4q9GAMdLT+7mgXY5Meblo7/9nJ07lupfe7zY4j0X1L17WeYcKtbKyIToeyz7KSbFKTXVHvWH6t3ptW5OoV4Q0IGalJXFyez8447EcIpoawBWhQ1UZVHQeeBG4POOZ24DHn8TPADSIiqvq2qp52th8E0kUk8rNjLlCjE272n+pnS5g60IJVU1mIR2FfDNQSfnOojbyMFDaH8Z4sKcqkqjgr6s1Gf+hQLohqOVKSXKxemBP1GsJrDV0sK8miLG9+iQyDcVl1EQda4q8fIZiAsBg45fd3s7NtymNUdRLoBwIH+n4IeFtV/VeQ+IHTXPRFmab+LiL3i8geEdnT2Rkb7bLxoraln3G3h5ql0f1AuHRJPi6J/tjsSbeHF+s6uGH1grAvNH/tyhLebOyO6gzdfaf6KM1NY2FeetTK4ONNdNgftbQeY5Nudh3v5uoVoZt3cj7XrCzBHaPJDs8nmP8VU31
QB/6rnvcYEVmHtxnpU37773Wakq52fj461ZOr6qOqWqOqNSUlkfnHTBRvNHQjEr0OZZ+c9BRWL8xlz4noBoS3mnroH5ngpnXhay7yuXZVCaMTnqgumLK/uZ+Ly6Pbf+Bz0eJ8zoxORq1fZe+JXkYnPFwV5v4Dn0uW5MdsssPzCSYgNAMVfn+XA6enO0ZEkoE8oMf5uxz4BfAxVT3mO0FVW5zfA8DjeJumTAi9dLidjRX5FGaFfor+bG2uLODtk31MRHH1rN8cbCct2RXS2cnTuayqiNRkV9Q+EPqGxzneNcTGJbERELZUeWup0QqQr9V3kewSLlsWnhnKgXzJDnfEYLLD8wkmIOwGVohIlYikAncD2wKO2Ya30xjgDuAlVVURyQd+BXxBVV/3HSwiySJS7DxOAW4FDszvpRh/HQOj7G/u54YwjLWfi5rKQobH3VHLfKmq/PZQO1evKI7IEqIZqUlsrSrklSh1LO931r/YGCM1hGUl2RRnp0ZtwtZrDV1csiQ/JOsnB+vaVSWc7h+lIcaSHZ7PjAHB6RN4AHgeqAOeVtWDIvKQiNzmHPY9oEhEGoDPA76hqQ8Ay4EvBgwvTQOeF5F3gH1AC/DvoXxhF7pXDnu/mYYi138o1FR6vyHujtLY7IOnz9DSN8JNaxdG7DmvX72AY51DHIvC+Pudjd0ku4QNUR5y6iMibKkqZFdjd8S/MfcOjVPb0s9VyyPb5OyricZTs1FQPWuqul1VV6rqMlX9mrPtS6q6zXk8qqp3qupyVd2iqo3O9q+qapbf0NKNqtqhqkOquklVN6jqOlX9rKrGTn7cBPDS4Q7K8tJZUxadpGaByvIyKC/IOJtoL9K27T9Nskt4bxiHmwa6eb03+DxX2xqx5/TZcaSTTUsLIvqNeCZbq4o43T9Kc4TXWP5dfSeqcFWYh5sGWpyfwYoFsZXscCY2UzkBjU26ebW+k/eEcfLVXGyuLGR3U2/EvyG6Pcr/ebuF61YtiGh/SlleBpuWFrC9ti1izwne5sJDrWci0lcyG76EcpGesPX8wTZKctK4JAq1pWtXlrCrsSdmVw0MZAEhAb11vIehcXfM9B/4XFZdSNfgGIfbIpvo7vWGLjoGxviTSwNHS4ffLesXcqj1DE0RHF3z6lHvUMdrYywgrFyQQ35mSkT7EUbG3bx8uJP3rSvFFYX0HdeuKompZIczsYCQgF6s6yAt2cUVyyJbRZ6Jt8YCvz3UHtHn/cXbLeSmJ4clmd1MbrmoDIDnDkSulrDjaCfF2akRXT87GC6XsKWyMKJLTO442snIhJtb1pdF7Dn9bakqJCctOeK1xLmygJBg3B7l+YNtXLW8OKrpKqayICedjRX5EQ0IQ2OT/PpAG3+0YRHpKZG/H4vzM9hYkc9zByLTj+D2KK/Wd3LNipKofCOeydbqIk71jHC6LzL9CM8fbCM/MyVqs/XTkpO4ad1Cnj/Yxthk7HeTWkBIMDuOdtDaP8odm8qjXZQp3bi2lNqWflr7I/eBMDLhjkpzkc/7L1rIO839nOoZDvtzHWjpp3d4Iub6D3x86xBEopYwPunhhbp2blxTGvaZ6edz68VlDIxO8rujsT9r2QJCgnnirVMUZ6dyw5rYGG4ayJdU7oUI1RKe2n2KisKMqKbv8DVX/OLtlrA/1++cES2RHlETrDVluRRnp0aklvjGsS4GRifPjvaKlquWF5OfmcKz+wPn88YeCwgJpOPMKC8d7uBDm8pJTY7Nf9plJdlUFWfxmwh8IOw/1ceu4z3cd3llVEdbVRRmcvWKYp546ySTYZ6p/cLhDtYvzqU4OzZzSCa5hFvWl/HS4Y6wLzO6vbaV7LTksKe7nklKkotb1pfxQl07I+Ox3WwUm58aZk5+trcZt0e5q6Zi5oOjRES4cW0pOxu7w54J8tFXG8lJT+buLUvC+jzBuHfrUlr7R3n5SPjGpB9tH2D/qT4+sDF6zWPBuHVDGaMTHl48HL5Z3P0jEzy7v5X3X7Q
wKn1Hgf54QxnD4+6zq/XFKgsICcLjUZ7afYqtVYVUR3G5xGDcuLaUCbfychj/c5zqGea52lY+snVJTEzOeu+aBZTmpvHTXSfC9hxPvnWKlCThg5fEdkCoqSxkQU4a/zeMTSg/39vMyISbj11eGbbnmI2t1UUUZ6fxy33hbzacDwsICeK3de2c7Bnmnhj4NjyTS5cUsDg/g8d3nQzbc3zvteMkuYRPXFEVtueYjeQkF3dvXsKOo51h6VwenXDzH283c9O6hRTFaHORT5JLeP9FZbxytJOBMNQSPR7lJztPsLEin/WLo7s4kE+SS/jQpsW8UNcekcEFc2UBIQFMuj1849eHqS7J4tYN0RlvPRtJLuHjV1Sy63hPWFbR6hgY5andp7jt4sUxsRaAzz1bluAS4adhCIS/OdRO3/AEd2+O3eZCf398cRnjk56wdC6/caybxq4hPnb50pBfez4+fkUlLhF+8HpTtIsyLQsICeCpPado7BziwZtXkxzF4XWz8eHNFWSmJvH9146H/Nr/z6/qcHuUB65fHvJrz8fCvHTet66Un+w8Qdfg2MwnzMKTb52kvCCDK2NsMuJ0LqkoYFFeelhG3vzozSYKs1J5/0Wx9eWoLC+DWzeU8dTuk/SPxOZKavHx6WGmNTQ2yTd/W8/mygJujGDitvnKy0jhwzUVPPvOaTrOjIbsum8e6+b/7DvNp66tpqo4K2TXDZW/uWkVIxNu/vXF+pBd82j7AG8c6+aumoqYnIw2FZdL+JNLy3nlaCf17aFLZdLQMcALde3ctbkiJjqTA/3F1dUMjbt54q3wNZfOR/R728y8/K+XGugaHOM7H90U9NDKcLbdz8YnrqzksTeb+PHOE/zNTavmfb0Jt4cv/fIA5QUZ/OV1sVU78FlWks09Wyp4fNdJPn5F5bwHAKgqX3n2ILnpyXxka+z3H/n7s6uq+MHrx/nWi/X820cunff1vPfiEFlpyfzFVbHRdxRo/eI8Lq8u4oevN/FnV1bF3PBwCwhx7MW6dr694xgfrilnk9/Eq1j5wJ/J0qIsblxTyg9eb+KuzRWUF2TO63rfeqGe+o5BvndfTcyl7fD32RtW8ovft/CNXx/mOx+tmde1fn2gjdcbuvnKbetivjM5UGFWKvddUckjO47x1+0DrCydX6r2F+s6eLW+iy/eujam78Wnr1vGfd9/i+++1hhzX1xiKzyZoDV1DfG5p/axfnEuD92+PtrFmbMv3roWVeXvfvYOHs/c02L/fG8z//ZyA3fVVMTsLG2fkpw0Pn3tMp4/2M72eayVMDLu5qu/qmP1whzujbPagc8nr64mKzWZb70wvya0sUk3//SrQyxfkB1zncmBrl1Zws3rFvKtF+ojmgU3GBYQ4lDHwCif+vFeklzCI/duism20mBVFGbyj7eu5c3Gbh57s2lO19jZ2M2D//EOVywr4qsfjI/geP+11WxaWsDfPL1/TsuKqipff66Olr4RvnzburgZTBCoICuVj19Rya9qW9l7Yu6r6f3Lb45yonuYL966Nqp5i4L1ldvXkZrk4h9+URtTay7H/p0z56hvH+CDD7/ByZ5hHv7IpVQUzq+ZJRbcvbmC61aV8PXnDs96EfZn95/mz364myWFmTxy76a4+DAAbxbMR+69lNyMZD75oz30DI3P6vz/+UI9j715gj+7sorLqiOzcHy4fPKaaioKM/jMT/bSPocBBo/vOsl3ftfIvVuXxNwaENMpzU3n729ZzRvHusMyDHmu4uN/j0FV+eW+Fv7kkTcYd3t4+lOXRz1HS6iICP/jQxtYnJ/Bvd/dyc/2nJrxnOHxSR569hD/6Ym3WVOWy+OfvIy8zJQIlDZ0FuSm852P1tAxMMYdj7wRVE1BVfnfrzTwrRfruXNTOf/4R2siUNLwystI4d8/VsPg2CSf+vFeRieCz/fz8pEOvvjLA1y3qoSv3LYujKUMvY9sWcLVK4r50i8PzKvpMJSsUzkO7D/Vx9e21/HW8R4
2lOfxv++9dN4dsLFmQW46v/jLK/nMT/fyd8+8w6v1XfzpZUvZXFlwdvSUqtLSN8Ize5t57I0meocn+MSVlfzD+9eQkuSasTM9FkfhbKzI50d/toW/fuJtPvDw63zhltXcWVNB1hTpNg609PPlbQfZc6KXP9pQxtc/tAGXS+JmEMH5rF6Yy798+GI+/ZPfc/+P9/LPd2xgQe70kwon3R4eeeUY33qxnlWlOfzbRy6Nu2Yzl0v49p9u4mPff4u/fuJt0pJdUe//kmDar0TkZuBbQBLwXVX9esD+NOBHwCagG7hLVZucfV8A/hxwA3+tqs8Hc82p1NTU6J49e4J+cfHsdN8ILx7u4Ondp6ht6acgM4W/e99q7tpcQdIMY82j+QEx3w/dCbeHf/7NEX668ySDY5OU5qZRkpNGZkoyjV2DdA16m1beu2YBn752GTWVf1j4JJ5fd+fAGP/5qX281tBFZmoS71u3kMqiLLLSkmjtH+Wt4z0cON1PYWYq/+XmVdy56Q9zDuL5dQf66a4TPPTsIdJTkviH96/m/ReVkZP+h5rf6ISbV4508J3fNfL2yT5uu3gR/3T7+jnVDudz30L5us+MTnDvv+/i4Ol+Pnl1NZ9778qQj5ITkb2qOuOQthkDgogkAUeBG4FmYDdwj6oe8jvmL4ENqvppEbkb+KCq3iUia4EngC3AIuAFYKVz2nmvOZVEDAijE25O941wqneE+vYB6loHePtkL43O6IPVC3O4Z8sSPnDJYvIygnvTJ8IHxPD4JNtr23i9oYv+kQkGxyapKMjk4oo8rlhWxPIF7x6iGO+vW1XZc6KXn+9t5tcH2+gb9s5mTUt2ccmSfK5YVsx9V1S+630Q7687UGPnIH/7s/38/mQfSS7h4vI8stKSGR53c6RtgMGxSYqz0/jirWu4fR6ZXWMlIIA3KPz37XU88dYplhZl8qlrlvFHG8qC/j8/k1AGhMuBL6vq+5y/vwCgqv/d75jnnWPeFJFkoA0oAR70P9Z3nHPaea85lbkGhIHRCUSE9GTXnKqVqsqkRxmf9Hh/3N7fY35/j064GRl3MzzuZnh8kpEJN0NjbkbGJ73bJtwMj03SNzJB79A4vcPe3wMBOeFLctK4aLH3Q+/qFSWsLM1+14SzWG4iiGazTKJ9ME66PQyNuclITTrvBKZEe93gXQp0d1MPr9V3sbOxm0mPkpmaxJLCTP5oQxmXVxfx9J7msDx3MML1ut841sVDzx7icNsAqckurlxWxEXl+WxYnMdly4rmnLk32IAQzNUXA/69fM3A1umOUdVJEekHipztOwPO9YX0ma4ZMp99ct/ZPORJLm9gSEtJwiWgCh5VFG+WRMW7TZ1tbo8y7vYw15FhgneBjJRkF2nJLjJSkshKS6IwK5Xyggyy0pLJz0ghPzOVkpy0c/7B957onddQvGiI5WAVTvPtv7hQ79t0klzCZdVFcT+CarauWFbMc5+9mtqWfn6+t5k3G7vZcbQTj8ILn79myppxKAUTEKZqsA78eJzumOm2T/V1Z8qPXBG5H7jf+XNQRI5MU85AxUAsL2Jq5ZufuCrfvVEsyDRCcv/C+Lpi+t/33iiUb8U3ZnV4YPmCmq0XTEBoBvxz6pYDgSkKfcc0O01GeUDPDOfOdE0AVPVR4NEgynkOEdkTTBUpWqx882Plmx8r3/wkavmCaVDfDawQkSoRSQXuBrYFHLMNuM95fAfwkno7J7YBd4tImohUASuAt4K8pjHGmAiasYbg9Ak8ADyPd4jo91X1oIg8BOxR1W3A94Afi0gD3prB3c65B0XkaeAQMAn8laq6Aaa6ZuhfnjHGmGAF1WWtqtuB7QHbvuT3eBS4c5pzvwZ8LZhrhtism5kizMo3P1a++bHyzU9Cli+oiWnGGGMSX3zN9TbGGBM2CRMQRKRJRGpFZJ+I7HG2FYrIb0Wk3vldMNN1Ily+L4tIi7Ntn4i8P4rlyxeRZ0TksIjUicjlMXb/pipfTNw/EVn
lV4Z9InJGRD4XK/fvPOWLifvnlPE/i8hBETkgIk+ISLoz6GSXc/+ecgagxFL5figix/3u38Yolu+zTtkOisjnnG2zfv8lTJORiDQBNara5bftfwA9qvp1EXkQKFDVv4+h8n0ZGFTVf45GmfyJyGPAq6r6Xec/XibwD8TO/ZuqfJ8jRu6fj3hTvbTgnWj5V8TI/ZumfJ8gBu6fiCwGXgPWquqIMxBlO/B+4D9U9UkR+TawX1UfiaHyXQf8X1V9JtJlCijfeuBJvCmCxoFfA58BPsks338JU0OYxu3AY87jx4APRLEsMUtEcoFr8I4WQ1XHVbWPGLl/5ylfLLoBOKaqJ4iR+xfAv3yxJBnIEO88pkygFbge8H3YRvv+BZZvynlTUbIG2Kmqw6o6CewAPsgc3n+JFBAU+I2I7BXv7GaAUlVtBXB+L4ha6aYuH8ADIvKOiHw/ik0y1UAn8AMReVtEvisiWcTO/ZuufBAb98/f3XgTOkLs3D9//uWDGLh/qtoC/DNwEm8g6Af2An3OBxycm/Ym6uVT1d84u7/m3L9vijfrczQcAK4RkSIRycRbs6pgDu+/RAoIV6rqpcAtwF+JyDXRLlCAqcr3CLAM2Ij3jfb/RalsycClwCOqegkwhJOYMEZMV75YuX8AOE1ZtwE/i2Y5pjNF+WLi/jmB6HagCm9W5Cy8/08CRaV9e6ryicifAl8AVgObgUIgKs2BqloHfAP4Ld7mov14533NWsIEBFU97fzuAH6Btz2tXUTKAJzfHbFUPlVtV1W3qnqAf3fKHA3NQLOq7nL+fgbvB3Cs3L8pyxdD98/nFuD3qtru/B0r98/nnPLF0P17L3BcVTtVdQL4D+AKIN9pooHzpLeJVvlUtVW9xoAfEMX3n6p+T1UvVdVr8E4OrmcO77+ECAgikiUiOb7HwE14q1H+KTXuA34ZS+Xz/WM5Poi3zBGnqm3AKRFZ5Wy6Ae/s8pi4f9OVL1bun597OLc5Jibun59zyhdD9+8kcJmIZIqI8If338t4U+FAdO/fVOWr8/uwFbzt81F7/4nIAuf3EuBP8P47z/r9lxCjjESkGu+3bvA2Lzyuql8TkSLgaWAJ3n/UO1V1dqu4h7d8P8ZbXVegCfiUr80vCmXcCHwXSAUa8Y5AcRED9+885ftXYuf+ZeJN6V6tqv3Otph4/52nfLH0/vsKcBfepo63gb/A22fwJN7mmLeBP3W+jcdK+Z7Du+6LAPuAT6vqYJTK9yreJQcmgM+r6otzef8lREAwxhgzfwnRZGSMMWb+LCAYY4wBLCAYY4xxWEAwxhgDWEAwxhjjsIBgjDEGsIBgzJyJyHUicsUMx3xaRD42xfZKEYn2RDpjzhHUEprGmCldBwwCb0x3gKp+O2KlMWaeLCAYE8D5Rv+3eGfwvoN3tuc/4p0l3Q3cC2QAnwbcTqKz/6Sqr05xrS/jrDkgIpuA7wPDePPrGxNTLCAY40dE1gH/FW922i4RKcQbGC5TVRWRvwD+i6r+jbNoy2wWmPkB3sCxQ0T+3/C8AmPmzgKCMee6HnjGt7KdqvaIyEXAU04ys1Tg+GwvKiJ5QL6q7nA2/ZipUzwbEzXWqWzMuYR3593/X8C/qepFwKeA9BBd15iYYgHBmHO9CHzYyRSJ02SUh3cdYvhDOmGAASAnmIs6S372i8hVzqZ7Q1NcY0LHAoIxflT1IPA1YIeI7Af+Bfgy8DMnxXCX3+HPAh8UkX0icnUQl/8E8LCIvAmMhLbkxsyfpb82xhgDWA3BGGOMw0YZGRMCIvJfgTsDNv9MVb8WjfIYMxfWZGSMMQawJiNjjDEOCwjGGGMACwjGGGMcFhCMMcYAFhCMMcY4/n96stpJgf0UigAAAABJRU5ErkJggg==\n", 374 | "text/plain": [ 375 | "
" 376 | ] 377 | }, 378 | "metadata": {}, 379 | "output_type": "display_data" 380 | } 381 | ], 382 | "source": [ 383 | "sns.distplot(all_persons.cat_id)" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [] 392 | } 393 | ], 394 | "metadata": { 395 | "kernelspec": { 396 | "display_name": "Python 3", 397 | "language": "python", 398 | "name": "python3" 399 | }, 400 | "language_info": { 401 | "codemirror_mode": { 402 | "name": "ipython", 403 | "version": 3 404 | }, 405 | "file_extension": ".py", 406 | "mimetype": "text/x-python", 407 | "name": "python", 408 | "nbconvert_exporter": "python", 409 | "pygments_lexer": "ipython3", 410 | "version": "3.6.4" 411 | } 412 | }, 413 | "nbformat": 4, 414 | "nbformat_minor": 1 415 | } 416 | -------------------------------------------------------------------------------- /demos/synthesize.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | from synthpop.recipes.starter2 import Starter 5 | from synthpop.synthesizer import synthesize_all 6 | import pandas as pd 7 | import os 8 | import sys 9 | 10 | state_abbr = sys.argv[1] 11 | county_name = sys.argv[2] 12 | 13 | starter = Starter(os.environ["CENSUS"], state_abbr, county_name) 14 | 15 | if len(sys.argv) > 3: 16 | state, county, tract, block_group = sys.argv[3:] 17 | 18 | indexes = [pd.Series( 19 | [state, county, tract, block_group], 20 | index=["state", "county", "tract", "block group"])] 21 | else: 22 | indexes = None 23 | 24 | households, people, fit_quality = synthesize_all(starter, indexes=indexes) 25 | 26 | for geo, qual in fit_quality.items(): 27 | print ('Geography: {} {} {} {}'.format( 28 | geo.state, geo.county, geo.tract, geo.block_group)) 29 | # print ' household chisq: {}'.format(qual.household_chisq) 30 | # print ' household p: {}'.format(qual.household_p) 31 | print (' people chisq: {}'.format(qual.people_chisq)) 32 | 
print (' people p: {}'.format(qual.people_p)) 33 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # Additional requirements for development and testing 2 | 3 | # testing 4 | coveralls 5 | pytest 6 | pytest-cov<2.10 # 2.10 raised errors in Travis 7 | pycodestyle 8 | 9 | # building documentation 10 | numpydoc 11 | sphinx 12 | sphinx_rtd_theme 13 | -------------------------------------------------------------------------------- /scripts/dl_and_slice_pums.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import zipfile 4 | import urllib, urllib2 5 | import pandas as pd, numpy as np 6 | from bs4 import BeautifulSoup 7 | from spandex import TableLoader 8 | 9 | loader = TableLoader() 10 | 11 | soup = BeautifulSoup(urllib2.urlopen("http://www2.census.gov/acs2013_5yr/pums/")) 12 | 13 | tags = soup.find_all(href=re.compile("csv_h..\.zip")) 14 | hpums_links = [] 15 | for t in tags: 16 | hpums_links.append(t['href']) 17 | 18 | tags = soup.find_all(href=re.compile("csv_p..\.zip")) 19 | ppums_links = [] 20 | for t in tags: 21 | ppums_links.append(t['href']) 22 | 23 | pums_links = hpums_links + ppums_links 24 | for pums_file in pums_links: 25 | print pums_file 26 | pums_file_dl = urllib.URLopener() 27 | pums_file_dl.retrieve("http://www2.census.gov/acs2013_5yr/pums/%s" % pums_file, 28 | os.path.join(loader.get_path('pums'), pums_file)) 29 | 30 | for pums_file in pums_links: 31 | filepath = os.path.join(loader.get_path('pums'), pums_file) 32 | 33 | if os.path.exists(filepath): 34 | print 'Unzipping %s' % pums_file 35 | 36 | with zipfile.ZipFile(filepath, "r") as z: 37 | z.extractall(loader.get_path('pums')) 38 | 39 | for pums_file in ['ss13husa.csv', 'ss13husb.csv', 40 | 'ss13husc.csv', 'ss13husd.csv', 41 | 'ss13pusa.csv', 'ss13pusb.csv', 42 | 'ss13pusc.csv', 'ss13pusd.csv']: 
43 | print 'Processing %s' % pums_file 44 | pums = pd.read_csv(os.path.join(loader.get_path('pums'), pums_file)) 45 | 46 | for state_id in np.unique(pums['ST']): 47 | print ' Processing pums for state %s' % state_id 48 | pum_state = pums[pums['ST'] == state_id] 49 | state_id = '{:>02}'.format(state_id) 50 | if pums_file[4] == 'h': 51 | pums_state_filename = 'puma_h_%s.csv' % (state_id) 52 | elif pums_file[4] == 'p': 53 | pums_state_filename = 'puma_p_%s.csv' % (state_id) 54 | pum_state.to_csv(os.path.join(loader.get_path('pums'), pums_state_filename), index = False) 55 | 56 | print ' Slicing up pums files by 2000 pumas' 57 | for puma00 in np.unique(pum_state['PUMA00']): 58 | if puma00 != -9: 59 | print puma00 60 | df = pum_state[pum_state['PUMA00'] == puma00] 61 | puma00 = '{:>05}'.format(puma00) 62 | if pums_file[4] == 'h': 63 | output_filename = 'puma00_h_%s_%s.csv' % (state_id, puma00) 64 | elif pums_file[4] == 'p': 65 | output_filename = 'puma00_p_%s_%s.csv' % (state_id, puma00) 66 | df.to_csv(os.path.join(loader.get_path('pums'), output_filename), index = False) 67 | 68 | print ' Slicing up pums files by 2010 pumas' 69 | for puma10 in np.unique(pum_state['PUMA10']): 70 | if puma10 != -9: 71 | print puma10 72 | df = pum_state[pum_state['PUMA10'] == puma10] 73 | puma10 = '{:>05}'.format(puma10) 74 | if pums_file[4] == 'h': 75 | output_filename = 'puma10_h_%s_%s.csv' % (state_id, puma10) 76 | elif pums_file[4] == 'p': 77 | output_filename = 'puma10_p_%s_%s.csv' % (state_id, puma10) 78 | df.to_csv(os.path.join(loader.get_path('pums'), output_filename), index = False) -------------------------------------------------------------------------------- /scripts/synth_example.py: -------------------------------------------------------------------------------- 1 | from synthpop.recipes.starter2 import Starter 2 | from synthpop.synthesizer import synthesize_all, enable_logging 3 | import os 4 | 5 | def synthesize_county(county): 6 | starter = Starter(os.environ["CENSUS"], 
"CO", county) 7 | synthetic_population = synthesize_all(starter) 8 | return synthetic_population 9 | 10 | synthesize_county('Gilpin County') 11 | -------------------------------------------------------------------------------- /scripts/tract_to_puma00_xref.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib 3 | import zipfile 4 | from spandex import TableLoader 5 | from spandex.io import exec_sql 6 | from spandex.spatialtoolz import conform_srids, tag 7 | import pandas as pd, numpy as np 8 | 9 | import pandas.io.sql as sql 10 | def db_to_df(query): 11 | """Executes SQL query and returns DataFrame.""" 12 | conn = loader.database._connection 13 | return sql.read_frame(query, conn) 14 | 15 | loader = TableLoader() 16 | 17 | # Download puma 2000 geometry zip files 18 | for i in range(73): 19 | if i < 10: 20 | filename = 'p50%s_d00_shp.zip' % i 21 | else: 22 | filename = 'p5%s_d00_shp.zip' % i 23 | 24 | try: 25 | pumageom_file = urllib.URLopener() 26 | pumageom_file.retrieve("http://www2.census.gov/geo/tiger/PREVGENZ/pu/p500shp/%s" % filename, 27 | os.path.join(loader.get_path('puma_geom'), filename)) 28 | print 'Downloading %s' % filename 29 | except: 30 | continue 31 | 32 | # Unzip and add prj file to puma 2000 geometry 33 | for i in range(73): 34 | if i < 10: 35 | filename = 'p50%s_d00_shp.zip' % i 36 | else: 37 | filename = 'p5%s_d00_shp.zip' % i 38 | filepath = os.path.join(loader.get_path('puma_geom'), filename) 39 | 40 | if os.path.exists(filepath): 41 | print 'Unzipping and adding prj to %s' % filename 42 | 43 | with zipfile.ZipFile(filepath, "r") as z: 44 | z.extractall(loader.get_path('puma_geom')) 45 | 46 | # PUMA 2000 shapefile doesn't come with .prj file - create one 47 | shape_prjname = filename[:8] + '.prj' 48 | prj_filepath = os.path.join(loader.get_path('puma_geom'), shape_prjname) 49 | text_file = open(prj_filepath, "w") 50 | 
text_file.write('GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]') 51 | text_file.close() 52 | 53 | ##Next step- do the same for tracts 54 | tract_file = urllib.URLopener() 55 | tract_file.retrieve("http://www2.census.gov/geo/tiger/TIGER2010DP1/Tract_2010Census_DP1.zip", 56 | os.path.join(loader.get_path('tract2010_geom'), "Tract_2010Census_DP1.zip")) 57 | 58 | 59 | with zipfile.ZipFile(os.path.join(loader.get_path('tract2010_geom'), "Tract_2010Census_DP1.zip"), "r") as z: 60 | z.extractall(loader.get_path('tract2010_geom')) 61 | 62 | with loader.database.cursor() as cur: 63 | cur.execute(""" 64 | CREATE EXTENSION IF NOT EXISTS postgis; 65 | CREATE SCHEMA IF NOT EXISTS staging; 66 | """) 67 | loader.database.refresh() 68 | 69 | 70 | shapefiles = { 71 | 'staging.tracts10': 72 | 'tract2010_geom/Tract_2010Census_DP1.shp', 73 | } 74 | 75 | loader.load_shp_map(shapefiles) 76 | 77 | 78 | shapefiles = {} 79 | for i in range(73): 80 | if i < 10: 81 | filename = 'p50%s_d00.shp' % i 82 | else: 83 | filename = 'p5%s_d00.shp' % i 84 | filepath = os.path.join(loader.get_path('puma_geom'), filename) 85 | 86 | if os.path.exists(filepath): 87 | subfile_name = filename[:-4] 88 | shapefiles['staging.%s' % subfile_name] = 'puma_geom/%s' % filename 89 | 90 | loader.load_shp_map(shapefiles) 91 | 92 | 93 | conform_srids(loader.srid, schema=loader.tables.staging, fix=True) 94 | 95 | exec_sql("DROP table if exists staging.puma00;") 96 | 97 | sql_str = "" 98 | for i in range(73): 99 | if i < 10: 100 | filename = 'p50%s_d00.shp' % i 101 | else: 102 | filename = 'p5%s_d00.shp' % i 103 | filepath = os.path.join(loader.get_path('puma_geom'), filename) 104 | 105 | if os.path.exists(filepath): 106 | subfile_name = filename[:-4] 107 | sql_str = sql_str + 'select area, perimeter, puma5, name, geom from staging.%s' % subfile_name 108 | if i < 72: 109 | sql_str = sql_str + ' UNION ALL ' 110 | 111 | 
sql_str = 'with a as (' + sql_str + ') select * into staging.puma00 from a' 112 | exec_sql(sql_str) 113 | 114 | exec_sql('ALTER TABLE staging.puma00 ADD COLUMN gid BIGSERIAL PRIMARY KEY') 115 | 116 | exec_sql(""" 117 | CREATE INDEX puma00_gist ON staging.puma00 118 | USING gist (geom); 119 | """) 120 | 121 | loader.database.refresh() 122 | 123 | # Tag tracts with a parcel_id 124 | tag(loader.tables.staging.tracts10, 'puma00_id', loader.tables.staging.puma00, 'puma5') 125 | 126 | tract10_puma10_rel_file = urllib.URLopener() 127 | tract10_puma10_rel_file.retrieve("http://www2.census.gov/geo/docs/maps-data/data/rel/2010_Census_Tract_to_2010_PUMA.txt", 128 | os.path.join(loader.get_path('tract2010_geom'), 'tract10_puma10_rel_file.csv')) 129 | 130 | tract10_puma10_rel = pd.read_csv(os.path.join(loader.get_path('tract2010_geom'), 'tract10_puma10_rel_file.csv'), 131 | dtype={ 132 | "STATEFP": "object", 133 | "COUNTYFP": "object", 134 | "TRACTCE": "object", 135 | "PUMA5CE": "object" 136 | }) 137 | tract10_puma00 = db_to_df('select geoid10, namelsad10, puma00_id from staging.tracts10;') 138 | 139 | ##Need statefp/countyfp/tractce columns on tracts (split from geoid) 140 | tract10_puma00['STATEFP'] = tract10_puma00.geoid10.str.slice(0,2) 141 | tract10_puma00['COUNTYFP'] = tract10_puma00.geoid10.str.slice(2,5) 142 | tract10_puma00['TRACTCE'] = tract10_puma00.geoid10.str.slice(5,) 143 | 144 | print len(tract10_puma00) 145 | print len(tract10_puma10_rel) 146 | 147 | tract_puma_xref = pd.merge(tract10_puma10_rel, tract10_puma00, 148 | left_on = ['STATEFP', 'COUNTYFP', 'TRACTCE'], right_on = ['STATEFP', 'COUNTYFP', 'TRACTCE']) 149 | 150 | tract_puma_xref = tract_puma_xref.rename(columns = {'STATEFP':'statefp', 'COUNTYFP':'countyfp', 'TRACTCE':'tractce', 151 | 'PUMA5CE':'puma10_id'}) 152 | 153 | tract_puma_xref = tract_puma_xref[['statefp', 'countyfp', 'tractce', 'puma10_id', 'puma00_id']] 154 | 155 | tract_puma_xref.to_csv('tract10_to_puma.csv', index = False) 156 | 157 | 
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | # these are the standard ignores plus E402, E722, and E741, which weren't enforced when 3 | # the codebase was first written 4 | ignore = E121,E123,E126,E133,E226,E241,E242,E704,W503,W504,W505,E402,E722,E741 5 | max-line-length = 100 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='synthpop', 5 | version='0.1.1', 6 | description='Population Synthesis', 7 | author='UrbanSim Inc.', 8 | author_email='udst@urbansim.com', 9 | license='BSD', 10 | url='https://github.com/udst/synthpop', 11 | classifiers=[ 12 | 'Development Status :: 4 - Beta', 13 | 'Programming Language :: Python :: 2.7', 14 | 'Programming Language :: Python :: 3.5', 15 | 'Programming Language :: Python :: 3.6', 16 | 'Programming Language :: Python :: 3.7' 17 | ], 18 | packages=find_packages(exclude=['*.tests']), 19 | install_requires=[ 20 | 'census>=0.5', 21 | 'numexpr>=2.3.1', 22 | 'numpy>=1.16.5 ', 23 | 'pandas>=0.15.0', 24 | 'scipy>=0.13.3', 25 | 'us>=0.8' 26 | ] 27 | ) 28 | -------------------------------------------------------------------------------- /synthpop/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.1' 2 | version = __version__ 3 | -------------------------------------------------------------------------------- /synthpop/categorizer.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | # TODO DOCSTRINGS!! 
def categorize(df, eval_d, index_cols=None):
    """
    Evaluate a dict of category expressions against ``df``.

    ``eval_d`` maps ``(cat_name, cat_value)`` tuples to pandas ``eval``
    expressions; the result is a DataFrame with a two-level column
    MultiIndex (``cat_name`` / ``cat_value``), sorted by column, one
    evaluated column per expression.  If ``index_cols`` is given those
    columns are copied from ``df`` and used as the result's index.
    """
    result = pd.DataFrame(index=df.index)

    # evaluate each expression; the tuple key becomes the column label
    for col_key, expression in eval_d.items():
        result[col_key] = df.eval(expression)

    if index_cols is not None:
        result[index_cols] = df[index_cols]
        result = result.set_index(index_cols)

    result.columns = pd.MultiIndex.from_tuples(
        result.columns, names=['cat_name', 'cat_value'])

    return result.sort_index(axis=1)


def sum_accross_category(df, subtract_mean=True):
    """
    Sum the categorical values within each category.

    The mean across each category is then subtracted (when
    ``subtract_mean`` is True) so all cells in the table should be close
    to zero; they are not exactly zero because of rounding errors when
    scaling tract variables down to block-group variables.
    """
    totals = df.stack(level=1).fillna(0).groupby(level=0).sum()
    if not subtract_mean:
        return totals
    return totals.sub(totals.mean(axis=1), axis="rows")
46 | """ 47 | d = {} 48 | for cat_name, cat_value in index: 49 | d.setdefault(cat_name, []) 50 | d[cat_name].append(cat_value) 51 | for cat_name in list(d): 52 | if len(d[cat_name]) == 1: 53 | del d[cat_name] 54 | df = pd.DataFrame(list(itertools.product(*list(d.values())))) 55 | df.columns = cols = list(d.keys()) 56 | df.index.name = "cat_id" 57 | df = df.reset_index().set_index(cols) 58 | return df 59 | 60 | 61 | def joint_distribution(sample_df, category_df, mapping_functions=None): 62 | 63 | # set counts to zero 64 | category_df["frequency"] = 0 65 | 66 | category_names = list(category_df.index.names) 67 | if mapping_functions: 68 | for name in category_names: 69 | assert name in mapping_functions, "Every category needs to have " \ 70 | "mapping function with the same a " \ 71 | "name to define that category for " \ 72 | "the pums sample records" 73 | sample_df[name] = sample_df.apply(mapping_functions[name], 74 | axis=1).astype('category') 75 | 76 | category_df["frequency"] = sample_df.groupby(category_names).size() 77 | category_df["frequency"] = category_df["frequency"].fillna(0) 78 | 79 | # do the merge to add the category id 80 | sample_df = pd.merge(sample_df, category_df[["cat_id"]], 81 | left_on=category_names, right_index=True) 82 | 83 | return sample_df, category_df 84 | 85 | 86 | def _frequency_table(sample_df, category_ids): 87 | """ 88 | Take the result that comes out of the method above and turn it in to the 89 | frequencytable format used by the ipu 90 | """ 91 | df = sample_df.groupby(['hh_id', 'cat_id']).size().unstack().fillna(0) 92 | 93 | # need to manually add in case we missed a whole cat_id in the sample 94 | missing_ids = list(set(category_ids) - set(df.columns)) 95 | if missing_ids: 96 | missing_df = pd.DataFrame( 97 | data=np.zeros((len(df), len(missing_ids))), 98 | index=df.index, 99 | columns=missing_ids) 100 | df = df.merge(missing_df, left_index=True, right_index=True) 101 | 102 | assert len(df.columns) == len(category_ids) 103 | 
def frequency_tables(persons_sample_df, households_sample_df,
                     person_cat_ids, household_cat_ids):
    """
    Build the household and person frequency tables used by the IPU.

    Households are re-keyed by a positional ``hh_id`` (their original
    index) with ``serialno`` as the lookup index; persons are joined to
    their household's ``hh_id`` via ``serialno``.  Both tables share the
    same row index and are returned with columns sorted by ``cat_id``.
    """
    # expose the household index as an hh_id column, key rows by serialno
    households_sample_df.index.name = "hh_id"
    households_sample_df = (households_sample_df
                            .reset_index()
                            .set_index("serialno"))

    hh_freq = _frequency_table(households_sample_df, household_cat_ids)

    # tag each person with its household's hh_id
    persons_sample_df = pd.merge(persons_sample_df,
                                 households_sample_df[["hh_id"]],
                                 left_on=["serialno"], right_index=True)

    person_freq = _frequency_table(persons_sample_df, person_cat_ids)
    # align person rows to households; households without persons get 0s
    person_freq = person_freq.reindex(hh_freq.index).fillna(0)
    assert len(hh_freq) == len(person_freq)

    return hh_freq.sort_index(axis=1), person_freq.sort_index(axis=1)
    def __init__(self, key, acsyear=2016):
        """Wrap the Census API client and remote PUMS file locations.

        Parameters
        ----------
        key : str
            Census API key.
        acsyear : int, optional
            ACS 5-year release year; selects which hosted PUMS storage
            bucket and support files are used.
        """
        # API client reuses the retrying session created at module level
        self.c = census.Census(key, session=sess)
        # remote storage roots: PUMS csvs and geography-change support files
        self.base_url = synthpop_config(acsyear).pums_storage()
        self.support_files = geog_changes_path(acsyear).geog_change_storage()
        self.acsyear_files = acsyear
        # tract -> puma crosswalk; lazily loaded into pums_relationship_df
        self.pums_relationship_file_url = self.support_files + "tract10_to_puma.csv"
        self.pums_relationship_df = None
        # per-puma csv name templates, filled with (state fips, puma id)
        self.pums10_population_base_url = \
            self.base_url + "puma10_p_%s_%s.csv"
        self.pums10_household_base_url = \
            self.base_url + "puma10_h_%s_%s.csv"
        self.pums00_population_base_url = \
            self.base_url + "puma00_p_%s_%s.csv"
        self.pums00_household_base_url = \
            self.base_url + "puma00_h_%s_%s.csv"
        # statewide csv name templates, filled with (state fips,)
        self.pums_population_state_base_url = \
            self.base_url + "puma_p_%s.csv"
        self.pums_household_state_base_url = \
            self.base_url + "puma_h_%s.csv"
        # state/county name -> fips lookup; lazily loaded into fips_df
        self.fips_url = self.base_url + "national_county.txt"
        self.fips_df = None
        # cache of already-downloaded PUMS frames, keyed by csv location
        self.pums_cache = {}
58 | df[col] = df[col].fillna(0).astype('int') 59 | return df 60 | 61 | def block_group_query(self, census_columns, state, county, year=2016, 62 | tract=None, id=None): 63 | if id is None: 64 | id = "*" 65 | return self._query(census_columns, state, county, 66 | forstr="block group:%s" % id, 67 | tract=tract, year=year) 68 | 69 | def tract_query(self, census_columns, state, county, year=2016, 70 | tract=None): 71 | if tract is None: 72 | tract = "*" 73 | return self._query(census_columns, state, county, 74 | forstr="tract:%s" % tract, 75 | year=year) 76 | 77 | def _query(self, census_columns, state, county, forstr, 78 | year, tract=None): 79 | c = self.c 80 | 81 | state, county = self.try_fips_lookup(state, county) 82 | 83 | if tract is None: 84 | in_str = 'state:%s county:%s' % (state, county) 85 | else: 86 | in_str = 'state:%s county:%s tract:%s' % (state, county, tract) 87 | 88 | dfs = [] 89 | 90 | # unfortunately the api only queries 50 columns at a time 91 | # leave room for a few extra id columns 92 | def chunks(l, n): 93 | """ Yield successive n-sized chunks from l. 
94 | """ 95 | for i in range(0, len(l), n): 96 | yield l[i:i+n] 97 | 98 | for census_column_batch in chunks(census_columns, 45): 99 | census_column_batch = list(census_column_batch) 100 | d = c.acs5.get(['NAME'] + census_column_batch, 101 | geo={'for': forstr, 102 | 'in': in_str}, year=year) 103 | df = pd.DataFrame(d) 104 | df[census_column_batch] = df[census_column_batch].astype('int') 105 | dfs.append(df) 106 | 107 | assert len(dfs) >= 1 108 | df = dfs[0] 109 | for mdf in dfs[1:]: 110 | df = pd.merge(df, mdf, on="NAME", suffixes=("", "_ignore")) 111 | drop_cols = list(filter(lambda x: "_ignore" in x, df.columns)) 112 | df = df.drop(drop_cols, axis=1) 113 | 114 | return df 115 | 116 | def block_group_and_tract_query(self, block_group_columns, 117 | tract_columns, state, county, 118 | merge_columns, block_group_size_attr, 119 | tract_size_attr, year=2016, tract=None): 120 | df2 = self.tract_query(tract_columns, state, county, tract=tract, 121 | year=year) 122 | df1 = self.block_group_query(block_group_columns, state, county, 123 | tract=tract, year=year) 124 | 125 | df = self._scale_and_merge(df1, block_group_size_attr, df2, 126 | tract_size_attr, tract_columns, 127 | merge_columns, suffixes=("", "_ignore")) 128 | drop_cols = list(filter(lambda x: "_ignore" in x, df.columns)) 129 | df = df.drop(drop_cols, axis=1) 130 | 131 | return df 132 | 133 | def update_geographies(self, df): 134 | acsyear = self.acsyear_files 135 | changes = pd.read_csv(self.support_files + 'geog_changes.csv', 136 | dtype={'new_geog': 'str', 'old_geog': 'str'}) 137 | for year in range(2011, acsyear): 138 | year_change = changes[changes['year'] == year].copy() 139 | import pdb 140 | if len(year_change) > 0: 141 | for index, row in year_change.iterrows(): 142 | new = row['new_geog'] 143 | old = row['old_geog'] 144 | state_new = new[:2] 145 | state_old = old[:2] 146 | county_new = new[2:5] 147 | county_old = old[2:5] 148 | if len(new) > 5: 149 | tract_new = new[5:] 150 | tract_old = old[5:] 151 | 
idx = df.index.max() + 1 152 | df.loc[idx, 'statefp'] = state_new 153 | df.loc[idx, 'countyfp'] = county_new 154 | df.loc[idx, 'tractce'] = tract_new 155 | old_puma10 = df[(df['statefp'] == state_old) & 156 | (df['countyfp'] == county_old) & 157 | (df['tractce'] == tract_old)]['puma10_id'].values[0] 158 | old_puma00 = df[(df['statefp'] == state_old) & 159 | (df['countyfp'] == county_old) & 160 | (df['tractce'] == tract_old)]['puma00_id'].values[0] 161 | df.loc[idx, 'puma10_id'] = old_puma10 162 | df.loc[idx, 'puma00_id'] = old_puma00 163 | else: 164 | df_change = df[(df['statefp'] == state_old) & 165 | (df['countyfp'] == county_old)].copy() 166 | df_change.loc[:, 'countyfp'] = county_new 167 | df = pd.concat([df, df_change]) 168 | return df 169 | 170 | def _get_pums_relationship(self): 171 | if self.pums_relationship_df is None: 172 | self.pums_relationship_df = \ 173 | pd.read_csv(self.pums_relationship_file_url, dtype={ 174 | "statefp": "object", 175 | "countyfp": "object", 176 | "tractce": "object", 177 | "puma10_id": "object", 178 | "puma00_id": "object", 179 | }) 180 | self.pums_relationship_df = self.update_geographies(self.pums_relationship_df) 181 | return self.pums_relationship_df 182 | 183 | def _get_fips_lookup(self): 184 | if self.fips_df is None: 185 | self.fips_df = pd.read_csv( 186 | self.fips_url, 187 | dtype={ 188 | "State ANSI": "object", 189 | "County ANSI": "object" 190 | }, 191 | index_col=["State", 192 | "County Name"] 193 | ) 194 | del self.fips_df["ANSI Cl"] 195 | return self.fips_df 196 | 197 | def tract_to_puma(self, state, county, tract): 198 | 199 | state, county = self.try_fips_lookup(state, county) 200 | 201 | df = self._get_pums_relationship() 202 | q = "statefp == '%s' and countyfp == '%s' and tractce == '%s'" % (state, county, tract) 203 | r = df.query(q) 204 | return r["puma10_id"].values[0], r["puma00_id"].values[0] 205 | 206 | def _read_csv(self, loc, **kargs): 207 | if loc not in self.pums_cache: 208 | pums_df = pd.read_csv(loc, 
dtype={ 209 | "PUMA10": "object", 210 | "PUMA00": "object", 211 | "ST": "object", 212 | "SERIALNO": 'str', 213 | "serialno": 'str', 214 | }, **kargs) 215 | pums_df = pums_df.rename(columns={ 216 | 'PUMA10': 'puma10', 217 | 'PUMA00': 'puma00', 218 | 'SERIALNO': 'serialno' 219 | }) 220 | self.pums_cache[loc] = pums_df 221 | return self.pums_cache[loc] 222 | 223 | def download_population_pums(self, state, puma10=None, puma00=None, **kargs): 224 | state = self.try_fips_lookup(state) 225 | if (puma10 is None) & (puma00 is None): 226 | return self._read_csv(self.pums_population_state_base_url % (state), **kargs) 227 | pums = self._read_csv(self.pums10_population_base_url % (state, puma10), **kargs) 228 | if (puma00 is not None) & (self.acsyear_files < 2018): 229 | pums00 = self._read_csv(self.pums00_population_base_url % (state, puma00), **kargs) 230 | pums = pd.concat([pums, pums00], ignore_index=True) 231 | return pums 232 | 233 | def download_household_pums(self, state, puma10=None, puma00=None, **kargs): 234 | state = self.try_fips_lookup(state) 235 | if (puma10 is None) & (puma00 is None): 236 | return self._read_csv(self.pums_household_state_base_url % (state), **kargs) 237 | pums = self._read_csv(self.pums10_household_base_url % (state, puma10), **kargs) 238 | if (puma00 is not None) & (self.acsyear_files < 2018): 239 | pums00 = self._read_csv(self.pums00_household_base_url % (state, puma00), **kargs) 240 | pums = pd.concat([pums, pums00], ignore_index=True) 241 | 242 | # filter out gq and empty units (non-hh records) 243 | pums = pums[(pums.RT == 'H') & (pums.NP > 0) & (pums.TYPE == 1)] 244 | 245 | return pums 246 | 247 | def try_fips_lookup(self, state, county=None): 248 | df = self._get_fips_lookup() 249 | 250 | if county is None: 251 | try: 252 | return getattr(us.states, state).fips 253 | except: 254 | pass 255 | return state 256 | 257 | try: 258 | return df.loc[(state, county)] 259 | except: 260 | pass 261 | return state, county 262 | 
-------------------------------------------------------------------------------- /synthpop/config.py: -------------------------------------------------------------------------------- 1 | class synthpop_config: 2 | 3 | def __init__(self, acsyear=2013): 4 | self.acsyear = acsyear 5 | 6 | def pums_storage(self): 7 | if self.acsyear >= 2018: 8 | storage = "https://storage.googleapis.com/synthpop-public/PUMS2018/pums_2018_acs5/" 9 | else: 10 | storage = "https://s3-us-west-1.amazonaws.com/synthpop-data2/" 11 | return storage 12 | 13 | def __call__(self): 14 | return self.pums_storage() 15 | 16 | 17 | class geog_changes_path: 18 | def __init__(self, acsyear): 19 | self.acsyear = acsyear 20 | 21 | def geog_change_storage(self): 22 | storage = "https://storage.googleapis.com/synthpop-public/support_files/" 23 | return storage 24 | 25 | def __call__(self): 26 | return self.geog_change_storage() 27 | -------------------------------------------------------------------------------- /synthpop/draw.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from scipy.stats import chisquare 6 | 7 | from .ipu.ipu import _FrequencyAndConstraints 8 | 9 | 10 | def simple_draw(num, weights, index): 11 | """ 12 | Choose among indexes based on weights using a simple random draw. 13 | 14 | Parameters 15 | ---------- 16 | num : int 17 | Number of items to draw from `index`. 18 | weights : array 19 | Array of weights corresponding to each value in `index`. 20 | Must be the same length as `index`. 21 | index : array 22 | Array of values from which to draw. Must be the same 23 | length as `weights`. 24 | 25 | Returns 26 | ------- 27 | draw : array 28 | Array of indexes drawn based on weights. 
29 | 30 | """ 31 | p = weights / weights.sum() 32 | return np.random.choice(index, size=num, p=p, replace=True) 33 | 34 | 35 | def _draw_indexes(num, fac, weights): 36 | """ 37 | Construct a set of indexes that can be used to index a complete 38 | set of synthetic households. 39 | 40 | Parameters 41 | ---------- 42 | num : int 43 | The total number of households to draw. 44 | fac : _FrequencyAndConstraints 45 | weights : pandas.Series 46 | 47 | Returns 48 | ------- 49 | idx : pandas.Index 50 | Will be drawn from the index of `weights`. 51 | 52 | """ 53 | idx = [] 54 | constraint_diffs = [] 55 | 56 | for col_name, _, constraint, nz in fac.iter_columns(): 57 | if len(nz) == 0: 58 | continue 59 | 60 | flr_constraint = int(np.floor(constraint)) 61 | constraint_diffs.append((col_name, constraint - flr_constraint)) 62 | 63 | if flr_constraint > 0: 64 | wts = weights.values[nz] 65 | idx.extend( 66 | simple_draw(flr_constraint, wts, weights.index.values[nz])) 67 | 68 | if len(idx) < num: 69 | num_to_add = num - len(idx) 70 | 71 | if num_to_add > len(weights): 72 | raise RuntimeError( 73 | 'There is a mismatch between the constraints and the total ' 74 | 'number of households to draw. The total to draw appears ' 75 | 'to be higher than indicated by the constraints.') 76 | 77 | constraint_diffs = sorted( 78 | constraint_diffs, key=lambda x: x[1], reverse=True)[:num_to_add] 79 | 80 | for col_name, _ in constraint_diffs: 81 | _, _, _, nz = fac.get_column(col_name) 82 | wts = weights.values[nz] 83 | idx.extend(simple_draw(1, wts, weights.index.values[nz])) 84 | 85 | return pd.Index(idx) 86 | 87 | 88 | def execute_draw(indexes, h_pums, p_pums, hh_index_start=0): 89 | """ 90 | Take new household indexes and create new household and persons tables 91 | with updated indexes and relations. 92 | 93 | Parameters 94 | ---------- 95 | indexes : array 96 | Will be used to index `h_pums` into a new table. 97 | h_pums : pandas.DataFrame 98 | Table of household data. 
Expected to have a "serialno" column 99 | that matches `p_pums`. 100 | p_pums : pandas.DataFrame 101 | Table of person data. Expected to have a "serialno" columns 102 | that matches `h_pums`. 103 | hh_index_start : int, optional 104 | The starting point for new indexes on the synthesized 105 | households table. 106 | 107 | Returns 108 | ------- 109 | synth_hh : pandas.DataFrame 110 | Index will match the ``hh_id`` column in `synth_people`. 111 | synth_people : pandas.DataFrame 112 | Will be related to `synth_hh` by the ``hh_id`` column. 113 | 114 | """ 115 | synth_hh = h_pums.loc[indexes].reset_index(drop=True) 116 | synth_hh.index += hh_index_start 117 | 118 | mrg_tbl = pd.DataFrame( 119 | {'serialno': synth_hh.serialno.values, 120 | 'hh_id': synth_hh.index.values}) 121 | synth_people = pd.merge( 122 | p_pums, mrg_tbl, left_on='serialno', right_on='serialno') 123 | 124 | return synth_hh, synth_people 125 | 126 | 127 | def compare_to_constraints(synth, constraints): 128 | """ 129 | Compare the results of a synthesis draw to the target constraints. 130 | 131 | This comparison performs chi square test between the synthesized 132 | category counts and the target constraints used as inputs for the IPU. 133 | 134 | Parameters 135 | ---------- 136 | synth : pandas.Series 137 | Series of category IDs from synthesized table. 138 | constraints : pandas.Series 139 | Target constraints used in IPU step. 140 | 141 | Returns 142 | ------- 143 | chisq : float 144 | The chi squared test statistic. 145 | p : float 146 | The p-value of the test. 147 | 148 | See Also 149 | -------- 150 | scipy.stats.chisquare : Calculates a one-way chi square test. 
151 | 152 | """ 153 | counts = synth.value_counts() 154 | 155 | # need to add zeros to counts for any categories that are 156 | # in the constraints but not in the counts 157 | diff = constraints.index.difference(counts.index) 158 | counts = counts.combine_first( 159 | pd.Series(np.zeros(len(diff), dtype='int'), index=diff)) 160 | 161 | counts, constraints = counts.align(constraints) 162 | 163 | # remove any items that are zero in the constraints 164 | w = constraints >= 1 165 | counts, constraints = counts[w], constraints[w] 166 | 167 | return chisquare(counts.values, constraints.values) 168 | 169 | 170 | def draw_households( 171 | num, h_pums, p_pums, household_freq, household_constraints, 172 | person_constraints, weights, hh_index_start=0): 173 | """ 174 | Draw households and persons according to weights from the IPU. 175 | 176 | Parameters 177 | ---------- 178 | num : int 179 | The total number of households to draw. 180 | h_pums : pandas.DataFrame 181 | Table of household data. Expected to have a "serialno" column 182 | that matches `p_pums`. 183 | p_pums : pandas.DataFrame 184 | Table of person data. Expected to have a "serialno" columns 185 | that matches `h_pums`. 186 | household_freq : pandas.DataFrame 187 | Frequency table for household attributes. Columns should be 188 | a MultiIndex matching the index of `household_constraints` and 189 | index should be household IDs matching the index `h_pums` 190 | and `weights`. 191 | household_constraints : pandas.Series 192 | Target marginal constraints for household classes. 193 | Index must be the same as the columns of `household_freq`. 194 | person_constraints : pandas.Series 195 | Target marginal constraints for person classes. 196 | Index must be the same as the columns of `person_freq`. 197 | weights : pandas.Series 198 | Weights from IPU. Index should match `h_pums` and `household_freq`. 199 | hh_index_start : int, optional 200 | Index at which to start the indexing of returned households. 
201 | 202 | Returns 203 | ------- 204 | best_households : pandas.DataFrame 205 | Index will match the ``hh_id`` column in `synth_people`. 206 | best_people : pandas.DataFrame 207 | Will be related to `best_households` by the ``hh_id`` column. 208 | people_chisq : float 209 | people_p : float 210 | 211 | """ 212 | if num == 0: 213 | return ( 214 | pd.DataFrame(columns=h_pums.columns), 215 | pd.DataFrame(columns=p_pums.columns.append(pd.Index(['hh_id']))), 216 | 0, 1) 217 | 218 | fac = _FrequencyAndConstraints(household_freq, household_constraints) 219 | 220 | best_chisq = np.inf 221 | 222 | for _ in range(20): 223 | indexes = _draw_indexes(num, fac, weights) 224 | synth_hh, synth_people = execute_draw( 225 | indexes, h_pums, p_pums, hh_index_start=hh_index_start) 226 | people_chisq, people_p = compare_to_constraints( 227 | synth_people.cat_id, person_constraints) 228 | 229 | if people_chisq < best_chisq: 230 | best_chisq = people_chisq 231 | best_p = people_p 232 | best_households, best_people = synth_hh, synth_people 233 | 234 | return best_households, best_people, best_chisq, best_p 235 | -------------------------------------------------------------------------------- /synthpop/ipf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UDST/synthpop/6fb13991c9d3ede2d8cf80512bd1102e37b98971/synthpop/ipf/__init__.py -------------------------------------------------------------------------------- /synthpop/ipf/ipf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def calculate_constraints( 6 | marginals, joint_dist, tolerance=1e-3, max_iterations=1000): 7 | """ 8 | Calculate constraints on household or person classes using 9 | single category marginals and the observed class proportions 10 | in a population sample. 
11 | 12 | Constraints are calculated via an iterative proportional fitting 13 | procedure. 14 | 15 | Parameters 16 | ---------- 17 | marginals : pandas.Series 18 | The total count of each observed subcategory tracked. 19 | This should have a pandas.MultiIndex with the outer level containing 20 | high-level category descriptions and the inner level containing 21 | the individual subcategory breakdowns. 22 | joint_dist : pandas.Series 23 | The observed counts of each household or person class in some sample. 24 | The index will be a pandas.MultiIndex with a level for each observed 25 | class in the sample. The levels should be named for ease of 26 | introspection. 27 | tolerance : float, optional 28 | The condition for stopping the IPF procedure. If the change in 29 | constraints is less than or equal to this value after an iteration 30 | the calculations are stopped. 31 | max_iterations : int, optional 32 | Maximum number of iterations to do before stopping and raising 33 | an exception. 34 | 35 | Returns 36 | ------- 37 | constraints : pandas.Series 38 | Will have the index of `joint_dist` and contain the desired 39 | totals for each class. 40 | iterations : int 41 | Number of iterations performed. 
42 | 43 | """ 44 | flat_joint_dist = joint_dist.reset_index() 45 | 46 | constraints = joint_dist.values.copy().astype('float') 47 | prev_constraints = constraints.copy() 48 | prev_constraints += tolerance # ensure we run at least one iteration 49 | 50 | def calc_diff(x, y): 51 | return np.abs(x - y).sum() 52 | 53 | iterations = 0 54 | 55 | list_of_loc = [ 56 | ((flat_joint_dist[idx[0]] == idx[1]).values, marginals[idx]) 57 | for idx in marginals.index 58 | ] 59 | 60 | while calc_diff(constraints, prev_constraints) > tolerance: 61 | prev_constraints[:] = constraints 62 | 63 | for loc, target in list_of_loc: 64 | constraints[loc] *= target / constraints[loc].sum() 65 | 66 | iterations += 1 67 | 68 | if iterations > max_iterations: 69 | raise RuntimeError( 70 | 'Maximum number of iterations reached during IPF: {}'.format( 71 | max_iterations)) 72 | 73 | return pd.Series(constraints, index=joint_dist.index), iterations 74 | -------------------------------------------------------------------------------- /synthpop/ipf/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UDST/synthpop/6fb13991c9d3ede2d8cf80512bd1102e37b98971/synthpop/ipf/test/__init__.py -------------------------------------------------------------------------------- /synthpop/ipf/test/test_ipf.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from pandas.util import testing as pdt 4 | 5 | from .. import ipf 6 | 7 | 8 | def test_trivial_ipf(): 9 | # Test IPF in a situation where the desired totals and observed 10 | # sample have the same proportion and there is only one super-category. 
11 | midx = pd.MultiIndex.from_product([('cat_owner',), ('yes', 'no')]) 12 | marginals = pd.Series([60, 40], index=midx) 13 | joint_dist = pd.Series( 14 | [6, 4], index=pd.Series(['yes', 'no'], name='cat_owner')) 15 | 16 | expected = pd.Series(marginals.values, index=joint_dist.index) 17 | constraints, iterations = ipf.calculate_constraints(marginals, joint_dist) 18 | 19 | pdt.assert_series_equal(constraints, expected, check_dtype=False) 20 | assert iterations == 2 21 | 22 | 23 | def test_larger_ipf(): 24 | # Test IPF with some data that's slightly more meaningful, 25 | # but for which it's harder to know the actual correct answer. 26 | marginal_midx = pd.MultiIndex.from_tuples( 27 | [('cat_owner', 'yes'), 28 | ('cat_owner', 'no'), 29 | ('car_color', 'blue'), 30 | ('car_color', 'red'), 31 | ('car_color', 'green')]) 32 | marginals = pd.Series([60, 40, 50, 30, 20], index=marginal_midx) 33 | joint_dist_midx = pd.MultiIndex.from_product( 34 | [('yes', 'no'), ('blue', 'red', 'green')], 35 | names=['cat_owner', 'car_color']) 36 | joint_dist = pd.Series([8, 4, 2, 5, 3, 2], index=joint_dist_midx) 37 | 38 | expected = pd.Series( 39 | [31.78776824, 17.77758309, 10.43464846, 40 | 18.21223176, 12.22241691, 9.56535154], 41 | index=joint_dist.index) 42 | constraints, _ = ipf.calculate_constraints(marginals, joint_dist) 43 | 44 | pdt.assert_series_equal(constraints, expected, check_dtype=False) 45 | 46 | with pytest.raises(RuntimeError): 47 | ipf.calculate_constraints(marginals, joint_dist, max_iterations=2) 48 | -------------------------------------------------------------------------------- /synthpop/ipu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UDST/synthpop/6fb13991c9d3ede2d8cf80512bd1102e37b98971/synthpop/ipu/__init__.py -------------------------------------------------------------------------------- /synthpop/ipu/ipu.py: 
-------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | from __future__ import division 4 | 5 | import itertools 6 | from collections import OrderedDict 7 | import warnings 8 | 9 | import numpy as np 10 | import pandas as pd 11 | 12 | 13 | def _drop_zeros(df): 14 | """ 15 | Drop zeros from a DataFrame, returning an iterator over the columns 16 | in the DataFrame. 17 | 18 | Yields tuples of (column name, non-zero column values, non-zero indexes). 19 | 20 | Parameters 21 | ---------- 22 | df : pandas.DataFrame 23 | 24 | """ 25 | def for_each_col(col): 26 | nz = col.values.nonzero()[0] 27 | return col.iloc[nz], nz 28 | 29 | for (col_idx, (col, nz)) in df.apply(for_each_col, axis=0, raw=False).items(): 30 | yield (col_idx, col.values, nz) 31 | 32 | 33 | class _FrequencyAndConstraints(object): 34 | """ 35 | Wrap frequency tables and constraints for both household and 36 | person classes for easy iteration over all of them. 37 | 38 | Also tracks the locations of non-zero elements in each column 39 | of the frequency tables. If including person classes, both 40 | `person_freq` and `person_constraints` are required. 41 | 42 | Parameters 43 | ---------- 44 | household_freq : pandas.DataFrame 45 | Frequency table for household attributes. Columns should be 46 | a MultiIndex matching the index of `household_constraints` and 47 | index should be household IDs matching the index of 48 | `person_freq`. 49 | household_constraints : pandas.Series 50 | Target marginal constraints for household classes. 51 | Index must be the same as the columns of `household_freq`. 52 | person_freq : pandas.DataFrame, optional 53 | Frequency table for household person. Columns should be 54 | a MultiIndex matching the index of `person_constraints` and 55 | index should be household IDs matching the index of 56 | `household_freq`. 57 | person_constraints : pandas.Series, optional 58 | Target marginal constraints for person classes. 
59 | Index must be the same as the columns of `person_freq`. 60 | 61 | Attributes 62 | ---------- 63 | ncols : int 64 | Total number household_wof columns across household and person classes. 65 | 66 | """ 67 | 68 | def __init__(self, household_freq, household_constraints, person_freq=None, 69 | person_constraints=None): 70 | 71 | hh_cols = ((key, col, household_constraints[key], nz) 72 | for key, col, nz in _drop_zeros(household_freq)) 73 | 74 | has_pers = person_freq is not None and person_constraints is not None 75 | if has_pers: 76 | p_cols = ((key, col, person_constraints[key], nz) 77 | for key, col, nz in _drop_zeros(person_freq)) 78 | else: 79 | p_cols = [] 80 | 81 | self._everything = OrderedDict( 82 | (t[0], t) for t in itertools.chain(hh_cols, p_cols)) 83 | self.ncols = len(self._everything) 84 | 85 | """ 86 | Check for problems in the resulting keys. 87 | These typically arise when column names are shared accross 88 | households and persons. 89 | """ 90 | keys = set([c[0] for c in self.iter_columns()]) 91 | assert len(set(household_freq.columns) - keys) == 0 92 | if has_pers: 93 | assert len(set(person_freq.columns) - keys) == 0 94 | assert self.ncols == len(household_freq.columns) + len(person_freq.columns) 95 | 96 | def iter_columns(self): 97 | """ 98 | Iterate over columns of both household and frequency tables AND 99 | the corresponding constraints for each column AND non-zero indexes 100 | applicable to each column. 101 | Yields tuples of (column name, column, constraint, nonzero). 102 | The returned column contains only the non-zero elements. 103 | 104 | """ 105 | return list(self._everything.values()) 106 | 107 | def get_column(self, key): 108 | """ 109 | Return a specific column's info by its name. 110 | 111 | Parameters 112 | ---------- 113 | key : object 114 | Column name or tuple required to index a MultiIndex column. 115 | 116 | Returns 117 | ------- 118 | col_name : object 119 | Same as `key`. 
120 | column : pandas.Series 121 | Has only the non-zero elements. 122 | constraint : float 123 | The target constraint for this type. 124 | nonzero : array 125 | The location of the non-zero items in the column. 126 | 127 | """ 128 | return self._everything[key] 129 | 130 | 131 | def _fit_quality(column, weights, constraint): 132 | """ 133 | Calculate quality of fit metric for a column of the frequency table. 134 | (The 𝛿 parameter described in the IPU paper.) 135 | 136 | Parameters 137 | ---------- 138 | column : 1D array 139 | Non-zero elements of a column of a frequency table. 140 | Must have the same length as `weights`. 141 | weights : 1D array 142 | Weights corresponding to the household rows in `column`. 143 | Must have the same length as `column`. 144 | constraint : float 145 | Target marginal constraint for this column. 146 | 147 | Returns 148 | ------- 149 | quality : float 150 | 151 | """ 152 | return abs((column * weights).sum() - constraint) / constraint 153 | 154 | 155 | def _average_fit_quality(freq_wrap, weights): 156 | """ 157 | Parameters 158 | ---------- 159 | freq_wrap : `_FrequencyAndConstraints` 160 | weights : ndarray 161 | Array of weights for all households. 162 | 163 | """ 164 | return sum( 165 | _fit_quality(col, weights[nz], constraint) 166 | for _, col, constraint, nz in freq_wrap.iter_columns() 167 | ) / freq_wrap.ncols 168 | 169 | 170 | def _update_weights(column, weights, constraint): 171 | """ 172 | Update household weights based on a single column. 173 | 174 | The update will be applied to all weights, so make sure only the 175 | non-zero elements of `column` and the corresponding weights are given. 176 | 177 | Parameters 178 | ---------- 179 | column : 1D array 180 | Non-zero elements of a column of a frequency table. 181 | Must have the same length as `weights`. 182 | weights : 1D array 183 | Weights corresponding to the household rows in `column`. 184 | Must have the same length as `column`. 
185 | constraint : float 186 | Target marginal constraint for this column. 187 | 188 | Returns 189 | ------- 190 | new_weights : ndarray 191 | 192 | """ 193 | adj = constraint / float((column * weights).sum()) 194 | return weights * adj 195 | 196 | 197 | def household_weights( 198 | household_freq, person_freq, household_constraints, 199 | person_constraints, geography, ignore_max_iters, 200 | convergence=1e-4, max_iterations=20000): 201 | """ 202 | Calculate the household weights that best match household and 203 | person level attributes. 204 | 205 | Parameters 206 | ---------- 207 | household_freq : pandas.DataFrame 208 | Frequency table for household attributes. Columns should be 209 | a MultiIndex matching the index of `household_constraints` and 210 | index should be household IDs matching the index of 211 | `person_freq`. 212 | person_Freq : pandas.DataFrame 213 | Frequency table for household person. Columns should be 214 | a MultiIndex matching the index of `person_constraints` and 215 | index should be household IDs matching the index of 216 | `household_freq`. 217 | household_constraints : pandas.Series 218 | Target marginal constraints for household classes. 219 | Index must be the same as the columns of `household_freq`. 220 | person_constraints : pandas.Series 221 | Target marginal constraints for person classes. 222 | Index must be the same as the columns of `person_freq`. 223 | convergence : float, optional 224 | When the average fit quality metric changes by less than this value 225 | after an iteration we declare done and send back the weights 226 | from the best fit. 227 | max_iterations, int, optional 228 | Maximum number of iterations to do before stopping and raising 229 | an exception. 230 | 231 | Returns 232 | ------- 233 | weights : pandas.Series 234 | fit_qual : float 235 | The final average fit quality metric. 236 | iterations : int 237 | Number of iterations made. 
238 | 239 | """ 240 | weights = np.ones(len(household_freq), dtype='float') 241 | best_weights = weights.copy() 242 | 243 | freq_wrap = _FrequencyAndConstraints( 244 | household_freq, household_constraints, person_freq, person_constraints) 245 | 246 | fit_qual = _average_fit_quality(freq_wrap, weights) 247 | best_fit_qual = fit_qual 248 | fit_change = np.inf 249 | iterations = 0 250 | 251 | while fit_change > convergence: 252 | for _, col, constraint, nz in freq_wrap.iter_columns(): 253 | weights[nz] = _update_weights(col, weights[nz], constraint) 254 | 255 | new_fit_qual = _average_fit_quality(freq_wrap, weights) 256 | fit_change = abs(new_fit_qual - fit_qual) 257 | 258 | if new_fit_qual < fit_qual: 259 | best_fit_qual = new_fit_qual 260 | best_weights = weights.copy() 261 | 262 | fit_qual = new_fit_qual 263 | iterations += 1 264 | 265 | if iterations > max_iterations: 266 | if ignore_max_iters: 267 | fitting_tolerance = fit_change - convergence 268 | print('Fitting tolerance before 20000 iterations: %s' % str(fitting_tolerance)) 269 | ipu_dict = {'best_fit_qual': best_fit_qual, 270 | 'fit_change': fit_change, 271 | 'fitting_tolerance': fitting_tolerance, 272 | 'geog_id': geography} 273 | if isinstance(geography, pd.Series): 274 | state, county = geography['state'], geography['county'] 275 | tract, bgroup = geography['tract'], geography['block group'] 276 | np.save('max_iter_{}_{}_{}_{}.npy'.format(state, county, 277 | tract, bgroup), ipu_dict) 278 | elif isinstance(geography, list): 279 | np.save('max_iter_{}_{}.npy'.format(geography[0], geography[1]), ipu_dict) 280 | else: 281 | np.save('max_iter_{}.npy'.format(str(geography)), ipu_dict) 282 | 283 | warnings.warn( 284 | 'Maximum number of iterations reached ' 285 | 'during IPU: {}'.format(max_iterations), UserWarning) 286 | return ( 287 | pd.Series(best_weights, index=household_freq.index), 288 | best_fit_qual, iterations) 289 | else: 290 | raise RuntimeError( 291 | 'Maximum number of iterations reached ' 292 | 
'during IPU: {}'.format(max_iterations)) 293 | 294 | return ( 295 | pd.Series(best_weights, index=household_freq.index), 296 | best_fit_qual, iterations) 297 | -------------------------------------------------------------------------------- /synthpop/ipu/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UDST/synthpop/6fb13991c9d3ede2d8cf80512bd1102e37b98971/synthpop/ipu/test/__init__.py -------------------------------------------------------------------------------- /synthpop/ipu/test/test_ipu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.testing as npt 3 | import pandas as pd 4 | import random 5 | import pytest 6 | from pandas.util import testing as pdt 7 | 8 | from .. import ipu 9 | 10 | 11 | @pytest.fixture(scope='module') 12 | def household_columns(): 13 | return pd.MultiIndex.from_product( 14 | [('yes',), ('blue', 'red')], 15 | names=['cat_owner', 'car_color']) 16 | 17 | 18 | @pytest.fixture(scope='module') 19 | def person_columns(): 20 | return pd.MultiIndex.from_product( 21 | [(7, 8, 9), ('pink',)], names=['shoe_size', 'shirt_color']) 22 | 23 | 24 | @pytest.fixture(scope='module') 25 | def household_freqs(household_columns): 26 | return pd.DataFrame( 27 | [(1, 0), 28 | (1, 0), 29 | (1, 0), 30 | (0, 1), 31 | (0, 1), 32 | (0, 1), 33 | (0, 1), 34 | (0, 1)], 35 | index=range(1, 9), 36 | columns=household_columns) 37 | 38 | 39 | @pytest.fixture(scope='module') 40 | def person_freqs(person_columns): 41 | return pd.DataFrame( 42 | [(1, 1, 1), 43 | (1, 0, 1), 44 | (2, 1, 0), 45 | (1, 0, 2), 46 | (0, 2, 1), 47 | (1, 1, 0), 48 | (2, 1, 2), 49 | (1, 1, 0)], 50 | index=range(1, 9), 51 | columns=person_columns) 52 | 53 | 54 | @pytest.fixture(scope='module') 55 | def household_constraints(household_columns): 56 | return pd.Series([35, 65], index=household_columns) 57 | 58 | 59 | @pytest.fixture(scope='module') 60 
| def person_constraints(person_columns): 61 | return pd.Series([91, 65, 104], index=person_columns) 62 | 63 | 64 | @pytest.fixture(scope='module') 65 | def geography(): 66 | dtypes = ['serie', 'list'] 67 | dtype = random.choice(dtypes) 68 | 69 | if dtype == 'serie': 70 | geography = pd.Series({'state': '02', 71 | 'county': '270', 72 | 'tract': '000100', 73 | 'block group': '1'}) 74 | else: 75 | geography = ['02', '270'] 76 | 77 | return geography 78 | 79 | 80 | @pytest.fixture 81 | def freq_wrap( 82 | household_freqs, person_freqs, household_constraints, 83 | person_constraints): 84 | return ipu._FrequencyAndConstraints( 85 | household_freqs, household_constraints, person_freqs, 86 | person_constraints) 87 | 88 | 89 | def test_drop_zeros_households(household_freqs): 90 | df = list(ipu._drop_zeros(household_freqs)) 91 | 92 | assert len(df) == 2 93 | assert df[0][0] == ('yes', 'blue') 94 | npt.assert_array_equal(df[0][1], [1, 1, 1]) 95 | npt.assert_array_equal(df[0][2], [0, 1, 2]) 96 | assert df[1][0] == ('yes', 'red') 97 | npt.assert_array_equal(df[1][1], [1, 1, 1, 1, 1]) 98 | npt.assert_array_equal(df[1][2], [3, 4, 5, 6, 7]) 99 | 100 | 101 | def test_drop_zeros_person(person_freqs): 102 | df = list(ipu._drop_zeros(person_freqs)) 103 | 104 | assert len(df) == 3 105 | assert df[0][0] == (7, 'pink') 106 | npt.assert_array_equal(df[0][1], [1, 1, 2, 1, 1, 2, 1]) 107 | npt.assert_array_equal(df[0][2], [0, 1, 2, 3, 5, 6, 7]) 108 | 109 | 110 | def test_fit_quality( 111 | household_freqs, person_freqs, household_constraints, 112 | person_constraints): 113 | weights = np.ones(len(household_freqs), dtype='float') 114 | column = household_freqs[('yes', 'blue')] 115 | constraint = household_constraints[('yes', 'blue')] 116 | 117 | npt.assert_allclose( 118 | ipu._fit_quality(column, weights, constraint), 0.9143, 119 | atol=0.0001) 120 | 121 | weights = np.array([12.37, 14.61, 8.05, 16.28, 16.91, 8.97, 13.78, 8.97]) 122 | column = person_freqs[(8, 'pink')] 123 | constraint = 
person_constraints[(8, 'pink')] 124 | 125 | npt.assert_allclose( 126 | ipu._fit_quality(column, weights, constraint), 0.3222, 127 | atol=0.0003) 128 | 129 | 130 | def test_average_fit_quality(household_freqs, freq_wrap): 131 | weights = np.ones(len(household_freqs), dtype='float') 132 | npt.assert_allclose( 133 | ipu._average_fit_quality(freq_wrap, weights), 134 | 0.9127, 135 | atol=0.0001) 136 | 137 | weights = np.array([12.37, 14.61, 8.05, 16.28, 16.91, 8.97, 13.78, 8.97]) 138 | npt.assert_allclose( 139 | ipu._average_fit_quality(freq_wrap, weights), 140 | 0.0954, 141 | atol=0.0001) 142 | 143 | 144 | def test_update_weights( 145 | household_freqs, person_freqs, household_constraints, 146 | person_constraints): 147 | column = household_freqs[('yes', 'blue')] 148 | column = column.iloc[column.values.nonzero()[0]] 149 | constraint = household_constraints[('yes', 'blue')] 150 | weights = pd.Series( 151 | np.ones(len(column)), 152 | index=column.index) 153 | 154 | npt.assert_allclose( 155 | ipu._update_weights(column, weights, constraint), 156 | [11.67, 11.67, 11.67], 157 | atol=0.01) 158 | 159 | column = person_freqs[(9, 'pink')] 160 | column = column.iloc[column.values.nonzero()[0]] 161 | constraint = person_constraints[(9, 'pink')] 162 | weights = pd.Series( 163 | [8.05, 9.51, 8.05, 10.59, 11.0, 8.97, 8.97, 8.97], 164 | index=range(1, 9)).loc[column.index] 165 | 166 | npt.assert_allclose( 167 | ipu._update_weights(column, weights, constraint), 168 | [12.37, 14.61, 16.28, 16.91, 13.78], 169 | atol=0.01) 170 | 171 | 172 | def test_household_weights( 173 | household_freqs, person_freqs, household_constraints, 174 | person_constraints, geography, ignore_max_iters=False): 175 | weights, fit_qual, iterations = ipu.household_weights( 176 | household_freqs, person_freqs, household_constraints, 177 | person_constraints, geography, ignore_max_iters, convergence=1e-7) 178 | npt.assert_allclose( 179 | weights.values, 180 | [1.36, 25.66, 7.98, 27.79, 18.45, 8.64, 1.47, 8.64], 
181 | atol=0.02) 182 | npt.assert_allclose(fit_qual, 8.51e-6, atol=1e-8) 183 | npt.assert_allclose(iterations, 637, atol=5) 184 | 185 | 186 | def test_household_weights_max_iter( 187 | household_freqs, person_freqs, household_constraints, 188 | person_constraints, geography, ignore_max_iters=False): 189 | with pytest.raises(RuntimeError): 190 | ipu.household_weights( 191 | household_freqs, person_freqs, household_constraints, 192 | person_constraints, geography, ignore_max_iters, convergence=1e-7, max_iterations=10) 193 | 194 | 195 | def test_FrequencyAndConstraints(freq_wrap): 196 | assert freq_wrap.ncols == 5 197 | assert len(list(freq_wrap.iter_columns())) == 5 198 | 199 | iter_cols = iter(freq_wrap.iter_columns()) 200 | 201 | key, col, constraint, nz = next(iter_cols) 202 | assert key == ('yes', 'blue') 203 | npt.assert_array_equal(col, [1, 1, 1]) 204 | assert constraint == 35 205 | npt.assert_array_equal(nz, [0, 1, 2]) 206 | 207 | key, col, constraint, nz = next(iter_cols) 208 | assert key == ('yes', 'red') 209 | npt.assert_array_equal(col, [1, 1, 1, 1, 1]) 210 | assert constraint == 65 211 | npt.assert_array_equal(nz, [3, 4, 5, 6, 7]) 212 | 213 | # should be into person cols now 214 | key, col, constraint, nz = next(iter_cols) 215 | assert key == (7, 'pink') 216 | npt.assert_array_equal(col, [1, 1, 2, 1, 1, 2, 1]) 217 | assert constraint == 91 218 | npt.assert_array_equal(nz, [0, 1, 2, 3, 5, 6, 7]) 219 | 220 | # test getting a column by name 221 | key, col, constraint, nz = freq_wrap.get_column((7, 'pink')) 222 | assert key == (7, 'pink') 223 | npt.assert_array_equal(col, [1, 1, 2, 1, 1, 2, 1]) 224 | assert constraint == 91 225 | npt.assert_array_equal(nz, [0, 1, 2, 3, 5, 6, 7]) 226 | -------------------------------------------------------------------------------- /synthpop/recipes/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UDST/synthpop/6fb13991c9d3ede2d8cf80512bd1102e37b98971/synthpop/recipes/__init__.py -------------------------------------------------------------------------------- /synthpop/recipes/starter.py: -------------------------------------------------------------------------------- 1 | from .. import categorizer as cat 2 | from ..census_helpers import Census 3 | import pandas as pd 4 | import numpy as np 5 | 6 | 7 | # TODO DOCSTRINGS!! 8 | class Starter: 9 | """ 10 | This is a recipe for getting the marginals and joint distributions to use 11 | to pass to the synthesizer using simple categories - population, age, 12 | race, and sex for people, and children, income, cars, and workers for 13 | households. This module is responsible for 14 | 15 | Parameters 16 | ---------- 17 | c : object 18 | census_helpers.Census object 19 | state : string 20 | FIPS code the state 21 | county : string 22 | FIPS code for the county 23 | tract : string, optional 24 | FIPS code for a specific track or None for all tracts in the county 25 | 26 | Returns 27 | ------- 28 | household_marginals : DataFrame 29 | Marginals per block group for the household data (from ACS) 30 | person_marginals : DataFrame 31 | Marginals per block group for the person data (from ACS) 32 | household_jointdist : DataFrame 33 | joint distributions for the households (from PUMS), one joint 34 | distribution for each PUMA (one row per PUMA) 35 | person_jointdist : DataFrame 36 | joint distributions for the persons (from PUMS), one joint 37 | distribution for each PUMA (one row per PUMA) 38 | tract_to_puma_map : dictionary 39 | keys are tract ids and pumas are puma ids 40 | """ 41 | 42 | def __init__(self, key, state, county, tract=None, acsyear=2016): 43 | self.c = c = Census(key, acsyear) 44 | self.state = state 45 | self.county = county 46 | self.tract = tract 47 | self.acsyear = acsyear 48 | 49 | income_columns = ['B19001_0%02dE' % i for i in range(1, 18)] 50 | vehicle_columns = 
['B08201_0%02dE' % i for i in range(1, 7)] 51 | workers_columns = ['B08202_0%02dE' % i for i in range(1, 6)] 52 | families_columns = ['B11001_001E', 'B11001_002E'] 53 | block_group_columns = income_columns + families_columns 54 | tract_columns = vehicle_columns + workers_columns 55 | h_acs = c.block_group_and_tract_query( 56 | block_group_columns, tract_columns, state, county, 57 | merge_columns=['tract', 'county', 'state'], 58 | block_group_size_attr="B11001_001E", 59 | tract_size_attr="B08201_001E", 60 | tract=tract, year=acsyear) 61 | self.h_acs = h_acs 62 | 63 | self.h_acs_cat = cat.categorize(h_acs, { 64 | ("children", "yes"): "B11001_002E", 65 | ("children", "no"): "B11001_001E - B11001_002E", 66 | ("income", "lt35"): "B19001_002E + B19001_003E + B19001_004E + " 67 | "B19001_005E + B19001_006E + B19001_007E", 68 | ("income", "gt35-lt100"): "B19001_008E + B19001_009E + " 69 | "B19001_010E + B19001_011E + B19001_012E" 70 | "+ B19001_013E", 71 | ("income", "gt100"): "B19001_014E + B19001_015E + B19001_016E" 72 | "+ B19001_017E", 73 | ("cars", "none"): "B08201_002E", 74 | ("cars", "one"): "B08201_003E", 75 | ("cars", "two or more"): "B08201_004E + B08201_005E + B08201_006E", 76 | ("workers", "none"): "B08202_002E", 77 | ("workers", "one"): "B08202_003E", 78 | ("workers", "two or more"): "B08202_004E + B08202_005E" 79 | }, index_cols=['state', 'county', 'tract', 'block group']) 80 | 81 | population = ['B01001_001E'] 82 | sex = ['B01001_002E', 'B01001_026E'] 83 | race = ['B02001_0%02dE' % i for i in range(1, 11)] 84 | male_age_columns = ['B01001_0%02dE' % i for i in range(3, 26)] 85 | female_age_columns = ['B01001_0%02dE' % i for i in range(27, 50)] 86 | all_columns = population + sex + race + male_age_columns + \ 87 | female_age_columns 88 | p_acs = c.block_group_query(all_columns, state, county, tract=tract, year=acsyear) 89 | self.p_acs = p_acs 90 | self.p_acs_cat = cat.categorize(p_acs, { 91 | ("age", "19 and under"): ( 92 | "B01001_003E + B01001_004E + 
B01001_005E + " 93 | "B01001_006E + B01001_007E + B01001_027E + " 94 | "B01001_028E + B01001_029E + B01001_030E + " 95 | "B01001_031E"), 96 | ("age", "20 to 35"): "B01001_008E + B01001_009E + B01001_010E + " 97 | "B01001_011E + B01001_012E + B01001_032E + " 98 | "B01001_033E + B01001_034E + B01001_035E + " 99 | "B01001_036E", 100 | ("age", "35 to 60"): "B01001_013E + B01001_014E + B01001_015E + " 101 | "B01001_016E + B01001_017E + B01001_037E + " 102 | "B01001_038E + B01001_039E + B01001_040E + " 103 | "B01001_041E", 104 | ("age", "above 60"): "B01001_018E + B01001_019E + B01001_020E + " 105 | "B01001_021E + B01001_022E + B01001_023E + " 106 | "B01001_024E + B01001_025E + B01001_042E + " 107 | "B01001_043E + B01001_044E + B01001_045E + " 108 | "B01001_046E + B01001_047E + B01001_048E + " 109 | "B01001_049E", 110 | ("race", "white"): "B02001_002E", 111 | ("race", "black"): "B02001_003E", 112 | ("race", "asian"): "B02001_005E", 113 | ("race", "other"): "B02001_004E + B02001_006E + B02001_007E + " 114 | "B02001_008E", 115 | ("sex", "male"): "B01001_002E", 116 | ("sex", "female"): "B01001_026E" 117 | }, index_cols=['state', 'county', 'tract', 'block group']) 118 | 119 | # Put the needed PUMS variables here. 
These are also the PUMS variables 120 | # that will be in the outputted synthetic population 121 | self.h_pums_cols = ('serialno', 'PUMA10', 'RT', 'NP', 122 | 'TYPE', 'VEH', 'WIF', 'NOC', 'FINCP') 123 | self.p_pums_cols = ('serialno', 'PUMA10', 'AGEP', 'RAC1P', 'SEX') 124 | 125 | if self.acsyear < 2018: 126 | self.h_pums_cols = list(self.h_pums_cols) 127 | self.h_pums_cols.insert(1, 'PUMA00') 128 | self.h_pums_cols = tuple(self.h_pums_cols) 129 | self.p_pums_cols = list(self.p_pums_cols) 130 | self.p_pums_cols.insert(1, 'PUMA00') 131 | self.p_pums_cols = tuple(self.p_pums_cols) 132 | 133 | def get_geography_name(self): 134 | # this synthesis is at the block group level for most variables 135 | return "block_group" 136 | 137 | def get_num_geographies(self): 138 | return len(self.p_acs_cat) 139 | 140 | def get_available_geography_ids(self): 141 | # return the ids of the geographies, in this case a state, county, 142 | # tract, block_group id tuple 143 | for tup in self.p_acs_cat.index: 144 | yield pd.Series(tup, index=self.p_acs_cat.index.names) 145 | 146 | def get_household_marginal_for_geography(self, ind): 147 | return self.h_acs_cat.loc[tuple(ind.values)] 148 | 149 | def get_person_marginal_for_geography(self, ind): 150 | return self.p_acs_cat.loc[tuple(ind.values)] 151 | 152 | def get_household_joint_dist_for_geography(self, ind): 153 | c = self.c 154 | 155 | puma10, puma00 = c.tract_to_puma(ind.state, ind.county, ind.tract) 156 | # this is cached so won't download more than once 157 | if type(puma00) == str: 158 | h_pums = self.c.download_household_pums(ind.state, puma10, puma00, 159 | usecols=self.h_pums_cols) 160 | elif np.isnan(puma00): # only puma10 available 161 | h_pums = self.c.download_household_pums(ind.state, puma10, None, 162 | usecols=self.h_pums_cols) 163 | 164 | def cars_cat(r): 165 | if r.VEH == 0: 166 | return "none" 167 | elif r.VEH == 1: 168 | return "one" 169 | return "two or more" 170 | 171 | def children_cat(r): 172 | if r.NOC > 0: 173 | 
return "yes" 174 | return "no" 175 | 176 | def income_cat(r): 177 | if r.FINCP > 100000: 178 | return "gt100" 179 | elif r.FINCP > 35000: 180 | return "gt35-lt100" 181 | return "lt35" 182 | 183 | def workers_cat(r): 184 | if r.WIF == 3: 185 | return "two or more" 186 | elif r.WIF == 2: 187 | return "two or more" 188 | elif r.WIF == 1: 189 | return "one" 190 | return "none" 191 | 192 | h_pums, jd_households = cat.joint_distribution( 193 | h_pums, 194 | cat.category_combinations(self.h_acs_cat.columns), 195 | {"cars": cars_cat, "children": children_cat, 196 | "income": income_cat, "workers": workers_cat} 197 | ) 198 | return h_pums, jd_households 199 | 200 | def get_person_joint_dist_for_geography(self, ind): 201 | c = self.c 202 | 203 | puma10, puma00 = c.tract_to_puma(ind.state, ind.county, ind.tract) 204 | # this is cached so won't download more than once 205 | if type(puma00) == str: 206 | p_pums = self.c.download_population_pums(ind.state, puma10, puma00, 207 | usecols=self.p_pums_cols) 208 | elif np.isnan(puma00): # only puma10 available 209 | p_pums = self.c.download_population_pums(ind.state, puma10, None, 210 | usecols=self.p_pums_cols) 211 | 212 | def age_cat(r): 213 | if r.AGEP <= 19: 214 | return "19 and under" 215 | elif r.AGEP <= 35: 216 | return "20 to 35" 217 | elif r.AGEP <= 60: 218 | return "35 to 60" 219 | return "above 60" 220 | 221 | def race_cat(r): 222 | if r.RAC1P == 1: 223 | return "white" 224 | elif r.RAC1P == 2: 225 | return "black" 226 | elif r.RAC1P == 6: 227 | return "asian" 228 | return "other" 229 | 230 | def sex_cat(r): 231 | if r.SEX == 1: 232 | return "male" 233 | return "female" 234 | 235 | p_pums, jd_persons = cat.joint_distribution( 236 | p_pums, 237 | cat.category_combinations(self.p_acs_cat.columns), 238 | {"age": age_cat, "race": race_cat, "sex": sex_cat} 239 | ) 240 | return p_pums, jd_persons 241 | -------------------------------------------------------------------------------- /synthpop/recipes/starter2.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from .. import categorizer as cat 5 | from ..census_helpers import Census 6 | 7 | 8 | # TODO DOCSTRINGS!! 9 | class Starter: 10 | """ 11 | This is a recipe for getting the marginals and joint distributions to use 12 | to pass to the synthesizer using simple categories - population, age, 13 | race, and sex for people, and children, income, cars, and workers for 14 | households. This module is responsible for 15 | 16 | Parameters 17 | ---------- 18 | c : object 19 | census_helpers.Census object 20 | state : string 21 | FIPS code the state 22 | county : string 23 | FIPS code for the county 24 | tract : string, optional 25 | FIPS code for a specific track or None for all tracts in the county 26 | acsyear : integer, optional 27 | Final year in the 5-year estimates ACS dataset. 28 | Default: 2016, which corresponds to 2011-2016 ACS dataset 29 | 30 | Returns 31 | ------- 32 | household_marginals : DataFrame 33 | Marginals per block group for the household data (from ACS 5-year estimates) 34 | person_marginals : DataFrame 35 | Marginals per block group for the person data (from ACS 5-year estimates) 36 | household_jointdist : DataFrame 37 | joint distributions for the households (from PUMS 2010-2000), one joint 38 | distribution for each PUMA (one row per PUMA) 39 | person_jointdist : DataFrame 40 | joint distributions for the persons (from PUMS 2010-2000), one joint 41 | distribution for each PUMA (one row per PUMA) 42 | tract_to_puma_map : dictionary 43 | keys are tract ids and pumas are puma ids 44 | """ 45 | 46 | def __init__(self, key, state, county, tract=None, acsyear=2016): 47 | self.c = c = Census(key, acsyear) 48 | self.state = state 49 | self.county = county 50 | self.tract = tract 51 | self.acsyear = acsyear 52 | 53 | structure_size_columns = ['B25032_0%02dE' % i for i in range(1, 24)] 54 | age_of_head_columns = 
['B25007_0%02dE' % i for i in range(1, 22)] 55 | race_of_head_columns = ['B25006_0%02dE' % i for i in range(1, 11)] 56 | hispanic_head_columns = ['B25003I_0%02dE' % i for i in range(1, 4)] 57 | hh_size_columns = ['B25009_0%02dE' % i for i in range(1, 18)] 58 | income_columns = ['B19001_0%02dE' % i for i in range(1, 18)] 59 | vehicle_columns = ['B08201_0%02dE' % i for i in range(1, 7)] 60 | workers_columns = ['B08202_0%02dE' % i for i in range(1, 6)] 61 | presence_of_children_columns = ['B11005_001E', 'B11005_002E', 'B11005_011E'] 62 | presence_of_seniors_columns = ['B11007_002E', 'B11007_007E'] 63 | tenure_mover_columns = ['B25038_0%02dE' % i for i in range(1, 16)] 64 | block_group_columns = ( 65 | income_columns + presence_of_children_columns + 66 | presence_of_seniors_columns + tenure_mover_columns + 67 | hh_size_columns + age_of_head_columns + structure_size_columns + 68 | race_of_head_columns + hispanic_head_columns) 69 | tract_columns = vehicle_columns + workers_columns 70 | h_acs = c.block_group_and_tract_query( 71 | block_group_columns, 72 | tract_columns, state, county, 73 | merge_columns=['tract', 'county', 'state'], 74 | block_group_size_attr="B11005_001E", 75 | tract_size_attr="B08201_001E", 76 | tract=tract, year=acsyear) 77 | self.h_acs = h_acs 78 | 79 | self.h_acs_cat = cat.categorize(h_acs, { 80 | ("sf_detached", "yes"): "B25032_003E + B25032_014E", 81 | ("sf_detached", "no"): "B25032_001E - B25032_003E - B25032_014E", 82 | ("hh_age_of_head", "lt35"): 83 | "B25007_003E + B25007_004E + B25007_013E + B25007_014E", 84 | ("hh_age_of_head", "gt35-lt65"): 85 | "B25007_005E + B25007_006E + B25007_007E + B25007_008E + " 86 | "B25007_015E + B25007_016E + B25007_017E + B25007_018E", 87 | ("hh_age_of_head", "gt65"): 88 | "B25007_009E + B25007_010E + B25007_011E + " 89 | "B25007_019E + B25007_020E + B25007_021E", 90 | ("hh_race_of_head", "black"): "B25006_003E", 91 | ("hh_race_of_head", "white"): "B25006_002E", 92 | ("hh_race_of_head", "asian"): "B25006_005E", 
93 | ("hh_race_of_head", "other"): 94 | "B25006_004E + B25006_006E + B25006_007E + B25006_008E ", 95 | ("hispanic_head", "yes"): "B25003I_001E", 96 | ("hispanic_head", "no"): "B11005_001E - B25003I_001E", 97 | ("hh_children", "yes"): "B11005_002E", 98 | ("hh_children", "no"): "B11005_011E", 99 | ("seniors", "yes"): "B11007_002E", 100 | ("seniors", "no"): "B11007_007E", 101 | ("hh_income", "lt30"): 102 | "B19001_002E + B19001_003E + B19001_004E + " 103 | "B19001_005E + B19001_006E", 104 | ("hh_income", "gt30-lt60"): 105 | "B19001_007E + B19001_008E + B19001_009E + " 106 | "B19001_010E + B19001_011E", 107 | ("hh_income", "gt60-lt100"): "B19001_012E + B19001_013E", 108 | ("hh_income", "gt100-lt150"): "B19001_014E + B19001_015E", 109 | ("hh_income", "gt150"): "B19001_016E + B19001_017E", 110 | ("hh_cars", "none"): "B08201_002E", 111 | ("hh_cars", "one"): "B08201_003E", 112 | ("hh_cars", "two or more"): 113 | "B08201_004E + B08201_005E + B08201_006E", 114 | ("hh_workers", "none"): "B08202_002E", 115 | ("hh_workers", "one"): "B08202_003E", 116 | ("hh_workers", "two or more"): "B08202_004E + B08202_005E", 117 | ("tenure_mover", "own recent"): "B25038_003E", 118 | ("tenure_mover", "own not recent"): "B25038_002E - B25038_003E", 119 | ("tenure_mover", "rent recent"): "B25038_010E", 120 | ("tenure_mover", "rent not recent"): "B25038_009E - B25038_010E", 121 | ("hh_size", "one"): "B25009_003E + B25009_011E", 122 | ("hh_size", "two"): "B25009_004E + B25009_012E", 123 | ("hh_size", "three"): "B25009_005E + B25009_013E", 124 | ("hh_size", "four or more"): "B25009_006E + B25009_014E + " 125 | "B25009_007E + B25009_015E + " 126 | "B25009_008E + B25009_016E + " 127 | "B25009_009E + B25009_017E", 128 | }, index_cols=['state', 'county', 'tract', 'block group']) 129 | 130 | # gq_population = ['B26001_001E'] 131 | # HH population, for the hhpop/totalpop adjustment 132 | hh_population = ['B11002_001E'] 133 | population = ['B01001_001E'] # This includes GQ 134 | hispanic = 
['B03003_002E', 'B03003_003E'] 135 | sex = ['B01001_002E', 'B01001_026E'] 136 | race = ['B02001_0%02dE' % i for i in range(1, 11)] 137 | male_age_columns = ['B01001_0%02dE' % i for i in range(3, 26)] 138 | female_age_columns = ['B01001_0%02dE' % i for i in range(27, 50)] 139 | all_columns = population + sex + race + male_age_columns + \ 140 | female_age_columns + hh_population + hispanic 141 | p_acs = c.block_group_query(all_columns, state, county, tract=tract, year=acsyear) 142 | self.p_acs = p_acs 143 | self.p_acs_cat = cat.categorize(p_acs, { 144 | ("person_age", "19 and under"): 145 | "(B01001_003E + B01001_004E + B01001_005E + " 146 | "B01001_006E + B01001_007E + B01001_027E + " 147 | "B01001_028E + B01001_029E + B01001_030E + " 148 | "B01001_031E) * B11002_001E*1.0/B01001_001E", 149 | ("person_age", "20 to 35"): 150 | "(B01001_008E + B01001_009E + B01001_010E + " 151 | "B01001_011E + B01001_012E + B01001_032E + " 152 | "B01001_033E + B01001_034E + B01001_035E + " 153 | "B01001_036E) * B11002_001E*1.0/B01001_001E", 154 | ("person_age", "35 to 60"): 155 | "(B01001_013E + B01001_014E + B01001_015E + " 156 | "B01001_016E + B01001_017E + B01001_037E + " 157 | "B01001_038E + B01001_039E + B01001_040E + " 158 | "B01001_041E) * B11002_001E*1.0/B01001_001E", 159 | ("person_age", "above 60"): 160 | "(B01001_018E + B01001_019E + B01001_020E + " 161 | "B01001_021E + B01001_022E + B01001_023E + " 162 | "B01001_024E + B01001_025E + B01001_042E + " 163 | "B01001_043E + B01001_044E + B01001_045E + " 164 | "B01001_046E + B01001_047E + B01001_048E + " 165 | "B01001_049E) * B11002_001E*1.0/B01001_001E", 166 | ("race", "white"): "(B02001_002E) * B11002_001E*1.0/B01001_001E", 167 | ("race", "black"): "(B02001_003E) * B11002_001E*1.0/B01001_001E", 168 | ("race", "asian"): "(B02001_005E) * B11002_001E*1.0/B01001_001E", 169 | ("race", "other"): "(B02001_004E + B02001_006E + B02001_007E + " 170 | "B02001_008E) * B11002_001E*1.0/B01001_001E", 171 | ("person_sex", "male"): 172 | 
"(B01001_002E) * B11002_001E*1.0/B01001_001E", 173 | ("person_sex", "female"): 174 | "(B01001_026E) * B11002_001E*1.0/B01001_001E", 175 | ("hispanic", "yes"): 176 | "(B03003_003E) * B11002_001E*1.0/B01001_001E", 177 | ("hispanic", "no"): 178 | "(B03003_002E) * B11002_001E*1.0/B01001_001E", 179 | }, index_cols=['state', 'county', 'tract', 'block group']) 180 | 181 | # Put the needed PUMS variables here. These are also the PUMS variables 182 | # that will be in the outputted synthetic population 183 | self.h_pums_cols = ('serialno', 'PUMA10', 'RT', 'NP', 'TYPE', 184 | 'R65', 'HINCP', 'VEH', 'MV', 'TEN', 'BLD', 'R18') 185 | self.p_pums_cols = ('serialno', 'PUMA10', 'RELP', 'AGEP', 186 | 'ESR', 'RAC1P', 'HISP', 'SEX', 'SPORDER', 187 | 'PERNP', 'SCHL', 'WKHP', 'JWTR', 'SCH') 188 | if self.acsyear < 2018: 189 | self.h_pums_cols = list(self.h_pums_cols) 190 | self.h_pums_cols.insert(1, 'PUMA00') 191 | self.h_pums_cols = tuple(self.h_pums_cols) 192 | self.p_pums_cols = list(self.p_pums_cols) 193 | self.p_pums_cols.insert(1, 'PUMA00') 194 | self.p_pums_cols = tuple(self.p_pums_cols) 195 | 196 | def get_geography_name(self): 197 | # this synthesis is at the block group level for most variables 198 | return "block_group" 199 | 200 | def get_num_geographies(self): 201 | return len(self.p_acs_cat) 202 | 203 | def get_available_geography_ids(self): 204 | # return the ids of the geographies, in this case a state, county, 205 | # tract, block_group id tuple 206 | for tup in self.p_acs_cat.index: 207 | yield pd.Series(tup, index=self.p_acs_cat.index.names) 208 | 209 | def get_household_marginal_for_geography(self, ind): 210 | return self.h_acs_cat.loc[tuple(ind.values)] 211 | 212 | def get_person_marginal_for_geography(self, ind): 213 | return self.p_acs_cat.loc[tuple(ind.values)] 214 | 215 | def get_household_joint_dist_for_geography(self, ind): 216 | c = self.c 217 | 218 | puma10, puma00 = c.tract_to_puma(ind.state, ind.county, ind.tract) 219 | 220 | # this is cached so won't 
download more than once 221 | if type(puma00) == str: 222 | h_pums = self.c.download_household_pums(ind.state, puma10, puma00, 223 | usecols=self.h_pums_cols) 224 | p_pums = self.c.download_population_pums(ind.state, puma10, puma00, 225 | usecols=self.p_pums_cols) 226 | elif np.isnan(puma00): # only puma10 available 227 | h_pums = self.c.download_household_pums(ind.state, puma10, None, 228 | usecols=self.h_pums_cols) 229 | p_pums = self.c.download_population_pums(ind.state, puma10, None, 230 | usecols=self.p_pums_cols) 231 | 232 | h_pums = h_pums.set_index('serialno') 233 | 234 | # join persons to households, 235 | # calculate needed household-level variables 236 | age_of_head = p_pums[p_pums.RELP == 0].groupby('serialno').AGEP.max() 237 | num_workers = p_pums[p_pums.ESR.isin([1, 2, 4, 5])].groupby( 238 | 'serialno').size() 239 | h_pums['race_of_head'] = p_pums[p_pums.RELP == 0].groupby( 240 | 'serialno').RAC1P.max() 241 | h_pums['hispanic_head'] = p_pums[p_pums.RELP == 0].groupby( 242 | 'serialno').HISP.max() 243 | h_pums['age_of_head'] = age_of_head 244 | h_pums['workers'] = num_workers 245 | h_pums.workers = h_pums.workers.fillna(0) 246 | h_pums = h_pums.reset_index() 247 | 248 | def sf_detached_cat(r): 249 | if r.BLD == 2: 250 | return "yes" 251 | return "no" 252 | 253 | def age_of_head_cat(r): 254 | if r.age_of_head < 35: 255 | return "lt35" 256 | elif r.age_of_head >= 65: 257 | return "gt65" 258 | return "gt35-lt65" 259 | 260 | def race_of_head_cat(r): 261 | if r.race_of_head == 1: 262 | return "white" 263 | elif r.race_of_head == 2: 264 | return "black" 265 | elif r.race_of_head == 6: 266 | return "asian" 267 | return "other" 268 | 269 | def hispanic_head_cat(r): 270 | if r.hispanic_head == 1: 271 | return "no" 272 | return "yes" 273 | 274 | def hh_size_cat(r): 275 | if r.NP == 1: 276 | return "one" 277 | elif r.NP == 2: 278 | return "two" 279 | elif r.NP == 3: 280 | return "three" 281 | return "four or more" 282 | 283 | def cars_cat(r): 284 | if r.VEH == 0: 
285 | return "none" 286 | elif r.VEH == 1: 287 | return "one" 288 | return "two or more" 289 | 290 | def children_cat(r): 291 | if r.R18 == 1: 292 | return "yes" 293 | return "no" 294 | 295 | def seniors_cat(r): 296 | if r.R65 > 0: 297 | return "yes" 298 | return "no" 299 | 300 | def income_cat(r): 301 | if r.HINCP >= 150000: 302 | return "gt150" 303 | elif (r.HINCP >= 100000) & (r.HINCP < 150000): 304 | return "gt100-lt150" 305 | elif (r.HINCP >= 60000) & (r.HINCP < 100000): 306 | return "gt60-lt100" 307 | elif (r.HINCP >= 30000) & (r.HINCP < 60000): 308 | return "gt30-lt60" 309 | return "lt30" 310 | 311 | def workers_cat(r): 312 | if r.workers >= 2: 313 | return "two or more" 314 | elif r.workers == 1: 315 | return "one" 316 | return "none" 317 | 318 | def tenure_mover_cat(r): 319 | if (r.MV < 4) & (r.TEN < 3): 320 | return "own recent" 321 | elif (r.MV >= 4) & (r.TEN < 3): 322 | return "own not recent" 323 | elif (r.MV < 4) & (r.TEN >= 3): 324 | return "rent recent" 325 | return "rent not recent" 326 | 327 | h_pums, jd_households = cat.joint_distribution( 328 | h_pums, 329 | cat.category_combinations(self.h_acs_cat.columns), 330 | {"hh_cars": cars_cat, 331 | "hh_children": children_cat, 332 | "hh_income": income_cat, 333 | "hh_workers": workers_cat, 334 | "tenure_mover": tenure_mover_cat, 335 | "seniors": seniors_cat, 336 | "hh_size": hh_size_cat, 337 | "hh_age_of_head": age_of_head_cat, 338 | "sf_detached": sf_detached_cat, 339 | "hh_race_of_head": race_of_head_cat, 340 | "hispanic_head": hispanic_head_cat} 341 | ) 342 | return h_pums, jd_households 343 | 344 | def get_person_joint_dist_for_geography(self, ind): 345 | c = self.c 346 | 347 | puma10, puma00 = c.tract_to_puma(ind.state, ind.county, ind.tract) 348 | # this is cached so won't download more than once 349 | if type(puma00) == str: 350 | p_pums = self.c.download_population_pums(ind.state, puma10, puma00, 351 | usecols=self.p_pums_cols) 352 | elif np.isnan(puma00): # only puma10 available 353 | p_pums = 
self.c.download_population_pums(ind.state, puma10, None, 354 | usecols=self.p_pums_cols) 355 | 356 | def age_cat(r): 357 | if r.AGEP <= 19: 358 | return "19 and under" 359 | elif r.AGEP <= 35: 360 | return "20 to 35" 361 | elif r.AGEP <= 60: 362 | return "35 to 60" 363 | return "above 60" 364 | 365 | def race_cat(r): 366 | if r.RAC1P == 1: 367 | return "white" 368 | elif r.RAC1P == 2: 369 | return "black" 370 | elif r.RAC1P == 6: 371 | return "asian" 372 | return "other" 373 | 374 | def sex_cat(r): 375 | if r.SEX == 1: 376 | return "male" 377 | return "female" 378 | 379 | def hispanic_cat(r): 380 | if r.HISP == 1: 381 | return "no" 382 | return "yes" 383 | 384 | p_pums, jd_persons = cat.joint_distribution( 385 | p_pums, 386 | cat.category_combinations(self.p_acs_cat.columns), 387 | {"person_age": age_cat, "race": race_cat, "person_sex": sex_cat, 388 | "hispanic": hispanic_cat} 389 | ) 390 | return p_pums, jd_persons 391 | -------------------------------------------------------------------------------- /synthpop/recipes/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UDST/synthpop/6fb13991c9d3ede2d8cf80512bd1102e37b98971/synthpop/recipes/tests/__init__.py -------------------------------------------------------------------------------- /synthpop/recipes/tests/test_starter.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ...synthesizer import * 3 | from ..starter import Starter 4 | 5 | 6 | @pytest.fixture 7 | def key(): 8 | return "827402c2958dcf515e4480b7b2bb93d1025f9389" 9 | 10 | 11 | def test_starter(key): 12 | st = Starter(key, "CA", "Napa County") 13 | synthesize_all(st, num_geogs=1) 14 | -------------------------------------------------------------------------------- /synthpop/synthesizer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from 
collections import namedtuple 4 | 5 | import numpy as np 6 | import pandas as pd 7 | from scipy.stats import chisquare 8 | 9 | from . import categorizer as cat 10 | from . import draw 11 | from .ipf.ipf import calculate_constraints 12 | from .ipu.ipu import household_weights 13 | 14 | logger = logging.getLogger("synthpop") 15 | FitQuality = namedtuple( 16 | 'FitQuality', 17 | ('people_chisq', 'people_p')) 18 | BlockGroupID = namedtuple( 19 | 'BlockGroupID', ('state', 'county', 'tract', 'block_group')) 20 | 21 | 22 | def enable_logging(): 23 | handler = logging.StreamHandler(stream=sys.stdout) 24 | logger.addHandler(handler) 25 | logger.setLevel(logging.DEBUG) 26 | 27 | 28 | def synthesize(h_marg, p_marg, h_jd, p_jd, h_pums, p_pums, geography, ignore_max_iters, 29 | marginal_zero_sub=.01, jd_zero_sub=.001, hh_index_start=0): 30 | 31 | # this is the zero marginal problem 32 | h_marg = h_marg.replace(0, marginal_zero_sub) 33 | p_marg = p_marg.replace(0, marginal_zero_sub) 34 | 35 | # zero cell problem 36 | h_jd.frequency = h_jd.frequency.replace(0, jd_zero_sub) 37 | p_jd.frequency = p_jd.frequency.replace(0, jd_zero_sub) 38 | 39 | # ipf for households 40 | logger.info("Running ipf for households") 41 | h_constraint, _ = calculate_constraints(h_marg, h_jd.frequency) 42 | h_constraint.index = h_jd.cat_id 43 | 44 | logger.debug("Household constraint") 45 | logger.debug(h_constraint) 46 | 47 | # ipf for persons 48 | logger.info("Running ipf for persons") 49 | p_constraint, _ = calculate_constraints(p_marg, p_jd.frequency) 50 | # p_constraint.index = p_jd.cat_id 51 | 52 | logger.debug("Person constraint") 53 | logger.debug(p_constraint) 54 | 55 | # modify person cat ids so they are unique when combined with households 56 | p_starting_cat_id = h_jd['cat_id'].max() + 1 57 | p_jd['cat_id'] += p_starting_cat_id 58 | p_pums['cat_id'] += p_starting_cat_id 59 | p_constraint.index = p_jd.cat_id 60 | 61 | # make frequency tables that the ipu expects 62 | household_freq, person_freq 
= cat.frequency_tables(p_pums, h_pums, 63 | p_jd.cat_id, 64 | h_jd.cat_id) 65 | 66 | # do the ipu to match person marginals 67 | logger.info("Running ipu") 68 | import time 69 | t1 = time.time() 70 | best_weights, fit_quality, iterations = household_weights(household_freq, 71 | person_freq, 72 | h_constraint, 73 | p_constraint, 74 | geography, 75 | ignore_max_iters) 76 | logger.info("Time to run ipu: %.3fs" % (time.time()-t1)) 77 | 78 | logger.debug("IPU weights:") 79 | logger.debug(best_weights.describe()) 80 | logger.debug("Fit quality:") 81 | logger.debug(fit_quality) 82 | logger.debug("Number of iterations:") 83 | logger.debug(iterations) 84 | 85 | num_households = int(h_marg.groupby(level=0).sum().mean()) 86 | print("Drawing %d households" % num_households) 87 | 88 | best_chisq = np.inf 89 | 90 | return draw.draw_households( 91 | num_households, h_pums, p_pums, household_freq, h_constraint, 92 | p_constraint, best_weights, hh_index_start=hh_index_start) 93 | 94 | 95 | def synthesize_all(recipe, num_geogs=None, indexes=None, ignore_max_iters=False, 96 | marginal_zero_sub=.01, jd_zero_sub=.001): 97 | """ 98 | Returns 99 | ------- 100 | households, people : pandas.DataFrame 101 | fit_quality : dict of FitQuality 102 | Keys are geographic IDs, values are namedtuples with attributes 103 | ``.household_chisq``, ``household_p``, ``people_chisq``, 104 | and ``people_p``. 105 | 106 | """ 107 | print("Synthesizing at geog level: '{}' (number of geographies is {})" 108 | .format(recipe.get_geography_name(), recipe.get_num_geographies())) 109 | 110 | if indexes is None: 111 | indexes = recipe.get_available_geography_ids() 112 | 113 | hh_list = [] 114 | people_list = [] 115 | cnt = 0 116 | fit_quality = {} 117 | hh_index_start = 0 118 | 119 | # TODO will parallelization work here? 
120 | for geog_id in indexes: 121 | print("Synthesizing geog id:\n", geog_id) 122 | 123 | h_marg = recipe.get_household_marginal_for_geography(geog_id) 124 | logger.debug("Household marginal") 125 | logger.debug(h_marg) 126 | 127 | p_marg = recipe.get_person_marginal_for_geography(geog_id) 128 | logger.debug("Person marginal") 129 | logger.debug(p_marg) 130 | 131 | h_pums, h_jd = recipe.\ 132 | get_household_joint_dist_for_geography(geog_id) 133 | logger.debug("Household joint distribution") 134 | logger.debug(h_jd) 135 | 136 | p_pums, p_jd = recipe.get_person_joint_dist_for_geography(geog_id) 137 | logger.debug("Person joint distribution") 138 | logger.debug(p_jd) 139 | 140 | households, people, people_chisq, people_p = \ 141 | synthesize( 142 | h_marg, p_marg, h_jd, p_jd, h_pums, p_pums, geog_id, ignore_max_iters, 143 | marginal_zero_sub=marginal_zero_sub, jd_zero_sub=jd_zero_sub, 144 | hh_index_start=hh_index_start) 145 | 146 | # Append location identifiers to the synthesized households 147 | for geog_cat in geog_id.keys(): 148 | households[geog_cat] = geog_id[geog_cat] 149 | 150 | hh_list.append(households) 151 | people_list.append(people) 152 | key = BlockGroupID( 153 | geog_id['state'], geog_id['county'], geog_id['tract'], 154 | geog_id['block group']) 155 | fit_quality[key] = FitQuality(people_chisq, people_p) 156 | 157 | cnt += 1 158 | if len(households) > 0: 159 | hh_index_start = households.index.values[-1] + 1 160 | 161 | if num_geogs is not None and cnt >= num_geogs: 162 | break 163 | 164 | # TODO might want to write this to disk as we go? 
@pytest.fixture
def c():
    # Census API client used by every test in this module.
    return Census('bfa6b4e541243011fab6307a31aed9e91015ba90')


@pytest.fixture
def acs_data(c):
    # Pull the raw ACS block-group table for one SF tract:
    # total population, sex, race, and the male/female age bins.
    population = ['B01001_001E']
    sex = ['B01001_002E', 'B01001_026E']
    race = ['B02001_0%02dE' % i for i in range(1, 11)]
    male_age_columns = ['B01001_0%02dE' % i for i in range(3, 26)]
    female_age_columns = ['B01001_0%02dE' % i for i in range(27, 50)]
    all_columns = (population + sex + race + male_age_columns +
                   female_age_columns)
    return c.block_group_query(all_columns, "06", "075", tract="030600")


@pytest.fixture
def pums_data(c):
    return c.download_population_pums("06", "07506")


def test_categorize(acs_data, pums_data):
    """Categorize ACS columns and build a matching PUMS joint distribution."""
    p_acs_cat = cat.categorize(acs_data, {
        ("population", "total"): "B01001_001E",
        ("age", "19 and under"): "B01001_003E + B01001_004E + B01001_005E + "
                                 "B01001_006E + B01001_007E + B01001_027E + "
                                 "B01001_028E + B01001_029E + B01001_030E + "
                                 "B01001_031E",
        ("age", "20 to 35"): "B01001_008E + B01001_009E + B01001_010E + "
                             "B01001_011E + B01001_012E + B01001_032E + "
                             "B01001_033E + B01001_034E + B01001_035E + "
                             "B01001_036E",
        ("age", "35 to 60"): "B01001_013E + B01001_014E + B01001_015E + "
                             "B01001_016E + B01001_017E + B01001_037E + "
                             "B01001_038E + B01001_039E + B01001_040E + "
                             "B01001_041E",
        ("age", "above 60"): "B01001_018E + B01001_019E + B01001_020E + "
                             "B01001_021E + B01001_022E + B01001_023E + "
                             "B01001_024E + B01001_025E + B01001_042E + "
                             "B01001_043E + B01001_044E + B01001_045E + "
                             "B01001_046E + B01001_047E + B01001_048E + "
                             "B01001_049E",
        ("race", "white"): "B02001_002E",
        ("race", "black"): "B02001_003E",
        ("race", "asian"): "B02001_005E",
        ("race", "other"): "B02001_004E + B02001_006E + B02001_007E + "
                           "B02001_008E",
        ("sex", "male"): "B01001_002E",
        ("sex", "female"): "B01001_026E"
    }, index_cols=['NAME'])

    # Three block groups in the tract, 11 categories, 2-level columns.
    assert len(p_acs_cat) == 3
    assert len(p_acs_cat.columns) == 11
    assert len(p_acs_cat.columns.names) == 2
    assert p_acs_cat.columns[0][0] == "age"

    assert np.all(cat.sum_accross_category(p_acs_cat) < 2)

    def age_cat(r):
        # First bucket whose upper bound the person's age does not exceed.
        for bound, label in ((19, "19 and under"), (35, "20 to 35"),
                             (60, "35 to 60")):
            if r.AGEP <= bound:
                return label
        return "above 60"

    def race_cat(r):
        # PUMS RAC1P codes: 1=white, 2=black, 6=asian; everything else pooled.
        return {1: "white", 2: "black", 6: "asian"}.get(r.RAC1P, "other")

    def sex_cat(r):
        return "male" if r.SEX == 1 else "female"

    pums_data, jd_persons = cat.joint_distribution(
        pums_data,
        cat.category_combinations(p_acs_cat.columns),
        {"age": age_cat, "race": race_cat, "sex": sex_cat}
    )
3 | import numpy as np 4 | from pandas.util.testing import assert_series_equal 5 | import os 6 | 7 | 8 | @pytest.fixture 9 | def c(): 10 | return Census('bfa6b4e541243011fab6307a31aed9e91015ba90') 11 | 12 | 13 | def test_block_group_and_tract_query(c): 14 | income_columns = ['B19001_0%02dE' % i for i in range(1, 18)] 15 | vehicle_columns = ['B08201_0%02dE' % i for i in range(1, 7)] 16 | workers_columns = ['B08202_0%02dE' % i for i in range(1, 6)] 17 | families_columns = ['B11001_001E', 'B11001_002E'] 18 | block_group_columns = income_columns + families_columns 19 | tract_columns = vehicle_columns + workers_columns 20 | df = c.block_group_and_tract_query(block_group_columns, 21 | tract_columns, "06", "075", 22 | merge_columns=['tract', 'county', 23 | 'state'], 24 | block_group_size_attr="B11001_001E", 25 | tract_size_attr="B08201_001E", 26 | tract="030600") 27 | 28 | assert len(df) == 3 29 | assert_series_equal( 30 | df["B11001_001E"], df["B08201_001E"], check_names=False) 31 | assert np.all(df.state == "06") 32 | assert np.all(df.county == "075") 33 | 34 | df = c.block_group_and_tract_query(block_group_columns, 35 | tract_columns, "06", "075", 36 | merge_columns=['tract', 'county', 37 | 'state'], 38 | block_group_size_attr="B11001_001E", 39 | tract_size_attr="B08201_001E", 40 | tract=None) 41 | 42 | # number of block groups in San Francisco 43 | assert len(df) == 581 44 | assert_series_equal( 45 | df["B11001_001E"], df["B08201_001E"], check_names=False) 46 | assert np.all(df.state == "06") 47 | assert np.all(df.county == "075") 48 | 49 | 50 | def test_wide_block_group_query(c): 51 | population = ['B01001_001E'] 52 | sex = ['B01001_002E', 'B01001_026E'] 53 | race = ['B02001_0%02dE' % i for i in range(1, 11)] 54 | male_age_columns = ['B01001_0%02dE' % i for i in range(3, 26)] 55 | female_age_columns = ['B01001_0%02dE' % i for i in range(27, 50)] 56 | all_columns = population + sex + race + male_age_columns + \ 57 | female_age_columns 58 | df = 
c.block_group_query(all_columns, "06", "075", tract="030600") 59 | 60 | assert len(df) == 3 61 | assert np.all(df.state == "06") 62 | assert np.all(df.county == "075") 63 | assert len(df.columns) > 50 64 | 65 | 66 | def test_tract_to_puma(c): 67 | puma = c.tract_to_puma("06", "075", "030600")[0] 68 | assert puma == "07506" 69 | 70 | 71 | def test_download_pums(c): 72 | puma = "07506" 73 | c.download_population_pums("06", puma) 74 | c.download_household_pums("06", puma) 75 | c.download_population_pums("10") 76 | c.download_household_pums("10") 77 | -------------------------------------------------------------------------------- /synthpop/test/test_data/hh_marginals.csv: -------------------------------------------------------------------------------- 1 | zone_id,sample_geog,cars,cars,cars,children,children,income,income,income,workers,workers,workers 2 | ,,none,one,two or more,no,yes,gt100,gt35-lt100,lt35,none,one,two or more 3 | 1,1,7,49,197,41,215,57,125,74,72,77,105 4 | 2,1,9,59,237,68,239,83,126,98,87,93,125 5 | 3,1,10,69,275,79,279,74,170,114,102,108,146 6 | 4,1,11,76,302,167,224,42,105,244,111,118,160 7 | 5,1,18,117,466,86,517,50,261,292,171,182,247 8 | 6,1,9,63,252,65,261,80,139,107,92,98,133 9 | 7,1,19,159,377,160,397,96,186,275,199,194,162 10 | 8,1,11,98,231,86,257,30,99,214,123,119,100 11 | 9,1,9,78,186,49,226,22,164,89,98,95,79 12 | 10,1,7,65,155,55,175,21,143,66,82,80,66 13 | 11,1,17,297,542,289,570,118,407,334,303,279,274 14 | 12,1,15,258,474,201,548,76,371,302,264,244,240 15 | 13,1,40,217,486,251,495,121,314,311,269,259,216 16 | 14,1,51,278,622,472,482,53,320,581,344,332,277 17 | 15,1,38,210,470,220,501,68,350,303,259,251,209 18 | 16,1,23,79,83,45,142,0,60,127,87,54,43 19 | 17,1,23,78,81,47,137,0,49,135,86,54,43 20 | 18,1,36,122,127,103,184,0,134,153,134,84,67 21 | 19,1,40,135,141,66,252,23,190,105,149,93,75 22 | 20,1,89,303,318,442,271,19,167,527,334,209,168 23 | 21,1,43,147,154,108,238,0,161,185,162,101,81 
-------------------------------------------------------------------------------- /synthpop/test/test_data/person_marginals.csv: -------------------------------------------------------------------------------- 1 | zone_id,age,age,age,age,race,race,race,race,sex,sex 2 | ,19 and under,20 to 35,35 to 60,above 60,asian,black,other,white,female,male 3 | 1,312,108,223,177,64,0,0,756,440,380 4 | 2,235,143,296,181,0,0,0,855,452,403 5 | 3,303,229,445,174,0,0,24,1127,565,586 6 | 4,215,77,356,189,0,0,29,808,389,448 7 | 5,506,539,619,262,0,0,0,1926,981,945 8 | 6,377,171,285,102,0,0,47,888,476,459 9 | 7,312,150,488,382,0,0,14,1318,681,651 10 | 8,246,100,229,242,0,0,0,817,337,480 11 | 9,218,182,203,185,0,0,6,782,411,377 12 | 10,52,75,150,227,0,0,22,482,206,298 13 | 11,490,314,617,721,21,82,14,2025,1062,1080 14 | 12,639,356,721,381,7,4,46,2040,1162,935 15 | 13,345,341,647,564,0,21,179,1697,895,1002 16 | 14,372,363,708,638,0,6,89,1986,1044,1037 17 | 15,361,281,624,528,6,0,141,1647,871,923 18 | 16,149,92,67,157,0,0,0,465,212,253 19 | 17,287,69,196,81,0,0,0,633,366,267 20 | 18,160,128,265,93,0,0,20,626,366,280 21 | 19,418,158,313,198,0,0,0,1087,546,541 22 | 20,238,151,495,327,0,0,132,1079,748,463 23 | 21,272,133,203,279,0,0,0,887,401,486 -------------------------------------------------------------------------------- /synthpop/test/test_draw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.testing as npt 3 | import pandas as pd 4 | import pytest 5 | from pandas.util import testing as pdt 6 | 7 | from .. 
import draw 8 | from ..ipu.ipu import _FrequencyAndConstraints 9 | 10 | 11 | @pytest.fixture 12 | def seed(request): 13 | current = np.random.get_state() 14 | 15 | def fin(): 16 | np.random.set_state(current) 17 | request.addfinalizer(fin) 18 | 19 | np.random.seed(0) 20 | 21 | 22 | @pytest.fixture 23 | def index(): 24 | return np.array(['v', 'w', 'x', 'y', 'z'], dtype=np.str_) 25 | 26 | 27 | @pytest.fixture 28 | def weights(): 29 | return np.array([1, 2, 3, 4, 5]) 30 | 31 | 32 | @pytest.fixture 33 | def num(): 34 | return 10 35 | 36 | 37 | def test_simple_draw(index, weights, num, seed): 38 | drawn_indexes = draw.simple_draw(num, weights, index) 39 | 40 | npt.assert_array_equal( 41 | drawn_indexes, ['y', 'z', 'y', 'y', 'y', 'y', 'y', 'z', 'z', 'x']) 42 | 43 | 44 | def test_execute_draw(): 45 | hh_df = pd.DataFrame( 46 | {'a': range(5), 47 | 'b': range(5, 10), 48 | 'serialno': [11, 22, 33, 44, 55]}, 49 | index=pd.Index(['a', 'b', 'c', 'd', 'e'], name='hh_id')) 50 | 51 | pp_df = pd.DataFrame( 52 | {'x': range(100, 110), 53 | 'y': range(110, 120), 54 | 'serialno': [22, 33, 11, 55, 22, 33, 44, 55, 11, 33]}, 55 | index=pd.Index(['q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'])) 56 | 57 | indexes = ['c', 'a', 'd', 'e', 'a', 'c', 'e', 'e', 'a', 'c', 'e'] 58 | 59 | synth_hh, synth_pp = draw.execute_draw( 60 | indexes, hh_df, pp_df, hh_index_start=1000) 61 | 62 | expected_index = pd.Index(range(1000, 1011)) 63 | pdt.assert_index_equal(synth_hh.index, expected_index) 64 | pdt.assert_series_equal( 65 | synth_hh.serialno, 66 | pd.Series( 67 | [33, 11, 44, 55, 11, 33, 55, 55, 11, 33, 55], 68 | index=expected_index, name='serialno')) 69 | assert list(synth_hh.columns) == ['a', 'b', 'serialno'] 70 | 71 | pdt.assert_index_equal(synth_pp.index, pd.Index(range(24))) 72 | pdt.assert_series_equal( 73 | synth_pp.serialno, 74 | pd.Series( 75 | ([33] * 9) + ([11] * 6) + ([55] * 8) + [44], name='serialno')) 76 | pdt.assert_series_equal( 77 | synth_pp.hh_id, 78 | pd.Series( 79 | ([1000, 
1005, 1009] * 3) + ([1001, 1004, 1008] * 2) + 80 | ([1003, 1006, 1007, 1010] * 2) + [1002], 81 | name='hh_id')) 82 | 83 | 84 | def test_compare_to_constraints_exact(): 85 | constraints = pd.Series([1, 3, 2], index=['a', 'b', 'c']) 86 | synth = pd.Series(['a', 'c', 'b', 'c', 'b', 'b']) 87 | 88 | chisq, p = draw.compare_to_constraints(synth, constraints) 89 | 90 | assert chisq == 0 91 | assert p == 1 92 | 93 | 94 | def test_compare_to_constraints(): 95 | constraints = pd.Series([1, 1, 2, 1, 3], index=['a', 'b', 'c', 'd', 'e']) 96 | synth = pd.Series(['e', 'a', 'e', 'e', 'c', 'e']) 97 | 98 | chisq, p = draw.compare_to_constraints(synth, constraints) 99 | 100 | 101 | @pytest.fixture 102 | def freqs(): 103 | return pd.DataFrame( 104 | {'a': [1, 1, 0, 0, 0, 0, 0, 0, 0, 0], 105 | 'b': [0, 0, 1, 1, 1, 0, 0, 0, 0, 0], 106 | 'c': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], 107 | 'd': [0, 0, 0, 0, 0, 0, 1, 1, 1, 1]}) 108 | 109 | 110 | def test_draw_indexes_easy(freqs, seed): 111 | # constraints are integers, add up to the total we want 112 | constraints = pd.Series([6, 4, 3, 9], index=freqs.columns) 113 | 114 | fac = _FrequencyAndConstraints(freqs, constraints) 115 | weights = pd.Series(np.ones(10)) 116 | 117 | idx = draw._draw_indexes(constraints.sum(), fac, weights) 118 | 119 | assert isinstance(idx, pd.Index) 120 | assert len(idx) == constraints.sum() 121 | assert idx.isin(weights.index).all() 122 | 123 | with pytest.raises(RuntimeError): 124 | draw._draw_indexes(100, fac, weights) 125 | 126 | 127 | def test_draw_indexes(freqs, seed): 128 | num = 22 129 | constraints = pd.Series([6.1, 3.2, 2.5, 8.9], index=freqs.columns) 130 | fac = _FrequencyAndConstraints(freqs, constraints) 131 | weights = pd.Series( 132 | [0.1012815, 0.11915142, 0.0369963, 0.20165698, 0.14132664, 133 | 0.02791166, 0.06182466, 0.17389766, 0.11982733, 0.01612583]) 134 | 135 | idx = draw._draw_indexes(num, fac, weights) 136 | 137 | assert isinstance(idx, pd.Index) 138 | assert len(idx) == num 139 | assert 
idx.isin(weights.index).all() 140 | 141 | assert idx.isin({0, 1}).sum() == 6 142 | assert idx.isin({2, 3, 4}).sum() == 4 143 | assert idx.isin({5}).sum() == 3 144 | assert idx.isin({6, 7, 8, 9}).sum() == 9 145 | -------------------------------------------------------------------------------- /synthpop/test/test_zone_synthesizer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import pandas as pd 4 | 5 | import synthpop.zone_synthesizer as zs 6 | 7 | 8 | @pytest.fixture 9 | def hh_marg(): 10 | fname = os.path.join(os.path.dirname(__file__), 11 | 'test_data/hh_marginals.csv') 12 | return fname 13 | 14 | 15 | @pytest.fixture 16 | def p_marg(): 17 | fname = os.path.join(os.path.dirname(__file__), 18 | 'test_data/person_marginals.csv') 19 | return fname 20 | 21 | 22 | @pytest.fixture 23 | def hh_sample(): 24 | fname = os.path.join(os.path.dirname(__file__), 25 | 'test_data/household_sample.csv') 26 | return fname 27 | 28 | 29 | @pytest.fixture 30 | def p_sample(): 31 | fname = os.path.join(os.path.dirname(__file__), 32 | 'test_data/person_sample.csv') 33 | return fname 34 | 35 | 36 | def test_run(hh_marg, p_marg, hh_sample, p_sample): 37 | hh_marg, p_marg, hh_sample, p_sample, xwalk = zs.load_data(hh_marg, 38 | p_marg, 39 | hh_sample, 40 | p_sample) 41 | all_households, all_persons, all_stats = zs.synthesize_all_zones(hh_marg, 42 | p_marg, 43 | hh_sample, 44 | p_sample, 45 | xwalk) 46 | 47 | 48 | def test_run_multi(hh_marg, p_marg, hh_sample, p_sample): 49 | hhm, pm, hhs, ps, xwalk = zs.load_data(hh_marg, p_marg, 50 | hh_sample, p_sample) 51 | all_persons, all_households, all_stats = zs.multiprocess_synthesize(hhm, pm, 52 | hhs, ps, 53 | xwalk) 54 | -------------------------------------------------------------------------------- /synthpop/zone_synthesizer.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import multiprocessing 
3 | 4 | import pandas as pd 5 | 6 | from .synthesizer import synthesize, enable_logging 7 | from . import categorizer as cat 8 | 9 | 10 | def load_data(hh_marginal_file, person_marginal_file, 11 | hh_sample_file, person_sample_file): 12 | """ 13 | Load and process data inputs from .csv files on disk 14 | 15 | Parameters 16 | ---------- 17 | hh_marginal_file : string 18 | path to a csv file of household marginals 19 | person_marginal_file : string 20 | path to a csv file of person marginals 21 | hh_sample_file : string 22 | path to a csv file of sample household records to be drawn from 23 | person_sample_file : string 24 | path to a csv file of sample person records 25 | Returns 26 | ------- 27 | hh_marg : pandas.DataFrame 28 | processed and properly indexed household marginals table 29 | p_marg : pandas.DataFrame 30 | processed and properly indexed person marginals table 31 | hh_sample : pandas.DataFrame 32 | household sample table 33 | p_sample : pandas.DataFrame 34 | person sample table 35 | xwalk : list of tuples 36 | list of marginal-to-sample geography crosswalks to iterate over 37 | """ 38 | hh_sample = pd.read_csv(hh_sample_file) 39 | p_sample = pd.read_csv(person_sample_file) 40 | 41 | hh_marg = pd.read_csv(hh_marginal_file, header=[0, 1], index_col=0) 42 | hh_marg.columns.levels[0].set_names('cat_name', inplace=True) 43 | hh_marg.columns.levels[1].set_names('cat_values', inplace=True) 44 | 45 | xwalk = list(zip(hh_marg.index, hh_marg.sample_geog.unstack().values)) 46 | hh_marg = hh_marg.drop('sample_geog', axis=1, level=0) 47 | 48 | p_marg = pd.read_csv(person_marginal_file, header=[0, 1], index_col=0) 49 | p_marg.columns.levels[0].set_names('cat_name', inplace=True) 50 | p_marg.columns.levels[1].set_names('cat_values', inplace=True) 51 | 52 | return hh_marg, p_marg, hh_sample, p_sample, xwalk 53 | 54 | 55 | def synthesize_all_zones(hh_marg, p_marg, hh_sample, p_sample, xwalk): 56 | """ 57 | Iterate over a geography crosswalk list and synthesize in-line 
58 | 59 | Parameters 60 | ---------- 61 | hh_marg : pandas.DataFrame 62 | processed and properly indexed household marginals table 63 | p_marg : pandas.DataFrame 64 | processed and properly indexed person marginals table 65 | hh_sample : pandas.DataFrame 66 | household sample table 67 | p_sample : pandas.DataFrame 68 | person sample table 69 | xwalk : list of tuples 70 | list of marginal-to-sample geography crosswalks to iterate over 71 | Returns 72 | ------- 73 | all_households : pandas.DataFrame 74 | synthesized household records 75 | all_persons : pandas.DataFrame 76 | synthesized person records 77 | all_stats : pandas.DataFrame 78 | chi-square and p-score values for each marginal geography drawn 79 | """ 80 | hh_list = [] 81 | people_list = [] 82 | stats_list = [] 83 | hh_index_start = 1 84 | for geogs in xwalk: 85 | households, people, stats = synthesize_zone(hh_marg, p_marg, 86 | hh_sample, p_sample, geogs) 87 | stats_list.append(stats) 88 | hh_list.append(households) 89 | people_list.append(people) 90 | 91 | if len(households) > 0: 92 | hh_index_start = households.index.values[-1] + 1 93 | all_households = pd.concat(hh_list) 94 | all_persons = pd.concat(people_list) 95 | all_households, all_persons = synch_hhids(all_households, all_persons) 96 | all_stats = pd.DataFrame(stats_list) 97 | return all_households, all_persons, all_stats 98 | 99 | 100 | def synch_hhids(households, persons): 101 | """ 102 | Synchronize household ids with corresponding person records 103 | 104 | Parameters 105 | ---------- 106 | households : pandas.DataFrame 107 | full households table with id values sequential by geog 108 | persons : pandas.DataFrame 109 | full persons table with id values sequential by geog 110 | Returns 111 | ------- 112 | households : pandas.DataFrame 113 | households table with reindexed sequential household ids 114 | persons : pandas.DataFrame 115 | persons table synchronized with updated household ids 116 | """ 117 | households['hh_id'] = households.index 118 
| households['household_id'] = range(1, len(households.index)+1) 119 | persons = pd.merge( 120 | persons, households[['household_id', 'geog', 'hh_id']], 121 | how='left', left_on=['geog', 'hh_id'], right_on=['geog', 'hh_id'], 122 | suffixes=('', '_x')).drop('hh_id', axis=1) 123 | households.set_index('household_id', inplace=True) 124 | households.drop('hh_id', axis=1, inplace=True) 125 | return households, persons 126 | 127 | 128 | def synthesize_zone(hh_marg, p_marg, hh_sample, p_sample, xwalk): 129 | """ 130 | Synthesize a single zone (Used within multiprocessing synthesis) 131 | 132 | Parameters 133 | ---------- 134 | hh_marg : pandas.DataFrame 135 | processed and properly indexed household marginals table 136 | p_marg : pandas.DataFrame 137 | processed and properly indexed person marginals table 138 | hh_sample : pandas.DataFrame 139 | household sample table 140 | p_sample : pandas.DataFrame 141 | person sample table 142 | xwalk : tuple 143 | tuple of marginal-to-sample geography crosswalk 144 | Returns 145 | ------- 146 | households : pandas.DataFrame 147 | synthesized household records 148 | people : pandas.DataFrame 149 | synthesized person records 150 | stats : pandas.DataFrame 151 | chi-square and p-score values for marginal geography drawn 152 | """ 153 | hhs, hh_jd = cat.joint_distribution( 154 | hh_sample[hh_sample.sample_geog == xwalk[1]], 155 | cat.category_combinations(hh_marg.columns)) 156 | ps, p_jd = cat.joint_distribution( 157 | p_sample[p_sample.sample_geog == xwalk[1]], 158 | cat.category_combinations(p_marg.columns)) 159 | households, people, people_chisq, people_p = synthesize( 160 | hh_marg.loc[xwalk[0]], p_marg.loc[xwalk[0]], hh_jd, p_jd, hhs, ps, xwalk[0], 161 | ignore_max_iters=False, hh_index_start=1) 162 | households['geog'] = xwalk[0] 163 | people['geog'] = xwalk[0] 164 | stats = {'geog': xwalk[0], 'chi-square': people_chisq, 'p-score': people_p} 165 | return households, people, stats 166 | 167 | 168 | def 
multiprocess_synthesize(hh_marg, p_marg, hh_sample, 169 | p_sample, xwalk, cores=False): 170 | """ 171 | Synthesize for a set of marginal geographies via multiprocessing 172 | 173 | Parameters 174 | ---------- 175 | hh_marg : pandas.DataFrame 176 | processed and properly indexed household marginals table 177 | p_marg : pandas.DataFrame 178 | processed and properly indexed person marginals table 179 | hh_sample : pandas.DataFrame 180 | household sample table 181 | p_sample : pandas.DataFrame 182 | person sample table 183 | xwalk : list of tuples 184 | list of marginal-to-sample geography crosswalks to iterate over 185 | cores : integer, optional 186 | number of cores to use in the multiprocessing pool. defaults to 187 | multiprocessing.cpu_count() - 1 188 | Returns 189 | ------- 190 | all_households : pandas.DataFrame 191 | synthesized household records 192 | all_persons : pandas.DataFrame 193 | synthesized person records 194 | all_stats : pandas.DataFrame 195 | chi-square and p-score values for each marginal geography drawn 196 | """ 197 | cores = cores if cores else (multiprocessing.cpu_count()-1) 198 | part = partial(synthesize_zone, hh_marg, p_marg, hh_sample, p_sample) 199 | p = multiprocessing.Pool(cores) 200 | results = p.map(part, list(xwalk)) 201 | p.close() 202 | p.join() 203 | 204 | hh_list = [result[0] for result in results] 205 | people_list = [result[1] for result in results] 206 | all_stats = pd.DataFrame([result[2] for result in results]) 207 | all_households = pd.concat(hh_list) 208 | all_persons = pd.concat(people_list) 209 | all_households, all_persons = synch_hhids(all_households, all_persons) 210 | return all_households, all_persons, all_stats 211 | --------------------------------------------------------------------------------