├── pyproject.toml ├── LICENSE ├── README.md ├── .gitignore ├── crosswalks ├── judicial_districts │ ├── population_by_district_acs2018_5yr.csv │ ├── COUNTY_DISTRICT_README.md │ ├── population_by_district.ipynb │ └── race_by_district.ipynb └── zip_to_zcta │ ├── ZIP_ZCTA_README.md │ └── build_crosswalk.ipynb ├── chicago_2010pop_by_2020policedistricts.csv ├── generalized └── GENERALIZED_README.md ├── BOUNDARIES.md └── requirements.txt /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "acs-aggregate" 3 | version = "0.1.0" 4 | description = "Tools to help aggregate American Community Survey data to non-Census geographies" 5 | authors = ["Joe Germuska "] 6 | license = "MIT" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.9" 10 | pandas = "^1.4.2" 11 | requests = "^2.24.0" 12 | cenpy = {version = "^1.0.0", extras = ["python-Levenshtein"]} 13 | python-Levenshtein = "^0.12.0" 14 | census-data-aggregator = "^0.0.6" 15 | 16 | [tool.poetry.dev-dependencies] 17 | jupyterlab = "^2.2.10" 18 | folium = "^0.11.0" 19 | 20 | [build-system] 21 | requires = ["poetry>=0.12"] 22 | build-backend = "poetry.masonry.api" 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Census Reporter 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # acs-aggregate 2 | 3 | Tools to help aggregate American Community Survey data to non-Census ("custom") geographies. 4 | 5 | A common problem for journalists and other analysts is wishing that the Census Bureau tabulated American Community Survey (ACS) data for locally meaningful geographies, such as neighborhoods, wards, or police districts. This project aims to make that as easy as possible, while acknowledging that there are some wrinkles. 6 | 7 | * See the `crosswalks` directory for crosswalks for specific geography types. 8 | * See `generalized` for examples of a general method (with a worked example). 9 | * see BOUNDARIES.md for a randomly assembled list of available GIS data which might be the kinds of things for which people would want to use this. 
10 | 11 | The longer term goal is to make this as automated as possible, but we're still getting a sense of the problem. We welcome discussion, or even just expressions of interest and votes of confidence. 12 | 13 | To read: [Target‐Density Weighting Interpolation and Uncertainty Evaluation for Temporal Analysis of Census Data](https://onlinelibrary.wiley.com/doi/full/10.1111/j.1538-4632.2007.00706.x), which may provide insights on whether these methods are well-designed. 14 | 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | work 2 | crosswalks/zip_to_zcta/tl_2019_us_zcta510.zip 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | -------------------------------------------------------------------------------- /crosswalks/judicial_districts/population_by_district_acs2018_5yr.csv: -------------------------------------------------------------------------------- 1 | state,district,total_pop 2 | Alabama,Middle,1151252 3 | Alabama,Northern,2870454 4 | Alabama,Southern,842974 5 | Alaska,Alaska,738516 6 | Arizona,Arizona,6946685 7 | Arkansas,Eastern,1639567 8 | Arkansas,Western,1351104 9 | California,Central,19354238 10 | California,Eastern,7993050 11 | California,Northern,8318423 12 | California,Southern,3483049 13 | Colorado,Colorado,5531141 14 | Connecticut,Connecticut,3581504 15 | Delaware,Delaware,949495 16 | District of Columbia,District of Columbia,684498 17 | Florida,Middle,11853354 18 | Florida,Northern,1840687 19 | Florida,Southern,6904098 20 | Georgia,Middle,2008351 21 | Georgia,Northern,6716197 22 | Georgia,Southern,1572936 23 | Hawaii,Hawaii,1422029 24 | Idaho,Idaho,1687809 25 | Illinois,Central,2220390 26 | Illinois,Northern,9340002 27 | Illinois,Southern,1261105 28 | Indiana,Northern,2589824 29 | Indiana,Southern,4047602 30 | Iowa,Northern,1325394 31 | Iowa,Southern,1807105 32 | Kansas,Kansas,2908776 33 | Kentucky,Eastern,2202959 34 | Kentucky,Western,2237245 35 | Louisiana,Eastern,1674111 36 | Louisiana,Middle,829642 37 | Louisiana,Western,2159863 38 | Maine,Maine,1332813 39 | Maryland,Maryland,6003435 40 | Massachusetts,Massachusetts,6830193 41 | Michigan,Eastern,6461168 42 | Michigan,Western,3496320 43 | Minnesota,Minnesota,5527358 44 | Mississippi,northern,1114229 45 | Mississippi,southern,1874533 46 | Missouri,Eastern,2927578 47 | Missouri,Western,3162484 48 | Montana,Montana,1041732 49 | Nebraska,Nebraska,1904760 50 | Nevada,Nevada,2922849 51 | New Hampshire,New Hampshire,1343622 52 | New Jersey,New Jersey,8881845 53 | New Mexico,New Mexico,2092434 54 | New York,Eastern,8217826 55 | New York,Northern,3396820 56 | New York,Southern,5209255 57 | New York,Western,2794552 58 | North Carolina,Eastern,3999741 59 | North Carolina,Middle,2952469 60 | North Carolina,Western,3203414 61 | North Dakota,North Dakota,752201 62 | Ohio,Northern,5733949 63 | Ohio,Southern,5907930 64 | Oklahoma,Eastern,748060 65 | Oklahoma,Northern,1060029 66 | Oklahoma,Western,2110048 67 | Oregon,Oregon,4081943 68 | Pennsylvania,Eastern,5723256 69 | Pennsylvania,Middle,3325682 70 | Pennsylvania,Western,3742243 71 | Puerto Rico,Puerto Rico,3386941 72 | Rhode Island,Rhode Island,1056611 73 | South Carolina,South Carolina,4955925 74 | South Dakota,South Dakota,864289 75 | Tennessee,Eastern,2601077 76 | Tennessee,Middle,2478940 77 | Tennessee,Western,1571072 78 | Texas,Eastern,3923823 79 | Texas,Northern,7252194 80 | Texas,Southern,9611091 81 | Texas,Western,7098087 82 | Utah,Utah,3045350 83 | Vermont,Vermont,624977 84 | Virginia,Eastern,6097466 85 | Virginia,Western,2316308 86 | Washington,Eastern,1584162 87 | Washington,Western,5710174 88 | West 
Virginia,Northern,869001 89 | West Virginia,Southern,960053 90 | Wisconsin,Eastern,3405147 91 | Wisconsin,Western,2373247 92 | Wyoming,Wyoming,581836 93 | -------------------------------------------------------------------------------- /chicago_2010pop_by_2020policedistricts.csv: -------------------------------------------------------------------------------- 1 | dist_num,P003001,P003002,P003003,P003004,P003005,P003006,P003007,P003008,P005001,P005002,P005003,P005004,P005005,P005006,P005007,P005008,P005009,P005010,P005011,P005012,P005013,P005014,P005015,P005016,P005017 2 | 1,62781,35208,13657,138,10835,29,1088,1826,62781,59015,32952,13452,95,10790,22,157,1547,3766,2256,205,43,45,7,931,279 3 | 2,95439,19189,66577,187,5867,13,963,2643,95439,92197,17747,65993,138,5837,13,202,2267,3242,1442,584,49,30,0,761,376 4 | 3,75235,1654,71508,182,318,4,332,1237,75235,74112,1472,71010,169,312,4,98,1047,1123,182,498,13,6,0,234,190 5 | 4,123575,26117,77303,689,311,31,16378,2746,123575,88194,9925,76399,223,258,29,138,1222,35381,16192,904,466,53,2,16240,1524 6 | 5,74396,1629,70429,166,41,10,1202,919,74396,71872,843,70064,118,38,8,55,746,2524,786,365,48,3,2,1147,173 7 | 6,90841,446,88938,182,61,7,269,938,90841,89927,312,88525,164,61,6,60,799,914,134,413,18,0,1,209,139 8 | 7,71071,511,69202,149,62,9,435,703,71071,69904,262,68787,136,56,8,49,606,1167,249,415,13,6,1,386,97 9 | 8,247373,118778,53462,1632,2168,61,64904,6368,247373,107519,51491,52219,247,2001,18,231,1312,139854,67287,1243,1385,167,43,64673,5056 10 | 9,165201,65820,19860,1153,26106,35,47743,4484,165201,70591,24307,19044,173,25894,16,146,1011,94610,41513,816,980,212,19,47597,3473 11 | 10,118093,38171,40080,1099,321,16,35543,2863,118093,44652,4300,39440,144,239,3,121,405,73441,33871,640,955,82,13,35422,2458 12 | 11,70474,4673,60385,174,356,18,3782,1086,70474,62356,1664,59671,73,329,4,46,569,8118,3009,714,101,27,14,3736,517 13 | 12,127869,69537,23781,839,8290,94,21374,3954,127869,85540,51775,23039,185,8148,49,273,2071,42329,17762,742,654,142,45,21101,1883 14 | 14,117738,75162,9448,828,3779,68,23759,4694,117738,63699,49809,8027,189,3604,33,236,1801,54039,25353,1421,639,175,35,23523,2893 15 | 15,59458,1571,55861,144,187,7,1041,647,59458,57193,959,55468,107,174,5,24,456,2265,612,393,37,13,2,1017,191 16 | 16,199476,162554,2438,854,10920,99,17142,5469,199476,154042,138043,2022,299,10711,44,266,2657,45434,24511,416,555,209,55,16876,2812 17 | 17,144096,84601,5625,1024,17692,85,27794,7275,144096,81864,55743,4782,321,17373,48,409,3188,62232,28858,843,703,319,37,27385,4087 18 | 18,117041,92302,10875,153,9837,49,1455,2370,117041,111235,88418,10681,108,9775,39,237,1977,5806,3884,194,45,62,10,1218,393 19 | 19,200786,161709,13753,558,12426,85,7031,5224,200786,180761,150551,13305,308,12277,67,341,3912,20025,11158,448,250,149,18,6690,1312 20 | 20,91279,57451,10230,510,12938,34,6922,3194,91279,74760,49420,9909,223,12792,27,251,2138,16519,8031,321,287,146,7,6671,1056 21 | 22,101941,36750,62510,165,366,27,686,1437,101941,98623,34863,62094,112,352,25,82,1095,3318,1887,416,53,14,2,604,342 22 | 24,141038,73653,26057,780,20954,72,13795,5727,141038,111292,60488,25322,351,20796,45,466,3824,29746,13165,735,429,158,27,13329,1903 23 | 25,200391,85347,35629,1731,3329,160,66851,7344,200391,67386,29371,33033,214,3086,44,339,1299,133005,55976,2596,1517,243,116,66512,6045 24 | 31,24266,21756,129,43,1227,6,744,361,24266,22036,20452,116,23,1205,3,23,214,2230,1304,13,20,22,3,721,147 25 | -------------------------------------------------------------------------------- 
/crosswalks/judicial_districts/COUNTY_DISTRICT_README.md: -------------------------------------------------------------------------------- 1 | # US Federal Court District to US County Crosswalk 2 | 3 | As part of our work with the [SCALES](https://scales-okn.org/) project, we set out to create a crosswalk which would support analysis of Census and other data by US Federal Court District. 4 | 5 | The districts are established by statute, specifically, [Title 28, United States Code, Chapter 5](https://www.law.cornell.edu/uscode/text/28/part-I/chapter-5). Generally, each US county is in exactly one District. There are a few special cases such as with waterways around New York City, a Federal Correctional Institution in North Carolina, and the like, but those are disregarded for the purposes of creating this cross-reference. 6 | 7 | Some Federal Court Districts are split into "Divisions" by the statute. There are also cases of local (non-statutory) rules creating Divisions in Federal Court Districts. Statutory divisions are included in this data set -- if a District has Divisions, each county is in exactly one. Local-rule Divisions are **not included** in this data. 8 | 9 | ## The data file 10 | 11 | The [crosswalk](county_district_xref.csv) is a UTF-8 encoded CSV file with one row per US county or county-equivalent. Each row has the following columns, each of which should be treated as text, even though some have only digits: 12 | 13 | * geoid - a US Census Bureau [GEOID](https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html). Each value is unique in this file. 14 | * state_fips - a two-digit [state FIPS code](https://en.wikipedia.org/wiki/Federal_Information_Processing_Standard_state_code#FIPS_state_codes) 15 | * county_fips - a three-digit [county FIPS code](https://en.wikipedia.org/wiki/List_of_United_States_FIPS_codes_by_county) 16 | * state - the state's name, in text 17 | * county - the county or county-equivalent's name 18 | * district - the name of the district, or the state name if the state is not divided into districts 19 | * division - the name of the division, or blank if the district does not have statutory divisions 20 | * statute_url - a link to the Cornell LII version of the statute for the given state, in case one wants to validate/review the data 21 | 22 | We welcome input from people with expertise about whether there's a more systematic way to represent the districts and divisions, such as numeric or coded identifiers. 23 | 24 | (We've since learned about [a GIS file of districts](https://hifld-geoplatform.opendata.arcgis.com/datasets/us-district-court-jurisdictions) which includes identifiers. In the future we may either use that file to create the crosswalk, or at least integrate its IDs to make it easier to create maps based on aggregated data. See also [this nice interactive javascript map](https://observablehq.com/@caged/the-united-states-courts-of-appeals-and-district-courts) of the districts and counties...) 25 | 26 | ## Using this crosswalk 27 | 28 | This repository includes two notebooks demonstrating how you can use the crosswalk with python code to aggregate ACS data by Judicial District: 29 | 30 | * [population_by_district.ipynb](population_by_district.ipynb) - a simple case to get the estimated total population for each district. 
If you just want that data, download [population_by_district_acs2018_5yr.csv](population_by_district_acs2018_5yr.csv) 31 | * [race_by_district.ipynb](race_by_district.ipynb) - a more involved example which also shows how to account for the aggregated margin of error, and how to test the reliability of the aggregates. 32 | 33 | Of course, you don't have to use python to use the crosswalk, but it's our working language, so it was easiest to use for demonstration. We'll gladly link to examples using `R` or other tools. 34 | 35 | ## More on the method 36 | 37 | The crosswalk here is not purely created by code. We were able to match most of the counties with a scraper (available in this [Google Colab notebook](https://colab.research.google.com/drive/1ghrzwtNhwlN6E3GBH8N5zqP9cAOPOGd0#scrollTo=LtDXNodX4KO9)), but at a certain point, it didn't seem worth working through formatting peculiarities, misspellings in the statute, and annoying nuances of regular expressions. 38 | 39 | We are particularly grateful to Mary Catherine Talbott, University of Richmond Law Student, Class of 2022, for careful review of the cities of Virginia, which, while treated as "county-equivalents" by the Census, are not specifically enumerated in the statute. 40 | -------------------------------------------------------------------------------- /generalized/GENERALIZED_README.md: -------------------------------------------------------------------------------- 1 | # Towards a Generalized Tool for Aggregating ACS Data for non-Census Geographies 2 | 3 | A common problem for journalists and other analysts is wishing that the Census Bureau tabulated American Community Survey (ACS) data for locally meaningful geographies, such as neighborhoods, wards, or police districts. 4 | 5 | The [Jupyter notebook](notebook.ipynb) in this directory provides working python code which does this, and demonstrates its use to create datasets of ACS estimates for Chicago Police districts. An obvious next step would be to factor the code out of a notebook into a reusable library. 6 | 7 | It would be even more convenient to provide this as a web-hosted service, but we have some concerns about the system resources needed to support whatever requests people might bring. Still, it's something to consider. 8 | 9 | 10 | ## Method 11 | 12 | Without access to individual Census responses, the only way to obtain Census data for custom geographies is to map Census geographies to your custom geographies and add up the figures. This is relatively straightforward, unless the Census geographies are split between two or more custom geographies. 13 | 14 | While census blocks vary in size, [more than half of them are smaller than 0.1 sq. miles](http://proximityone.com/geo_blocks.htm). Smaller census blocks are less likely to cross boundaries of custom geographies, and since they also, generally, have smaller populations, the inaccuracy introduced by treating them as if they were not split is usually tolerable. 15 | 16 | However, block-level data is only provided for the Decennial Census. The ACS, which is released every year, and which also includes many topics not covered by the Decennial Census, uses the *block group* as its smallest geography. While small, block groups still contain dozens of blocks or more, increasing the likelihood of distorting the data if block groups are simply assigned to a single custom geography when they are, in reality, split between two or more.
17 | 18 | To address this issue, when a block group is split, we allocate its data to each segment in proportion to the population of that segment, based on the block-level population of the most recent Decennial Census. (For certain data, using the block-level housing unit count is more appropriate.) A minimal code sketch of this allocation step appears at the end of this document. 19 | 20 | It's still an open question whether it would be better to use block groups or tracts as the building block of aggregate data. Block groups, being smaller, might seem to provide closer alignment. However, because the ACS is a survey, smaller geographies tend to have larger margins of error, especially for small sub-populations. ACS census tract level data may reduce some of that uncertainty. For now, this library supports using either. 21 | 22 | 23 | ## Status 24 | 25 | Currently, there's a [Jupyter notebook](https://github.com/censusreporter/acs-aggregate/blob/master/notebook.ipynb) which explains the origin of the project and demonstrates the basic method. If you have a "block assignment" file, you can use it now to pull ACS data for your custom geographies. 26 | 27 | Next steps: 28 | 29 | * Package the code in the notebook into a library 30 | * Make it easier to create a "block assignment file" 31 | * Address the caveats below 32 | 33 | ### Caveats and limitations 34 | 35 | * Right now, margin of error is simply disregarded. It would not be too hard to aggregate the margin of error as part of the process, but it somewhat clutters things up by doubling the number of columns. At some point, I think I'd like to add an option to include aggregated MOE. 36 | 37 | * The library is not equipped to aggregate median values such as "median household income". Folks at the LA Times have [done work in this area](https://github.com/datadesk/census-data-aggregator#approximating-medians), but applying it requires a slightly different API than the library currently uses. It's something I'd like to work on, though. [This post to the ACS Data Community](https://acsdatacommunity.prb.org/discussion-forum/f/forum/898/allocating-median-household-income-across-census-boundaries/2290#2290) recommends a more nuanced approach, and cautions against problems that come from assuming a symmetric income distribution. 38 | 39 | * The library is not equipped to aggregate percent values. The ACS Subject tables and Data Profile tables have a mix of "total" and "percent" variables. It's probably possible to aggregate percentages, but I'm not clear on the method. 40 | 41 | * Making a block assignment file is a lot of work, if not just out of reach, for most people. In a project which re-ignited this one, [John Keefe reported out cases](https://johnkeefe.net/chicago-race-and-ethnicity-data-by-police-district) where blocks themselves were split by custom geographies. Other approaches simply assign blocks to whatever custom geography contains their centroid, which should be automatable. A near-term goal is to support creating the block assignment files using centroid assignment. I'd imagined trying to make a web tool to help with the review/assignment, but I'm not sure it's worth the considerable effort, especially for ACS data, which is always imprecise by its survey nature.
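
### A sketch of the allocation step

To make the method above concrete, here is a minimal sketch of the proportional-allocation step. This is not the notebook's actual code: the file names and columns (`block_geoid`, `custom_geoid`, `pop2010`, `bg_geoid`) are assumptions standing in for your own block assignment file and ACS extract.

```python
import pandas as pd

# Hypothetical inputs: a block assignment file (block -> custom geography, with the
# 2010 population of each block) and a table of ACS estimates by block group.
blocks = pd.read_csv("block_assignment.csv", dtype={"block_geoid": str, "custom_geoid": str})
acs_bg = pd.read_csv("acs_by_block_group.csv", dtype={"bg_geoid": str})

# A block's block group is the first 12 characters of its 15-character GEOID.
blocks["bg_geoid"] = blocks["block_geoid"].str[:12]

# Population of each (block group, custom geography) segment, and that segment's
# share of its block group's total population.
seg = blocks.groupby(["bg_geoid", "custom_geoid"], as_index=False)["pop2010"].sum()
seg["weight"] = seg["pop2010"] / seg.groupby("bg_geoid")["pop2010"].transform("sum")

# Allocate every estimate column in proportion to the segment weights, then sum
# the allocated pieces within each custom geography.
estimate_cols = [c for c in acs_bg.columns if c != "bg_geoid"]
merged = seg.merge(acs_bg, on="bg_geoid", how="left")
allocated = merged[estimate_cols].multiply(merged["weight"], axis=0)
allocated["custom_geoid"] = merged["custom_geoid"]
result = allocated.groupby("custom_geoid").sum()
```

If you also want aggregated margins of error (see the caveats above), the `census-data-aggregator` package's `approximate_sum()` implements the Census Bureau's root-sum-of-squares approximation for summed estimates, though combining it with the weighting above would take some care.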
42 | -------------------------------------------------------------------------------- /crosswalks/zip_to_zcta/ZIP_ZCTA_README.md: -------------------------------------------------------------------------------- 1 | # ZIP Code to ZCTA Crosswalk 2 | 3 | While Census Reporter refers to ZIP codes in its interface, it's actually the case that American Community Survey (ACS) data is not available by ZIP code. Instead, the geography we call ZIP Code is a ZIP Code Tabulation Area, or ZCTA. 4 | 5 | While folks commonly think of them as geographic areas, ZIP Codes actually identify a post office which handles delivering the mail to its final destination. While, in many cases, there's an implicit area that contains all of the addresses in that ZIP code, there are other ZIP codes where that doesn't work. There are ZIP codes which are only used for PO Boxes, and others which collect all the mail for a large business or organization that then handles the final delivery. In both of these cases, there's no straightforward way to draw them as an area on a map. 6 | 7 | The key issue is that there are thousands of ZIP codes for which there is no corresponding ZCTA. And while the ACS data is tabulated by ZCTA, not ZIP code, there are other data sources which are at the ZIP Code level, not the ZCTA level, including Census programs like ZIP Code Business Patterns, or ZBP (part of the [County Business Patterns](https://www.census.gov/programs-surveys/cbp.html) program). 8 | 9 | For a project where we wanted to integrate data from the ACS and the ZBP, we needed to come up with [a crosswalk assigning each ZIP Code to a ZCTA](zip_zcta_xref.csv). Since it was a bit of work, we wanted to share it with others who might need it. But, again, ZIP Codes change, so this file may go out of date. So we are also sharing the code and method in case you need to do it again with updated data files. 10 | 11 | ## The data file 12 | 13 | `zip_zcta_xref.csv` provides a crosswalk between ZIP Codes and ZCTAs. It was created using [build_crosswalk.ipynb](build_crosswalk.ipynb), a reimplementation of the method described below. 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 |
| column | datatype | notes |
| --- | --- | --- |
| zip_code | text | Source: Census Gazetteer, GeoNames or ZIP Code Business Patterns |
| zcta | text | the best available ZCTA for the ZIP Code, or null in a small number of cases |
| source | text | the original source of the ZIP Code in our data processing pipeline. You can probably ignore this. |
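
If you just need to roll ZIP-level figures up to ZCTAs so they can sit alongside ACS data, the join is a simple pandas merge. This is a minimal sketch, not part of our pipeline; the crosswalk columns are as documented above, while the ZBP file name and its `zip` and `est` columns are hypothetical stand-ins for your own data.

```python
import pandas as pd

# The crosswalk documented above: one row per ZIP Code.
xref = pd.read_csv("zip_zcta_xref.csv", dtype=str)

# Hypothetical ZIP-level input, e.g. an extract of ZIP Code Business Patterns
# with a ZIP Code column (`zip`) and an establishment count (`est`).
zbp = pd.read_csv("zbp_totals_by_zip.csv", dtype={"zip": str})

merged = zbp.merge(xref, left_on="zip", right_on="zip_code", how="left")

# A small number of ZIP Codes have no ZCTA in the crosswalk; decide how to handle them.
print(merged["zcta"].isna().sum(), "ZIP Codes could not be assigned to a ZCTA")

# Roll the ZIP-level figures up to ZCTAs, ready to join against ZCTA-level ACS data.
by_zcta = merged.dropna(subset=["zcta"]).groupby("zcta", as_index=False)["est"].sum()
```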
38 | 39 | ## Our method 40 | 41 | Getting data about USPS ZIP codes is not exactly straightforward. The USPS does not provide a simple, free list. We used [a dataset](https://download.geonames.org/export/dump/US.zip) from [GeoNames](https://www.geonames.org/) as our master list. For a comprehensive list of ZCTAs, we used [the ZCTA file](https://www2.census.gov/geo/docs/maps-data/data/gazetteer/2017_Gazetteer/2017_Gaz_zcta_national.zip) from the [2017 Gazetteer files](https://www.census.gov/geographies/reference-files/time-series/geo/gazetteer-files.2017.html), although for this version, we use the Census's ZCTA ESRI Shapefile, which is just the Gazetteer plus geographic boundaries. 42 | 43 | To the GeoNames master list, we added ZIP Codes in the ZIP Code Business Patterns data which weren't already in GeoNames. (See what we mean about it being hard without an authoritative master list?) 44 | 45 | We began with the assumption that any ZIP Code which had a corresponding ZCTA (same 5-digit identifier) should be treated as the same as that ZCTA. This is not strictly true: the Census Bureau acknowledges that their method for assigning ZCTAs sometimes results in addresses being placed in a ZCTA that differs from the address's ZIP code. However, we didn't see any way we could feasibly deal with that issue. 46 | 47 | After comparing the ZIP and ZCTA master lists, we identified nearly 8,000 ZIP Codes which do not have matching ZCTAs. 48 | 49 | The GeoNames dataset includes a geocode (latitude/longitude) for each ZIP code. It's not clear how those geocodes were assigned, so this is a leap of faith, but it's the best we had to go-on. We use GIS software to try locating non-ZCTA ZIP Codes in a ZCTA. It’s difficult to estimate what distortion might be introduced by this approach. 50 | 51 | After the GIS analysis, we were left with about 100 ZIP Codes which were in GeoNames, and so had a geocode, but were not located in any ZCTA. There were also a few dozen ZIP codes which appeared in the ZBP dataset but which were not in either GeoNames or the ZCTA gazetteer. These were put to a manual review process. 52 | 53 | The process was thus: we loaded the ZIP Codes which weren't ZCTAs into [this Google Sheets document](https://docs.google.com/spreadsheets/d/1sbf-15PzHTnT6CsUMKcVnmhoHx-wKZ_PR-f_1WS5l5A/edit#gid=1978067583). We added a couple of columns to do Google Map searches: `point map url`, based on the latitude and longitude, if we had them, and `zip map url`, searching Google Maps for the ZIP Code. We included ZIP Codes which were matched to ZCTAs by geocoding (as above), in case we wanted to spot check any of them, but we were focused on those which had no ZCTA. 54 | 55 | For each row, the reviewer would load the point map. If the result was implausible, like many which came up in water, then the reviewer would load the zip code map. 56 | 57 | For plausible maps, the reviewer would right-click on the map to bring up the Google Maps context menu, and choose "What's Here?" If that gave further information that had a ZIP Code, we used it, otherwise we tried "What's Here" for very nearby points on the map until a ZIP Code was found. 58 | 59 | That ZIP Code was placed in the `result` column, which would then generate a link in the `census_reporter_check` column for that row. Clicking on that link would try to open the Census Reporter page for the ZCTA that was entered in the "result" column. If loading the page on Census Reporter errored, that was a sign that the ZIP Code found was not a ZCTA. 
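
For reference, the geocode-based assignment described above (placing each orphan ZIP Code's point inside a ZCTA polygon) can be reproduced with geopandas, which requirements.txt already pins. This is a sketch under assumptions, not our original workflow: the input file, its column names, and the shapefile attribute `ZCTA5CE10` should all be verified against your own copies of the data.

```python
import geopandas as gpd
import pandas as pd

# Hypothetical extract of ZIP Codes with no matching ZCTA, with GeoNames coordinates.
zips = pd.read_csv("unmatched_zips.csv", dtype={"zip_code": str})  # zip_code, latitude, longitude

# TIGER/Line ZCTA shapefile (the 2019 edition is referenced in this repo's .gitignore),
# extracted locally. ZCTA5CE10 is assumed to be the ZCTA identifier attribute in that vintage.
zctas = gpd.read_file("tl_2019_us_zcta510/tl_2019_us_zcta510.shp")

points = gpd.GeoDataFrame(
    zips,
    geometry=gpd.points_from_xy(zips["longitude"], zips["latitude"]),
    crs="EPSG:4326",
)

# Assign each ZIP point to the ZCTA polygon that contains it.
joined = gpd.sjoin(points, zctas.to_crs("EPSG:4326"), how="left", predicate="within")
geocoded = joined[["zip_code", "ZCTA5CE10"]].rename(columns={"ZCTA5CE10": "zcta"})

# Rows where `zcta` is null are the leftovers that went on to the manual review above.
```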
60 | 61 | For ZIPs which could not be resolved to ZCTAs, we used a `notes` column to provide more information. 62 | 63 | After this process, only three ZIP Codes remained unresolved, and for each, plausible explanations were documented in the ‘notes’ column of the Google Sheets document. In the rebuilt process, documented here, we also turned up two new ZIP Codes from the ZBP data which haven't been manually reviewed. 64 | 65 | We're very grateful to Caroline Dudlak, Medill '21, for her human review efforts. 66 | 67 | 68 | ## More info 69 | 70 | * [US Census: ZIP Code Tabulation Areas (ZCTAs)](https://www.census.gov/programs-surveys/geography/guidance/geo-areas/zctas.html) 71 | * [What is the difference between ZIP code "boundaries" and ZCTA areas?](http://gis.washington.edu/phurvitz/zip_or_zcta/index.html) by Phil Hurvitz, University of Washington 72 | * [HUD USPS ZIP Code Crosswalk Files](https://www.huduser.gov/portal/datasets/usps_crosswalk.html) — we found this after we created our own, but it looks like this is regularly updated, and ZIP Codes change fairly frequently, so it is probably a better resource than ours. Registration is required, so that is a bit of friction. 73 | * HRSA [ZIP Code to ZCTA Crosswalk](https://data.hrsa.gov/DataDownload/GeoCareNavigator/ZIP%20Code%20to%20ZCTA%20Crosswalk.xlsx) (linked from [Health Center Program GeoCare Navigator](https://geocarenavigator.hrsa.gov/)) — this is a directly downloadable Excel file provided by the US Dept. of Health and Human Services, and it seems to be updated frequently. 74 | * [ZIP Codes by Area and District codes](https://postalpro.usps.com/ZIP_Locale_Detail), provided by the USPS, may be the most authoritative source. This is another we found after we did our work, and this one requires synthesizing data from different worksheets in an Excel file, but for many data users, that will be straightforward. 75 | -------------------------------------------------------------------------------- /BOUNDARIES.md: -------------------------------------------------------------------------------- 1 | # Sources of Boundary Information 2 | 3 | Often people who want to use a tool like `acs-aggregate` have a bootstrapping problem: where are the GIS files for the areas for which they want to aggregate data? 4 | 5 | While this will probably be hard to keep up to date, and perhaps should be in some form other than markdown in this repository, let's take a stab at building a list. 6 | 7 | Since we started this, Census Reporter launched [a tool](https://censusreporter.org/2020/) which uses this kind of data, and which may list some options which are not included here. 8 | 9 | *Since this is about aggregating Census data, let's stipulate that it should only include polygon data, not points and lines. And, only boundaries relevant to the United States and its territories. 
We'll organize it by state (or state-like.)* 10 | 11 | ## US - United States 12 | 13 | * [Home Owners' Loan Corporation (HOLC) "Redlining" maps](https://dsl.richmond.edu/panorama/redlining/#text=downloads) (also available for many specific cities) 14 | * [US District Court Jurisdictions](https://hifld-geoplatform.opendata.arcgis.com/datasets/us-district-court-jurisdictions) (see also [`COUNTY_DISTRICT_README.md`](crosswalks/judicial_districts/COUNTY_DISTRICT_README.md)) 15 | 16 | ## AL - Alabama 17 | 18 | * [Birmingham Neighborhoods](https://data.birminghamal.gov/dataset/gis-mapping-files/resource/bb378880-fdbb-40a2-89ef-27582adef3bc) 19 | * [Huntsville City Council Districts](https://gis-huntsvilleal.opendata.arcgis.com/datasets/city-council-districts/explore) 20 | 21 | ## CA - California 22 | 23 | * [Los Angeles County, CA Neighborhoods](https://apps.gis.ucla.edu/geodata/dataset/los-angeles-county-neighborhoods) 24 | * [Oakland Community Police Beats](https://data.oaklandca.gov/dataset/Oakland-Community-Police-Beats/tp8r-5gzs) 25 | * [Oakland Council Districts](https://data.oaklandca.gov/City-Government/City-of-Oakland-Council-Districts/g7vb-tiyh) 26 | * [Sacramento, CA City Council Districts](https://data.cityofsacramento.org/datasets/28bd505c8e674a49ba5f782d0d806033_0/about) 27 | * [San Francisco, CA Supervisor Districts (2012 Redistricting)](https://data.sfgov.org/Geographic-Locations-and-Boundaries/Current-Supervisor-Districts/keex-zmn4) 28 | 29 | ## CO - Colorado 30 | 31 | * [Various state-level files](https://demography.dola.colorado.gov/gis/gis-data/) including hospital, library, water, fire protection districts and more 32 | * [Denver "Statistical Neighborhoods"](https://www.denvergov.org/opendata/dataset/city-and-county-of-denver-statistical-neighborhoods) 33 | 34 | 35 | ## DC - Washington, DC 36 | 37 | * [Washington, DC Neighborhood Clusters](https://opendata.dc.gov/datasets/f6c703ebe2534fc3800609a07bad8f5b_17) 38 | 39 | ## FL - Florida 40 | 41 | * [Orlando, FL Neighborhoods](https://orl.hub.arcgis.com/datasets/orlando-political-neighborhoods/explore?location=28.481107%2C-81.342842%2C11.31) 42 | 43 | ## GA - Georgia 44 | 45 | * [Atlanta Neighborhoods](https://dpcd-coaplangis.opendata.arcgis.com/datasets/neighborhood/) 46 | * [Atlanta Neighborhood Planning Units](https://dpcd-coaplangis.opendata.arcgis.com/datasets/npu) (NPUs) 47 | 48 | 49 | ## HI - Hawaii 50 | 51 | * [Honolulu "Realtor Neighborhoods"](https://honolulu-cchnl.opendata.arcgis.com/datasets/neighborhoods-realtor-neighborhoods/) 52 | 53 | ## IL - Illinois 54 | 55 | * [Chicago Neighborhoods](https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Neighborhoods/bbvz-uum9) 56 | * [Chicago Community Areas](https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Community-Areas-current-/cauq-8yn6) 57 | * [Chicago Police Districts](https://data.cityofchicago.org/Public-Safety/Boundaries-Police-Districts/4dt9-88ua) 58 | * [Chicago City Council Wards](https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Wards-2015-/sp34-6z76) 59 | 60 | ## IN - Indiana 61 | 62 | * [Bloomington City Neighborhood Associations](https://data.bloomington.in.gov/dataset/city-neigbhorhoods-gis-data) 63 | 64 | 65 | ## MA - Massachusetts 66 | 67 | * [Boston Neighborhoods](https://data.boston.gov/dataset/boston-neighborhoods) 68 | * [Cambridge CDD (Community Development Department) Neighborhoods](https://www.cambridgema.gov/GIS/gisdatadictionary/Boundary/BOUNDARY_CDDNeighborhoods) 69 | 70 | ## 
MD - Maryland 71 | 72 | * [Baltimore Neighborhoods](https://data.baltimorecity.gov/datasets/baltimore::neighborhoods/about) 73 | 74 | 75 | ## MI - Michigan 76 | 77 | * [Detroit neighborhood boundaries](https://data.detroitmi.gov/datasets/neighborhoods) 78 | 79 | ## MN - Minnesota 80 | 81 | * [Minneapolis, MN Neighborhoods](https://opendata.minneapolismn.gov/datasets/cityoflakes::minneapolis-neighborhoods/about) 82 | 83 | ## MO - Missouri 84 | 85 | * [Kansas City neighborhood boundaries](https://data.kcmo.org/Neighborhoods/Kansas-City-Neighborhood-Boundaries/q45j-ejyk) 86 | * [St. Louis city shapefile data](https://www.stlouis-mo.gov/data/formats/format.cfm?id=21) (includes ward and neighborhood boundaries as well as other divisions) 87 | 88 | ## NY - New York 89 | 90 | * [New York City Community Districts](https://data.cityofnewyork.us/City-Government/Community-Districts/yfnk-k7r4) 91 | * [New York City Council Districts](https://data.cityofnewyork.us/City-Government/City-Council-Districts/yusd-j4xi) 92 | * [New York City Election Districts](https://data.cityofnewyork.us/City-Government/Election-Districts/h2n3-98hq) 93 | * [New York City Neighborhood Tabulation Areas (NTAs)](https://data.cityofnewyork.us/City-Government/NTA-map/d3qk-pfyz) (note that an [NTA-census tract crosswalk](https://www1.nyc.gov/assets/planning/download/office/data-maps/nyc-population/census2010/nyc2010census_tabulation_equiv.xlsx) is available) 94 | * [New York City School Districts](https://data.cityofnewyork.us/Education/School-Districts/r8nu-ymqj) (to the Census, NYC is one big district) 95 | * [Syracuse, NY Common Council Districts](https://data.syrgov.net/datasets/881e71dbbea84de28b3fb0b840bc2067_0/explore?location=43.035052%2C-76.139450%2C13.41) 96 | 97 | ## OH - Ohio 98 | 99 | * [Cleveland City Neighborhoods (Statistical Planning Areas)](https://planning.clevelandohio.gov/maps/OpenData.php) 100 | 101 | ## OK - Oklahoma 102 | 103 | * [Oklahoma City Neighborhood Associations](https://data.okc.gov/portal/page/viewer?datasetName=Neighborhood%20Associations&view=map) 104 | * [Oklahoma City City Council Wards](https://data.okc.gov/portal/page/viewer?datasetName=City%20Council%20Wards&view=map) 105 | 106 | ## OR - Oregon 107 | 108 | * [Portland Neighborhood Association Boundaries](https://hub.arcgis.com/datasets/1ef75e34b8504ab9b14bef0c26cade2c_3) 109 | 110 | ## PA - Pennsylvania 111 | 112 | * [Philadelphia Neighborhoods](https://www.opendataphilly.org/dataset/philadelphia-neighborhoods) 113 | * [Pittsburgh Neighborhoods](https://data.wprdc.org/dataset/neighborhoods2) 114 | 115 | ## RI - Rhode Island 116 | 117 | * [Providence Neighborhoods](https://pvdgis.maps.arcgis.com/home/item.html?id=368395369304497090ddb33f5636da87) 118 | * [Providence Wards](https://pvdgis.maps.arcgis.com/home/item.html?id=36468e873abd482ba89aa58be9613ce0) 119 | 120 | ## TX - Texas 121 | 122 | * [Houston "Super Neighborhoods"](https://cohgis-mycity.opendata.arcgis.com/datasets/c3bfee99cbc14a899e4a603ee73203ee_3/) 123 | 124 | ## VA - Virginia 125 | 126 | * [Richmond, VA neighborhoods](https://data.richmondgov.com/Unique-and-Inclusive-Neighborhoods/Neighborhoods/e9k6-65id) 127 | 128 | ## WA - Washington 129 | 130 | * [Seattle Community Reporting Areas](http://data-seattlecitygis.opendata.arcgis.com/datasets/community-reporting-areas) 131 | * [Seattle "City Clerk" Neighborhoods](http://data-seattlecitygis.opendata.arcgis.com/datasets/city-clerk-neighborhoods) 132 | * [Seattle Council 
Districts](http://data-seattlecitygis.opendata.arcgis.com/datasets/council-districts) 133 | * [Spokane, WA Neighborhoods](https://data-spokane.opendata.arcgis.com/datasets/neighborhood-1/explore) 134 | 135 | --- 136 | If you aren't finding what you're looking for above, here are some other resources which haven't been fully explored yet: 137 | 138 | 139 | * While it may or may not be current, the [GitHub repo](https://github.com/codeforgermany/click_that_hood/tree/main/public/data) for [Click That Hood](http://click-that-hood.com/) is worth a look if you don't find what you're looking for here -- and it goes far beyond the US as well. 140 | * The Big Ten Academic Alliance has a [Geoportal](https://geo.btaa.org/) with links to [geodata for a number of US municipalities](https://geo.btaa.org/?f%5Bdc_subject_sm%5D%5B%5D=Municipalities+geospatial+data) 141 | * For a while, Zillow offered neighborhood maps that they pulled together for their service. They no longer provide it, but I came across [this site](https://mapcruzin.com/free-download-neighborhood-boundary-shapefiles.htm) which seems to have archived them. They're organized by state, but for many states, there are only neighborhoods for a single city. 142 | * [Koordinates.com](https://Koordinates.com) is a geospatial data management platform which has aggregated GIS data from diverse sources. At the time of this writing, [a search for 'neighborhood'](https://koordinates.com/search/?q=neighborhood) gets well over 500 hits. 143 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.4; python_version >= "3.7" \ 2 | --hash=sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128 \ 3 | --hash=sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41 4 | attrs==21.4.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" \ 5 | --hash=sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4 \ 6 | --hash=sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd 7 | beautifulsoup4==4.11.1; python_full_version >= "3.6.0" and python_version >= "3.7" \ 8 | --hash=sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30 \ 9 | --hash=sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693 10 | cenpy==1.0.0.post4; python_version >= "3.5" \ 11 | --hash=sha256:43d24ffbff6d1c2879a05499f2ac0776f10803524d466614cdec2cd3c9e9ff20 12 | census-data-aggregator==0.0.6 \ 13 | --hash=sha256:4443165f9e9fc00becb346e7af58868b4a0f80c77b3ca8eb1f468e35bf920f52 \ 14 | --hash=sha256:cf527a1378aebe688584f5828a403778f73be5a74b2f04b9a13edc65c57db49e 15 | certifi==2021.10.8; python_version >= "3.8" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.8" \ 16 | --hash=sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569 \ 17 | --hash=sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872 18 | charset-normalizer==2.0.12; python_full_version >= "3.6.0" and python_version >= "3.7" \ 19 | --hash=sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597 \ 20 | --hash=sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df 21 | click-plugins==1.1.1; python_version >= "3.7" \ 22 | --hash=sha256:46ab999744a9d831159c3411bb0c79346d94a444df9a3a3742e9ed63645f264b \ 23 | 
--hash=sha256:5d262006d3222f5057fd81e1623d4443e41dcda5dc815c06b442aa3c02889fc8 24 | click==8.1.3; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version < "4" and python_version >= "3.7" \ 25 | --hash=sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48 \ 26 | --hash=sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e 27 | cligj==0.7.2; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version < "4" and python_version >= "3.7" \ 28 | --hash=sha256:c1ca117dbce1fe20a5809dc96f01e1c2840f6dcc939b3ddbb1111bf330ba82df \ 29 | --hash=sha256:a4bc13d623356b373c2c27c53dbd9c68cae5d526270bfa71f6c6fa69669c6b27 30 | colorama==0.4.4; python_version >= "3.7" and python_full_version < "3.0.0" and platform_system == "Windows" or platform_system == "Windows" and python_version >= "3.7" and python_full_version >= "3.5.0" \ 31 | --hash=sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2 \ 32 | --hash=sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b 33 | fiona==1.8.21; python_version >= "3.7" \ 34 | --hash=sha256:39c656421e25b4d0d73d0b6acdcbf9848e71f3d9b74f44c27d2d516d463409ae \ 35 | --hash=sha256:43b1d2e45506e56cf3a9f59ba5d6f7981f3f75f4725d1e6cb9a33ba856371ebd \ 36 | --hash=sha256:315e186cb880a8128e110312eb92f5956bbc54d7152af999d3483b463758d6f9 \ 37 | --hash=sha256:9fb2407623c4f44732a33b3f056f8c58c54152b51f0324bf8f10945e711eb549 \ 38 | --hash=sha256:b69054ed810eb7339d7effa88589afca48003206d7627d0b0b149715fc3fde41 \ 39 | --hash=sha256:11532ccfda1073d3f5f558e4bb78d45b268e8680fd6e14993a394c564ddbd069 \ 40 | --hash=sha256:3789523c811809a6e2e170cf9c437631f959f4c7a868f024081612d30afab468 \ 41 | --hash=sha256:085f18d943097ac3396f3f9664ac1ae04ad0ff272f54829f03442187f01b6116 \ 42 | --hash=sha256:388acc9fa07ba7858d508dfe826d4b04d813818bced16c4049de19cc7ca322ef \ 43 | --hash=sha256:40b4eaf5b88407421d6c9e707520abd2ff16d7cd43efb59cd398aa41d2de332c \ 44 | --hash=sha256:3a0edca2a7a070db405d71187214a43d2333a57b4097544a3fcc282066a58bfc 45 | fuzzywuzzy==0.18.0; python_version >= "3.5" \ 46 | --hash=sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993 \ 47 | --hash=sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8 48 | geopandas==0.10.2; python_version >= "3.7" \ 49 | --hash=sha256:1722853464441b603d9be3d35baf8bde43831424a891e82a8545eb8997b65d6c \ 50 | --hash=sha256:efbf47e70732e25c3727222019c92b39b2e0a66ebe4fe379fbe1aa43a2a871db 51 | idna==3.3; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.7" \ 52 | --hash=sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff \ 53 | --hash=sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d 54 | jinja2==3.1.2; python_version >= "3.7" \ 55 | --hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 \ 56 | --hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 57 | libpysal==4.6.2; python_version >= "3.7" \ 58 | --hash=sha256:dfb30f4ad8c882492571120487b246fbad19370bc9bb2bbc77c89d0fcddb0792 \ 59 | --hash=sha256:8a4c4651394aefc6332f2fb1f38336c559e50dc89f977bfaa3d8541610eaa634 60 | markupsafe==2.1.1; python_version >= "3.7" \ 61 | --hash=sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812 \ 62 | --hash=sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a \ 63 | 
--hash=sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e \ 64 | --hash=sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5 \ 65 | --hash=sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4 \ 66 | --hash=sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f \ 67 | --hash=sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e \ 68 | --hash=sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933 \ 69 | --hash=sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6 \ 70 | --hash=sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417 \ 71 | --hash=sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02 \ 72 | --hash=sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a \ 73 | --hash=sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37 \ 74 | --hash=sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980 \ 75 | --hash=sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a \ 76 | --hash=sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3 \ 77 | --hash=sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a \ 78 | --hash=sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff \ 79 | --hash=sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a \ 80 | --hash=sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452 \ 81 | --hash=sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003 \ 82 | --hash=sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1 \ 83 | --hash=sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601 \ 84 | --hash=sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925 \ 85 | --hash=sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f \ 86 | --hash=sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88 \ 87 | --hash=sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63 \ 88 | --hash=sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1 \ 89 | --hash=sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7 \ 90 | --hash=sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a \ 91 | --hash=sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f \ 92 | --hash=sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6 \ 93 | --hash=sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77 \ 94 | --hash=sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603 \ 95 | --hash=sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7 \ 96 | --hash=sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135 \ 97 | --hash=sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96 \ 98 | --hash=sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c \ 99 | --hash=sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247 \ 100 | --hash=sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b 101 | munch==2.5.0; python_version >= "3.7" \ 102 | --hash=sha256:6f44af89a2ce4ed04ff8de41f70b226b984db10a91dcc7b9ac2efc1c77022fdd \ 103 | --hash=sha256:2d735f6f24d4dba3417fa448cae40c6e896ec1fdab6cdb5e6510999758a4dbd2 104 | numpy==1.22.3 \ 105 | 
--hash=sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75 \ 106 | --hash=sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab \ 107 | --hash=sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e \ 108 | --hash=sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4 \ 109 | --hash=sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430 \ 110 | --hash=sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4 \ 111 | --hash=sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce \ 112 | --hash=sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe \ 113 | --hash=sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5 \ 114 | --hash=sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1 \ 115 | --hash=sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62 \ 116 | --hash=sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676 \ 117 | --hash=sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123 \ 118 | --hash=sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802 \ 119 | --hash=sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d \ 120 | --hash=sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168 \ 121 | --hash=sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa \ 122 | --hash=sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a \ 123 | --hash=sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f \ 124 | --hash=sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18 125 | packaging==21.3; python_version >= "3.7" \ 126 | --hash=sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 \ 127 | --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb 128 | pandas==1.4.2; python_version >= "3.8" \ 129 | --hash=sha256:be67c782c4f1b1f24c2f16a157e12c2693fd510f8df18e3287c77f33d124ed07 \ 130 | --hash=sha256:5a206afa84ed20e07603f50d22b5f0db3fb556486d8c2462d8bc364831a4b417 \ 131 | --hash=sha256:0010771bd9223f7afe5f051eb47c4a49534345dfa144f2f5470b27189a4dd3b5 \ 132 | --hash=sha256:3228198333dd13c90b6434ddf61aa6d57deaca98cf7b654f4ad68a2db84f8cfe \ 133 | --hash=sha256:5b79af3a69e5175c6fa7b4e046b21a646c8b74e92c6581a9d825687d92071b51 \ 134 | --hash=sha256:5586cc95692564b441f4747c47c8a9746792e87b40a4680a2feb7794defb1ce3 \ 135 | --hash=sha256:061609334a8182ab500a90fe66d46f6f387de62d3a9cb9aa7e62e3146c712167 \ 136 | --hash=sha256:b8134651258bce418cb79c71adeff0a44090c98d955f6953168ba16cc285d9f7 \ 137 | --hash=sha256:df82739e00bb6daf4bba4479a40f38c718b598a84654cbd8bb498fd6b0aa8c16 \ 138 | --hash=sha256:385c52e85aaa8ea6a4c600a9b2821181a51f8be0aee3af6f2dcb41dafc4fc1d0 \ 139 | --hash=sha256:295872bf1a09758aba199992c3ecde455f01caf32266d50abc1a073e828a7b9d \ 140 | --hash=sha256:95c1e422ced0199cf4a34385ff124b69412c4bc912011ce895582bee620dfcaa \ 141 | --hash=sha256:5c54ea4ef3823108cd4ec7fb27ccba4c3a775e0f83e39c5e17f5094cb17748bc \ 142 | --hash=sha256:c072c7f06b9242c855ed8021ff970c0e8f8b10b35e2640c657d2a541c5950f59 \ 143 | --hash=sha256:f549097993744ff8c41b5e8f2f0d3cbfaabe89b4ae32c8c08ead6cc535b80139 \ 144 | --hash=sha256:ff08a14ef21d94cdf18eef7c569d66f2e24e0bc89350bcd7d243dd804e3b5eb2 \ 145 | --hash=sha256:8c5bf555b6b0075294b73965adaafb39cf71c312e38c5935c93d78f41c19828a \ 146 | 
--hash=sha256:51649ef604a945f781105a6d2ecf88db7da0f4868ac5d45c51cb66081c4d9c73 \ 147 | --hash=sha256:d0d4f13e4be7ce89d7057a786023c461dd9370040bdb5efa0a7fe76b556867a0 \ 148 | --hash=sha256:09d8be7dd9e1c4c98224c4dfe8abd60d145d934e9fc1f5f411266308ae683e6a \ 149 | --hash=sha256:92bc1fc585f1463ca827b45535957815b7deb218c549b7c18402c322c7549a12 150 | pyparsing==3.0.8; python_full_version >= "3.6.8" and python_version >= "3.7" \ 151 | --hash=sha256:ef7b523f6356f763771559412c0d7134753f037822dad1b16945b7b846f7ad06 \ 152 | --hash=sha256:7bf433498c016c4314268d95df76c81b842a4cb2b276fa3312cfb1e1d85f6954 153 | pyproj==3.3.1; python_version >= "3.8" \ 154 | --hash=sha256:473961faef7a9fd723c5d432f65220ea6ab3854e606bf84b4d409a75a4261c78 \ 155 | --hash=sha256:2fef9c1e339f25c57f6ae0558b5ab1bbdf7994529a30d8d7504fc6302ea51c03 \ 156 | --hash=sha256:140fa649fedd04f680a39f8ad339799a55cb1c49f6a84e1b32b97e49646647aa \ 157 | --hash=sha256:b59c08aea13ee428cf8a919212d55c036cc94784805ed77c8f31a4d1f541058c \ 158 | --hash=sha256:1adc9ccd1bf04998493b6a2e87e60656c75ab790653b36cfe351e9ef214828ed \ 159 | --hash=sha256:42eea10afc750fccd1c5c4ba56de29ab791ab4d83c1f7db72705566282ac5396 \ 160 | --hash=sha256:531ea36519fa7b581466d4b6ab32f66ae4dadd9499d726352f71ee5e19c3d1c5 \ 161 | --hash=sha256:67025e37598a6bbed2c9c6c9e4c911f6dd39315d3e1148ead935a5c4d64309d5 \ 162 | --hash=sha256:aed1a3c0cd4182425f91b48d5db39f459bc2fe0d88017ead6425a1bc85faee33 \ 163 | --hash=sha256:3cc4771403db54494e1e55bca8e6d33cde322f8cf0ed39f1557ff109c66d2cd1 \ 164 | --hash=sha256:c99f7b5757a28040a2dd4a28c9805fdf13eef79a796f4a566ab5cb362d10630d \ 165 | --hash=sha256:5dac03d4338a4c8bd0f69144c527474f517b4cbd7d2d8c532cd8937799723248 \ 166 | --hash=sha256:56b0f9ee2c5b2520b18db30a393a7b86130cf527ddbb8c96e7f3c837474a9d79 \ 167 | --hash=sha256:5f92d8f6514516124abb714dce912b20867831162cfff9fae2678ef07b6fcf0f \ 168 | --hash=sha256:1ef1bfbe2dcc558c7a98e2f1836abdcd630390f3160724a6f4f5c818b2be0ad5 \ 169 | --hash=sha256:5ca5f32b56210429b367ca4f9a57ffe67975c487af82e179a24370879a3daf68 \ 170 | --hash=sha256:aba199704c824fb84ab64927e7bc9ef71e603e483130ec0f7e09e97259b8f61f \ 171 | --hash=sha256:120d45ed73144c65e9677dc73ba8a531c495d179dd9f9f0471ac5acc02d7ac4b \ 172 | --hash=sha256:52efb681647dfac185cc655a709bc0caaf910031a0390f816f5fc8ce150cbedc \ 173 | --hash=sha256:5ab0d6e38fda7c13726afacaf62e9f9dd858089d67910471758afd9cb24e0ecd \ 174 | --hash=sha256:45487942c19c5a8b09c91964ea3201f4e094518e34743cae373889a36e3d9260 \ 175 | --hash=sha256:797ad5655d484feac14b0fbb4a4efeaac0cf780a223046e2465494c767fd1c3b \ 176 | --hash=sha256:b3d8e14d91cc95fb3dbc03a9d0588ac58326803eefa5bbb0978d109de3304fbe 177 | python-dateutil==2.8.2; python_version >= "3.8" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.8" \ 178 | --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ 179 | --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 180 | python-levenshtein==0.12.2 \ 181 | --hash=sha256:dc2395fbd148a1ab31090dd113c366695934b9e85fe5a4b2a032745efd0346f6 182 | pytz==2022.1; python_version >= "3.8" \ 183 | --hash=sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c \ 184 | --hash=sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7 185 | requests==2.27.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") \ 186 | --hash=sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d \ 187 | 
--hash=sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61 188 | rtree==1.0.0; python_version >= "3.7" \ 189 | --hash=sha256:757bbf9ca38c241e34812a646f16ffda2cabd535bcd815041b83fe091df7a85c \ 190 | --hash=sha256:fe3954a51d691d3938cbac42ac97f4acacbea8ea622a375df901318a5c4ab0e9 \ 191 | --hash=sha256:24185f39b277aaca0566284858de02edc80dc7b120233be38fcf3b4c7d2e72dc \ 192 | --hash=sha256:b2110fb8675bf809bba431a1876ba76ca5dde829a4de40aa7851941452a01278 \ 193 | --hash=sha256:b0256ed9c27037892bcb7167e7f5c469ee7c5de38c5a895145e33c320584babe \ 194 | --hash=sha256:7f2c0bd3e7d4b68cc27ab605b18487440427d5febba5f4b747b694f9de601c6f \ 195 | --hash=sha256:c2b14f7603576b73a5e0fd2e35394db08c5ca3cfa41e4c8530128d91e5e43dd3 \ 196 | --hash=sha256:973ce22ee8bafa44b3df24c6bf78012e534e1f36103e0bbfbb193ec48e9be22a \ 197 | --hash=sha256:55b771e62b1e391a44776ef9f906944796213cc3cb48ffd6b22493684c68a859 \ 198 | --hash=sha256:0475b2e7fe813c427ceb21e57c22f8b4b7fee6e5966db8a200688163d4853f14 \ 199 | --hash=sha256:e436d8da7527655fd0512dd6a5218f604a3806849f3981ec0ca64930dc19b7f2 \ 200 | --hash=sha256:8d18efe4e69f6b7daee9aaced21e0218786209d55235c909c78dbc5c12368790 \ 201 | --hash=sha256:728cf9b774ed6f120f2ed072082431c14af8243d477656b5b7dc1ff855fe7786 \ 202 | --hash=sha256:3e28303d84f8b5509e26db7c2aa533692a6112a430cc955a7a7e6d899c9d5996 \ 203 | --hash=sha256:062439d3a33d95281445960af76b6189b987cda0803fdc1818e31b68bce989d1 \ 204 | --hash=sha256:0ab0dccff665389329f8d2e623131a1af3ab82b6de570f8c494a429c129f3e65 \ 205 | --hash=sha256:44df5adc12841b94adcbc4e5aaada248e98a4dc2017c8c7060f9a782ef63e050 \ 206 | --hash=sha256:29a1a4452e334eaf3299c8b95f137a2ccafbccfd856041f612ec933eeafb2cf5 \ 207 | --hash=sha256:efdaf7137303af7a85ddd224bacdb27f9f7ece99e0dec627c900e12f22cdefd0 \ 208 | --hash=sha256:264e3b255a1fc6aaa2ddbcedfc15ac40578433f6b35a0c7aaba026215d91d8c3 \ 209 | --hash=sha256:26b2275ebc738cb6a0473c15d80fdfe820ef319015009f8f0789e586552cf411 \ 210 | --hash=sha256:825c1f74a84e9857657c04503c4c50b9f170114183fa2db9211a5d8650cf1ffa \ 211 | --hash=sha256:a91d7b514210ae93029c2a7ed83b2595ca73de5e08a9d87fcdf3a784a7b3ef54 \ 212 | --hash=sha256:0ffaa03d1f7e8291de7cd8a11f92e10579f145dc3a08cd46a9eea65cc7b42173 \ 213 | --hash=sha256:4f2f93c997de551a1a0fa4065e713270ad9a509aeeb143c5b46f332c0759f314 \ 214 | --hash=sha256:a48f46dbb6ab0cb135a43d90529e1fa09a6dd80149a34844f2adf8414b4ab71a \ 215 | --hash=sha256:171aa361b3542bf1e47bdee54c611644bb33d35502e2ceea57ac89cf35330554 \ 216 | --hash=sha256:bc18d4df3edb3b889b177ba39238770afdb5787fb803677c3aadea42a6931485 \ 217 | --hash=sha256:bc6e7384684a260eb2f04fcac64ca5ffe28876132a11d1a883db2a5db8becb64 \ 218 | --hash=sha256:d0483482121346b093b9a42518d40f921adf445915b7aea307eb26768c839682 219 | scipy==1.6.1; python_version >= "3.7" \ 220 | --hash=sha256:a15a1f3fc0abff33e792d6049161b7795909b40b97c6cc2934ed54384017ab76 \ 221 | --hash=sha256:e79570979ccdc3d165456dd62041d9556fb9733b86b4b6d818af7a0afc15f092 \ 222 | --hash=sha256:a423533c55fec61456dedee7b6ee7dce0bb6bfa395424ea374d25afa262be261 \ 223 | --hash=sha256:33d6b7df40d197bdd3049d64e8e680227151673465e5d85723b3b8f6b15a6ced \ 224 | --hash=sha256:6725e3fbb47da428794f243864f2297462e9ee448297c93ed1dcbc44335feb78 \ 225 | --hash=sha256:5fa9c6530b1661f1370bcd332a1e62ca7881785cc0f80c0d559b636567fab63c \ 226 | --hash=sha256:bd50daf727f7c195e26f27467c85ce653d41df4358a25b32434a50d8870fc519 \ 227 | --hash=sha256:f46dd15335e8a320b0fb4685f58b7471702234cba8bb3442b69a3e1dc329c345 \ 228 | 
--hash=sha256:0e5b0ccf63155d90da576edd2768b66fb276446c371b73841e3503be1d63fb5d \ 229 | --hash=sha256:2481efbb3740977e3c831edfd0bd9867be26387cacf24eb5e366a6a374d3d00d \ 230 | --hash=sha256:68cb4c424112cd4be886b4d979c5497fba190714085f46b8ae67a5e4416c32b4 \ 231 | --hash=sha256:5f331eeed0297232d2e6eea51b54e8278ed8bb10b099f69c44e2558c090d06bf \ 232 | --hash=sha256:0c8a51d33556bf70367452d4d601d1742c0e806cd0194785914daf19775f0e67 \ 233 | --hash=sha256:83bf7c16245c15bc58ee76c5418e46ea1811edcc2e2b03041b804e46084ab627 \ 234 | --hash=sha256:794e768cc5f779736593046c9714e0f3a5940bc6dcc1dba885ad64cbfb28e9f0 \ 235 | --hash=sha256:5da5471aed911fe7e52b86bf9ea32fb55ae93e2f0fac66c32e58897cfb02fa07 \ 236 | --hash=sha256:8e403a337749ed40af60e537cc4d4c03febddcc56cd26e774c9b1b600a70d3e4 \ 237 | --hash=sha256:a5193a098ae9f29af283dcf0041f762601faf2e595c0db1da929875b7570353f \ 238 | --hash=sha256:c4fceb864890b6168e79b0e714c585dbe2fd4222768ee90bc1aa0f8218691b11 239 | shapely==1.8.2; python_version >= "3.7" \ 240 | --hash=sha256:7c9e3400b716c51ba43eea1678c28272580114e009b6c78cdd00c44df3e325fa \ 241 | --hash=sha256:ce0b5c5f7acbccf98b3460eecaa40e9b18272b2a734f74fcddf1d7696e047e95 \ 242 | --hash=sha256:3a40bf497b57a6625b83996aed10ce2233bca0e5471b8af771b186d681433ac5 \ 243 | --hash=sha256:6bdc7728f1e5df430d8c588661f79f1eed4a2728c8b689e12707cfec217f68f8 \ 244 | --hash=sha256:a60861b5ca2c488ebcdc706eca94d325c26d1567921c74acc83df5e6913590c7 \ 245 | --hash=sha256:840be3f27a1152851c54b968f2e12d718c9f13b7acd51c482e58a70f60f29e31 \ 246 | --hash=sha256:c60f3758212ec480675b820b13035dda8af8f7cc560d2cc67999b2717fb8faef \ 247 | --hash=sha256:56413f7d32c70b63f239eb0865b24c0c61029e38757de456cc4ab3c416559a0b \ 248 | --hash=sha256:256bdf8080bb7bb504d47b2c76919ecebab9708cc1b26266b3ec32b42448f642 \ 249 | --hash=sha256:c0a0d7752b145343838bd36ed09382d85f5befe426832d7384c5b051c147acbd \ 250 | --hash=sha256:62056e64b12b6d483d79f8e34bf058d2fe734d51c9227c1713705399434eff3b \ 251 | --hash=sha256:8e3ed52a081da58eb4a885c157c594876633dbd4eb283f13ba5bf39c82322d76 \ 252 | --hash=sha256:7c8eda45085ccdd7f9805ea4a93fdd5eb0b6039a61d5f0cefb960487e6dc17a1 \ 253 | --hash=sha256:beee3949ddf381735049cfa6532fb234d5d20a5be910c4f2fb7c7295fd7960e3 \ 254 | --hash=sha256:e07b0bd2a0e61a8afd4d1c1bd23f3550b711f01274ffb53de99358fd781eefd8 \ 255 | --hash=sha256:78966332a89813b237de357a03f612fd451a871fe6e26c12b6b71645fe8eee39 \ 256 | --hash=sha256:8fe641f1f61b3d43dd61b5a85d2ef023e6e19bf8f204a5160a1cb1ec645cbc09 \ 257 | --hash=sha256:cec89a5617c0137f4678282e983c3d63bf838fb00cdf318cc555b4d8409f7130 \ 258 | --hash=sha256:68c8e18dc9dc8a198c3addc8c9596f64137101f566f04b96ecfca0b214cb8b12 \ 259 | --hash=sha256:f12695662c3ad1e6031b3de98f191963d0f09de6d1a4988acd907405644032ba \ 260 | --hash=sha256:15a856fbb588ad5d042784e00918c662902776452008c771ecba2ff615cd197a \ 261 | --hash=sha256:d74de394684d66e25e780b0359fda85be7766af85940fa2dfad728b1a815c71f \ 262 | --hash=sha256:d3f3fac625690f01f35af665649e993f15f924e740b5c0ac0376900655815521 \ 263 | --hash=sha256:1d95842cc6bbbeab673061b63e70b07be9a375c15a60f4098f8fbd29f43af1b4 \ 264 | --hash=sha256:a58e1f362f2091743e5e13212f5d5d16251a4bb63dd0ed587c652d3be9620d3a \ 265 | --hash=sha256:5254240eefc44139ab0d128faf671635d8bdd9c23955ee063d4d6b8f20073ae0 \ 266 | --hash=sha256:75042e8039c79dd01f102bb288beace9dc2f49fc44a2dea875f9b697aa8cd30d \ 267 | --hash=sha256:0c0fd457ce477b1dced507a72f1e2084c9191bfcb8a1e09886990ebd02acf024 \ 268 | --hash=sha256:6fcb28836ae93809de1dde73c03c9c24bab0ba2b2bf419ddb2aeb72c96d110e9 \ 269 | 
--hash=sha256:44d2832c1b706bf43101fda92831a083467cc4b4923a7ed17319ab599c1025d8 \ 270 | --hash=sha256:137f1369630408024a62ff79a437a5657e6c5b76b9cd352dde704b425acdb298 \ 271 | --hash=sha256:2e02da2e988e74d61f15c720f9f613fab51942aae2dfeacdcb78eadece00e1f3 \ 272 | --hash=sha256:3423299254deec075e79fb7dc7909d702104e4167149de7f45510c3a6342eeea \ 273 | --hash=sha256:572af9d5006fd5e3213e37ee548912b0341fb26724d6dc8a4e3950c10197ebb6 274 | six==1.16.0; python_version >= "3.8" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.8" \ 275 | --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 \ 276 | --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 277 | soupsieve==2.3.2.post1; python_full_version >= "3.6.0" and python_version >= "3.7" \ 278 | --hash=sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759 \ 279 | --hash=sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d 280 | urllib3==1.26.9; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.7" \ 281 | --hash=sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14 \ 282 | --hash=sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e 283 | -------------------------------------------------------------------------------- /crosswalks/judicial_districts/population_by_district.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Computing Total Population of each Judicial District\n", 8 | "\n", 9 | "This notebook demonstrates using the `county_district_xref.csv` crosswalk to aggregate data from the American Community Survey by Federal Judicial District." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "import cenpy # https://pypi.org/project/cenpy/ \n", 20 | "import census_data_aggregator # https://pypi.org/project/census-data-aggregator/" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "First, let's get the population for all counties from the most recent ACS" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "acs = cenpy.products.APIConnection('ACSDT5Y2018')\n", 37 | "county_pop = acs.query(cols=['GEO_ID','B01003_001E','B01003_001M'],geo_unit='county')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/html": [ 48 | "
\n", 49 | "\n", 62 | "\n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | "
GEO_IDB01003_001EB01003_001Mstatecounty
00500000US2815147086-55555555528151
10500000US2811112028-55555555528111
20500000US280198321-55555555528019
30500000US2805723480-55555555528057
40500000US2801510129-55555555528015
\n", 116 | "
" 117 | ], 118 | "text/plain": [ 119 | " GEO_ID B01003_001E B01003_001M state county\n", 120 | "0 0500000US28151 47086 -555555555 28 151\n", 121 | "1 0500000US28111 12028 -555555555 28 111\n", 122 | "2 0500000US28019 8321 -555555555 28 019\n", 123 | "3 0500000US28057 23480 -555555555 28 057\n", 124 | "4 0500000US28015 10129 -555555555 28 015" 125 | ] 126 | }, 127 | "execution_count": 3, 128 | "metadata": {}, 129 | "output_type": "execute_result" 130 | } 131 | ], 132 | "source": [ 133 | "county_pop.head()" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "We wanted to be responsible and handle the margin of error correctly, but our sanity check above shows a number of `-555555555` values. According to the Census Bureau's [Notes on ACS 5-Year Data\n", 141 | "](https://www.census.gov/data/developers/data-sets/acs-5year/data-notes.html), \n", 142 | "> A '*****' entry in the margin of error column indicates that the estimate is controlled. A statistical test for sampling variability is not appropriate.\n", 143 | "\n", 144 | "That `*****` would be in the \"margin of error annotation\" column, which we didn't get, but the corresponding value for the \"margin of error estimate\" (`B01003_001M`) is `-555555555` so... in short, maybe we don't need to deal with the MOE? Let's take another look:" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 4, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/html": [ 155 | "
\n", 156 | "\n", 169 | "\n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | "
GEO_IDB01003_001Estatecounty
B01003_001M
-5555555553104310431043104
1002222
1022222
1044444
1052222
...............
903333
922222
952222
962222
971111
\n", 266 | "

64 rows × 4 columns

\n", 267 | "
" 268 | ], 269 | "text/plain": [ 270 | " GEO_ID B01003_001E state county\n", 271 | "B01003_001M \n", 272 | "-555555555 3104 3104 3104 3104\n", 273 | "100 2 2 2 2\n", 274 | "102 2 2 2 2\n", 275 | "104 4 4 4 4\n", 276 | "105 2 2 2 2\n", 277 | "... ... ... ... ...\n", 278 | "90 3 3 3 3\n", 279 | "92 2 2 2 2\n", 280 | "95 2 2 2 2\n", 281 | "96 2 2 2 2\n", 282 | "97 1 1 1 1\n", 283 | "\n", 284 | "[64 rows x 4 columns]" 285 | ] 286 | }, 287 | "execution_count": 4, 288 | "metadata": {}, 289 | "output_type": "execute_result" 290 | } 291 | ], 292 | "source": [ 293 | "county_pop.groupby('B01003_001M').count()" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "Yep, almost all of the rows indicate that we shouldn't or can't take the margin of error into account, so now all we need to do is sum the values." 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 5, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "county_pop = county_pop.rename(columns={'B01003_001E': 'total_pop'}).set_index('GEO_ID')\n", 310 | "\n", 311 | "xref = pd.read_csv('county_district_xref.csv',index_col='geoid', usecols=['geoid','state', 'district'])\n", 312 | "joined = xref.join(county_pop[['total_pop']].astype(int))" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 6, 318 | "metadata": {}, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/html": [ 323 | "
\n", 324 | "\n", 337 | "\n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | "
total_pop
statedistrict
AlabamaMiddle1151252
Northern2870454
Southern842974
AlaskaAlaska738516
ArizonaArizona6946685
.........
West VirginiaNorthern869001
Southern960053
WisconsinEastern3405147
Western2373247
WyomingWyoming581836
\n", 404 | "

91 rows × 1 columns

\n", 405 | "
" 406 | ], 407 | "text/plain": [ 408 | " total_pop\n", 409 | "state district \n", 410 | "Alabama Middle 1151252\n", 411 | " Northern 2870454\n", 412 | " Southern 842974\n", 413 | "Alaska Alaska 738516\n", 414 | "Arizona Arizona 6946685\n", 415 | "... ...\n", 416 | "West Virginia Northern 869001\n", 417 | " Southern 960053\n", 418 | "Wisconsin Eastern 3405147\n", 419 | " Western 2373247\n", 420 | "Wyoming Wyoming 581836\n", 421 | "\n", 422 | "[91 rows x 1 columns]" 423 | ] 424 | }, 425 | "execution_count": 6, 426 | "metadata": {}, 427 | "output_type": "execute_result" 428 | } 429 | ], 430 | "source": [ 431 | "joined.groupby(['state', 'district'])[['total_pop']].sum()" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 7, 437 | "metadata": {}, 438 | "outputs": [], 439 | "source": [ 440 | "joined.groupby(['state', 'district'])[['total_pop']].sum().to_csv('population_by_district_acs2018_5yr.csv')" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": 55, 446 | "metadata": {}, 447 | "outputs": [ 448 | { 449 | "data": { 450 | "text/plain": [ 451 | "total_moe 567.2\n", 452 | "nh_white_moe 2622.7\n", 453 | "nh_black_moe 4705.5\n", 454 | "nh_amerind_moe 1834.2\n", 455 | "nh_asian_moe 4833.5\n", 456 | "nh_nhpi_moe 2184.8\n", 457 | "nh_some_other_moe 3311.7\n", 458 | "nh_twoplus_moe 6888.9\n", 459 | "hispanic_moe 2001.3\n", 460 | "dtype: float64" 461 | ] 462 | }, 463 | "execution_count": 55, 464 | "metadata": {}, 465 | "output_type": "execute_result" 466 | } 467 | ], 468 | "source": [ 469 | "race_by_district_base[moe_cols.values()].max()" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": 10, 475 | "metadata": {}, 476 | "outputs": [ 477 | { 478 | "data": { 479 | "text/html": [ 480 | "
\n", 481 | "\n", 494 | "\n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 
766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | "
GEO_IDB03002_001EB03002_003EB03002_004EB03002_005EB03002_006EB03002_007EB03002_008EB03002_009EB03002_012E...B03002_003MB03002_004MB03002_005MB03002_006MB03002_007MB03002_008MB03002_009MB03002_012Mstatecounty
40500000US28015101296511350160008229...20121102020201103528015
50500000US2804321278116839205135000115140...242171632424249312528043
70500000US28041137149896349653000107162...41084320202095-55555555528041
80500000US280933578717068171181524002811281...2714018352727135-55555555528093
100500000US280113259210579208694121300189701...2770521122727125-55555555528011
..................................................................
32080500000US19067158581459327490224270183467...1579110324015112-55555555519067
32120500000US19087199261758544951522915347948...256641691425104-55555555519087
32130500000US19145153631402026010114700323512...1511782571515105-55555555519145
32140500000US191559350382251133525761911719017122...8193126179188285-55555555519155
32160500000US1902120260122265513212597111405107...125741421411265-55555555519021
\n", 788 | "

2069 rows × 21 columns

\n", 789 | "
" 790 | ], 791 | "text/plain": [ 792 | " GEO_ID B03002_001E B03002_003E B03002_004E B03002_005E \\\n", 793 | "4 0500000US28015 10129 6511 3501 6 \n", 794 | "5 0500000US28043 21278 11683 9205 135 \n", 795 | "7 0500000US28041 13714 9896 3496 53 \n", 796 | "8 0500000US28093 35787 17068 17118 15 \n", 797 | "10 0500000US28011 32592 10579 20869 41 \n", 798 | "... ... ... ... ... ... \n", 799 | "3208 0500000US19067 15858 14593 274 90 \n", 800 | "3212 0500000US19087 19926 17585 449 51 \n", 801 | "3213 0500000US19145 15363 14020 260 101 \n", 802 | "3214 0500000US19155 93503 82251 1335 257 \n", 803 | "3216 0500000US19021 20260 12226 551 3 \n", 804 | "\n", 805 | " B03002_006E B03002_007E B03002_008E B03002_009E B03002_012E ... \\\n", 806 | "4 0 0 0 82 29 ... \n", 807 | "5 0 0 0 115 140 ... \n", 808 | "7 0 0 0 107 162 ... \n", 809 | "8 24 0 0 281 1281 ... \n", 810 | "10 213 0 0 189 701 ... \n", 811 | "... ... ... ... ... ... ... \n", 812 | "3208 224 27 0 183 467 ... \n", 813 | "3212 522 9 15 347 948 ... \n", 814 | "3213 147 0 0 323 512 ... \n", 815 | "3214 619 11 7 1901 7122 ... \n", 816 | "3216 2125 97 11 140 5107 ... \n", 817 | "\n", 818 | " B03002_003M B03002_004M B03002_005M B03002_006M B03002_007M \\\n", 819 | "4 20 121 10 20 20 \n", 820 | "5 24 217 163 24 24 \n", 821 | "7 4 108 43 20 20 \n", 822 | "8 27 140 18 35 27 \n", 823 | "10 27 70 52 112 27 \n", 824 | "... ... ... ... ... ... \n", 825 | "3208 15 79 110 32 40 \n", 826 | "3212 25 66 41 69 14 \n", 827 | "3213 15 117 82 57 15 \n", 828 | "3214 8 193 126 179 18 \n", 829 | "3216 12 57 4 142 141 \n", 830 | "\n", 831 | " B03002_008M B03002_009M B03002_012M state county \n", 832 | "4 20 110 35 28 015 \n", 833 | "5 24 93 125 28 043 \n", 834 | "7 20 95 -555555555 28 041 \n", 835 | "8 27 135 -555555555 28 093 \n", 836 | "10 27 125 -555555555 28 011 \n", 837 | "... ... ... ... ... ... 
\n", 838 | "3208 15 112 -555555555 19 067 \n", 839 | "3212 25 104 -555555555 19 087 \n", 840 | "3213 15 105 -555555555 19 145 \n", 841 | "3214 8 285 -555555555 19 155 \n", 842 | "3216 12 65 -555555555 19 021 \n", 843 | "\n", 844 | "[2069 rows x 21 columns]" 845 | ] 846 | }, 847 | "execution_count": 10, 848 | "metadata": {}, 849 | "output_type": "execute_result" 850 | } 851 | ], 852 | "source": [ 853 | "county_race[(county_race['B03002_004M'] > 100) \n", 854 | " | (county_race['B03002_006M'] > 100) \n", 855 | " | (county_race['B03002_008M'] > 100) \n", 856 | " | (county_race['B03002_009M'] > 100) ]" 857 | ] 858 | }, 859 | { 860 | "cell_type": "code", 861 | "execution_count": null, 862 | "metadata": {}, 863 | "outputs": [], 864 | "source": [] 865 | } 866 | ], 867 | "metadata": { 868 | "kernelspec": { 869 | "display_name": "Python 3", 870 | "language": "python", 871 | "name": "python3" 872 | }, 873 | "language_info": { 874 | "codemirror_mode": { 875 | "name": "ipython", 876 | "version": 3 877 | }, 878 | "file_extension": ".py", 879 | "mimetype": "text/x-python", 880 | "name": "python", 881 | "nbconvert_exporter": "python", 882 | "pygments_lexer": "ipython3", 883 | "version": "3.7.6" 884 | } 885 | }, 886 | "nbformat": 4, 887 | "nbformat_minor": 4 888 | } 889 | -------------------------------------------------------------------------------- /crosswalks/zip_to_zcta/build_crosswalk.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import geopandas as gpd\n", 11 | "import json\n", 12 | "import urllib.request\n", 13 | "from pathlib import Path" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "# Build a master list of ZIP Codes\n", 21 | "\n", 22 | "ZIP Codes change frequently, so this is challenging, and they aren't authoritatively documented in any public resource we know about. \n", 23 | "We'll merge together two sources, GeoNames, and a ZIP Code Business Patters (ZBP) dataset, to get the biggest list of potential ZIPs we'd need to map to a ZCTA. \n", 24 | "\n", 25 | "## GeoNames\n", 26 | "\n", 27 | "The good thing about GeoNames is that each ZIP is assigned a latitude/longitude. It's not clear how those were assigned, which is a liability for this entire process, but we'll hope that they are accurate and, for ZIPs that are not ZCTAs, we'll try to locate the GeoNames coordinate in a ZCTA geometry (below)." 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/html": [ 38 | "
\n", 39 | "\n", 52 | "\n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | "
countryzipcitystatestusabcountycounty_fipscommunitycommunity_codelatitudelongitudeaccuracysource
0US99553AkutanAlaskaAKAleutians East013NaNNaN54.1430-165.78541.0geonames
1US99571Cold BayAlaskaAKAleutians East013NaNNaN55.1858-162.72111.0geonames
2US99583False PassAlaskaAKAleutians East013NaNNaN54.8542-163.41131.0geonames
3US99612King CoveAlaskaAKAleutians East013NaNNaN55.0628-162.30561.0geonames
4US99661Sand PointAlaskaAKAleutians East013NaNNaN55.3192-160.49141.0geonames
\n", 154 | "
" 155 | ], 156 | "text/plain": [ 157 | " country zip city state stusab county county_fips \\\n", 158 | "0 US 99553 Akutan Alaska AK Aleutians East 013 \n", 159 | "1 US 99571 Cold Bay Alaska AK Aleutians East 013 \n", 160 | "2 US 99583 False Pass Alaska AK Aleutians East 013 \n", 161 | "3 US 99612 King Cove Alaska AK Aleutians East 013 \n", 162 | "4 US 99661 Sand Point Alaska AK Aleutians East 013 \n", 163 | "\n", 164 | " community community_code latitude longitude accuracy source \n", 165 | "0 NaN NaN 54.1430 -165.7854 1.0 geonames \n", 166 | "1 NaN NaN 55.1858 -162.7211 1.0 geonames \n", 167 | "2 NaN NaN 54.8542 -163.4113 1.0 geonames \n", 168 | "3 NaN NaN 55.0628 -162.3056 1.0 geonames \n", 169 | "4 NaN NaN 55.3192 -160.4914 1.0 geonames " 170 | ] 171 | }, 172 | "execution_count": 2, 173 | "metadata": {}, 174 | "output_type": "execute_result" 175 | } 176 | ], 177 | "source": [ 178 | "gn = pd.read_csv('geonames_us_zips.csv', dtype={\n", 179 | " 'zip': 'object',\n", 180 | " 'county_fips': 'object'\n", 181 | "})\n", 182 | "gn['source'] = 'geonames'\n", 183 | "\n", 184 | "# We know that GeoNames includes military and diplomatic ZIP Codes and ZIP Codes in the Marshall Islands, none of which have ZCTAs. \n", 185 | "# drop those now so we can avoid the trouble. We'll include other US Island Area postal codes, too, in case we run this with a new file.\n", 186 | "# Puerto Rico DOES have ZCTAs\n", 187 | "NON_ZCTA_POSTAL_ABBRS = ['AS', 'GU', 'MP', 'VI', 'FM', 'MH', 'PW', 'AA', 'AE', 'AP']\n", 188 | "gn = gn[(gn['stusab'].notna()) & (~gn['stusab'].isin(NON_ZCTA_POSTAL_ABBRS))]\n", 189 | "gn.head()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "## ZIP Code Business Patterns\n", 197 | "\n", 198 | "The Census Bureau's ZIP Code Business Patterns was the original dataset we wanted to integrate with other data collected at the ZBP level. \n", 199 | "We'll get a bit of data from that program to give us a list of ZIP Codes that \"matter\". The specific query doesn't matter much. We set the `NAICS2017` and `EMPSZES` predicates to values indicating summary statistics, so that we only get back one row per ZIP. \n" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 3, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "text/html": [ 210 | "
\n", 211 | "\n", 224 | "\n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | "
NAMEZIPCODEESTABNAICS2017EMPSZES
0ZIP 01001 (Agawam, MA)0100147300001
1ZIP 01002 (Amherst, MA)0100253900001
2ZIP 01007 (Belchertown, MA)0100722200001
3ZIP 01550 (Southbridge, MA)0155031600001
4ZIP 01003 (Amherst, MA)010032000001
\n", 278 | "
" 279 | ], 280 | "text/plain": [ 281 | " NAME ZIPCODE ESTAB NAICS2017 EMPSZES\n", 282 | "0 ZIP 01001 (Agawam, MA) 01001 473 00 001\n", 283 | "1 ZIP 01002 (Amherst, MA) 01002 539 00 001\n", 284 | "2 ZIP 01007 (Belchertown, MA) 01007 222 00 001\n", 285 | "3 ZIP 01550 (Southbridge, MA) 01550 316 00 001\n", 286 | "4 ZIP 01003 (Amherst, MA) 01003 20 00 001" 287 | ] 288 | }, 289 | "execution_count": 3, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "request = urllib.request.urlopen('https://api.census.gov/data/2018/zbp?get=NAME,ZIPCODE,ESTAB&NAICS2017=00&EMPSZES=001')\n", 296 | "data = request.read() \n", 297 | "raw_zbp_data = json.loads(data.decode(request.info().get_content_charset()))\n", 298 | "zbp = pd.DataFrame(data=raw_zbp_data[1:],columns=raw_zbp_data[0])\n", 299 | "zbp.head()" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 4, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "# we don't need all this data\n", 309 | "# from GeoNames, we'll use 'zip', 'city', 'stusab', 'latitude', 'longitude' -- for context, and to position ZIPs in ZCTAs\n", 310 | "# from ZBP so we'll only merge the ZIPCODE and NAME -- for context\n", 311 | "master_zip = gn[['zip', 'city', 'stusab', 'latitude', 'longitude', 'source']].merge(zbp[['ZIPCODE', 'NAME']],left_on='zip', right_on='ZIPCODE',how='outer')\n" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 5, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "master_zip = master_zip.rename(columns={\n", 321 | " 'zip': 'geonames_zip',\n", 322 | " 'ZIPCODE': 'zbp_zip',\n", 323 | " 'NAME': 'zbp_title'\n", 324 | "})\n", 325 | "master_zip['zip_code'] = master_zip.apply(lambda x: x['geonames_zip'] if not pd.isnull(x['geonames_zip']) else x['zbp_zip'],axis=1)\n", 326 | "master_zip['source'] = master_zip['source'].fillna('zbp')" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "## ZCTAs\n", 334 | "\n", 335 | "The TIGER ZCTA shapefile provides us with a master list of ZCTAs and their geometries (boundaries). This requires `tl_2019_us_zcta510.zip`, a 500MB shapefile, which is larger than we can store in GitHub.\n", 336 | "\n", 337 | "This code will download it if it's not available, or you can get it from https://www2.census.gov/geo/tiger/TIGER2019/ZCTA5/" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 6, 343 | "metadata": {}, 344 | "outputs": [ 345 | { 346 | "name": "stdout", 347 | "output_type": "stream", 348 | "text": [ 349 | "tl_2019_us_zcta510.zip is available for use\n" 350 | ] 351 | } 352 | ], 353 | "source": [ 354 | "p = Path('tl_2019_us_zcta510.zip')\n", 355 | "p.exists()\n", 356 | "if not p.exists():\n", 357 | " print(f\"{p.resolve()} not found. Downloading\")\n", 358 | " urllib.request.urlretrieve('https://www2.census.gov/geo/tiger/TIGER2019/ZCTA5/tl_2019_us_zcta510.zip',p.resolve())\n", 359 | "else:\n", 360 | " print(f\"{p} is available for use\")" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 7, 366 | "metadata": {}, 367 | "outputs": [ 368 | { 369 | "data": { 370 | "text/html": [ 371 | "
\n", 372 | "\n", 385 | "\n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | "
ZCTA5CE10GEOID10CLASSFP10MTFCC10FUNCSTAT10ALAND10AWATER10INTPTLAT10INTPTLON10geometry
04345143451B5G6350S63484186157689+41.3183010-083.6174935POLYGON ((-83.70873 41.32733, -83.70815 41.327...
14345243452B5G6350S12152230413721730+41.5157923-082.9809454POLYGON ((-83.08698 41.53780, -83.08256 41.537...
24345643456B5G6350S93209751003775+41.6318300-082.8393923MULTIPOLYGON (((-82.83558 41.71082, -82.83515 ...
34345743457B5G6350S480046810+41.2673301-083.4274872POLYGON ((-83.49650 41.25371, -83.48382 41.253...
44345843458B5G6350S257381639915+41.5304461-083.2133648POLYGON ((-83.22229 41.53102, -83.22228 41.532...
\n", 469 | "
" 470 | ], 471 | "text/plain": [ 472 | " ZCTA5CE10 GEOID10 CLASSFP10 MTFCC10 FUNCSTAT10 ALAND10 AWATER10 \\\n", 473 | "0 43451 43451 B5 G6350 S 63484186 157689 \n", 474 | "1 43452 43452 B5 G6350 S 121522304 13721730 \n", 475 | "2 43456 43456 B5 G6350 S 9320975 1003775 \n", 476 | "3 43457 43457 B5 G6350 S 48004681 0 \n", 477 | "4 43458 43458 B5 G6350 S 2573816 39915 \n", 478 | "\n", 479 | " INTPTLAT10 INTPTLON10 \\\n", 480 | "0 +41.3183010 -083.6174935 \n", 481 | "1 +41.5157923 -082.9809454 \n", 482 | "2 +41.6318300 -082.8393923 \n", 483 | "3 +41.2673301 -083.4274872 \n", 484 | "4 +41.5304461 -083.2133648 \n", 485 | "\n", 486 | " geometry \n", 487 | "0 POLYGON ((-83.70873 41.32733, -83.70815 41.327... \n", 488 | "1 POLYGON ((-83.08698 41.53780, -83.08256 41.537... \n", 489 | "2 MULTIPOLYGON (((-82.83558 41.71082, -82.83515 ... \n", 490 | "3 POLYGON ((-83.49650 41.25371, -83.48382 41.253... \n", 491 | "4 POLYGON ((-83.22229 41.53102, -83.22228 41.532... " 492 | ] 493 | }, 494 | "execution_count": 7, 495 | "metadata": {}, 496 | "output_type": "execute_result" 497 | } 498 | ], 499 | "source": [ 500 | "zcta_geo = gpd.read_file('zip://tl_2019_us_zcta510.zip')\n", 501 | "zcta_geo.head()" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": 8, 507 | "metadata": {}, 508 | "outputs": [], 509 | "source": [ 510 | "# update the `master_zip` data frame with all ZCTAs that match ZIP codes\n", 511 | "mz_w_zcta = master_zip.merge(zcta_geo.rename(columns={'ZCTA5CE10': 'zcta'})[['zcta']],left_on='zip_code', right_on='zcta', how='outer')\n", 512 | "\n", 513 | "# get rid of some of the columns we've been dragging along, and re-order\n", 514 | "mz_w_zcta = mz_w_zcta[['zip_code', 'zcta', 'geonames_zip', 'zbp_zip', 'city', 'stusab', 'zbp_title', 'latitude', 'longitude', 'source']]\n", 515 | "mz_w_zcta['source'] = mz_w_zcta['source'].fillna('tiger')" 516 | ] 517 | }, 518 | { 519 | "cell_type": "markdown", 520 | "metadata": {}, 521 | "source": [ 522 | "## Handle ZIPs with no ZCTA\n", 523 | "\n", 524 | "How many are there?" 
525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": 9, 530 | "metadata": {}, 531 | "outputs": [ 532 | { 533 | "name": "stdout", 534 | "output_type": "stream", 535 | "text": [ 536 | "ZIPs with no ZCTA: 7987\n" 537 | ] 538 | } 539 | ], 540 | "source": [ 541 | "print(f\"ZIPs with no ZCTA: {len(mz_w_zcta[pd.isnull(mz_w_zcta['zcta'])])}\")" 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": 10, 547 | "metadata": {}, 548 | "outputs": [], 549 | "source": [ 550 | "# Create a GeoDataFrame for the ZIP Codes which don't yet have ZCTAs but which do have lat/lon\n", 551 | "temp = mz_w_zcta[pd.isnull(mz_w_zcta['zcta'])][['zip_code', 'latitude', 'longitude']].dropna() # no point in keeping null lat/lng\n", 552 | "zip_wo_zcta_gdf = gpd.GeoDataFrame(temp,geometry=gpd.points_from_xy(temp['longitude'],temp['latitude']), \n", 553 | " crs=\"EPSG:4269\") # projection wasn't actually specified but this is a good bet" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": 11, 559 | "metadata": {}, 560 | "outputs": [], 561 | "source": [ 562 | "# Create a new dataframe which adds ZCTAs for ZIP Codes which can be located within some ZCTA\n", 563 | "# only keep the useful columns from zcta_geo\n", 564 | "geo_joined = gpd.sjoin(zip_wo_zcta_gdf,zcta_geo[['ZCTA5CE10', 'geometry']],how='inner',op='intersects')" 565 | ] 566 | }, 567 | { 568 | "cell_type": "code", 569 | "execution_count": 12, 570 | "metadata": {}, 571 | "outputs": [ 572 | { 573 | "data": { 574 | "text/html": [ 575 | "
\n", 576 | "\n", 589 | "\n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | "
zip_codelatitudelongitudegeometryindex_rightZCTA5CE10
209950961.2181-149.9003POINT (-149.90030 61.21810)1945999501
249951461.2181-149.9003POINT (-149.90030 61.21810)1945999501
309952061.2181-149.9003POINT (-149.90030 61.21810)1945999501
319952161.2181-149.9003POINT (-149.90030 61.21810)1945999501
329952261.2181-149.9003POINT (-149.90030 61.21810)1945999501
\n", 649 | "
" 650 | ], 651 | "text/plain": [ 652 | " zip_code latitude longitude geometry index_right \\\n", 653 | "20 99509 61.2181 -149.9003 POINT (-149.90030 61.21810) 19459 \n", 654 | "24 99514 61.2181 -149.9003 POINT (-149.90030 61.21810) 19459 \n", 655 | "30 99520 61.2181 -149.9003 POINT (-149.90030 61.21810) 19459 \n", 656 | "31 99521 61.2181 -149.9003 POINT (-149.90030 61.21810) 19459 \n", 657 | "32 99522 61.2181 -149.9003 POINT (-149.90030 61.21810) 19459 \n", 658 | "\n", 659 | " ZCTA5CE10 \n", 660 | "20 99501 \n", 661 | "24 99501 \n", 662 | "30 99501 \n", 663 | "31 99501 \n", 664 | "32 99501 " 665 | ] 666 | }, 667 | "execution_count": 12, 668 | "metadata": {}, 669 | "output_type": "execute_result" 670 | } 671 | ], 672 | "source": [ 673 | "geo_joined.head()" 674 | ] 675 | }, 676 | { 677 | "cell_type": "code", 678 | "execution_count": 13, 679 | "metadata": {}, 680 | "outputs": [], 681 | "source": [ 682 | "# update the zcta column with values we found by geocoding\n", 683 | "mz_w_zcta = mz_w_zcta.set_index('zip_code')\n", 684 | "mz_w_zcta['zcta'].update(geo_joined.set_index('zip_code')['ZCTA5CE10'])" 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": 14, 690 | "metadata": {}, 691 | "outputs": [ 692 | { 693 | "name": "stdout", 694 | "output_type": "stream", 695 | "text": [ 696 | "Still need 110\n" 697 | ] 698 | } 699 | ], 700 | "source": [ 701 | "# what's left?\n", 702 | "print(f\"Still need {len(mz_w_zcta[pd.isnull(mz_w_zcta['zcta'])])}\")" 703 | ] 704 | }, 705 | { 706 | "cell_type": "markdown", 707 | "metadata": {}, 708 | "source": [ 709 | "## Manual review\n", 710 | "\n", 711 | "At a certain point, one runs out of technical strategies. We enlisted a student to manually review the remaining unmatched ZIP Codes. The list that student worked with was shorter than our `still_null` here, so even after including these manual updates, this process will leave ZIP Codes not in any ZCTA. See [ZIP_ZCTA_README.md]() for more details on the method.\n", 712 | "\n" 713 | ] 714 | }, 715 | { 716 | "cell_type": "code", 717 | "execution_count": 15, 718 | "metadata": {}, 719 | "outputs": [ 720 | { 721 | "data": { 722 | "text/html": [ 723 | "
\n", 724 | "\n", 737 | "\n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | "
zipzcta
00212302215
10220402203
20220602203
30221702108
40228302111
\n", 773 | "
" 774 | ], 775 | "text/plain": [ 776 | " zip zcta\n", 777 | "0 02123 02215\n", 778 | "1 02204 02203\n", 779 | "2 02206 02203\n", 780 | "3 02217 02108\n", 781 | "4 02283 02111" 782 | ] 783 | }, 784 | "execution_count": 15, 785 | "metadata": {}, 786 | "output_type": "execute_result" 787 | } 788 | ], 789 | "source": [ 790 | "# Load in the key columns from the manual review process\n", 791 | "manual = pd.read_csv('zcta_review.csv',\n", 792 | " dtype={'zip': 'object', 'result': 'object'},\n", 793 | " usecols=['zip','result']).rename(\n", 794 | " columns={ 'result': 'zcta' }\n", 795 | " ).dropna() # drop rows which didn't get a ZCTA\n", 796 | "manual.head()" 797 | ] 798 | }, 799 | { 800 | "cell_type": "code", 801 | "execution_count": 16, 802 | "metadata": {}, 803 | "outputs": [], 804 | "source": [ 805 | "# We'll raise errors if anything in manual tries to overwrite something which \n", 806 | "# is not null, since the manual review was based off of a slightly different \n", 807 | "# starting dataset. It would probably be fine to just let them go, or to use\n", 808 | "# overwrite=False to silently ignore manual values if mw_w_zcta already has something\n", 809 | "mz_w_zcta.update(manual.set_index('zip'),errors='raise')" 810 | ] 811 | }, 812 | { 813 | "cell_type": "code", 814 | "execution_count": 17, 815 | "metadata": {}, 816 | "outputs": [ 817 | { 818 | "data": { 819 | "text/html": [ 820 | "
\n", 821 | "\n", 834 | "\n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | "
zctageonames_zipzbp_zipcitystusabzbp_titlelatitudelongitudesource
zip_code
96718NaN9671896718Hawaii National ParkHIZIP 96718 (Hawaii National Park, HI)19.5935-155.4380geonames
04737NaN04737NaNClayton LakeMENaN46.6109-69.5223geonames
89023NaN8902389023MercuryNVZIP 89023 (Mercury, NV)36.6605-115.9945geonames
72405NaNNaN72405NaNNaNZIP 72405 (Jonesboro, AR)NaNNaNzbp
89437NaNNaN89437NaNNaNZIP 89437 (Sparks, NV)NaNNaNzbp
99999NaNNaN99999NaNNaNZIP 99999 (Unclassified)NaNNaNzbp
\n", 936 | "
" 937 | ], 938 | "text/plain": [ 939 | " zcta geonames_zip zbp_zip city stusab \\\n", 940 | "zip_code \n", 941 | "96718 NaN 96718 96718 Hawaii National Park HI \n", 942 | "04737 NaN 04737 NaN Clayton Lake ME \n", 943 | "89023 NaN 89023 89023 Mercury NV \n", 944 | "72405 NaN NaN 72405 NaN NaN \n", 945 | "89437 NaN NaN 89437 NaN NaN \n", 946 | "99999 NaN NaN 99999 NaN NaN \n", 947 | "\n", 948 | " zbp_title latitude longitude source \n", 949 | "zip_code \n", 950 | "96718 ZIP 96718 (Hawaii National Park, HI) 19.5935 -155.4380 geonames \n", 951 | "04737 NaN 46.6109 -69.5223 geonames \n", 952 | "89023 ZIP 89023 (Mercury, NV) 36.6605 -115.9945 geonames \n", 953 | "72405 ZIP 72405 (Jonesboro, AR) NaN NaN zbp \n", 954 | "89437 ZIP 89437 (Sparks, NV) NaN NaN zbp \n", 955 | "99999 ZIP 99999 (Unclassified) NaN NaN zbp " 956 | ] 957 | }, 958 | "execution_count": 17, 959 | "metadata": {}, 960 | "output_type": "execute_result" 961 | } 962 | ], 963 | "source": [ 964 | "# what's left?\n", 965 | "mz_w_zcta[pd.isnull(mz_w_zcta['zcta'])]" 966 | ] 967 | }, 968 | { 969 | "cell_type": "markdown", 970 | "metadata": {}, 971 | "source": [ 972 | "## This will have to do!\n", 973 | "\n", 974 | "The three geonames addresses were ones our student reviewed and found good reasons for them not having ZCTAs.\n", 975 | "\n", 976 | "ZIP Code 99999 isn't real, and maybe we should have just dropped it above!\n", 977 | "\n", 978 | "[72405](https://about.usps.com/newsroom/local-releases/ar/2019/0603-new-jonesboro-zip-code.htm) and [89437](https://www.kolotv.com/content/news/Tahoe-Reno-Industrial-Center-to-get-its-own-zip-code-497853001.html) are both quite new, so may get ZCTAs in an upcoming update, or could be added to the manual review file in a future update." 979 | ] 980 | }, 981 | { 982 | "cell_type": "code", 983 | "execution_count": 18, 984 | "metadata": {}, 985 | "outputs": [], 986 | "source": [ 987 | "# just keep the columns we care about\n", 988 | "# If we were working more on this, we might somehow save the \"authority\" or \"source\" so that we would have some idea about where\n", 989 | "# we got the ZIP Codes\n", 990 | "temp = mz_w_zcta.reset_index()[['zip_code','zcta','source']] \n", 991 | "\n", 992 | "# some null zip codes got in here from the ZCTA Shapefile. Why aren't those in GeoNames or ZIP Code Business Patterns?\n", 993 | "# Who can know? 
But logically, if it's a ZCTA, then we assume that it has a matching ZIP Code\n", 994 | "temp['zip_code'] = temp['zip_code'].fillna(temp['zcta'])\n" 995 | ] 996 | }, 997 | { 998 | "cell_type": "code", 999 | "execution_count": 19, 1000 | "metadata": {}, 1001 | "outputs": [], 1002 | "source": [ 1003 | "temp.to_csv('zip_zcta_xref.csv',index=False)" 1004 | ] 1005 | } 1006 | ], 1007 | "metadata": { 1008 | "kernelspec": { 1009 | "display_name": "Python 3", 1010 | "language": "python", 1011 | "name": "python3" 1012 | }, 1013 | "language_info": { 1014 | "codemirror_mode": { 1015 | "name": "ipython", 1016 | "version": 3 1017 | }, 1018 | "file_extension": ".py", 1019 | "mimetype": "text/x-python", 1020 | "name": "python", 1021 | "nbconvert_exporter": "python", 1022 | "pygments_lexer": "ipython3", 1023 | "version": "3.7.6" 1024 | } 1025 | }, 1026 | "nbformat": 4, 1027 | "nbformat_minor": 4 1028 | } 1029 | -------------------------------------------------------------------------------- /crosswalks/judicial_districts/race_by_district.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Race by District (with Margin of Error)\n", 8 | "\n", 9 | "This workbook demonstrates how to aggregate ACS data where some estimates may be less reliable, typically because they are for small subgroups.\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "import cenpy # https://pypi.org/project/cenpy/ \n", 20 | "import census_data_aggregator # https://pypi.org/project/census-data-aggregator/" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | "
label
B03002_001EEstimate!!Total
B03002_002EEstimate!!Total!!Not Hispanic or Latino
B03002_003EEstimate!!Total!!Not Hispanic or Latino!!White alone
B03002_004EEstimate!!Total!!Not Hispanic or Latino!!Black or African American alone
B03002_005EEstimate!!Total!!Not Hispanic or Latino!!American Indian and Alaska Native alone
B03002_006EEstimate!!Total!!Not Hispanic or Latino!!Asian alone
B03002_007EEstimate!!Total!!Not Hispanic or Latino!!Native Hawaiian and Other Pacific Islander alone
B03002_008EEstimate!!Total!!Not Hispanic or Latino!!Some other race alone
B03002_009EEstimate!!Total!!Not Hispanic or Latino!!Two or more races
B03002_010EEstimate!!Total!!Not Hispanic or Latino!!Two or more races!!Two races including Some other race
B03002_011EEstimate!!Total!!Not Hispanic or Latino!!Two or more races!!Two races excluding Some other race, and three or more races
B03002_012EEstimate!!Total!!Hispanic or Latino
B03002_013EEstimate!!Total!!Hispanic or Latino!!White alone
B03002_014EEstimate!!Total!!Hispanic or Latino!!Black or African American alone
B03002_015EEstimate!!Total!!Hispanic or Latino!!American Indian and Alaska Native alone
B03002_016EEstimate!!Total!!Hispanic or Latino!!Asian alone
B03002_017EEstimate!!Total!!Hispanic or Latino!!Native Hawaiian and Other Pacific Islander alone
B03002_018EEstimate!!Total!!Hispanic or Latino!!Some other race alone
B03002_019EEstimate!!Total!!Hispanic or Latino!!Two or more races
B03002_020EEstimate!!Total!!Hispanic or Latino!!Two or more races!!Two races including Some other race
B03002_021EEstimate!!Total!!Hispanic or Latino!!Two or more races!!Two races excluding Some other race, and three or more races
\n", 139 | "
" 140 | ], 141 | "text/plain": [ 142 | " label\n", 143 | "B03002_001E Estimate!!Total\n", 144 | "B03002_002E Estimate!!Total!!Not Hispanic or Latino\n", 145 | "B03002_003E Estimate!!Total!!Not Hispanic or Latino!!White alone\n", 146 | "B03002_004E Estimate!!Total!!Not Hispanic or Latino!!Black or African American alone\n", 147 | "B03002_005E Estimate!!Total!!Not Hispanic or Latino!!American Indian and Alaska Native alone\n", 148 | "B03002_006E Estimate!!Total!!Not Hispanic or Latino!!Asian alone\n", 149 | "B03002_007E Estimate!!Total!!Not Hispanic or Latino!!Native Hawaiian and Other Pacific Islander alone\n", 150 | "B03002_008E Estimate!!Total!!Not Hispanic or Latino!!Some other race alone\n", 151 | "B03002_009E Estimate!!Total!!Not Hispanic or Latino!!Two or more races\n", 152 | "B03002_010E Estimate!!Total!!Not Hispanic or Latino!!Two or more races!!Two races including Some other race\n", 153 | "B03002_011E Estimate!!Total!!Not Hispanic or Latino!!Two or more races!!Two races excluding Some other race, and three or more races\n", 154 | "B03002_012E Estimate!!Total!!Hispanic or Latino\n", 155 | "B03002_013E Estimate!!Total!!Hispanic or Latino!!White alone\n", 156 | "B03002_014E Estimate!!Total!!Hispanic or Latino!!Black or African American alone\n", 157 | "B03002_015E Estimate!!Total!!Hispanic or Latino!!American Indian and Alaska Native alone\n", 158 | "B03002_016E Estimate!!Total!!Hispanic or Latino!!Asian alone\n", 159 | "B03002_017E Estimate!!Total!!Hispanic or Latino!!Native Hawaiian and Other Pacific Islander alone\n", 160 | "B03002_018E Estimate!!Total!!Hispanic or Latino!!Some other race alone\n", 161 | "B03002_019E Estimate!!Total!!Hispanic or Latino!!Two or more races\n", 162 | "B03002_020E Estimate!!Total!!Hispanic or Latino!!Two or more races!!Two races including Some other race\n", 163 | "B03002_021E Estimate!!Total!!Hispanic or Latino!!Two or more races!!Two races excluding Some other race, and three or more races" 164 | ] 165 | }, 166 | "execution_count": 2, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "acs = cenpy.products.APIConnection('ACSDT5Y2018')\n", 173 | "\n", 174 | "# Refresh our memory on the variable codes for various columns in the race tables\n", 175 | "pd.set_option('display.max_colwidth',None)\n", 176 | "acs.varslike('B03002_*')[['label']].sort_index()" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 3, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "# make something to help us use friendlier names for the columns\n", 186 | "# Use an ordered dict to ensure that things between estimate and MOE cols stay in sync\n", 187 | "from collections import OrderedDict\n", 188 | "race_cols = OrderedDict([\n", 189 | " ('B03002_001E', 'total'),\n", 190 | " ('B03002_003E', 'nh_white'),\n", 191 | " ('B03002_004E', 'nh_black'),\n", 192 | " ('B03002_005E', 'nh_amerind'),\n", 193 | " ('B03002_006E', 'nh_asian'),\n", 194 | " ('B03002_007E', 'nh_nhpi'),\n", 195 | " ('B03002_008E', 'nh_some_other'),\n", 196 | " ('B03002_009E', 'nh_twoplus'),\n", 197 | " ('B03002_012E', 'hispanic')\n", 198 | "])\n", 199 | "moe_cols = OrderedDict((k.replace('E','M'),v+\"_moe\") for k,v in race_cols.items())\n", 200 | "query_cols = ['GEO_ID'] + list(race_cols.keys()) + list(moe_cols.keys())\n", 201 | "county_race = acs.query(query_cols,'county')\n", 202 | "for k in query_cols[1:]: # cenpy doesn't cast estimates to integer so we have to handle that.\n", 203 | " county_race[k] = 
county_race[k].astype(int)\n", 204 | "county_race = county_race.rename(columns=race_cols).rename(columns=moe_cols)\n", 205 | "\n", 206 | "# a Margin of Error value of -555555555 \"indicates that the estimate is controlled. \n", 207 | "# A statistical test for sampling variability is not appropriate.\"\n", 208 | "# The math doesn't work with that value, so replace those with 0\n", 209 | "county_race = county_race.replace(-555555555,0) \n", 210 | "county_race = county_race.drop(['state', 'county'], axis='columns') # API gives us those but we don't need them" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 4, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "# Join our crosswalk to the ACS data\n", 220 | "xref = pd.read_csv('county_district_xref.csv',index_col='geoid', usecols=['geoid','state', 'district'])\n", 221 | "joined = xref.join(county_race.set_index('GEO_ID'))# xref\n" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 5, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "# a helper function so that we can sum more than one estimate/moe pair in a given data frame\n", 231 | "def sum_with_moe(df, *column_pairs):\n", 232 | " \"\"\"Given a data frame and a list of one or more tuples representing estimate/error pairs,\n", 233 | " return a dictionary where each key is one of the values from column pairs and the corresponding\n", 234 | " value is the approximate sum, or approximate error for the sum.\n", 235 | " \"\"\"\n", 236 | " result = {}\n", 237 | " for est,err in column_pairs:\n", 238 | " tuples = [tuple(x) for x in df[[est,err]].to_numpy()]\n", 239 | " est_sum, err_sum = census_data_aggregator.approximate_sum(*tuples)\n", 240 | " result[est] = est_sum\n", 241 | " result[err] = err_sum\n", 242 | " return result\n", 243 | "\n", 244 | "def compute_single_cv(est,moe):\n", 245 | " se = moe/1.645 # assumes normal distribution\n", 246 | " cv = se/est*100\n", 247 | " return cv\n", 248 | "\n", 249 | "def compute_cvs(df, *column_pairs):\n", 250 | " \"\"\"Given a data frame and a list of one or more tuples representing estimate/error pairs,\n", 251 | " return a new DataFrame where each column represents the CV for one of the pairs.\n", 252 | " Columns in the new DataFrame will be named by appending \"_cv\" to the first value\n", 253 | " in each column_pair.\n", 254 | " \"\"\"\n", 255 | " cvs = []\n", 256 | " for est,moe in column_pairs:\n", 257 | " cv = df[[est,moe]].apply(lambda x: compute_single_cv(x[est],x[moe]),axis=1)\n", 258 | " cv.name = f\"{est}_cv\"\n", 259 | " cvs.append(cv)\n", 260 | " return pd.concat(cvs,axis=1)\n" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 6, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "data": { 270 | "text/html": [ 271 | "
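A note on the `1.645` above: published ACS margins of error are at the 90 percent confidence level, so dividing by 1.645 converts an MOE back into a standard error before taking the ratio. As a quick sanity check of the formula (using rounded nh_asian figures for Alabama's Middle District, taken from the district sums shown below):

```python
# CV = (MOE / 1.645) / estimate * 100; the smaller the CV, the more reliable the estimate.
est, moe = 18388, 533   # approximate nh_asian estimate and MOE for Alabama's Middle District
print(round((moe / 1.645) / est * 100, 2))   # about 1.76 (percent), i.e. quite reliable
```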
\n", 272 | "\n", 285 | "\n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | "
statedistricttotaltotal_moenh_whitenh_white_moenh_blacknh_black_moenh_amerindnh_amerind_moenh_asiannh_asian_moenh_nhpinh_nhpi_moenh_some_othernh_some_other_moenh_twoplusnh_twoplus_moehispanichispanic_moe
0AlabamaMiddle11512520.0684038454.93822061223.03308396.818388533.3205143.71551479.4207991337.340757148.2
1AlabamaNorthern28704540.01999982585.56284661964.212469846.8350081034.51169223.04155918.7505062237.2138699193.9
2AlabamaSouthern8429740.0512710370.52750651002.87466537.910540583.5147103.01797566.0115591097.323690294.9
3AlaskaAlaska738516564.0450754677.222817730.11035061448.545617998.28544395.91459515.6546331880.251186257.9
4ArizonaArizona69466850.038258861206.22866142527.92719461834.22224772081.612523561.291771290.81547503919.121633120.0
\n", 429 | "
" 430 | ], 431 | "text/plain": [ 432 | " state district total total_moe nh_white nh_white_moe nh_black \\\n", 433 | "0 Alabama Middle 1151252 0.0 684038 454.9 382206 \n", 434 | "1 Alabama Northern 2870454 0.0 1999982 585.5 628466 \n", 435 | "2 Alabama Southern 842974 0.0 512710 370.5 275065 \n", 436 | "3 Alaska Alaska 738516 564.0 450754 677.2 22817 \n", 437 | "4 Arizona Arizona 6946685 0.0 3825886 1206.2 286614 \n", 438 | "\n", 439 | " nh_black_moe nh_amerind nh_amerind_moe nh_asian nh_asian_moe nh_nhpi \\\n", 440 | "0 1223.0 3308 396.8 18388 533.3 205 \n", 441 | "1 1964.2 12469 846.8 35008 1034.5 1169 \n", 442 | "2 1002.8 7466 537.9 10540 583.5 147 \n", 443 | "3 730.1 103506 1448.5 45617 998.2 8544 \n", 444 | "4 2527.9 271946 1834.2 222477 2081.6 12523 \n", 445 | "\n", 446 | " nh_nhpi_moe nh_some_other nh_some_other_moe nh_twoplus nh_twoplus_moe \\\n", 447 | "0 143.7 1551 479.4 20799 1337.3 \n", 448 | "1 223.0 4155 918.7 50506 2237.2 \n", 449 | "2 103.0 1797 566.0 11559 1097.3 \n", 450 | "3 395.9 1459 515.6 54633 1880.2 \n", 451 | "4 561.2 9177 1290.8 154750 3919.1 \n", 452 | "\n", 453 | " hispanic hispanic_moe \n", 454 | "0 40757 148.2 \n", 455 | "1 138699 193.9 \n", 456 | "2 23690 294.9 \n", 457 | "3 51186 257.9 \n", 458 | "4 2163312 0.0 " 459 | ] 460 | }, 461 | "execution_count": 6, 462 | "metadata": {}, 463 | "output_type": "execute_result" 464 | } 465 | ], 466 | "source": [ 467 | "# sum the counties\n", 468 | "sums = []\n", 469 | "\n", 470 | "for (state, district), df in joined.groupby(['state', 'district']):\n", 471 | " tuples = zip(race_cols.values(), moe_cols.values()) # we've renamed the columns to the values of those dicts\n", 472 | " d = sum_with_moe(df, *tuples)\n", 473 | " d['state'] = state\n", 474 | " d['district'] = district\n", 475 | " sums.append(d)\n", 476 | "\n", 477 | "race_by_district_base = pd.DataFrame(sums) \n", 478 | "\n", 479 | "cols = list(race_by_district_base.columns) # for review purposes, it will be nice to have our grouping values at the front\n", 480 | "cols.remove('state') # so take them out\n", 481 | "cols.remove('district')\n", 482 | "cols = ['state', 'district'] + cols # put them where we want them\n", 483 | "race_by_district_base = race_by_district_base[cols]\n", 484 | "pd.options.display.float_format = '{:.1f}'.format\n", 485 | "race_by_district_base.head() # how does that look?" 486 | ] 487 | }, 488 | { 489 | "cell_type": "markdown", 490 | "metadata": {}, 491 | "source": [ 492 | "## Now what?\n", 493 | "\n", 494 | "Having aggregated margins of error enables two things: to test whether any given estimate is \"reliable\", and to test whether any two values are *significantly* different. \n", 495 | "\n", 496 | "For now, we'll defer checking for \"significant difference,\" since I didn't feel like fishing around for pairs to compare. I'll just say that the LA Times DataDesk team has a python library which encapsulates the [statistical difference test](https://github.com/datadesk/census-error-analyzer#test-statistical-difference), so you might want to use that instead of re-implementing it. \n", 497 | "\n", 498 | "\n", 499 | "Testing reliability involves computing the Coefficient of Variation (CV). 
There are no hard and fast rules, but, as documented in this [Tufts GIS tutorial](http://sites.tufts.edu/gis/files/2013/11/Amercian-Community-Survey_Margin-of-error-tutorial.pdf), here are two rules of thumb about how to proceed with a given CV.\n",
    500 | "\n",
    501 | "| Source | High reliability | Medium \"be careful\" | Low \"use extreme caution\" |\n",
    502 | "| --- | --- | --- | --- |\n",
    503 | "| Census Bureau | CV <15% | CV 15-30% | CV >30% |\n",
    504 | "| ESRI | CV <12% | CV 12-40% | CV >40% |\n"
    505 |    ]
    506 |   },
\n", 534 | "\n", 547 | "\n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | "
statedistricttotal_cvnh_white_cvnh_black_cvnh_amerind_cvnh_asian_cvnh_nhpi_cvnh_some_other_cvnh_twoplus_cvhispanic_cv
0AlabamaMiddle0.00.00.27.31.842.618.83.90.2
1AlabamaNorthern0.00.00.24.11.811.613.42.70.1
2AlabamaSouthern0.00.00.24.43.442.619.15.80.8
3AlaskaAlaska0.00.11.90.91.32.821.52.10.3
4ArizonaArizona0.00.00.50.40.62.78.61.50.0
\n", 637 | "
" 638 | ], 639 | "text/plain": [ 640 | " state district total_cv nh_white_cv nh_black_cv nh_amerind_cv \\\n", 641 | "0 Alabama Middle 0.0 0.0 0.2 7.3 \n", 642 | "1 Alabama Northern 0.0 0.0 0.2 4.1 \n", 643 | "2 Alabama Southern 0.0 0.0 0.2 4.4 \n", 644 | "3 Alaska Alaska 0.0 0.1 1.9 0.9 \n", 645 | "4 Arizona Arizona 0.0 0.0 0.5 0.4 \n", 646 | "\n", 647 | " nh_asian_cv nh_nhpi_cv nh_some_other_cv nh_twoplus_cv hispanic_cv \n", 648 | "0 1.8 42.6 18.8 3.9 0.2 \n", 649 | "1 1.8 11.6 13.4 2.7 0.1 \n", 650 | "2 3.4 42.6 19.1 5.8 0.8 \n", 651 | "3 1.3 2.8 21.5 2.1 0.3 \n", 652 | "4 0.6 2.7 8.6 1.5 0.0 " 653 | ] 654 | }, 655 | "execution_count": 7, 656 | "metadata": {}, 657 | "output_type": "execute_result" 658 | } 659 | ], 660 | "source": [ 661 | "tuples = zip(race_cols.values(), moe_cols.values()) # again, get pairs of column names for estimate/moe\n", 662 | "race_district_cvs = compute_cvs(race_by_district_base,*tuples)\n", 663 | "race_district_cvs.insert(0,'state',race_by_district_base['state']) # the indexes will be aligned, so we can just\n", 664 | "race_district_cvs.insert(1,'district',race_by_district_base['district']) # insert the group labels\n", 665 | "race_district_cvs.head() " 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": null, 671 | "metadata": {}, 672 | "outputs": [], 673 | "source": [] 674 | }, 675 | { 676 | "cell_type": "markdown", 677 | "metadata": {}, 678 | "source": [ 679 | " ## What have we got\n", 680 | " \n", 681 | "Typically, you'd probably just consult the CV matrix for specific values before you went too far using them, but for our purposes, let's iterate through and see where we should take care. You'll see that the most common cases of caution are for populations which tend to be small -- \"Native Hawaiian/Pacific Islander\" (except in Hawaii) and \"Some other race\" (which is most often used by Latinos, and so is often quite small among non-hispanic populations)" 682 | ] 683 | }, 684 | { 685 | "cell_type": "code", 686 | "execution_count": 8, 687 | "metadata": {}, 688 | "outputs": [ 689 | { 690 | "name": "stdout", 691 | "output_type": "stream", 692 | "text": [ 693 | "Reviewing reliability of aggregated race by district\n", 694 | "\n", 695 | "Alabama Middle\n", 696 | " nh_nhpi_cv - 42.6 - low reliability - use extreme caution\n", 697 | " nh_some_other_cv - 18.8 - med reliability - use caution\n", 698 | "\n", 699 | "Alabama Northern\n", 700 | " No warnings\n", 701 | "\n", 702 | "Alabama Southern\n", 703 | " nh_nhpi_cv - 42.6 - low reliability - use extreme caution\n", 704 | " nh_some_other_cv - 19.1 - med reliability - use caution\n", 705 | "\n", 706 | "Alaska\n", 707 | " nh_some_other_cv - 21.5 - med reliability - use caution\n", 708 | "\n", 709 | "Arizona\n", 710 | " No warnings\n", 711 | "\n", 712 | "Arkansas Eastern\n", 713 | " nh_nhpi_cv - 28.5 - med reliability - use caution\n", 714 | " nh_some_other_cv - 22.4 - med reliability - use caution\n", 715 | "\n", 716 | "Arkansas Western\n", 717 | " nh_some_other_cv - 18.3 - med reliability - use caution\n", 718 | "\n", 719 | "California Central\n", 720 | " No warnings\n", 721 | "\n", 722 | "California Eastern\n", 723 | " No warnings\n", 724 | "\n", 725 | "California Northern\n", 726 | " No warnings\n", 727 | "\n", 728 | "California Southern\n", 729 | " No warnings\n", 730 | "\n", 731 | "Colorado\n", 732 | " No warnings\n", 733 | "\n", 734 | "Connecticut\n", 735 | " nh_nhpi_cv - 18.2 - med reliability - use caution\n", 736 | "\n", 737 | "Delaware\n", 738 | " nh_nhpi_cv - 18.7 - med 
reliability - use caution\n", 739 | "\n", 740 | "District of Columbia\n", 741 | " nh_nhpi_cv - 16.3 - med reliability - use caution\n", 742 | "\n", 743 | "Florida Middle\n", 744 | " No warnings\n", 745 | "\n", 746 | "Florida Northern\n", 747 | " No warnings\n", 748 | "\n", 749 | "Florida Southern\n", 750 | " No warnings\n", 751 | "\n", 752 | "Georgia Middle\n", 753 | " nh_nhpi_cv - 17.6 - med reliability - use caution\n", 754 | "\n", 755 | "Georgia Northern\n", 756 | " nh_nhpi_cv - 16.2 - med reliability - use caution\n", 757 | "\n", 758 | "Georgia Southern\n", 759 | " No warnings\n", 760 | "\n", 761 | "Hawaii\n", 762 | " No warnings\n", 763 | "\n", 764 | "Idaho\n", 765 | " nh_some_other_cv - 16.7 - med reliability - use caution\n", 766 | "\n", 767 | "Illinois Central\n", 768 | " nh_nhpi_cv - 20.0 - med reliability - use caution\n", 769 | "\n", 770 | "Illinois Northern\n", 771 | " No warnings\n", 772 | "\n", 773 | "Illinois Southern\n", 774 | " nh_nhpi_cv - 27.0 - med reliability - use caution\n", 775 | " nh_some_other_cv - 18.1 - med reliability - use caution\n", 776 | "\n", 777 | "Indiana Northern\n", 778 | " No warnings\n", 779 | "\n", 780 | "Indiana Southern\n", 781 | " No warnings\n", 782 | "\n", 783 | "Iowa Northern\n", 784 | " nh_some_other_cv - 23.6 - med reliability - use caution\n", 785 | "\n", 786 | "Iowa Southern\n", 787 | " nh_some_other_cv - 18.5 - med reliability - use caution\n", 788 | "\n", 789 | "Kansas\n", 790 | " No warnings\n", 791 | "\n", 792 | "Kentucky Eastern\n", 793 | " nh_nhpi_cv - 16.8 - med reliability - use caution\n", 794 | " nh_some_other_cv - 16.4 - med reliability - use caution\n", 795 | "\n", 796 | "Kentucky Western\n", 797 | " No warnings\n", 798 | "\n", 799 | "Louisiana Eastern\n", 800 | " nh_nhpi_cv - 28.1 - med reliability - use caution\n", 801 | "\n", 802 | "Louisiana Middle\n", 803 | " nh_nhpi_cv - 24.8 - med reliability - use caution\n", 804 | " nh_some_other_cv - 26.6 - med reliability - use caution\n", 805 | "\n", 806 | "Louisiana Western\n", 807 | " No warnings\n", 808 | "\n", 809 | "Maine\n", 810 | " nh_nhpi_cv - 22.9 - med reliability - use caution\n", 811 | " nh_some_other_cv - 18.4 - med reliability - use caution\n", 812 | "\n", 813 | "Maryland\n", 814 | " No warnings\n", 815 | "\n", 816 | "Massachusetts\n", 817 | " No warnings\n", 818 | "\n", 819 | "Michigan Eastern\n", 820 | " No warnings\n", 821 | "\n", 822 | "Michigan Western\n", 823 | " No warnings\n", 824 | "\n", 825 | "Minnesota\n", 826 | " No warnings\n", 827 | "\n", 828 | "Mississippi northern\n", 829 | " nh_nhpi_cv - 30.0 - med reliability - use caution\n", 830 | " nh_some_other_cv - 24.4 - med reliability - use caution\n", 831 | "\n", 832 | "Mississippi southern\n", 833 | " nh_nhpi_cv - 28.8 - med reliability - use caution\n", 834 | " nh_some_other_cv - 16.1 - med reliability - use caution\n", 835 | "\n", 836 | "Missouri Eastern\n", 837 | " No warnings\n", 838 | "\n", 839 | "Missouri Western\n", 840 | " No warnings\n", 841 | "\n", 842 | "Montana\n", 843 | " nh_some_other_cv - 25.0 - med reliability - use caution\n", 844 | "\n", 845 | "Nebraska\n", 846 | " No warnings\n", 847 | "\n", 848 | "Nevada\n", 849 | " No warnings\n", 850 | "\n", 851 | "New Hampshire\n", 852 | " nh_nhpi_cv - 20.7 - med reliability - use caution\n", 853 | " nh_some_other_cv - 18.0 - med reliability - use caution\n", 854 | "\n", 855 | "New Jersey\n", 856 | " No warnings\n", 857 | "\n", 858 | "New Mexico\n", 859 | " No warnings\n", 860 | "\n", 861 | "New York Eastern\n", 862 | " No warnings\n", 863 | "\n", 864 
| "New York Northern\n", 865 | " nh_nhpi_cv - 15.1 - med reliability - use caution\n", 866 | "\n", 867 | "New York Southern\n", 868 | " No warnings\n", 869 | "\n", 870 | "New York Western\n", 871 | " No warnings\n", 872 | "\n", 873 | "North Carolina Eastern\n", 874 | " No warnings\n", 875 | "\n", 876 | "North Carolina Middle\n", 877 | " No warnings\n", 878 | "\n", 879 | "North Carolina Western\n", 880 | " No warnings\n", 881 | "\n", 882 | "North Dakota\n", 883 | " nh_nhpi_cv - 24.4 - med reliability - use caution\n", 884 | " nh_some_other_cv - 30.5 - low reliability - use extreme caution\n", 885 | "\n", 886 | "Ohio Northern\n", 887 | " No warnings\n", 888 | "\n", 889 | "Ohio Southern\n", 890 | " No warnings\n", 891 | "\n", 892 | "Oklahoma Eastern\n", 893 | " nh_some_other_cv - 16.7 - med reliability - use caution\n", 894 | "\n", 895 | "Oklahoma Northern\n", 896 | " No warnings\n", 897 | "\n", 898 | "Oklahoma Western\n", 899 | " No warnings\n", 900 | "\n", 901 | "Oregon\n", 902 | " No warnings\n", 903 | "\n", 904 | "Pennsylvania Eastern\n", 905 | " No warnings\n", 906 | "\n", 907 | "Pennsylvania Middle\n", 908 | " nh_nhpi_cv - 17.8 - med reliability - use caution\n", 909 | "\n", 910 | "Pennsylvania Western\n", 911 | " No warnings\n", 912 | "\n", 913 | "Puerto Rico\n", 914 | " nh_amerind_cv - 53.5 - low reliability - use extreme caution\n", 915 | " nh_asian_cv - 17.3 - med reliability - use caution\n", 916 | " nh_nhpi_cv - 66.0 - low reliability - use extreme caution\n", 917 | "\n", 918 | "Rhode Island\n", 919 | " nh_nhpi_cv - 22.4 - med reliability - use caution\n", 920 | "\n", 921 | "South Carolina\n", 922 | " No warnings\n", 923 | "\n", 924 | "South Dakota\n", 925 | " nh_nhpi_cv - 23.9 - med reliability - use caution\n", 926 | " nh_some_other_cv - 33.3 - low reliability - use extreme caution\n", 927 | "\n", 928 | "Tennessee Eastern\n", 929 | " nh_nhpi_cv - 18.3 - med reliability - use caution\n", 930 | " nh_some_other_cv - 15.6 - med reliability - use caution\n", 931 | "\n", 932 | "Tennessee Middle\n", 933 | " No warnings\n", 934 | "\n", 935 | "Tennessee Western\n", 936 | " nh_nhpi_cv - 23.7 - med reliability - use caution\n", 937 | " nh_some_other_cv - 18.4 - med reliability - use caution\n", 938 | "\n", 939 | "Texas Eastern\n", 940 | " No warnings\n", 941 | "\n", 942 | "Texas Northern\n", 943 | " No warnings\n", 944 | "\n", 945 | "Texas Southern\n", 946 | " No warnings\n", 947 | "\n", 948 | "Texas Western\n", 949 | " No warnings\n", 950 | "\n", 951 | "Utah\n", 952 | " No warnings\n", 953 | "\n", 954 | "Vermont\n", 955 | " nh_nhpi_cv - 25.5 - med reliability - use caution\n", 956 | " nh_some_other_cv - 17.9 - med reliability - use caution\n", 957 | "\n", 958 | "Virginia Eastern\n", 959 | " No warnings\n", 960 | "\n", 961 | "Virginia Western\n", 962 | " nh_nhpi_cv - 18.4 - med reliability - use caution\n", 963 | "\n", 964 | "Washington Eastern\n", 965 | " nh_some_other_cv - 20.6 - med reliability - use caution\n", 966 | "\n", 967 | "Washington Western\n", 968 | " No warnings\n", 969 | "\n", 970 | "West Virginia Northern\n", 971 | " nh_nhpi_cv - 46.7 - low reliability - use extreme caution\n", 972 | " nh_some_other_cv - 18.6 - med reliability - use caution\n", 973 | "\n", 974 | "West Virginia Southern\n", 975 | " nh_nhpi_cv - 30.5 - low reliability - use extreme caution\n", 976 | " nh_some_other_cv - 22.1 - med reliability - use caution\n", 977 | "\n", 978 | "Wisconsin Eastern\n", 979 | " No warnings\n", 980 | "\n", 981 | "Wisconsin Western\n", 982 | " nh_nhpi_cv - 23.7 - med reliability - 
use caution\n", 983 | "\n", 984 | "Wyoming\n", 985 | " nh_nhpi_cv - 32.5 - low reliability - use extreme caution\n", 986 | " nh_some_other_cv - 34.3 - low reliability - use extreme caution\n", 987 | "\n" 988 | ] 989 | } 990 | ], 991 | "source": [ 992 | "print(\"Reviewing reliability of aggregated race by district\\n\")\n", 993 | "for idx, row in race_district_cvs.iterrows():\n", 994 | " warnings = []\n", 995 | " for col in race_district_cvs.columns[2:]: # iterate all the non-label columns\n", 996 | " if row[col] > 30:\n", 997 | " warnings.append(f\"{col:>17} - {row[col]:.1f} - low reliability - use extreme caution\")\n", 998 | " elif row[col] > 15:\n", 999 | " warnings.append(f\"{col:>17} - {row[col]:.1f} - med reliability - use caution\")\n", 1000 | " if row['state'] == row['district']: # simplify for single-district states\n", 1001 | " print(f\"{row['state']}\")\n", 1002 | " else:\n", 1003 | " print(f\"{row['state']} {row['district']}\")\n", 1004 | " if len(warnings) == 0:\n", 1005 | " print(\" No warnings\")\n", 1006 | " else:\n", 1007 | " for w in warnings:\n", 1008 | " print(f\" {w}\")\n", 1009 | " print(\"\")\n", 1010 | " " 1011 | ] 1012 | } 1013 | ], 1014 | "metadata": { 1015 | "kernelspec": { 1016 | "display_name": "Python 3", 1017 | "language": "python", 1018 | "name": "python3" 1019 | }, 1020 | "language_info": { 1021 | "codemirror_mode": { 1022 | "name": "ipython", 1023 | "version": 3 1024 | }, 1025 | "file_extension": ".py", 1026 | "mimetype": "text/x-python", 1027 | "name": "python", 1028 | "nbconvert_exporter": "python", 1029 | "pygments_lexer": "ipython3", 1030 | "version": "3.7.6" 1031 | } 1032 | }, 1033 | "nbformat": 4, 1034 | "nbformat_minor": 4 1035 | } 1036 | --------------------------------------------------------------------------------