├── .gitignore ├── .ipynb_checkpoints ├── BEA-checkpoint.ipynb ├── BLS-checkpoint.ipynb ├── Census-Trade-checkpoint.ipynb ├── Census_ACS-checkpoint.ipynb ├── ComTrade-checkpoint.ipynb ├── ECB-checkpoint.ipynb ├── FRED-checkpoint.ipynb ├── IMF-checkpoint.ipynb ├── OECD-checkpoint.ipynb ├── World_Bank-checkpoint.ipynb └── Yahoo_Finance-checkpoint.ipynb ├── APIs ├── 440710.csv ├── BEA.ipynb ├── BLS.ipynb ├── BLS_Prices.ipynb ├── BLS_tool.ipynb ├── Census_ACS.ipynb ├── Census_TimeSeries_M3.ipynb ├── Census_Trade.ipynb ├── ComTrade.ipynb ├── ECB.ipynb ├── EIA.ipynb ├── FRED.ipynb ├── ILO.ipynb ├── IMF.ipynb ├── OECD.ipynb ├── OECD_Updated.ipynb ├── README.md ├── Treasury.ipynb ├── World_Bank.ipynb ├── Yahoo_Finance.ipynb ├── codes │ └── country_codes.csv ├── geo │ ├── us_counties.topo.json │ ├── us_states.topo.json │ └── vega.json └── rec_dates.csv ├── README.md ├── _config.yml ├── bd_CPS ├── README.md ├── bd_CPS_1976-88.ipynb ├── bd_CPS_1989-93.ipynb ├── bd_CPS_cpi.ipynb ├── bd_CPS_dd.ipynb ├── bd_CPS_details.py ├── bd_CPS_id.ipynb ├── bd_CPS_reader.ipynb ├── bd_CPS_revisions_reader.ipynb └── codebook.txt └── micro ├── 440710.csv ├── ACS_to_CZ_income_map.ipynb ├── CBSA_2YR_Indicators.csv ├── CBSA_Names.csv ├── CBSA_Split.ipynb ├── COVID_CPS.ipynb ├── CPS-ASEC_Income-of-the-Aged.ipynb ├── CPS-ASEC_poverty.ipynb ├── CPS_Disability_NILF_CBSA.ipynb ├── CPS_EPOP_Map_Example.ipynb ├── CPS_EPOP_P10wage_CBSA.ipynb ├── CPS_EPOP_P25Wage_CBSA.ipynb ├── CPS_Example_Notebook_UPDATED.ipynb ├── CPS_Family_Income.ipynb ├── CPS_Matching_Flow_Disabled_to_Work.ipynb ├── CPS_Matching_Flow_Retired_to_Work.ipynb ├── CPS_NILF.ipynb ├── CPS_PECERT_Mapper.ipynb ├── CPS_Scatter_CBSA.ipynb ├── CPS_Wages.ipynb ├── README.md ├── SCF_saving_income_percentile.ipynb ├── Trade_Network_example.ipynb ├── acs_map.pgf ├── bd_CPS-age_cohorts.ipynb ├── bd_CPS_HRSUSL1I.ipynb ├── bd_CPS_INDM.ipynb ├── bd_CPS_ONET.ipynb ├── bd_CPS_benchmark.ipynb ├── bd_CPS_codebook.ipynb ├── bd_CPS_flow_MM.ipynb ├── 
bd_CPS_flow_YY.ipynb ├── bd_CPS_headship.ipynb ├── bd_CPS_ind_occ_wage.ipynb ├── bd_CPS_retirement.ipynb ├── bd_CPS_same_job.ipynb ├── bd_CPS_seasonal_adjustment.ipynb ├── bd_CPS_value_labels.ipynb ├── cpi.csv ├── cps_example └── January_2017_Record_Layout.txt ├── fed_hh_example.csv ├── rec_dates.csv ├── results └── wealth_dist.csv ├── shapefiles ├── cb_2019_us_cbsa_20m.dbf ├── cb_2019_us_cbsa_20m.shp ├── cb_2019_us_cbsa_20m.shx ├── cb_2019_us_nation_20m.dbf ├── cb_2019_us_nation_20m.shp ├── cb_2019_us_nation_20m.shx ├── cb_2019_us_state_20m.dbf ├── cb_2019_us_state_20m.shp ├── cb_2019_us_state_20m.shx ├── st99_d00.dbf ├── st99_d00.shp ├── st99_d00.shx ├── states.csv ├── states.dbf ├── states.prj ├── states.shp ├── states.shx ├── tl_2013_us_cbsa.dbf ├── tl_2013_us_cbsa.prj ├── tl_2013_us_cbsa.shp ├── tl_2013_us_cbsa.shp.xml ├── tl_2013_us_cbsa.shx ├── tl_2013_us_coastline.dbf ├── tl_2013_us_coastline.prj ├── tl_2013_us_coastline.shp ├── tl_2013_us_coastline.shp.xml └── tl_2013_us_coastline.shx └── x13as └── x13as /.gitignore: -------------------------------------------------------------------------------- 1 | *.app 2 | __pycache__/ 3 | *.dat 4 | /micro/CWS/data/*.dat 5 | *.pyc 6 | */data/* 7 | */Data/* 8 | */CWS/* 9 | */micro/CWS/* 10 | */Shape/* 11 | */.ipynb_checkpoints/* 12 | /Macro_Dash/Data/* 13 | /Japan/Data/* 14 | /CEPR/* 15 | *config.py 16 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/ComTrade-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "United Nations ComTrade\n", 10 | "=====\n", 11 | "\n", 12 | "## Bilateral trade data by product\n", 13 | "\n", 14 | "----\n", 15 | "\n", 16 | "*September 3, 2017*
\n", 17 | "*@bd_econ*\n", 18 | "\n", 19 | "This example retrieves annual data for the trade of a specific product by all countries for which data are available.\n", 20 | "\n", 21 | "[Documentation](https://comtrade.un.org/data/doc/api/) for the UN Comtrade API.\n", 22 | "\n", 23 | "This example uses a list of country codes stored as a csv file. " 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import requests\n", 35 | "import pandas as pd\n", 36 | "import time\n", 37 | "\n", 38 | "# Used to loop over countries 5 at a time.\n", 39 | "def chunker(seq, size):\n", 40 | " return (seq[pos:pos + size] for pos in xrange(0, len(seq), size))\n", 41 | "\n", 42 | "c_codes = pd.read_csv('codes/country_codes.csv').set_index('id')" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "## Paramaters/ Settings for request" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 5, 55 | "metadata": { 56 | "collapsed": true 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "prod_type = 'C' # Commodity\n", 61 | "freq = 'A' # Annual \n", 62 | "classification = 'HS' # harmonized system\n", 63 | "prod = '440710' # HS 6-digit production ID\n", 64 | "years = ['2005', '2010', '2015']\n", 65 | "base = 'http://comtrade.un.org/api/get?'\n", 66 | "url = '{}max=50000&type={}&freq={}&px={}'.format(\n", 67 | " base,\n", 68 | " prod_type,\n", 69 | " freq,\n", 70 | " classification)\n", 71 | "d = {}" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 6, 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "for n in chunker(c_codes.index.values[1:], 5):\n", 83 | " req = '&ps={}&r=all&p={}&rg=1%2C2&cc={}'.format(\n", 84 | " '%2C'.join(years),\n", 85 | " '%2C'.join(n), \n", 86 | " prod)\n", 87 | " data = requests.get('{}{}'.format(url, req)).json()\n", 88 | " d[n[0]] = 
pd.DataFrame(data['dataset'])[['TradeValue', 'pt3ISO', \n", 89 | " 'rt3ISO', 'rtTitle', \n", 90 | " 'period', 'rgDesc'\n", 91 | " ]].set_index('pt3ISO')\n", 92 | " time.sleep(5)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 7, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "data": { 102 | "text/html": [ 103 | "
\n", 104 | "\n", 117 | "\n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | "
TradeValuert3ISOperiodrgDesc
pt3ISO
MYS277953ARG2015Export
MYS120140AUS2015Import
MYS7349902AUS2015Export
MYS671AUT2015Export
MLT2868925AUT2015Export
\n", 172 | "
" 173 | ], 174 | "text/plain": [ 175 | " TradeValue rt3ISO period rgDesc\n", 176 | "pt3ISO \n", 177 | "MYS 277953 ARG 2015 Export\n", 178 | "MYS 120140 AUS 2015 Import\n", 179 | "MYS 7349902 AUS 2015 Export\n", 180 | "MYS 671 AUT 2015 Export\n", 181 | "MLT 2868925 AUT 2015 Export" 182 | ] 183 | }, 184 | "execution_count": 7, 185 | "metadata": {}, 186 | "output_type": "execute_result" 187 | } 188 | ], 189 | "source": [ 190 | "df = pd.concat(d.values())\n", 191 | "df.fillna(value='TWN', inplace=True)\n", 192 | "df = df.drop('rtTitle', 1)\n", 193 | "df.head()" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 8, 199 | "metadata": { 200 | "collapsed": true 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "df.to_csv('440710.csv')" 205 | ] 206 | } 207 | ], 208 | "metadata": { 209 | "kernelspec": { 210 | "display_name": "Python [default]", 211 | "language": "python", 212 | "name": "python2" 213 | }, 214 | "language_info": { 215 | "codemirror_mode": { 216 | "name": "ipython", 217 | "version": 2 218 | }, 219 | "file_extension": ".py", 220 | "mimetype": "text/x-python", 221 | "name": "python", 222 | "nbconvert_exporter": "python", 223 | "pygments_lexer": "ipython2", 224 | "version": "2.7.12" 225 | } 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 2 229 | } 230 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/ECB-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "European Central Bank (ECB) API with Python 2.7\n", 8 | "=====\n", 9 | "\n", 10 | "## Data on Eurozone countries\n", 11 | "\n", 12 | "-----\n", 13 | "\n", 14 | "*September 3, 2017*
\n", 15 | "*@bd_econ*\n", 16 | "\n", 17 | "ECB API Documentation is [here](https://sdw-wsrest.ecb.europa.eu/web/generator/index.html)\n", 18 | "\n", 19 | "This example requests the interest rate by country on 10-year bonds over the period since 2012." 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import requests\n", 31 | "import pandas as pd" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Parameters/Settings\n", 39 | "\n", 40 | "List of data sets is [here](https://sdw-wsrest.ecb.europa.eu/service/dataflow)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "# List of countries\n", 50 | "c_list = ['DE', \n", 51 | " 'FR', \n", 52 | " 'IT', \n", 53 | " 'ES', \n", 54 | " 'PT', \n", 55 | " 'GR', \n", 56 | " 'NL', \n", 57 | " 'BE', \n", 58 | " 'AT', \n", 59 | " 'IE']\n", 60 | "\n", 61 | "param = [('dataflow', 'IRS'),\n", 62 | " ('freq', 'M'),\n", 63 | " ('countries', '+'.join(c_list)),\n", 64 | " ('series', 'L'),\n", 65 | " ('trans type', 'L40'),\n", 66 | " ('maturity cat', 'CI'),\n", 67 | " ('counterpart', '0000'),\n", 68 | " ('currency', 'EUR'),\n", 69 | " ('business coverage', 'N'),\n", 70 | " ('interest rate type', 'Z'),\n", 71 | " ('start', '?startPeriod=2012-01-01')]\n", 72 | "\n", 73 | "param_joined = '.'.join(value for key, value in param[1:-1])\n", 74 | "\n", 75 | "series_key = '{}/{}{}'.format(param[0][1],\n", 76 | " param_joined,\n", 77 | " param[-1][1])" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 34, 83 | "metadata": { 84 | "scrolled": true 85 | }, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "'IRS/M.DE+FR+IT+ES+PT+GR+NL+BE+AT+IE.L.L40.CI.0000.EUR.N.Z?startPeriod=2012-01-01'" 91 | ] 92 | }, 93 | "execution_count": 34, 94 | "metadata": {}, 95 | 
"output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "series_key" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "## Request data" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 3, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "url = 'https://sdw-wsrest.ecb.europa.eu/service/data/'\n", 116 | "# headers used as content negotiation to return data in json format\n", 117 | "headers = {'Accept':'application/json'}\n", 118 | "r = requests.get('{}{}'.format(url, series_key), headers=headers).json()" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "## Pandas DataFrame" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 5, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/html": [ 136 | "
\n", 137 | "\n", 150 | "\n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | "
AustriaBelgiumGermanySpainFranceGreeceIrelandItalyNetherlandsPortugal
2017-03-010.59240.870.351.7171.027.171.052.4010.4853.992
2017-04-010.49030.780.221.6130.886.700.912.2570.4983.774
2017-05-010.65170.770.341.5710.815.860.832.1940.5893.287
2017-06-010.55470.620.251.4530.665.760.702.0530.5032.968
2017-07-010.72920.830.461.5990.845.330.872.2310.6883.025
\n", 234 | "
" 235 | ], 236 | "text/plain": [ 237 | " Austria Belgium Germany Spain France Greece Ireland Italy \\\n", 238 | "2017-03-01 0.5924 0.87 0.35 1.717 1.02 7.17 1.05 2.401 \n", 239 | "2017-04-01 0.4903 0.78 0.22 1.613 0.88 6.70 0.91 2.257 \n", 240 | "2017-05-01 0.6517 0.77 0.34 1.571 0.81 5.86 0.83 2.194 \n", 241 | "2017-06-01 0.5547 0.62 0.25 1.453 0.66 5.76 0.70 2.053 \n", 242 | "2017-07-01 0.7292 0.83 0.46 1.599 0.84 5.33 0.87 2.231 \n", 243 | "\n", 244 | " Netherlands Portugal \n", 245 | "2017-03-01 0.485 3.992 \n", 246 | "2017-04-01 0.498 3.774 \n", 247 | "2017-05-01 0.589 3.287 \n", 248 | "2017-06-01 0.503 2.968 \n", 249 | "2017-07-01 0.688 3.025 " 250 | ] 251 | }, 252 | "execution_count": 5, 253 | "metadata": {}, 254 | "output_type": "execute_result" 255 | } 256 | ], 257 | "source": [ 258 | "date_list = r['structure']['dimensions']['observation'][0]['values']\n", 259 | "dates = [date['start'][:10] for date in date_list]\n", 260 | " \n", 261 | "areas = [v['name'] for v in r['structure']['dimensions']['series'][1]['values']]\n", 262 | "\n", 263 | "df = pd.DataFrame()\n", 264 | "for i, area in enumerate(areas):\n", 265 | " s_key = '0:{}:0:0:0:0:0:0:0'.format(i)\n", 266 | " s_list = r['dataSets'][0]['series'][s_key]['observations']\n", 267 | " df[area] = pd.Series([s_list[val][0] for val in sorted(s_list, key=int)])\n", 268 | "df.index = dates\n", 269 | "df.tail()" 270 | ] 271 | } 272 | ], 273 | "metadata": { 274 | "kernelspec": { 275 | "display_name": "Python [conda root]", 276 | "language": "python", 277 | "name": "conda-root-py" 278 | }, 279 | "language_info": { 280 | "codemirror_mode": { 281 | "name": "ipython", 282 | "version": 2 283 | }, 284 | "file_extension": ".py", 285 | "mimetype": "text/x-python", 286 | "name": "python", 287 | "nbconvert_exporter": "python", 288 | "pygments_lexer": "ipython2", 289 | "version": "2.7.12" 290 | } 291 | }, 292 | "nbformat": 4, 293 | "nbformat_minor": 2 294 | } 295 | 
-------------------------------------------------------------------------------- /.ipynb_checkpoints/FRED-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "FRED API with Python 2.7\n", 8 | "-----\n", 9 | "\n", 10 | "## Wage data example\n", 11 | "\n", 12 | "*July 23, 2017*
\n", 13 | "*@bd_econ*\n", 14 | "\n", 15 | "Retrieve data from FRED API using Python 2.7 with the requests package. Use pandas for calculations and analysis. Create a dictionary of relevant results and present it as a table.\n", 16 | "\n", 17 | "FRED API Documentation is [here](https://research.stlouisfed.org/docs/api/fred/)" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "# Import preliminaries\n", 29 | "import requests\n", 30 | "import pandas as pd\n", 31 | "import config # file with api key" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Parameters/ Settings" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "# The URL for the FRED API\n", 50 | "base = 'https://api.stlouisfed.org/fred/series/observations?series_id='\n", 51 | "\n", 52 | "# List of FRED series IDs and their description\n", 53 | "s_dict = {'CES3000000008': 'Manufacturing AHE, SA', \n", 54 | " 'CES1000000008': 'Mining and Logging AHE, SA',\n", 55 | " 'CES4000000008': 'Trade, Transportation, and Utilities AHE, SA',\n", 56 | " 'CES2000000008': 'Construction AHE, SA',\n", 57 | " 'CES5000000008': 'Information AHE, SA',\n", 58 | " 'CES5500000008': 'Financial Activities AHE, SA',\n", 59 | " 'CES6000000008': 'Professional and Business Services AHE, SA',\n", 60 | " 'CES6500000008': 'Education and Health Services AHE, SA',\n", 61 | " 'CES7000000008': 'Leisure and Hospitality AHE, SA',\n", 62 | " 'AHETPI': 'Total Private AHE, SA',\n", 63 | " }\n", 64 | "\n", 65 | "# Include start date, API key from config.py file and file type json\n", 66 | "start_date = '1980-01-01'\n", 67 | "dates = '&observation_start={}'.format(start_date)\n", 68 | "api_key = '&api_key={}'.format(config.fred_key)\n", 69 | "ftype = '&file_type=json'" 70 | ] 
71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## Make requests" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 3, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "text/html": [ 87 | "
\n", 88 | "\n", 101 | "\n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | "
Construction AHE, SAFinancial Activities AHE, SALeisure and Hospitality AHE, SAEducation and Health Services AHE, SAInformation AHE, SATrade, Transportation, and Utilities AHE, SAManufacturing AHE, SAMining and Logging AHE, SATotal Private AHE, SAProfessional and Business Services AHE, SA
2017-04-0126.3726.4613.2922.9730.6119.2620.7727.5521.9625.99
2017-05-0126.5226.4913.3422.9830.7219.2820.8027.5021.9925.98
2017-06-0126.6326.5313.3523.0030.8019.3520.8027.6422.0326.02
2017-07-0126.7526.6013.3623.0430.8119.3620.9027.8822.0826.09
2017-08-0126.8126.5913.4123.0930.8219.3820.9027.7822.1226.20
\n", 185 | "
" 186 | ], 187 | "text/plain": [ 188 | " Construction AHE, SA Financial Activities AHE, SA \\\n", 189 | "2017-04-01 26.37 26.46 \n", 190 | "2017-05-01 26.52 26.49 \n", 191 | "2017-06-01 26.63 26.53 \n", 192 | "2017-07-01 26.75 26.60 \n", 193 | "2017-08-01 26.81 26.59 \n", 194 | "\n", 195 | " Leisure and Hospitality AHE, SA \\\n", 196 | "2017-04-01 13.29 \n", 197 | "2017-05-01 13.34 \n", 198 | "2017-06-01 13.35 \n", 199 | "2017-07-01 13.36 \n", 200 | "2017-08-01 13.41 \n", 201 | "\n", 202 | " Education and Health Services AHE, SA Information AHE, SA \\\n", 203 | "2017-04-01 22.97 30.61 \n", 204 | "2017-05-01 22.98 30.72 \n", 205 | "2017-06-01 23.00 30.80 \n", 206 | "2017-07-01 23.04 30.81 \n", 207 | "2017-08-01 23.09 30.82 \n", 208 | "\n", 209 | " Trade, Transportation, and Utilities AHE, SA Manufacturing AHE, SA \\\n", 210 | "2017-04-01 19.26 20.77 \n", 211 | "2017-05-01 19.28 20.80 \n", 212 | "2017-06-01 19.35 20.80 \n", 213 | "2017-07-01 19.36 20.90 \n", 214 | "2017-08-01 19.38 20.90 \n", 215 | "\n", 216 | " Mining and Logging AHE, SA Total Private AHE, SA \\\n", 217 | "2017-04-01 27.55 21.96 \n", 218 | "2017-05-01 27.50 21.99 \n", 219 | "2017-06-01 27.64 22.03 \n", 220 | "2017-07-01 27.88 22.08 \n", 221 | "2017-08-01 27.78 22.12 \n", 222 | "\n", 223 | " Professional and Business Services AHE, SA \n", 224 | "2017-04-01 25.99 \n", 225 | "2017-05-01 25.98 \n", 226 | "2017-06-01 26.02 \n", 227 | "2017-07-01 26.09 \n", 228 | "2017-08-01 26.20 " 229 | ] 230 | }, 231 | "execution_count": 3, 232 | "metadata": {}, 233 | "output_type": "execute_result" 234 | } 235 | ], 236 | "source": [ 237 | "df = pd.DataFrame()\n", 238 | "for code, name in s_dict.items():\n", 239 | " url = '{}{}{}{}{}'.format(base, code, dates, api_key, ftype)\n", 240 | " r = requests.get(url).json()['observations']\n", 241 | " df[name] = [i['value'] for i in r]\n", 242 | "df.index = pd.to_datetime([i['date'] for i in r])\n", 243 | "df.tail()" 244 | ] 245 | } 246 | ], 247 | "metadata": { 248 | 
"anaconda-cloud": {}, 249 | "kernelspec": { 250 | "display_name": "Python [default]", 251 | "language": "python", 252 | "name": "python2" 253 | }, 254 | "language_info": { 255 | "codemirror_mode": { 256 | "name": "ipython", 257 | "version": 2 258 | }, 259 | "file_extension": ".py", 260 | "mimetype": "text/x-python", 261 | "name": "python", 262 | "nbconvert_exporter": "python", 263 | "pygments_lexer": "ipython2", 264 | "version": "2.7.12" 265 | } 266 | }, 267 | "nbformat": 4, 268 | "nbformat_minor": 1 269 | } 270 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/OECD-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "OECD API with Python 2.7\n", 8 | "======\n", 9 | "\n", 10 | "## Example showing GDP growth rates by selected country\n", 11 | "\n", 12 | "-----\n", 13 | "\n", 14 | "*September 3, 2017*
\n", 15 | "*@bd_econ*\n", 16 | "\n", 17 | "The documentation for the Organization for Economic Cooperation and Development (OECD) API can be found [here](https://data.oecd.org/api/)" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "import requests\n", 29 | "import pandas as pd" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## Parameters/ Settings" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 81, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "base = 'http://stats.oecd.org/sdmx-json/data/'\n", 46 | "param = [('dataset', 'QNA'),\n", 47 | " ('country', 'FRA+ITA+ESP+GBR'),\n", 48 | " ('indicators', 'GDP+B1_GE.CUR+VOBARSA'), \n", 49 | " ('freq', 'Q'), \n", 50 | " ('start_date' , '?startTime=1999-Q4')\n", 51 | " ]\n", 52 | "\n", 53 | "series = '.'.join(x[1] for x in param[1:-1])\n", 54 | "url = '{}{}/{}{}'.format(base, param[0][1], series, param[-1][1])" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Request data" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 82, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "r = requests.get(url).json()" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 83, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "Gross domestic product - expenditure approach\n" 83 | ] 84 | }, 85 | { 86 | "data": { 87 | "text/html": [ 88 | "
\n", 89 | "\n", 102 | "\n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | "
FranceItalySpainUnited Kingdom
2016-04-01-0.3803110.3865543.3698412.426973
2016-07-010.6862871.2387042.7864392.013684
2016-10-012.0536211.5915762.7701142.670542
2017-01-012.0271151.7692703.1780450.850258
2017-04-011.9114641.5058423.5150211.197218
\n", 150 | "
" 151 | ], 152 | "text/plain": [ 153 | " France Italy Spain United Kingdom\n", 154 | "2016-04-01 -0.380311 0.386554 3.369841 2.426973\n", 155 | "2016-07-01 0.686287 1.238704 2.786439 2.013684\n", 156 | "2016-10-01 2.053621 1.591576 2.770114 2.670542\n", 157 | "2017-01-01 2.027115 1.769270 3.178045 0.850258\n", 158 | "2017-04-01 1.911464 1.505842 3.515021 1.197218" 159 | ] 160 | }, 161 | "execution_count": 83, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "date_list = r['structure']['dimensions']['observation'][0]['values']\n", 168 | "dates = pd.to_datetime([x['id'] for x in date_list])\n", 169 | " \n", 170 | "areas = [v['name'] for v in r['structure']['dimensions']['series'][0]['values']]\n", 171 | "\n", 172 | "title = r['structure']['dimensions']['series'][1]['values'][0]['name']\n", 173 | "\n", 174 | "df = pd.DataFrame()\n", 175 | "for i, area in enumerate(areas):\n", 176 | " s_key = '{}:0:0:0'.format(i)\n", 177 | " s_list = r['dataSets'][0]['series'][s_key]['observations']\n", 178 | " df[area] = pd.Series([s_list[val][0] for val in sorted(s_list, key=int)])\n", 179 | " df[area] = (((df[area]/df[area].shift())**4)-1)*100\n", 180 | "df.index = dates\n", 181 | "df = df.dropna()\n", 182 | "print title\n", 183 | "df.tail()" 184 | ] 185 | } 186 | ], 187 | "metadata": { 188 | "kernelspec": { 189 | "display_name": "Python [default]", 190 | "language": "python", 191 | "name": "python2" 192 | }, 193 | "language_info": { 194 | "codemirror_mode": { 195 | "name": "ipython", 196 | "version": 2 197 | }, 198 | "file_extension": ".py", 199 | "mimetype": "text/x-python", 200 | "name": "python", 201 | "nbconvert_exporter": "python", 202 | "pygments_lexer": "ipython2", 203 | "version": "2.7.12" 204 | } 205 | }, 206 | "nbformat": 4, 207 | "nbformat_minor": 2 208 | } 209 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/Yahoo_Finance-checkpoint.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Yahoo! Finance API with Python 2.7\n", 8 | "=====\n", 9 | "\n", 10 | "## Yahoo Finance dowload historical stock index data\n", 11 | "\n", 12 | "-----\n", 13 | "\n", 14 | "*September 3, 2017*
\n", 15 | "*@bd_econ* \n", 16 | "\n", 17 | "This example retrieves the adjusted daily closing price for the Dow Jones Industrial Average, S&P 500 and Nasdaq 100. " 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "import requests\n", 29 | "import re\n", 30 | "import pandas as pd\n", 31 | "from StringIO import StringIO\n", 32 | "import time " 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Parameters/Settings" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "tickers = ['^DJI', '^GSPC', '^IXIC']\n", 49 | "start = '1104537600'\n", 50 | "base = 'https://finance.yahoo.com/quote/'\n", 51 | "base2 = 'https://query1.finance.yahoo.com/v7/finance/download/'\n", 52 | "end = int(time.time())\n", 53 | "dates = '?period1={}&period2={}'.format(start, end)\n", 54 | "param = '&interval=1d&filter=history&frequency=1d'" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Request Data" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 4, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/html": [ 72 | "
\n", 73 | "\n", 86 | "\n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | "
DJIGSPCIXIC
Date
2017-08-2821808.402444.246283.02
2017-08-2921865.372446.306301.89
2017-08-3021892.432457.596368.31
2017-08-3121948.102471.656428.66
2017-09-0121987.562476.556435.33
\n", 134 | "
" 135 | ], 136 | "text/plain": [ 137 | " DJI GSPC IXIC\n", 138 | "Date \n", 139 | "2017-08-28 21808.40 2444.24 6283.02\n", 140 | "2017-08-29 21865.37 2446.30 6301.89\n", 141 | "2017-08-30 21892.43 2457.59 6368.31\n", 142 | "2017-08-31 21948.10 2471.65 6428.66\n", 143 | "2017-09-01 21987.56 2476.55 6435.33" 144 | ] 145 | }, 146 | "execution_count": 4, 147 | "metadata": {}, 148 | "output_type": "execute_result" 149 | } 150 | ], 151 | "source": [ 152 | "df = pd.DataFrame()\n", 153 | "\n", 154 | "for t in tickers:\n", 155 | " url = '{}{}/history{}{}'.format(base, t, dates, param)\n", 156 | " s = requests.Session()\n", 157 | " r = s.get(url)\n", 158 | " regex = '\"CrumbStore\":{\"crumb\":\"(.+?)\"},'\n", 159 | " pattern = re.compile(regex)\n", 160 | " crumb = re.findall(pattern, r.content)[0]\n", 161 | " param2 = '{}{}&interval=1d&events=history&crumb={}/Q'.format(t, dates, crumb)\n", 162 | " url2 = '{}{}'.format(base2, param2)\n", 163 | " data = s.post(url2)\n", 164 | " csv = StringIO(data.content)\n", 165 | " df[t[1:]] = pd.read_table(csv, sep=',', \n", 166 | " parse_dates=['Date']).set_index('Date')['Adj Close'].round(2)\n", 167 | "df.tail()" 168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "kernelspec": { 173 | "display_name": "Python [conda root]", 174 | "language": "python", 175 | "name": "conda-root-py" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 2 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython2", 187 | "version": "2.7.12" 188 | } 189 | }, 190 | "nbformat": 4, 191 | "nbformat_minor": 2 192 | } 193 | -------------------------------------------------------------------------------- /APIs/BLS_tool.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### BLS 
API retrieval tool\n", 8 | "\n", 9 | "January 14, 2019\n", 10 | "\n", 11 | "Parameterized version of the technique shown in BLS.ipynb." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "ExecuteTime": { 19 | "end_time": "2018-09-01T03:16:59.387250Z", 20 | "start_time": "2018-09-01T03:16:58.887346Z" 21 | }, 22 | "code_folding": [] 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "# Import preliminaries\n", 27 | "import requests\n", 28 | "import pandas as pd\n", 29 | "import json\n", 30 | "\n", 31 | "# Local file with API key\n", 32 | "import config\n", 33 | "\n", 34 | "\n", 35 | "def api(series, date_range):\n", 36 | " \"\"\"Collect list of series from BLS API for given dates\"\"\"\n", 37 | " # The url for BLS API v2\n", 38 | " url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'\n", 39 | "\n", 40 | " # API key in config.py which contains: bls_key = 'key'\n", 41 | " key = '?registrationkey={}'.format(config.bls_key)\n", 42 | "\n", 43 | " # Handle dates\n", 44 | " dates = [(str(date_range[0]), str(date_range[1]))]\n", 45 | " while int(dates[-1][1]) - int(dates[-1][0]) > 10:\n", 46 | " dates = [(str(date_range[0]), str(date_range[0] + 9))]\n", 47 | " d1 = int(dates[-1][0])\n", 48 | " while int(dates[-1][1]) < date_range[1]:\n", 49 | " d1 = d1 + 10\n", 50 | " d2 = min([date_range[1], d1 + 9])\n", 51 | " dates.append((str(d1), (d2)))\n", 52 | "\n", 53 | " df = pd.DataFrame()\n", 54 | "\n", 55 | " for start, end in dates:\n", 56 | " # Submit the list of series as data\n", 57 | " data = json.dumps({\n", 58 | " \"seriesid\": list(series.keys()),\n", 59 | " \"startyear\": start, \"endyear\": end})\n", 60 | "\n", 61 | " # Post request for the data\n", 62 | " p = requests.post(\n", 63 | " '{}{}'.format(url, key),\n", 64 | " headers={'Content-type': 'application/json'},\n", 65 | " data=data).json()\n", 66 | " for s in p['Results']['series']:\n", 67 | " col = series[s['seriesID']]\n", 68 | " for r in s['data']:\n", 69 | " 
date = pd.to_datetime('{} {}'.format(\n", 70 | " r['periodName'], r['year']))\n", 71 | " df.at[date, col] = float(r['value'])\n", 72 | " df = df.sort_index()\n", 73 | " # Output results\n", 74 | " print('Request Status: {}'.format(p['status']))\n", 75 | " print(f'Columns: {df.shape[1]}, Rows: {df.shape[0]}')\n", 76 | " print(f'Latest date: {df.index[-1].date()}')\n", 77 | " \n", 78 | " return df" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 4, 84 | "metadata": { 85 | "ExecuteTime": { 86 | "end_time": "2018-09-01T03:19:26.426854Z", 87 | "start_time": "2018-09-01T03:19:26.416577Z" 88 | } 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "# Series stored as {id: name}\n", 93 | "slist = {'CUUR0000SA0': 'CPI'}\n", 94 | "\n", 95 | "# Start year and end year\n", 96 | "dates = (1994, 2018)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": { 103 | "ExecuteTime": { 104 | "end_time": "2018-09-01T03:19:28.387172Z", 105 | "start_time": "2018-09-01T03:19:27.387042Z" 106 | } 107 | }, 108 | "outputs": [ 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "Request Status: REQUEST_SUCCEEDED\n", 114 | "Columns: 1, Rows: 295\n", 115 | "Latest date: 2018-07-01\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "df = api(slist, dates)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [] 136 | } 137 | ], 138 | "metadata": { 139 | "kernelspec": { 140 | "display_name": "Python 3", 141 | "language": "python", 142 | "name": "python3" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 3 148 | }, 149 | "file_extension": ".py", 150 | "mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": 
"python", 153 | "pygments_lexer": "ipython3", 154 | "version": "3.7.5" 155 | } 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 2 159 | } 160 | -------------------------------------------------------------------------------- /APIs/Census_ACS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "U.S. Census Bureau API with Python\n", 8 | "======\n", 9 | "\n", 10 | "## American Community Survey\n", 11 | "\n", 12 | "----\n", 13 | "\n", 14 | "*September 3, 2017*
\n", 15 | "*@bd_econ*\n", 16 | "\n", 17 | "Using the American Community Survey (ACS) to examine some demographic and economic trends at the U.S. county level.\n", 18 | "\n", 19 | "List of variables from the [5-year ACS](https://www.census.gov/data/developers/data-sets/acs-5year.html) are found [here](https://api.census.gov/data/2015/acs5/variables.html).\n", 20 | "\n", 21 | "The vincent example requires two topo.json files: [States](https://github.com/wrobstory/vincent_map_data/blob/master/us_states.topo.json) and [Counties](https://github.com/wrobstory/vincent_map_data/blob/master/us_counties.topo.json)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": { 28 | "ExecuteTime": { 29 | "end_time": "2019-01-14T22:53:23.858320Z", 30 | "start_time": "2019-01-14T22:53:23.560421Z" 31 | } 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import requests\n", 36 | "import pandas as pd\n", 37 | "\n", 38 | "import config\n", 39 | "key = config.census_key" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": { 46 | "ExecuteTime": { 47 | "end_time": "2019-01-14T22:53:23.863121Z", 48 | "start_time": "2019-01-14T22:53:23.860292Z" 49 | } 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "base = 'https://api.census.gov/data/'\n", 54 | "years = ['2015']#['2009', '2012', '2015']\n", 55 | "variables = {'NAME':'Name',\n", 56 | " 'B01001_001E': 'Population total',\n", 57 | " 'B19013_001E': 'Real Median Income',}\n", 58 | "v = ','.join(variables.keys())\n", 59 | "c = '*'\n", 60 | "s = '*'" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 6, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "'Error report

HTTP Status 404 - /data/2015/acs5

'" 72 | ] 73 | }, 74 | "execution_count": 6, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "requests.get(url).text" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "metadata": { 87 | "ExecuteTime": { 88 | "end_time": "2019-01-14T22:53:25.400850Z", 89 | "start_time": "2019-01-14T22:53:23.866061Z" 90 | } 91 | }, 92 | "outputs": [ 93 | { 94 | "ename": "JSONDecodeError", 95 | "evalue": "Expecting value: line 1 column 1 (char 0)", 96 | "output_type": "error", 97 | "traceback": [ 98 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 99 | "\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)", 100 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m url = '{}{}/acs5?get={}&for=county:{}&in=state:{}&key={}'.format(\n\u001b[1;32m 4\u001b[0m base, y, v, c, s, key)\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mdft\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mdft\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Year'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 101 | "\u001b[0;32m~/miniconda3/lib/python3.8/site-packages/requests/models.py\u001b[0m in \u001b[0;36mjson\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 896\u001b[0m \u001b[0;31m# used.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 897\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 898\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mcomplexjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 899\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 900\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 102 | "\u001b[0;32m~/miniconda3/lib/python3.8/json/__init__.py\u001b[0m in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m 355\u001b[0m \u001b[0mparse_int\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mparse_float\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 356\u001b[0m parse_constant is None and object_pairs_hook is None and not kw):\n\u001b[0;32m--> 357\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_default_decoder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 358\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcls\u001b[0m \u001b[0;32mis\u001b[0m 
\u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 359\u001b[0m \u001b[0mcls\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mJSONDecoder\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 103 | "\u001b[0;32m~/miniconda3/lib/python3.8/json/decoder.py\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m 335\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 336\u001b[0m \"\"\"\n\u001b[0;32m--> 337\u001b[0;31m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraw_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0m_w\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 338\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_w\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 339\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 104 | "\u001b[0;32m~/miniconda3/lib/python3.8/json/decoder.py\u001b[0m in \u001b[0;36mraw_decode\u001b[0;34m(self, s, idx)\u001b[0m\n\u001b[1;32m 353\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscan_once\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 354\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 355\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mJSONDecodeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Expecting value\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 356\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 105 | "\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)" 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "df = pd.DataFrame()\n", 111 | "for y in years:\n", 112 | " url = '{}{}/acs5?get={}&for=county:{}&in=state:{}&key={}'.format(\n", 113 | " base, y, v, c, s, key)\n", 114 | " r = requests.get(url).json()\n", 115 | " dft = pd.DataFrame(r[1:], columns=r[0])\n", 116 | " dft['Year'] = y\n", 117 | " df = df.append(dft)\n", 118 | "df = df.rename(columns=variables).set_index(\n", 119 | " ['Name', 'Year']).sort_index(level='Name')\n", 120 | "df.head()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "### Map the results\n", 128 | "\n", 129 | "Note: to make the example below work, you will first need to save [this](https://raw.githubusercontent.com/wrobstory/vincent_map_data/master/us_counties.topo.json) topo.json file in the same directory as the jupyter notebook." 
130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": { 136 | "ExecuteTime": { 137 | "end_time": "2019-01-14T22:53:25.416435Z", 138 | "start_time": "2019-01-14T22:53:25.404865Z" 139 | } 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "df['Real Median Income'] = df['Real Median Income'].astype(float)\n", 144 | "\n", 145 | "df['FIPS'] = df['state'] + df['county']\n", 146 | "df['FIPS'] = df['FIPS'].astype(int)\n", 147 | "df['FIPS'] = df['FIPS'].map(lambda i: str(i).zfill(5))\n", 148 | "# County FIP Codes that have changed:\n", 149 | "df['FIPS'] = df['FIPS'].str.replace('46102', '46113')" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "ExecuteTime": { 157 | "end_time": "2019-01-14T22:54:27.680439Z", 158 | "start_time": "2019-01-14T22:54:27.397541Z" 159 | } 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "# For mapping results\n", 164 | "import vincent\n", 165 | "vincent.core.initialize_notebook()\n", 166 | "\n", 167 | "geo_data = [{'name': 'counties',\n", 168 | " 'url': 'geo/us_counties.topo.json',\n", 169 | " 'feature': 'us_counties.geo'}, \n", 170 | " {'name': 'states',\n", 171 | " 'url': 'geo/us_states.topo.json',\n", 172 | " 'feature': 'us_states.geo'}\n", 173 | " ]\n", 174 | "\n", 175 | "vis = vincent.Map(data=df, geo_data=geo_data, scale=1100,\n", 176 | " projection='albersUsa', data_bind='Real Median Income',\n", 177 | " data_key='FIPS', map_key={'counties': 'properties.FIPS'})\n", 178 | "\n", 179 | "del vis.marks[1].properties.update\n", 180 | "vis.marks[0].properties.enter.stroke.value = '#fff'\n", 181 | "vis.marks[1].properties.enter.stroke.value = '#000000'\n", 182 | "vis.scales['color'].domain = [0, 75000] # Adjust\n", 183 | "vis.legend(title='Real Median Income')\n", 184 | "vis.to_json('geo/vega.json')\n", 185 | "\n", 186 | "vis.display()" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | 
"metadata": {}, 193 | "outputs": [], 194 | "source": [] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python 3", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.8.5" 214 | } 215 | }, 216 | "nbformat": 4, 217 | "nbformat_minor": 2 218 | } 219 | -------------------------------------------------------------------------------- /APIs/Census_TimeSeries_M3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Census economic indicators time series\n", 8 | "\n", 9 | "Brian Dew, @bd_econ, March 23, 2018\n", 10 | "\n", 11 | "The documentation for retrieving these series from the API is [here](https://www.census.gov/data/developers/data-sets/economic-indicators.html). The example below retrieves the nondefense capital goods excluding aircraft new orders monthly percent change (basically a proxy for businesses new investment in equipment). Information on what is required to make the API call is [here](https://api.census.gov/data/timeseries/eits/m3/variables.html), and the list of individual series is [here](https://api.census.gov/data/timeseries/eits/eits_program_code_M3.xml)." 
12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "ExecuteTime": { 19 | "end_time": "2021-05-24T13:26:22.462911Z", 20 | "start_time": "2021-05-24T13:26:22.134173Z" 21 | } 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import requests\n", 26 | "import pandas as pd\n", 27 | "import datetime as dt\n", 28 | "import config\n", 29 | "key = config.census_key\n", 30 | "\n", 31 | "from matplotlib import pyplot as plt\n", 32 | "\n", 33 | "%matplotlib inline" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": { 40 | "ExecuteTime": { 41 | "end_time": "2021-05-24T13:26:22.465989Z", 42 | "start_time": "2021-05-24T13:26:22.463930Z" 43 | } 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "base = 'https://api.census.gov/data/timeseries/eits/advm3'\n", 48 | "param = 'cell_value,time_slot_id'\n", 49 | "time = '&time=from+2015'\n", 50 | "cat = '&category_code=NXA'\n", 51 | "dtc = '&data_type_code=MPCNO'\n", 52 | "oth = '&for=us&seasonally_adj=yes'\n", 53 | "\n", 54 | "url = f'{base}?get={param}&key={key}{cat}{dtc}{time}{oth}'" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 3, 60 | "metadata": { 61 | "ExecuteTime": { 62 | "end_time": "2021-05-24T13:26:23.217235Z", 63 | "start_time": "2021-05-24T13:26:22.467351Z" 64 | } 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "r = requests.get(url).json()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 4, 74 | "metadata": { 75 | "ExecuteTime": { 76 | "end_time": "2021-05-24T13:26:23.221887Z", 77 | "start_time": "2021-05-24T13:26:23.218256Z" 78 | } 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "df = pd.DataFrame({'date': [f'{i[4]}-01' for i in r[1:]], \n", 83 | " 'value': [float(i[0]) for i in r[1:]]}).sort_values('date')" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 5, 89 | "metadata": { 90 | "ExecuteTime": { 91 | "end_time": "2021-05-24T13:26:23.415332Z", 92 | 
"start_time": "2021-05-24T13:26:23.223009Z" 93 | } 94 | }, 95 | "outputs": [ 96 | { 97 | "data": { 98 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlIAAAEvCAYAAACOiy/xAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQ9klEQVR4nO3de4xmZ10H8O+PLmgCJVx2y60dFiKEVECBoYKIl0IVKAETNLYKNCFmFSOBRIOt1QSjxlINoqmJ2QBGI3ILIGJBbZGqEC52S7lsl5ZCttCCtlUTWohg08c/5mwy3U738pwz875n5vNJ3sy8t+d9zm/OvPN9f+cy1VoLAAAn736LngAAwFwJUgAAnQQpAIBOghQAQCdBCgCgkyAFANBp1yJedPfu3W3v3r2LeGkAgJNy4MCB21treza6byFBau/evbn66qsX8dIAACelqm66r/ts2gMA6CRIAQB0EqQAADoJUgAAnQQpAIBOghQAQCdBCgCgkyAFANBJkAIA6CRIAQB0Wsi/iAEAxtl74eX3uu3wJecuYCY7m44UAEAnQQoAoNNkQaqqTqmqz1TV3081JgDAMpuyI/XaJIcmHA8AYKlNEqSq6vQk5yZ5yxTjAQDMwVQdqTcneX2SuycaDwBg6Y0OUlX14iS3ttYOHOdx+6rq6qq6+rbbbhv7sgAACzdFR+o5SV5SVYeTvDPJ2VX110c/qLW2v7W22lpb3bNnzwQvCwCwWKODVGvtotba6a21vUnOS/LPrbWXj54ZAMCScx4pAIBOk/6LmNbaVUmumnJMAIBlpSMFANBJkAIA6CRIAQB0EqQAADpNurM5sLG9F15+j+uHLzl3QTOZ1nZdrl7qATuPjhQAQCdBCgCgkyAFANBJkAIA6CRIAQB0EqQAADoJUgAAnQQpAIBOghQAQCdBCgCgkyAFANBJkAIA6CRIAQB0EqQAADoJUgAAnQQpAIBOghQAQKddi54AbJa9F15+r9sOX3LuAmYCwHalIwUA0EmQAgDoJEgBAHQSpAAAOtnZHAC2maMPtnGgzebRkQIA6KQjBXASfNIH1tORAgDopCMFwDHpwm0vfp7T0pECAOgkSAEAdBKkAAA6CVIAAJ0EKQCATqODVFWdUVUfrapDVXWwql47xcQAAJbdFKc/uCvJr7XWrqmqU5McqKorWmvXTTD27B19mGniUFNYBn43F0ft2U5Gd6Raa99orV0zfH9HkkNJHjN2XACAZTfpCTmram+SpyX51Ab37UuyL0lWVlamfFkATpCTMcK0JtvZvKoelOS9SV7XWvvm0fe31va31lZba6t79uyZ6mUBABZmkiBVVffPWoh6e2vtfVOMCQCw7EZv2quqSvLWJIdaa28aPyUAYBFs+j15U3SknpPkFUnOrqprh8uLJhgXAGCpje5ItdY+lqQmmAsAwKw4szkAQKdJT38AAGw/TqJ633SkAAA6CVIAAJ0EKQCAToIUAEAnO5sDwJKa+07eO+EEnzpSAACdBCkAgE6CFABAJ/tIATtiP4adxM8Tto6OFABAJ0EKAKCTTXsAQLe5n6JhLB0pAIBOghQAQCdBCgCgkyAFANBJkAIA6OSoPY7Jif1gcXb60VBboec9zs9lcZax9jpSAACdBCkAgE427QHb1jJuBgC2Fx0pAIBOOlIAwJbbLgcz6UgBAHQSpAAAOglSAACd7CPFlpr6KCpHZW0vfp7A3OhIAQB0EqQAADrZtAcztF0OGwaYOx0pAIBOOlLr2NGVRbiv7pKuE0zD7xKbSUcKAKCTjhSzMIdPlHOYY4/tulwAU5ikI1VVL6iq66vqxqq6cIoxAQCW3eggVVWnJPmzJC9McmaS86vqzLHjAgAsuyk27Z2V5MbW2leSpKreme
SlSa6bYGxOgp3lp6GObCXrG8zbFJv2HpPka+uu3zzcBgCwrVVrbdwAVT+b5Kdaa784XH9FkrNaa6856nH7kuxLkpWVlWfcdNNNo173eI71Ka/nE+DU4230vPXP6b3vRF9r/fN6dyY+0cP2e+e4mZ/K5zDHHpuxnk41l61ch090zBN9rbF12ozab8a6OMXPZer3gWONOXad6p1H7/O2cv5zt2z1qKoDrbXVje6boiN1c5Iz1l0/PcnXj35Qa21/a221tba6Z8+eCV4WAGCxpthH6t+TPKGqHpfkliTnJfn5Ccbd0RadvtmYn8uJUyu2kvWNRRkdpFprd1XVryb5xySnJHlba+3g6JkBACy5SU7I2Vr7UJIPTTEWx+eTF9zT1L8TfseAE+VfxAAAdBKkAAA6+V97C2TzAcA9eV9kbnSkAAA66UidIJ+SmAPrKcDW0pECAOikIwXALGxlx1V3lxOlIwUA0ElHim4+sQHck/fFnUdHCgCgkyAFANBJkAIA6CRIAQB0srM5sOXskAscy5zeI3SkAAA6CVIAAJ0EKQCATvaRAmZhTvtMzJUaw8nTkQIA6CRIAQB02pGb9rSvAYAp6EgBAHTath0pXSe4J78TANPTkQIA6LRtO1Isjs4HwL15b9yedKQAADoJUgAAnQQpAIBOghQAQCdBCgCgkyAFANDJ6Q92CIfdAlvJew47hY4UAEAnQQoAoJMgBQDQSZACAOg0KkhV1R9W1Rer6nNV9f6qeshE8wIAWHpjO1JXJHlya+2pSW5IctH4KQEAzMOo0x+01v5p3dVPJvmZcdMBYE6c5oCdbsp9pF6V5MMTjgcAsNSO25GqqiuTPHKDuy5urX1geMzFSe5K8vZjjLMvyb4kWVlZ6ZosAMAyOW6Qaq09/1j3V9UFSV6c5HmttXaMcfYn2Z8kq6ur9/k4AIC5GLWPVFW9IMlvJPmx1tq3p5kSO5V9LQCYm7H7SF2W5NQkV1TVtVX15xPMCQBgFsYetfd9U00EAGBunNkcAKCTIAUA0EmQAgDoJEgBAHQSpAAAOglSAACdBCkAgE6CFABAJ0EKAKCTIAUA0EmQAgDoJEgBAHQSpAAAOu1a9AS2g8OXnLvoKQAAC6AjBQDQSZACAOhk0x42TQJAJx0pAIBOghQAQCdBCgCgkyAFANBJkAIA6OSoPYAZcrQtLAcdKQCAToIUAEAnm/bYkWwWAWAKOlIAAJ0EKQCAToIUAEAnQQoAoJMgBQDQSZACAOgkSAEAdBKkAAA6CVIAAJ0EKQCAToIUAECnSYJUVf16VbWq2j3FeAAAczD6nxZX1RlJzkny1fHTAdh+/JNs2L6m6Ej9cZLXJ2kTjAUAMBujglRVvSTJLa21z040HwCA2Tjupr2qujLJIze46+Ikv5nkJ0/khapqX5J9SbKysnISUwQAWE7HDVKttedvdHtVPSXJ45J8tqqS5PQk11TVWa21/9hgnP1J9ifJ6uqqzYAAwOx172zeWvt8ktOOXK+qw0lWW2u3TzAvAICl5zxSAACdRp/+4IjW2t6pxgIAmAMdKQCAToIUAEAnQQoAoJMgBQDQSZACAOgkSAEAdBKkAAA6CVIAAJ0EKQCAToIUAEAnQQoAoJMgBQDQSZACAOgkSAEAdBKkAAA6CVIAAJ0EKQCAToIUAEAnQQoAoJMgBQDQSZACAOgkSAEAdBKkAAA6CVIAAJ0EKQCAToIUAEAnQQoAoJMgBQDQSZACAOgkSAEAdBKkAAA6CVIAAJ0EKQCAToIUAEAnQQoAoJMgBQDQaXSQqqrXVNX1VXWwqi6dYlIAAHOwa8yTq+onkrw0yVNba9+pqtOmmRYAwPIb25F6dZJLWmvfSZLW2q3jpwQAMA9jg9QTkzy3qj5VVf9SVc+cYlIAAHNw3E17VXVlkkducNfFw/MfmuRZSZ6Z5N1V9fjWWttgnH1J9iXJysrKmDkDACyF4wap1trz7+u+qnp1kvcNwenTVXV3kt1JbttgnP1J9i
fJ6urqvYIWAMDcjN2097dJzk6SqnpikgckuX3kmAAAszDqqL0kb0vytqr6QpLvJrlgo816AADb0agg1Vr7bpKXTzQXAIBZcWZzAIBOghQAQCdBCgCgkyAFANBJkAIA6CRIAQB0EqQAADoJUgAAncae2RwAGOnwJecuegp00pECAOgkSAEAdBKkAAA6CVIAAJ0EKQCAToIUAEAnQQoAoJMgBQDQSZACAOgkSAEAdBKkAAA6CVIAAJ2qtbb1L1p1W5KbtvAldye5fQtfbztSw2mo43hqOA11HE8NpzGHOj62tbZnozsWEqS2WlVd3VpbXfQ85kwNp6GO46nhNNRxPDWcxtzraNMeAEAnQQoAoNNOCVL7Fz2BbUANp6GO46nhNNRxPDWcxqzruCP2kQIA2Aw7pSMFADC5WQapqjqjqj5aVYeq6mBVvXa4/WFVdUVVfWn4+tDh9ocPj7+zqi47aqyrqur6qrp2uJy2iGXaahPX8AFVtb+qbqiqL1bVyxaxTIswVR2r6tR16+C1VXV7Vb15QYu1pSZeF8+vqs9X1eeq6h+qavcilmkRJq7jzw01PFhVly5ieRaho4bnVNWBYZ07UFVnrxvrGcPtN1bVn1ZVLWq5ttrEdfz9qvpaVd25qOU5rtba7C5JHpXk6cP3pya5IcmZSS5NcuFw+4VJ3jh8/8AkP5Lkl5NcdtRYVyVZXfQyzbyGv5Pk94bv75dk96KXb451PGrcA0l+dNHLN6caJtmV5NYj69/w/DcsevlmWMeHJ/lqkj3D9b9M8rxFL9+S1vBpSR49fP/kJLesG+vTSZ6dpJJ8OMkLF718M63js4bx7lz0ct3XZZYdqdbaN1pr1wzf35HkUJLHJHlp1n7pM3z96eEx32qtfSzJ/279bJfTxDV8VZI/GB53d2tt2U+sNpnNWBer6glJTkvyb5s38+UxYQ1ruDxw+PT/4CRf3/QFWBIT1vHxSW5ord02XL8yyY7oMnfU8DOttSPr2MEk31tV31NVj0ry4NbaJ9paGvirI8/ZCaaq43DfJ1tr39jC6Z+0WQap9apqb9bS7KeSPOJIwYevJ7qZ7i+GzSm/vZPar0eMqWFVPWT49ner6pqqek9VPWITp7u0JloXk+T8JO8a3oB3lDE1bK39X5JXJ/l81gLUmUneupnzXVYj18UbkzypqvZW1a6s/bE7Y/Nmu5w6aviyJJ9prX0na6Hh5nX33TzctuOMrOMszDpIVdWDkrw3yetaa9/sHOYXWmtPSfLc4fKKqeY3BxPUcFeS05N8vLX29CSfSPJHE05xFiZaF484L8k7xs9qXsbWsKrun7Ug9bQkj07yuSQXTTrJGRhbx9ba/2Stju/KWlf0cJK7ppzjsjvZGlbV9yd5Y5JfOnLTBg/biR+MxtZxFmYbpIY3zfcmeXtr7X3Dzf85tFQzfL31eOO01m4Zvt6R5G+SnLU5M14+E9Xwv5J8O8n7h+vvSfL0TZju0ppqXRwe+wNJdrXWDmzKZJfURDX8wSRprX156Oa9O8kPb86Ml9OE74sfbK39UGvt2UmuT/KlzZrzsjnZGlbV6Vl7/3tla+3Lw803Z+0D5hGnZwdtZk4mq+MszDJIDZvf3prkUGvtTevu+rskFwzfX5DkA8cZZ9eRo3qGH/qLk3xh+hkvn6lqOPzB+mCSHx9uel6S6yad7BKbqo7rnJ8d1o2asIa3JDmzqo78Y9FzsrZvxo4w5bpYw9HLw1FVv5LkLdPOdjmdbA2HXRsuT3JRa+3jRx48bLa6o6qeNYz5ypz4e8DsTVXH2disvdg385K1I01a1lr31w6XF2XtaJOPZO3T00eSPGzdcw4n+e8kd2bt08KZWTtq5cAwzsEkf5LklEUv35xqONz+2CT/Ooz1kSQri16+OdZxuO8rSZ606OWaaw2zdgTaoWGsDyZ5+KKXb6Z1fEfWPhBdl+S8RS/bstYwyW8l+da6x16b5LThvtWsfTD/cpLLMpwAeydcJq7jpcO6ef
fw9Q2LXr6jL85sDgDQaZab9gAAloEgBQDQSZACAOgkSAEAdBKkAAA6CVIAAJ0EKQCAToIUAECn/wez6EleH4fAkQAAAABJRU5ErkJggg==\n", 99 | "text/plain": [ 100 | "
" 101 | ] 102 | }, 103 | "metadata": { 104 | "needs_background": "light" 105 | }, 106 | "output_type": "display_data" 107 | } 108 | ], 109 | "source": [ 110 | "plt.figure(figsize=(10,5))\n", 111 | "ax = plt.subplot(111)\n", 112 | "ax.bar(x=pd.to_datetime(df.date), height=df['value'], width=20)\n", 113 | "ax.xaxis_date()\n" 114 | ] 115 | } 116 | ], 117 | "metadata": { 118 | "kernelspec": { 119 | "display_name": "Python 3", 120 | "language": "python", 121 | "name": "python3" 122 | }, 123 | "language_info": { 124 | "codemirror_mode": { 125 | "name": "ipython", 126 | "version": 3 127 | }, 128 | "file_extension": ".py", 129 | "mimetype": "text/x-python", 130 | "name": "python", 131 | "nbconvert_exporter": "python", 132 | "pygments_lexer": "ipython3", 133 | "version": "3.8.8" 134 | }, 135 | "varInspector": { 136 | "cols": { 137 | "lenName": 16, 138 | "lenType": 16, 139 | "lenVar": 40 140 | }, 141 | "kernels_config": { 142 | "python": { 143 | "delete_cmd_postfix": "", 144 | "delete_cmd_prefix": "del ", 145 | "library": "var_list.py", 146 | "varRefreshCmd": "print(var_dic_list())" 147 | }, 148 | "r": { 149 | "delete_cmd_postfix": ") ", 150 | "delete_cmd_prefix": "rm(", 151 | "library": "var_list.r", 152 | "varRefreshCmd": "cat(var_dic_list()) " 153 | } 154 | }, 155 | "types_to_exclude": [ 156 | "module", 157 | "function", 158 | "builtin_function_or_method", 159 | "instance", 160 | "_Feature" 161 | ], 162 | "window_display": false 163 | } 164 | }, 165 | "nbformat": 4, 166 | "nbformat_minor": 2 167 | } 168 | -------------------------------------------------------------------------------- /APIs/ComTrade.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true, 7 | "jupyter": { 8 | "outputs_hidden": true 9 | } 10 | }, 11 | "source": [ 12 | "United Nations ComTrade\n", 13 | "=====\n", 14 | "\n", 15 | "## Bilateral trade data by product\n", 16 | "\n", 17 
| "----\n", 18 | "\n", 19 | "*September 3, 2017*
\n", 20 | "*@bd_econ*\n", 21 | "\n", 22 | "This example retrieves annual data for the trade of a specific product by all countries for which data are available. To make the most of the features that the UN allows, the notebook runs very slowly (pausing for 5 seconds between requests) and can only be run once a day because of API limits. \n", 23 | "\n", 24 | "[Documentation](https://comtrade.un.org/data/doc/api/) for the UN Comtrade API.\n", 25 | "\n", 26 | "This example uses a list of country codes stored as a csv file. " 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": { 33 | "ExecuteTime": { 34 | "end_time": "2019-01-14T22:33:58.383815Z", 35 | "start_time": "2019-01-14T22:33:57.936970Z" 36 | } 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "import requests\n", 41 | "import pandas as pd\n", 42 | "import time\n", 43 | "\n", 44 | "# Used to loop over countries 5 at a time.\n", 45 | "def chunker(seq, size):\n", 46 | " return (seq[pos:pos + size] for pos in range(0, len(seq), size))\n", 47 | "\n", 48 | "c_codes = pd.read_csv('codes/country_codes.csv').set_index('id')" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "## Paramaters/ Settings for request" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 2, 61 | "metadata": { 62 | "ExecuteTime": { 63 | "end_time": "2019-01-14T22:33:58.393316Z", 64 | "start_time": "2019-01-14T22:33:58.385698Z" 65 | } 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "prod_type = 'C' # Commodity\n", 70 | "freq = 'A' # Annual \n", 71 | "classification = 'HS' # harmonized system\n", 72 | "prod = '440710' # HS 6-digit production ID\n", 73 | "years = ['2005', '2010', '2015']\n", 74 | "base = 'http://comtrade.un.org/api/get?'\n", 75 | "url = '{}max=50000&type={}&freq={}&px={}'.format(\n", 76 | " base, prod_type, freq, classification)\n", 77 | "df = pd.DataFrame(columns=['period', 'pt3ISO', 'rt3ISO', 'TradeValue'])" 78 | ] 79 | }, 
80 | { 81 | "cell_type": "code", 82 | "execution_count": 3, 83 | "metadata": { 84 | "ExecuteTime": { 85 | "end_time": "2019-01-14T22:55:49.100134Z", 86 | "start_time": "2019-01-14T22:33:58.394701Z" 87 | }, 88 | "scrolled": true 89 | }, 90 | "outputs": [ 91 | { 92 | "name": "stderr", 93 | "output_type": "stream", 94 | "text": [ 95 | "/home/brian/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/api.py:107: RuntimeWarning: '<' not supported between instances of 'str' and 'int', sort order is undefined for incomparable objects\n", 96 | " result = result.union(other)\n", 97 | "/home/brian/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py:1472: FutureWarning: \n", 98 | "Passing list-likes to .loc or [] with any missing label will raise\n", 99 | "KeyError in the future, you can use .reindex() as an alternative.\n", 100 | "\n", 101 | "See the documentation here:\n", 102 | "https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike\n", 103 | " return self._getitem_tuple(key)\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "for n in chunker(c_codes.index.values[1:], 5):\n", 109 | " req = '&ps={}&r=all&p={}&rg=2&cc={}'.format(\n", 110 | " '%2C'.join(years), '%2C'.join(n), prod)\n", 111 | " r = requests.get('{}{}'.format(url, req)).json()['dataset']\n", 112 | " for f in r:\n", 113 | " df = df.append([f['period'], f['pt3ISO'], f['rt3ISO'], f['TradeValue']])\n", 114 | " time.sleep(5)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 4, 120 | "metadata": { 121 | "ExecuteTime": { 122 | "end_time": "2019-01-14T22:55:49.165354Z", 123 | "start_time": "2019-01-14T22:55:49.104520Z" 124 | } 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "df.to_csv('440710.csv')" 129 | ] 130 | } 131 | ], 132 | "metadata": { 133 | "kernelspec": { 134 | "display_name": "Python 3 (ipykernel)", 135 | "language": "python", 136 | "name": "python3" 137 | }, 138 | "language_info": { 139 | "codemirror_mode": { 140 | 
"name": "ipython", 141 | "version": 3 142 | }, 143 | "file_extension": ".py", 144 | "mimetype": "text/x-python", 145 | "name": "python", 146 | "nbconvert_exporter": "python", 147 | "pygments_lexer": "ipython3", 148 | "version": "3.12.4" 149 | } 150 | }, 151 | "nbformat": 4, 152 | "nbformat_minor": 4 153 | } 154 | -------------------------------------------------------------------------------- /APIs/OECD.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "OECD API with Python\n", 8 | "======\n", 9 | "\n", 10 | "## Example showing GDP growth rates by selected country\n", 11 | "\n", 12 | "-----\n", 13 | "\n", 14 | "*October 17, 2018*
\n", 15 | "*@bd_econ*\n", 16 | "\n", 17 | "The documentation for the Organization for Economic Cooperation and Development (OECD) API can be found [here](https://data.oecd.org/api/)" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2018-10-18T01:38:11.229430Z", 26 | "start_time": "2018-10-18T01:38:10.934209Z" 27 | } 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "import requests\n", 32 | "import pandas as pd" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Parameters/ Settings" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": { 46 | "ExecuteTime": { 47 | "end_time": "2018-10-18T01:38:13.571764Z", 48 | "start_time": "2018-10-18T01:38:13.565867Z" 49 | } 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "base = 'http://stats.oecd.org/sdmx-json/data/'\n", 54 | "param = [('dataset', 'QNA'),\n", 55 | " ('country', 'FRA+ITA+ESP+GBR+CAN+DEU'),\n", 56 | " ('indicators', 'GDP+B1_GE.CUR+VOBARSA'), \n", 57 | " ('freq', 'Q'), \n", 58 | " ('start_date' , '?startTime=1999-Q4')\n", 59 | " ]\n", 60 | "\n", 61 | "series = '.'.join(x[1] for x in param[1:-1])\n", 62 | "url = '{}{}/{}{}'.format(base, param[0][1], series, param[-1][1])" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "## Request data" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "metadata": { 76 | "ExecuteTime": { 77 | "end_time": "2018-10-18T01:38:24.706627Z", 78 | "start_time": "2018-10-18T01:38:23.201377Z" 79 | } 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "r = requests.get(url).json()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 4, 89 | "metadata": { 90 | "ExecuteTime": { 91 | "end_time": "2018-10-18T01:38:28.067420Z", 92 | "start_time": "2018-10-18T01:38:28.025474Z" 93 | } 94 | }, 95 | "outputs": [ 96 | { 97 | "name": 
"stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "Gross domestic product - expenditure approach\n" 101 | ] 102 | }, 103 | { 104 | "data": { 105 | "text/html": [ 106 | "
\n", 107 | "\n", 120 | "\n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | "
CanadaFranceGermanyItalySpainUnited Kingdom
2019-01-011.22.22.50.82.32.7
2019-04-013.21.0-2.00.41.5-0.2
2019-07-011.10.61.20.01.62.1
2019-10-010.6-0.7-0.1-0.91.7-0.0
2020-01-01-8.2-21.7-7.8-19.8-19.3-8.5
\n", 180 | "
" 181 | ], 182 | "text/plain": [ 183 | " Canada France Germany Italy Spain United Kingdom\n", 184 | "2019-01-01 1.2 2.2 2.5 0.8 2.3 2.7\n", 185 | "2019-04-01 3.2 1.0 -2.0 0.4 1.5 -0.2\n", 186 | "2019-07-01 1.1 0.6 1.2 0.0 1.6 2.1\n", 187 | "2019-10-01 0.6 -0.7 -0.1 -0.9 1.7 -0.0\n", 188 | "2020-01-01 -8.2 -21.7 -7.8 -19.8 -19.3 -8.5" 189 | ] 190 | }, 191 | "execution_count": 4, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "date_list = r['structure']['dimensions']['observation'][0]['values']\n", 198 | "dates = pd.to_datetime([x['id'] for x in date_list])\n", 199 | " \n", 200 | "areas = [v['name'] for v in r['structure']['dimensions']['series'][0]['values']]\n", 201 | "\n", 202 | "title = r['structure']['dimensions']['series'][1]['values'][0]['name']\n", 203 | "\n", 204 | "df = pd.DataFrame()\n", 205 | "for i, area in enumerate(areas):\n", 206 | " s_key = '{}:0:0:0'.format(i)\n", 207 | " s_list = r['dataSets'][0]['series'][s_key]['observations']\n", 208 | " df[area] = pd.Series([s_list[val][0] for val in sorted(s_list, key=int)])\n", 209 | " df[area] = (((df[area]/df[area].shift())**4)-1)*100\n", 210 | "df.index = dates\n", 211 | "df = df.dropna()\n", 212 | "print(title)\n", 213 | "df.tail().round(1)" 214 | ] 215 | } 216 | ], 217 | "metadata": { 218 | "kernelspec": { 219 | "display_name": "Python 3", 220 | "language": "python", 221 | "name": "python3" 222 | }, 223 | "language_info": { 224 | "codemirror_mode": { 225 | "name": "ipython", 226 | "version": 3 227 | }, 228 | "file_extension": ".py", 229 | "mimetype": "text/x-python", 230 | "name": "python", 231 | "nbconvert_exporter": "python", 232 | "pygments_lexer": "ipython3", 233 | "version": "3.8.5" 234 | } 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 2 238 | } 239 | -------------------------------------------------------------------------------- /APIs/README.md: -------------------------------------------------------------------------------- 1 | # APIs 
2 | 3 | ## BD Economics examples of using economic data Application Programming Interfaces (APIs) 4 | 5 | Updated: December 28, 2019 6 | 7 | Contact: Brian Dew, twitter: @bd_econ; email: brian.w.dew@gmail.com 8 | 9 | Goal: Retrieve data from several common sources, on demand, without clicking around on their website. 10 | 11 | ------ 12 | 13 | ### Contents 14 | 15 | - [U.S. Bureau of Economic Analysis](https://github.com/bdecon/econ_data/blob/master/APIs/BEA.ipynb): Provider of US national accounts statistics. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/APIs/BEA.ipynb) 16 | 17 | - [U.S. Bureau of Labor Statistics](https://github.com/bdecon/econ_data/blob/master/APIs/BLS.ipynb): Provider of labor statistics and prices statistics. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/APIs/BLS.ipynb). See also: [tool](https://github.com/bdecon/econ_data/blob/master/APIs/BLS_tool.ipynb) and [prices example](https://github.com/bdecon/econ_data/blob/master/APIs/BLS_Prices.ipynb). 18 | 19 | - [U.S. Census Bureau](https://github.com/bdecon/econ_data/blob/master/APIs/Census_ACS.ipynb): Example of collected American Community Survey published results. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/APIs/Census_ACS.ipynb) 20 | 21 | - [U.S. Census Bureau](https://github.com/bdecon/econ_data/blob/master/APIs/Census_TimeSeries_M3.ipynb): Manufacturers survey (M3) time series example. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/APIs/Census_TimeSeries_M3.ipynb) 22 | 23 | - [U.S. Census Bureau](https://github.com/bdecon/econ_data/blob/master/APIs/Census_Trade.ipynb): International trade by partner example. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/APIs/Census_Trade.ipynb) 24 | 25 | - [UN ComTrade](https://github.com/bdecon/econ_data/blob/master/APIs/ComTrade.ipynb): Data on bilateral trade by product. 
[nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/APIs/ComTrade.ipynb) 26 | 27 | - [European Central Bank](https://github.com/bdecon/econ_data/blob/master/APIs/ECB.ipynb): Data on government bond yields in selected EU countries. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/APIs/ECB.ipynb) 28 | 29 | - [FRED](https://github.com/bdecon/econ_data/blob/master/APIs/FRED.ipynb): Collected statistics from the Federal Reserve Bank of Saint Louis. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/APIs/FRED.ipynb) 30 | 31 | - [Energy Information Administration](https://github.com/bdecon/econ_data/blob/master/APIs/EIA.ipynb): Energy production and consumption data primarily for the US but with some international data. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/APIs/EIA.ipynb) 32 | 33 | - [International Labour Organization](https://github.com/bdecon/econ_data/blob/master/APIs/ILO.ipynb): Labor statistics for many countries, including harmonized microdata results. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/APIs/ILO.ipynb) 34 | 35 | - [International Monetary Fund](https://github.com/bdecon/econ_data/blob/master/APIs/IMF.ipynb): Statistics published by the IMF's Statistics Department, including International Financial Statistics (IFS) data. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/APIs/IMF.ipynb) 36 | 37 | - [Organization for Economic Cooperation and Development](https://github.com/bdecon/econ_data/blob/master/APIs/OECD.ipynb): Detailed analysis/harmonization of data from 35 or so wealthy countries. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/APIs/OECD.ipynb) 38 | 39 | - [World Bank](https://github.com/bdecon/econ_data/blob/master/APIs/World_Bank.ipynb): Data on poverty and international development for most countries.
[nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/APIs/World_Bank.ipynb) 40 | 41 | - [Yahoo! Finance](https://github.com/bdecon/econ_data/blob/master/APIs/Yahoo_Finance.ipynb): Daily stock price time series. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/APIs/Yahoo_Finance.ipynb) 42 | 43 | -------------------------------------------------------------------------------- /APIs/Yahoo_Finance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Yahoo! Finance API with Python\n", 8 | "=====\n", 9 | "\n", 10 | "## Yahoo Finance dowload historical stock index data\n", 11 | "\n", 12 | "-----\n", 13 | "\n", 14 | "*September 3, 2017*
\n", 15 | "*@bd_econ* \n", 16 | "\n", 17 | "This example retrieves the adjusted daily closing price for the Dow Jones Industrial Average, S&P 500 and Nasdaq 100. " 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2018-06-09T18:16:15.756860Z", 26 | "start_time": "2018-06-09T18:16:15.753346Z" 27 | } 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "import requests\n", 32 | "import re\n", 33 | "import pandas as pd\n", 34 | "from io import StringIO\n", 35 | "import time " 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Parameters/Settings" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "ExecuteTime": { 50 | "end_time": "2018-06-09T18:16:18.314632Z", 51 | "start_time": "2018-06-09T18:16:18.311623Z" 52 | } 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "tickers = ['^DJI', '^GSPC', '^IXIC']\n", 57 | "start = '1104537600'\n", 58 | "base = 'https://finance.yahoo.com/quote/'\n", 59 | "base2 = 'https://query1.finance.yahoo.com/v7/finance/download/'\n", 60 | "end = int(time.time())\n", 61 | "dates = '?period1={}&period2={}'.format(start, end)\n", 62 | "param = '&interval=1d&filter=history&frequency=1d'" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "## Request Data" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "ExecuteTime": { 77 | "end_time": "2018-06-09T18:17:58.955878Z", 78 | "start_time": "2018-06-09T18:17:55.435661Z" 79 | } 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "d = {}\n", 84 | "for t in tickers:\n", 85 | " url = '{}{}/history{}{}'.format(base, t, dates, param)\n", 86 | " s = requests.Session()\n", 87 | " r = s.get(url)\n", 88 | " regex = '\"CrumbStore\":{\"crumb\":\"(.+?)\"},'\n", 89 | " pattern = re.compile(regex)\n", 90 | " crumb = re.findall(pattern, 
r.text)[0]\n", 91 | " param2 = '{}{}&interval=1d&events=history&crumb={}/Q'.format(\n", 92 | " t, dates, crumb)\n", 93 | " url2 = '{}{}'.format(base2, param2)\n", 94 | " data = s.post(url2)\n", 95 | " csv = StringIO(data.text)\n", 96 | " d[t[1:]] = pd.read_table(csv, sep=',',\n", 97 | " parse_dates=['Date']).set_index('Date')['Adj Close']\n", 98 | "pd.DataFrame(d).tail()" 99 | ] 100 | } 101 | ], 102 | "metadata": { 103 | "kernelspec": { 104 | "display_name": "Python 3", 105 | "language": "python", 106 | "name": "python3" 107 | }, 108 | "language_info": { 109 | "codemirror_mode": { 110 | "name": "ipython", 111 | "version": 3 112 | }, 113 | "file_extension": ".py", 114 | "mimetype": "text/x-python", 115 | "name": "python", 116 | "nbconvert_exporter": "python", 117 | "pygments_lexer": "ipython3", 118 | "version": "3.8.2" 119 | } 120 | }, 121 | "nbformat": 4, 122 | "nbformat_minor": 2 123 | } 124 | -------------------------------------------------------------------------------- /APIs/codes/country_codes.csv: -------------------------------------------------------------------------------- 1 | id,text 2 | all,All 3 | 4,Afghanistan 4 | 8,Albania 5 | 12,Algeria 6 | 20,Andorra 7 | 24,Angola 8 | 660,Anguilla 9 | 28,Antigua and Barbuda 10 | 32,Argentina 11 | 51,Armenia 12 | 533,Aruba 13 | 36,Australia 14 | 40,Austria 15 | 31,Azerbaijan 16 | 44,Bahamas 17 | 48,Bahrain 18 | 50,Bangladesh 19 | 52,Barbados 20 | 112,Belarus 21 | 56,Belgium 22 | 58,Belgium-Luxembourg 23 | 84,Belize 24 | 204,Benin 25 | 60,Bermuda 26 | 64,Bhutan 27 | 68,Bolivia (Plurinational State of) 28 | 535,Bonaire 29 | 70,Bosnia Herzegovina 30 | 72,Botswana 31 | 92,Br. Virgin Isds 32 | 76,Brazil 33 | 96,Brunei Darussalam 34 | 100,Bulgaria 35 | 854,Burkina Faso 36 | 108,Burundi 37 | 132,Cabo Verde 38 | 116,Cambodia 39 | 120,Cameroon 40 | 124,Canada 41 | 136,Cayman Isds 42 | 140,Central African Rep. 
43 | 148,Chad 44 | 152,Chile 45 | 156,China 46 | 344,"China, Hong Kong SAR" 47 | 446,"China, Macao SAR" 48 | 170,Colombia 49 | 174,Comoros 50 | 178,Congo 51 | 184,Cook Isds 52 | 188,Costa Rica 53 | 384,Cote d'Ivoire 54 | 191,Croatia 55 | 192,Cuba 56 | 531,Curacao 57 | 196,Cyprus 58 | 203,Czechia 59 | 200,Czechoslovakia 60 | 408,Dem. People's Rep. of Korea 61 | 180,Dem. Rep. of the Congo 62 | 208,Denmark 63 | 262,Djibouti 64 | 212,Dominica 65 | 214,Dominican Rep. 66 | 588,East and West Pakistan 67 | 218,Ecuador 68 | 818,Egypt 69 | 222,El Salvador 70 | 226,Equatorial Guinea 71 | 232,Eritrea 72 | 233,Estonia 73 | 231,Ethiopia 74 | 97,EU-28 75 | 234,Faeroe Isds 76 | 238,Falkland Isds (Malvinas) 77 | 242,Fiji 78 | 246,Finland 79 | 251,France 80 | 254,French Guiana 81 | 258,French Polynesia 82 | 583,FS Micronesia 83 | 266,Gabon 84 | 270,Gambia 85 | 268,Georgia 86 | 276,Germany 87 | 288,Ghana 88 | 292,Gibraltar 89 | 300,Greece 90 | 304,Greenland 91 | 308,Grenada 92 | 312,Guadeloupe 93 | 320,Guatemala 94 | 324,Guinea 95 | 624,Guinea-Bissau 96 | 328,Guyana 97 | 332,Haiti 98 | 336,Holy See (Vatican City State) 99 | 340,Honduras 100 | 348,Hungary 101 | 352,Iceland 102 | 699,India 103 | 356,"India, excl. Sikkim" 104 | 360,Indonesia 105 | 364,Iran 106 | 368,Iraq 107 | 372,Ireland 108 | 376,Israel 109 | 381,Italy 110 | 388,Jamaica 111 | 392,Japan 112 | 400,Jordan 113 | 398,Kazakhstan 114 | 404,Kenya 115 | 296,Kiribati 116 | 414,Kuwait 117 | 417,Kyrgyzstan 118 | 418,Lao People's Dem. Rep. 119 | 428,Latvia 120 | 422,Lebanon 121 | 426,Lesotho 122 | 430,Liberia 123 | 434,Libya 124 | 440,Lithuania 125 | 442,Luxembourg 126 | 450,Madagascar 127 | 454,Malawi 128 | 458,Malaysia 129 | 462,Maldives 130 | 466,Mali 131 | 470,Malta 132 | 584,Marshall Isds 133 | 474,Martinique 134 | 478,Mauritania 135 | 480,Mauritius 136 | 175,Mayotte 137 | 484,Mexico 138 | 496,Mongolia 139 | 499,Montenegro 140 | 500,Montserrat 141 | 504,Morocco 142 | 508,Mozambique 143 | 104,Myanmar 144 | 580,N. 
Mariana Isds 145 | 516,Namibia 146 | 524,Nepal 147 | 530,Neth. Antilles 148 | 532,Neth. Antilles and Aruba 149 | 528,Netherlands 150 | 540,New Caledonia 151 | 554,New Zealand 152 | 558,Nicaragua 153 | 562,Niger 154 | 566,Nigeria 155 | 579,Norway 156 | 512,Oman 157 | 490,"Other Asia, nes" 158 | 586,Pakistan 159 | 585,Palau 160 | 591,Panama 161 | 598,Papua New Guinea 162 | 600,Paraguay 163 | 459,Peninsula Malaysia 164 | 604,Peru 165 | 608,Philippines 166 | 616,Poland 167 | 620,Portugal 168 | 634,Qatar 169 | 410,Rep. of Korea 170 | 498,Rep. of Moldova 171 | 638,Reunion 172 | 642,Romania 173 | 643,Russian Federation 174 | 646,Rwanda 175 | 647,Ryukyu Isd 176 | 461,Sabah 177 | 654,Saint Helena 178 | 659,Saint Kitts and Nevis 179 | 658,"Saint Kitts, Nevis and Anguilla" 180 | 662,Saint Lucia 181 | 534,Saint Maarten 182 | 666,Saint Pierre and Miquelon 183 | 670,Saint Vincent and the Grenadines 184 | 882,Samoa 185 | 674,San Marino 186 | 678,Sao Tome and Principe 187 | 457,Sarawak 188 | 682,Saudi Arabia 189 | 686,Senegal 190 | 688,Serbia 191 | 891,Serbia and Montenegro 192 | 690,Seychelles 193 | 694,Sierra Leone 194 | 702,Singapore 195 | 703,Slovakia 196 | 705,Slovenia 197 | 711,So. African Customs Union 198 | 90,Solomon Isds 199 | 706,Somalia 200 | 710,South Africa 201 | 728,South Sudan 202 | 724,Spain 203 | 144,Sri Lanka 204 | 275,State of Palestine 205 | 729,Sudan 206 | 740,Suriname 207 | 748,Swaziland 208 | 752,Sweden 209 | 757,Switzerland 210 | 760,Syria 211 | 762,Tajikistan 212 | 807,TFYR of Macedonia 213 | 764,Thailand 214 | 626,Timor-Leste 215 | 768,Togo 216 | 772,Tokelau 217 | 776,Tonga 218 | 780,Trinidad and Tobago 219 | 788,Tunisia 220 | 792,Turkey 221 | 795,Turkmenistan 222 | 796,Turks and Caicos Isds 223 | 798,Tuvalu 224 | 800,Uganda 225 | 804,Ukraine 226 | 784,United Arab Emirates 227 | 826,United Kingdom 228 | 834,United Rep. 
of Tanzania 229 | 858,Uruguay 230 | 850,US Virgin Isds 231 | 842,USA 232 | 860,Uzbekistan 233 | 548,Vanuatu 234 | 862,Venezuela 235 | 704,Viet Nam 236 | 876,Wallis and Futuna Isds 237 | 887,Yemen 238 | 894,Zambia 239 | 716,Zimbabwe 240 | -------------------------------------------------------------------------------- /APIs/rec_dates.csv: -------------------------------------------------------------------------------- 1 | peak,trough 2 | 1970-01-01,1970-12-01 3 | 1973-11-01,1975-04-01 4 | 1980-02-01,1980-08-01 5 | 1981-08-01,1982-12-01 6 | 1990-08-01,1991-04-01 7 | 2001-04-01,2001-12-01 8 | 2008-01-01,2009-07-01 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # econ_data 2 | 3 | ### Tools for Working with Economic Data in Python 4 | 5 | [@bd_econ](https://twitter.com/bd_econ)
6 | *Updated: September 6, 2020* 7 | 8 | Off the shelf examples of gathering, cleaning, and storing economic data with Python. Divided into three parts: 1) microdata (basic CPS, CPS-ASEC); 2) economic data APIs; and 3) the bd CPS extract. 9 | 10 | ----- 11 | 12 | Currently includes the following: 13 | 1) [Microdata examples](https://github.com/bdecon/econ_data/tree/master/micro) 14 | * [Basic Monthly CPS](https://github.com/bdecon/econ_data/blob/master/micro/CPS_Example_Notebook_UPDATED.ipynb) 15 | * [CPS ASEC](https://github.com/bdecon/econ_data/blob/master/micro/CPS-ASEC_median_income.ipynb) 16 | * [ACS](https://github.com/bdecon/econ_data/blob/master/micro/ACS_to_CZ_income_map.ipynb) 17 | 2) Economic Data Application Programming Interfaces ([APIs](https://github.com/bdecon/econ_data/tree/master/APIs)) 18 | * [International Monetary Fund (IMF)](https://github.com/bdecon/econ_data/blob/master/APIs/IMF.ipynb) 19 | * [US Bureau of Labor Statistics (BLS)](https://github.com/bdecon/econ_data/blob/master/APIs/BLS.ipynb) 20 | * [US Census Bureau - ACS](https://github.com/bdecon/econ_data/blob/master/APIs/Census_ACS.ipynb) 21 | * [US Census Bureau - Trade](https://github.com/bdecon/econ_data/blob/master/APIs/Census_Trade.ipynb) 22 | * *New* [Energy Information Administration (EIA)](https://github.com/bdecon/econ_data/blob/master/APIs/EIA.ipynb) 23 | * [Federal Reserve Economic Data (FRED)](https://github.com/bdecon/econ_data/blob/master/APIs/FRED.ipynb) 24 | * [UN Comtrade](https://github.com/bdecon/econ_data/blob/master/APIs/ComTrade.ipynb) 25 | * [US Bureau of Economic Analysis](https://github.com/bdecon/econ_data/blob/master/APIs/BEA.ipynb) 26 | * [Organization for Economic Cooperation and Development (OECD)](https://github.com/bdecon/econ_data/blob/master/APIs/OECD.ipynb) 27 | * [Yahoo!
Finance](https://github.com/bdecon/econ_data/blob/master/APIs/Yahoo_Finance.ipynb) 28 | * [European Central Bank (ECB)](https://github.com/bdecon/econ_data/blob/master/APIs/ECB.ipynb) 29 | * [World Bank](https://github.com/bdecon/econ_data/blob/master/APIs/World_Bank.ipynb) 30 | * [International Labour Organization (ILO)](https://github.com/bdecon/econ_data/blob/master/APIs/ILO.ipynb) 31 | 3) [bd CPS](https://github.com/bdecon/econ_data/tree/master/bd_CPS) (a tool for creating harmonized partial extracts from Current Population Survey public use microdata) 32 | * bd_CPS_dd.ipynb 33 | * bd_CPS_reader.ipynb 34 | * additional notebooks, see link. 35 | 36 | If you have a question or find an error, please e-mail me at brian.w.dew@gmail.com. 37 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /bd_CPS/README.md: -------------------------------------------------------------------------------- 1 | # bd CPS 2 | ## BD Economics Current Population Survey Extract 3 | 4 | v0.4.1, updated: May 6, 2021 5 | 6 | Working with Current Population Survey (CPS) public use microdata using jupyter notebooks and python. 
7 | 8 | Brian Dew, twitter: @bd_econ, email: brian.w.dew@gmail.com 9 | 10 | ### Contents 11 | - [Example](#example) 12 | - [Overview](#overview) 13 | - [How to run/ update](#directions) 14 | - [How to add a variable](#directions2) 15 | - [bd CPS variables](#variables) 16 | - [Long-term roadmap](#roadmap) 17 | - [Acknowledgements](#acknowledgements) 18 | - [Contact me](#contact) 19 | - [List of CPS related links](#links) 20 | - [Project dependencies](#dependencies) 21 | 22 | ----- 23 | 24 | 25 | 26 | ### Example 27 | 28 | Input (after running programs on raw data downloaded from Census): 29 | 30 | ``` 31 | import pandas as pd 32 | 33 | df = (pd.read_feather('cps2017.ft') 34 | .query('MONTH == 10 and 25 <= AGE <= 54') 35 | .groupby('EDUC') 36 | .PWSSWGT 37 | .sum()) 38 | ``` 39 | 40 | Output: 41 | 42 | ``` 43 | EDUC 44 | 45 | ADV 16551343.0 46 | COLL 30948892.0 47 | HS 33313412.0 48 | LTHS 11389192.0 49 | SC 33637956.0 50 | 51 | Name: PWSSWGT, dtype: float32 52 | ``` 53 | 54 | The above arbitrary example calculates how many age 25-54 people are in each of five educational categories in October 2017. It shows, for example, that about 16.6 million have advanced degrees. 55 | 56 | 57 | 58 | ### Overview 59 | 60 | **UPDATE: v0.4.1 released.** The bd CPS is a series of jupyter notebooks I wrote to work with monthly Current Population Survey public use microdata. If the notebooks, or any part of them, could be helpful to you, please feel free to use them or modify them in any way. When set up correctly, the notebooks generate annual [feather](https://github.com/wesm/feather) files, for the years from 1989-present, which contain cleaned-up partial extracts of basic monthly CPS data. The raw source microdata files and data dictionaries can be downloaded from the [US Census Bureau's CPS page](https://www.census.gov/data/datasets/time-series/demo/cps/cps-basic.html). 
61 | 62 | The bd CPS notebooks include: 63 | 64 | 1) `bd_CPS_dd.ipynb` reads settings from bd_CPS_details.py and creates a python dictionary with information needed to read the raw CPS microdata files in the next step, and adjust them to be more time-consistent and useful. The program requires downloading the CPS data dictionary text files from the Census CPS page. 65 | 66 | 2) `bd_CPS_reader.ipynb` reads the raw monthy CPS microdata files downloaded from Census and converts them into annual feather format files that can be read by python or R. The feather format is particularly fast when the data are mostly integers or categorical, as in this case. Works for years 1994-onward. 67 | 68 | 3) `bd_CPS_1989-93.ipynb` creates partial extracts for 1989-93. It is a work in progress, but creates many variables that are consistent with those in the 1994-onward extracts. 69 | 70 | Settings and other required code are also contained in the python file bd_CPS_details.py. There is additionally a notebook that downloads regional consumer price index data from BLS (used as the price deflator for real wage series), as well as a separate notebook that generates a unique (over time) household ID for CPS households from mid-1995 onward. Lastly, the bd CPS incorporates several supplements and revisions to the basic monthly CPS. 71 | 72 | 73 | 74 | ### How to run/ update 75 | 76 | The Wednesday following the release of the jobs report, the Census Bureau will release the related previous-month CPS public use microdata in a compressed file on the [Basic Monthly CPS page](https://www.census.gov/data/datasets/time-series/demo/cps/cps-basic.html). The full set of 1994 onward monthly microdata files are available to download on the Census CPS page. NBER [hosts](https://www.nber.org/data/cps_basic.html) the 1989 to 1993 files. For the bd CPS program to work, a local folder must contain the relevant uncompressed CPS microdata files. 
Next, the data dictionary files that correspond to each microdata file should be downloaded and stored in the same folder as the microdata. Separately, to adjust wages for inflation the CPI for each of four US regions should be downloaded using the notebook `bd_CPS_cpi.ipynb` (requires a [free BLS API key](https://data.bls.gov/registrationEngine/)). Version 0.4 of the bd CPS can also generate a unique CPSID for all households, by running `bd_CPS_id.ipynb`. 77 | 78 | The first step in generating the bd CPS is to run the data dictionary generator, which creates a pickled python dictionary that provides information needed for reading the raw monthly CPS microdata files. This is done by running the notebook called `bd_CPS_dd.ipynb`. To run the bd CPS for 2000, 2001, and 2002, which utilize revised 2000-based weights and revised union data, or for December 2007, which uses revised weights, or for 2015-16, which uses separate data to identify persons with professional certifications, or for May 2020 onward, which include the COVID-19 supplemental questions, you'll also need to download and unzip the related source files and run `bd_CPS_revisions_reader.ipynb`. 79 | 80 | The next step is to run the notebook called `bd_CPS_reader.ipynb`. This will create a feather file called `cpsYYYY.ft` for each year included in the command in the `bd_CPS_reader` notebook. The feather file can be read into pandas as a dataframe, and, as I understand but have not tested, can be read into R and other statistical software programs. The file contains a subset of variables that are most commonly used for research. 81 | 82 | 83 | 84 | ### How to add a variable 85 | 86 | To include an additional CPS variable in your local version of the bd CPS extract, add the variable name (from the Census data dictionary) to the list of variables names in `VarList` in `bd_CPS_details.py` and re-run `bd_CPS_dd.ipynb` and `bd_CPS_reader.ipynb`. 
87 | 88 | 89 | 90 | ### bd CPS variables 91 | 92 | The bd CPS contains several variables that are recodes of other CPS variables or combinations of CPS data and outside data. The two most important examples of this are the labor force status (`LFS`) and the real wage variables (`RHRWAGE` and `RWKWAGE`). 93 | 94 | The bd CPS includes a [codebook](https://github.com/bdecon/econ_data/blob/master/bd_CPS/codebook.txt) that shows which variables are available for which dates, and what values the variables include. 95 | 96 | Details on selected bd CPS variables are as follows: 97 | 98 | * `LFS` - Labor force status - Employed, Unemployed, or Not in Labor Force (NILF). 99 | * `COW1` - Class of worker on first job: Federal Government, State Government, Local Government, Private, Self-employed Incorporated, Self-employed Unincorporated, Without Pay. 100 | * `NILFREASON` - Reason for non-participation in the labor market: Discouraged, Disabled/Ill, Family, Retired, In School, Other (currently available 1994-onward only). 101 | * `HRWAGE` - Hourly wage - Available in ORG quartersample. 102 | * `WKEARN` - Usual weekly earnings - Same as above, except the usual weekly pay (therefore factoring in hours worked). 103 | * `WKEARNADJ` - Usual weekly earnings with topcode replaced with estimated mean above topcode. 104 | * `HRWAGEADJ` - Hourly wage but also includes wages based on imputed hours for observations where usual weekly hours vary and uses WKEARNADJ. 105 | * `MINWAGE` - equal to 1 if worker is paid the federal minimum wage or less. 106 | * `PAIDHRLY` - equal to 1 if paid hourly and 0 if person has earnings but is not paid hourly. 107 | * `INDGRP` - Industry group of first job - Consistent industry groups for first job: Construction and mining (also includes agriculture and the like), Manufacturing, Trade, transportation, and utilities, Finance and business services (also includes Information and the like), Leisure and hospitality, and Public administration.
See bd_CPS_reader.ipynb for mapping. 108 | * `INDM` - Major industry group on first job. More groups than INDGRP. 109 | * `UNEMPTYPE` - type of unemployment: job loser, job leaver, new entrant, or re-entrant. 110 | * `UNEMPDUR` - duration of unemployment, in weeks. Slight definition change in 1994 revamp. 111 | * `VETERAN` - binary variable equal to 1 if served active duty armed forces. 112 | * `UNION` - equal to 1 if a union member or covered by a union contract. 113 | * `UNIONMEM` - equal to 1 if a union member. 114 | * `CERT` - has a professional certification (available 2015-onward). 115 | * `STATE` - conversion of state FIPS code to two letter state abbreviation. 116 | * `REGION` - Census region (Northeast, South, Midwest, West) 117 | * `CBSA` - center-based statistical area (where identified). 118 | * `CSA` - consolidated statistical area (where identified). 119 | * `EDUC` - Highest level of education obtained - Maps the educational categories to five groups: Less than high school, High school, Some college, Bachelor degree, Advanced degree. 120 | * `EDUCDT` - Detailed highest level of education attained. 121 | * `WBHAO` - race/ethnic group - Each observation is mapped to one of five racial/ethnic groups: White, Black, Hispanic, Asian, and Other. White is white non-Hispanic only, black is any black non-Hispanic, Asian is any Asian but not black and non-Hispanic, Other is Native American, Native Hawaiian, Pacific Islander, and other groups. Hispanic is someone of Hispanic ethnicity of any race. 122 | * `WBHAOM` - race/ethnic group - white, non-Hispanic only, black, non-Hispanic only, Asian or Pacific Islander, non-Hispanic only, Native American, non-Hispanic only, persons of more than one racial group but non-Hispanic, and Hispanic, and race/ethnicity. Available 2003 onward, only. 123 | * `MARRIED` - binary variable equal to 1 if married and otherwise 0. 124 | * `FORBORN` - binary variable equal to 1 if born outside the US and otherwise 0.
125 | * `CTYBIRTH` - country of birth. 126 | * `SCHENR` - binary variable equal to 1 if enrolled in high school, college, or university and otherwise 0. 127 | * `PTECON` - binary variable equal to 1 if usually part-time for economic reasons and otherwise 0. 128 | * `WORKFT` - equal to one if person worked full time during the reference week (35 hours or more) regardless of whether they usually work full-time. 129 | * `ABSTYPE` - reason person is not at work or working part time during the reference week. 130 | * `PRNMCHLD` - number of own children under age 18 (available November 1999-onward). 131 | * `DISABILITY` - binary equal to one if person has any of six disabilities (available June 2008 onward). 132 | * `CPSID` - unique (over time, in bd CPS) household ID (available 1998-onward; OPTIONAL - run the reader, run `bd_CPS_id` and then re-run the reader, to add the `CPSID`). 133 | * `BASICWGT` - weight equal to `PWSSWGT` before 1998 and `PWCMPWGT` after. The weight variables use the 2000-based revised weights for the years 2000-2002 and the December 2007 revised weights. 134 | 135 | 136 | 137 | ### Long-term road map 138 | 139 | The following items are included in the proposed version 1.0: Cleaned-up industry, occupation, and geography codes, complete coverage for CPSID, new CPSIDP for persons within households, new longitudinal weights, and CPI data without an API key. I'd love help, comments, or suggestions. See [active issues](https://github.com/bdecon/econ_data/issues) on the project's github repo. 140 | 141 | Separately, if someone is willing to fund some server space, I would *really* like to put the actual bd_CPS data online. This would make it possible for people to easily use the fruits of my labor, which, I think, make CPS analysis much easier for people who like python and R instead of Stata. Please contact me if you want to chip in for this (brian.w.dew@gmail.com). 
142 | 143 | 144 | 145 | ### Acknowledgements 146 | 147 | Many many thanks to John Schmitt for countless hours of kind and patient guidance. Many thanks to the staff and management of CEPR for giving me the chance to learn about the CPS. Thanks to EPI, and Ben Zipperer in particular, for providing very helpful documentation. Thanks to NBER, FRBATL, FRBKC, IPUMS, Urban Institute, Tom Augspurger, and staff of BLS and Census, for making analysis of the CPS easier by putting helpful information online. The bd CPS is basically just translating a lot of other people's work into python code. 148 | 149 | 150 | 151 | ### Contact me 152 | 153 | I would really appreciate feedback, especially if you spot an error. I also welcome opportunities to work with people on projects that might make use of these notebooks, and would be most grateful for any help in making the project better! Feel free to email me at brian.w.dew@gmail.com. 154 | 155 | 156 | 157 | 158 | ### List of CPS related links 159 | 160 | [BLS regional CPI](https://www.bls.gov/cpi/regional-resources.htm) 161 | 162 | [CEPR data CPS extracts](http://ceprdata.org/cps-uniform-data-extracts/) 163 | 164 | [FRBATL Labor Market Status Categorization](https://www.frbatlanta.org/chcs/human-capital-currents/2015/0612-measuring-labor-market-status-using-basic-data.aspx) 165 | 166 | [FRBKC CPS resources](https://www.kansascityfed.org/research/kcdc/cps) 167 | 168 | [FRBKC Pseudocode](https://www.kansascityfed.org/research/kcdc/cps/coreinfo/pseudocode/hrswk) 169 | 170 | [US Census Bureau's Basic Monthly CPS page](https://www.census.gov/data/datasets/time-series/demo/cps/cps-basic.html) 171 | 172 | [NBER CPS Basic Data](http://www.nber.org/data/cps_basic.html) 173 | 174 | [NBER CPS Supplements](https://www.nber.org/data/current-population-survey-data.html) 175 | 176 | Tom Augspurger CPS in Python examples: 177 | 178 | [Part 1: Using Python to tackle the CPS](http://tomaugspurger.github.io/tackling%20the%20cps.html) 179 | 180 | [Part
2: Using Python to tackle the CPS](http://tomaugspurger.github.io/tackling%20the%20cps%20%28part%202%29.html) 181 | 182 | [Part 3: Using Python to tackle the CPS](http://tomaugspurger.github.io/tackling%20the%20cps%20%28part%203%29.html) 183 | 184 | [Part 4: Using Python to tackle the CPS](http://tomaugspurger.github.io/tackling%20the%20cps%20%28part%204%29.html) 185 | 186 | ------ 187 | 188 | 189 | 190 | ### Project dependencies 191 | 192 | In addition to the files in this repo, to run the bd_CPS notebook, you will need: 193 | 194 | - CPS microdata files from Census for 1994-present and from NBER (renamed slightly) for 1989-93; 195 | 196 | - Data dictionary files from Census for 1994-present and from NBER for 1989-93; 197 | 198 | - Revised data from Census for 2000-based weights, and corrected union data (2000-2003); 199 | 200 | - Revised data from Census for revised December 2007 weights; 201 | 202 | - Additional data from Census for 2015-16 professional certification variables; 203 | 204 | - Additional data on COVID-19, covering May 2020 onward; 205 | 206 | - BLS API code (to retrieve CPI data, free but requires registration); 207 | 208 | - Python 3.7 installation (I recommend miniconda); 209 | 210 | - pandas 0.24 or later; 211 | 212 | - numpy 1.12 or later; 213 | 214 | - jupyter; 215 | 216 | - feather-format; and 217 | 218 | - requests package to access BLS API 219 | 220 | -------------------------------------------------------------------------------- /bd_CPS/bd_CPS_cpi.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### bd econ CPS price level retrieval\n", 8 | "\n", 9 | "bd_CPS_cpi.ipynb\n", 10 | "\n", 11 | "January 30, 2019\n", 12 | "\n", 13 | "Brian Dew, @bd_econ\n", 14 | "\n", 15 | "-----\n", 16 | "This file is used to download the 1989-present consumer price indices for all urban consumers, as well as for urban 
consumers in the four census regions: Northeast, Midwest, South, and West. These data are used to adjust the wage and overtime values reported in the current population survey for changes to the price level. The 1989 to 1999 consumer price data does tend to overstate the inflation rate, thus making wage growth seem lower than it actually was over the period. Therefore, I may opt, at a later date, to replace the regional CPI approach with the BLS research series, referred to as the CPI-U-RS." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "ExecuteTime": { 24 | "end_time": "2024-06-12T20:41:23.396599Z", 25 | "start_time": "2024-06-12T20:41:23.109108Z" 26 | }, 27 | "code_folding": [], 28 | "execution": { 29 | "iopub.execute_input": "2025-01-17T12:49:57.696562Z", 30 | "iopub.status.busy": "2025-01-17T12:49:57.696324Z", 31 | "iopub.status.idle": "2025-01-17T12:49:58.213006Z", 32 | "shell.execute_reply": "2025-01-17T12:49:58.212496Z", 33 | "shell.execute_reply.started": "2025-01-17T12:49:57.696546Z" 34 | } 35 | }, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "pandas: 2.2.2\n", 42 | "requests: 2.32.3\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "# Import packages and bls api key\n", 48 | "import pandas as pd\n", 49 | "print('pandas:', pd.__version__)\n", 50 | "import os\n", 51 | "import requests\n", 52 | "print('requests:', requests.__version__)\n", 53 | "import json\n", 54 | "import config\n", 55 | "import time\n", 56 | "\n", 57 | "os.chdir('/home/brian/Documents/CPS/data/')\n", 58 | "\n", 59 | "def fred_df2(series, start='1989', skip=1000):\n", 60 | "\turl = f'https://fred.stlouisfed.org/data/{series}'\n", 61 | "\tdf = pd.read_html(url, parse_dates=True, skiprows=skip)[1].set_index('DATE')['VALUE']\n", 62 | "\tdf.index = pd.to_datetime(df.index)\n", 63 | "\t\n", 64 | "\treturn df.loc[start:]\n", 65 | "\n", 66 | "def fred_df3(series, start='1989'):\n", 67 | " url = 
(f'https://fred.stlouisfed.org/graph/fredgraph.csv?id={series}')\n", 68 | " df = pd.read_csv(url, index_col='observation_date', parse_dates=True)[series]\n", 69 | " return df.loc[start:]" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "metadata": { 76 | "ExecuteTime": { 77 | "end_time": "2024-06-12T20:41:25.361349Z", 78 | "start_time": "2024-06-12T20:41:23.398245Z" 79 | }, 80 | "execution": { 81 | "iopub.execute_input": "2025-01-17T12:49:58.214051Z", 82 | "iopub.status.busy": "2025-01-17T12:49:58.213800Z", 83 | "iopub.status.idle": "2025-01-17T12:49:59.584599Z", 84 | "shell.execute_reply": "2025-01-17T12:49:59.584079Z", 85 | "shell.execute_reply.started": "2025-01-17T12:49:58.214036Z" 86 | } 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "# Date of latest CPS file\n", 91 | "# List of monthly raw CPS data files to process\n", 92 | "files = [file for file in os.listdir() \n", 93 | " if file.endswith('pub.dat')]\n", 94 | "\n", 95 | "cps_mo = pd.Series([pd.to_datetime(f[:5], format='%b%y') \n", 96 | " for f in files]).sort_values().iloc[-1]\n", 97 | "\n", 98 | "# Retrieve CPI\n", 99 | "srs = {'CPIAUCNS': 'ALL',\n", 100 | " 'CUUR0100SA0': 'Northeast',\n", 101 | " 'CUUR0200SA0': 'Midwest',\n", 102 | " 'CUUR0300SA0': 'South',\n", 103 | " 'CUUR0400SA0': 'West'}\n", 104 | "\n", 105 | "df = pd.DataFrame()\n", 106 | "for i, n in srs.items():\n", 107 | " df[n] = fred_df3(i).astype('float')\n", 108 | " \n", 109 | "# Handle cases with CPS before CPI\n", 110 | "if df.index[-1] < cps_mo:\n", 111 | " # Retrieve latest nowcast\n", 112 | " cpsdt = cps_mo.strftime('%B %Y')\n", 113 | " print(f'CPI not yet available for {cpsdt}, retrieving nowcast')\n", 114 | " url = 'https://www.clevelandfed.org/indicators-and-data/inflation-nowcasting'\n", 115 | " r = pd.read_html(url)[0].set_index('Month')\n", 116 | " nowcast = 1 + (float(r.loc[cpsdt, 'CPI']) / 100)\n", 117 | " df.loc[cps_mo] = df.iloc[-1] * nowcast\n", 118 | " \n", 119 | "cpi = (df.iloc[-1] / 
df)\n", 120 | "cpi.to_csv('clean/cpi.csv')" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "BLS version stopped working" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 3, 175 | "metadata": { 176 | "ExecuteTime": { 177 | "end_time": "2024-06-12T20:41:25.371100Z", 178 | "start_time": "2024-06-12T20:41:25.365303Z" 179 | }, 180 | "code_folding": [], 181 | "execution": { 182 | "iopub.execute_input": "2025-01-17T12:49:59.585361Z", 183 | "iopub.status.busy": "2025-01-17T12:49:59.585211Z", 184 | "iopub.status.idle": "2025-01-17T12:49:59.588390Z", 185 | "shell.execute_reply": "2025-01-17T12:49:59.587790Z", 186 | "shell.execute_reply.started": "2025-01-17T12:49:59.585346Z" 187 | } 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "# # Code to update CPI as needed\n", 192 | "# api_url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'\n", 193 | "\n", 194 | "# # API key in config.py which contains: bls_key = 'key'\n", 195 | "# key = f'?registrationkey={config.bls_key}'\n", 196 | "\n", 197 | "# # Series stored as a dictionary\n", 198 | "# series_dict = {'CUUR0000SA0': 'ALL',\n", 
199 | "# 'CUUR0100SA0': 'Northeast',\n", 200 | "# 'CUUR0200SA0': 'Midwest',\n", 201 | "# 'CUUR0300SA0': 'South',\n", 202 | "# 'CUUR0400SA0': 'West'}\n", 203 | "\n", 204 | "# # Start year and end year\n", 205 | "# date_r = (1989, 2023)\n", 206 | "\n", 207 | "# # Because API requests are limited to 10 years at a time,\n", 208 | "# # this code splits the dates above in 10-year chunks\n", 209 | "# dates = [(str(date_r[0]), str(date_r[1]))]\n", 210 | "# while int(dates[-1][1]) - int(dates[-1][0]) > 10:\n", 211 | "# dates = [(str(date_r[0]), str(date_r[0]+9))]\n", 212 | "# d1 = int(dates[-1][0])\n", 213 | "# while int(dates[-1][1]) < date_r[1]:\n", 214 | "# d1 = d1 + 10\n", 215 | "# d2 = min([date_r[1], d1+9])\n", 216 | "# dates.append((str(d1),(d2)))\n", 217 | " \n", 218 | "# df = pd.DataFrame()\n", 219 | "\n", 220 | "# for start, end in dates:\n", 221 | "# # Submit the list of series as data\n", 222 | "# data = json.dumps({\n", 223 | "# \"seriesid\": list(series_dict.keys()),\n", 224 | "# \"startyear\": start, \"endyear\": end})\n", 225 | "\n", 226 | "# # Post request for the data\n", 227 | "# p = requests.post(f'{api_url}{key}', \n", 228 | "# headers={'Content-type': 'application/json',\n", 229 | "# 'User-Agent': \n", 230 | "# 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0'}, \n", 231 | "# data=data).json()\n", 232 | "# time.sleep(0.1)\n", 233 | "# for s in p['Results']['series']:\n", 234 | "# col = series_dict[s['seriesID']]\n", 235 | "# for r in s['data']:\n", 236 | "# date = pd.to_datetime(f'{r[\"periodName\"]} {r[\"year\"]}')\n", 237 | "# df.at[date, col] = float(r['value'])\n", 238 | "# df = df.sort_index()\n", 239 | "# # Output results\n", 240 | "# print(f'Post Request Status: {p[\"status\"]}')\n", 241 | "# print('Latest month: ', df.index[-1])\n", 242 | " \n", 243 | "# cpi = (df.iloc[-1] / df)\n", 244 | "# cpi.to_csv('cpi.csv')" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": {}, 251 
| "outputs": [], 252 | "source": [] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [] 260 | } 261 | ], 262 | "metadata": { 263 | "kernelspec": { 264 | "display_name": "Python 3 (ipykernel)", 265 | "language": "python", 266 | "name": "python3" 267 | }, 268 | "language_info": { 269 | "codemirror_mode": { 270 | "name": "ipython", 271 | "version": 3 272 | }, 273 | "file_extension": ".py", 274 | "mimetype": "text/x-python", 275 | "name": "python", 276 | "nbconvert_exporter": "python", 277 | "pygments_lexer": "ipython3", 278 | "version": "3.12.4" 279 | } 280 | }, 281 | "nbformat": 4, 282 | "nbformat_minor": 4 283 | } 284 | -------------------------------------------------------------------------------- /micro/CBSA_2YR_Indicators.csv: -------------------------------------------------------------------------------- 1 | GTCBSA,Count,EPOP,P10wage,Union,Unemp,Name 2 | 10580,359,80.59196991635245,12.137668982296496,33.76380205154419,2.63194739818573,"Albany-Schenectady-Troy, NY" 3 | 10740,1380,77.22387554304933,10.734826360109398,8.999846130609512,3.2405123114585876,"Albuquerque, NM" 4 | 12060,2787,81.6256582736969,11.22676116310731,4.258915036916733,2.861403115093708,"Atlanta-Sandy Springs-Marietta, GA" 5 | 12420,1038,82.2737196920917,11.057835814919947,3.041069768369198,2.1704018115997314,"Austin-Round Rock-San Marcos, TX" 6 | 12540,302,66.90368800824632,10.599819894324048,19.17772740125656,9.57183837890625,"Bakersfield-Delano, CA" 7 | 12580,1181,81.18678743863474,12.16485858983148,12.053236365318298,3.4748613834381104,"Baltimore-Towson, MD" 8 | 12940,769,79.09964957106712,10.564339926380931,4.004212468862534,3.2160215079784393,"Baton Rouge, LA" 9 | 13740,547,84.07964769876645,11.853009839208434,12.68882155418396,2.929023839533329,"Billings, MT" 10 | 13820,919,73.98375798146908,10.694885262521144,11.733197420835495,3.479926288127899,"Birmingham-Hoover, AL" 11 | 
14260,1315,81.4937015382326,11.262313445069168,4.10507507622242,2.19527967274189,"Boise City-Nampa, ID" 12 | 14460,3602,83.63051414489746,13.05647135336624,12.037485092878342,2.9117727652192116,"Boston-Cambridge-Quincy, MA-NH" 13 | 14860,478,83.01315017193738,11.555187845368698,13.701653480529785,3.8547784090042114,"Bridgeport-Stamford-Norwalk, CT" 14 | 15380,424,79.27505659716442,11.39754949964753,21.600639820098877,3.6113642156124115,"Buffalo-Niagara Falls, NY" 15 | 15540,930,85.92043762430846,13.000255042686092,11.994532495737076,1.7719600349664688,"Burlington-South Burlington, VT" 16 | 16620,357,71.5050341549569,10.25911002112144,9.762387722730637,5.382524058222771,"Charleston, WV" 17 | 16700,490,78.5636701385951,11.534448726461042,1.304765697568655,1.5618364326655865,"Charleston-North Charleston-Summerville, SC" 18 | 16740,1209,80.99535689618598,10.855023675761233,4.673834890127182,2.6839617639780045,"Charlotte-Gastonia-Rock Hill, NC-SC" 19 | 16860,305,79.18777553110495,10.794941474187961,5.7798705995082855,3.8946159183979034,"Chattanooga, TN-GA" 20 | 16980,4091,79.64133620262146,11.12326857327078,14.308413863182068,3.6912839859724045,"Chicago-Joliet-Naperville, IL-IN-WI" 21 | 17140,978,82.85874883210438,11.893200608458773,11.159396916627884,2.0265182480216026,"Cincinnati-Middletown, OH-KY-IN" 22 | 17460,849,76.80335421427145,11.789931440367292,15.612085163593292,4.748096317052841,"Cleveland-Elyria-Mentor, OH" 23 | 17900,526,77.69430480320388,11.896347715221085,4.440126940608025,2.783937379717827,"Columbia, SC" 24 | 18140,806,80.91685569304803,10.937231685590174,10.025689005851746,3.318001702427864,"Columbus, OH" 25 | 19100,3167,81.04472160339355,11.148574410941169,5.389170721173286,2.9802700504660606,"Dallas-Fort Worth-Arlington, TX" 26 | 19740,1405,85.82923471576875,12.453109419890305,7.676409929990768,2.5642599910497665,"Denver-Aurora-Broomfield, CO" 27 | 19780,518,88.88259164943848,12.230949860276032,9.914394468069077,1.8668610602617264,"Des Moines-West 
Des Moines, IA" 28 | 19820,1785,77.82449722290039,11.179364205056977,18.02106946706772,3.5917978733778,"Detroit-Warren-Livonia, MI" 29 | 20100,375,79.10542627566134,10.46543438217805,10.62975600361824,4.058358445763588,"Dover, DE" 30 | 21340,303,72.64607228600043,9.149995591862933,9.234639257192612,5.680961534380913,"El Paso, TX" 31 | 22020,784,87.59241218027262,11.576701862555245,6.807486712932587,2.211626246571541,"Fargo, ND-MN" 32 | 22220,632,80.17517758963734,10.947240284270631,1.6735685989260674,2.2615984082221985,"Fayetteville-Springdale-Rogers, AR-MO" 33 | 23420,352,66.79911351470918,10.93413274298518,12.775354087352753,7.210563123226166,"Fresno, CA" 34 | 24340,453,82.38675748950597,12.180443489929608,9.29042249917984,3.1445305794477463,"Grand Rapids-Wyoming, MI" 35 | 24860,526,78.09861147902527,11.374510604688153,2.4113310500979424,2.6714274659752846,"Greenville-Mauldin-Easley, SC" 36 | 25180,313,79.11387499347123,11.821330851158063,7.976314425468445,4.3343618512153625,"Hagerstown-Martinsburg, MD-WV" 37 | 25540,641,80.78930862445203,12.639776595252298,18.591400980949402,3.176991268992424,"Hartford-West Hartford-East Hartford, CT" 38 | 26420,2743,77.15521454811096,10.090912515683614,4.339946433901787,3.7010621279478073,"Houston-Sugar Land-Baytown, TX" 39 | 26580,394,72.49272010502959,10.431407115932334,9.805348515510559,4.5393794775009155,"Huntington-Ashland, WV-KY-OH" 40 | 26620,378,76.63964558840256,10.353272591704524,7.208169251680374,4.38946820795536,"Huntsville, AL" 41 | 26900,1084,83.13970357904897,11.333713448195688,8.592136949300766,2.5455858558416367,"Indianapolis-Carmel, IN" 42 | 27140,643,79.22685958951409,9.649321628841054,3.6895208060741425,2.3477882146835327,"Jackson, MS" 43 | 27260,590,80.23986863688842,11.722371242186092,9.441956132650375,3.3988557755947113,"Jacksonville, FL" 44 | 27980,342,82.88056113907987,11.069488269391002,24.358470737934113,2.894197218120098,"Kahului-Wailuku-Lahaina, HI" 45 | 
28140,1343,84.13748456910021,11.898911122438202,9.694062918424606,2.649597078561783,"Kansas City, MO-KS" 46 | 28940,422,80.87214282774998,10.121181534993061,5.209219083189964,2.9554717242717743,"Knoxville, TN" 47 | 29180,413,74.55146854553564,10.297382730255022,3.635520115494728,5.067672580480576,"Lafayette, LA" 48 | 29820,1864,78.54754328727722,10.74262960988714,15.308420360088348,4.086436331272125,"Las Vegas-Paradise, NV" 49 | 30780,880,78.80899014285531,10.498059852804225,4.566233605146408,2.852735109627247,"Little Rock-North Little Rock-Conway, AR" 50 | 31080,5646,77.61861085891724,11.203069011496682,14.410239458084106,3.723817691206932,"Los Angeles-Long Beach-Anaheim, CA" 51 | 31140,727,81.41023250283212,11.620167765156397,12.766291201114655,3.510476276278496,"Louisville/Jefferson County, KY-IN" 52 | 31540,327,88.83645031213192,13.105974449547219,8.048394322395325,1.1635655537247658,"Madison, WI" 53 | 31700,834,83.17885841434348,12.322102515890501,13.42085748910904,2.583371475338936,"Manchester-Nashua, NH" 54 | 32820,820,76.98604970811246,10.254170156367373,8.416824787855148,3.713763877749443,"Memphis, TN-MS-AR" 55 | 33100,2332,79.95343208312988,9.89906492822806,5.680510774254799,3.3603984862565994,"Miami-Fort Lauderdale-Pompano Beach, FL" 56 | 33340,771,84.0001182898828,12.40998145010656,7.819357514381409,3.0552227050065994,"Milwaukee-Waukesha-West Allis, WI" 57 | 33460,1790,87.15723156929016,13.64280550251967,15.632440149784088,2.411799691617489,"Minneapolis-St. 
Paul-Bloomington, MN-WI" 58 | 34060,324,79.92324733581691,10.945976920250574,8.724634349346161,2.7864355593919754,"Morgantown, WV" 59 | 34980,1148,81.74603623511277,11.437617591546639,5.5109672248363495,1.8867632374167442,"Nashville-Davidson--Murfreesboro--Franklin, TN" 60 | 35300,461,80.09163036311072,13.047094894096325,19.37093287706375,5.420692637562752,"New Haven-Milford, CT" 61 | 35380,1065,79.26220747641864,10.057587360462092,6.02368451654911,2.9829252511262894,"New Orleans-Metairie-Kenner, LA" 62 | 35620,8174,78.47193479537964,11.490482085436318,21.73830419778824,3.4205935895442963,"New York-Northern New Jersey-Long Island, NY-NJ-PA" 63 | 36260,599,78.8757121002963,11.836732316374505,5.427036061882973,2.236824855208397,"Ogden-Clearfield, UT" 64 | 36420,958,78.41042640903208,11.004586585916131,6.15469254553318,3.3399518579244614,"Oklahoma City, OK" 65 | 36540,1160,85.36357933267489,12.134078400120424,8.610019087791443,2.759469486773014,"Omaha-Council Bluffs, NE-IA" 66 | 36740,1025,79.60632924607684,10.222017959253252,4.906842112541199,2.7245402336120605,"Orlando-Kissimmee-Sanford, FL" 67 | 37980,3268,80.08522987365723,11.755873926189631,13.738100230693817,4.108202084898949,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD" 68 | 38060,1957,77.47998833656311,11.92151143064868,4.206294566392899,3.268207609653473,"Phoenix-Mesa-Glendale, AZ" 69 | 38220,347,72.96589404833519,10.133041283576494,4.61396649479866,2.7993831783533096,"Pine Bluff, AR" 70 | 38300,989,80.49796882342632,12.132232898756909,16.435791552066803,3.8390815258026123,"Pittsburgh, PA" 71 | 38860,682,86.35274202832879,12.460443937081445,13.019174337387085,1.5515326522290707,"Portland-South Portland-Biddeford, ME" 72 | 38900,1689,80.88963627815247,12.451021942476727,14.528335630893707,3.9137374609708786,"Portland-Vancouver-Hillsboro, OR-WA" 73 | 39300,1977,79.6460509300232,11.899695146087943,15.942679345607758,3.361048549413681,"Providence-New Bedford-Fall River, RI-MA" 74 | 
39340,465,78.36187438438408,12.745115728169884,4.232854396104813,2.6982728391885757,"Provo-Orem, UT" 75 | 39580,748,82.92311335260376,11.10755047742262,2.637834846973419,2.6521110907197,"Raleigh-Cary, NC" 76 | 40060,554,79.1233284873219,11.455678079814707,4.62307371199131,3.7462569773197174,"Richmond, VA" 77 | 40140,1688,74.40036535263062,11.181807762461773,21.59913182258606,3.5253435373306274,"Riverside-San Bernardino-Ontario, CA" 78 | 40380,433,75.91666216755425,12.049386158209016,18.311141431331635,6.138800829648972,"Rochester, NY" 79 | 40900,986,77.2204408430841,12.478342153211816,20.32015770673752,2.8933085501194,"Sacramento--Arden-Arcade--Roseville, CA" 80 | 41180,1353,81.93959984729837,11.150347058184087,12.796233594417572,3.321760520339012,"St. Louis, MO-IL" 81 | 41540,455,81.3236927534255,10.430159958551755,13.970257341861725,2.2263914346694946,"Salisbury, MD" 82 | 41620,1233,82.45666549652756,12.429583014455341,3.891073539853096,2.520568110048771,"Salt Lake City, UT" 83 | 41700,893,76.76887558786912,10.44689357172879,5.276498571038246,4.012038931250572,"San Antonio-New Braunfels, TX" 84 | 41740,1408,80.32848025333818,12.085045472759388,10.467828810214996,2.3953134194016457,"San Diego-Carlsbad-San Marcos, CA" 85 | 41860,2127,81.89465999603271,14.047298909212227,14.320175349712372,2.7285486459732056,"San Francisco-Oakland-Fremont, CA" 86 | 41940,953,81.6185988226307,14.89526965303884,9.17394906282425,3.525771200656891,"San Jose-Sunnyvale-Santa Clara, CA" 87 | 42660,2074,80.75202703475952,13.47257349371309,18.061460554599762,3.3027667552232742,"Seattle-Tacoma-Bellevue, WA" 88 | 43340,348,71.3694942443947,10.103105899847893,6.499981135129929,5.400945246219635,"Shreveport-Bossier City, LA" 89 | 43620,779,87.88308234629044,12.142397593309848,6.718858331441879,1.5787752345204353,"Sioux Falls, SD" 90 | 44140,329,76.38988113004535,11.67008693998488,19.20141726732254,3.9205625653266907,"Springfield, MA" 91 | 
45300,1381,80.07387931664199,10.610162754510183,5.173010379076004,3.1794890761375427,"Tampa-St. Petersburg-Clearwater, FL" 92 | 46060,404,78.89750053607551,10.698571984447668,5.295708402991295,3.462519124150276,"Tucson, AZ" 93 | 46140,689,75.7468823465372,10.970973760494129,4.937415197491646,3.4558184444904327,"Tulsa, OK" 94 | 46520,1880,78.67490649223328,11.518248422348776,23.196369409561157,2.1576952189207077,"Urban Honolulu, HI" 95 | 47260,610,79.03242279533406,10.702793313510657,5.906898155808449,3.7961963564157486,"Virginia Beach-Norfolk-Newport News, VA-NC" 96 | 47900,6879,83.77501964569092,12.388502781187954,6.846601516008377,3.19642573595047,"Washington-Arlington-Alexandria, DC-VA-MD-WV" 97 | 48620,508,80.47649145669796,11.206759355175716,7.6338030397892,3.9441484957933426,"Wichita, KS" 98 | 49340,587,81.84417225443977,12.533613716629324,14.180564880371094,4.056357964873314,"Worcester, MA" 99 | -------------------------------------------------------------------------------- /micro/CBSA_Names.csv: -------------------------------------------------------------------------------- 1 | 10180,"Abilene, TX" 2 | 10380,"Aguadilla-Isabela-San Sebastián, PR" 3 | 10420,"Akron, OH" 4 | 10500,"Albany, GA" 5 | 10580,"Albany-Schenectady-Troy, NY" 6 | 10740,"Albuquerque, NM" 7 | 10780,"Alexandria, LA" 8 | 10900,"Allentown-Bethlehem-Easton, PA-NJ" 9 | 11020,"Altoona, PA" 10 | 11100,"Amarillo, TX" 11 | 11180,"Ames, IA" 12 | 11260,"Anchorage, AK" 13 | 11300,"Anderson, IN" 14 | 11340,"Anderson, SC" 15 | 11460,"Ann Arbor, MI" 16 | 11500,"Anniston-Oxford, AL" 17 | 11540,"Appleton, WI" 18 | 11700,"Asheville, NC" 19 | 12020,"Athens-Clarke County, GA" 20 | 12060,"Atlanta-Sandy Springs-Marietta, GA" 21 | 12100,"Atlantic City-Hammonton, NJ" 22 | 12220,"Auburn-Opelika, AL" 23 | 12260,"Augusta-Richmond County, GA-SC" 24 | 12420,"Austin-Round Rock-San Marcos, TX" 25 | 12540,"Bakersfield-Delano, CA" 26 | 12580,"Baltimore-Towson, MD" 27 | 12620,"Bangor, ME" 28 | 12700,"Barnstable Town, MA" 
29 | 12940,"Baton Rouge, LA" 30 | 12980,"Battle Creek, MI" 31 | 13020,"Bay City, MI" 32 | 13140,"Beaumont-Port Arthur, TX" 33 | 13380,"Bellingham, WA" 34 | 13460,"Bend, OR" 35 | 13740,"Billings, MT" 36 | 13780,"Binghamton, NY" 37 | 13820,"Birmingham-Hoover, AL" 38 | 13900,"Bismarck, ND" 39 | 13980,"Blacksburg-Christiansburg-Radford, VA" 40 | 14020,"Bloomington, IN" 41 | 14060,"Bloomington-Normal, IL" 42 | 14260,"Boise City-Nampa, ID" 43 | 14460,"Boston-Cambridge-Quincy, MA-NH" 44 | 14500,"Boulder, CO" 45 | 14540,"Bowling Green, KY" 46 | 14740,"Bremerton-Silverdale, WA" 47 | 14860,"Bridgeport-Stamford-Norwalk, CT" 48 | 15180,"Brownsville-Harlingen, TX" 49 | 15260,"Brunswick, GA" 50 | 15380,"Buffalo-Niagara Falls, NY" 51 | 15500,"Burlington, NC" 52 | 15540,"Burlington-South Burlington, VT" 53 | 15940,"Canton-Massillon, OH" 54 | 15980,"Cape Coral-Fort Myers, FL" 55 | 16020,"Cape Girardeau-Jackson, MO-IL" 56 | 16180,"Carson City, NV" 57 | 16220,"Casper, WY" 58 | 16300,"Cedar Rapids, IA" 59 | 16580,"Champaign-Urbana, IL" 60 | 16620,"Charleston, WV" 61 | 16700,"Charleston-North Charleston-Summerville, SC" 62 | 16740,"Charlotte-Gastonia-Rock Hill, NC-SC" 63 | 16820,"Charlottesville, VA" 64 | 16860,"Chattanooga, TN-GA" 65 | 16940,"Cheyenne, WY" 66 | 16980,"Chicago-Joliet-Naperville, IL-IN-WI" 67 | 17020,"Chico, CA" 68 | 17140,"Cincinnati-Middletown, OH-KY-IN" 69 | 17300,"Clarksville, TN-KY" 70 | 17420,"Cleveland, TN" 71 | 17460,"Cleveland-Elyria-Mentor, OH" 72 | 17660,"Coeur d'Alene, ID" 73 | 17780,"College Station-Bryan, TX" 74 | 17820,"Colorado Springs, CO" 75 | 17860,"Columbia, MO" 76 | 17900,"Columbia, SC" 77 | 17980,"Columbus, GA-AL" 78 | 18020,"Columbus, IN" 79 | 18140,"Columbus, OH" 80 | 18580,"Corpus Christi, TX" 81 | 18700,"Corvallis, OR" 82 | 18880,"Crestview-Fort Walton Beach-Destin, FL" 83 | 19060,"Cumberland, MD-WV" 84 | 19100,"Dallas-Fort Worth-Arlington, TX" 85 | 19140,"Dalton, GA" 86 | 19180,"Danville, IL" 87 | 19260,"Danville, VA" 88 | 
19340,"Davenport-Moline-Rock Island, IA-IL" 89 | 19380,"Dayton, OH" 90 | 19460,"Decatur, AL" 91 | 19500,"Decatur, IL" 92 | 19660,"Deltona-Daytona Beach-Ormond Beach, FL" 93 | 19740,"Denver-Aurora-Broomfield, CO" 94 | 19780,"Des Moines-West Des Moines, IA" 95 | 19820,"Detroit-Warren-Livonia, MI" 96 | 20020,"Dothan, AL" 97 | 20100,"Dover, DE" 98 | 20220,"Dubuque, IA" 99 | 20260,"Duluth, MN-WI" 100 | 20500,"Durham-Chapel Hill, NC" 101 | 20740,"Eau Claire, WI" 102 | 20940,"El Centro, CA" 103 | 21060,"Elizabethtown, KY" 104 | 21140,"Elkhart-Goshen, IN" 105 | 21300,"Elmira, NY" 106 | 21340,"El Paso, TX" 107 | 21500,"Erie, PA" 108 | 21660,"Eugene-Springfield, OR" 109 | 21780,"Evansville, IN-KY" 110 | 21820,"Fairbanks, AK" 111 | 21940,"Fajardo, PR" 112 | 22020,"Fargo, ND-MN" 113 | 22140,"Farmington, NM" 114 | 22180,"Fayetteville, NC" 115 | 22220,"Fayetteville-Springdale-Rogers, AR-MO" 116 | 22380,"Flagstaff, AZ" 117 | 22420,"Flint, MI" 118 | 22500,"Florence, SC" 119 | 22520,"Florence-Muscle Shoals, AL" 120 | 22540,"Fond du Lac, WI" 121 | 22660,"Fort Collins-Loveland, CO" 122 | 22900,"Fort Smith, AR-OK" 123 | 23060,"Fort Wayne, IN" 124 | 23420,"Fresno, CA" 125 | 23460,"Gadsden, AL" 126 | 23540,"Gainesville, FL" 127 | 23580,"Gainesville, GA" 128 | 24020,"Glens Falls, NY" 129 | 24140,"Goldsboro, NC" 130 | 24220,"Grand Forks, ND-MN" 131 | 24300,"Grand Junction, CO" 132 | 24340,"Grand Rapids-Wyoming, MI" 133 | 24500,"Great Falls, MT" 134 | 24540,"Greeley, CO" 135 | 24580,"Green Bay, WI" 136 | 24660,"Greensboro-High Point, NC" 137 | 24780,"Greenville, NC" 138 | 24860,"Greenville-Mauldin-Easley, SC" 139 | 25020,"Guayama, PR" 140 | 25060,"Gulfport-Biloxi, MS" 141 | 25180,"Hagerstown-Martinsburg, MD-WV" 142 | 25260,"Hanford-Corcoran, CA" 143 | 25420,"Harrisburg-Carlisle, PA" 144 | 25500,"Harrisonburg, VA" 145 | 25540,"Hartford-West Hartford-East Hartford, CT" 146 | 25620,"Hattiesburg, MS" 147 | 25860,"Hickory-Lenoir-Morganton, NC" 148 | 25980,"Hinesville-Fort Stewart, GA" 149 | 
26100,"Holland-Grand Haven, MI" 150 | 26180,"Honolulu, HI" 151 | 26300,"Hot Springs, AR" 152 | 26380,"Houma-Bayou Cane-Thibodaux, LA" 153 | 26420,"Houston-Sugar Land-Baytown, TX" 154 | 26580,"Huntington-Ashland, WV-KY-OH" 155 | 26620,"Huntsville, AL" 156 | 26820,"Idaho Falls, ID" 157 | 26900,"Indianapolis-Carmel, IN" 158 | 26980,"Iowa City, IA" 159 | 27060,"Ithaca, NY" 160 | 27100,"Jackson, MI" 161 | 27140,"Jackson, MS" 162 | 27180,"Jackson, TN" 163 | 27260,"Jacksonville, FL" 164 | 27340,"Jacksonville, NC" 165 | 27500,"Janesville, WI" 166 | 27620,"Jefferson City, MO" 167 | 27740,"Johnson City, TN" 168 | 27780,"Johnstown, PA" 169 | 27860,"Jonesboro, AR" 170 | 27900,"Joplin, MO" 171 | 28020,"Kalamazoo-Portage, MI" 172 | 28100,"Kankakee-Bradley, IL" 173 | 28140,"Kansas City, MO-KS" 174 | 28420,"Kennewick-Pasco-Richland, WA" 175 | 28660,"Killeen-Temple-Fort Hood, TX" 176 | 28700,"Kingsport-Bristol-Bristol, TN-VA" 177 | 28740,"Kingston, NY" 178 | 28940,"Knoxville, TN" 179 | 29020,"Kokomo, IN" 180 | 29100,"La Crosse, WI-MN" 181 | 29140,"Lafayette, IN" 182 | 29180,"Lafayette, LA" 183 | 29340,"Lake Charles, LA" 184 | 29420,"Lake Havasu City-Kingman, AZ" 185 | 29460,"Lakeland-Winter Haven, FL" 186 | 29540,"Lancaster, PA" 187 | 29620,"Lansing-East Lansing, MI" 188 | 29700,"Laredo, TX" 189 | 29740,"Las Cruces, NM" 190 | 29820,"Las Vegas-Paradise, NV" 191 | 29940,"Lawrence, KS" 192 | 30020,"Lawton, OK" 193 | 30140,"Lebanon, PA" 194 | 30300,"Lewiston, ID-WA" 195 | 30340,"Lewiston-Auburn, ME" 196 | 30460,"Lexington-Fayette, KY" 197 | 30620,"Lima, OH" 198 | 30700,"Lincoln, NE" 199 | 30780,"Little Rock-North Little Rock-Conway, AR" 200 | 30860,"Logan, UT-ID" 201 | 30980,"Longview, TX" 202 | 31020,"Longview, WA" 203 | 31100,"Los Angeles-Long Beach-Santa Ana, CA" 204 | 31140,"Louisville/Jefferson County, KY-IN" 205 | 31180,"Lubbock, TX" 206 | 31340,"Lynchburg, VA" 207 | 31420,"Macon, GA" 208 | 31460,"Madera-Chowchilla, CA" 209 | 31540,"Madison, WI" 210 | 31700,"Manchester-Nashua, 
NH" 211 | 31740,"Manhattan, KS" 212 | 31860,"Mankato-North Mankato, MN" 213 | 31900,"Mansfield, OH" 214 | 32420,"Mayagüez, PR" 215 | 32580,"McAllen-Edinburg-Mission, TX" 216 | 32780,"Medford, OR" 217 | 32820,"Memphis, TN-MS-AR" 218 | 32900,"Merced, CA" 219 | 33100,"Miami-Fort Lauderdale-Pompano Beach, FL" 220 | 33140,"Michigan City-La Porte, IN" 221 | 33260,"Midland, TX" 222 | 33340,"Milwaukee-Waukesha-West Allis, WI" 223 | 33460,"Minneapolis-St. Paul-Bloomington, MN-WI" 224 | 33540,"Missoula, MT" 225 | 33660,"Mobile, AL" 226 | 33700,"Modesto, CA" 227 | 33740,"Monroe, LA" 228 | 33780,"Monroe, MI" 229 | 33860,"Montgomery, AL" 230 | 34060,"Morgantown, WV" 231 | 34100,"Morristown, TN" 232 | 34580,"Mount Vernon-Anacortes, WA" 233 | 34620,"Muncie, IN" 234 | 34740,"Muskegon-Norton Shores, MI" 235 | 34820,"Myrtle Beach-North Myrtle Beach-Conway, SC" 236 | 34900,"Napa, CA" 237 | 34940,"Naples-Marco Island, FL" 238 | 34980,"Nashville-Davidson--Murfreesboro--Franklin, TN" 239 | 35300,"New Haven-Milford, CT" 240 | 35380,"New Orleans-Metairie-Kenner, LA" 241 | 35620,"New York-Northern New Jersey-Long Island, NY-NJ-PA" 242 | 35660,"Niles-Benton Harbor, MI" 243 | 35840,"North Port-Bradenton-Sarasota, FL" 244 | 35980,"Norwich-New London, CT" 245 | 36100,"Ocala, FL" 246 | 36140,"Ocean City, NJ" 247 | 36220,"Odessa, TX" 248 | 36260,"Ogden-Clearfield, UT" 249 | 36420,"Oklahoma City, OK" 250 | 36500,"Olympia, WA" 251 | 36540,"Omaha-Council Bluffs, NE-IA" 252 | 36740,"Orlando-Kissimmee-Sanford, FL" 253 | 36780,"Oshkosh-Neenah, WI" 254 | 36980,"Owensboro, KY" 255 | 37100,"Oxnard-Thousand Oaks-Ventura, CA" 256 | 37340,"Palm Bay-Melbourne-Titusville, FL" 257 | 37380,"Palm Coast, FL" 258 | 37460,"Panama City-Lynn Haven-Panama City Beach, FL" 259 | 37620,"Parkersburg-Marietta-Vienna, WV-OH" 260 | 37700,"Pascagoula, MS" 261 | 37860,"Pensacola-Ferry Pass-Brent, FL" 262 | 37900,"Peoria, IL" 263 | 37980,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD" 264 | 38060,"Phoenix-Mesa-Glendale, AZ" 265 | 
38220,"Pine Bluff, AR" 266 | 38300,"Pittsburgh, PA" 267 | 38340,"Pittsfield, MA" 268 | 38540,"Pocatello, ID" 269 | 38660,"Ponce, PR" 270 | 38860,"Portland-South Portland-Biddeford, ME" 271 | 38900,"Portland-Vancouver-Hillsboro, OR-WA" 272 | 38940,"Port St. Lucie, FL" 273 | 39100,"Poughkeepsie-Newburgh-Middletown, NY" 274 | 39140,"Prescott, AZ" 275 | 39300,"Providence-New Bedford-Fall River, RI-MA" 276 | 39340,"Provo-Orem, UT" 277 | 39380,"Pueblo, CO" 278 | 39460,"Punta Gorda, FL" 279 | 39540,"Racine, WI" 280 | 39580,"Raleigh-Cary, NC" 281 | 39660,"Rapid City, SD" 282 | 39740,"Reading, PA" 283 | 39820,"Redding, CA" 284 | 39900,"Reno-Sparks, NV" 285 | 40060,"Richmond, VA" 286 | 40140,"Riverside-San Bernardino-Ontario, CA" 287 | 40220,"Roanoke, VA" 288 | 40340,"Rochester, MN" 289 | 40380,"Rochester, NY" 290 | 40420,"Rockford, IL" 291 | 40580,"Rocky Mount, NC" 292 | 40660,"Rome, GA" 293 | 40900,"Sacramento--Arden-Arcade--Roseville, CA" 294 | 40980,"Saginaw-Saginaw Township North, MI" 295 | 41060,"St. Cloud, MN" 296 | 41100,"St. George, UT" 297 | 41140,"St. Joseph, MO-KS" 298 | 41180,"St. 
Louis, MO-IL" 299 | 41420,"Salem, OR" 300 | 41500,"Salinas, CA" 301 | 41540,"Salisbury, MD" 302 | 41620,"Salt Lake City, UT" 303 | 41660,"San Angelo, TX" 304 | 41700,"San Antonio-New Braunfels, TX" 305 | 41740,"San Diego-Carlsbad-San Marcos, CA" 306 | 41780,"Sandusky, OH" 307 | 41860,"San Francisco-Oakland-Fremont, CA" 308 | 41900,"San Germán-Cabo Rojo, PR" 309 | 41940,"San Jose-Sunnyvale-Santa Clara, CA" 310 | 41980,"San Juan-Caguas-Guaynabo, PR" 311 | 42020,"San Luis Obispo-Paso Robles, CA" 312 | 42060,"Santa Barbara-Santa Maria-Goleta, CA" 313 | 42100,"Santa Cruz-Watsonville, CA" 314 | 42140,"Santa Fe, NM" 315 | 42220,"Santa Rosa-Petaluma, CA" 316 | 42340,"Savannah, GA" 317 | 42540,"Scranton--Wilkes-Barre, PA" 318 | 42660,"Seattle-Tacoma-Bellevue, WA" 319 | 42680,"Sebastian-Vero Beach, FL" 320 | 43100,"Sheboygan, WI" 321 | 43300,"Sherman-Denison, TX" 322 | 43340,"Shreveport-Bossier City, LA" 323 | 43580,"Sioux City, IA-NE-SD" 324 | 43620,"Sioux Falls, SD" 325 | 43780,"South Bend-Mishawaka, IN-MI" 326 | 43900,"Spartanburg, SC" 327 | 44060,"Spokane, WA" 328 | 44100,"Springfield, IL" 329 | 44140,"Springfield, MA" 330 | 44180,"Springfield, MO" 331 | 44220,"Springfield, OH" 332 | 44300,"State College, PA" 333 | 44600,"Steubenville-Weirton, OH-WV" 334 | 44700,"Stockton, CA" 335 | 44940,"Sumter, SC" 336 | 45060,"Syracuse, NY" 337 | 45220,"Tallahassee, FL" 338 | 45300,"Tampa-St. 
Petersburg-Clearwater, FL" 339 | 45460,"Terre Haute, IN" 340 | 45500,"Texarkana, TX-Texarkana, AR" 341 | 45780,"Toledo, OH" 342 | 45820,"Topeka, KS" 343 | 45940,"Trenton-Ewing, NJ" 344 | 46060,"Tucson, AZ" 345 | 46140,"Tulsa, OK" 346 | 46220,"Tuscaloosa, AL" 347 | 46340,"Tyler, TX" 348 | 46540,"Utica-Rome, NY" 349 | 46660,"Valdosta, GA" 350 | 46700,"Vallejo-Fairfield, CA" 351 | 47020,"Victoria, TX" 352 | 47220,"Vineland-Millville-Bridgeton, NJ" 353 | 47260,"Virginia Beach-Norfolk-Newport News, VA-NC" 354 | 47300,"Visalia-Porterville, CA" 355 | 47380,"Waco, TX" 356 | 47580,"Warner Robins, GA" 357 | 47900,"Washington-Arlington-Alexandria, DC-VA-MD-WV" 358 | 47940,"Waterloo-Cedar Falls, IA" 359 | 48140,"Wausau, WI" 360 | 48300,"Wenatchee-East Wenatchee, WA" 361 | 48540,"Wheeling, WV-OH" 362 | 48620,"Wichita, KS" 363 | 48660,"Wichita Falls, TX" 364 | 48700,"Williamsport, PA" 365 | 48900,"Wilmington, NC" 366 | 49020,"Winchester, VA-WV" 367 | 49180,"Winston-Salem, NC" 368 | 49340,"Worcester, MA" 369 | 49420,"Yakima, WA" 370 | 49500,"Yauco, PR" 371 | 49620,"York-Hanover, PA" 372 | 49660,"Youngstown-Warren-Boardman, OH-PA" 373 | 49700,"Yuba City, CA" 374 | 49740,"Yuma, AZ" 375 | 14010,"Bloomington, IL" 376 | 15680,"California-Lexington Park, MD" 377 | 16060,"Carbondale-Marion, IL" 378 | 16540,"Chambersburg-Waynesboro, PA" 379 | 19300,"Daphne-Fairhope-Foley, AL" 380 | 20700,"East Stroudsburg, PA" 381 | 25940,"Hilton Head Island-Bluffton-Beaufort, SC" 382 | 27980,"Kahului-Wailuku-Lahaina, HI" 383 | 29200,"Lafayette-West Lafayette, IN" 384 | 31080,"Los Angeles-Long Beach-Anaheim, CA" 385 | 42200,"Santa Maria-Santa Barbara, CA" 386 | 46520,"Urban Honolulu, HI" 387 | 48060,"Watertown-Fort Drum, NY" 388 | -------------------------------------------------------------------------------- /micro/COVID_CPS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 
7 | "#### COVID-19 Data example\n", 8 | "\n", 9 | "November 21, 2020\n", 10 | "\n", 11 | "Brian Dew, brian.w.dew@gmail.com" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "ExecuteTime": { 19 | "end_time": "2020-11-24T16:42:40.003203Z", 20 | "start_time": "2020-11-24T16:42:39.826235Z" 21 | } 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import pandas as pd\n", 26 | "import os\n", 27 | "\n", 28 | "os.chdir('/home/brian/Documents/CPS/data/clean')" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": { 35 | "ExecuteTime": { 36 | "end_time": "2020-11-24T16:42:40.117996Z", 37 | "start_time": "2020-11-24T16:42:40.004267Z" 38 | } 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "cols = ['AGE', 'YEAR', 'MONTH', 'WBHAO', 'LFS', 'PTECON', 'UNEMPTYPE',\n", 43 | " 'FEMALE', 'EDUC', 'BASICWGT', 'PWSSWGT']\n", 44 | "\n", 45 | "covcols = cols + ['PTCOVID1', 'PTCOVID2', 'PTCOVID3', 'PTCOVID4']\n", 46 | "\n", 47 | "d20 = (pd.read_feather('cps2020.ft', columns=covcols)\n", 48 | " .query('MONTH in [8, 9, 10] and AGE >= 25 and AGE <= 34 and FEMALE == 1'))\n", 49 | "\n", 50 | "d19 = (pd.read_feather('cps2019.ft', columns=cols)\n", 51 | " .query('MONTH in [8, 9, 10] and AGE >= 25 and AGE <= 34 and FEMALE == 1'))" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 3, 57 | "metadata": { 58 | "ExecuteTime": { 59 | "end_time": "2020-11-24T16:42:40.130802Z", 60 | "start_time": "2020-11-24T16:42:40.119244Z" 61 | } 62 | }, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "PTCOVID2\n", 68 | "1 0.136502\n", 69 | "2 0.863497\n", 70 | "Name: BASICWGT, dtype: float32" 71 | ] 72 | }, 73 | "execution_count": 3, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "df1 = d20.query('WBHAO == \"Black\" and EDUC == \"COLL\"')\n", 80 | "df1.groupby('PTCOVID2').BASICWGT.sum() / df1.BASICWGT.sum()" 81 | ] 82 | }, 83 | { 84 | 
"cell_type": "code", 85 | "execution_count": 4, 86 | "metadata": { 87 | "ExecuteTime": { 88 | "end_time": "2020-11-24T16:42:40.138791Z", 89 | "start_time": "2020-11-24T16:42:40.131912Z" 90 | } 91 | }, 92 | "outputs": [ 93 | { 94 | "data": { 95 | "text/plain": [ 96 | "PTCOVID2\n", 97 | "1 0.080773\n", 98 | "2 0.919228\n", 99 | "Name: BASICWGT, dtype: float32" 100 | ] 101 | }, 102 | "execution_count": 4, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "df2 = d20.query('WBHAO == \"White\" and EDUC == \"HS\"')\n", 109 | "df2.groupby('PTCOVID2').BASICWGT.sum() / df2.BASICWGT.sum()" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 5, 115 | "metadata": { 116 | "ExecuteTime": { 117 | "end_time": "2020-11-24T16:42:40.143994Z", 118 | "start_time": "2020-11-24T16:42:40.139716Z" 119 | } 120 | }, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "LFS\n", 126 | "Employed 0.820260\n", 127 | "NILF 0.131046\n", 128 | "Unemployed 0.048694\n", 129 | "nan 0.000000\n", 130 | "Name: BASICWGT, dtype: float32" 131 | ] 132 | }, 133 | "execution_count": 5, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "df1.groupby('LFS').BASICWGT.sum() / df1.BASICWGT.sum()" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 6, 145 | "metadata": { 146 | "ExecuteTime": { 147 | "end_time": "2020-11-24T16:42:40.152044Z", 148 | "start_time": "2020-11-24T16:42:40.144805Z" 149 | } 150 | }, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/plain": [ 155 | "LFS\n", 156 | "Employed 0.625746\n", 157 | "NILF 0.315726\n", 158 | "Unemployed 0.058528\n", 159 | "nan 0.000000\n", 160 | "Name: BASICWGT, dtype: float32" 161 | ] 162 | }, 163 | "execution_count": 6, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "df2.groupby('LFS').BASICWGT.sum() / df2.BASICWGT.sum()" 170 | ] 171 | }, 172 | 
{ 173 | "cell_type": "code", 174 | "execution_count": 7, 175 | "metadata": { 176 | "ExecuteTime": { 177 | "end_time": "2020-11-24T16:42:40.157816Z", 178 | "start_time": "2020-11-24T16:42:40.153112Z" 179 | } 180 | }, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "PTECON\n", 186 | "0.0 0.793797\n", 187 | "1.0 0.026463\n", 188 | "Name: BASICWGT, dtype: float32" 189 | ] 190 | }, 191 | "execution_count": 7, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "df1.groupby('PTECON').BASICWGT.sum() / df1.BASICWGT.sum()" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 8, 203 | "metadata": { 204 | "ExecuteTime": { 205 | "end_time": "2020-11-24T16:42:40.167495Z", 206 | "start_time": "2020-11-24T16:42:40.159144Z" 207 | } 208 | }, 209 | "outputs": [ 210 | { 211 | "data": { 212 | "text/plain": [ 213 | "PTECON\n", 214 | "0.0 0.591466\n", 215 | "1.0 0.034281\n", 216 | "Name: BASICWGT, dtype: float32" 217 | ] 218 | }, 219 | "execution_count": 8, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 | "df2.groupby('PTECON').BASICWGT.sum() / df2.BASICWGT.sum()" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 9, 231 | "metadata": { 232 | "ExecuteTime": { 233 | "end_time": "2020-11-24T16:42:40.178463Z", 234 | "start_time": "2020-11-24T16:42:40.168596Z" 235 | } 236 | }, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "PTCOVID3\n", 242 | "1 0.083241\n", 243 | "2 0.916759\n", 244 | "Name: BASICWGT, dtype: float32" 245 | ] 246 | }, 247 | "execution_count": 9, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "df1 = d20.query('WBHAO == \"Black\" and EDUC == \"COLL\" and PTCOVID2 == 1')\n", 254 | "df1.groupby('PTCOVID3').BASICWGT.sum() / df1.BASICWGT.sum()" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 10, 260 | 
"metadata": { 261 | "ExecuteTime": { 262 | "end_time": "2020-11-24T16:42:40.187200Z", 263 | "start_time": "2020-11-24T16:42:40.179425Z" 264 | } 265 | }, 266 | "outputs": [ 267 | { 268 | "data": { 269 | "text/plain": [ 270 | "PTCOVID3\n", 271 | "1 0.09266\n", 272 | "2 0.90734\n", 273 | "Name: BASICWGT, dtype: float32" 274 | ] 275 | }, 276 | "execution_count": 10, 277 | "metadata": {}, 278 | "output_type": "execute_result" 279 | } 280 | ], 281 | "source": [ 282 | "df2 = d20.query('WBHAO == \"White\" and EDUC == \"HS\" and PTCOVID2 == 1')\n", 283 | "df2.groupby('PTCOVID3').BASICWGT.sum() / df2.BASICWGT.sum()" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 11, 289 | "metadata": { 290 | "ExecuteTime": { 291 | "end_time": "2020-11-24T16:42:40.196355Z", 292 | "start_time": "2020-11-24T16:42:40.188920Z" 293 | } 294 | }, 295 | "outputs": [ 296 | { 297 | "data": { 298 | "text/plain": [ 299 | "PTCOVID4\n", 300 | "1 0.320237\n", 301 | "2 0.679763\n", 302 | "Name: BASICWGT, dtype: float32" 303 | ] 304 | }, 305 | "execution_count": 11, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "df1 = d20.query('WBHAO == \"Black\" and EDUC == \"COLL\" and PTCOVID4 > 0')\n", 312 | "df1.groupby('PTCOVID4').BASICWGT.sum() / df1.BASICWGT.sum()" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 12, 318 | "metadata": { 319 | "ExecuteTime": { 320 | "end_time": "2020-11-24T16:42:40.205472Z", 321 | "start_time": "2020-11-24T16:42:40.197735Z" 322 | } 323 | }, 324 | "outputs": [ 325 | { 326 | "data": { 327 | "text/plain": [ 328 | "PTCOVID4\n", 329 | "1 0.067293\n", 330 | "2 0.932707\n", 331 | "Name: BASICWGT, dtype: float32" 332 | ] 333 | }, 334 | "execution_count": 12, 335 | "metadata": {}, 336 | "output_type": "execute_result" 337 | } 338 | ], 339 | "source": [ 340 | "df2 = d20.query('WBHAO == \"White\" and EDUC == \"HS\" and PTCOVID4 > 0')\n", 341 | "df2.groupby('PTCOVID4').BASICWGT.sum() / 
df2.BASICWGT.sum()" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": { 348 | "ExecuteTime": { 349 | "end_time": "2020-11-24T16:42:40.257410Z", 350 | "start_time": "2020-11-24T16:42:40.206412Z" 351 | } 352 | }, 353 | "outputs": [], 354 | "source": [] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": null, 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [] 383 | } 384 | ], 385 | "metadata": { 386 | "kernelspec": { 387 | "display_name": "Python 3", 388 | "language": "python", 389 | "name": "python3" 390 | }, 391 | "language_info": { 392 | "codemirror_mode": { 393 | "name": "ipython", 394 | "version": 3 395 | }, 396 | "file_extension": ".py", 397 | "mimetype": "text/x-python", 398 | "name": "python", 399 | "nbconvert_exporter": "python", 400 | "pygments_lexer": "ipython3", 401 | "version": "3.8.6" 402 | } 403 | }, 404 | "nbformat": 4, 405 | "nbformat_minor": 4 406 | } 407 | -------------------------------------------------------------------------------- /micro/CPS-ASEC_poverty.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### CPS ASEC - Replicating Poverty Estimates\n", 8 | "\n", 9 | "Brian Dew\n", 10 | "\n", 11 | "Updated September 15, 2020 for 2019 data\n", 12 | "\n", 13 | "----\n", 14 | "\n", 15 | "\n", 16 | "Replicate official poverty rate and number of people in poverty. 
Then replicate the same for the SPM.\n", 17 | "\n", 18 | "Next replicate Matt Bruenig data--grouping and market income" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "ExecuteTime": { 26 | "end_time": "2020-12-13T02:00:58.429915Z", 27 | "start_time": "2020-12-13T02:00:58.259743Z" 28 | } 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "import re\n", 33 | "import pandas as pd\n", 34 | "import numpy as np" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": { 41 | "ExecuteTime": { 42 | "end_time": "2020-12-13T02:01:03.335260Z", 43 | "start_time": "2020-12-13T02:00:58.431164Z" 44 | } 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "# Extract person records for selected variables\n", 49 | "variables = ['PRECORD', 'A_AGE', 'MARSUPWT', 'PERLIS', \n", 50 | " 'SPM_Resources', 'SPM_PovThreshold', 'WKSWORK',\n", 51 | " 'PRDISFLG', 'RSNNOTW', 'PYRSN', 'LKWEEKS',\n", 52 | " 'SPM_Weight', 'SPM_ID', 'SPM_CapWkCCXpns',\n", 53 | " 'SPM_Totval', 'SPM_MedXpns', 'SPM_ChildSupPd',\n", 54 | " 'SPM_Poor']\n", 55 | "\n", 56 | "benefits = ['SS_VAL', 'SSI_VAL', 'UC_VAL', 'VET_VAL', \n", 57 | " 'WC_VAL', 'PAW_VAL']\n", 58 | "\n", 59 | "path = '/home/brian/Documents/ASEC/'\n", 60 | "\n", 61 | "dictfile = f'{path}/data/persfmt.txt'\n", 62 | "dd = open(dictfile).read()\n", 63 | "p = f'({\"|\".join(variables + benefits)})\\s+(\\d+)\\s+(\\d+)\\s'\n", 64 | "cols = {name: (int(start) - 1, int(start) - 1 + int(length)) \n", 65 | " for name, length, start in re.findall(p, dd)}\n", 66 | "\n", 67 | "# Manually adjust because 2020 text dict missing\n", 68 | "cols['SPM_ID'] = (1419-1, 1419-1+8)\n", 69 | "cols['SPM_CapWkCCXpns'] = (1437-1, 1437-1+6)\n", 70 | "cols['SPM_ChildSupPd'] = (1449-1, 1449-1+6)\n", 71 | "cols['SPM_MedXpns'] = (1500-1, 1500-1+7)\n", 72 | "cols['SPM_Poor'] = (1513-1, 1513-1+1)\n", 73 | "cols['SPM_PovThreshold'] = (1514-1, 1514-1+5)\n", 74 | "cols['SPM_Resources'] = (1519-1, 1519-1+7)\n", 
75 | "cols['SPM_Totval'] = (1542-1, 1542-1+7)\n", 76 | "cols['SPM_Weight'] = (1550-1, 1550-1+7)\n", 77 | "cols['PERLIS'] = (914-1, 914-1+2)\n", 78 | "cols['PYRSN'] = (322-1, 322-1+1)\n", 79 | "cols['WKSWORK'] = (337-1, 337-1+2)\n", 80 | "cols['LKWEEKS'] = (304-1, 304-1+2)\n", 81 | "cols['RSNNOTW'] = (323-1, 323-1+1)\n", 82 | "cols['SS_VAL'] = (623-1, 623-1+5)\n", 83 | "cols['SSI_VAL'] = (629-1, 629-1+5)\n", 84 | "cols['UC_VAL'] = (661-1, 661-1+5)\n", 85 | "cols['VET_VAL'] = (673-1, 673-1+6)\n", 86 | "cols['WC_VAL'] = (681-1, 681-1+5)\n", 87 | "cols['PAW_VAL'] = (690-1, 690-1+5)\n", 88 | "\n", 89 | "datafile = f'{path}/data/asec2020_pubuse.dat'\n", 90 | "df = (pd.read_fwf(datafile, \n", 91 | " colspecs=list(cols.values()), \n", 92 | " header=None, \n", 93 | " names=list(cols.keys()))\n", 94 | " .query('PRECORD == 3'))" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 3, 100 | "metadata": { 101 | "ExecuteTime": { 102 | "end_time": "2020-12-13T02:01:03.401055Z", 103 | "start_time": "2020-12-13T02:01:03.336564Z" 104 | } 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "# Categorize population\n", 109 | "df['Category'] = (np.where(df.A_AGE < 18, 'Children', \n", 110 | " np.where(df.A_AGE > 64, 'Elderly', \n", 111 | " np.where(((df.PRDISFLG == 1) | (df.PYRSN == 1) | (df.RSNNOTW == 1)), 'Disabled', \n", 112 | " np.where(((df.PYRSN == 3) | (df.RSNNOTW == 4)), 'Student', \n", 113 | " np.where(((df.PYRSN == 2) | (df.RSNNOTW == 3)), 'Carers',\n", 114 | " np.where(((df.PYRSN == 5) | (df.RSNNOTW == 5) | (df.LKWEEKS > 0)), 'Unemployed', \n", 115 | " np.where(((df.PYRSN == 4) | (df.RSNNOTW == 2)), 'Early Retired', \n", 116 | " np.where(df.WKSWORK > 49, 'Fully Employed', 'All Other')))))))))\n", 117 | "\n", 118 | "df['SPM'] = np.where(df['SPM_Resources'] < df['SPM_PovThreshold'], 1, 0)\n", 119 | "df['OPM'] = np.where(df['PERLIS'] == 1, 1, 0)\n", 120 | "df['MARKET_INCOME'] = (df['SPM_Resources'] - \n", 121 | " df[['SPM_CapWkCCXpns','SPM_MedXpns', 
'SPM_ChildSupPd']].sum(axis=1) - \n", 122 | " df[benefits].sum(axis=1).groupby(df['SPM_ID']).transform('sum'))\n", 123 | "df['SPM_MI'] = np.where(df['MARKET_INCOME'] < df['SPM_PovThreshold'], 1, 0)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 4, 129 | "metadata": { 130 | "ExecuteTime": { 131 | "end_time": "2020-12-13T02:01:03.463061Z", 132 | "start_time": "2020-12-13T02:01:03.402182Z" 133 | } 134 | }, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | " SPM OPM SPM_MI\n", 141 | "Category \n", 142 | "Children 23.552475 30.795875 18.317198\n", 143 | "Elderly 19.033379 14.295022 32.765160\n", 144 | "Fully Employed 15.779747 10.050124 17.645154\n", 145 | "Disabled 13.183932 15.317319 12.326213\n", 146 | "Carers 8.629066 10.026422 5.276517\n", 147 | "Student 8.063140 8.087707 4.736417\n", 148 | "Unemployed 5.738762 5.819194 3.992129\n", 149 | "Early Retired 3.626288 3.407859 3.298225\n", 150 | "All Other 2.393210 2.200479 1.642987\n" 151 | ] 152 | } 153 | ], 154 | "source": [ 155 | "results = pd.DataFrame()\n", 156 | "\n", 157 | "# Group share of poor people\n", 158 | "results['SPM'] = (df.query('SPM == 1').groupby('Category').SPM_Weight.sum() / \n", 159 | " df.query('SPM == 1').SPM_Weight.sum() * 100).sort_values(ascending=False)\n", 160 | "\n", 161 | "results['OPM'] = (df.query('OPM == 1').groupby('Category').MARSUPWT.sum() / \n", 162 | " df.query('OPM == 1').MARSUPWT.sum() * 100).sort_values(ascending=False)\n", 163 | "\n", 164 | "\n", 165 | "results['SPM_MI'] = (df.query('SPM_MI == 1').groupby('Category').SPM_Weight.sum() / \n", 166 | " df.query('SPM_MI == 1').SPM_Weight.sum() * 100).sort_values(ascending=False)\n", 167 | "\n", 168 | "print(results)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": { 174 | "ExecuteTime": { 175 | "end_time": "2020-12-13T02:01:03.467090Z", 176 | "start_time": "2020-12-13T02:01:03.464119Z" 177 | } 178 | }, 179 | "source": [ 
180 | " SPM OPM SPM_MI\n", 181 | "Category \n", 182 | "Children 23.552475 30.795875 21.062952\n", 183 | "Elderly 19.033379 14.295022 33.935611\n", 184 | "Fully Employed 15.779747 10.050124 12.264199\n", 185 | "Disabled 13.183932 15.317319 13.416647\n", 186 | "Carers 8.629066 10.026422 5.671404\n", 187 | "Student 8.063140 8.087707 4.775935\n", 188 | "Unemployed 5.738762 5.819194 3.933209\n", 189 | "Early Retired 3.626288 3.407859 3.468531\n", 190 | "All Other 2.393210 2.200479 1.471512" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 6, 196 | "metadata": { 197 | "ExecuteTime": { 198 | "end_time": "2020-12-13T02:01:12.898413Z", 199 | "start_time": "2020-12-13T02:01:12.819954Z" 200 | } 201 | }, 202 | "outputs": [ 203 | { 204 | "name": "stdout", 205 | "output_type": "stream", 206 | "text": [ 207 | " SPM OPM SPM_MI\n", 208 | "Category \n", 209 | "Children 12.406551 14.307068 24.287481\n", 210 | "Elderly 12.751231 8.890741 55.253194\n", 211 | "Fully Employed 4.857504 2.780689 13.672485\n", 212 | "Disabled 23.521702 24.229192 55.355702\n", 213 | "Carers 21.369661 22.580328 32.891986\n", 214 | "Student 22.411029 18.548014 33.137247\n", 215 | "Unemployed 20.887578 18.172532 36.574913\n", 216 | "Early Retired 19.312494 16.703581 44.214559\n", 217 | "All Other 16.977166 13.751578 29.337783\n" 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "results2 = pd.DataFrame()\n", 223 | "\n", 224 | "# Poverty rate of each group\n", 225 | "results2['SPM'] = (df.groupby('Category')\n", 226 | " .apply(lambda x: np.average(x['SPM'], weights=x['SPM_Weight']))\n", 227 | " * 100).loc[results.index]\n", 228 | "results2['OPM'] = (df.groupby('Category')\n", 229 | " .apply(lambda x: np.average(x['OPM'], weights=x['MARSUPWT']))\n", 230 | " * 100).loc[results.index]\n", 231 | "\n", 232 | "results2['SPM_MI'] = (df.groupby('Category')\n", 233 | " .apply(lambda x: np.average(x['SPM_MI'], weights=x['SPM_Weight']))\n", 234 | " * 100).loc[results.index]\n", 235 | "\n", 236 | 
"print(results2)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": { 242 | "ExecuteTime": { 243 | "end_time": "2020-12-13T02:01:03.468906Z", 244 | "start_time": "2020-12-13T02:00:58.270Z" 245 | } 246 | }, 247 | "source": [ 248 | " SPM OPM SPM_MI\n", 249 | "Category \n", 250 | "Children 12.406551 14.307068 22.143021\n", 251 | "Elderly 12.751231 8.890741 45.372737\n", 252 | "Fully Employed 4.857504 2.780689 7.534517\n", 253 | "Disabled 23.521702 24.229192 47.771721\n", 254 | "Carers 21.369661 22.580328 28.030288\n", 255 | "Student 22.411029 18.548014 26.492266\n", 256 | "Unemployed 20.887578 18.172532 28.570644\n", 257 | "Early Retired 19.312494 16.703581 36.865899\n", 258 | "All Other 16.977166 13.751578 20.832978\n" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [] 295 | } 296 | ], 297 | "metadata": { 298 | "kernelspec": { 299 | "display_name": "Python 3", 300 | "language": "python", 301 | "name": "python3" 302 | }, 303 | "language_info": { 304 | "codemirror_mode": { 305 | "name": "ipython", 306 | "version": 3 307 | }, 308 | "file_extension": ".py", 309 | "mimetype": "text/x-python", 310 | "name": "python", 311 | "nbconvert_exporter": "python", 312 | "pygments_lexer": "ipython3", 313 | "version": "3.8.6" 314 | } 315 | }, 316 | "nbformat": 4, 317 | "nbformat_minor": 2 318 | } 319 | 
-------------------------------------------------------------------------------- /micro/CPS_Example_Notebook_UPDATED.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Current Population Survey Microdata with Python\n", 8 | "\n", 9 | "\n", 10 | "*Brian Dew, @bd_econ, July 7, 2019*\n", 11 | "\n", 12 | "The following is a minimal working example of using python and the struct method to read CPS microdata. It requires two files to be saved locally: the data dictionary, `January_2017_Record_Layout.txt`, and the large (13MB compressed, 146MB uncompressed) CPS public use microdata file covering April 2017, `apr17pub.dat`. Both are downloaded from the [Census CPS page](https://www.census.gov/data/datasets/time-series/demo/cps/cps-basic.html).\n", 13 | "\n", 14 | "First, the Census data dictionary file identifies where the relevant variables are located in the microdata file. Because the microdata file is fixed-width format with no headers, there is nothing inside of the file that tells what each variable is or where it is stored--hence the need for a separate dictionary.\n", 15 | "\n", 16 | "Once the data structure is identified, the relevant parts of each row of microdata are extracted into a big list. Pandas, which makes further data cleaning and calculations easier, is able to quickly read this list. To confirm that the data are loaded properly, the example successfully benchmarks the results of a local calculation of what share of women age 25-54 are employed in April 2017 against the published BLS estimate. 
\n", 17 | "\n", 18 | "The code benefits greatly from the excellent four-part series ([1](https://tomaugspurger.github.io/tackling%20the%20cps.html), [2](https://tomaugspurger.github.io/tackling%20the%20cps%20%28part%202%29.html), [3](https://tomaugspurger.github.io/tackling%20the%20cps%20%28part%203%29.html), [4](https://tomaugspurger.github.io/tackling%20the%20cps%20%28part%204%29.html)) by Tom Augspurger and from very patient guidance with the CPS from John Schmitt.\n", 19 | "\n", 20 | "Grateful for any advice on making the code better!" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "#### Requirements:\n", 28 | "\n", 29 | "Python (3.6 used, probably works on 2.7+), pandas, and numpy\n", 30 | "\n", 31 | "Two external files: \n", 32 | "\n", 33 | " 1) January_2017_Record_Layout.txt (a data dictionary text file: https://www2.census.gov/programs-surveys/cps/datasets/2017/basic/January_2017_Record_Layout.txt); and \n", 34 | " 2) apr17pub.dat (*CAUTION*: large public use microdata file [13MB compressed, 146MB uncompressed]: https://www2.census.gov/programs-surveys/cps/datasets/2017/basic/apr17pub.zip)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": { 41 | "ExecuteTime": { 42 | "end_time": "2019-07-07T14:46:53.160437Z", 43 | "start_time": "2019-07-07T14:46:52.913529Z" 44 | }, 45 | "code_folding": [] 46 | }, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "['January_2017_Record_Layout.txt', 'apr17pub.dat']\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "# Import relevant libraries (python 3.7)\n", 58 | "import os, re, struct\n", 59 | "import pandas as pd\n", 60 | "import numpy as np\n", 61 | "\n", 62 | "# check that two required files are saved locally\n", 63 | "os.chdir('cps_example/')\n", 64 | "print(os.listdir(os.curdir))" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "#### CPS 
data dictionary\n", 72 | "\n", 73 | "Census posts a [file](https://thedataweb.rm.census.gov/pub/cps/basic/201701-/January_2017_Record_Layout.txt) that describes how to read the fixed-width format CPS microdata files. This \"data dictionary\" file can be machine read. In this example, the January 2017 data dictionary text file is saved locally then used to manually find and type out the \"names\" of four variables: `PRTAGE` for age, `PESEX` for gender, `PREMPNOT` for employment status, and `PWCMPWGT` for the sample weight. " 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 2, 79 | "metadata": { 80 | "ExecuteTime": { 81 | "end_time": "2019-07-07T14:46:53.166088Z", 82 | "start_time": "2019-07-07T14:46:53.162074Z" 83 | } 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "# read data dictionary text file \n", 88 | "data_dict = open('January_2017_Record_Layout.txt', 'r', encoding='iso-8859-1').read()\n", 89 | "\n", 90 | "# manually list out the IDs for series of interest \n", 91 | "var_names = ['PRTAGE', 'PESEX', 'PREMPNOT', 'PWCMPWGT']" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "#### Regular expressions\n", 99 | "\n", 100 | "The data dictionary text file follows a pattern that makes it machine readable. In this case, specific rows of interest follow the pattern of: VariableName [space] VariableLength [space] VariableDescription [space] VariableStart - VariableEnd. The regex pattern that captures the data of interest is identified by `p`.\n", 101 | "\n", 102 | "The python numbering system starts at zero and does not include the last number from a range. As a result, the start location of a variable, as identified in the data dictionary, is adjusted by -1. Additionally, the length of the variable value, its \"width\" in the dataset, is stored as a string ending in `s`, the struct format code for a byte containing a single character. 
This will be used later in reading the microdata" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 3, 108 | "metadata": { 109 | "ExecuteTime": { 110 | "end_time": "2019-07-07T14:46:53.175482Z", 111 | "start_time": "2019-07-07T14:46:53.167593Z" 112 | } 113 | }, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "{'PRTAGE': [121, 123, '2s'], 'PESEX': [128, 130, '2s'], 'PREMPNOT': [392, 394, '2s'], 'PWCMPWGT': [845, 855, '10s']}\n" 120 | ] 121 | } 122 | ], 123 | "source": [ 124 | "# regular expression matching series name and data dict pattern\n", 125 | "p = f'\\n({\"|\".join(var_names)})\\s+(\\d+)\\s+.*?\\t+.*?(\\d\\d*).*?(\\d\\d+)'\n", 126 | "\n", 127 | "# dictionary of variable name: [start, end, and length + 's']\n", 128 | "d = {s[0]: [int(s[2])-1, int(s[3]), f'{s[1]}s']\n", 129 | " for s in re.findall(p, data_dict)}\n", 130 | "\n", 131 | "print(d)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "#### Struct method\n", 139 | "\n", 140 | "There are at least three ways to read a fixed-width format file in python: 1) string slicing on each row for each variable; 2) pandas.read_fwf(file, colspecs); or 3) use struct and a format string to identify what part of each CPS row to keep as a variable and what part to ignore. The pandas method is the easiest to use, but the slowest. The struct method is the most difficult to use but the fastest. 
If you need to read 40 variables for all months going back to 1994, you might want to consider writing out a script that applies the struct method.\n", 141 | "\n", 142 | "Here is a minimal example:" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 4, 148 | "metadata": { 149 | "ExecuteTime": { 150 | "end_time": "2019-07-07T14:46:53.182220Z", 151 | "start_time": "2019-07-07T14:46:53.176793Z" 152 | } 153 | }, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "121x2s5x2s262x2s451x10s\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "# lists of variable starts, ends, and lengths\n", 165 | "start, end, width = zip(*d.values())\n", 166 | "\n", 167 | "# create list of which characters to skip in each row\n", 168 | "skip = ([f'{s - e}x' for s, e in zip(start, [0] + list(end[:-1]))])\n", 169 | "\n", 170 | "# create format string by joining skip and variable segments\n", 171 | "unpack_fmt = ''.join([j for i in zip(skip, width) for j in i])\n", 172 | "print(unpack_fmt)\n", 173 | "\n", 174 | "# struct can interpret row bytes with the format string\n", 175 | "unpacker = struct.Struct(unpack_fmt).unpack_from" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "#### Fixed width format\n", 183 | "\n", 184 | "The string format above, called `unpack_fmt`, can be read as: ignore the first 121 characters (`121x`), store the next two (`2s`), ignore the next five (`5x`), store the next two (`2s`), and so on. This format matches the structure of each line of the fwf data. 
Here's the first row of the raw CPS microdata, as an example:" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 5, 190 | "metadata": { 191 | "ExecuteTime": { 192 | "end_time": "2019-07-07T14:46:53.187929Z", 193 | "start_time": "2019-07-07T14:46:53.183598Z" 194 | } 195 | }, 196 | "outputs": [ 197 | { 198 | "name": "stdout", 199 | "output_type": "stream", 200 | "text": [ 201 | "000110116792163 42017 120100-1 1 1-1 115-1-1-1 15049796 1 2 1 7 2 0 205011 2 1 1-1-1-1 36 01 338600001103000 -1-1 1-1420 1 2 1 2-1 243 1-1 9-1 1-1 1 1 1 2 1 2 57 57 57 1 0 0 1 1 1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1 2-150-1-1 50-1-1-1-1 2-1 2-150-1 50-1-1 2 5 5-1 2 3 5 2-1-1-1-1-1 -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1 -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1 1-121 1 1 1 6-1-1-1 -1-1-1 1 2-1-1-1-1 1 2 1 6 4 -1-1 4 3 3 1 2 4-1-1 6-138-114-1 1 9-1 3-1 2 1 1 1 0-1-1-1-1 -1 -1 -1 -10-1 -10-1-1 -1 -10-1-1-1-1-1-1-1-1-1 2-1-1 2 15049796 22986106 0 16044411 15280235 0 0 1-1-1-1 0 0 1 0-1 050 0 0 0 0 1 0 0 0-1-1-1 1 0 0-1 1 1 0 1 0 1 1 0 1 1 1 0 1 0 1 1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1 0 0 0-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1 0 1 1 3865 1-1-1-1-1-1-1 1 1 1-1-1-1 1573071277704210 -1 -114-1-1-1-1-1 0-1-1-1-1-15050 1 1 1 2 2 2 2 2 2 2 0 0 0 0 0 0 0-1-1-1-1-1 1 1 1202020 A\n", 202 | "\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "print(open('apr17pub.dat').readline())" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "If we ignore the first 121 characters and then keep the next two, we find `42` which is the age of the person in the first row of the microdata. " 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "#### Read the raw microdata \n", 222 | "\n", 223 | "First, open the raw CPS microdata file with the readlines method. The raw data is filtered to only include observations with a positive sample weight. 
The unpacker identified above is applied to each row of microdata, while the extracted variable values are converted to integers from strings.\n", 224 | "\n", 225 | "The first five observations are printed. They are much more readable than the raw data, but still not identified by name." 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 6, 231 | "metadata": { 232 | "ExecuteTime": { 233 | "end_time": "2019-07-07T14:46:53.528130Z", 234 | "start_time": "2019-07-07T14:46:53.189154Z" 235 | } 236 | }, 237 | "outputs": [ 238 | { 239 | "name": "stdout", 240 | "output_type": "stream", 241 | "text": [ 242 | "[[42, 1, 1, 15730712], [26, 2, 1, 14582612], [25, 2, 1, 20672047], [42, 2, 4, 15492377], [47, 1, 1, 18155638]]\n" 243 | ] 244 | } 245 | ], 246 | "source": [ 247 | "# open file (read as binary) and read lines into \"raw_data\"\n", 248 | "raw_data = open('apr17pub.dat', 'rb').readlines()\n", 249 | "\n", 250 | "wgt = d['PWCMPWGT'] # Location of sample weight variable\n", 251 | "\n", 252 | "# unpack and store data of interest if sample weight > 0\n", 253 | "data = [[*map(int, unpacker(row))] for row in raw_data\n", 254 | " if int(row[wgt[0]:wgt[1]]) > 0]\n", 255 | "\n", 256 | "print(data[:5])" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "#### Create pandas dataframe from CPS data\n", 264 | "\n", 265 | "Pandas is a useful tool for working with the data--here is it used to filter out the observations by age and gender. I also convert the sample weights to the values they are intended to represent, as the data dictionary indicates that they have four implied decimal places. That is, the first person in the sample below represents \"1,458.2612\" people in the US population. 
" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 7, 271 | "metadata": { 272 | "ExecuteTime": { 273 | "end_time": "2019-07-07T14:46:53.606959Z", 274 | "start_time": "2019-07-07T14:46:53.529470Z" 275 | } 276 | }, 277 | "outputs": [ 278 | { 279 | "name": "stdout", 280 | "output_type": "stream", 281 | "text": [ 282 | " PRTAGE PESEX PREMPNOT PWCMPWGT\n", 283 | " 26 2 1 1458.2612\n", 284 | " 25 2 1 2067.2047\n", 285 | " 42 2 4 1549.2377\n", 286 | " 49 2 1 1633.0038\n", 287 | " 26 2 1 1611.2316\n" 288 | ] 289 | } 290 | ], 291 | "source": [ 292 | "# Pandas dataframe of women age 25 to 54\n", 293 | "df = (pd.DataFrame(data, columns=d.keys())\n", 294 | " .query('PESEX == 2 and 25 <= PRTAGE <= 54')\n", 295 | " .assign(PWCMPWGT = lambda x: x['PWCMPWGT'] / 10000))\n", 296 | "\n", 297 | "print(df.head().to_string(index=False))" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "#### Benchmarking against BLS published data\n", 305 | "\n", 306 | "The [published value](https://data.bls.gov/timeseries/LNU02300062) for April 2017 is 72.3%" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 8, 312 | "metadata": { 313 | "ExecuteTime": { 314 | "end_time": "2019-07-07T14:46:53.612687Z", 315 | "start_time": "2019-07-07T14:46:53.608522Z" 316 | } 317 | }, 318 | "outputs": [ 319 | { 320 | "name": "stdout", 321 | "output_type": "stream", 322 | "text": [ 323 | "April 2017: 72.3%\n" 324 | ] 325 | } 326 | ], 327 | "source": [ 328 | "# identify employed portion of group as 1 & the rest as 0\n", 329 | "empl = np.where(df['PREMPNOT'] == 1, 1, 0)\n", 330 | "\n", 331 | "# take sample weighted average of employed portion of group\n", 332 | "epop = np.average(empl, weights=df['PWCMPWGT'])\n", 333 | "\n", 334 | "# print out the result to check against LNU02300062\n", 335 | "print(f'April 2017: {epop*100:.1f}%')" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 
| "metadata": {}, 342 | "outputs": [], 343 | "source": [] 344 | } 345 | ], 346 | "metadata": { 347 | "kernelspec": { 348 | "display_name": "Python 3 (ipykernel)", 349 | "language": "python", 350 | "name": "python3" 351 | }, 352 | "language_info": { 353 | "codemirror_mode": { 354 | "name": "ipython", 355 | "version": 3 356 | }, 357 | "file_extension": ".py", 358 | "mimetype": "text/x-python", 359 | "name": "python", 360 | "nbconvert_exporter": "python", 361 | "pygments_lexer": "ipython3", 362 | "version": "3.8.13" 363 | } 364 | }, 365 | "nbformat": 4, 366 | "nbformat_minor": 2 367 | } 368 | -------------------------------------------------------------------------------- /micro/README.md: -------------------------------------------------------------------------------- 1 | # micro 2 | 3 | ## BD Economics microdata examples 4 | 5 | Updated: March 2, 2019 6 | 7 | Contact: Brian Dew, twitter: @bd_econ; email: brian.w.dew@gmail.com 8 | 9 | Goal: Working with public-use microdata using jupyter notebooks and python. 10 | 11 | Some examples use the [bd CPS](https://github.com/bdecon/econ_data/tree/master/bd_CPS), a set of annual CPS extracts. 12 | 13 | ------ 14 | 15 | ### Contents 16 | 17 | - [CPS-ASEC_median_income.ipynb](https://github.com/bdecon/econ_data/blob/master/micro/CPS-ASEC_median_income.ipynb): Replicates (closely) the published median wage estimate from the Annual Social and Economic Supplement to the Current Population Survey. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/micro/CPS-ASEC_median_income.ipynb) 18 | 19 | - [CPS_Disability_NILF_CBSA.ipynb](https://github.com/bdecon/econ_data/blob/master/micro/CPS_Disability_NILF_CBSA.ipynb): Generates a map showing share of age 16-64 population that is out of work due to disability, for each metro area in the US. Uses raw Current Population Survey monthly data. 
[nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/micro/CPS_Disability_NILF_CBSA.ipynb) 20 | 21 | - [CPS_EPOP_P10wage_CBSA.ipynb](https://github.com/bdecon/econ_data/blob/master/micro/CPS_EPOP_P10wage_CBSA.ipynb): Generates a scatter plot showing the relationship between the employed share of the population and the first decile real hourly wage for each metro area. Uses bd CPS as the source. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/micro/CPS_EPOP_P10wage_CBSA.ipynb) 22 | 23 | - [CPS_Example_Notebook_UPDATED.ipynb](https://github.com/bdecon/econ_data/blob/master/micro/CPS_Example_Notebook_UPDATED.ipynb): Example of using the struct method to read a fixed-width format monthly Current Population Survey data file. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/micro/CPS_Example_Notebook_UPDATED.ipynb) 24 | 25 | - [CPS_Matching_Flow_Disabled_to_Work.ipynb](https://github.com/bdecon/econ_data/blob/master/micro/CPS_Matching_Flow_Disabled_to_Work.ipynb): Example of matching one-year apart bd CPS observations to measure the employed share of age 25-54 people who were not working the year before due to disability or illness. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/micro/CPS_Matching_Flow_Disabled_to_Work.ipynb) 26 | 27 | - [CPS_NILF.ipynb](https://github.com/bdecon/econ_data/blob/master/micro/CPS_NILF.ipynb): Create line chart showing contributions to labor force participation rate since March 2001. Uses bd CPS data. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/micro/CPS_NILF.ipynb) 28 | 29 | - [CPS_PECERT_Mapper.ipynb](https://github.com/bdecon/econ_data/blob/master/micro/CPS_PECERT_Mapper.ipynb): Create a choropleth map of the US showing what percent of each state's population has a professional certification. Uses bd CPS as data source. 
[nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/micro/CPS_PECERT_Mapper.ipynb) 30 | 31 | 32 | - [bd_CPS_benchmark.ipynb](https://github.com/bdecon/econ_data/blob/master/micro/bd_CPS_benchmark.ipynb): Several examples of benchmarking bd CPS results to published estimates. Used to check validity of bd CPS extract. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/micro/bd_CPS_benchmark.ipynb) 33 | 34 | - [bd_CPS_flow_MM.ipynb](https://github.com/bdecon/econ_data/blob/master/micro/bd_CPS_flow_MM.ipynb): Create line chart showing what percent of newly employed people were not in the labor force last month. Uses month-to-month matching of bd CPS data. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/micro/bd_CPS_flow_MM.ipynb) 35 | 36 | - [bd_CPS_flow_YY.ipynb](https://github.com/bdecon/econ_data/blob/master/micro/bd_CPS_flow_YY.ipynb): Create a line share showing the share of unemployed people with a job one year later. Uses one-year apart matching of bd CPS data. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/micro/bd_CPS_flow_YY.ipynb) 37 | 38 | - [bd_CPS_grapher.ipynb](https://github.com/bdecon/econ_data/blob/master/micro/bd_CPS_grapher.ipynb): Tool for graphing common labor economics statistics from bd CPS data. For example, the unemployment rate, union membership rate, share part-time for economic reasons, share retired, etc. [nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/micro/bd_CPS_grapher.ipynb) 39 | 40 | - [bd_CPS_grapher_two_lines.ipynb](https://github.com/bdecon/econ_data/blob/master/micro/bd_CPS_grapher_two_lines.ipynb): Draft version of bd_CPS_grapher.ipynb that can create more complex line charts from bd CPS data. 
[nbviewer](https://nbviewer.jupyter.org/github/bdecon/econ_data/blob/master/micro/bd_CPS_grapher_two_lines.ipynb) 41 | 42 | -------------------------------------------------------------------------------- /micro/bd_CPS_codebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Codebook\n", 8 | "\n", 9 | "## bd CPS list of variables and possible values\n", 10 | "\n", 11 | "April 29, 2019\n", 12 | "\n", 13 | "Brian Dew, @bd_econ, brianwdew@gmail.com\n", 14 | "\n", 15 | "-----\n", 16 | "\n", 17 | "Store a list of bd CPS variables and their datatype, availability, average coverage, and list of possible entries." 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2024-03-13T18:15:31.582515Z", 26 | "start_time": "2024-03-13T18:15:31.360116Z" 27 | }, 28 | "code_folding": [ 29 | 0 30 | ], 31 | "run_control": { 32 | "marked": false 33 | } 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "# Preliminaries\n", 38 | "import pandas as pd\n", 39 | "import numpy as np\n", 40 | "import sys\n", 41 | "old_stdout = sys.stdout\n", 42 | "sys.stdout = open('/home/brian/Documents/econ_data/bd_CPS/codebook.txt', 'w')\n", 43 | "\n", 44 | "import os\n", 45 | "os.chdir('/home/brian/Documents/econ_data/bd_CPS/')\n", 46 | "\n", 47 | "from bd_CPS_details import CodebookNotes, ValueLabels\n", 48 | "\n", 49 | "os.chdir('/home/brian/Documents/CPS/data/clean/')" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "metadata": { 56 | "ExecuteTime": { 57 | "end_time": "2024-03-13T18:18:09.350492Z", 58 | "start_time": "2024-03-13T18:15:31.584173Z" 59 | }, 60 | "code_folding": [ 61 | 0 62 | ], 63 | "run_control": { 64 | "marked": false 65 | } 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "# Go through data and store requested details\n", 70 | "d = {}\n", 
71 | "\n", 72 | "for year in range(1989, 2025):\n", 73 | " data = pd.read_feather(f'cps{year}.ft')\n", 74 | " for month, df in data.groupby('MONTH'):\n", 75 | " date = pd.to_datetime(f'{year}-{month}-01')\n", 76 | " variables = list(df.keys())\n", 77 | " for var in variables:\n", 78 | " \n", 79 | " values_list = list(df[var].dropna().unique())\n", 80 | " \n", 81 | " if (var in d.keys()) and (len(values_list) > 1):\n", 82 | "\n", 83 | " date_list = d[var]['avail']\n", 84 | " date_list.append(date)\n", 85 | " \n", 86 | " dtypes = d[var]['dtype']\n", 87 | " if df[var].dtype.name not in dtypes:\n", 88 | " if len(dtypes) > 0:\n", 89 | " d[var]['breaks'].append(date)\n", 90 | " dtypes.append(df[var].dtype.name)\n", 91 | " \n", 92 | " if len(values_list) < 100 and 'HHID2' not in var and 'OTC' not in var and 'PRICE' not in var:\n", 93 | " values = d[var]['values']\n", 94 | " for value in values_list:\n", 95 | " if value not in values:\n", 96 | " values.append(value)\n", 97 | " else:\n", 98 | " loc_max = sorted(values_list)[-1]\n", 99 | " loc_min = sorted(values_list)[0]\n", 100 | " if d[var]['max_val'] == None:\n", 101 | " max_val = loc_max\n", 102 | " min_val = loc_min\n", 103 | " if d[var]['max_val'] != None:\n", 104 | " max_val = d[var]['max_val']\n", 105 | " min_val = d[var]['min_val']\n", 106 | "\n", 107 | " if loc_max >= max_val:\n", 108 | " d[var]['max_val'] = loc_max\n", 109 | " if loc_min <= min_val:\n", 110 | " d[var]['min_val'] = loc_min\n", 111 | " \n", 112 | " if (var not in d.keys()) and (len(values_list) > 1):\n", 113 | " d[var] = {}\n", 114 | " d[var]['breaks'] = []\n", 115 | " d[var]['max_val'] = None\n", 116 | " d[var]['min_val'] = None\n", 117 | " d[var]['avail'] = [date]\n", 118 | " d[var]['dtype'] = [df[var].dtype.name]\n", 119 | " if len(values_list) < 100 and len(values_list) > 0:\n", 120 | " d[var]['values'] = values_list\n", 121 | " else:\n", 122 | " d[var]['values'] = []\n", 123 | " \n", 124 | "for key, values in d.items():\n", 125 | " avail = 
sorted(values['avail'])\n", 126 | " max_date = avail[-1].strftime('%Y-%m')\n", 127 | " values['date_max'] = max_date\n", 128 | " min_date = avail[0].strftime('%Y-%m')\n", 129 | " values['date_min'] = min_date " 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "## Print Codebook" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 3, 142 | "metadata": { 143 | "ExecuteTime": { 144 | "end_time": "2024-03-13T18:18:09.364350Z", 145 | "start_time": "2024-03-13T18:18:09.352448Z" 146 | }, 147 | "code_folding": [ 148 | 0 149 | ], 150 | "run_control": { 151 | "marked": false 152 | }, 153 | "scrolled": false 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "# Print out the bd CPS codebook\n", 158 | "print('\\n========================\\n\\n bd CPS Codebook'\n", 159 | " '\\n\\n========================\\n')\n", 160 | "today = pd.to_datetime('today').strftime('%B %d, %Y')\n", 161 | "print(f'updated: {today}\\n\\nvariables:\\n')\n", 162 | "for key, values in d.items():\n", 163 | " if key in CodebookNotes.keys():\n", 164 | " print(f'{key} - {CodebookNotes[key][\"Name\"]}')\n", 165 | " print(f' Notes: {CodebookNotes[key][\"Notes\"]}')\n", 166 | " else:\n", 167 | " print(f'{key}')\n", 168 | " print(f' Data types: {values[\"dtype\"]}')\n", 169 | " print(f' Available from: {values[\"date_min\"]} to: {values[\"date_max\"]}')\n", 170 | " if len(values['breaks']) > 0:\n", 171 | " print(f' Breaks in dtype: {values[\"breaks\"]}')\n", 172 | " print(' Value range: ')\n", 173 | " if len(sorted(values['values'])) > 0:\n", 174 | " print(sorted(values['values']))\n", 175 | " else:\n", 176 | " print(f'{values[\"min_val\"]} to {values[\"max_val\"]}')\n", 177 | " print('\\n\\n')\n", 178 | "print('\\n==============================\\n\\n Variable value labels'\n", 179 | " '\\n\\n==============================\\n\\n')\n", 180 | "print('2018 Occupation codes (OCC18):\\n')\n", 181 | "for key, value in 
ValueLabels['OCC18'].items():\n", 182 | " print(f'{key} {value}')\n", 183 | " \n", 184 | "print('\\n\\n2010 Occupation detailed recodes (OCC03D):\\n')\n", 185 | "for key, value in ValueLabels['OCC03D'].items():\n", 186 | " print(f'{key} {value}')\n", 187 | " \n", 188 | "print('\\n\\n2010 Occupation major recodes (OCC03M):\\n')\n", 189 | "for key, value in ValueLabels['OCC03M'].items():\n", 190 | " print(f'{key} {value}')\n", 191 | " \n", 192 | "print('\\n\\n\\n2017 Industry codes (IND17):\\n')\n", 193 | "for key, value in ValueLabels['IND17'].items():\n", 194 | " print(f'{key} {value}')\n", 195 | " \n", 196 | "print('\\n\\n2012 Industry detailed recodes (IND03D):\\n')\n", 197 | "for key, value in ValueLabels['IND03D'].items():\n", 198 | " print(f'{key} {value}')\n", 199 | " \n", 200 | "print('\\n\\n2012 Industry major recodes (IND03M):\\n')\n", 201 | "for key, value in ValueLabels['IND03M'].items():\n", 202 | " print(f'{key} {value}')\n", 203 | " \n", 204 | "print('\\n\\nCore-based statistical areas (CBSA):\\n')\n", 205 | "for key, value in ValueLabels['CBSA'].items():\n", 206 | " print(f'{key} {value}')\n", 207 | " \n", 208 | "print('\\n\\nConsolidated statistical areas (CSA):\\n')\n", 209 | "for key, value in ValueLabels['CSA'].items():\n", 210 | " print(f'{key} {value}')\n", 211 | " \n", 212 | "print('\\n\\nCounties (COUNTY):\\n')\n", 213 | "for key, value in ValueLabels['COUNTY'].items():\n", 214 | " print(f'{key} {value}')" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 4, 220 | "metadata": { 221 | "ExecuteTime": { 222 | "end_time": "2024-03-13T18:18:09.388564Z", 223 | "start_time": "2024-03-13T18:18:09.365831Z" 224 | }, 225 | "code_folding": [] 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "#sys.stdout = old_stdout\n", 230 | "\n", 231 | "#codebook = open('/home/brian/Documents/econ_data/bd_CPS/codebook.txt', 'r').read()\n", 232 | "#print(codebook)" 233 | ] 234 | } 235 | ], 236 | "metadata": { 237 | "kernelspec": { 238 | 
"display_name": "Python 3 (ipykernel)", 239 | "language": "python", 240 | "name": "python3" 241 | }, 242 | "language_info": { 243 | "codemirror_mode": { 244 | "name": "ipython", 245 | "version": 3 246 | }, 247 | "file_extension": ".py", 248 | "mimetype": "text/x-python", 249 | "name": "python", 250 | "nbconvert_exporter": "python", 251 | "pygments_lexer": "ipython3", 252 | "version": "3.8.13" 253 | } 254 | }, 255 | "nbformat": 4, 256 | "nbformat_minor": 2 257 | } 258 | -------------------------------------------------------------------------------- /micro/bd_CPS_ind_occ_wage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "id": "a854664e", 7 | "metadata": { 8 | "ExecuteTime": { 9 | "end_time": "2021-09-03T15:31:04.107248Z", 10 | "start_time": "2021-09-03T15:31:04.101450Z" 11 | } 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "import os\n", 16 | "os.chdir('/home/brian/Documents/CPS/data/clean/')" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 33, 22 | "id": "fc094538", 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2021-09-03T16:55:15.369624Z", 26 | "start_time": "2021-09-03T16:55:11.787719Z" 27 | } 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "import pandas as pd\n", 32 | "import numpy as np\n", 33 | "import itertools\n", 34 | "import datetime as dt1\n", 35 | "import matplotlib as mpl\n", 36 | "import matplotlib.pyplot as plt\n", 37 | "import wquantiles\n", 38 | "\n", 39 | "age_tuple = (18, 64)\n", 40 | "path_feather = ''\n", 41 | "\n", 42 | "# Age and sex demographics in March 2001\n", 43 | "filter_cols = ['AGE','BASICWGT', 'MONTH','IND03D','OCC03M','HRSUSL1','WORKFT', 'USLFT', 'PWORWGT','HRWAGE', 'PRICEADJ']\n", 44 | "\n", 45 | "# Python function returns percentile using interpolation\n", 46 | "def binned_wage2(df, wage_var='HRWAGE', perc=0.5, \n", 47 | " bins=np.arange(-0.25, 300, 0.5)):\n", 48 | " 
'''\n", 49 | " Returns wage estimate based on linear interpolation through \n", 50 | " the bin containing the wage.\n", 51 | " \n", 52 | " perc = percentile of interest (0.5 is median)\n", 53 | " bins = list of bin start locations\n", 54 | " '''\n", 55 | " cdf = (df.groupby(pd.cut(df[wage_var], bins))\n", 56 | " .PWORWGT.sum().cumsum() / df.PWORWGT.sum())\n", 57 | " \n", 58 | " return np.interp(perc, cdf, bins[1:])\n", 59 | "\n", 60 | "rhrwage = lambda x: x.HRWAGE * x.PRICEADJ\n", 61 | "\n", 62 | "ind_dict={'Agriculture':1,\n", 63 | " 'Forestry_logging':2,\n", 64 | " 'Mining':3,\n", 65 | " 'Construction':4}\n", 66 | "\n", 67 | "occ_dict={'prodn_occpn':9,'sales_related_occpn':4}\n", 68 | "ind_occ_list=list(itertools.product(list(ind_dict.keys()),list(occ_dict.keys())))\n", 69 | "results = {}\n", 70 | "for idx,val in enumerate(ind_occ_list):\n", 71 | " results[val[0]]={}\n", 72 | " results[val[0]][val[1]]=pd.DataFrame()\n", 73 | " for year in range(2019, 2021):\n", 74 | " annual_data = (pd.read_feather(f'{path_feather}//cps{year}.ft', columns=filter_cols)\n", 75 | " .query(f'{age_tuple[0]} <= AGE <= {age_tuple[1]} and HRSUSL1 >= 35 and OCC03M == 9 and HRWAGE > 0 and WORKFT == 1 and PWORWGT > 0 and IND03D == {ind_dict[val[0]]}')\n", 76 | " .assign(RHRWAGE = rhrwage))\n", 77 | " for month, df in annual_data.groupby('MONTH'):\n", 78 | " # df=annual_data.query(f\"'MONTH'=={month}\") \n", 79 | " date = pd.to_datetime(f'{year}-{month}-01')\n", 80 | " results[val[0]][val[1]].at[date, 'wq_wage'] = wquantiles.median(df['HRWAGE'], df['PWORWGT'])\n", 81 | " results[val[0]][val[1]].at[date, 'bw'] = binned_wage2(df)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "id": "5ac5f433", 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 34, 95 | "id": "e66c5f92", 96 | "metadata": { 97 | "ExecuteTime": { 98 | "end_time": "2021-09-03T16:55:17.395537Z", 99 | "start_time": 
"2021-09-03T16:55:17.384938Z" 100 | } 101 | }, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "{'Agriculture': {'sales_related_occpn': wq_wage bw\n", 107 | " 2019-01-01 19.020000 19.000000\n", 108 | " 2019-02-01 7.211500 7.000000\n", 109 | " 2019-03-01 16.259642 17.924459\n", 110 | " 2019-04-01 16.565003 15.191605\n", 111 | " 2019-05-01 16.000000 16.031366\n", 112 | " 2019-06-01 23.059999 23.000000\n", 113 | " 2019-07-01 42.000000 42.000000\n", 114 | " 2019-08-01 15.588489 15.825183\n", 115 | " 2019-11-01 17.000000 17.000000\n", 116 | " 2019-12-01 16.965986 16.212837\n", 117 | " 2020-01-01 12.000000 12.000000\n", 118 | " 2020-02-01 24.839750 25.000000\n", 119 | " 2020-03-01 21.500000 21.500000\n", 120 | " 2020-04-01 16.220330 16.783271\n", 121 | " 2020-05-01 13.490110 12.185347\n", 122 | " 2020-06-01 16.850000 17.000000\n", 123 | " 2020-07-01 16.698788 16.793628\n", 124 | " 2020-08-01 14.185547 15.792685\n", 125 | " 2020-11-01 19.446164 19.058897\n", 126 | " 2020-12-01 14.000000 14.000000},\n", 127 | " 'Forestry_logging': {'sales_related_occpn': wq_wage bw\n", 128 | " 2019-04-01 12.449462 1.655970\n", 129 | " 2019-05-01 18.500000 18.500000\n", 130 | " 2019-06-01 20.539100 20.049289\n", 131 | " 2019-08-01 23.332766 27.388371\n", 132 | " 2019-09-01 18.000000 18.000000\n", 133 | " 2019-10-01 25.000000 25.000000\n", 134 | " 2019-11-01 22.000000 22.000000\n", 135 | " 2020-01-01 17.836443 16.232866\n", 136 | " 2020-02-01 8.000000 8.000000\n", 137 | " 2020-03-01 34.599998 34.500000\n", 138 | " 2020-04-01 23.244269 23.023035\n", 139 | " 2020-06-01 12.593093 12.645117\n", 140 | " 2020-07-01 18.500000 18.500000\n", 141 | " 2020-08-01 20.293141 18.154655\n", 142 | " 2020-09-01 13.000000 13.000000\n", 143 | " 2020-10-01 15.000000 15.000000\n", 144 | " 2020-11-01 19.080000 19.000000},\n", 145 | " 'Mining': {'sales_related_occpn': wq_wage bw\n", 146 | " 2019-01-01 41.199136 52.750461\n", 147 | " 2019-02-01 22.303229 22.959520\n", 148 | " 2019-03-01 
34.686486 34.740649\n", 149 | " 2019-04-01 17.151307 17.907419\n", 150 | " 2019-05-01 17.000000 17.000000\n", 151 | " 2019-06-01 13.143558 13.026459\n", 152 | " 2019-07-01 16.301859 16.494935\n", 153 | " 2019-08-01 20.696978 20.602178\n", 154 | " 2019-09-01 17.000000 17.000000\n", 155 | " 2019-10-01 21.400497 21.357583\n", 156 | " 2019-11-01 23.225391 23.896334\n", 157 | " 2019-12-01 51.054012 59.806005\n", 158 | " 2020-01-01 19.229759 19.752783\n", 159 | " 2020-02-01 21.012879 22.797509\n", 160 | " 2020-03-01 23.379470 23.896813\n", 161 | " 2020-04-01 26.942597 28.884404\n", 162 | " 2020-05-01 21.816771 21.964364\n", 163 | " 2020-06-01 22.098023 22.080757\n", 164 | " 2020-07-01 20.199880 20.113441\n", 165 | " 2020-08-01 17.029528 18.887609\n", 166 | " 2020-09-01 26.401277 28.768507\n", 167 | " 2020-10-01 23.536002 23.540163\n", 168 | " 2020-11-01 21.553099 20.198454\n", 169 | " 2020-12-01 31.299098 28.194922},\n", 170 | " 'Construction': {'sales_related_occpn': wq_wage bw\n", 171 | " 2019-01-01 16.500000 16.650564\n", 172 | " 2019-02-01 16.116426 16.119574\n", 173 | " 2019-03-01 20.996488 20.965999\n", 174 | " 2019-04-01 20.381324 20.228640\n", 175 | " 2019-05-01 19.443874 18.231327\n", 176 | " 2019-06-01 15.000000 15.091411\n", 177 | " 2019-07-01 20.400715 20.942000\n", 178 | " 2019-08-01 17.335797 17.531088\n", 179 | " 2019-09-01 16.980865 16.579821\n", 180 | " 2019-10-01 23.603595 26.282357\n", 181 | " 2019-11-01 20.958801 20.852415\n", 182 | " 2019-12-01 15.951303 15.929130\n", 183 | " 2020-01-01 20.944416 20.139497\n", 184 | " 2020-02-01 21.161480 21.015488\n", 185 | " 2020-03-01 21.673268 20.142470\n", 186 | " 2020-04-01 26.202610 26.194914\n", 187 | " 2020-05-01 19.917236 19.604956\n", 188 | " 2020-06-01 14.663183 14.230381\n", 189 | " 2020-07-01 35.117659 37.847443\n", 190 | " 2020-08-01 17.579986 18.773411\n", 191 | " 2020-09-01 23.766303 29.764785\n", 192 | " 2020-10-01 18.436565 18.444733\n", 193 | " 2020-11-01 25.271128 25.277243\n", 194 | " 2020-12-01 
17.822108 19.872629}}" 195 | ] 196 | }, 197 | "execution_count": 34, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": [ 203 | "results" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "id": "d773d75f", 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 8, 217 | "id": "19caec13", 218 | "metadata": { 219 | "ExecuteTime": { 220 | "end_time": "2021-09-03T15:32:25.165826Z", 221 | "start_time": "2021-09-03T15:32:24.831479Z" 222 | } 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "df = pd.read_feather('cps2019.ft')" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 9, 232 | "id": "652eb033", 233 | "metadata": { 234 | "ExecuteTime": { 235 | "end_time": "2021-09-03T15:32:31.835903Z", 236 | "start_time": "2021-09-03T15:32:31.825168Z" 237 | } 238 | }, 239 | "outputs": [ 240 | { 241 | "name": "stdout", 242 | "output_type": "stream", 243 | "text": [ 244 | "HHID\n", 245 | "MONTH\n", 246 | "YEAR\n", 247 | "FAMINC\n", 248 | "HHWGT\n", 249 | "MIS\n", 250 | "HHID2\n", 251 | "CBSA\n", 252 | "GTCBSASZ\n", 253 | "CSA\n", 254 | "PARENT\n", 255 | "SPOUSE\n", 256 | "PRDTRACE\n", 257 | "LINENO\n", 258 | "FAMNUM\n", 259 | "PRFAMREL\n", 260 | "HRSUSL1\n", 261 | "HRSUSL2\n", 262 | "HRSUSLT\n", 263 | "HRSACT1\n", 264 | "HRSACT2\n", 265 | "HRSACTT\n", 266 | "UNEMPDUR\n", 267 | "PRWKSTAT\n", 268 | "PRAGNA\n", 269 | "IND03D\n", 270 | "IND203D\n", 271 | "OCC03D\n", 272 | "OCC203D\n", 273 | "IND03M\n", 274 | "IND203M\n", 275 | "OCC03M\n", 276 | "OCC203M\n", 277 | "PTWK\n", 278 | "PWFMWGT\n", 279 | "PWLGWGT\n", 280 | "PWORWGT\n", 281 | "PWSSWGT\n", 282 | "PRCHLD\n", 283 | "PRNMCHLD\n", 284 | "WKEARNFLG\n", 285 | "QSTNUM\n", 286 | "OCCURNUM\n", 287 | "BASICWGT\n", 288 | "IND12\n", 289 | "OCC10\n", 290 | "IND212\n", 291 | "OCC210\n", 292 | "AGE\n", 293 | "FEMALE\n", 294 | "STATE\n", 295 | "REGION\n", 296 | 
"EDUCDT\n", 297 | "EDUC\n", 298 | "SCHENR\n", 299 | "SCHOOL\n", 300 | "RETIRED\n", 301 | "MARRIED\n", 302 | "WBHAO\n", 303 | "WBAO\n", 304 | "HISPANIC\n", 305 | "VETERAN\n", 306 | "FORBORN\n", 307 | "CITIZEN\n", 308 | "UNEMPTYPE\n", 309 | "JLTYPE\n", 310 | "LAYOFF\n", 311 | "PTECON\n", 312 | "USLFT\n", 313 | "WORKFT\n", 314 | "FTLF\n", 315 | "SAMEEMP\n", 316 | "CHDUTIES\n", 317 | "SAMEACT\n", 318 | "CHJOBACT\n", 319 | "NOTATWORK\n", 320 | "ABSTYPE\n", 321 | "ABSPAID\n", 322 | "PTREASON\n", 323 | "WANTFT\n", 324 | "DWTYPE\n", 325 | "PAIDHRLY\n", 326 | "PROXY\n", 327 | "LFS\n", 328 | "COW1\n", 329 | "COW2\n", 330 | "INDGRP\n", 331 | "MANAGER\n", 332 | "MJH\n", 333 | "NUMJOBS\n", 334 | "NILFREASON\n", 335 | "NLFFAM\n", 336 | "WKEARN\n", 337 | "HRWAGE\n", 338 | "OTCAMT\n", 339 | "PRICEADJ\n", 340 | "MINWAGE\n", 341 | "CTYBIRTH\n", 342 | "WBHAOM\n", 343 | "HISPDT03\n", 344 | "HISPDT\n", 345 | "ATLFLG\n", 346 | "INDM\n", 347 | "ASIANDT\n", 348 | "CERT\n", 349 | "DISABILITY\n", 350 | "CPSID\n", 351 | "COUNTY\n", 352 | "METSTAT\n", 353 | "MPCSTAT\n", 354 | "WKEARNADJ\n", 355 | "HRSUSL1I\n", 356 | "HRWAGEADJ\n", 357 | "UNION\n", 358 | "UNIONMEM\n" 359 | ] 360 | } 361 | ], 362 | "source": [ 363 | "for key in df.keys():\n", 364 | " print(key)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 28, 370 | "id": "5ddf0fbf", 371 | "metadata": { 372 | "ExecuteTime": { 373 | "end_time": "2021-09-03T16:04:44.267590Z", 374 | "start_time": "2021-09-03T16:04:44.259718Z" 375 | } 376 | }, 377 | "outputs": [ 378 | { 379 | "data": { 380 | "text/html": [ 381 | "
\n", 382 | "\n", 395 | "\n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | "
AGEBASICWGTMONTHIND03DOCC03MHRSUSL1WORKFTUSLFTPWORWGTHRWAGE
\n", 414 | "
" 415 | ], 416 | "text/plain": [ 417 | "Empty DataFrame\n", 418 | "Columns: [AGE, BASICWGT, MONTH, IND03D, OCC03M, HRSUSL1, WORKFT, USLFT, PWORWGT, HRWAGE]\n", 419 | "Index: []" 420 | ] 421 | }, 422 | "execution_count": 28, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [ 428 | "df.query('USLFT == -4')" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "id": "9c955a82", 435 | "metadata": {}, 436 | "outputs": [], 437 | "source": [] 438 | } 439 | ], 440 | "metadata": { 441 | "kernelspec": { 442 | "display_name": "Python 3", 443 | "language": "python", 444 | "name": "python3" 445 | }, 446 | "language_info": { 447 | "codemirror_mode": { 448 | "name": "ipython", 449 | "version": 3 450 | }, 451 | "file_extension": ".py", 452 | "mimetype": "text/x-python", 453 | "name": "python", 454 | "nbconvert_exporter": "python", 455 | "pygments_lexer": "ipython3", 456 | "version": "3.8.8" 457 | } 458 | }, 459 | "nbformat": 4, 460 | "nbformat_minor": 5 461 | } 462 | -------------------------------------------------------------------------------- /micro/cps_example/January_2017_Record_Layout.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/cps_example/January_2017_Record_Layout.txt -------------------------------------------------------------------------------- /micro/fed_hh_example.csv: -------------------------------------------------------------------------------- 1 | date,headship,living_fam 2 | 1995-01-01,50.78,17.3 3 | 1995-02-01,50.72,17.32 4 | 1995-03-01,50.66,17.28 5 | 1995-04-01,50.66,17.37 6 | 1995-05-01,50.78,17.19 7 | 1995-06-01,50.71,17.46 8 | 1995-07-01,50.71,17.47 9 | 1995-08-01,50.7,17.51 10 | 1995-09-01,50.5,17.55 11 | 1995-10-01,50.67,17.46 12 | 1995-11-01,50.65,17.55 13 | 1995-12-01,50.65,17.4 14 | 1996-01-01,50.65,17.5 15 | 1996-02-01,50.61,17.44 16 
| 1996-03-01,50.63,17.46 17 | 1996-04-01,50.72,17.38 18 | 1996-05-01,50.77,17.22 19 | 1996-06-01,50.74,17.39 20 | 1996-07-01,50.72,17.44 21 | 1996-08-01,50.66,17.51 22 | 1996-09-01,50.59,17.51 23 | 1996-10-01,50.59,17.42 24 | 1996-11-01,50.58,17.37 25 | 1996-12-01,50.61,17.27 26 | 1997-01-01,50.49,17.55 27 | 1997-02-01,50.67,17.57 28 | 1997-03-01,50.63,17.65 29 | 1997-04-01,50.61,17.75 30 | 1997-05-01,50.62,17.6 31 | 1997-06-01,50.6,17.55 32 | 1997-07-01,50.66,17.55 33 | 1997-08-01,50.67,17.56 34 | 1997-09-01,50.73,17.55 35 | 1997-10-01,50.7,17.57 36 | 1997-11-01,50.73,17.5 37 | 1997-12-01,50.53,17.59 38 | 1998-01-01,50.7,17.46 39 | 1998-02-01,50.71,17.5 40 | 1998-03-01,50.82,17.43 41 | 1998-04-01,50.87,17.47 42 | 1998-05-01,50.74,17.5 43 | 1998-06-01,50.8,17.55 44 | 1998-07-01,50.69,17.64 45 | 1998-08-01,50.72,17.5 46 | 1998-09-01,50.75,17.5 47 | 1998-10-01,50.76,17.51 48 | 1998-11-01,50.77,17.53 49 | 1998-12-01,50.79,17.57 50 | 1999-01-01,50.76,17.57 51 | 1999-02-01,50.73,17.62 52 | 1999-03-01,50.79,17.64 53 | 1999-04-01,50.72,17.67 54 | 1999-05-01,50.75,17.69 55 | 1999-06-01,50.72,17.57 56 | 1999-07-01,50.74,17.55 57 | 1999-08-01,50.81,17.56 58 | 1999-09-01,50.74,17.6 59 | 1999-10-01,50.75,17.71 60 | 1999-11-01,50.7,17.72 61 | 1999-12-01,50.58,17.81 62 | 2000-01-01,50.55,17.72 63 | 2000-02-01,50.59,17.7 64 | 2000-03-01,50.43,17.82 65 | 2000-04-01,50.45,17.77 66 | 2000-05-01,50.44,17.77 67 | 2000-06-01,50.42,17.74 68 | 2000-07-01,50.61,17.65 69 | 2000-08-01,50.6,17.71 70 | 2000-09-01,50.7,17.68 71 | 2000-10-01,50.74,17.63 72 | 2000-11-01,50.72,17.68 73 | 2000-12-01,50.9,17.54 74 | 2001-01-01,50.75,17.67 75 | 2001-02-01,50.75,17.68 76 | 2001-03-01,50.73,17.57 77 | 2001-04-01,50.68,17.62 78 | 2001-05-01,50.76,17.56 79 | 2001-06-01,50.73,17.73 80 | 2001-07-01,50.73,17.8 81 | 2001-08-01,50.69,17.85 82 | 2001-09-01,50.64,18.04 83 | 2001-10-01,50.72,17.78 84 | 2001-11-01,50.8,17.81 85 | 2001-12-01,50.89,17.72 86 | 2002-01-01,50.89,17.69 87 | 2002-02-01,50.95,17.57 88 | 
2002-03-01,50.95,17.5 89 | 2002-04-01,51.04,17.4 90 | 2002-05-01,51.03,17.55 91 | 2002-06-01,50.97,17.62 92 | 2002-07-01,50.95,17.68 93 | 2002-08-01,50.94,17.69 94 | 2002-09-01,50.95,17.61 95 | 2002-10-01,50.93,17.47 96 | 2002-11-01,50.94,17.6 97 | 2002-12-01,50.99,17.57 98 | 2003-01-01,51,17.57 99 | 2003-02-01,51.05,17.6 100 | 2003-03-01,51.05,17.64 101 | 2003-04-01,51.06,17.67 102 | 2003-05-01,50.91,17.83 103 | 2003-06-01,51.01,17.68 104 | 2003-07-01,50.99,17.64 105 | 2003-08-01,51,17.52 106 | 2003-09-01,51.07,17.45 107 | 2003-10-01,50.95,17.54 108 | 2003-11-01,51.06,17.45 109 | 2003-12-01,51,17.57 110 | 2004-01-01,51.05,17.58 111 | 2004-02-01,50.95,17.74 112 | 2004-03-01,50.92,17.81 113 | 2004-04-01,50.94,17.75 114 | 2004-05-01,50.91,17.81 115 | 2004-06-01,50.93,17.73 116 | 2004-07-01,51.08,17.71 117 | 2004-08-01,51.04,17.68 118 | 2004-09-01,50.99,17.62 119 | 2004-10-01,51,17.74 120 | 2004-11-01,50.94,17.77 121 | 2004-12-01,50.91,17.79 122 | 2005-01-01,50.99,17.77 123 | 2005-02-01,51.01,17.74 124 | 2005-03-01,51.04,17.75 125 | 2005-04-01,51.14,17.65 126 | 2005-05-01,51.02,17.83 127 | 2005-06-01,51,17.91 128 | 2005-07-01,50.9,18.03 129 | 2005-08-01,50.91,18.01 130 | 2005-09-01,50.8,18.15 131 | 2005-10-01,50.75,18.13 132 | 2005-11-01,50.91,18.03 133 | 2005-12-01,50.93,18.07 134 | 2006-01-01,50.92,17.99 135 | 2006-02-01,50.99,17.87 136 | 2006-03-01,50.92,17.94 137 | 2006-04-01,50.96,17.89 138 | 2006-05-01,51.03,17.8 139 | 2006-06-01,51.06,17.87 140 | 2006-07-01,51.08,17.9 141 | 2006-08-01,50.89,18.22 142 | 2006-09-01,50.9,18.1 143 | 2006-10-01,50.86,18.05 144 | 2006-11-01,50.81,18.2 145 | 2006-12-01,50.84,18.05 146 | 2007-01-01,50.87,18.14 147 | 2007-02-01,50.9,18.12 148 | 2007-03-01,50.92,18.09 149 | 2007-04-01,50.91,18.13 150 | 2007-05-01,50.91,18.05 151 | 2007-06-01,50.89,18.15 152 | 2007-07-01,51.01,18.07 153 | 2007-08-01,51,18.09 154 | 2007-09-01,50.97,18.18 155 | 2007-10-01,50.93,18.17 156 | 2007-11-01,50.95,18.22 157 | 2007-12-01,50.93,18.23 158 | 
2008-01-01,50.82,18.34 159 | 2008-02-01,50.78,18.51 160 | 2008-03-01,50.84,18.42 161 | 2008-04-01,50.77,18.43 162 | 2008-05-01,50.79,18.46 163 | 2008-06-01,50.75,18.42 164 | 2008-07-01,50.71,18.42 165 | 2008-08-01,50.69,18.46 166 | 2008-09-01,50.65,18.49 167 | 2008-10-01,50.7,18.56 168 | 2008-11-01,50.68,18.75 169 | 2008-12-01,50.63,18.73 170 | 2009-01-01,50.64,18.62 171 | 2009-02-01,50.55,18.73 172 | 2009-03-01,50.54,18.65 173 | 2009-04-01,50.52,18.68 174 | 2009-05-01,50.4,18.77 175 | 2009-06-01,50.47,18.79 176 | 2009-07-01,50.4,18.93 177 | 2009-08-01,50.38,19.15 178 | 2009-09-01,50.42,19.15 179 | 2009-10-01,50.45,18.97 180 | 2009-11-01,50.47,19.02 181 | 2009-12-01,50.4,18.93 182 | 2010-01-01,50.2,19.19 183 | 2010-02-01,50.05,19.28 184 | 2010-03-01,49.92,19.43 185 | 2010-04-01,49.96,19.42 186 | 2010-05-01,49.9,19.47 187 | 2010-06-01,49.98,19.44 188 | 2010-07-01,50,19.5 189 | 2010-08-01,50.07,19.58 190 | 2010-09-01,50.27,19.38 191 | 2010-10-01,50.18,19.42 192 | 2010-11-01,50.16,19.42 193 | 2010-12-01,50.1,19.44 194 | 2011-01-01,50.06,19.56 195 | 2011-02-01,50.22,19.46 196 | 2011-03-01,50.16,19.54 197 | 2011-04-01,50.21,19.58 198 | 2011-05-01,50.24,19.5 199 | 2011-06-01,50.16,19.52 200 | 2011-07-01,50.12,19.5 201 | 2011-08-01,50.07,19.5 202 | 2011-09-01,50.02,19.6 203 | 2011-10-01,49.91,19.7 204 | 2011-11-01,49.95,19.71 205 | 2011-12-01,50,19.64 206 | 2012-01-01,50.13,19.59 207 | 2012-02-01,50.33,19.45 208 | 2012-03-01,50.19,19.61 209 | 2012-04-01,50.12,19.69 210 | 2012-05-01,49.98,19.77 211 | 2012-06-01,49.97,19.78 212 | 2012-07-01,50.11,19.62 213 | 2012-08-01,50.11,19.52 214 | 2012-09-01,50.16,19.57 215 | 2012-10-01,50.1,19.65 216 | 2012-11-01,50.08,19.8 217 | 2012-12-01,50.23,19.61 218 | 2013-01-01,50.2,19.54 219 | 2013-02-01,50.34,19.44 220 | 2013-03-01,50.2,19.57 221 | 2013-04-01,50.13,19.68 222 | 2013-05-01,50.06,19.77 223 | 2013-06-01,50.11,19.81 224 | 2013-07-01,50.19,19.85 225 | 2013-08-01,50.19,19.8 226 | 2013-09-01,50.19,19.82 227 | 2013-10-01,50.25,19.72 
228 | 2013-11-01,50.28,19.74 229 | 2013-12-01,50.23,19.8 230 | 2014-01-01,50.23,19.8 231 | 2014-02-01,50.21,19.78 232 | 2014-03-01,50.19,19.74 233 | 2014-04-01,50.19,19.71 234 | 2014-05-01,50.22,19.65 235 | 2014-06-01,50.17,19.79 236 | 2014-07-01,50.14,19.89 237 | 2014-08-01,50.28,19.64 238 | 2014-09-01,50.26,19.59 239 | 2014-10-01,50.22,19.52 240 | 2014-11-01,50.2,19.52 241 | 2014-12-01,50.15,19.59 242 | 2015-01-01,50.12,19.66 243 | 2015-02-01,50.18,19.63 244 | 2015-03-01,50.17,19.68 245 | 2015-04-01,50.28,19.66 246 | 2015-05-01,50.34,19.68 247 | 2015-06-01,50.31,19.64 248 | 2015-07-01,50.32,19.69 249 | 2015-08-01,50.21,19.79 250 | 2015-09-01,50.2,19.7 251 | 2015-10-01,50.25,19.68 252 | 2015-11-01,50.2,19.57 253 | 2015-12-01,50.16,19.63 254 | 2016-01-01,50.14,19.67 255 | 2016-02-01,50.21,19.67 256 | 2016-03-01,50.33,19.64 257 | 2016-04-01,50.37,19.57 258 | 2016-05-01,50.43,19.59 259 | 2016-06-01,50.34,19.59 260 | 2016-07-01,50.18,19.66 261 | 2016-08-01,50.01,19.72 262 | 2016-09-01,50.15,19.75 263 | 2016-10-01,50.15,19.78 264 | 2016-11-01,50.19,19.79 265 | 2016-12-01,50.2,19.73 266 | 2017-01-01,50.27,19.72 267 | 2017-02-01,50.18,19.81 268 | 2017-03-01,50.13,19.85 269 | 2017-04-01,50.24,19.82 270 | 2017-05-01,50.17,19.76 271 | 2017-06-01,50.11,19.76 272 | 2017-07-01,50.12,19.64 273 | 2017-08-01,50.04,19.89 274 | 2017-09-01,50.22,19.84 275 | 2017-10-01,50.06,19.89 276 | 2017-11-01,49.95,20.16 277 | 2017-12-01,50.1,19.89 278 | 2018-01-01,50.07,19.83 279 | 2018-02-01,50.08,19.82 280 | 2018-03-01,50.13,19.68 281 | 2018-04-01,50.06,19.7 282 | 2018-05-01,50.05,19.76 283 | 2018-06-01,50.16,19.83 284 | 2018-07-01,50.19,19.8 285 | 2018-08-01,50.23,19.71 286 | 2018-09-01,50.2,19.78 287 | 2018-10-01,50.2,19.81 288 | 2018-11-01,50.12,19.92 289 | 2018-12-01,50.16,19.87 290 | 2019-01-01,50.13,19.88 291 | 2019-02-01,50.22,19.8 292 | 2019-03-01,50.16,19.83 293 | 2019-04-01,50.1,19.91 294 | 2019-05-01,50.21,19.87 295 | 2019-06-01,50.19,19.84 296 | 2019-07-01,50.21,19.79 297 | 
2019-08-01,50.23,19.57 298 | 2019-09-01,50.21,19.86 299 | 2019-10-01,50.22,19.83 300 | 2019-11-01,50.27,19.81 301 | 2019-12-01,50.18,19.89 302 | 2020-01-01,50.3,19.72 303 | 2020-02-01,50.08,20.11 304 | 2020-03-01,49.71,20.42 305 | 2020-04-01,49.36,20.91 306 | 2020-05-01,49.12,20.94 307 | 2020-06-01,49.28,20.76 308 | -------------------------------------------------------------------------------- /micro/rec_dates.csv: -------------------------------------------------------------------------------- 1 | peak,trough 2 | 1970-01-01,1970-12-01 3 | 1973-11-01,1975-04-01 4 | 1980-02-01,1980-08-01 5 | 1981-08-01,1982-12-01 6 | 1990-08-01,1991-04-01 7 | 2001-04-01,2001-12-01 8 | 2008-01-01,2009-07-01 9 | -------------------------------------------------------------------------------- /micro/results/wealth_dist.csv: -------------------------------------------------------------------------------- 1 | pct,Share 2 | 0-10,-0.2872909616377216 3 | 10-20,0.06173335966296544 4 | 20-30,0.2698319007306401 5 | 30-40,0.7483554557818934 6 | 40-50,1.4068407252343091 7 | 50-60,2.359696988806056 8 | 60-70,3.7655933030232145 9 | 70-80,6.307681535400681 10 | 80-90,11.999567442829262 11 | 90-100,73.36799025016869 12 | -------------------------------------------------------------------------------- /micro/shapefiles/cb_2019_us_cbsa_20m.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/cb_2019_us_cbsa_20m.dbf -------------------------------------------------------------------------------- /micro/shapefiles/cb_2019_us_cbsa_20m.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/cb_2019_us_cbsa_20m.shp -------------------------------------------------------------------------------- 
/micro/shapefiles/cb_2019_us_cbsa_20m.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/cb_2019_us_cbsa_20m.shx -------------------------------------------------------------------------------- /micro/shapefiles/cb_2019_us_nation_20m.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/cb_2019_us_nation_20m.dbf -------------------------------------------------------------------------------- /micro/shapefiles/cb_2019_us_nation_20m.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/cb_2019_us_nation_20m.shp -------------------------------------------------------------------------------- /micro/shapefiles/cb_2019_us_nation_20m.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/cb_2019_us_nation_20m.shx -------------------------------------------------------------------------------- /micro/shapefiles/cb_2019_us_state_20m.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/cb_2019_us_state_20m.dbf -------------------------------------------------------------------------------- /micro/shapefiles/cb_2019_us_state_20m.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/cb_2019_us_state_20m.shp 
-------------------------------------------------------------------------------- /micro/shapefiles/cb_2019_us_state_20m.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/cb_2019_us_state_20m.shx -------------------------------------------------------------------------------- /micro/shapefiles/st99_d00.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/st99_d00.dbf -------------------------------------------------------------------------------- /micro/shapefiles/st99_d00.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/st99_d00.shp -------------------------------------------------------------------------------- /micro/shapefiles/st99_d00.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/st99_d00.shx -------------------------------------------------------------------------------- /micro/shapefiles/states.csv: -------------------------------------------------------------------------------- 1 | "State","Abbreviation" 2 | "Alabama","AL" 3 | "Alaska","AK" 4 | "Arizona","AZ" 5 | "Arkansas","AR" 6 | "California","CA" 7 | "Colorado","CO" 8 | "Connecticut","CT" 9 | "Delaware","DE" 10 | "District of Columbia","DC" 11 | "Florida","FL" 12 | "Georgia","GA" 13 | "Hawaii","HI" 14 | "Idaho","ID" 15 | "Illinois","IL" 16 | "Indiana","IN" 17 | "Iowa","IA" 18 | "Kansas","KS" 19 | "Kentucky","KY" 20 | "Louisiana","LA" 21 | "Maine","ME" 22 | "Montana","MT" 23 | "Nebraska","NE" 24 | "Nevada","NV" 25 | "New 
Hampshire","NH" 26 | "New Jersey","NJ" 27 | "New Mexico","NM" 28 | "New York","NY" 29 | "North Carolina","NC" 30 | "North Dakota","ND" 31 | "Ohio","OH" 32 | "Oklahoma","OK" 33 | "Oregon","OR" 34 | "Maryland","MD" 35 | "Massachusetts","MA" 36 | "Michigan","MI" 37 | "Minnesota","MN" 38 | "Mississippi","MS" 39 | "Missouri","MO" 40 | "Pennsylvania","PA" 41 | "Rhode Island","RI" 42 | "South Carolina","SC" 43 | "South Dakota","SD" 44 | "Tennessee","TN" 45 | "Texas","TX" 46 | "Utah","UT" 47 | "Vermont","VT" 48 | "Virginia","VA" 49 | "Washington","WA" 50 | "West Virginia","WV" 51 | "Wisconsin","WI" 52 | "Wyoming","WY" 53 | -------------------------------------------------------------------------------- /micro/shapefiles/states.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/states.dbf -------------------------------------------------------------------------------- /micro/shapefiles/states.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]] -------------------------------------------------------------------------------- /micro/shapefiles/states.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/states.shp -------------------------------------------------------------------------------- /micro/shapefiles/states.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/states.shx 
-------------------------------------------------------------------------------- /micro/shapefiles/tl_2013_us_cbsa.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/tl_2013_us_cbsa.dbf -------------------------------------------------------------------------------- /micro/shapefiles/tl_2013_us_cbsa.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] -------------------------------------------------------------------------------- /micro/shapefiles/tl_2013_us_cbsa.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/tl_2013_us_cbsa.shp -------------------------------------------------------------------------------- /micro/shapefiles/tl_2013_us_cbsa.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/tl_2013_us_cbsa.shx -------------------------------------------------------------------------------- /micro/shapefiles/tl_2013_us_coastline.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/tl_2013_us_coastline.dbf -------------------------------------------------------------------------------- /micro/shapefiles/tl_2013_us_coastline.prj: -------------------------------------------------------------------------------- 1 | 
GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] -------------------------------------------------------------------------------- /micro/shapefiles/tl_2013_us_coastline.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/tl_2013_us_coastline.shp -------------------------------------------------------------------------------- /micro/shapefiles/tl_2013_us_coastline.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/shapefiles/tl_2013_us_coastline.shx -------------------------------------------------------------------------------- /micro/x13as/x13as: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bdecon/econ_data/ed4003a633b46e718d29e0c3a9c5e28662f2e090/micro/x13as/x13as --------------------------------------------------------------------------------