├── .gitattributes
├── README.md
├── SCF_plus
    ├── SCF_plus.dta
    ├── SCF_plus_mini.csv
    ├── generating_mini.md
    └── README.md
└── cross_section
    ├── cities_brazil.csv
    ├── cities_us.csv
    ├── forbes-global2000.csv
    ├── forbes-billionaires.csv
    ├── README.md
    └── webscrape_forbes.ipynb


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.dta filter=lfs diff=lfs merge=lfs -text
2 | *.csv filter=lfs diff=lfs merge=lfs -text
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Data files for High Dimensional Economics
2 | 
3 | This repository collects the data sets used in the textbook *High Dimensional Economics*.
4 | 


--------------------------------------------------------------------------------
/SCF_plus/SCF_plus.dta:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:c208ccd49b3bd11205a88bce08864ea445898b182098002fd6b5e38664aa3f01
3 | size 103934093
4 | 


--------------------------------------------------------------------------------
/SCF_plus/SCF_plus_mini.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:38d4cb6640fcfbd5ee204a27ca2e6a4d5da9a1313a7e4edb9884ccc359ac0e31
3 | size 32853734
4 | 


--------------------------------------------------------------------------------
/cross_section/cities_brazil.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:127c77ad01d5c6fc6225222698bee6a0f2fbf9e6dbef9ccff96e6cc92d722e39
3 | size 17888
4 | 


--------------------------------------------------------------------------------
/cross_section/cities_us.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:fa831de3a9e318712fc688e62908fb353eb18497b729fc8b6fda17b2653ceebb
3 | size 48126
4 | 


--------------------------------------------------------------------------------
/cross_section/forbes-global2000.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:29c7af7c1ee77064094b17327150e27dd3c56147f8e343526681e35bcee3e4c6
3 | size 118381
4 | 


--------------------------------------------------------------------------------
/cross_section/forbes-billionaires.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:45618eb7ccab9a132c87005d74bfbd0d18ffb7ff599f619f85083edad0149d63
3 | size 794366
4 | 


--------------------------------------------------------------------------------
/SCF_plus/generating_mini.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | jupytext:
 3 |   text_representation:
 4 |     extension: .md
 5 |     format_name: myst
 6 |     format_version: 0.13
 7 |     jupytext_version: 1.14.1
 8 | kernelspec:
 9 |   display_name: Python 3 (ipykernel)
10 |   language: python
11 |   name: python3
12 | ---
13 | 
14 | 
15 | 
16 | For instructions on converting between ``.ipynb`` and ``.md`` files, please refer to https://manual.quantecon.org/writing/converting.html
17 | 
18 | ```{code-cell} ipython3
19 | import pandas as pd
20 | ```
21 | 
22 | ```{code-cell} ipython3
23 | var_list = ['yearmerge',    # 3-year window
24 |             'ffanw',        # net wealth (ffafin + ffanfin - tdebt)
25 |             'tinc',         # total household income, excluding capital gains
26 |             'incws',        # income from wages, salaries and self-employment
27 |             'wgtI95W95',    # survey weight 
28 |             'ffanwgroups',  # wealth groups
29 |             'tincgroups']   # income groups
30 | ```
31 | 
32 | Define the new names for the selected variables, listed in the same order as ``var_list``.
33 | 
34 | ```{code-cell} ipython3
35 | var_names_new = 'year', 'n_wealth', 't_income', 'l_income', 'weights', 'nw_groups', 'ti_groups'
36 | ```
37 | 
38 | ```{code-cell} ipython3
39 | df = pd.read_stata('https://github.com/QuantEcon/high_dim_data/blob/main/SCF_plus/SCF_plus.dta?raw=true')
40 | ```
41 | 
42 | ```{code-cell} ipython3
43 | df = df[[*var_list]]
44 | df1=df.astype({'yearmerge': int}).dropna()
45 | df1.columns = var_names_new
46 | ```
47 | 
48 | ```{code-cell} ipython3
49 | df1
50 | ```
51 | 
52 | ```{code-cell} ipython3
53 | df1.to_csv('SCF_plus_mini.csv', index=False)  # omit the row index from the output file
54 | ```
55 | 
56 | ```{code-cell} ipython3
57 | 
58 | ```
59 | 


--------------------------------------------------------------------------------
/cross_section/README.md:
--------------------------------------------------------------------------------
 1 | ### Datasets information
 2 | 
 3 | The table below describes each dataset in this directory:
 4 | 
 5 | | name                     | description                                                 | size  | source                                                    | Features                                                     |
 6 | | ------------------------ | ----------------------------------------------------------- | ----- | --------------------------------------------------------- | ------------------------------------------------------------ |
 7 | | cities_brazil.csv        | Population of cities in 2023 Brazil                         | 18kb  | https://worldpopulationreview.com/countries/cities/brazil |                                                              |
 8 | | cities_us.csv            | Population of cities in United States                       | 48kb  | https://worldpopulationreview.com/us-cities               | Including population data in 2023, 2022, 2020 census, 2010 census |
 9 | | forbes-billionaires.csv* | The world's richest 2000 people in 2020 according to Forbes | 790kb | https://www.forbes.com/billionaires/                      |                                                              |
10 | | forbes-global2000.csv*   | The world's largest 2000 firms in 2020 according to Forbes  | 118kb | https://www.forbes.com/lists/global2000/                  | Variables measuring the firm size: sales, profits, assets, market value |
11 | |                          |                                                             |       |                                                           |                                                              |
12 | 
13 | \* These two datasets were generated with the notebook ``webscrape_forbes.ipynb``, which queries the Forbes API.
14 | 
15 | ### Instructions on fetching the data for use in a lecture
16 | 
17 | There are many ways to fetch and use the data.
18 | 
19 | In a ``Python`` environment, you can proceed as follows:
20 | 
21 | Step 1: find the file's URL by opening the file on GitHub -->> view raw -->> copy the URL
22 | 
23 | Step 2: paste the url path in the following code
24 | 
25 | ```python
26 | import pandas as pd
27 | 
28 | url = "<Paste the url path here>"
29 | pd.read_csv(url)                              # for csv files
30 | ```
31 | 
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/SCF_plus/README.md:
--------------------------------------------------------------------------------
 1 | # SCF+ dataset and generating ginis
 2 | 
 3 | ## 1 Usage of the files
 4 | 
 5 | The ``SCF+`` dataset is saved in ``SCF_plus.dta``.
 6 | - All monetary variables are in 2016 dollars, deflated using the CPI of the same year (note that income variables refer to the previous year).
 7 | - There are 5 implicates per observation due to multiple imputation. The implicates are indicated by the variable "impnum".
 8 | - For a description of each variable, see the table below.
 9 | 
10 | ## 2 Overview of ``SCF+`` variables
11 | 
12 | |  variable name  |                            label                             |
13 | | :-------------: | :----------------------------------------------------------: |
14 | |     adults      |                       number of adults                       |
15 | |      ageh       |                         age of head                          |
16 | |    agehgroup    |                          age group                           |
17 | |     blackh      |                    whether head is black                     |
18 | |       bnd       |                            bonds                             |
19 | |     ccdebt      |                       credit card debt                       |
20 | |      cerde      |                   certificates of deposit                    |
21 | |    children     |                      number of children                      |
22 | |    collegeh     |       whether head has attained at least some college        |
23 | |       CPI       |                     consumer price index                     |
24 | |     ffaass      |                         total assets                         |
25 | |     ffabus      |                       business wealth                        |
26 | |     ffaequ      |               equity and other managed assets                |
27 | |     ffafin      | financial assets (ffaequ, liqcer, bnd, mfun, ofin, life, pen) |
28 | |     ffanfin     |   non-financial assets (ffabus, house, oest, vehi, onfin)    |
29 | |      ffanw      |            net wealth (ffafin + ffanfin - tdebt)             |
30 | |   ffanwgroups   |                        wealth groups                         |
31 | |      hdebt      |          housing debt on owner-occupied real estate          |
32 | |     hhequiv     |                    OECD equivalence scale                    |
33 | |   highsample    |           indicator for high-income sample in 1983           |
34 | |      house      |                     asset value of house                     |
35 | | housing_rent_yd |       housing rental yield from Macrohistory Database        |
36 | |       id        |                         household id                         |
37 | |     impnum      |                 imputation implicate number                  |
38 | |     inccap      |                        capital income                        |
39 | |    inctrans     |                       transfer income                        |
40 | |      incws      |       income from wages, salaries and self-employment        |
41 | |     incwsse     |                income from wages and salaries                |
42 | |      life       |                    life insurance assets                     |
43 | |       liq       |                        liquid assets                         |
44 | |     liqcer      |          liquid assets and certificates of deposit           |
45 | |      mfun       |                         mutual funds                         |
46 | | moneymarketacc  |                    money market accounts                     |
47 | |      oest       |               other real estate (net position)               |
48 | |    oestdebt     |                    other real estate debt                    |
49 | |      ofin       |                    other financial assets                    |
50 | |      onfin      |                  other non-financial assets                  |
51 | |     othdebt     |                          other debt                          |
52 | |       PCE       |           personal consumption expenditures index            |
53 | |      pdebt      |                        personal debt                         |
54 | |       pen       |                           pensions                           |
55 | |     prepaid     |                        prepaid cards                         |
56 | |      raceh      |                         race of head                         |
57 | |     savbnd      |                        savings bonds                         |
58 | |      tdebt      | total household debt (excluding other real estate debt, i.e. hdebt + pdebt) |
59 | |      tinc       |       total household income, excluding capital gains        |
60 | |   tincgroups    |                        income groups                         |
61 | |      vehi       |                           vehicles                           |
62 | |       wgt       |                      unadjusted weight                       |
63 | |    wgtI95W95    |                        survey weight                         |
64 | |      year       |                             year                             |
65 | |    yearmerge    |                        3-year window                         |
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/cross_section/webscrape_forbes.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# parse_forbeslists\n",
  8 |     "\n",
  9 |     "This notebook \n",
 10 |     "- parses Forbes richest lists and Forbes global 2000 list and\n",
 11 |     "- saves them as csv files."
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 1,
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "import requests\n",
 21 |     "import pandas as pd\n",
 22 |     "from pathlib import Path\n",
 23 |     "from pandas import DataFrame"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 2,
 29 |    "metadata": {},
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "# Forbes lists\n",
 33 |     "lists = [ \n",
 34 |     "    { 'type': 'person', 'year': 2020, 'uri': 'billionaires' },                  # World richest\n",
 35 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'forbes-400' },                  # American richest 400\n",
 36 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'hong-kong-billionaires' },      # Hong Kong richest 50\n",
 37 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'australia-billionaires' },      # Australia richest 50\n",
 38 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'china-billionaires' },          # China richest 400\n",
 39 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'taiwan-billionaires' },         # Taiwan richest 50\n",
 40 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'india-billionaires' },          # India richest 100\n",
 41 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'japan-billionaires' },          # Japan richest 50\n",
 42 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'africa-billionaires' },         # Africa richest 50\n",
 43 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'korea-billionaires' },          # Korea richest 50\n",
 44 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'malaysia-billionaires' },       # Malaysia richest 50\n",
 45 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'philippines-billionaires' },    # Philippines richest 50\n",
 46 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'singapore-billionaires' },      # Singapore richest 50\n",
 47 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'indonesia-billionaires' },      # Indonesia richest 50\n",
 48 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'thailand-billionaires' },       # Thailand richest 50\n",
 49 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'self-made-women' },             # American richest self-made women\n",
 50 |     "    # { 'type': 'person', 'year': 2018, 'uri': 'richest-in-tech' },             # tech richest\n",
 51 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'hedge-fund-managers' },         # hedge fund highest-earning\n",
 52 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'powerful-people' },             # world powerful\n",
 53 |     "    # { 'type': 'person', 'year': 2020, 'uri': 'power-women' },                 # world powerful women\n",
 54 |     "    # { 'type': 'person', 'year': 0, 'uri': 'rtb' },                            # real-time world billionaires\n",
 55 |     "    # { 'type': 'person', 'year': 0, 'uri': 'rtrl' },                           # real-time American richest 400\n",
 56 |     "]\n",
 57 |     "\n",
 58 |     "url = 'http://www.forbes.com/ajax/list/data'\n",
 59 |     "SOURCES_DIR = Path('./sources')\n",
 60 |     "\n",
 61 |     "for forbes_list in lists:\n",
 62 |     "    response = requests.get(url, params=forbes_list)\n",
 63 |     "\n",
 64 |     "    if not SOURCES_DIR.exists():\n",
 65 |     "        SOURCES_DIR.mkdir(exist_ok=True, parents=True)\n",
 66 |     "\n",
 67 |     "    DataFrame(response.json()).to_csv('forbes-{}.csv'.format(forbes_list['uri']))"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
  74 |     "Next, fetch the Forbes Global 2000 list of the 2000 largest firms worldwide."
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 3,
 80 |    "metadata": {},
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "headers = {\n",
 84 |     "    \"accept\": \"application/json, text/plain, */*\",\n",
 85 |     "    \"referer\": \"https://www.forbes.com/global2000/\",\n",
 86 |     "    \"user-agent\": \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36\",\n",
 87 |     "}\n",
 88 |     "\n",
 89 |     "cookies = {\n",
 90 |     "    \"notice_behavior\": \"expressed,eu\",\n",
 91 |     "    \"notice_gdpr_prefs\": \"0,1,2:1a8b5228dd7ff0717196863a5d28ce6c\",\n",
 92 |     "}\n",
 93 |     "\n",
 94 |     "api_url = \"https://www.forbes.com/forbesapi/org/global2000/2020/position/true.json?limit=2000\"\n",
 95 |     "response = requests.get(api_url, headers=headers, cookies=cookies).json()\n",
 96 |     "\n",
 97 |     "sample_table = [\n",
 98 |     "    [\n",
 99 |     "        item[\"organizationName\"],\n",
100 |     "        item[\"country\"],\n",
101 |     "        item[\"revenue\"],\n",
102 |     "        item[\"profits\"],\n",
103 |     "        item[\"assets\"],\n",
104 |     "        item[\"marketValue\"]\n",
105 |     "    ] for item in\n",
106 |     "    sorted(response[\"organizationList\"][\"organizationsLists\"], key=lambda k: k[\"position\"])\n",
107 |     "]\n",
108 |     "\n",
109 |     "dfff = pd.DataFrame(sample_table, columns=[\"Company\", \"Country\", \"Sales\", \"Profits\", \"Assets\", \"Market Value\"])\n",
110 |     "dfff.to_csv('forbes-global2000.csv')"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": null,
116 |    "metadata": {},
117 |    "outputs": [],
118 |    "source": []
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": null,
123 |    "metadata": {},
124 |    "outputs": [],
125 |    "source": []
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": null,
130 |    "metadata": {},
131 |    "outputs": [],
132 |    "source": []
133 |   }
134 |  ],
135 |  "metadata": {
136 |   "kernelspec": {
137 |    "display_name": "Python 3",
138 |    "language": "python",
139 |    "name": "python3"
140 |   },
141 |   "language_info": {
142 |    "codemirror_mode": {
143 |     "name": "ipython",
144 |     "version": 3
145 |    },
146 |    "file_extension": ".py",
147 |    "mimetype": "text/x-python",
148 |    "name": "python",
149 |    "nbconvert_exporter": "python",
150 |    "pygments_lexer": "ipython3",
151 |    "version": "3.10.9"
152 |   }
153 |  },
154 |  "nbformat": 4,
155 |  "nbformat_minor": 5
156 | }
157 | 


--------------------------------------------------------------------------------