├── .gitattributes ├── .gitignore ├── LICENSE ├── README.html ├── README.md ├── README.pdf ├── code ├── 1_Download_WRDS_Data.py ├── 2_Process_WRDS_Data.py ├── 3_Calculate_Kappas.py ├── firminfo.py ├── investors.py ├── kappas.py ├── our_plot_config.py ├── plots10_kappa_comparison_appendix.py ├── plots11_profit_simulations.py ├── plots1_basic_descriptives.py ├── plots2_kappa_official.py ├── plots3_big_three_four.py ├── plots4_investor_similarity.py ├── plots5_airlines_cereal.py ├── plots6_sole_vs_shared.py ├── plots7_short_interest_coverage.py ├── plots8_individual_firm_coverage.py ├── plots9_blackrock_vanguard.py ├── table3_variance_decomp.py ├── table4_kappa_correlation.py ├── utilities │ ├── date_util.py │ ├── matlab_util.py │ └── quantiles.py ├── wrds_checks.py ├── wrds_cleaning.py └── wrds_downloads.py ├── data ├── checks │ └── .keep ├── derived │ └── .keep ├── public │ ├── .gitattributes │ ├── .keep │ ├── DLE_markups_fig_v2.csv │ ├── airlines.parquet │ ├── big4.csv │ ├── cereal.parquet │ ├── manager_consolidations.csv │ ├── out_scrape.parquet │ └── permno_drops.csv └── wrds │ └── .keep ├── figures └── .keep ├── requirements.txt ├── run_all.bat ├── run_all.sh ├── tables └── .keep └── wrds_constituents.pdf /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # celery beat schedule file 95 | celerybeat-schedule 96 | 97 | # SageMath parsed files 98 | *.sage.py 99 | 100 | # Environments 101 | .env 102 | .venv 103 | env/ 104 | venv/ 105 | ENV/ 106 | env.bak/ 107 | venv.bak/ 108 | 109 | # Spyder project settings 110 | .spyderproject 111 | .spyproject 112 | 113 | # Rope project settings 114 | .ropeproject 115 | 116 | # mkdocs documentation 117 | /site 118 | 119 | # mypy 120 | .mypy_cache/ 121 | .dmypy.json 122 | dmypy.json 123 | 124 | # Pyre type checker 125 | .pyre/ 126 | 127 | # Project Data files 128 | *.parquet 129 | !airlines.parquet 130 | !cereal.parquet 131 | 132 | *.xlsx 133 | *.pickle 134 | markup-simulations.csv 135 | 136 | # Tex Files 137 | *.tex 138 | *.aux 139 | *.log 140 | 141 | # Figures 142 | *.pdf 143 | !README.pdf 144 | !wrds_constituents.pdf 145 | 146 | # Mac Garbage 147 | .DS_Store 148 | code.zip 149 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 chrisconlon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.html: -------------------------------------------------------------------------------- 1 | README

(README.html is an HTML rendering of the replication instructions; its content is identical to /README.md below.)

-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Replication Instructions for: Common Ownership in America: 1980-2017
2 | Backus, Conlon and Sinkinson (2020)
3 | AEJMicro-2019-0389
4 | openicpsr-120083
5 | A copy of the paper is here: https://chrisconlon.github.io/site/common_owner.pdf
6 |
7 |
8 | ### Open ICPSR Install Instructions
9 | 1. Download and unzip the repository.
10 | 2. All required files are included or are downloaded programmatically from WRDS (see notes below).
11 |
12 | ### Github Install Instructions
13 | To download the repo, simply type:
14 |
15 |     git clone https://github.com/chrisconlon/CommonOwnerReplication
16 |
17 | You will need to have the git large file storage (LFS) extension installed (most users will not have it by default).
18 |
19 | To install this extension, follow the directions at:
20 | https://git-lfs.github.com
21 |
22 | ### Dataset Size and Memory
23 | 1. We recommend that you have at least 64GB of RAM available.
24 | 2. All of the datasets saved will take up about 14 GB of drive space.
25 | 3. NumPy is used extensively for the calculations and is multithreaded (so more cores will help).
26 | 4. The computation of the $\kappa_{fg}$ terms is parallelized quarter by quarter explicitly (so cores help a lot here); the quantity being computed is sketched after this list.
27 | 5. Most of the time, however, is spent merging and filtering data in pandas (more cores don't help much).
28 | 6. Total runtime on a 2015 iMac with 64GB of RAM is around 3 hours.
29 | 7. WRDS download time is about an hour (depending on internet speed) and the total download is > 10GB.
30 |
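For reference, the $\kappa_{fg}$ profit weights are ratios of inner products over the investor dimension $s$, as implemented in `raw_kappa` (code/kappas.py); here $\beta_{sf}$ is investor $s$'s ownership share of firm $f$ and $\gamma_{sf}$ is the corresponding control weight (equal to $\beta_{sf}$ in the baseline specification):

$$\kappa_{fg} = \frac{\sum_{s} \gamma_{sf}\,\beta_{sg}}{\sum_{s} \gamma_{sf}\,\beta_{sf}}$$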
31 | ### Downloading from WRDS
32 | You must provide your own WRDS account; you will be prompted for your WRDS username and password in 1_Download_WRDS_Data.py.
33 |
34 | To request an account, please visit:
35 | https://wrds-www.wharton.upenn.edu/register/
36 |
37 | If you do not have API access, you will need to consult the wrds_constituents.pdf document for instructions on using the WRDS web interface. This is strongly NOT RECOMMENDED. Because you cannot apply complex filters to the SQL queries as we do programmatically, you will also need much more disk space (on the order of a terabyte to save the entire Thomson-Reuters s34 13f database).
38 |
39 | If you are running this as a batch job (not interactively), such as on an HPC cluster, you will need to pre-enter your WRDS password by creating a pgpass file.
40 |
41 | As an example:
42 |
43 | ```
44 | import wrds
45 | db = wrds.Connection(wrds_username='joe')
46 | db.create_pgpass_file()
47 | ```
48 |
49 | If you encounter a problem, it might be that your pgpass file is not accessible to your batch job.
50 |
51 | For more information, please see: [https://wrds-www.wharton.upenn.edu/pages/support/programming-wrds/programming-python/python-from-your-computer/](https://wrds-www.wharton.upenn.edu/pages/support/programming-wrds/programming-python/python-from-your-computer/)
52 |
53 |
54 | ### Python dependencies
55 | Our run_all.sh bash script should install all of the required Python dependencies (assuming Python itself is installed correctly and you have the necessary access to install packages).
56 |
57 | To install those dependencies manually (such as on a shared server), you may need to do the following.
58 |
59 | Python (version 3.8 or above) - install dependencies with
60 |
61 |     pip3 install -r requirements.txt
62 |
63 | numpy, pandas, matplotlib, pyarrow, brotli, seaborn, wrds, scikit-learn, pyhdfe, pyblp, statsmodels
64 |
65 | We anticipate most users will be running this replication package from within an Anaconda environment. To avoid making changes to your base environment, you will want to create a separate environment for this replication package. To do that:
66 |
67 | ```
68 | conda create --name common_owner --file requirements.txt
69 | conda activate common_owner
70 | ```
71 |
72 | ## How to run the code
73 | Change to the directory containing this file and run "./run_all.sh" on the terminal. The code should take approximately 3-10 hours to run. Tables and figures will be produced as described below.
74 |
75 | ```
76 | cd CommonOwnerReplication
77 | ./run_all.sh
78 | ```
79 |
80 | ### Windows Warning
81 | Windows Users: instead use "run_all.bat" from the command prompt.
82 |
83 | There are known conflicts between Windows 10 and core Python DLLs in versions < 3.7.3. If you are running on Windows 10, all Python programs will run best with Python 3.8 or later (see: https://bugs.python.org/issue35797).
84 |
85 |
86 | ## File of origin for tables and figures
87 |
88 | | Table/Figure Number | Generating File |
89 | | --- |---|
90 | | Table 1 | (by hand) |
91 | | Table 2 | (by hand) |
92 | | Table 3 | table3_variance_decomp.py |
93 | | Table 4 | table4_kappa_correlation.py |
94 | | Figure 1 | plots2_kappa_official.py |
95 | | Figure 2 | plots1_basic_descriptives.py |
96 | | Figure 3 | plots1_basic_descriptives.py |
97 | | Figure 4 | plots1_basic_descriptives.py |
98 | | Figure 5 | plots3_big_three_four.py |
99 | | Figure 6 | plots2_kappa_official.py |
100 | | Figure 7 | plots2_kappa_official.py |
101 | | Figure 8 | plots4_investor_similarity.py |
102 | | Figure 9 | plots2_kappa_official.py |
103 | | Figure 10 | plots11_profit_simulations.py |
104 | | Figure 11 | plots11_profit_simulations.py |
105 | | Figure 12 | plots9_blackrock_vanguard.py |
106 | | Figure 13 | plots2_kappa_official.py |
107 | | Figure 14 | plots2_kappa_official.py |
108 | | Figure 15 | plots2_kappa_official.py |
109 | | Figure 16 | plots5_airlines_cereal.py |
110 | | Figure 17 | plots6_sole_vs_shared.py |
111 | | Figure A1 | plots1_basic_descriptives.py |
112 | | Figure A2 | plots8_individual_firm_coverage.py |
113 | | Figure A3 | plots10_kappa_comparison_appendix.py |
114 | | Figure A4 | plots7_short_interest_coverage.py |
115 | | Figure A5 | plots7_short_interest_coverage.py |
116 | | Figure A6 | plots2_kappa_official.py |
117 | | Figure A7 | plots2_kappa_official.py |
118 | | Figure A8 | plots4_investor_similarity.py |
119 |
120 |
121 | ## Within-File Dependencies:
122 | 1_Download_WRDS_Data.py:
123 |
124 |     wrds_downloads
125 |
126 | 2_Process_WRDS_Data.py:
127 |
128 |     wrds_cleaning
129 |     wrds_checks
130 |
131 | 3_Calculate_Kappas.py:
132 |
133 |     kappas
134 |     investors
135 |     firminfo
136 |     utilities/quantiles
137 |
138 | plots3_big_three_four.py:
139 |
140 |     kappas
141 |     investors
142 |
143 | plots5_airlines_cereal.py:
144 |
145 |     kappas
146 |
147 | plots9_blackrock_vanguard.py:
148 |
149 |     kappas
150 |
151 | plots10_kappa_comparison_appendix.py:
152 |
153 |     utilities/matlab_util
154 |
155 |
156 | ## Files Provided and Data Access Statements
157 |
158 | ### WRDS
159 |
160 | We use several data sources from WRDS.
These are accessed programmatically through the WRDS API and we are not able to include the individual files in this replication package. (See terms: https://wrds-www.wharton.upenn.edu/users/tou/).
161 |
162 | They include:
163 | A. CRSP: data on securities prices and shares outstanding; list of S&P 500 constituents.
164 | B. Compustat: business fundamentals, short interest, business segment info.
165 | C. Thomson-Reuters: s34 database of 13f filings/ownership.
166 |
167 | ### Author-Constructed Files
168 | data/public:
169 |
170 | The files below are publicly available CSVs constructed by the authors. These are drops, consolidations, and manager identifiers used in our project. They are distributed with this code package.
171 |
172 | 1. manager_consolidations.csv: lists consolidated manager numbers (several manager numbers actually correspond to a single manager)
173 | 2. permno_drops.csv: lists dropped permno IDs with the reasons they are dropped
174 | 3. big4.csv: lists manager numbers for BlackRock, Fidelity, State Street, and Vanguard
175 |
176 | The markups from DLEU 2020 can be reproduced by running their replication package:
177 |
178 | ### De Loecker Eeckhout Unger Markups
179 | 4. DLE_markups_fig_v2.csv: markups from Figure 10 of De Loecker, Eeckhout, and Unger (QJE 2020)
180 |
181 | De Loecker, Jan; Eeckhout, Jan; Unger, Gabriel, 2020,
182 | "Replication Data for: 'The Rise of Market Power and the Macroeconomic Implications'", https://doi.org/10.7910/DVN/5GH8XO, Harvard Dataverse, V1
183 |
184 | That replication package requires access to WRDS. A subset of the markups (and no additional data) is being made publicly available here.
185 |
186 | ### Scraped 13f filings
187 | The original source data are the publicly available SEC 13f filing data from EDGAR: https://www.sec.gov/edgar/searchedgar/companysearch.html
188 |
189 | Most users instead access the Thomson-Reuters S34 database from WRDS (as our script above does). We've also scraped the original source documents from EDGAR and compiled them into an easy-to-use format. We provide the entire universe of 13f filings as a separate dataset. For the purposes of replicating this paper, we use three smaller extracts as parquet files:
190 |
191 | 5. cereal.parquet: extract of 13F filings for firms within the cereal industry (includes small cap)
192 | 6. airlines.parquet: extract of 13F filings for firms within the airline industry (includes small cap)
193 | 7. out_scrape.parquet: extract of 13F filings for LARGE cap firms (a superset of the S&P 500) from 1999-2017 (300MB).
194 |
195 | Each file contains:
196 | - 13f filings going back to 1999 and ending in late 2017 (the data period for this paper).
197 |
198 | The full set of scraped 13f filings and a detailed description of how the extracts were created are available in two places:
199 |
200 | 1. The live version of the 13f scraping project is [https://sites.google.com/view/msinkinson/research/common-ownership-data?](https://sites.google.com/view/msinkinson/research/common-ownership-data?)
201 |
202 | 2.
The permanent archived version (including these extracts) is available to the public at Harvard Dataverse (doi:10.7910/DVN/ZRH3EU):
203 | https://doi.org/10.7910/DVN/ZRH3EU
204 |
205 | Backus, Matthew; Conlon, Christopher T; Sinkinson, Michael; 2020, "Common Ownership Data: Scraped SEC form 13F filings for 1999-2017", https://doi.org/10.7910/DVN/ZRH3EU, Harvard Dataverse, V1.1
206 |
207 |
208 | ### Description of .parquet file format
209 | We use the parquet format for:
210 |
211 | - Large data inputs (above)
212 | - Most intermediate datasets
213 |
214 | Parquet files are compressed columnar storage binaries that are readable by several software packages (R, Python, Stata, Julia, C++, etc.) and platforms. The goal of the parquet project is to maintain good performance for large datasets as well as interoperability.
215 |
216 | The storage method is stable and maintained by the Apache Foundation.
217 | https://parquet.apache.org/documentation/latest/
218 |
219 | We use the Python package "pyarrow" to read parquets and the package "brotli" for compression (both listed in requirements.txt).
220 |
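As a quick sanity check (not part of the replication pipeline), any of the provided extracts can be loaded directly with pandas and pyarrow; a minimal sketch, assuming it is run from the repository root:

```
# Minimal sketch: load one of the provided parquet extracts and inspect it.
# pandas uses the pyarrow engine here; brotli decompression is handled automatically.
import pandas as pd

df = pd.read_parquet('data/public/airlines.parquet', engine='pyarrow')
print(df.shape)    # rows x columns
print(df.dtypes)   # column names and types
print(df.head())   # first few rows
```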
-------------------------------------------------------------------------------- /README.pdf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/chrisconlon/CommonOwnerReplication/c430d4c174bed9e2d4a01cec1582ce7841781c0f/README.pdf
-------------------------------------------------------------------------------- /code/1_Download_WRDS_Data.py: --------------------------------------------------------------------------------
1 | # Step 1: Download Data from WRDS
2 | # Note: you will need a WRDS account for wrds.Connection() to work
3 | import pandas as pd
4 | import wrds
5 | from our_plot_config import wrds_dir
6 | from wrds_downloads import clean_wrds, get_names, get_crosswalk
7 | from wrds_downloads import get_fundamentals, get_short_interest
8 | from wrds_downloads import get_segments, get_msf, get_s34
9 |
10 | # raw data pulls -- save in "WRDS" directory
11 | f_raw_s34 = wrds_dir / 'raw_s34.parquet'
12 | f_splist = wrds_dir / 'sp500_list.parquet'
13 | f_crsp_names = wrds_dir / 'crsp_names.parquet'
14 | f_msf_data = wrds_dir / 'crsp_msf.parquet'
15 | f_short = wrds_dir / 'short_interest.parquet'
16 | f_fundamentals = wrds_dir / 'fundamentals_data.parquet'
17 | f_segments = wrds_dir / 'wrds_segments.parquet'
18 | f_managers = wrds_dir / 'manager_list.parquet'
19 | f_managers_all = wrds_dir / 'manager_list_all.parquet'
20 | f_names = wrds_dir / 'all_names.parquet'
21 |
22 | # Pull the Data from WRDS ~20 min (ENTIRE FILE)
23 | # This file requires about 48GB of RAM available
24 |
25 | db = wrds.Connection()
26 |
27 | # Pull the ID/Crosswalk Tables
28 | # - Pull the S&P 500 Constituents List (CRSP)
29 | # - Pull the "names" file: this maps permno to CUSIP, and NCUSIP (current period), and SIC code by date
30 | # - Pull the Compustat link file : Construct a unique mapping from gvkey (Compustat) to Permno (CRSP)
31 | # - Save the raw (un-filtered by time or S&P membership) files
32 |
33 | # This block is < 1m
34 | df_sp500 = clean_wrds(db.get_table('crsp', 'DSP500LIST'))
35 | df_sp500.to_parquet(f_splist)
36 | print("First File Done: WRDS connection is probably ok")
37 |
38 |
39 | # Filter S&P List: Ignore pre-1980 components
40 | df_sp500 = df_sp500[df_sp500.ending > '1979-12-31']
41 |
42 | df_names = get_names(db)
43 | df_names.to_parquet(f_crsp_names)
44 |
45 | # Grab all possible CUSIPS by Permno
46 | df_names2 = pd.merge(df_sp500, df_names, on='permno')
47 | df_names2 = df_names2[~((df_names2['ending'] < df_names2['st_date']) | (
48 | df_names2['start'] > df_names2['end_date']))]
49 |
50 | # Get unique list of CUSIPs and Permno's for SQL queries
51 | all_cusips = list(set(df_names2.cusip).union(df_names2.ncusip))
52 | all_permnos = list(df_names2.permno.unique().astype(int))
53 |
54 | crosswalk = get_crosswalk(db, all_permnos)
55 |
56 | # Pull the CRSP and Compustat Data Files (< 1m)
57 | # Pull the Compustat Short Interest File
58 | # - Add permno's to short interest table
59 | # - Convert Short interest table to quarterly observations
60 | # - Take last observations within each Permno, Quarter
61 | #
62 | # Pull the Compustat Fundamentals Data
63 | # - Add permnos and CUSIPS to the Fundamentals data
64 | #
65 | # Pull the Compustat Business Segments Data
66 | # - Just count the number of segments
67 | # - Add permnos to number of segments
68 | #
69 | # Pull the CRSP Price and Shares Outstanding MSF Data
70 | # - Save to parquet (around 2MB compressed)
71 | # - Use this to get a single price, shares_outstanding for each security quarter
72 |
73 | df_fund = get_fundamentals(db, crosswalk)
74 | df_fund.to_parquet(f_fundamentals)
75 |
76 | df_short = get_short_interest(db, crosswalk)
77 | df_short.to_parquet(f_short, compression='brotli')
78 |
79 |
80 |
81 |
82 | df_seg = get_segments(db, crosswalk)
83 | df_seg.to_parquet(f_segments, compression='brotli')
84 |
85 | df_msf2 = get_msf(db, all_permnos, False)
86 | df_msf2.to_parquet(f_msf_data, compression='brotli')
87 |
88 |
89 | # Get Managers and stock names
90 | df_m = db.get_table('tfn', 's34type1')
91 | df_m.to_parquet(f_managers_all, compression='brotli')
92 |
93 | names = db.get_table('crsp', 'stocknames')
94 | names.to_parquet(f_names)
95 |
96 | # #### Pull the S-34 Data -- This is SLOW don't re-run ~15m
97 | # - Only get for 8-digit CUSIPs in our S&P dataset
98 | # - This is VERY slow and around 5.5 GB (320MB on disk)
99 | # - Use this to get holdings for each 13-F investor (Don't trust self reported prices or shares outstanding)
100 |
101 | print("Starting s34 Download...")
102 | s34_data = get_s34(db, all_cusips)
103 | s34_data.to_parquet(f_raw_s34, compression='brotli')
104 | print("S34 Complete!")
105 |
106 |
107 | # unique list of manager names
108 | mgr_list = s34_data.groupby(
109 | ['mgrno'])['mgrname'].agg(
110 | pd.Series.mode).reset_index()
111 | mgr_list['mgrname'] = mgr_list['mgrname'].astype(str)
112 | mgr_list.to_parquet(f_managers, compression='brotli')
113 |
-------------------------------------------------------------------------------- /code/2_Process_WRDS_Data.py: --------------------------------------------------------------------------------
1 | from our_plot_config import raw_dir, wrds_dir, derived_dir, checks_dir
2 | import pandas as pd
3 |
4 | from wrds_cleaning import expand_names, make_cusip_list, construct_fundamentals, get_sp_quarters, read_s34
5 | from wrds_cleaning import construct_bus_segments, consolidate_mgrs, filter_s34
6 | from wrds_cleaning import compute_betas, add_drops, process_scraped, blackrock_fix
7 | from wrds_cleaning import add_stock_splits, dedup_s34, combine_betas
8 |
9 | from wrds_checks import check_bigbeta, check_s34, check_names, check_blackrock
10 | from wrds_checks import check_s34_coverage, check_multiple_cusip, check_fundamental_coverage
11 |
12 |
13 | # Public (hand) inputs
14 | f_scrape = raw_dir / 'out_scrape.parquet' # CRM 
renamed 15 | f_big4 = raw_dir / 'big4.csv' 16 | 17 | # raw data pulls 18 | # CRM update: calling these WRDS 19 | f_raw_s34 = wrds_dir / 'raw_s34.parquet' 20 | f_splist = wrds_dir / 'sp500_list.parquet' 21 | f_crsp_names = wrds_dir / 'crsp_names.parquet' 22 | f_msf_data = wrds_dir / 'crsp_msf.parquet' 23 | f_short = wrds_dir / 'short_interest.parquet' 24 | f_fundamentals = wrds_dir / 'fundamentals_data.parquet' 25 | f_segments = wrds_dir / 'wrds_segments.parquet' 26 | 27 | # drops and consolidations 28 | f_permno_drops = raw_dir / 'permno_drops.csv' 29 | f_mgr_consolidations = raw_dir / 'manager_consolidations.csv' 30 | 31 | # Outputs 32 | # other info 33 | f_comp_info = derived_dir / 'compustat_info.parquet' 34 | f_names_expanded = derived_dir / 'expanded_names.parquet' 35 | 36 | # Betas 37 | f_betas_unfiltered = derived_dir / '13f_sp500_unfiltered.parquet' 38 | f_betas_scraped = derived_dir / '13f_scraped.parquet' 39 | f_frankenbetas = derived_dir / '13f_sp500_frankenbeta.parquet' 40 | 41 | # Read in the raw parquet files from SQL queries 42 | df_sp500 = pd.read_parquet(f_splist) 43 | df_names = pd.read_parquet(f_crsp_names) 44 | df_msf2 = pd.read_parquet(f_msf_data) 45 | df_short = pd.read_parquet(f_short) 46 | 47 | # Match the names file against the S&P list and expand to quarters 48 | df_names2 = expand_names(df_names, df_sp500) 49 | df_names2.to_parquet(f_names_expanded, compression='brotli') 50 | 51 | # Do Compustat (Fundamentals, Bus Segments, etc.) 52 | # make sure that fundamentals data is unique permno-quarter 53 | cusip_list = make_cusip_list(df_names) 54 | df_fund = construct_fundamentals(pd.read_parquet(f_fundamentals), df_names2) 55 | df_bus = construct_bus_segments(pd.read_parquet(f_segments), df_sp500) 56 | df_fund2 = pd.merge(df_fund, df_bus, on=['permno', 'quarter'], how='outer') 57 | df_fund2.to_parquet(f_comp_info, compression='brotli') 58 | 59 | # List of S&P permo,cusip,quarters 60 | sp_df = get_sp_quarters(df_sp500, cusip_list) 61 | 62 | 63 | # ### Merge and Drops ~ 5m 64 | # - Merge: Permno information from CRSP names file to 13-F filings 65 | # - Drop: Non S&P 500 component filings from 13-f's 66 | # - Fix: Adjust Blackrock dates because of known reporting issue (see https://wrds-www.wharton.upenn.edu/pages/support/research-wrds/research-guides/research-note-regarding-thomson-reuters-ownership-data-issues/) 67 | # - Merge: stock split information from MSF file (cfacshr) (https://wrds-support.wharton.upenn.edu/hc/en-us/articles/115003101112-Adjusting-Splits-Using-CRSP-Data) 68 | # - Fix: Select a single Filing Date (Fdate) for each Rdate. 69 | # - 24,432,318 Obs have single observation 70 | # - 2,608,149 Obs have multiple filings with same shares (different prices) 71 | # - 84,159 Obs have a known share split: take the first filing (before share split) 72 | # - 44,874 Obs have no known share split: take the last filing (assume these are corrections) 73 | # - Merge and Consolidate: Managers using consolidation file (Blackrock, Inc --> Blackrock, etc.) 74 | # - Calculate $\beta_{fs}$ for each quarter in LONG format. 75 | # - Add possible drops: by permno (dual class shares, ADR's,etc.), share class (ADR's, REITs,etc.) 76 | 77 | # Process Thomson-Reuters $\beta$ 78 | # this needs about 20 GB of RAM 79 | # 1. 
Apply fixes and merges described above 80 | 81 | s34_data = filter_s34(read_s34(f_raw_s34), sp_df) 82 | main_df = consolidate_mgrs( 83 | dedup_s34( 84 | add_stock_splits( 85 | s34_data, 86 | df_msf2)), 87 | f_mgr_consolidations) 88 | df1 = compute_betas(main_df, df_msf2) 89 | df1 = add_drops(df1, f_permno_drops, df_names2) 90 | df1.to_parquet(f_betas_unfiltered, compression='brotli') 91 | 92 | # Process Scraped 13F's ~3min 93 | # 1. Append it to the existing dataset 94 | # 2. Add the drops 95 | 96 | dfs = process_scraped(f_scrape, f_big4) 97 | dfs = add_drops(dfs, f_permno_drops, df_names2) 98 | dfs.to_parquet(f_betas_scraped, compression='brotli') 99 | 100 | # Combine Both Sets of $\beta$s 101 | # - Use TR data before 2001 102 | # - Use scraped data after 2001 103 | # - Save the combined FrankenBeta file 104 | 105 | # use TR before cut-date and scraped data after 106 | df = combine_betas(df1, dfs, cut_date='2000-01-01') 107 | df.to_parquet(f_frankenbetas, compression='brotli') 108 | 109 | # Checks 110 | # 1. Tabulate: Missing Shares Outstanding (TR), Missing Price Info (TR), Duplicate Observations within an Fdate/Rdate and Permno, Manager 111 | # 2. Tabulate: 18 cases where firm exist in S&P500 but not in names file (yet). 112 | # 3. 1057 Observations (Firm-Quarter) in S&P500 but not in S34 Data (959 after 2010). 113 | # 4. 924 Observations with multiple CUSIPS in same period for same firm 114 | # (these are filings with typos, weird share classes, etc.) 115 | 116 | print(checks_dir) 117 | # Define the Checks 118 | f_notin_crsp = checks_dir / 'compustat-notin-crsp.xlsx' 119 | f_shares_out = checks_dir / 's34-no-shares.xlsx' 120 | f_prc_zero = checks_dir / 's34-zero-price.xlsx' 121 | f_duplicates = checks_dir / 's34_duplicate_permno.xlsx' 122 | f_names_missing = checks_dir / 'unmatched-names-splist.xlsx' 123 | f_s34_coverage = checks_dir / 'coverage_s34.xlsx' 124 | f_multiple_cusips = checks_dir / 'multiple_cusips.xlsx' 125 | f_multiple_cusips_summary = checks_dir / 'multiple_cusips_summary.xlsx' 126 | f_missing_betas = checks_dir / 'missing_betas.xlsx' 127 | f_missing_atq = checks_dir / 'missing_atq.xlsx' 128 | f_missing_segments = checks_dir / 'missing_segments.xlsx' 129 | f_bigbeta_1 = checks_dir / 'big_betas_tr.xlsx' 130 | f_bigbeta_2 = checks_dir / 'big_betas_scrape.xlsx' 131 | 132 | 133 | # Run the Checks 134 | check_s34(s34_data, f_shares_out, f_prc_zero, f_duplicates) 135 | check_names(df_sp500, df_names, f_names_missing) 136 | check_s34_coverage(df1, df_sp500, df_names, f_s34_coverage) 137 | check_multiple_cusip(s34_data, f_multiple_cusips, f_multiple_cusips_summary) 138 | check_bigbeta(df1, f_bigbeta_1) 139 | check_bigbeta(dfs, f_bigbeta_2) 140 | check_fundamental_coverage( 141 | df, 142 | df_fund2, 143 | df_names2, 144 | f_missing_betas, 145 | f_missing_atq, 146 | f_missing_segments) 147 | -------------------------------------------------------------------------------- /code/3_Calculate_Kappas.py: -------------------------------------------------------------------------------- 1 | from our_plot_config import derived_dir, raw_dir 2 | import pandas as pd 3 | 4 | from kappas import process_beta, beta_to_kappa, calc_chhis, fix_scrape_cols 5 | from investors import compute_investor_info, calc_big4, do_one_firm_similarity 6 | from firminfo import regression_merge, firm_info_merge, kappa_in_out 7 | 8 | from utilities.quantiles import weighted_quantile 9 | 10 | # Inputs 11 | # Betas 12 | f_betas = derived_dir / '13f_sp500_frankenbeta.parquet' 13 | f_betas_tr = derived_dir / 
'13f_sp500_unfiltered.parquet' 14 | f_betas_sc = derived_dir / '13f_scraped.parquet' 15 | 16 | # Other inputs 17 | f_names_expanded = derived_dir / 'expanded_names.parquet' 18 | f_comp_info = derived_dir / 'compustat_info.parquet' 19 | f_big4 = raw_dir / 'big4.csv' 20 | 21 | # Outputs 22 | # main outputs (kappas) 23 | f_kappas = derived_dir / 'official-kappas.parquet' 24 | f_kappas_tr = derived_dir / 'appendix_kappa_tr.parquet' 25 | f_kappas_scrape = derived_dir / 'appendix_kappa_scrape.parquet' 26 | f_kappas_combined = derived_dir / 'appendix_kappa_combined.parquet' 27 | 28 | # Firm and Investor Output 29 | f_investor_info = derived_dir / 'investor-info.parquet' 30 | f_firm_info = derived_dir / 'firm-info.parquet' 31 | f_regression = derived_dir / 'regression_data.parquet' 32 | 33 | # Calculate $\kappa$ for combined $\beta$ (Frankenstein version) 34 | # - Apply the $\kappa$ calculations period by period 35 | # - This includes (L2, L1, Sole/Shared, and various options for gamma) 36 | # - Save the output to a new parquet file 37 | df = process_beta(f_betas) 38 | df_kappa = beta_to_kappa(df) 39 | df_kappa.to_parquet(f_kappas, compression='brotli') 40 | 41 | # Calculate alternate Kappas (these are for Appendix) 42 | # - Apply $\kappa$ calculations period by period 43 | # - Do this for the pure TR data and pure scrape data 44 | total_dft = beta_to_kappa(process_beta(f_betas_tr)) 45 | total_dfs = beta_to_kappa(process_beta(f_betas_sc)) 46 | final_df = pd.merge(total_dft, fix_scrape_cols(total_dfs), 47 | on=['from', 'to', 'quarter'], how='outer') 48 | 49 | total_dft.to_parquet(f_kappas_tr, compression='brotli') 50 | total_dfs.to_parquet(f_kappas_scrape, compression='brotli') 51 | final_df.to_parquet(f_kappas_combined, compression='brotli') 52 | 53 | # save some memory 54 | del total_dft, total_dfs, final_df 55 | 56 | # Investor Info: How indexed is each manager? 
(including big4 information) 57 | df_investor = compute_investor_info(df, f_big4) 58 | df_investor.to_parquet(f_investor_info, compression='brotli') 59 | 60 | # Do the Firm-Level Descriptives 61 | # - Build the fundamentals, names, and business segments for all S&P entries 62 | # - Compute the firm level similarity measure 63 | # - Compute CHHI, IHHI from Betas 64 | # - Combine everything in the firm (permno-quarter) info file 65 | # - Write the file for regressions (merged firm info and kappa) 66 | 67 | df_fund2 = pd.read_parquet(f_comp_info) 68 | df_names2 = pd.read_parquet(f_names_expanded) 69 | firm_similarity = df.groupby(['quarter']).apply( 70 | do_one_firm_similarity).reset_index(drop=True) 71 | big4 = calc_big4(df, pd.read_csv(f_big4)) 72 | chhi = calc_chhis(df) 73 | 74 | df_firm2 = firm_info_merge(df_names2, df_fund2, firm_similarity, big4, chhi) 75 | df_firm2.to_parquet(f_firm_info, compression='brotli') 76 | 77 | df_reg = regression_merge(df_kappa, df_firm2) 78 | df_reg.to_parquet(f_regression, compression='brotli') 79 | 80 | # add in-bound and outbound kappa --this isn't in final draft of paper 81 | # df_firm3=kappa_in_out(df_kappa,df_firm2) 82 | -------------------------------------------------------------------------------- /code/firminfo.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | def regression_merge(df_kappas, df_firm): 5 | firm_cols = ['permno', 'quarter', 'saleq', 'cogsq', 'normalized_l2', 6 | 'retail_share', 'market_cap', 'beta_BlackRock', 'beta_Vanguard', 'beta_StateStreet'] 7 | keep_cols = ['from', 'to', 'quarter', 'kappa', 'cosine', 'retail_share', 'market_cap', 'marginsq', 'saleq', 'cogsq', 'normalized_l2', 8 | 'big3', 'beta_BlackRock', 'beta_Vanguard', 'beta_StateStreet'] 9 | 10 | # Read things in and Merge 11 | df = pd.merge( 12 | df_kappas.loc[(df_kappas['from'] != df_kappas['to']) & ( 13 | df_kappas['quarter'] <= '2017-10-01'), ['from', 'to', 'kappa', 'quarter', 'cosine']], 14 | df_firm[firm_cols], left_on=['from', 'quarter'], right_on=['permno', 'quarter'], how='left' 15 | ).reset_index(drop=True) 16 | 17 | # Calculate derived columns 18 | df['big3'] = df['beta_BlackRock'] + \ 19 | df['beta_Vanguard'] + df['beta_StateStreet'] 20 | df['marginsq'] = (df['saleq'] - df['cogsq']) / df['saleq'] 21 | return df[keep_cols] 22 | 23 | # merge it all together 24 | 25 | 26 | def firm_info_merge(df_names2, df_fund2, firm_similarity, big4, chhi): 27 | df_firm2 = pd.merge(pd.merge(pd.merge(pd.merge( 28 | df_names2, df_fund2, on=['permno', 'quarter'], how='inner'), 29 | firm_similarity, on=['permno', 'quarter'], how='left'), 30 | big4, on=['permno', 'quarter'], how='left'), 31 | chhi, on=['permno', 'quarter'], how='left' 32 | ) 33 | df_firm2['market_cap'] = df_firm2['shares_outstanding'] * df_firm2['price'] 34 | df_firm2[['beta_BlackRock', 35 | 'beta_Vanguard', 36 | 'beta_StateStreet', 37 | 'beta_Fidelity']] = df_firm2[['beta_BlackRock', 38 | 'beta_Vanguard', 39 | 'beta_StateStreet', 40 | 'beta_Fidelity']].fillna(0) 41 | return df_firm2[(df_firm2.quarter >= '1980-01-01') & 42 | (df_firm2.quarter <= '2017-10-01')].drop_duplicates() 43 | 44 | 45 | # This block is for incoming and outgoing kappa 46 | # note: not sure this made it into the paper (keep the code anyway) 47 | def weighted_from(df): 48 | a1 = np.ma.average(df['kappa'].values, weights=df['saleq_x'].values) 49 | return pd.Series({'kappa_in': a1}) 50 | 51 | 52 | def weighted_to(df): 53 | a1 = np.ma.average(df['kappa'].values, 
weights=df['saleq_y'].values) 54 | return pd.Series({'kappa_out': a1}) 55 | 56 | 57 | def kappa_in_out(df, df_firm): 58 | dfk = df.loc[df['from'] != df['to'], ['from', 'to', 'quarter', 'kappa']] 59 | tmp = pd.merge(pd.merge(dfk, 60 | df_firm[['permno', 'quarter', 'saleq']], left_on=['from', 'quarter'], right_on=['permno', 'quarter']), 61 | df_firm[['permno', 'quarter', 'saleq']], left_on=['to', 'quarter'], right_on=['permno', 'quarter'] 62 | ).fillna(0) 63 | 64 | g1 = tmp.groupby(['quarter', 'to']).apply(weighted_from) 65 | g2 = tmp.groupby(['quarter', 'from']).apply(weighted_to) 66 | 67 | return pd.merge(pd.merge(df_firm, 68 | g1, left_on=['quarter', 'permno'], right_on=['quarter', 'to'], how='left'), 69 | g2, left_on=['quarter', 'permno'], right_on=['quarter', 'from'], how='left') 70 | -------------------------------------------------------------------------------- /code/investors.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from utilities.matlab_util import matlab_sparse 4 | from sklearn.metrics.pairwise import cosine_similarity, manhattan_distances 5 | 6 | 7 | def compute_investor_info(df, f_big4): 8 | tmp = df.groupby(['quarter']).apply( 9 | do_one_investor_similarity).reset_index(drop=True) 10 | return pd.merge(tmp, pd.read_csv(f_big4), how='left', on=['mgrno']) 11 | 12 | 13 | def calc_big4(df, big4): 14 | df2 = pd.merge(df, big4, on=['mgrno'], how='inner').groupby( 15 | ['quarter', 'permno', 'InvestorName'])['beta'].sum().unstack() 16 | df2.columns = [ 17 | 'beta_BlackRock', 18 | 'beta_Fidelity', 19 | 'beta_StateStreet', 20 | 'beta_Vanguard'] 21 | return df2[['beta_BlackRock', 'beta_Vanguard', 22 | 'beta_StateStreet', 'beta_Fidelity']].fillna(0) 23 | 24 | 25 | def investor_helper(betas): 26 | # weights for market porfolio 27 | mkt = betas.sum(axis=0) / betas.sum() 28 | # "AUM" weights to aggregate market portfolio 29 | x = betas.sum(axis=1) 30 | aum = x / x.sum() 31 | nbetas = betas / x[:, None] 32 | 33 | # distance to AUM weighted market portfolio 34 | l2 = cosine_similarity(X=betas, Y=np.expand_dims(mkt, axis=0)).flatten() 35 | l1 = 1 - manhattan_distances(X=nbetas, 36 | Y=np.expand_dims(mkt, 37 | axis=0), 38 | sum_over_features=True).flatten() / 2 39 | return(aum, l2, l1) 40 | 41 | 42 | def do_one_investor_similarity(df): 43 | [betas, mgr_keys, permno_keys] = matlab_sparse( 44 | df.mgrno, df.permno, df.beta) 45 | # Market portfolio weights 46 | (aum, l2, l1) = investor_helper(betas) 47 | out_df = pd.DataFrame({'mgrno': mgr_keys.astype(int), 48 | 'aum_weight': aum, 49 | 'l2_similarity': l2, 50 | 'l1_similarity': l1, 51 | 'cov_aum_l1': np.cov(l1, 52 | aum)[1][0]}) 53 | out_df['quarter'] = df.quarter.iloc[0] 54 | return out_df 55 | 56 | 57 | def do_one_firm_similarity(df): 58 | [betas, mgr_keys, permno_keys] = matlab_sparse( 59 | df.mgrno, df.permno, df.beta) 60 | (aum, l2, l1) = investor_helper(betas) 61 | 62 | norm_l2 = y = (l2 @ (betas / betas.sum(0))) 63 | norm_l1 = y = (l1 @ (betas / betas.sum(0))) 64 | nonnorm_l2 = y = (l2 @ betas) 65 | nonnorm_l1 = y = (l1 @ betas) 66 | 67 | out_df = pd.DataFrame({'permno': permno_keys.astype(int), 68 | 'normalized_l1': norm_l1, 69 | 'nonnormalized_l1': nonnorm_l1, 70 | 'normalized_l2': norm_l2, 71 | 'nonnormalized_l2': nonnorm_l2}) 72 | out_df['quarter'] = df.quarter.iloc[0] 73 | return out_df 74 | -------------------------------------------------------------------------------- /code/kappas.py: 
-------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from utilities.matlab_util import matlab_sparse 4 | from sklearn.metrics.pairwise import cosine_similarity, manhattan_distances 5 | 6 | 7 | def fix_scrape_cols(df): 8 | # fix the names in the scraped data 9 | df = df.set_index(['from', 'to', 'quarter']) 10 | df.columns = ['s' + x for x in df.columns] 11 | return df.reset_index() 12 | 13 | 14 | def process_beta(fn): 15 | df = pd.read_parquet(fn) 16 | df['mgrno'] = df['mgrno'].astype(int) 17 | return df[(df.permno_drop == False) & ( 18 | df.sharecode_drop == False) & (df.beta < 0.5)] 19 | 20 | # This is the main function 21 | def beta_to_kappa(df): 22 | df = df[(df.quarter >= '1980-01-01')] 23 | 24 | df.loc[df.price < 0, 'price'] = 0 25 | df['mkt_cap'] = df['shares_outstanding'] * df['price'] 26 | df_m = df.groupby(['permno', 'quarter'])['mkt_cap'].median() 27 | 28 | total_df = df.groupby(['quarter']).apply(do_one_period) 29 | total_df3 = df[(df.quarter >= '1999-01-01')].groupby(['quarter']).apply(do_one_robustness) 30 | 31 | # merge and clean up missings 32 | total_df = pd.merge( 33 | total_df, total_df3, on=[ 34 | 'quarter', 'from', 'to'], how='left') 35 | total_df[['kappa', 36 | 'kappa_CLWY', 37 | 'kappa_pow2', 38 | 'kappa_pow3', 39 | 'kappa_sqrt', 40 | 'cosine', 41 | 'kappa_sole', 42 | 'kappa_soleshared']] = total_df[['kappa', 43 | 'kappa_CLWY', 44 | 'kappa_pow2', 45 | 'kappa_pow3', 46 | 'kappa_sqrt', 47 | 'cosine', 48 | 'kappa_sole', 49 | 'kappa_soleshared']].fillna(0) 50 | 51 | # Add the market cap 52 | total_df = pd.merge(pd.merge(total_df, 53 | df_m, left_on=['from', 'quarter'], right_on=['permno', 'quarter']), 54 | df_m, left_on=['to', 'quarter'], right_on=['permno', 'quarter'] 55 | ).rename(columns={'mkt_cap_x': 'mkt_cap_from', 'mkt_cap_y': 'mkt_cap_to'}).reset_index() 56 | 57 | return total_df 58 | 59 | 60 | def do_one_robustness(df): 61 | [betas_soleshared, mgr_keys, permno_keys] = matlab_sparse( 62 | df.mgrno, df.permno, df.beta_soleshared, compress=False) 63 | [betas_sole, mgr_keys, permno_keys] = matlab_sparse( 64 | df.mgrno, df.permno, df.beta_sole, compress=False) 65 | 66 | [betas, mgr_keys, permno_keys] = matlab_sparse( 67 | df.mgrno, df.permno, df.beta, compress=False) 68 | 69 | kappa_sole = raw_kappa(betas, betas_sole) 70 | kappa_soleshared = raw_kappa(betas, betas_soleshared) 71 | kappa_all = raw_kappa(betas, betas) 72 | # kappa_drop=raw_kappa(betas_drop,betas_drop) 73 | 74 | idx = kappa_all.nonzero() 75 | return pd.DataFrame({'from': permno_keys[idx[0]], 'to': permno_keys[idx[1]], 'kappa_all': kappa_all[idx].flatten(), 76 | 'kappa_sole': kappa_sole[idx].flatten(), 'kappa_soleshared': kappa_soleshared[idx].flatten()}) 77 | 78 | 79 | def beta_to_kappa_merger_breakup(df): 80 | return df.groupby(['quarter']).apply(do_one_merger_breakup).reset_index(drop=True) 81 | 82 | 83 | def do_one_merger_breakup(df2): 84 | # breakup in three blocks 85 | blockA = df2.loc[~df2['InvestorName'].isnull(), [ 86 | 'mgrno', 'permno', 'beta']] 87 | blockB = df2.loc[df2['InvestorName'].isnull(), ['mgrno', 'permno', 'beta']] 88 | blockA.beta = 0.5 * blockA.beta 89 | blockC = blockA.copy() 90 | blockC.mgrno = -blockC.mgrno 91 | df3 = pd.concat([blockA, blockB, blockC], axis=0, ignore_index=True) 92 | 93 | # first do the regular case 94 | [betas, mgr_keys, permno_keys] = matlab_sparse( 95 | df2.mgrno, df2.permno, df2.beta) 96 | k1 = calc_kappa(betas) 97 | 98 | # now do the breakup case using the augmented data 99 | 
[betas_b, mgr_keys_b, permno_keys_b] = matlab_sparse( 100 | df3.mgrno, df3.permno, df3.beta) 101 | k2 = calc_kappa(betas_b) 102 | 103 | df4 = df2.groupby(['mgrno_merger', 'permno']).sum().reset_index() 104 | # finally do the merger using the merger mgrno's instead of the real ones 105 | [betas_m, mgr_keys_m, permno_keys_m] = matlab_sparse( 106 | df4.mgrno_merger, df4.permno, df4.beta) 107 | k3 = calc_kappa(betas_m) 108 | 109 | # Ignore BlackRock+Vanguard 110 | df4 = df2[~(df2['InvestorName'].isin(['BlackRock', 'Vanguard']))] 111 | [betas_drop, mgr_keys_drop, permno_keys_drop] = matlab_sparse( 112 | df4.mgrno, df4.permno, df4.beta, compress=False) 113 | k4 = calc_kappa(betas_drop) 114 | 115 | # put it all together and return 116 | idx = k1.nonzero() 117 | out_df = pd.DataFrame({'from': permno_keys[idx[0]], 'to': permno_keys[idx[1]], 'kappa': k1[idx].flatten(), 118 | 'kappa_breakup': k2[idx].flatten(), 'kappa_merger': k3[idx].flatten(), 'kappa_drop': k4[idx].flatten()}) 119 | out_df['quarter'] = df2.quarter.iloc[0] 120 | return out_df 121 | 122 | # handler for L2 Measures (Rotemberg Weights, CLWY Weights, etc.) 123 | # input: long dataframe of Manager, Firm, Beta_fs 124 | # Output: long dataframe of Quarter, Firm_from, Firm_to, kappa_fg, ihhi_f, 125 | # ihhi_g, cosine_fg 126 | 127 | 128 | def do_one_period(df): 129 | [betas, mgr_keys, permno_keys] = matlab_sparse( 130 | df.mgrno, df.permno, df.beta) 131 | kappa = calc_kappa(betas) 132 | kappa2 = calc_kappa(betas, 2) 133 | kappa3 = calc_kappa(betas, 3) 134 | kappa4 = calc_kappa(betas, 0.5) 135 | kappa5 = calc_kappa(betas, 'CLWY') 136 | cosine = cosine_similarity(betas.transpose()) 137 | # this is a bit slow 138 | l1_measure = calc_l1_measure(betas) 139 | 140 | idx = kappa.nonzero() 141 | return pd.DataFrame({'from': permno_keys[idx[0]], 'to': permno_keys[idx[1]], 'kappa': kappa[idx].flatten(), 142 | 'kappa_pow2': kappa2[idx].flatten(), 'kappa_pow3': kappa3[idx].flatten(), 'kappa_sqrt': kappa4[idx].flatten(), 143 | 'kappa_CLWY': kappa5[idx].flatten(), 'cosine': cosine[idx].flatten(), 'l1_measure': l1_measure[idx].flatten()}) 144 | 145 | # This does the work for L1 measure 146 | # Input beta: S x F matrix 147 | # Output L1: F x F matrix 148 | # Subtract beta_f from each column of beta and sum of absolute deviations, 149 | # stack for L1. 
150 | def calc_l1_measure(betas): 151 | y = manhattan_distances(betas.transpose()) 152 | tot = betas.sum(axis=0) 153 | return (-y + tot[np.newaxis, :] + tot[:, np.newaxis]) / 2 154 | 155 | # Calculate Summary Stats of Control Weights 156 | # Compute Convex Power Gamma: 157 | # CHHI: Control HHI 158 | # IHHI: Investor HHI 159 | # Retail Share 160 | # 161 | # This is the main function that takes a DF of betas and calculates all of 162 | # the CHHI measures 163 | 164 | def calc_chhis(df): 165 | # apply to multiple groups here 166 | df['inv_total'] = df.groupby(['mgrno', 'quarter'])['beta'].transform(sum) 167 | y = df[['permno', 'quarter', 'beta', 'inv_total']].groupby( 168 | ['permno', 'quarter']).apply(agg_chhi) 169 | x = df.groupby(['permno', 'quarter']).agg( 170 | {'shares_outstanding': np.max, 'price': np.median}) 171 | return pd.merge(x, y, left_index=True, right_index=True, how='outer') 172 | 173 | # this is unitary function that takes in a vector Beta_f that is S x 1 174 | def chhi(beta, power): 175 | gamma = (beta**power) 176 | # scalar adjustment factor 177 | adj = 10000 * ((beta.sum() / gamma.sum())**2) 178 | return (gamma**2).sum() * adj 179 | 180 | # This calculates all of the CHHI measures and returns a (horizontal) series 181 | def agg_chhi(x): 182 | out = [chhi(x['beta'], a) for a in [0.5, 1, 2, 3, 4]] 183 | tmp = x['beta'] / x['inv_total'] 184 | clwy = chhi(tmp, 1) 185 | clwy_alt = 10000 * (tmp**2).sum() 186 | 187 | names = { 188 | 'retail_share': 1 - x['beta'].sum(), 189 | 'chhi_05': out[0], 190 | 'ihhi': out[1], 191 | 'chhi_2': out[2], 192 | 'chhi_3': out[3], 193 | 'chhi_4': out[4], 194 | 'chhi_clwy': clwy, 195 | 'chhi_clwy2': clwy_alt 196 | } 197 | return pd.Series(names, index=['retail_share', 'ihhi', 'chhi_05', 198 | 'chhi_2', 'chhi_3', 'chhi_4', 'chhi_clwy', 'chhi_clwy2']) 199 | 200 | 201 | # This calculates profit weights 202 | # 203 | # Input beta: S x F matrix 204 | # Output kappa: F x F matrix 205 | # Options: Gamma 'CLWY', 'default' (Rotemberg), numeric: convexity 206 | # parameter "a" for gamma=beta^a 207 | def calc_kappa(betas, gamma_type='default'): 208 | # CLWY normalize the gammas 209 | if gamma_type == 'CLWY': 210 | gamma = betas / np.maximum(betas.sum(axis=1), 1e-10)[:, None] 211 | elif isinstance(gamma_type, (int, float)): 212 | if gamma_type > 0: 213 | tmp = betas**(gamma_type) 214 | gamma = tmp # *(betas.sum(axis=0)/tmp.sum(axis=0)) 215 | else: 216 | print("Must provide Positive Parameter") 217 | # proportional control: do we normalize to sum to one? 
218 | else: 219 | gamma = betas # /betas.sum(axis=0) 220 | 221 | return raw_kappa(betas, gamma) 222 | 223 | 224 | # This is the ratio of inner products for kappas 225 | def raw_kappa(betas, gamma): 226 | # F x F matrix 227 | numer = gamma.T @ betas 228 | # F x 1 vector 229 | denom = np.diag(numer) 230 | # this is a F x F matirx 231 | return numer / denom[:, None] 232 | -------------------------------------------------------------------------------- /code/our_plot_config.py: -------------------------------------------------------------------------------- 1 | # For files and paths 2 | import pathlib 3 | import os 4 | 5 | 6 | # File Directories 7 | # cc modified to parent 8 | proj_dir = pathlib.Path.cwd().parent 9 | data_dir = proj_dir / 'data' 10 | raw_dir = data_dir / 'public' 11 | wrds_dir = data_dir / 'wrds' 12 | checks_dir = data_dir / 'checks' 13 | derived_dir = data_dir / 'derived' 14 | 15 | fig_dir = proj_dir / 'figures' 16 | tab_dir = proj_dir / 'tables' 17 | 18 | 19 | # For plotting 20 | #import matplotlib 21 | #import matplotlib.pyplot as plt 22 | #from cycler import cycler 23 | #import seaborn as sns 24 | 25 | # Plot Configuration 26 | def setplotstyle(): 27 | from cycler import cycler 28 | import seaborn as sns 29 | import matplotlib 30 | import matplotlib.pyplot as plt 31 | matplotlib.style.use('seaborn-whitegrid') 32 | 33 | matplotlib.rcParams.update({'font.size': 24}) 34 | plt.rc('font', size=24) # controls default text sizes 35 | plt.rc('axes', titlesize=24) # fontsize of the axes title 36 | plt.rc('axes', labelsize=24) # fontsize of the x and y labels 37 | plt.rc('xtick', labelsize=24) # fontsize of the tick labels 38 | plt.rc('ytick', labelsize=24) # fontsize of the tick labels 39 | plt.rc('legend', fontsize=24) # legend fontsize 40 | plt.rc('figure', titlesize=24) 41 | plt.rc( 42 | 'axes', 43 | prop_cycle=cycler( 44 | color=[ 45 | '#252525', 46 | '#636363', 47 | '#969696', 48 | '#bdbdbd']) * 49 | cycler( 50 | linestyle=[ 51 | '-', 52 | ':', 53 | '--', 54 | '-.'])) 55 | plt.rc('lines', linewidth=3) 56 | -------------------------------------------------------------------------------- /code/plots10_kappa_comparison_appendix.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from utilities.matlab_util import coalesce 3 | import pandas as pd 4 | import numpy as np 5 | import pathlib 6 | 7 | import matplotlib 8 | import matplotlib.pyplot as plt 9 | from our_plot_config import derived_dir, fig_dir, setplotstyle 10 | 11 | setplotstyle() 12 | 13 | # %% 14 | 15 | 16 | # Input file 17 | f_kappas = derived_dir / 'appendix_kappa_combined.parquet' 18 | f_firms = derived_dir / 'firm-information.parquet' 19 | 20 | # Figures 21 | f_profitweights_comp1 = fig_dir / 'appfigure_a3.pdf' 22 | 23 | # %% 24 | # ### Read in the (Cleaned) Parquet File 25 | # - Apply the $\kappa$ calculations period by period 26 | # - Save the output to a new parquet file 27 | 28 | total_df = pd.read_parquet(f_kappas) 29 | total_df['tunnel'] = (total_df['skappa'].combine_first(total_df['kappa']) > 1) 30 | total_df = total_df[total_df['from'] != total_df['to']] 31 | qtr_mean = total_df.groupby(['quarter']).mean() 32 | 33 | qtr_mean = total_df.groupby(['quarter']).mean() 34 | 35 | qtr_mean = qtr_mean[qtr_mean.index < '2019-01-01'] 36 | 37 | # %% 38 | 39 | 40 | col_list = [ 41 | 'l1_measure', 42 | 'kappa', 43 | 'kappa_pow2', 44 | 'kappa_pow3', 45 | 'kappa_sqrt', 46 | 'kappa_CLWY'] 47 | qtr_mean = coalesce(qtr_mean, col_list, 's', method='left') 48 | 49 | # %% 50 | 51 | # ## 
Make the plots 52 | # ### Comparisons 53 | # - Compare TR Data (Solid) and Scraped 13-F Data (Dashed) 54 | 55 | 56 | qtr_mean[['kappa', 'skappa']].plot( 57 | figsize=(20, 10), style=['-', '--'], color=['navy', 'maroon']) 58 | plt.xlabel("") 59 | plt.ylabel(r"$\kappa$ weight") 60 | plt.legend([r'TR Data', 'Scraped Data', ]) 61 | plt.ylim(0, 1.2) 62 | plt.savefig(f_profitweights_comp1, bbox_inches='tight') 63 | -------------------------------------------------------------------------------- /code/plots11_profit_simulations.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import pandas as pd 3 | import numpy as np 4 | import pathlib 5 | import pyblp 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | 9 | from our_plot_config import derived_dir, fig_dir, raw_dir, setplotstyle 10 | 11 | setplotstyle() 12 | 13 | 14 | pyblp.options.collinear_atol = pyblp.options.collinear_rtol = 0 15 | pyblp.options.verbose = False 16 | 17 | # jan and jan markups input 18 | f_jj_markups = raw_dir / 'DLE_markups_fig_v2.csv' 19 | 20 | # temp input 21 | f_quarter_mean = derived_dir / 'tmp-quarter-mean.pickle' 22 | f_markup_out = derived_dir / 'markup-simulations.csv' 23 | 24 | fig_markups = fig_dir / 'macro-simulated-markups.pdf' 25 | fig_markups_jj = fig_dir / 'figure10_markups.pdf' 26 | fig_profits = fig_dir / 'figure11_profits.pdf' 27 | 28 | 29 | def combine64(years, months=1, days=1, weeks=None, hours=None, minutes=None, 30 | seconds=None, milliseconds=None, microseconds=None, nanoseconds=None): 31 | years = np.asarray(years) - 1970 32 | months = np.asarray(months) - 1 33 | days = np.asarray(days) - 1 34 | types = ('= '1980-01-01'] 182 | df2 = pd.merge(df_out, df_jj, left_index=True, right_index=True, how='left') 183 | df2.to_csv(f_markup_out) 184 | 185 | 186 | # %% 187 | matplotlib.rc('xtick', labelsize=24) 188 | matplotlib.rc('ytick', labelsize=24) 189 | 190 | df2 = df2[~df2.jj_markup.isnull()].copy() 191 | 192 | 193 | df2[['prices', 'jj_markup', 'prices_mav']].plot( 194 | figsize=(20, 10), color=['black', 'navy', 'maroon']) 195 | plt.legend(['Common Ownership Markups', 196 | 'DeLoecker, Eeckhout, Unger (2020)', 197 | 'Common Ownership (w/ maverick)'], 198 | prop={'size': 24}, 199 | loc='upper left') 200 | plt.xlabel('') 201 | plt.ylabel('Markup over Cost', size=24) 202 | plt.ylim(1, 1.65) 203 | 204 | plt.savefig(fig_markups_jj, bbox_inches='tight') 205 | 206 | # %% 207 | rel_pi = df_out[['total_pi', 'total_pi_mav']] / pi_base 208 | 209 | matplotlib.rc('xtick', labelsize=24) 210 | matplotlib.rc('ytick', labelsize=24) 211 | 212 | rel_pi.plot(figsize=(20, 10), color=['navy', 'maroon']) 213 | #plt.legend(['Profits Relative to Symmetric Differentiated Bertrand (HHI=1250)','With Maverick'], prop={'size': 24}) 214 | plt.legend(['Without Maverick', 'With Maverick']) 215 | plt.xlabel('') 216 | plt.ylim(1, 3.5) 217 | 218 | plt.ylabel('Profits vs. 
Symmetric Bertrand HHI=1250', size=24) 219 | plt.savefig(fig_profits, bbox_inches='tight') 220 | -------------------------------------------------------------------------------- /code/plots1_basic_descriptives.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import pandas as pd 3 | import numpy as np 4 | import pathlib 5 | 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | 9 | from our_plot_config import derived_dir, fig_dir, raw_dir, setplotstyle 10 | 11 | # Call function that sets the plot style 12 | setplotstyle() 13 | # %% 14 | # Input file 15 | f_betas = derived_dir / '13f_sp500_unfiltered.parquet' 16 | f_scraped = derived_dir / '13f_scraped.parquet' 17 | 18 | # Figures 19 | f_numowners = fig_dir / 'appfigure_a1.pdf' 20 | fig_mgrs = fig_dir / 'figure3_nmgrs.pdf' 21 | fig_nfirms = fig_dir / 'figure2_nfirms.pdf' 22 | fig_ownership = fig_dir / 'figure4_inst_share.pdf' 23 | 24 | 25 | # ### Read in the (Cleaned) Parquet Files 26 | # - One for TR $\beta$ 27 | # - One for scraped $\beta$ 28 | 29 | # %% 30 | df = pd.read_parquet(f_betas) 31 | dfs = pd.read_parquet(f_scraped) 32 | 33 | # %% 34 | # calculate number of managers overall 35 | mgrA = df.groupby(['quarter'])['mgrno'].nunique() 36 | mgrB = dfs.groupby(['quarter'])['mgrno'].nunique() 37 | mgrs = pd.concat([mgrA, mgrB], axis=1) 38 | mgrs.columns = ['TR Data', 'Scraped Data'] 39 | 40 | # calculate number of managers per firm 41 | mgr_tots = df.groupby(['quarter', 'permno'])['mgrno'].nunique( 42 | ).reset_index().groupby(['quarter'])['mgrno'].describe(percentiles=[.1, .5]) 43 | mgr_totsS = dfs.groupby(['quarter', 'permno'])['mgrno'].nunique( 44 | ).reset_index().groupby(['quarter'])['mgrno'].describe(percentiles=[.1, .5]) 45 | 46 | # %% 47 | # 2 minutes# calculate number of firms 48 | a = df[(df.sharecode_drop == False) & (df.permno_drop == False) 49 | ].groupby(['quarter'])['permno'].nunique() 50 | b = df.groupby(['quarter'])['permno'].nunique() 51 | c = dfs[(dfs.sharecode_drop == False) & (dfs.permno_drop == False) 52 | ].groupby(['quarter'])['permno'].nunique() 53 | d = dfs.groupby(['quarter'])['permno'].nunique() 54 | 55 | c = c[c.index > '2001-01-01'] 56 | d = d[d.index > '2001-01-01'] 57 | 58 | nfirms = pd.concat([a, b, c, d], axis=1) 59 | nfirms.columns = [ 60 | 'TR (Restricted)', 61 | 'TR (Unrestricted)', 62 | 'Scraped (Restricted)', 63 | 'Scraped (Unrestricted)'] 64 | 65 | # %% 66 | 67 | t1 = dfs[dfs.quarter == '2017-09-30'] 68 | t2 = df[df.quarter == '2017-09-30'] 69 | 70 | 71 | # %% 72 | # ### Plot Number of Firms 73 | # - total for entire dataset ( with and without drops) 74 | 75 | # For matching the figures: truncate at EOY 2018 76 | # comment this out if you want a full update! 
(not scraped) 77 | nfirms = nfirms[nfirms.index < '2019-01-01'] 78 | 79 | # Figure 2 80 | plt.clf() 81 | ax = nfirms[['TR (Restricted)', 'TR (Unrestricted)']].plot( 82 | figsize=(20, 10), color=['navy', 'maroon']) 83 | #plt.axhline(y=500,color='0.75',linestyle = '--') 84 | nfirms[['Scraped (Restricted)', 'Scraped (Unrestricted)'] 85 | ].plot(ax=ax, color=['navy', 'maroon']) 86 | plt.xlabel("") 87 | 88 | #plt.ylim(top=520, bottom=0) 89 | plt.ylabel("Number of Firms in Sample") 90 | plt.ylim(0, 510) 91 | 92 | plt.savefig(fig_nfirms, bbox_inches="tight") 93 | # ### Plot Number of Managers 94 | # - total for entire dataset 95 | # - per firm 96 | # - Figure 3 97 | 98 | # %% 99 | mgrs = mgrs[mgrs.index < '2019-01-01'] 100 | 101 | # Figure 3 102 | plt.clf() 103 | ax = mgrs['TR Data'].plot(figsize=(20, 10), color='navy', style='-') 104 | mgrs['Scraped Data'].plot(ax=ax, color='maroon', style='--') 105 | plt.xlabel("") 106 | plt.ylabel("Overall Number of 13f Managers") 107 | plt.legend(['Thomson Reuters Data', 'Scraped 13(f) Data']) 108 | plt.ylim(0, 4100) 109 | plt.savefig(fig_mgrs, bbox_inches="tight") 110 | 111 | # %% 112 | mgr_tots = mgr_tots[mgr_tots.index < '2019-01-01'] 113 | mgr_totsS = mgr_totsS[mgr_totsS.index < '2019-01-01'] 114 | 115 | # Appendix Figure A-1 116 | plt.clf() 117 | fig, ax = plt.subplots(figsize=(20, 10)) 118 | mgr_tots[['mean', '50%', '10%', 'min']].plot( 119 | figsize=(20, 10), ax=ax, style='-', color=['b', 'r', 'y', 'g']) 120 | mgr_totsS[['mean', '50%', '10%', 'min']].plot( 121 | figsize=(20, 10), ax=ax, style='--', color=['b', 'r', 'y', 'g']) 122 | plt.xlabel("") 123 | plt.ylabel("Number of Owners") 124 | plt.ylim(0, 900) 125 | plt.legend(['Mean (TR)', 126 | 'Median (TR)', 127 | '10th Percentile (TR)', 128 | 'Min (TR)', 129 | 'Mean (Scrape)', 130 | 'Median (Scrape)', 131 | '10th Percentile (Scrape)', 132 | 'Min (Scrape)'], 133 | ncol=2) 134 | plt.savefig(f_numowners, bbox_inches="tight") 135 | 136 | # ### Percentage Institutional Ownership 137 | 138 | # %% 139 | # Figure 4 140 | plt.clf() 141 | df = df[df.quarter < '2019-01-01'] 142 | dfs = dfs[dfs.quarter < '2019-01-01'] 143 | 144 | 145 | df = df[(df.permno_drop == False) & ( 146 | df.sharecode_drop == False) & (df.beta < 0.5)] 147 | dfs = dfs[(dfs.permno_drop == False) & ( 148 | dfs.sharecode_drop == False) & (dfs.beta < 0.5)] 149 | 150 | a = (100 * df.groupby(['permno', 'quarter']) 151 | ['beta'].sum()).groupby(level=1).mean() 152 | b = (100 * dfs.groupby(['permno', 'quarter']) 153 | ['beta'].sum()).groupby(level=1).mean() 154 | pd.concat([a, b], axis=1).plot(figsize=(20, 10), style=[ 155 | '-', '--', ], color=['navy', 'maroon']) 156 | plt.ylabel("Percent Owned by 13(f) Investors") 157 | plt.xlabel("") 158 | plt.ylim(0, 100) 159 | plt.legend(['Thomson Reuters Data', 'Scraped 13(f) Data']) 160 | plt.savefig(fig_ownership, bbox_inches="tight") 161 | -------------------------------------------------------------------------------- /code/plots2_kappa_official.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import pandas as pd 3 | import numpy as np 4 | import pathlib 5 | 6 | import matplotlib.pyplot as plt 7 | from scipy.stats.mstats import gmean 8 | 9 | from our_plot_config import derived_dir, setplotstyle, fig_dir 10 | setplotstyle() 11 | 12 | # %% 13 | 14 | 15 | # Input file 16 | f_kappas = derived_dir / 'official-kappas.parquet' 17 | f_firms = derived_dir / 'firm-info.parquet' 18 | 19 | # temp output - for macro simulations 20 | f_quarter_mean = derived_dir / 
'tmp-quarter-mean.pickle' 21 | 22 | # Figures 23 | # Kappa 24 | f_profitweights = fig_dir / 'figure1_kappa.pdf' 25 | f_profitweights_all = fig_dir / 'figure13_kappa_control.pdf' 26 | f_within_between = fig_dir / 'figure15_within_between.pdf' 27 | f_kappa_quantile = fig_dir / 'appfigure_a6.pdf' 28 | 29 | # Concentration 30 | f_ihhi = fig_dir / 'figure6_ihhi.pdf' 31 | f_cosine = fig_dir / 'figure7_cosine.pdf' 32 | f_chhi = fig_dir / 'figure14_chhi1.pdf' 33 | f_chhi2 = fig_dir / 'figure14_chhi2.pdf' 34 | 35 | # Tunneling 36 | f_tunneling = fig_dir / 'figure9_tunneling.pdf' 37 | f_kap1 = fig_dir / 'appfigure_a7.pdf' 38 | 39 | # compute weighted average for kappa with different weighting schemes 40 | 41 | 42 | def weighted(x, cols): 43 | a1 = np.average(x[cols].values, weights=x['w_amean'].values, axis=0)[0] 44 | a2 = np.average(x[cols].values, weights=x['w_gmean'].values, axis=0)[0] 45 | a3 = np.average(x[cols].values, weights=x['mkt_cap_to'].values, axis=0)[0] 46 | a4 = np.average( 47 | x[cols].values, 48 | weights=x['mkt_cap_from'].values, 49 | axis=0)[0] 50 | a5 = np.average(x[cols].values, weights=x['saleq_x'].values, axis=0)[0] 51 | a6 = np.average(x[cols].values, weights=x['saleq_y'].values, axis=0)[0] 52 | a7 = np.average(x[cols].values, weights=x['w_s_gmean'].values, axis=0)[0] 53 | 54 | return pd.Series({'kappa_amean': a1, 'kappa_gmean': a2, 'kappa_from': a3, 'kappa_to': a4, 55 | 'kappa_sale_from': a4, 'kappa_sale_to': a4, 'kappa_sale_mean': a4}) 56 | 57 | 58 | # ### Read in the (Cleaned) Parquet File 59 | # - Apply the $\kappa$ calculations period by period 60 | # - Save the output to a new parquet file 61 | 62 | # %% 63 | 64 | df = pd.read_parquet(f_kappas) 65 | df_firm = pd.read_parquet(f_firms) 66 | ihhi = df_firm[['permno', 'quarter', 'ihhi', 'siccd', 'saleq']] 67 | 68 | # merge to get weights (sales and market cap, from/to) 69 | total_df = pd.merge( 70 | pd.merge( 71 | df, ihhi, left_on=[ 72 | 'from', 'quarter'], right_on=[ 73 | 'permno', 'quarter'], how='left'), 74 | ihhi, left_on=['to', 'quarter'], right_on=['permno', 'quarter'], how='left') 75 | total_df['same_sic'] = (total_df['siccd_x'] == total_df['siccd_y']) 76 | total_df[total_df['from'] != total_df['to']] 77 | 78 | # Average of weights 79 | total_df['w_amean'] = (total_df['mkt_cap_from'] + total_df['mkt_cap_to']) / 2.0 80 | total_df['w_gmean'] = gmean( 81 | [total_df['mkt_cap_from'], total_df['mkt_cap_to']], axis=0) 82 | total_df['w_s_gmean'] = gmean( 83 | [total_df['saleq_x'], total_df['saleq_y']], axis=0) 84 | 85 | # Apply the weighted averages 86 | y = total_df.groupby(['quarter']).apply(weighted, ["kappa"]) 87 | 88 | 89 | qtr_mean = pd.concat([total_df.groupby(['quarter']).mean(), y], axis=1) 90 | 91 | 92 | df_cosine = total_df.groupby( 93 | ['quarter'])['cosine'].describe( 94 | percentiles=[ 95 | 0.05, 0.25, 0.5, 0.75, 0.95]) 96 | 97 | # Percentiles of Kappa and IHHI 98 | kappa_pct = df.groupby( 99 | ['quarter'])['kappa'].describe( 100 | percentiles=[ 101 | 0.05, 102 | 0.25, 103 | 0.5, 104 | 0.75, 105 | 0.95]) 106 | ihhi_pct = ihhi[~ihhi.ihhi.isnull()].groupby(['quarter'])['ihhi'].describe( 107 | percentiles=[0.05, 0.25, 0.5, 0.75, 0.95]) 108 | 109 | # drop k_ff =1 cases for tunneling 110 | tunnel_df = (df[df['from'] != df['to']].set_index('quarter')[ 111 | ['kappa_sqrt', 'kappa', 'kappa_pow2', 'kappa_pow3']] > 1).groupby(level=0).mean() 112 | tunnel_df2 = (df[df['from'] != df['to']].set_index(['from', 'quarter'])[ 113 | ['kappa_sqrt', 'kappa', 'kappa_pow2', 'kappa_pow3']] > 1).groupby(level=[0, 1]).max() 114 | 115 
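# tunnel_df: per quarter, the share of ordered firm pairs (from != to) with
# kappa > 1 under each control assumption; kappa_fg > 1 means firm f weights
# firm g's profit more than its own (the potential-tunneling region).
# tunnel_df2: per originating firm and quarter, an indicator for whether the
# firm has at least one partner with kappa > 1.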
| 116 | # %% 117 | # need this for the macro simulations 118 | qtr_mean.to_pickle(f_quarter_mean) 119 | 120 | # %% 121 | # ### Kappas 122 | # - Single Kappa ( Figure 1) 123 | # - Alternative Control (Figure 13) 124 | # - Within and Between Industry (Figure 15) 125 | 126 | 127 | # Alternate Figure 1 (revision) 128 | plt.clf() 129 | qtr_mean[['kappa', 'kappa_gmean', 'kappa_sale_mean']].plot(figsize=(20, 10)) 130 | plt.legend(['Equal Weights', 'Market Cap Weighted', 'Revenue Weighted']) 131 | plt.xlabel('') 132 | plt.ylabel(r"$\kappa$ weight") 133 | plt.ylim(0, 1) 134 | plt.savefig(f_profitweights, bbox_inches="tight") 135 | 136 | # %% 137 | 138 | 139 | # Appendix Figure 13 140 | plt.clf() 141 | qtr_mean[['kappa', 'kappa_sqrt', 'kappa_pow2', 142 | 'kappa_pow3']].plot(figsize=(20, 10)) 143 | #plt.title("Average Pairwise Profit Weights $(\kappa)$ Under Different Control Assumptions") 144 | plt.xlabel("") 145 | plt.ylabel(r"$\kappa$ weight") 146 | plt.ylim(0, 1) 147 | plt.legend([r'$\gamma = \beta$', 148 | r'$\gamma \propto \sqrt{\beta}$', 149 | r'$\gamma \propto \beta^2$', 150 | r'$\gamma \propto \beta^3$']) 151 | plt.savefig(f_profitweights_all, bbox_inches="tight") 152 | 153 | # %% 154 | 155 | 156 | # Figure 15: Within Between 157 | plt.clf() 158 | total_df[(total_df.same_sic == True)].groupby( 159 | ['quarter'])['kappa'].mean().plot(figsize=(20, 10)) 160 | total_df[(total_df.same_sic == False)].groupby( 161 | ['quarter'])['kappa'].mean().plot() 162 | #plt.title("Average Pairwise Profit Weights $(\kappa)$ Within and Between SIC code") 163 | plt.xlabel("") 164 | plt.ylabel(r"$\kappa$ weight") 165 | plt.ylim(0, 1) 166 | plt.legend([r"$\kappa$ same SIC", r"$\kappa$ different SIC"]) 167 | plt.savefig(f_within_between, bbox_inches="tight") 168 | 169 | # %% 170 | 171 | 172 | # Response Quantiles of Kappa 173 | plt.clf() 174 | kappa_pct[['95%', '75%', '50%', '25%', '5%']].plot(figsize=(20, 10)) 175 | plt.legend(['95th percentile', 176 | '75th percentile', 177 | '50th percentile', 178 | '25th percentile', 179 | '5th percentile']) 180 | plt.ylabel(r"$\kappa$ Quantiles") 181 | plt.xlabel("") 182 | plt.ylim(0, 1) 183 | plt.savefig(f_kappa_quantile, bbox_inches="tight") 184 | # %% 185 | 186 | # ### Concentration 187 | # - IHHI (Figure 6) 188 | # - Cosine Similarity (Figure 7) 189 | # - CHHI (Figure 14 - 2 parts) 190 | 191 | 192 | # Figure 6 193 | ihhi_pct[['95%', '75%', '50%', '25%', '5%']].plot(figsize=(20, 10)) 194 | plt.legend(['95th percentile', 195 | '75th percentile', 196 | '50th percentile', 197 | '25th percentile', 198 | '5th percentile']) 199 | plt.ylabel("Investor HHI") 200 | plt.xlabel("") 201 | plt.ylim(0, 600) 202 | plt.savefig(f_ihhi, bbox_inches="tight") 203 | 204 | # %% 205 | 206 | 207 | # Figure 7 208 | total_df.groupby(['quarter'])[['kappa', 'cosine', 'l1_measure'] 209 | ].mean().plot(figsize=(20, 10)) 210 | plt.xlabel("") 211 | #plt.title("Cosine Similarity and $\kappa$") 212 | plt.ylim(0, 1) 213 | plt.legend([r'$\kappa_{f,g}$', 214 | r'$L_2$ similarity $cos(\beta_f,\beta_g)$', 215 | r'$L_1$ similarity $|\beta_f - \beta_g|$']) 216 | plt.savefig(f_cosine, bbox_inches="tight") 217 | 218 | # %% 219 | 220 | 221 | # Figure 14a 222 | df_firm[['quarter', 'ihhi', 'chhi_05', 'chhi_2', 'chhi_3', 'chhi_4'] 223 | ].groupby(['quarter']).mean().plot(figsize=(20, 10)) 224 | plt.xlabel("") 225 | plt.ylabel("Effective Control HHI") 226 | plt.ylim(0, 3500) 227 | plt.legend([r'$\gamma = \beta$', 228 | r'$\gamma \propto \sqrt{\beta}$', 229 | r'$\gamma \propto \beta^2$', 230 | r'$\gamma \propto \beta^3$', 
231 | r'$\gamma \propto \beta^4$']) 232 | plt.savefig(f_chhi, bbox_inches="tight") 233 | # %% 234 | 235 | 236 | # Figure 14b 237 | df_firm[['quarter', 'ihhi', 'chhi_05']].groupby( 238 | ['quarter']).mean().plot(figsize=(20, 10)) 239 | plt.xlabel("") 240 | plt.ylabel("Effective Control HHI") 241 | plt.ylim(0, 350) 242 | plt.legend([r'$\gamma = \beta$', r'$\gamma \propto \sqrt{\beta}$', ]) 243 | plt.savefig(f_chhi2, bbox_inches="tight") 244 | 245 | # ### Tunneling 246 | # - Figure 9: Tunneling 247 | # - App Figure C-6: Tunneling (Alternative Control) 248 | 249 | # %% 250 | (100.0 * tunnel_df[['kappa']]).plot(figsize=(20, 10)) 251 | plt.xlabel("") 252 | #plt.title("Potential Tunneling") 253 | plt.ylabel(r"Percentage of $\kappa$ > 1") 254 | plt.legend('') 255 | plt.ylim(0, 12) 256 | #plt.legend([r'$\gamma = \beta$',r'$\gamma \propto \sqrt{\beta}$',r'$\gamma \propto \beta^2$',r'$\gamma \propto \beta^3$']) 257 | plt.savefig(f_tunneling, bbox_inches="tight") 258 | 259 | # %% 260 | (100.0 * tunnel_df[['kappa', 'kappa_sqrt', 261 | 'kappa_pow2', 'kappa_pow3']]).plot(figsize=(20, 10)) 262 | plt.xlabel("") 263 | #plt.title("Potential Tunneling") 264 | plt.ylabel(r"Percentage of $\kappa$ > 1") 265 | plt.ylim(0, 20) 266 | plt.legend([r'$\gamma = \beta$', 267 | r'$\gamma \propto \sqrt{\beta}$', 268 | r'$\gamma \propto \beta^2$', 269 | r'$\gamma \propto \beta^3$']) 270 | plt.savefig(f_kap1, bbox_inches="tight") 271 | -------------------------------------------------------------------------------- /code/plots3_big_three_four.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from kappas import process_beta 3 | from investors import calc_big4 4 | import pandas as pd 5 | import numpy as np 6 | import pathlib 7 | 8 | import matplotlib 9 | import matplotlib.pyplot as plt 10 | 11 | from our_plot_config import derived_dir, fig_dir, raw_dir, setplotstyle 12 | 13 | setplotstyle() 14 | 15 | 16 | # Input file 17 | # Get this from 2.5 Scraped Data 18 | f_stata_k = derived_dir / 'tmp-kappas_scrape.dta' 19 | f_betas = derived_dir / '13f_sp500_frankenbeta.parquet' 20 | f_big4 = raw_dir / 'big4.csv' 21 | 22 | f_investor = derived_dir / 'investor-info.parquet' 23 | # Figures 24 | fig_bigfour = fig_dir / 'py_snp_bigfour.pdf' 25 | fig_bigthree = fig_dir / 'figure5_big3.pdf' 26 | 27 | # %% 28 | # ## Description 29 | # 1. Load the data 30 | # 2. Setup the plots 31 | # 3. Plot top 4 32 | # 4. 
Plot top 3 33 | 34 | 35 | df = 100.0 * calc_big4(process_beta(f_betas), pd.read_csv(f_big4)) 36 | df2 = df.groupby(level=0).mean() 37 | 38 | 39 | # %% 40 | def make_bigfour_plot(df, top3=False): 41 | y = df.groupby(level=0).mean() 42 | y.index = [pd.to_datetime(date, format='%Y-%m-%d').date() 43 | for date in y.index] 44 | if top3: 45 | y = y.iloc[:, 0:-1] 46 | y.plot(figsize=(20, 10), color=['black', 'maroon', 'navy', 'green']) 47 | 48 | plt.ylim(0, 10) 49 | plt.xlim('2000-01-01', '2018-01-01') 50 | 51 | plt.xlabel("") 52 | plt.ylabel('Average Ownership Percentage') 53 | 54 | plt.annotate('iShares Acquisition', xy=('2010-02-15', 6.2), xycoords='data', 55 | xytext=('2010-02-15', 8), textcoords='data', 56 | arrowprops=dict(facecolor='black', shrink=0.05), 57 | horizontalalignment='center', verticalalignment='top', 58 | ) 59 | plt.legend(['BlackRock & Barclays', 'Vanguard', 60 | 'State Street', 'Fidelity']) 61 | return plt 62 | 63 | 64 | # %% 65 | # Figure 5 66 | make_bigfour_plot(df2[df2.index >= '1999-12-31'], True) 67 | plt.savefig(fig_bigthree, bbox_inches="tight") 68 | 69 | 70 | # %% 71 | # Alternate with Fidelity (don't use this one) 72 | # make_bigfour_plot(df,False) 73 | -------------------------------------------------------------------------------- /code/plots4_investor_similarity.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import pandas as pd 3 | import numpy as np 4 | import pathlib 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | from our_plot_config import derived_dir, setplotstyle, fig_dir 9 | 10 | setplotstyle() 11 | 12 | # %% 13 | # input file 14 | f_investor = derived_dir / 'investor-info.parquet' 15 | 16 | # outputs 17 | fig_both_sim = fig_dir / 'figure8_similarity.pdf' 18 | fig_both_sim_drops = fig_dir / 'appfigure_a8.pdf' 19 | 20 | 21 | def wavg_l2(group): 22 | d = group['l2_similarity'] 23 | w = group['aum_weight'] 24 | return (d * w).sum() / w.sum() 25 | 26 | 27 | def wavg_l1(group): 28 | d = group['l1_similarity'] 29 | w = group['aum_weight'] 30 | return (d * w).sum() / w.sum() 31 | 32 | 33 | # %% 34 | df = pd.read_parquet(f_investor) 35 | 36 | df3 = pd.concat([df.groupby('quarter').apply(wavg_l1), 37 | df.groupby('quarter').apply(wavg_l2)], axis=1) 38 | df3.columns = ['investor_l1', 'investor_l2'] 39 | df3 = df3[['investor_l2', 'investor_l1']].copy() 40 | 41 | # Without blackrock vanguard? 
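# (i.e., recompute the same AUM-weighted L1/L2 similarity series on the
# subsample that excludes the BlackRock and Vanguard entities; these become
# the "(No BlackRock/Vanguard)" series saved to appfigure_a8.)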
42 | df2 = df[~df.InvestorName.isin(['BlackRock', 'Vanguard'])] 43 | df4 = pd.concat([df2.groupby('quarter').apply(wavg_l1), 44 | df2.groupby('quarter').apply(wavg_l2)], axis=1) 45 | df4.columns = ['l1_drop_blackrockvanguard', 'l2_drop_blackrockvanguard'] 46 | 47 | # %% 48 | # ### Make the Plots 49 | 50 | df3.plot(figsize=(20, 10), color=['navy', 'maroon']) 51 | plt.legend(['Investor Similarity $(L_2)$', 'Investor Similarity $(L_1)$']) 52 | plt.ylabel("Investor Similarity (AUM Weighted)") 53 | plt.xlabel("") 54 | plt.ylim(0, 1) 55 | plt.savefig(fig_both_sim, bbox_inches='tight') 56 | 57 | # %% 58 | ax = pd.concat([df3, 59 | df4], 60 | axis=1)[['investor_l2', 61 | 'l2_drop_blackrockvanguard', 62 | 'investor_l1', 63 | 'l1_drop_blackrockvanguard']].plot(figsize=(20, 64 | 10), 65 | color=['navy', 66 | 'navy', 67 | 'maroon', 68 | 'maroon'], 69 | style=['-', 70 | '--', 71 | '-.', 72 | ':']) 73 | plt.legend(['Investor Similarity $(L_2)$', 74 | 'Investor Similarity $(L_2)$ (No BlackRock/Vanguard)', 75 | 'Investor Similarity $(L_1)$', 76 | 'Investor Similarity $(L_1)$ (No BlackRock/Vanguard)']) 77 | plt.xlabel("") 78 | plt.ylabel("Investor Similarity (AUM Weighted)") 79 | plt.ylim(0, 1) 80 | plt.savefig(fig_both_sim_drops, bbox_inches='tight') 81 | -------------------------------------------------------------------------------- /code/plots5_airlines_cereal.py: -------------------------------------------------------------------------------- 1 | # %% 2 | # %% 3 | import pandas as pd 4 | import numpy as np 5 | import pathlib 6 | 7 | import matplotlib 8 | import matplotlib.pyplot as plt 9 | 10 | from our_plot_config import derived_dir, fig_dir, raw_dir, setplotstyle 11 | from kappas import do_one_period 12 | 13 | setplotstyle() 14 | 15 | # %% 16 | # Input files 17 | f_cereal = raw_dir / 'cereal.parquet' 18 | f_airlines = raw_dir / 'airlines.parquet' 19 | f_firm_info = derived_dir / 'firm-info.parquet' 20 | f_kappas = derived_dir / 'official-kappas.parquet' 21 | 22 | # Figure outputs 23 | fig_both = fig_dir / 'figure16_airlines_cereal_banks.pdf' 24 | 25 | # %% 26 | # ### Read in the (Cleaned) Parquet File of Beta's 27 | # - Read in stata file 28 | # - Create the "quarter" variable 29 | # - Apply the $\kappa$ calculations period by period 30 | # - Save the output to a new parquet file 31 | # - Write a Stata file. 
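# do_one_period (imported from kappas.py) returns one row per ordered firm
# pair and quarter, with profit weights under several control assumptions
# ('kappa', 'kappa_sqrt', 'kappa_pow2', 'kappa_pow3', 'kappa_CLWY') plus the
# 'cosine' and 'l1_measure' similarity columns; process_df below applies it
# quarter by quarter and then drops the diagonal (from == to) pairs.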
32 | 33 | # %% 34 | # read in, create quarter and drop kappa_ff 35 | 36 | 37 | def process_df(fn): 38 | df = pd.read_parquet(fn) 39 | df['quarter'] = pd.to_datetime(df.rdate, format='%Y%m%d') 40 | total_df3 = df[df.beta < 0.5].groupby(['quarter']).apply(do_one_period) 41 | total_df3 = total_df3[total_df3['from'] != total_df3['to']] 42 | return total_df3.reset_index() 43 | 44 | 45 | df_cereal = process_df(f_cereal) 46 | # Clean up airlines a bit more 47 | df_airlines = process_df(f_airlines) 48 | df_airlines = df_airlines[df_airlines.kappa < 4].copy() 49 | 50 | df_firms = pd.read_parquet(f_firm_info) 51 | df_firms2 = df_firms.loc[df_firms['siccd'] == 52 | 6021, ['permno', 'quarter', 'comnam']].copy() 53 | 54 | df_k = pd.read_parquet(f_kappas) 55 | 56 | df_banks = pd.merge(pd.merge(df_k[df_k['from'] != df_k['to']], df_firms2, left_on=['quarter', 'from'], right_on=['quarter', 'permno']), 57 | df_firms2, left_on=['quarter', 'to'], right_on=['quarter', 'permno']) 58 | 59 | # %% 60 | df_tot = pd.concat([df_cereal.groupby(['quarter'])['kappa'].median(), df_airlines.groupby( 61 | ['quarter'])['kappa'].median(), df_banks.groupby(['quarter'])['kappa'].median()], axis=1) 62 | 63 | # %% 64 | df_tot[df_tot.index > 65 | '1999-01-01'].plot(figsize=(20, 10), color=['navy', 'maroon', 'darkgreen']) 66 | plt.legend(['RTE Cereal', 'Airlines', 'Banks']) 67 | plt.ylabel(r"Median Pairwise Profit Weights $(\kappa)$") 68 | plt.xlabel("") 69 | plt.ylim(0, 1) 70 | plt.savefig(fig_both, bbox_inches='tight') 71 | -------------------------------------------------------------------------------- /code/plots6_sole_vs_shared.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import pandas as pd 3 | import numpy as np 4 | import pathlib 5 | 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | 9 | from our_plot_config import derived_dir, fig_dir, setplotstyle 10 | 11 | setplotstyle() 12 | 13 | # %% 14 | # inputs 15 | f_kappas_combined = derived_dir / 'appendix_kappa_combined.parquet' 16 | 17 | # outputs 18 | fig_soleshared_tr = fig_dir / 'figure17_tr.pdf' 19 | fig_soleshared_sc = fig_dir / 'figure17_sc.pdf' 20 | 21 | # %% 22 | # ## Read in Data 23 | # 1. Only read in Sole/Shared/All columns 24 | # 2. Do TR data for pre 2011 (afterwards this becomes unreliable). 25 | # 3. Do Scraped data for 2013 onwards (when XML data is available). 
26 | # - We would need to re-do scraping to grap sole/shared/all/none for 1999-2013 for non-XML scraped data 27 | 28 | 29 | col_list = [ 30 | 'from', 31 | 'to', 32 | 'quarter', 33 | 'kappa', 34 | 'kappa_all', 35 | 'kappa_sole', 36 | 'kappa_soleshared', 37 | 'skappa', 38 | 'skappa_all', 39 | 'skappa_sole', 40 | 'skappa_soleshared'] 41 | df = pd.read_parquet(f_kappas_combined, columns=col_list) 42 | df = df[df.quarter > '1999-01-01'] 43 | 44 | 45 | # %% 46 | df[df.quarter < '2011-01-01'].groupby(['quarter']).mean( 47 | )[['kappa', 'kappa_sole', 'kappa_soleshared']].plot(figsize=(20, 10)) 48 | plt.xlabel("") 49 | plt.legend(['All Shares', 'Sole Voting Rights', 'Sole+Shared Voting Rights']) 50 | #plt.title('Alternative Control Assumptions: TR data') 51 | plt.ylim(0, 1) 52 | plt.savefig(fig_soleshared_tr, bbox_inches="tight") 53 | 54 | 55 | # %% 56 | df[(df.quarter > '2013-09-30') & (~df.skappa.isnull())].groupby(['quarter'] 57 | ).mean()[['skappa', 'skappa_sole', 'skappa_soleshared']].plot(figsize=(20, 10)) 58 | plt.xlabel("") 59 | plt.legend(['All Shares', 'Sole Voting Rights', 'Sole+Shared Voting Rights']) 60 | plt.ylim(0, 1) 61 | #plt.title('Alternative Control Assumptions: Scraped data') 62 | plt.savefig(fig_soleshared_sc, bbox_inches="tight") 63 | -------------------------------------------------------------------------------- /code/plots7_short_interest_coverage.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import pandas as pd 3 | import numpy as np 4 | import pathlib 5 | 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | 9 | from our_plot_config import derived_dir, fig_dir, raw_dir, wrds_dir, setplotstyle 10 | 11 | setplotstyle() 12 | 13 | # %% 14 | 15 | # inputs 16 | f_betas = derived_dir / '13f_sp500_frankenbeta.parquet' 17 | f_short = wrds_dir / 'short_interest.parquet' 18 | 19 | # figures 20 | fig_coverage = fig_dir / 'appfigure_a4.pdf' 21 | fig_distribution = fig_dir / 'appfigure_a5.pdf' 22 | 23 | # %% 24 | # ## Short Interest Checks 25 | # - Read in the main "betas" dataset --> aggregate to firm-quarter across managers 26 | # - Read in the COMPUSTAT Short interest dataset 27 | # - Merge them by firm-quarter 28 | # - Plot S&P Coverage of Short Interest 29 | # - Plot quantiles of short interest distribution (conditional on coverage) 30 | 31 | df = pd.read_parquet(f_betas) 32 | df = df[df.quarter > '1980-01-01'] 33 | df_short = pd.read_parquet(f_short) 34 | 35 | 36 | # %% 37 | tmp = pd.merge(df.groupby(['permno', 38 | 'quarter']).agg({'beta': sum, 39 | 'shares': sum, 40 | 'shares_outstanding': max}).reset_index(), 41 | df_short, 42 | left_on=['permno', 43 | 'quarter'], 44 | right_on=['lpermno', 45 | 'qdate'], 46 | how='left') 47 | tmp['short_coverage'] = 1.0 * (~tmp['shortint'].isnull()) 48 | tmp['coverage'] = 1.0 49 | tmp['short_pct'] = tmp['shortint'] / (tmp['shares_outstanding'] * 1000) 50 | tmp['short_1'] = tmp['short_pct'] > 0.01 51 | tmp['short_2'] = tmp['short_pct'] > 0.02 52 | tmp['short_5'] = tmp['short_pct'] > 0.05 53 | tmp['short_10'] = tmp['short_pct'] > 0.10 54 | tmp['short_20'] = tmp['short_pct'] > 0.20 55 | 56 | # %% 57 | tmp.groupby(['quarter'])[['coverage', 'short_coverage']].sum().plot( 58 | figsize=(20, 10), color=['navy', 'maroon']) 59 | matplotlib.rc('xtick', labelsize=24) 60 | matplotlib.rc('ytick', labelsize=24) 61 | plt.legend(['Number of S&P 500 Firms in Sample', 62 | 'Number of S&P 500 Firms with Short Interest Data']) 63 | #plt.title('Coverage of S&P 500 Firms') 64 | plt.xlabel('') 65 | 
plt.ylim(0, 510) 66 | plt.savefig(fig_coverage, bbox_inches='tight') 67 | # %% 68 | ax = tmp.groupby(['quarter'])[['short_1', 'short_2', 'short_5', 69 | 'short_10', 'short_20']].mean().plot(figsize=(20, 10)) 70 | ax.set_ylim(0, 1) 71 | matplotlib.rc('xtick', labelsize=24) 72 | matplotlib.rc('ytick', labelsize=24) 73 | plt.legend(['Above 1%', 'Above 2%', 'Above 5%', 'Above 10%', 'Above 20%']) 74 | #plt.title('Fraction of Firms by Short Interest (conditional on coverage) ') 75 | vals = ax.get_yticks() 76 | ax.set_yticklabels(['{:,.2%}'.format(x) for x in vals]) 77 | plt.xlabel('') 78 | plt.savefig(fig_distribution, bbox_inches='tight') 79 | -------------------------------------------------------------------------------- /code/plots8_individual_firm_coverage.py: -------------------------------------------------------------------------------- 1 | # %% 2 | # %% 3 | import pandas as pd 4 | import numpy as np 5 | import pathlib 6 | 7 | import matplotlib 8 | import matplotlib.pyplot as plt 9 | 10 | from our_plot_config import derived_dir, fig_dir, setplotstyle 11 | 12 | setplotstyle() 13 | 14 | # %% 15 | # Input file 16 | f_betas_tr = derived_dir / '13f_sp500_unfiltered.parquet' 17 | f_betas_sc = derived_dir / '13f_scraped.parquet' 18 | 19 | # outputs 20 | fig_case_study = fig_dir / 'appfigure_a2_coverage.pdf' 21 | 22 | # %% 23 | # ## Read in Data 24 | # 1. Need both TR Betas and Scrape Betas 25 | # 2. Extract the three companies 26 | # 3. Plot 27 | 28 | # Read in input files 29 | df_tr = pd.read_parquet(f_betas_tr) 30 | df_sc = pd.read_parquet(f_betas_sc) 31 | 32 | df_scrape_subset = df_sc[df_sc.permno.isin(['24643', '27983', '88661'])] 33 | df_scrape_subset = df_scrape_subset[df_scrape_subset.quarter > '2007-01-01'] 34 | df_scrape_subset = df_scrape_subset[df_scrape_subset.quarter < '2015-01-01'] 35 | df_scrape_holdings = 100 * \ 36 | df_scrape_subset.groupby(['quarter', 'permno']).sum() 37 | df_tr_subset = df_tr[df_tr.permno.isin(['24643', '27983', '88661'])] 38 | df_tr_subset = df_tr_subset[df_tr_subset.quarter > '2007-01-01'] 39 | df_tr_subset = df_tr_subset[df_tr_subset.quarter < '2015-01-01'] 40 | df_tr_holdings = 100 * df_tr_subset.groupby(['quarter', 'permno']).sum() 41 | 42 | # %% 43 | fig, ax = plt.subplots(figsize=(20, 10)) 44 | df_tr_holdings['beta'].unstack().plot( 45 | ax=ax, color=[ 46 | 'navy', 'maroon', 'darkgreen'], style=[ 47 | '-', '-', '-']) 48 | matplotlib.rc('xtick', labelsize=24) 49 | matplotlib.rc('ytick', labelsize=24) 50 | plt.ylabel("Percent of Shares Reported in 13F Filings", {'size': '24'}) 51 | 52 | df_scrape_holdings['beta'].unstack().plot( 53 | ax=ax, 54 | style=[ 55 | '--', 56 | '--', 57 | '--'], 58 | color=[ 59 | 'navy', 60 | 'maroon', 61 | 'darkgreen']) 62 | plt.xlabel("") 63 | 64 | plt.legend(['Alcoa', 'Xerox', 'Coach'], prop={'size': '24'}) 65 | plt.ylim(0, 100) 66 | 67 | plt.show 68 | plt.savefig(fig_case_study, bbox_inches='tight') 69 | -------------------------------------------------------------------------------- /code/plots9_blackrock_vanguard.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from kappas import beta_to_kappa_merger_breakup 3 | import pandas as pd 4 | import numpy as np 5 | import pathlib 6 | 7 | import matplotlib 8 | import matplotlib.pyplot as plt 9 | 10 | from our_plot_config import derived_dir, fig_dir, raw_dir, setplotstyle 11 | 12 | setplotstyle() 13 | 14 | 15 | # %% 16 | # Input file 17 | f_betas = derived_dir / '13f_sp500_frankenbeta.parquet' 18 | f_big4 = raw_dir / 'big4.csv' 19 | 
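# big4.csv maps 13F manager identifiers (mgrno) to InvestorName for the big
# four institutions (BlackRock, Vanguard, State Street, Fidelity); here it is
# used to pick out the BlackRock and Vanguard entities for the merger /
# breakup / drop counterfactuals computed below.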
20 | # Figures 21 | f_merger = fig_dir / 'figure12_mergerbreakup.pdf' 22 | 23 | 24 | # ## Load Data and Setup 25 | # - Read in the Parquet File of $\beta$'s 26 | # - Read in the csv file of big four firms 27 | # - Setup the merger using mgrno_merger 28 | # - Setup the breakup using InvestorName 29 | # - Apply the $\kappa$ calculations period by period 30 | 31 | # %% 32 | df = pd.read_parquet( 33 | f_betas, 34 | columns=[ 35 | 'mgrno', 36 | 'permno', 37 | 'quarter', 38 | 'beta', 39 | 'permno_drop', 40 | 'sharecode_drop']) 41 | df = df[(df.permno_drop == False) & ( 42 | df.sharecode_drop == False) & (df.beta < 0.5)] 43 | 44 | big4 = pd.read_csv(f_big4) 45 | 46 | # grab ids for Blackrock and Vanguard 47 | is_blackrock = set(big4[big4.InvestorName == 'BlackRock'].mgrno.values) 48 | is_vanguard = set(big4[big4.InvestorName == 'Vanguard'].mgrno.values) 49 | 50 | # %% 51 | # merge firms meeting the criteria 52 | df['mgrno_merger'] = df.mgrno 53 | df.loc[df['mgrno_merger'].isin(is_blackrock.union( 54 | is_vanguard)), 'mgrno_merger'] = 1139734 55 | 56 | # Only break up firms with a name 57 | df.loc[df.mgrno.isin(is_blackrock), 'InvestorName'] = 'BlackRock' 58 | df.loc[df.mgrno.isin(is_vanguard), 'InvestorName'] = 'Vanguard' 59 | 60 | # %% 61 | # ### Do the work, Make the Plot 62 | kappa_df = beta_to_kappa_merger_breakup(df) 63 | x = kappa_df[kappa_df['from'] != kappa_df['to']].groupby( 64 | ['quarter'])[['kappa', 'kappa_merger', 'kappa_breakup', 'kappa_drop']].mean() 65 | 66 | # %% 67 | x.plot(figsize=(20, 10), color=['black', 'navy', 'maroon', 'darkgreen']) 68 | plt.legend([r'$\kappa$: Actual Ownership', 69 | r'$\kappa$: Merger: BlackRock+Vanguard', 70 | r'$\kappa$: Split in Half: BlackRock+Vanguard', 71 | r'$\kappa$: Ignore: BlackRock+Vanguard']) 72 | plt.xlabel("") 73 | plt.ylim(0, 1) 74 | plt.savefig(f_merger, bbox_inches="tight") 75 | -------------------------------------------------------------------------------- /code/table3_variance_decomp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import our_plot_config 4 | from our_plot_config import derived_dir, tab_dir 5 | import pandas as pd 6 | import numpy as np 7 | import pyhdfe 8 | 9 | # Input 10 | f_regression = derived_dir / 'regression_data.parquet' 11 | 12 | # Output 13 | f_table3 = tab_dir / 'table3.tex' 14 | 15 | # this helper decomposes the variance 16 | 17 | 18 | def do_decomp(x): 19 | var = np.nanvar(x, axis=0) 20 | rel_hhi = var[2] / var[0] 21 | similarity = var[1] / var[0] 22 | return np.array([similarity, rel_hhi, 1.0 - rel_hhi - similarity]) 23 | 24 | 25 | # Read in the Kappas: get only columns we need 26 | df_kappas2 = pd.read_parquet( 27 | f_regression, 28 | columns=[ 29 | 'from', 30 | 'to', 31 | 'quarter', 32 | 'kappa', 33 | 'cosine']) 34 | 35 | # Kappa = cosine * IHHI ratio 36 | df_kappas2['irat'] = df_kappas2['kappa'] / df_kappas2['cosine'] 37 | # Fixed Effects 38 | df_kappas2['pair_fe'] = df_kappas2.groupby(['from', 'to']).ngroup() 39 | df_kappas2['quarter_fe'] = df_kappas2.groupby(['quarter']).ngroup() 40 | 41 | # Report the size of everything 42 | print("N of Overall Dataframe:", len(df_kappas2)) 43 | print("N Quarter FE:", len(df_kappas2.quarter_fe.unique())) 44 | print("N Pair FE:", len(df_kappas2.pair_fe.unique())) 45 | 46 | # Take the logs of everything and get a NumPy array 47 | variables = np.log(df_kappas2[['kappa', 'cosine', 'irat']]).values 48 | 49 | # Use pyhdfe for high-dimensional fixed effects absorption 50 | # This 
takes 13min on my iMac 51 | resid_cs = pyhdfe.create( 52 | df_kappas2[['quarter_fe']].values).residualize(variables) 53 | resid_ts = pyhdfe.create(df_kappas2[['pair_fe']].values).residualize(variables) 54 | resid_pa = pyhdfe.create( 55 | df_kappas2[['pair_fe', 'quarter_fe']].values).residualize(variables) 56 | 57 | # Do the Variance Decomposition for each case 58 | tab_mat = np.vstack([do_decomp(variables), do_decomp( 59 | resid_cs), do_decomp(resid_ts), do_decomp(resid_pa)]) * 100.0 60 | table3 = pd.DataFrame(tab_mat, index=['Raw', 'Cross-Section', 'Time-Series', 'Panel'], 61 | columns=['Overlapping Ownership', 'Relative IHHI', 'Covariance']) 62 | print(table3) 63 | 64 | # Write the latex table to disk (skip zero covariance column) 65 | table3.iloc[:, 0:2].to_latex( 66 | f_table3, float_format=lambda x: '%.2f' % x + str('%'), column_format='l cc') 67 | -------------------------------------------------------------------------------- /code/table4_kappa_correlation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Table 4 Correlations with Kappa 4 | 5 | import our_plot_config 6 | from our_plot_config import derived_dir, tab_dir 7 | import pandas as pd 8 | import numpy as np 9 | import re 10 | 11 | # for regressions 12 | import pyhdfe 13 | from sklearn import datasets, linear_model 14 | import statsmodels.formula.api as smf 15 | from statsmodels.iolib.summary2 import summary_col 16 | 17 | # Input 18 | f_regression = derived_dir / 'regression_data.parquet' 19 | 20 | # Output 21 | f_tab4 = tab_dir / 'table4.tex' 22 | 23 | # Read data 24 | cols = [ 25 | 'from', 26 | 'to', 27 | 'quarter', 28 | 'kappa', 29 | 'retail_share', 30 | 'market_cap', 31 | 'marginsq', 32 | 'normalized_l2', 33 | 'big3', 34 | 'beta_BlackRock', 35 | 'beta_Vanguard', 36 | 'beta_StateStreet'] 37 | df = pd.read_parquet( 38 | f_regression, 39 | columns=cols).rename( 40 | columns={ 41 | 'beta_BlackRock': 'blackrock', 42 | 'beta_Vanguard': 'vanguard', 43 | 'beta_StateStreet': 'statestreet'}) 44 | 45 | # Filter on dates 46 | df = df[(df.quarter > '2000-01-01')].copy() 47 | 48 | # Calculate derived columns 49 | df['lcap'] = np.log(df['market_cap']) 50 | # Code the FE first: This speeds things up to avoid type converting 13 51 | # million dates 52 | df['pair_fe'] = df.groupby(['from', 'to']).ngroup() 53 | df['quarter_fe'] = df.groupby(['quarter']).ngroup() 54 | 55 | 56 | # Regressions! 
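# (Frisch-Waugh-Lovell two-step: residualize kappa and every regressor on the
# pair and quarter fixed effects with pyhdfe, then run OLS on the residuals.
# The slope estimates match the full fixed-effects regression, which is why
# the smf.ols formulas below contain no FE terms and why R^2 is adjusted for
# the absorbed effects afterwards.)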
57 | # We will need to absorb: do that first 58 | # This is comically slow and uses 30+GB 59 | var_list = [ 60 | 'kappa', 61 | 'retail_share', 62 | 'lcap', 63 | 'marginsq', 64 | 'normalized_l2', 65 | 'big3', 66 | 'blackrock', 67 | 'vanguard', 68 | 'statestreet'] 69 | 70 | # Drop any missings 71 | df2 = df[var_list + ['pair_fe', 'quarter_fe']].dropna() 72 | 73 | 74 | alg_pa = pyhdfe.create( 75 | df2[['pair_fe', 'quarter_fe']].values, drop_singletons=False) 76 | resid_pa = alg_pa.residualize(df2[var_list].values) 77 | 78 | # Perform Regressions 79 | # no need for fixed effects because we've already residualized everything 80 | # drop rows containing NAs 81 | pd_vars = pd.DataFrame(resid_pa, columns=['kappa', 'retail_share', 'lcap', 82 | 'marginsq', 'normalized_l2', 83 | 'big3', 'blackrock', 'vanguard', 'statestreet']) 84 | 85 | 86 | reg1 = smf.ols( 87 | formula='kappa ~ retail_share + lcap + marginsq + big3', 88 | data=pd_vars).fit() 89 | reg2 = smf.ols( 90 | formula='kappa ~ retail_share + lcap + marginsq + normalized_l2', 91 | data=pd_vars).fit() 92 | reg3 = smf.ols( 93 | formula='kappa ~ retail_share + lcap + marginsq + big3 + normalized_l2', 94 | data=pd_vars).fit() 95 | reg4 = smf.ols( 96 | formula='kappa ~ retail_share + lcap + marginsq + normalized_l2 + blackrock + vanguard + statestreet', 97 | data=pd_vars).fit() 98 | 99 | # Adjust R^2 for the FE 100 | 101 | 102 | def rsq_update(reg): 103 | reg.rsquared = np.var( 104 | reg.predict() + (df2['kappa'].values - resid_pa[:, 0])) / np.var(df2['kappa']) 105 | reg.quarterfe = r" \checkmark " 106 | reg.pairfe = r" \checkmark " 107 | return 108 | 109 | 110 | for r in [reg1, reg2, reg3, reg4]: 111 | rsq_update(r) 112 | 113 | 114 | # Print Output 115 | info_dict = {'R\sq': lambda x: f"{x.rsquared:.4f}", 116 | 'Quarter FE': lambda x: f"{x.quarterfe}", 117 | 'Ordered Pair FE': lambda x: f"{x.pairfe}", 118 | 'N': lambda x: f"{int(x.nobs):d}" 119 | } 120 | 121 | dfoutput = summary_col(results=[reg1, reg2, reg3, reg4], 122 | float_format='%0.4f', 123 | stars=True, 124 | model_names=['(1)', 125 | '(2)', 126 | '(3)', 127 | '(4)'], 128 | info_dict=info_dict, 129 | regressor_order=['retail_share', 130 | 'lcap', 131 | 'marginsq', 132 | 'big3', 133 | 'normalized_l2', 134 | 'blackrock', 135 | 'vanguard', 136 | 'statestreet' 137 | ], 138 | drop_omitted=True) 139 | 140 | # Clean up the TeX by hand for the table 141 | tab_reg2 = re.sub(r'\*\*\*', '*', dfoutput.as_latex()) 142 | tab_reg3 = re.sub(r'hline', 'toprule', tab_reg2, count=1) 143 | tab_reg4 = re.sub(r'hline', 'bottomrule', tab_reg3, count=1) 144 | 145 | tab_reg5 = re.sub(r'retail\\_share', 'Retail Share', tab_reg4) 146 | tab_reg5 = re.sub(r'lcap', 'Log(Market Cap)', tab_reg5) 147 | tab_reg5 = re.sub(r'marginsq', 'Operating Margin', tab_reg5) 148 | tab_reg5 = re.sub(r'big3', 'Big Three Holdings', tab_reg5) 149 | tab_reg5 = re.sub(r'normalized\\_l2', 'Investor Indexing', tab_reg5) 150 | tab_reg5 = re.sub(r'blackrock', 'BlackRock Holdings', tab_reg5) 151 | tab_reg5 = re.sub(r'vanguard', 'Vanguard Holdings', tab_reg5) 152 | tab_reg5 = re.sub(r'statestreet', 'State Street Holdings', tab_reg5) 153 | tab_reg5 = re.sub(r'R\\sq', '$R^2$', tab_reg5) 154 | tab_reg5 = re.sub(r'N', '$N$', tab_reg5) 155 | out_tab = '\n'.join(tab_reg5.splitlines()[3:-2]) 156 | 157 | # Display table and save 158 | print(out_tab) 159 | with open(f_tab4, 'w') as file: 160 | file.write(out_tab) 161 | -------------------------------------------------------------------------------- /code/utilities/date_util.py: 
-------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def lookup_dates(s): 4 | """ 5 | This is an extremely fast approach to datetime parsing. 6 | For large data, the same dates are often repeated. Rather than 7 | re-parse these, we store all unique dates, parse them, and 8 | use a lookup to convert all dates. 9 | """ 10 | dates_dict = {date:pd.to_datetime(date,errors='coerce') for date in s.unique()} 11 | return s.map(dates_dict) 12 | 13 | def end_quarter(series): 14 | return (series - pd.tseries.offsets.DateOffset(days=1) + pd.tseries.offsets.QuarterEnd()) -------------------------------------------------------------------------------- /code/utilities/matlab_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def matlab_sparse(i,j,s,compress=True): 5 | rows, row_pos = np.unique(i, return_inverse=True) 6 | cols, col_pos = np.unique(j, return_inverse=True) 7 | pivoted_arr = np.zeros((len(rows), len(cols))) 8 | pivoted_arr[row_pos, col_pos] = s 9 | if compress: 10 | nz=(pivoted_arr.max(axis=1)>0) 11 | pivoted_arr=pivoted_arr[nz,:] 12 | rows=rows[nz] 13 | return pivoted_arr, rows, cols 14 | 15 | def coalesce(df,col_list,prefix,method='left'): 16 | for x in col_list: 17 | if method=='left': 18 | df['merged_'+x]=df[prefix+x].combine_first(df[x]) 19 | if method=='right': 20 | df['merged_'+x]=df[x].combine_first(df[prefix+x]) 21 | return df -------------------------------------------------------------------------------- /code/utilities/quantiles.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def weighted_quantile(values, quantiles, sample_weight=None, values_sorted=False, old_style=False): 6 | values = np.array(values) 7 | quantiles = np.array(quantiles) 8 | if sample_weight is None: 9 | sample_weight = np.ones(len(values)) 10 | sample_weight = np.array(sample_weight) 11 | assert np.all(quantiles >= 0) and np.all(quantiles <= 1), 'quantiles should be in [0, 1]' 12 | 13 | if not values_sorted: 14 | sorter = np.argsort(values) 15 | values = values[sorter] 16 | sample_weight = sample_weight[sorter] 17 | 18 | weighted_quantiles = np.cumsum(sample_weight) - 0.5 * sample_weight 19 | if old_style: 20 | # To be convenient with numpy.percentile 21 | weighted_quantiles -= weighted_quantiles[0] 22 | weighted_quantiles /= weighted_quantiles[-1] 23 | else: 24 | weighted_quantiles /= np.sum(sample_weight) 25 | return pd.Series(np.interp(quantiles, weighted_quantiles, values)) -------------------------------------------------------------------------------- /code/wrds_checks.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import os 4 | import sys 5 | import matplotlib 6 | import matplotlib.pyplot as plt 7 | from wrds_cleaning import expand_splist 8 | # 9 | # These are checks (WRITE MORE) 10 | # 11 | 12 | 13 | def check_bigbeta(df, fn): 14 | df[df.beta > 0.5].to_excel(fn) 15 | return 16 | 17 | 18 | def check_s34(df, f_shares_out, f_prc_zero, f_duplicates): 19 | df[df.shrout1 == 0].to_excel(f_shares_out) 20 | df[df.prc == 0].to_excel(f_prc_zero) 21 | df[df[['permno', 'fdate', 'rdate', 'mgrno']].duplicated( 22 | keep=False)].to_excel(f_duplicates) 23 | return 24 | 25 | 26 | def check_names(df_sp500, df_names, f_names_missing): 27 | df_spe = expand_splist(df_sp500) 28 | x = 
pd.merge(df_spe, df_names, on=['permno'], how='left') 29 | x = x[(x.qdate >= x.namedt) & (x.qdate <= x.nameenddt)] 30 | y = pd.merge(df_spe, x, on=['permno', 'qdate'], how='left') 31 | y = y[y.ticker.isnull()][['permno', 'qdate']] 32 | pd.merge(y, df_names, on=['permno']).to_excel(f_names_missing) 33 | 34 | 35 | def check_blackrock(df, fig_blackrock1, fig_blackrock2, fig_blackrock3): 36 | blackrock = blackrock_fix(df[df.mgrname.str.contains('BLACKROCK')].copy()) 37 | blackrock['aum'] = blackrock['prc'] * blackrock['shares'] 38 | blackrock[blackrock.rdate == blackrock.fdate].groupby( 39 | 'rdate')['aum'].sum().plot(figsize=(20, 10), title="RDATE==FDATE") 40 | plt.savefig(fig_blackrock1) 41 | blackrock.groupby( 42 | ['rdate'])['aum'].sum().plot( 43 | figsize=( 44 | 20, 45 | 10), 46 | title="BY RDATE") 47 | plt.savefig(fig_blackrock2) 48 | blackrock.groupby( 49 | ['fdate'])['aum'].sum().plot( 50 | figsize=( 51 | 20, 52 | 10), 53 | title="BY FDATE") 54 | plt.savefig(fig_blackrock3) 55 | return blackrock 56 | 57 | 58 | def check_s34_coverage(df, df_sp500, df_names, f_s34_coverage): 59 | totals = df.groupby(['permno', 'quarter'])['mgrno'].nunique().reset_index() 60 | x = pd.merge( 61 | expand_splist(df_sp500), totals, left_on=[ 62 | 'permno', 'qdate'], right_on=[ 63 | 'permno', 'quarter'], how='left') 64 | y = x[x.mgrno.isnull()][['permno', 'qdate']] 65 | z = pd.merge(y, df_names, on=['permno'], how='left') 66 | z[(z.qdate <= z.nameenddt) & (z.qdate >= z.namedt)].to_excel(f_s34_coverage) 67 | 68 | 69 | def check_multiple_cusip(df, f_multiple_cusips, f_multiple_cusips_summary): 70 | x = df.groupby(['permno', 'cusip', 'rdate'])['shares'].sum() 71 | y = x[x.groupby(level=[0, 2]).transform('count') > 1].reset_index( 72 | ).sort_values(['rdate', 'permno', 'shares']) 73 | y['share_pct'] = y['shares'] / \ 74 | y.groupby(['permno', 'rdate'])['shares'].transform(sum) 75 | y.sort_values(['permno', 'rdate', 'cusip']).to_excel(f_multiple_cusips) 76 | z = y.groupby(['permno', 'rdate'])['share_pct'].min( 77 | ).sort_values().reset_index().to_excel(f_multiple_cusips_summary) 78 | 79 | 80 | def check_fundamental_coverage( 81 | df, df_fund2, df_names2, f_missing_betas, f_missing_atq, f_missing_segments): 82 | df3 = df[['permno', 'quarter']].drop_duplicates().reset_index(drop=True) 83 | df3['betas_observed'] = 1 84 | x = pd.merge(df3, df_fund2, on=['permno', 'quarter'], how='outer').sort_values( 85 | ['quarter', 'permno']) 86 | y1 = x[x.betas_observed.isnull()] 87 | pd.merge(y1[['permno', 'quarter']].drop_duplicates(), df_names2).sort_values( 88 | ['permno', 'quarter']).to_excel(f_missing_betas) 89 | pd.merge(x[x['atq'].isnull()], df_names2, on=['permno', 'quarter'] 90 | ).sort_values(['permno', 'quarter']).to_excel(f_missing_atq) 91 | pd.merge(x[x['num_bus_seg'].isnull()], df_names2, on=['permno', 'quarter'] 92 | ).sort_values(['permno', 'quarter']).to_excel(f_missing_segments) 93 | -------------------------------------------------------------------------------- /code/wrds_cleaning.py: -------------------------------------------------------------------------------- 1 | # problems 2 | from our_plot_config import checks_dir 3 | import pandas as pd 4 | import numpy as np 5 | f_no_permnos = checks_dir / 's34_nopermno.xlsx' 6 | 7 | idx = ['permno', 'cusip', 'mgrno', 'rdate'] 8 | data = ['fdate', 'shares', 'prc', 'shrout1', 'shrout2', 'sole', 'shared', 'no'] 9 | 10 | # 11 | # Use CRSP names file to construct mapping from CUSIP/NCUSIP --> Permno 12 | # - Mapping should be N-->1 unique 13 | # 14 | def 
make_cusip_list(df_names): 15 | cusip_list = pd.concat([df_names[['ncusip', 'permno']].drop_duplicates().rename(columns={ 16 | 'ncusip': 'cusip'}), df_names[['cusip', 'permno']].drop_duplicates()]).drop_duplicates() 17 | x = cusip_list.groupby('cusip')['permno'].count() 18 | if x.max() == 1: 19 | print("CUSIP to Permno mapping unique") 20 | else: 21 | print("CUSIP to Permno mapping not unique") 22 | x = cusip_list.groupby('cusip').count() 23 | print(x[x > 1]) 24 | return cusip_list 25 | 26 | #### 27 | # S-34 Cleaning 28 | ### 29 | # This is S34 Data cleaning/merging 30 | # - df is always the S-34 data 31 | # NB: edited this to include Barclays per Mike's note on 2004 -- MRB 32 | def blackrock_fix(df): 33 | # Use Fdates instead of rdates for BlackRock Inc only 34 | df['blackrock_fix'] = False 35 | df.loc[(df.mgrno.isin([7900, 9385])) & ( 36 | df.rdate != df.fdate), 'blackrock_fix'] = True 37 | df.loc[(df.mgrno.isin([7900, 9385])), 'rdate'] = df.loc[( 38 | df.mgrno.isin([7900, 9385])), 'fdate'] 39 | return df 40 | 41 | 42 | def read_s34(fn): 43 | df = pd.read_parquet( 44 | fn, 45 | columns=[ 46 | 'fdate', 47 | 'mgrno', 48 | 'rdate', 49 | 'cusip', 50 | 'shares', 51 | 'sole', 52 | 'shared', 53 | 'no', 54 | 'prc', 55 | 'shrout1', 56 | 'shrout2']) 57 | df['cusip'] = df['cusip'].astype('category') 58 | return blackrock_fix(df) 59 | 60 | # Merge to get only the Permo Quarters for S&P 500 61 | 62 | 63 | def filter_s34(df, df_sp): 64 | return pd.merge(df, df_sp, left_on=['cusip', 'rdate'], right_on=[ 65 | 'cusip', 'qdate'], how='inner').drop(columns=['qdate']) 66 | 67 | 68 | def get_sp_quarters(df_sp500, cusip_list): 69 | x = pd.merge(expand_splist(df_sp500), cusip_list, on=['permno']) 70 | x['cusip'] = x['cusip'].astype('category') 71 | return x.dropna() 72 | 73 | 74 | # Consolidate managers (various BlackRock entities, FMR, etc.) 
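# f_consolidation is a two-column csv mapping mgrno_from -> mgrno_to; the
# mgrno values below are made up purely for illustration:
#
#   mgrno_from,mgrno_to
#   11386,9385
#   12588,9385
#
# Holdings filed under a mgrno_from are reassigned to the matching mgrno_to
# and re-aggregated within (permno, cusip, mgrno, quarter, fdate), so each
# consolidated group appears as a single 13F manager.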
75 | def consolidate_mgrs(main_df, f_consolidation): 76 | mgr_consolidations = pd.read_csv(f_consolidation) 77 | merged = pd.merge( 78 | main_df, 79 | mgr_consolidations, 80 | left_on=['mgrno'], 81 | right_on=['mgrno_from'], 82 | how='left') 83 | # don't consolidate these 84 | part1 = merged.loc[merged.mgrno_to.isnull(), main_df.columns] 85 | # consolidate these 86 | x = merged[~merged.mgrno_to.isnull()].copy() 87 | x['mgrno'] = x['mgrno_to'] 88 | part2 = x.drop(columns=['mgrno_from', 89 | 'mgrno_to']).groupby(['permno', 90 | 'cusip', 91 | 'mgrno', 92 | 'quarter', 93 | 'fdate']).agg({'shares': sum, 94 | 'prc': max, 95 | 'shrout1': max, 96 | 'shrout2': max, 97 | 'sole': sum, 98 | 'shared': sum, 99 | 'no': sum, 100 | 'share_split': max}).reset_index() 101 | return pd.concat([part1, part2], axis=0) 102 | 103 | # 104 | # Use f_drops (file) to tag observations 105 | 106 | 107 | def add_drops(df, f_drops, df_names2): 108 | keep_cols = list(set(list(df.columns) + 109 | ['no_managers', 'permno_drop', 'sharecode_drop'])) 110 | # count number of managers 111 | df['no_managers'] = df.groupby(['permno', 'quarter'])[ 112 | 'mgrno'].transform('nunique') 113 | # dual class shares 114 | drops = pd.read_csv(f_drops) 115 | drops['start'] = pd.to_datetime(drops['start']) 116 | drops['end'] = pd.to_datetime(drops['end']) 117 | df = pd.merge(df, drops, on=['permno'], how='left') 118 | df.loc[(df['quarter'] >= df['start']) & ( 119 | df['quarter'] <= df['end']), 'permno_drop'] = True 120 | 121 | df = pd.merge(df, df_names2[['permno', 'quarter', 'shrcd']].drop_duplicates(), on=[ 122 | 'permno', 'quarter']) 123 | # ADR's and REITs 124 | df.loc[~df.shrcd.isin([10, 11, 12, 18]), 'sharecode_drop'] = True 125 | df[['permno_drop', 'sharecode_drop']] = df[[ 126 | 'permno_drop', 'sharecode_drop']].fillna(False) 127 | return df[keep_cols] 128 | 129 | # 130 | # Use MSF data to add a stock split dummy to each 13-F filing date 131 | # 132 | 133 | 134 | def add_stock_splits(df, df_msf): 135 | merged = pd.merge( 136 | pd.merge(df[idx + data], 137 | df_msf[['permno', 'qdate', 'cfacshr']], left_on=['permno', 'rdate'], right_on=['permno', 'qdate'], how='left'), 138 | df_msf[['permno', 'qdate', 'cfacshr']], left_on=['permno', 'fdate'], right_on=['permno', 'qdate'], how='left').drop(columns=['qdate_x', 'qdate_y']) 139 | merged.loc[(merged.cfacshr_x != merged.cfacshr_y) & ( 140 | ~merged.cfacshr_x.isnull()) & (~merged.cfacshr_y.isnull()), 'is_split'] = 1 141 | merged['is_split'].fillna(0, inplace=True) 142 | merged['share_split'] = merged.groupby(['permno', 'mgrno', 'rdate'])[ 143 | 'is_split'].transform(max) 144 | return merged 145 | 146 | 147 | def construct_fundamentals(df_fund, df_names2): 148 | return pd.merge(df_names2[['permno', 'quarter']], df_fund.rename(columns={'datadate': 'quarter'}), on=['permno', 'quarter'], how='left')[['permno', 'quarter', 'oibdpq', 'atq', 'niq', 149 | 'saleq', 'cogsq']].drop_duplicates() 150 | 151 | 152 | def construct_bus_segments(df_seg, df_sp500): 153 | df_spe = expand_splist(df_sp500).rename(columns={'qdate': 'quarter'}) 154 | z = pd.merge(df_spe, df_seg.groupby(['permno', 'quarter'])['stype'].max().reset_index( 155 | ), on=['permno', 'quarter'], how='left').sort_values(['permno', 'quarter']) 156 | z['num_bus_seg'] = z.groupby(['permno'])['stype'].ffill().bfill() 157 | return z[['permno', 'quarter', 'num_bus_seg']].copy() 158 | 159 | 160 | def expand_names(df_names, df_sp500): 161 | x = pd.merge(expand_splist(df_sp500), df_names, on=['permno']) 162 | return x[(x['qdate'] >= x['namedt']) & 
(x['qdate'] <= x['nameenddt'])].drop(columns=[ 163 | 'namedt', 'nameenddt', 'st_date', 'end_date', 'final_date']).rename(columns={'qdate': 'quarter'}) 164 | 165 | 166 | def expand_splist(df_sp): 167 | df_sp['key'] = 0 168 | alldates = pd.DataFrame({'qdate': pd.date_range( 169 | '01-01-1980', pd.to_datetime('today'), freq='Q')}) 170 | alldates['key'] = 0 171 | x = pd.merge(df_sp, alldates, on='key') 172 | return x[(x['qdate'] >= x['start']) & ( 173 | x['qdate'] <= x['ending'])][['permno', 'qdate']] 174 | # 175 | # Return dataset with single fdate associated with each rdate 176 | # Need split data in the S-34 data 177 | # - 24,432,318 Obs have single observation 178 | # - 2,608,149 Obs have multiple filings with same shares (different prices) 179 | # - 84,159 Obs have a known share split: take the first filing (before share split) 180 | # - 44,874 Obs have no known share split: take the last filing (assume these are corrections) 181 | 182 | 183 | def dedup_s34(df): 184 | # keep these fields only 185 | data2 = data + ['share_split'] 186 | 187 | dups = df[idx + data + ['share_split']].duplicated(subset=idx, keep=False) 188 | dups_df = df.loc[dups, idx + data2] 189 | dups_df['min_shares'] = dups_df.groupby(idx)['shares'].transform(min) 190 | dups_df['max_shares'] = dups_df.groupby(idx)['shares'].transform(max) 191 | dups_df['min_price'] = dups_df.groupby(idx)['prc'].transform(min) 192 | dups_df['max_price'] = dups_df.groupby(idx)['prc'].transform(max) 193 | 194 | # These have one observation per rdate 195 | part1 = df.loc[~dups, idx + data2] 196 | # All fdates have the same shares 197 | part2 = dups_df.loc[dups_df.min_shares == dups_df.max_shares, 198 | idx + data2].groupby(idx).last().reset_index() 199 | 200 | # Choosing different Fdates gives different answers -- these are more 201 | # challenging 202 | problems = dups_df[dups_df.min_shares != dups_df.max_shares] 203 | problems = problems.sort_values(idx + ['fdate']) 204 | 205 | # If a split, take the first fdate for each rdate (usually fdate==rdate) 206 | part3 = problems[problems.share_split == 1].groupby(idx).first()[ 207 | data2].reset_index() 208 | 209 | # If a not split, take the last fdate for each rdate (this is riskier) 210 | part4 = problems[problems.share_split == 0].groupby(idx).last()[ 211 | data2].reset_index() 212 | print("Removing duplicate Fdates within each Rdate...") 213 | print("Observations with one fdate per rdate: ", len(part1)) 214 | print("Observations with multiple fdates but same shares: ", len(part2)) 215 | print("Observations with known split (take first): ", len( 216 | part3), len(problems[problems.share_split == 1])) 217 | print("Other observations (update?) 
(take last): ", len(
218 | part4), len(problems[problems.share_split == 0]))
219 | 
220 | return pd.concat([part1, part2, part3, part4]).rename(
221 | columns={'rdate': 'quarter'})
222 | 
223 | # Merge the CRSP MSF data to the S-34 (13F) data
224 | # - Use MSF data for price and shares out when available
225 | # - Otherwise use median self-reported 13-F values
226 | # - Use shrout2 before shrout1
227 | # - Calculate the betas
228 | 
229 | 
230 | def compute_betas(df, df_msf):
231 | print('Before 99 Missings:\n',
232 | df[(df.quarter < '1999-01-01')][['sole',
233 | 'shared',
234 | 'no',
235 | 'shares']].isnull().mean())
236 | print('After 99 Missings:\n',
237 | df[(df.quarter > '1999-01-01')][['sole',
238 | 'shared',
239 | 'no',
240 | 'shares']].isnull().mean())
241 | df[['no', 'sole', 'shared']] = df[['no', 'sole', 'shared']].fillna(0)
242 | 
243 | y = pd.merge(df, df_msf[['permno', 'qdate', 'prc', 'shrout']], left_on=[
244 | 'permno', 'quarter'], right_on=['permno', 'qdate'], how='left')
245 | y.loc[:, ['shrout1', 'shrout2', 'shrout']] = y[[
246 | 'shrout1', 'shrout2', 'shrout']].replace(0, np.nan)
247 | 
248 | y['med_price'] = y.groupby(['permno', 'quarter'])[
249 | 'prc_x'].transform(np.median)
250 | y['med_shares'] = y.groupby(['permno', 'quarter'])['shrout2'].transform(np.median).combine_first(
251 | 1e3 * y.groupby(['permno', 'quarter'])['shrout1'].transform(np.median))
252 | y[['shared', 'no', 'sole']] = y[['shared', 'no', 'sole']].fillna(0)
253 | 
254 | y['price'] = y['prc_y'].combine_first(y.med_price)
255 | y['shares_outstanding'] = y['shrout'].combine_first(y.med_shares)
256 | y = alt_betas(y)
257 | return y[['permno', 'mgrno', 'quarter', 'shares', 'shares_outstanding',
258 | 'price', 'beta', 'beta_sole', 'beta_soleshared', 'sole', 'shared', 'no']]
259 | 
260 | 
261 | def process_scraped(fn_scrape, fn_big4):
262 | df = pd.read_parquet(fn_scrape)
263 | df['quarter'] = pd.to_datetime(df.rdate, format='%Y%m%d')
264 | df = df.rename(
265 | columns={
266 | 'prc': 'price',
267 | 'none': 'no'}).drop(
268 | columns=['rdate'])
269 | return alt_betas(pd.merge(df, pd.read_csv(
270 | fn_big4), how='left', on=['mgrno']))
271 | 
272 | # Compute the betas: beta = shares / (1,000 x shares outstanding)
273 | # Compute sole+shared and sole as well (only valid post 99)
274 | def alt_betas(y):
275 | y['beta'] = y['shares'] / (1e3 * y['shares_outstanding'])
276 | y['beta_soleshared'] = (y['shares'] - y['no']) / \
277 | (1e3 * y['shares_outstanding'])
278 | y['beta_sole'] = (y['shares'] - y['no'] - y['shared']) / \
279 | (1e3 * y['shares_outstanding'])
280 | return y
281 | 
282 | # Combine betas: keep df through cut_date and dfs after cut_date
283 | def combine_betas(df, dfs, cut_date='2000-01-01'):
284 | cols = df.columns
285 | return pd.concat([df[df.quarter <= cut_date], dfs.loc[dfs.quarter >
286 | cut_date, cols]], axis=0, ignore_index=True)
287 | 
--------------------------------------------------------------------------------
/code/wrds_downloads.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from utilities.date_util import lookup_dates, end_quarter
4 | 
5 | # Download the CRSP Names file
6 | # - this links the permno to gvkey (COMPUSTAT) and CUSIP
7 | # - Fix the date ending only for last date within the group
8 | 
9 | 
10 | def get_names(db):
11 | return fix_ending_dates(clean_wrds(db.get_table(
12 | 'crsp', 'stocknames')), 'nameenddt', ['permno'])
13 | 
14 | # Get the Compustat-CRSP Link table
15 | # - fix the dates and use today's date when end_date is empty
16 | # - filter on permnos
17 | 
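# Usage sketch for the download helpers in this module (illustrative only -- see
# 1_Download_WRDS_Data.py for the actual call sequence; 'your_wrds_user' is a
# placeholder for your own WRDS account name):
#
#   >>> import wrds
#   >>> db = wrds.Connection(wrds_username='your_wrds_user')
#   >>> df_names = get_names(db)
#   >>> crosswalk = get_crosswalk(db, df_names.permno.unique())
#
# Each gvkey-permno link in the CRSP/Compustat table is valid only between linkdt
# and linkenddt; a missing linkenddt means the link is still active, so get_crosswalk
# fills it with today's date before downstream functions filter on the link window.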
18 | 
19 | def get_crosswalk(db, permno_list):
20 | crosswalk = clean_wrds(db.get_table('crsp', 'Ccmxpf_linktable'))
21 | crosswalk = clean_wrds(crosswalk[~(
22 | crosswalk.linkenddt < '1980-01-01') & crosswalk.lpermno.isin(permno_list)])
23 | crosswalk['linkenddt'].fillna(pd.Timestamp("today").date(), inplace=True)
24 | return crosswalk
25 | 
26 | # DB Queries
27 | 
28 | # Get the Compustat Fundamentals
29 | # Match to names file to get permno instead of gvkey
30 | # Make sure they are unique observations by permno,quarter (this is a pain)
31 | 
32 | 
33 | def get_fundamentals(db, crosswalk):
34 | fields = [
35 | 'gvkey',
36 | 'datadate',
37 | 'fyearq',
38 | 'fqtr',
39 | 'fyr',
40 | 'datafqtr',
41 | 'indfmt',
42 | 'cusip',
43 | 'oibdpq',
44 | 'atq',
45 | 'niq',
46 | 'saleq',
47 | 'cogsq']
48 | query = "select " + \
49 | ', '.join(fields) + " from comp.fundq where fyearq> 1979 and gvkey in %s" % repr(
50 | tuple(crosswalk.gvkey.unique()))
51 | df_fundq = clean_wrds(db.raw_sql(query)).sort_values(['gvkey', 'datafqtr'])
52 | # remove duplicates by taking last datafqtr within each gvkey-quarter
53 | # note: this is rare and only happens when fiscal year changes, taking
54 | # first has no effect
55 | df_fundq2 = df_fundq.groupby(['gvkey', 'datadate']).last().reset_index()
56 | 
57 | # merge in the gvkey-permno crosswalk
58 | x = pd.merge(df_fundq2,
59 | crosswalk[['gvkey',
60 | 'lpermno',
61 | 'linkdt',
62 | 'linkenddt']].drop_duplicates(),
63 | on='gvkey').rename(columns={'lpermno': 'permno'})
64 | y = x[(x.datadate >= x.linkdt) & (x.datadate <= x.linkenddt)].copy()
65 | return clean_wrds(y.sort_values('linkenddt').groupby(
66 | ['permno', 'datadate']).last().reset_index()[fields + ['permno']])
67 | 
68 | # Download the MSF file from CRSP
69 | # - convert to quarterly data by taking last observation
70 | 
71 | 
72 | def get_msf(db, permno_list, df_sp500=None, trim=False):
73 | fields = [
74 | 'cusip',
75 | 'permno',
76 | 'hsiccd',
77 | 'date',
78 | 'prc',
79 | 'altprc',
80 | 'shrout',
81 | 'altprcdt',
82 | 'cfacshr']
83 | query = "select " + \
84 | ', '.join(
85 | fields) + " from crsp.msf where date > '1979-12-31' and permno in %s" % repr(tuple(permno_list))
86 | df_msf = clean_wrds(db.raw_sql(query))
87 | df_msf2 = convert_to_quarter(df_msf, 'date', ['cusip', 'permno'])
88 | if trim:
89 | # Trim the MSF data for only dates and permnos in the S&P at the time (requires df_sp500)
90 | df_msf3 = pd.merge(df_msf2, df_sp500, on='permno')
91 | return df_msf3[(df_msf3['date'] >= df_msf3['start']) &
92 | (df_msf3['date'] <= df_msf3['ending'])]
93 | else:
94 | return df_msf2
95 | 
96 | # Download the short interest file from COMPUSTAT
97 | # - Merge in the crosswalk to get permnos
98 | # - Filter on time after merge to get correct crosswalk info
99 | 
100 | 
101 | def get_short_interest(db, crosswalk):
102 | short_int = clean_wrds(db.get_table('comp', 'sec_shortint'))
103 | short_int2 = pd.merge(short_int, crosswalk, on=['gvkey'], how='left')
104 | short_int3 = short_int2[(short_int2.datadate <= short_int2.linkenddt) & (
105 | short_int2.datadate >= short_int2.linkdt)].copy()
106 | return convert_to_quarter(short_int3, 'datadate', ['lpermno'])[
107 | ['lpermno', 'lpermco', 'qdate', 'gvkey', 'iid', 'shortint', 'shortintadj', 'datadate', 'splitadjdate']]
108 | 
109 | # Download the S-34 Dataset
110 | 
111 | 
112 | def get_s34(db, cusip_list):
113 | fields = [
114 | 'fdate',
115 | 'mgrname',
116 | 'mgrno',
117 | 'rdate',
118 | 'cusip',
119 | 'shares',
120 | 'sole',
121 | 'shared',
122 | 'no',
123 | 'stkname',
124 | 'ticker',
125 | 'indcode',
126 | 'prc',
127 | 'shrout1',
128 | 'shrout2']
129 | fields_str = ', '.join(fields)
130 | query = "select " + fields_str + \
131 | " from tfn.s34 where rdate > '1979-12-31' and cusip in %s" % repr(
132 | tuple(map(str, cusip_list)))
133 | return clean_wrds(db.raw_sql(query))
134 | 
135 | # Download the business segments
136 | # - merge against crosswalk to get permnos
137 | # - only need count of observations (number of segments)
138 | # - coverage is not great
139 | 
140 | 
141 | def get_segments(db, crosswalk):
142 | fields = [
143 | 'gvkey',
144 | 'stype',
145 | 'datadate',
146 | 'naicss1',
147 | 'naicss2',
148 | 'naicss3',
149 | 'sics1',
150 | 'sics2',
151 | 'sics3']
152 | query = "select " + \
153 | ', '.join(fields) + \
154 | " from comp_segments_hist.wrds_segmerged where stype ='BUSSEG'"
155 | df = db.raw_sql(query)
156 | df['datadate'] = pd.to_datetime(df['datadate'])
157 | df = df.groupby(['gvkey', 'datadate']).count()['stype'].reset_index()
158 | df['quarter'] = df['datadate'].apply(end_quarter)
159 | # these should be unique within the quarter
160 | df.groupby(['gvkey', 'quarter'])['stype'].last().reset_index()
161 | x = pd.merge(df,
162 | crosswalk[['gvkey',
163 | 'lpermno',
164 | 'linkdt',
165 | 'linkenddt']],
166 | on='gvkey').rename(columns={'lpermno': 'permno'})
167 | return clean_wrds(x[(x.datadate >= x.linkdt) & (x.quarter <= x.linkenddt)].copy())[
168 | ['permno', 'quarter', 'datadate', 'stype']]
169 | 
170 | 
171 | # Generic cleaning function
172 | # -adjusts dates to pandas format
173 | # -adjusts integers to correct format
174 | def clean_wrds(df):
175 | col_list = df.iloc[0:1].select_dtypes(exclude=[np.datetime64]).columns
176 | int_cols = ['permno', 'hsiccd', 'siccd', 'permco', 'shares', 'mgrno']
177 | date_cols = [
178 | 'start',
179 | 'ending',
180 | 'namedt',
181 | 'nameenddt',
182 | 'st_date',
183 | 'end_date',
184 | 'date',
185 | 'altprcdt',
186 | 'fdate',
187 | 'rdate',
188 | 'linkdt',
189 | 'linkenddt',
190 | 'datadate',
191 | 'splitadjdate']
192 | my_intcols = [x for x in col_list if x in int_cols]
193 | my_datecols = [x for x in col_list if x in date_cols]
194 | 
195 | if my_intcols:
196 | df.loc[:, my_intcols] = df.loc[:, my_intcols].astype(int)
197 | if my_datecols:
198 | df.loc[:, my_datecols] = df.loc[:, my_datecols].apply(
199 | lookup_dates, axis=0)
200 | return df
201 | 
202 | 
203 | # Construct end of quarter date and take the last observation with group_id
204 | def convert_to_quarter(df, date_name, group_ids):
205 | df = df.sort_values(group_ids + [date_name])
206 | df['qdate'] = df[date_name] - \
207 | pd.tseries.offsets.DateOffset(days=1) + pd.tseries.offsets.QuarterEnd()
208 | return df.groupby(group_ids + ['qdate']).last().reset_index()
209 | 
210 | # Adjusts date_field to correspond to end of quarter
211 | # - Within a group_list round the date_field to the last date within the corresponding quarter
212 | # (do this only for the final date within group)
213 | 
214 | 
215 | def fix_ending_dates(df, date_field, group_list):
216 | df['final_date'] = df.groupby(group_list)[date_field].transform('last')
217 | df.loc[df[date_field] == df.final_date, date_field] = df.loc[df[date_field] == df.final_date,
218 | date_field] - pd.tseries.offsets.DateOffset(days=1) + pd.tseries.offsets.QuarterEnd()
219 | return df
220 | 
--------------------------------------------------------------------------------
/data/checks/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chrisconlon/CommonOwnerReplication/c430d4c174bed9e2d4a01cec1582ce7841781c0f/data/checks/.keep -------------------------------------------------------------------------------- /data/derived/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisconlon/CommonOwnerReplication/c430d4c174bed9e2d4a01cec1582ce7841781c0f/data/derived/.keep -------------------------------------------------------------------------------- /data/public/.gitattributes: -------------------------------------------------------------------------------- 1 | out_scrape.parquet filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /data/public/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisconlon/CommonOwnerReplication/c430d4c174bed9e2d4a01cec1582ce7841781c0f/data/public/.keep -------------------------------------------------------------------------------- /data/public/DLE_markups_fig_v2.csv: -------------------------------------------------------------------------------- 1 | 1955,1.270532 2 | 1956,1.253736 3 | 1957,1.24945 4 | 1958,1.251073 5 | 1959,1.268111 6 | 1960,1.251219 7 | 1961,1.292389 8 | 1962,1.30727 9 | 1963,1.326792 10 | 1964,1.341484 11 | 1965,1.342777 12 | 1966,1.337394 13 | 1967,1.332563 14 | 1968,1.338048 15 | 1969,1.342243 16 | 1970,1.331192 17 | 1971,1.327665 18 | 1972,1.32232 19 | 1973,1.317865 20 | 1974,1.288649 21 | 1975,1.275229 22 | 1976,1.269911 23 | 1977,1.262357 24 | 1978,1.269142 25 | 1979,1.234792 26 | 1980,1.210774 27 | 1981,1.207805 28 | 1982,1.226315 29 | 1983,1.248345 30 | 1984,1.25991 31 | 1985,1.272463 32 | 1986,1.312284 33 | 1987,1.32418 34 | 1988,1.369869 35 | 1989,1.369207 36 | 1990,1.369593 37 | 1991,1.376616 38 | 1992,1.393396 39 | 1993,1.383539 40 | 1994,1.36621 41 | 1995,1.381385 42 | 1996,1.406225 43 | 1997,1.418455 44 | 1998,1.440481 45 | 1999,1.464898 46 | 2000,1.47598 47 | 2001,1.449807 48 | 2002,1.448455 49 | 2003,1.468176 50 | 2004,1.465078 51 | 2005,1.467366 52 | 2006,1.45446 53 | 2007,1.464368 54 | 2008,1.444562 55 | 2009,1.474613 56 | 2010,1.488541 57 | 2011,1.462762 58 | 2012,1.462578 59 | 2013,1.476454 60 | 2014,1.485026 61 | 2015,1.522256 62 | 2016,1.606264 63 | -------------------------------------------------------------------------------- /data/public/airlines.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisconlon/CommonOwnerReplication/c430d4c174bed9e2d4a01cec1582ce7841781c0f/data/public/airlines.parquet -------------------------------------------------------------------------------- /data/public/big4.csv: -------------------------------------------------------------------------------- 1 | InvestorName,mgrno 2 | BlackRock,312069 3 | BlackRock,1003283 4 | BlackRock,1013231 5 | BlackRock,1139734 6 | Fidelity,315066 7 | Fidelity,27800 8 | State Street,93751 9 | State Street,924355 10 | State Street,81540 11 | State Street,81575 12 | State Street,5960 13 | State Street,22721 14 | Vanguard,102909 15 | Vanguard,90457 16 | BlackRock,9385 17 | BlackRock,39539 18 | BlackRock,91430 19 | BlackRock,56790 20 | BlackRock,11386 21 | BlackRock,12588 22 | BlackRock,7900 23 | BlackRock,92040 24 | BlackRock,92050 25 | BlackRock,7905 -------------------------------------------------------------------------------- /data/public/cereal.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisconlon/CommonOwnerReplication/c430d4c174bed9e2d4a01cec1582ce7841781c0f/data/public/cereal.parquet -------------------------------------------------------------------------------- /data/public/manager_consolidations.csv: -------------------------------------------------------------------------------- 1 | mgrno_from,mgrno_to 2 | 9385,9385 3 | 11386,9385 4 | 12588,9385 5 | 39539,9385 6 | 56790,9385 7 | 81575,9385 8 | 91430,9385 -------------------------------------------------------------------------------- /data/public/out_scrape.parquet: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:84bd97430b97326e229f6ae50b58522ab98d402d1fbb59448b6812c8b83305c6 3 | size 323182551 4 | -------------------------------------------------------------------------------- /data/public/permno_drops.csv: -------------------------------------------------------------------------------- 1 | stkname,permno,start,end,reason 2 | A. O. SMITH CORP,65402,3/31/1900,12/31/2100,Dual-Class Shares 3 | ALPHABET INC,90319,3/31/1900,12/31/2100,Dual-Class Shares 4 | AMERICAN FAMILY CORP,57904,3/31/1900,12/31/2100,Dual-Class Shares 5 | BERKSHIRE HATHAWAY INC,83443,3/31/1900,12/31/2100,Dual-Class Shares 6 | BERKSHIRE HATHAWAY INC CL A,17778,3/31/1900,12/31/2100,Dual-Class Shares 7 | BROADCOM CORP,85963,3/31/1900,12/31/2100,Dual-Class Shares 8 | BROWN FORMAN CORP CL A,29938,3/31/1900,12/31/2100,Dual-Class Shares 9 | BROWN FORMAN DISTILLERS CL B,29946,3/31/1900,12/31/2100,Dual-Class Shares 10 | CANADAIGUA WINE INC CL B,64899,3/31/1900,12/31/2100,Dual-Class Shares 11 | CBS CORP,75104,3/31/1900,12/31/2100,Dual-Class Shares 12 | CBS CORP NEW,76226,3/31/1900,12/31/2100,Dual-Class Shares 13 | CBS INC,20730,3/31/1900,12/31/2100,Dual-Class Shares 14 | COCA COLA BOTTLING CO N Y,27510,3/31/1900,12/31/2100,Dual-Class Shares 15 | COCA COLA ENTERPRISES INC NE,70500,3/31/1900,12/31/2100,Dual-Class Shares 16 | COCA-COLA CO,11308,3/31/1900,12/31/2100,Dual-Class Shares 17 | COMCAST CORP,89525,3/31/1900,12/31/2100,Dual-Class Shares 18 | COMCAST CORP CL A,25022,3/31/1900,12/31/2100,Dual-Class Shares 19 | COMCAST CORP CL A SPL,11997,3/31/1900,12/31/2100,Dual-Class Shares 20 | COMCAST CORP NEW,89565,3/31/1900,12/31/2100,Dual-Class Shares 21 | CONSTELLATION BRANDS INC,69796,3/31/1900,12/31/2100,Dual-Class Shares 22 | COORS ADOLPH CO CL B,59248,3/31/1900,12/31/2100,Dual-Class Shares 23 | DELL COMPUTER CORP,11081,3/31/1900,12/31/2100,Dual-Class Shares 24 | DELL TECHNOLOGIES INC,16267,3/31/1900,12/31/2100,Dual-Class Shares 25 | DISCOVERY COMMUNICATNS NEW,90805,3/31/1900,12/31/2100,Dual-Class Shares 26 | E W SCRIPPS CO,84176,3/31/1900,12/31/2100,Dual-Class Shares 27 | ECHOSTAR COMMUN CORP,81696,3/31/1900,12/31/2100,Dual-Class Shares 28 | EXPEDIA INC DEL,90808,3/31/1900,12/31/2100,Dual-Class Shares 29 | FACEBOOK INC,13407,3/31/1900,12/31/2100,Dual-Class Shares 30 | FEDERATED INVESTORS INC,86102,3/31/1900,12/31/2100,Dual-Class Shares 31 | FEDERATED INVS INC,34527,3/31/1900,12/31/2100,Dual-Class Shares 32 | FIRST DATA CORP,77546,3/31/1900,12/31/2100,Dual-Class Shares 33 | FORD MTR CO DEL,25785,3/31/1900,12/31/2100,Dual-Class Shares 34 | GOOGLE INC,14542,3/31/1900,12/31/2100,Dual-Class Shares 35 | HERSHEY CO,1660,3/31/1900,12/31/2100,Dual-Class Shares 36 | J. M. 
SMUCKER CO,42585,3/31/1900,12/31/2100,Dual-Class Shares 37 | LAUDER ESTEE COS INC,82642,3/31/1900,12/31/2100,Dual-Class Shares 38 | LENNAR CORP,52708,3/31/1900,12/31/2100,Dual-Class Shares 39 | LYONDELLBASELL INDUSTRIES N V,12345,3/31/1900,12/31/2100,Dual-Class Shares 40 | MCCORMICK & CO INC COM,89155,3/31/1900,12/31/2100,Dual-Class Shares 41 | MCCORMICK & CO INC N V,52090,3/31/1900,12/31/2100,Dual-Class Shares 42 | MEREDITH CORP,42796,3/31/1900,12/31/2100,Dual-Class Shares 43 | NACCO INDS INC,28118,3/31/1900,12/31/2100,Dual-Class Shares 44 | NEW NEWSCORP INC,13963,3/31/1900,12/31/2100,Dual-Class Shares 45 | NEW YORK TIMES CO,47466,3/31/1900,12/31/2100,Dual-Class Shares 46 | NEWS CORP LTD,69593,3/31/1900,12/31/2100,Dual-Class Shares 47 | NIKE INC,57665,3/31/1900,12/31/2100,Dual-Class Shares 48 | POLO RALPH LAUREN CORP,85072,3/31/1900,12/31/2100,Dual-Class Shares 49 | REGENERON PHARMACEUTCL,76614,3/31/1900,12/31/2100,Dual-Class Shares 50 | SCRIPPS E W CO,11936,3/31/1900,12/31/2100,Dual-Class Shares 51 | SCRIPPS NETWORKS INTERACT IN,92709,3/31/1900,12/31/2100,Dual-Class Shares 52 | SMITH A O CORP CL A,19852,3/31/1900,12/31/2100,Dual-Class Shares 53 | SMUCKER J M CO CL B,77058,3/31/1900,12/31/2100,Dual-Class Shares 54 | SYNOVUS FINL CORP,20053,3/31/1900,12/31/2100,Dual-Class Shares 55 | THE HERSHEY CO,16600,3/31/1900,12/31/2100,Dual-Class Shares 56 | TRIBUNE CO NEW,65787,3/31/1900,12/31/2100,Dual-Class Shares 57 | TRIBUNE MEDIA CO,15117,3/31/1900,12/31/2100,Dual-Class Shares 58 | TRIPADVISOR INC,13168,3/31/1900,12/31/2100,Dual-Class Shares 59 | TWENTY-FIRST CENTURY FOX INC,90441,3/31/1900,12/31/2100,Dual-Class Shares 60 | TYSON FOODS INC,77730,3/31/1900,12/31/2100,Dual-Class Shares 61 | UNDER ARMOUR INC,90979,3/31/1900,12/31/2100,Dual-Class Shares 62 | UNITED PARCEL SERVICE INC,87447,3/31/1900,12/31/2100,Dual-Class Shares 63 | UNIVERSAL HEALTH SERVICES IN,79637,3/31/1900,12/31/2100,Dual-Class Shares 64 | USA INTERACTIVE,78840,3/31/1900,12/31/2100,Dual-Class Shares 65 | VIACOM INC NEW,91063,3/31/1900,12/31/2100,Dual-Class Shares 66 | VISA INC,92611,3/31/1900,12/31/2100,Dual-Class Shares 67 | WASHINGTON POST CO,53225,3/31/1900,12/31/2100,Dual-Class Shares 68 | WESTINGHOUSE ELEC CORP,15368,3/31/1900,12/31/2100,Dual-Class Shares 69 | CHRYSLER CORP,11260,3/31/1979,12/31/1983,Bailout 70 | GENERAL MOTORS CO,12369,12/31/2009,12/31/2013,Bailout 71 | AMERICAN INTERNATIONAL GROUP IN,66800,3/31/2008,12/31/2012,Bailout 72 | WALMART INC,55976,3/31/1900,12/31/2100,Controlling Interest 73 | TEXAS GAS TRANSMISSION CORP,26788,3/31/1900,12/31/2100,Controlling Interest 74 | DIAMOND OFFSHORE DRILLING INC,82298,3/31/1900,12/31/2100,Controlling Interest 75 | UNILEVER N V,28310,3/31/1900,12/31/2100,ADR 76 | PULTE HOMES INC,54148,12/31/1987,3/31/1988,Unknown Data Issue 77 | VISTEON CORP,88319,3/31/1900,6/30/2000,First year of spinoff problematic 78 | IDEAL BASIC INDUSTRIES,26286,3/31/1900,12/31/2100,Controlling Interest 79 | R J R NABISCO,14218,6/30/1986,6/30/1988,Unknown Data Issue 80 | CONOCO INC,11471,3/31/1900,6/30/1981,Controlling Interest 81 | BROWN COMPANY,29911,3/31/1900,12/31/2100,Controlling Interest 82 | C N A FINANCIAL CORP,47626,3/31/1900,12/31/2100,Controlling Interest 83 | SEARS HOLDING CORP,89757,3/31/1900,12/31/2100,Controlling Interest 84 | PEABODY ENERGY CORP,88991,3/31/1900,12/31/2100,Controlling Interest -------------------------------------------------------------------------------- /data/wrds/.keep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chrisconlon/CommonOwnerReplication/c430d4c174bed9e2d4a01cec1582ce7841781c0f/data/wrds/.keep
--------------------------------------------------------------------------------
/figures/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chrisconlon/CommonOwnerReplication/c430d4c174bed9e2d4a01cec1582ce7841781c0f/figures/.keep
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.17.5
2 | pandas>=1.0.4
3 | matplotlib>=3.2.1
4 | pyarrow>=1.0.0
5 | brotli>=1.0.7
6 | seaborn>=0.10.1
7 | wrds>=3.0.8
8 | scikit-learn>=0.23.1
9 | pyhdfe>=0.1.0
10 | pyblp>=0.10.0
11 | xlsxwriter==1.2.9
12 | statsmodels>=0.11.1
--------------------------------------------------------------------------------
/run_all.bat:
--------------------------------------------------------------------------------
1 | 
2 | rem Note: cmd has no equivalent of bash's "set -e"; a failing step will not halt this script
3 | 
4 | rem Install Packages
5 | pip install -r requirements.txt
6 | 
7 | rem If you are in main directory with run_all.bat
8 | rem you will need to go to code to run everything
9 | cd code
10 | 
11 | 
12 | rem Python block
13 | rem data generating block
14 | 
15 | python 1_Download_WRDS_Data.py
16 | python 2_Process_WRDS_Data.py
17 | python 3_Calculate_Kappas.py
18 | 
19 | rem plot creating block
20 | 
21 | python plots1_basic_descriptives.py
22 | 
23 | python plots2_kappa_official.py
24 | 
25 | python plots3_big_three_four.py
26 | 
27 | python plots4_investor_similarity.py
28 | 
29 | python plots5_airlines_cereal.py
30 | 
31 | python plots6_sole_vs_shared.py
32 | 
33 | python plots7_short_interest_coverage.py
34 | 
35 | python plots8_individual_firm_coverage.py
36 | 
37 | python plots9_blackrock_vanguard.py
38 | 
39 | python plots10_kappa_comparison_appendix.py
40 | 
41 | python plots11_profit_simulations.py
42 | 
43 | rem table creating block
44 | python table3_variance_decomp.py
45 | 
46 | python table4_kappa_correlation.py
47 | 
48 | 
--------------------------------------------------------------------------------
/run_all.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -e
3 | 
4 | # Install Packages
5 | pip install -r requirements.txt
6 | 
7 | ## If you are in main directory with run_all.sh
8 | # you will need to go to code to run everything
9 | cd code
10 | 
11 | 
12 | ## Python block
13 | # data generating block
14 | 
15 | python 1_Download_WRDS_Data.py
16 | python 2_Process_WRDS_Data.py
17 | python 3_Calculate_Kappas.py
18 | 
19 | # plot creating block
20 | 
21 | python plots1_basic_descriptives.py
22 | 
23 | python plots2_kappa_official.py
24 | 
25 | python plots3_big_three_four.py
26 | 
27 | python plots4_investor_similarity.py
28 | 
29 | python plots5_airlines_cereal.py
30 | 
31 | python plots6_sole_vs_shared.py
32 | 
33 | python plots7_short_interest_coverage.py
34 | 
35 | python plots8_individual_firm_coverage.py
36 | 
37 | python plots9_blackrock_vanguard.py
38 | 
39 | python plots10_kappa_comparison_appendix.py
40 | 
41 | python plots11_profit_simulations.py
42 | 
43 | # table creating block
44 | python table3_variance_decomp.py
45 | 
46 | python table4_kappa_correlation.py
47 | 
48 | 
--------------------------------------------------------------------------------
/tables/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chrisconlon/CommonOwnerReplication/c430d4c174bed9e2d4a01cec1582ce7841781c0f/tables/.keep -------------------------------------------------------------------------------- /wrds_constituents.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisconlon/CommonOwnerReplication/c430d4c174bed9e2d4a01cec1582ce7841781c0f/wrds_constituents.pdf --------------------------------------------------------------------------------