├── .gitattributes
├── .gitignore
├── LICENSE
├── README.html
├── README.md
├── README.pdf
├── code
│   ├── 1_Download_WRDS_Data.py
│   ├── 2_Process_WRDS_Data.py
│   ├── 3_Calculate_Kappas.py
│   ├── firminfo.py
│   ├── investors.py
│   ├── kappas.py
│   ├── our_plot_config.py
│   ├── plots10_kappa_comparison_appendix.py
│   ├── plots11_profit_simulations.py
│   ├── plots1_basic_descriptives.py
│   ├── plots2_kappa_official.py
│   ├── plots3_big_three_four.py
│   ├── plots4_investor_similarity.py
│   ├── plots5_airlines_cereal.py
│   ├── plots6_sole_vs_shared.py
│   ├── plots7_short_interest_coverage.py
│   ├── plots8_individual_firm_coverage.py
│   ├── plots9_blackrock_vanguard.py
│   ├── table3_variance_decomp.py
│   ├── table4_kappa_correlation.py
│   ├── utilities
│   │   ├── date_util.py
│   │   ├── matlab_util.py
│   │   └── quantiles.py
│   ├── wrds_checks.py
│   ├── wrds_cleaning.py
│   └── wrds_downloads.py
├── data
│   ├── checks
│   │   └── .keep
│   ├── derived
│   │   └── .keep
│   ├── public
│   │   ├── .gitattributes
│   │   ├── .keep
│   │   ├── DLE_markups_fig_v2.csv
│   │   ├── airlines.parquet
│   │   ├── big4.csv
│   │   ├── cereal.parquet
│   │   ├── manager_consolidations.csv
│   │   ├── out_scrape.parquet
│   │   └── permno_drops.csv
│   └── wrds
│       └── .keep
├── figures
│   └── .keep
├── requirements.txt
├── run_all.bat
├── run_all.sh
├── tables
│   └── .keep
└── wrds_constituents.pdf
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # celery beat schedule file
95 | celerybeat-schedule
96 |
97 | # SageMath parsed files
98 | *.sage.py
99 |
100 | # Environments
101 | .env
102 | .venv
103 | env/
104 | venv/
105 | ENV/
106 | env.bak/
107 | venv.bak/
108 |
109 | # Spyder project settings
110 | .spyderproject
111 | .spyproject
112 |
113 | # Rope project settings
114 | .ropeproject
115 |
116 | # mkdocs documentation
117 | /site
118 |
119 | # mypy
120 | .mypy_cache/
121 | .dmypy.json
122 | dmypy.json
123 |
124 | # Pyre type checker
125 | .pyre/
126 |
127 | # Project Data files
128 | *.parquet
129 | !airlines.parquet
130 | !cereal.parquet
131 |
132 | *.xlsx
133 | *.pickle
134 | markup-simulations.csv
135 |
136 | # Tex Files
137 | *.tex
138 | *.aux
139 | *.log
140 |
141 | # Figures
142 | *.pdf
143 | !README.pdf
144 | !wrds_constituents.pdf
145 |
146 | # Mac Garbage
147 | .DS_Store
148 | code.zip
149 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 chrisconlon
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.html:
--------------------------------------------------------------------------------
(HTML rendering of the README; its content duplicates /README.md below and is omitted here.)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Replication Instructions for: Common Ownership in America: 1980-2017
2 | Backus, Conlon and Sinkinson (2020)
3 | AEJMicro-2019-0389
4 | openicpsr-120083
5 | A copy of the paper is here: https://chrisconlon.github.io/site/common_owner.pdf
6 |
7 |
8 | ### Open ICPSR Install Instructions
9 | 1. Download and unzip the repository.
10 | 2. All required files are included or are downloaded programmatically from WRDS (see notes below).
11 |
12 | ### GitHub Install Instructions
13 | To download the repo simply type:
14 |
15 | git clone https://github.com/chrisconlon/CommonOwnerReplication
16 |
17 | You will need the Git Large File Storage (LFS) extension installed (which you probably do not have by default).
18 |
19 | To install this extension, follow the directions at:
20 | https://git-lfs.github.com
21 |
22 | ### Dataset Size and Memory
23 | 1. We recommend that you have at least 64GB of RAM available.
24 | 2. All of the datasets saved will take up about 14 GB of drive space.
25 | 3. NumPy is used extensively for the calculations and is multithreaded (so more cores will help).
26 | 4. The computation of the $\kappa_{fg}$ terms is parallelized quarter by quarter explicitly (so cores will help a lot here).
27 | 5. But most of the time is spent merging and filtering data in pandas (more cores don't help much).
28 | 6. Total runtime on a 2015 iMac with 64GB of RAM is around 3 hours.
29 | 7. WRDS download time is about an hour (depending on internet speed) and the total download is > 10GB.
30 |
31 | ### Downloading from WRDS
32 | You must provide your own WRDS account. You will be prompted for your WRDS username and password when running 1_Download_WRDS_Data.py.
33 |
34 | To request an account, please visit:
35 | https://wrds-www.wharton.upenn.edu/register/
36 |
37 | If you do not have API access, you will need to consult the wrds_constituents.pdf document for instructions on using the WRDS web interface. This is strongly NOT RECOMMENDED. Because you cannot apply complex filters to the SQL queries as we do programmatically, you will also need much more disk space (on the order of a terabyte) to save the entire Thomson-Reuters s34 13F database.
38 |
39 | If you are running this as a batch job (not interactively), such as on an HPC cluster, you will need to pre-enter your WRDS password by creating a pgpass file.
40 |
41 | As an example:
42 |
43 | ```
44 | import wrds
45 | db = wrds.Connection(wrds_username='joe')
46 | db.create_pgpass_file()
47 | ```
48 |
49 | If you encounter a problem, it might be that your pgpass file is not accessible by your batch job.
50 |
51 | For more information, please see: [https://wrds-www.wharton.upenn.edu/pages/support/programming-wrds/programming-python/python-from-your-computer/](https://wrds-www.wharton.upenn.edu/pages/support/programming-wrds/programming-python/python-from-your-computer/)
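
As a quick sanity check (a minimal sketch, not part of the replication scripts; the username 'joe' is a placeholder as in the example above), you can confirm that the stored credentials work and that your account can see the libraries this package queries:

```
import wrds

# With ~/.pgpass in place, this connects without prompting for a password.
db = wrds.Connection(wrds_username='joe')

# The download script pulls from CRSP (crsp), Compustat (comp), and Thomson-Reuters (tfn).
print([lib for lib in db.list_libraries() if lib in ('crsp', 'comp', 'tfn')])
db.close()
```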
53 |
54 | ### Python dependencies
55 | Our run_all.sh bash script should install all of the required Python dependencies (assuming Python itself is installed correctly and you have the necessary access to install packages).
56 |
57 | To install those dependencies manually (such as on a shared server), you may need to do the following.
58 |
59 | Python (version 3.8 or above) - install dependencies with
60 |
61 | pip3 install -r requirements.txt
62 |
63 | numpy, pandas, matplotlib, pyarrow, brotli, seaborn, wrds, scikit-learn, pyhdfe, pyblp, statsmodels
64 |
65 | We anticipate most users will be running this replication package from within an Anaconda environment. To avoid making changes to your base environment, you will want to create a separate environment for this replication package. To do that:
66 |
67 | ```
68 | conda create --name common_owner --file requirements.txt
69 | conda activate common_owner
70 | ```
71 |
72 | ## How to run the code
73 | Change to the directory containing this file and run "./run_all.sh" on the terminal. The code should take approximately 3-10 hours to run. Tables and figures will be produced as described below.
74 |
75 | ```
76 | cd code
77 | ./run_all.sh
78 | ```
79 |
80 | ### Windows Warning
81 | Windows Users: instead use "run_all.bat" from the command prompt.
82 |
83 | There are known conflicts between Windows 10 and core Python DLLs in Python versions < 3.7.3. If you are running on Windows 10, all Python programs will run best with Python 3.8 or later (see: https://bugs.python.org/issue35797).
84 |
85 |
86 | ## File of origin for tables and figures
87 |
88 | | Table/Figure Number | Generating File |
89 | | --- |---|
90 | | Table 1 | (by hand) |
91 | | Table 2 | (by hand) |
92 | | Table 3 | table3_variance_decomp.py |
93 | | Table 4 | table4_kappa_correlation.py |
94 | | Figure 1 | plots2_kappa_official.py |
95 | | Figure 2 | plots1_basic_descriptives.py |
96 | | Figure 3 | plots1_basic_descriptives.py |
97 | | Figure 4 | plots1_basic_descriptives.py |
98 | | Figure 5 | plots3_big_three_four.py |
99 | | Figure 6 | plots2_kappa_official.py |
100 | | Figure 7 | plots2_kappa_official.py |
101 | | Figure 8 | plots4_investor_similarity.py |
102 | | Figure 9 | plots2_kappa_official.py |
103 | | Figure 10 | plots11_profit_simulations.py |
104 | | Figure 11 | plots11_profit_simulations.py |
105 | | Figure 12 | plots9_blackrock_vanguard.py |
106 | | Figure 13 | plots2_kappa_official.py |
107 | | Figure 14 | plots2_kappa_official.py |
108 | | Figure 15 | plots2_kappa_official.py |
109 | | Figure 16 | plots5_airlines_cereal.py |
110 | | Figure 17 | plots6_sole_vs_shared.py |
111 | | Figure A1 | plots1_basic_descriptives.py |
112 | | Figure A2 | plots8_individual_firm_coverage.py |
113 | | Figure A3 | plots10_kappa_comparison_appendix.py |
114 | | Figure A4 | plots7_short_interest_coverage.py |
115 | | Figure A5 | plots7_short_interest_coverage.py |
116 | | Figure A6 | plots2_kappa_official.py |
117 | | Figure A7 | plots2_kappa_official.py |
118 | | Figure A8 | plots4_investor_similarity.py |
119 |
120 |
121 | ## Within-File Dependencies:
122 | 1_Download_WRDS_Data.py:
123 |
124 | wrds_downloads
125 |
126 | 2_Process_WRDS_Data.py:
127 |
128 | wrds_cleaning
129 | wrds_checks
130 |
131 | 3_Calculate_Kappas.py:
132 |
133 | kappas
134 | investors
135 | firminfo
136 | utilities/quantiles
137 |
138 | plots3_big_three_four.py:
139 |
140 | kappas
141 | investors
142 |
143 | plots5_airlines_cereal.py:
144 |
145 | kappas
146 |
147 | plots9_blackrock_vanguard.py:
148 |
149 | kappas
150 |
151 | plots10_kappa_comparison_appendix.py:
152 |
153 | utilities/matlab_util
154 |
155 |
156 | ## Files Provided and Data Access Statements
157 |
158 | ### WRDS
159 |
160 | We use several data sources from WRDS. These are accessed programmatically through the WRDS API, so we are not able to include the individual files in this replication package. (See terms: https://wrds-www.wharton.upenn.edu/users/tou/).
161 |
162 | They include:
163 | A. CRSP: data on securities prices and shares outstanding; list of S&P 500 constituents.
164 | B. Compustat: business fundamentals, short interest, business segment info.
165 | C. Thomson-Reuters: s34 database of 13f filings/ownership.
166 |
167 | ### Author Constructed files
168 | data/public:
169 |
170 | The files below are publicly available CSVs constructed by the authors. These are drops, consolidations, and manager identifiers that are used in our project. They are distributed with this code package (a short loading sketch follows the list).
171 |
172 | 1. manager_consolidations.csv: lists consolidated manager numbers (several manager numbers actually correspond to a single manager)
173 | 2. permno_drops.csv: lists dropped permno IDs with reasons why they are dropped
174 | 3. big4.csv: lists manager numbers for BlackRock, Fidelity, State Street, and Vanguard
175 |
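These are plain CSV files; here is a minimal loading sketch (paths follow our_plot_config.py, which points raw_dir at data/public):

```
import pandas as pd
from our_plot_config import raw_dir

# big4.csv maps 13F manager numbers (mgrno) to the four large investors;
# it is merged onto the holdings data on mgrno (see calc_big4 in investors.py).
big4 = pd.read_csv(raw_dir / 'big4.csv')

# permno_drops.csv lists the permnos we exclude, along with the reason for each drop.
drops = pd.read_csv(raw_dir / 'permno_drops.csv')
```
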
176 | The markups from DLEU 2020 can be reproduced by running the replication package:
177 |
178 | ### DeLoecker Eeckhout Unger Markups
179 | 4. DLE_markups_fig_v2.csv: markups from Figure 10 of DeLoecker Eeckhout Unger (QJE 2020)
180 |
181 | De Loecker, Jan; Eeckhout, Jan; Unger, Gabriel, 2020,
182 | "Replication Data for: 'The Rise of Market Power and the Macroeconomic Implications'", https://doi.org/10.7910/DVN/5GH8XO, Harvard Dataverse, V1
183 |
184 | That replication package requires access to WRDS. A subset of the markups (and no additional data) is being made publicly available here.
185 |
186 | ### Scraped 13f filings
187 | The original source data are the publicly available SEC 13f filing data from EDGAR: https://www.sec.gov/edgar/searchedgar/companysearch.html
188 |
189 | Most users instead access the Thomson-Reuters S34 database from WRDS (as our script above does). We've also scraped the original source documents from EDGAR and compiled them into an easy-to-use format. We provide the entire universe of 13f filings as a separate dataset. For the purposes of replicating this paper, we use three smaller extracts as parquet files:
190 |
191 | 5. cereal.parquet: extract 13F Filings for firms within the cereal industry (includes small cap)
192 | 6. airlines.parquet: extract 13F Filings for firms within the airline industry (includes small cap)
193 | 7. out_scrape.parquet: extract 13F Filings for LARGE cap firms (a superset of the S&P 500) from 1999-2017 (300MB).
194 |
195 | Each file contains:
196 | - 13F filings beginning in 1999 and ending in late 2017 (the data period for this paper).
197 |
198 | The full set of scraped 13f filings and a detailed description of how extracts were created are available in two places:
199 |
200 | 1. The live version of the 13f scraping project is [https://sites.google.com/view/msinkinson/research/common-ownership-data?](https://sites.google.com/view/msinkinson/research/common-ownership-data?)
201 |
202 | 2. The permanent archived version (including these extracts) is available to the public at Harvard Dataverse (doi:10.7910/DVN/ZRH3EU):
203 | https://doi.org/10.7910/DVN/ZRH3EU
204 |
205 | Backus, Matthew; Conlon, Christopher T; Sinkinson, Michael; 2020, "Common Ownership Data: Scraped SEC form 13F filings for 1999-2017", https://doi.org/10.7910/DVN/ZRH3EU, Harvard Dataverse, V1.1
206 |
207 |
208 | ### Description of .parquet file format
209 | We use the parquet format for:
210 |
211 | - Large data inputs (above)
212 | - Most intermediary datasets
213 |
214 | Parquet files are compressed columnar storage binaries that are readable by several software packages (R, Python, Stata, Julia, C++, etc.) and platforms. The goal of the parquet project is to maintain good performance for large datasets as well as interoperability.
215 |
216 | The storage method is stable and maintained by the Apache Foundation.
217 | https://parquet.apache.org/documentation/latest/
218 |
219 | We use the Python package "pyarrow" to read parquet files and the package "brotli" for compression (both listed in requirements.txt).
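
For example, the provided extracts can be read directly with pandas (a minimal sketch; the copy written to data/derived is purely illustrative):

```
import pandas as pd

# Read one of the provided extracts (with pyarrow installed, pandas uses it as the parquet engine).
airlines = pd.read_parquet('data/public/airlines.parquet')

# Intermediate outputs in this package are written the same way, brotli-compressed.
airlines.to_parquet('data/derived/airlines_copy.parquet', compression='brotli')
```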
220 |
--------------------------------------------------------------------------------
/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chrisconlon/CommonOwnerReplication/c430d4c174bed9e2d4a01cec1582ce7841781c0f/README.pdf
--------------------------------------------------------------------------------
/code/1_Download_WRDS_Data.py:
--------------------------------------------------------------------------------
1 | # Step 1: Download Data from WRDS
2 | # Note: you will need a WRDS account for wrds.Connection() to work
3 | import pandas as pd
4 | import wrds
5 | from our_plot_config import wrds_dir
6 | from wrds_downloads import clean_wrds, get_names, get_crosswalk
7 | from wrds_downloads import get_fundamentals, get_short_interest
8 | from wrds_downloads import get_segments, get_msf, get_s34
9 |
10 | # raw data pulls -- save in "WRDS" directory
11 | f_raw_s34 = wrds_dir / 'raw_s34.parquet'
12 | f_splist = wrds_dir / 'sp500_list.parquet'
13 | f_crsp_names = wrds_dir / 'crsp_names.parquet'
14 | f_msf_data = wrds_dir / 'crsp_msf.parquet'
15 | f_short = wrds_dir / 'short_interest.parquet'
16 | f_fundamentals = wrds_dir / 'fundamentals_data.parquet'
17 | f_segments = wrds_dir / 'wrds_segments.parquet'
18 | f_managers = wrds_dir / 'manager_list.parquet'
19 | f_managers_all = wrds_dir / 'manager_list_all.parquet'
20 | f_names = wrds_dir / 'all_names.parquet'
21 |
22 | # Pull the Data from WRDS ~20 min (ENTIRE FILE)
23 | # This file requires about 48GB of RAM available
24 |
25 | db = wrds.Connection()
26 |
27 | # Pull the ID/Crosswalk Tables
28 | # - Pull the S&P 500 Constituents List (CRSP)
29 | # - Pull the "names" file: this maps permno to CUSIP, NCUSIP (current period), and SIC code by date
30 | # - Pull the Compustat link file : Construct a unique mapping from gvkey (Compustat) to Permno (CRSP)
31 | # - Save the raw (un-filtered by time or S&P membership) files
32 |
33 | # This block is < 1m
34 | df_sp500 = clean_wrds(db.get_table('crsp', 'DSP500LIST'))
35 | df_sp500.to_parquet(f_splist)
36 | print("First File Done: WRDS connection is probably ok")
37 |
38 |
39 | # Filter S&P List: Ignore pre-1980 components
40 | df_sp500 = df_sp500[df_sp500.ending > '1979-12-31']
41 |
42 | df_names = get_names(db)
43 | df_names.to_parquet(f_crsp_names)
44 |
45 | # Grab all possible CUSIPS by Permno
46 | df_names2 = pd.merge(df_sp500, df_names, on='permno')
47 | df_names2 = df_names2[~((df_names2['ending'] < df_names2['st_date']) | (
48 | df_names2['start'] > df_names2['end_date']))]
49 |
50 | # Get unique list of CUSIPs and Permno's for SQL queries
51 | all_cusips = list(set(df_names2.cusip).union(df_names2.ncusip))
52 | all_permnos = list(df_names2.permno.unique().astype(int))
53 |
54 | crosswalk = get_crosswalk(db, all_permnos)
55 |
56 | # Pull the CRSP and Compustat Data Files (< 1m)
57 | # Pull the Compustat Short Interest File
58 | # - Add permno's to short interest table
59 | # - Convert Short interest table to quarterly observations
60 | # - Take last observations within each Permno, Quarter
61 | #
62 | # Pull the Compustat Fundamentals Data
63 | # - Add permnos and CUSIPS to the Fundamentals data
64 | #
65 | # Pull the Compustat Business Segments Data
66 | # - Just count the number of segments
67 | # - Add permnos to number of segments
68 | #
69 | # Pull the CRSP Price and Shares Outstanding MSF Data
70 | # - Save to parquet (around 2MB compressed)
71 | # - Use this to get a single price, shares_outstanding for each security quarter
72 |
73 | df_fund = get_fundamentals(db, crosswalk)
74 | df_fund.to_parquet(f_fundamentals)
75 |
76 | df_short = get_short_interest(db, crosswalk)
77 | df_short.to_parquet(f_short, compression='brotli')
78 |
81 |
82 | df_seg = get_segments(db, crosswalk)
83 | df_seg.to_parquet(f_segments, compression='brotli')
84 |
85 | df_msf2 = get_msf(db, all_permnos, False)
86 | df_msf2.to_parquet(f_msf_data, compression='brotli')
87 |
88 |
89 | # Get Managers and stock names
90 | df_m = db.get_table('tfn', 's34type1')
91 | df_m.to_parquet(f_managers_all, compression='brotli')
92 |
93 | names = db.get_table('crsp', 'stocknames')
94 | names.to_parquet(f_names)
95 |
96 | # #### Pull the S-34 Data -- This is SLOW don't re-run ~15m
97 | # - Only get for 8-digit CUSIPs in our S&P dataset
98 | # - This is VERY slow and around 5.5 GB (320MB on disk)
99 | # - Use this to get holdings for each 13-F investor (Don't trust self reported prices or shares outstanding)
100 |
101 | print("Starting s34 Download...")
102 | s34_data = get_s34(db, all_cusips)
103 | s34_data.to_parquet(f_raw_s34, compression='brotli')
104 | print("S34 Complete!")
105 |
106 |
107 | # unique list of manager names
108 | mgr_list = s34_data.groupby(
109 | ['mgrno'])['mgrname'].agg(
110 | pd.Series.mode).reset_index()
111 | mgr_list['mgrname'] = mgr_list['mgrname'].astype(str)
112 | mgr_list.to_parquet(f_managers, compression='brotli')
113 |
--------------------------------------------------------------------------------
/code/2_Process_WRDS_Data.py:
--------------------------------------------------------------------------------
1 | from our_plot_config import raw_dir, wrds_dir, derived_dir, checks_dir
2 | import pandas as pd
3 |
4 | from wrds_cleaning import expand_names, make_cusip_list, construct_fundamentals, get_sp_quarters, read_s34
5 | from wrds_cleaning import construct_bus_segments, consolidate_mgrs, filter_s34
6 | from wrds_cleaning import compute_betas, add_drops, process_scraped, blackrock_fix
7 | from wrds_cleaning import add_stock_splits, dedup_s34, combine_betas
8 |
9 | from wrds_checks import check_bigbeta, check_s34, check_names, check_blackrock
10 | from wrds_checks import check_s34_coverage, check_multiple_cusip, check_fundamental_coverage
11 |
12 |
13 | # Public (hand) inputs
14 | f_scrape = raw_dir / 'out_scrape.parquet' # CRM renamed
15 | f_big4 = raw_dir / 'big4.csv'
16 |
17 | # raw data pulls
18 | # CRM update: calling these WRDS
19 | f_raw_s34 = wrds_dir / 'raw_s34.parquet'
20 | f_splist = wrds_dir / 'sp500_list.parquet'
21 | f_crsp_names = wrds_dir / 'crsp_names.parquet'
22 | f_msf_data = wrds_dir / 'crsp_msf.parquet'
23 | f_short = wrds_dir / 'short_interest.parquet'
24 | f_fundamentals = wrds_dir / 'fundamentals_data.parquet'
25 | f_segments = wrds_dir / 'wrds_segments.parquet'
26 |
27 | # drops and consolidations
28 | f_permno_drops = raw_dir / 'permno_drops.csv'
29 | f_mgr_consolidations = raw_dir / 'manager_consolidations.csv'
30 |
31 | # Outputs
32 | # other info
33 | f_comp_info = derived_dir / 'compustat_info.parquet'
34 | f_names_expanded = derived_dir / 'expanded_names.parquet'
35 |
36 | # Betas
37 | f_betas_unfiltered = derived_dir / '13f_sp500_unfiltered.parquet'
38 | f_betas_scraped = derived_dir / '13f_scraped.parquet'
39 | f_frankenbetas = derived_dir / '13f_sp500_frankenbeta.parquet'
40 |
41 | # Read in the raw parquet files from SQL queries
42 | df_sp500 = pd.read_parquet(f_splist)
43 | df_names = pd.read_parquet(f_crsp_names)
44 | df_msf2 = pd.read_parquet(f_msf_data)
45 | df_short = pd.read_parquet(f_short)
46 |
47 | # Match the names file against the S&P list and expand to quarters
48 | df_names2 = expand_names(df_names, df_sp500)
49 | df_names2.to_parquet(f_names_expanded, compression='brotli')
50 |
51 | # Do Compustat (Fundamentals, Bus Segments, etc.)
52 | # make sure that fundamentals data is unique permno-quarter
53 | cusip_list = make_cusip_list(df_names)
54 | df_fund = construct_fundamentals(pd.read_parquet(f_fundamentals), df_names2)
55 | df_bus = construct_bus_segments(pd.read_parquet(f_segments), df_sp500)
56 | df_fund2 = pd.merge(df_fund, df_bus, on=['permno', 'quarter'], how='outer')
57 | df_fund2.to_parquet(f_comp_info, compression='brotli')
58 |
59 | # List of S&P permno, cusip, quarters
60 | sp_df = get_sp_quarters(df_sp500, cusip_list)
61 |
62 |
63 | # ### Merge and Drops ~ 5m
64 | # - Merge: Permno information from CRSP names file to 13-F filings
65 | # - Drop: Non S&P 500 component filings from 13-f's
66 | # - Fix: Adjust Blackrock dates because of known reporting issue (see https://wrds-www.wharton.upenn.edu/pages/support/research-wrds/research-guides/research-note-regarding-thomson-reuters-ownership-data-issues/)
67 | # - Merge: stock split information from MSF file (cfacshr) (https://wrds-support.wharton.upenn.edu/hc/en-us/articles/115003101112-Adjusting-Splits-Using-CRSP-Data)
68 | # - Fix: Select a single Filing Date (Fdate) for each Rdate.
69 | # - 24,432,318 Obs have single observation
70 | # - 2,608,149 Obs have multiple filings with same shares (different prices)
71 | # - 84,159 Obs have a known share split: take the first filing (before share split)
72 | # - 44,874 Obs have no known share split: take the last filing (assume these are corrections)
73 | # - Merge and Consolidate: Managers using consolidation file (Blackrock, Inc --> Blackrock, etc.)
74 | # - Calculate $\beta_{fs}$ for each quarter in LONG format.
75 | # - Add possible drops: by permno (dual-class shares, ADRs, etc.), share class (ADRs, REITs, etc.)
76 |
77 | # Process Thomson-Reuters $\beta$
78 | # this needs about 20 GB of RAM
79 | # 1. Apply fixes and merges described above
80 |
81 | s34_data = filter_s34(read_s34(f_raw_s34), sp_df)
82 | main_df = consolidate_mgrs(
83 | dedup_s34(
84 | add_stock_splits(
85 | s34_data,
86 | df_msf2)),
87 | f_mgr_consolidations)
88 | df1 = compute_betas(main_df, df_msf2)
89 | df1 = add_drops(df1, f_permno_drops, df_names2)
90 | df1.to_parquet(f_betas_unfiltered, compression='brotli')
91 |
92 | # Process Scraped 13F's ~3min
93 | # 1. Append it to the existing dataset
94 | # 2. Add the drops
95 |
96 | dfs = process_scraped(f_scrape, f_big4)
97 | dfs = add_drops(dfs, f_permno_drops, df_names2)
98 | dfs.to_parquet(f_betas_scraped, compression='brotli')
99 |
100 | # Combine Both Sets of $\beta$s
101 | # - Use TR data before 2001
102 | # - Use scraped data after 2001
103 | # - Save the combined FrankenBeta file
104 |
105 | # use TR before cut-date and scraped data after
106 | df = combine_betas(df1, dfs, cut_date='2000-01-01')
107 | df.to_parquet(f_frankenbetas, compression='brotli')
108 |
109 | # Checks
110 | # 1. Tabulate: Missing Shares Outstanding (TR), Missing Price Info (TR), Duplicate Observations within an Fdate/Rdate and Permno, Manager
111 | # 2. Tabulate: 18 cases where firms exist in S&P500 but not in the names file (yet).
112 | # 3. 1057 Observations (Firm-Quarter) in S&P500 but not in S34 Data (959 after 2010).
113 | # 4. 924 Observations with multiple CUSIPS in same period for same firm
114 | # (these are filings with typos, weird share classes, etc.)
115 |
116 | print(checks_dir)
117 | # Define the Checks
118 | f_notin_crsp = checks_dir / 'compustat-notin-crsp.xlsx'
119 | f_shares_out = checks_dir / 's34-no-shares.xlsx'
120 | f_prc_zero = checks_dir / 's34-zero-price.xlsx'
121 | f_duplicates = checks_dir / 's34_duplicate_permno.xlsx'
122 | f_names_missing = checks_dir / 'unmatched-names-splist.xlsx'
123 | f_s34_coverage = checks_dir / 'coverage_s34.xlsx'
124 | f_multiple_cusips = checks_dir / 'multiple_cusips.xlsx'
125 | f_multiple_cusips_summary = checks_dir / 'multiple_cusips_summary.xlsx'
126 | f_missing_betas = checks_dir / 'missing_betas.xlsx'
127 | f_missing_atq = checks_dir / 'missing_atq.xlsx'
128 | f_missing_segments = checks_dir / 'missing_segments.xlsx'
129 | f_bigbeta_1 = checks_dir / 'big_betas_tr.xlsx'
130 | f_bigbeta_2 = checks_dir / 'big_betas_scrape.xlsx'
131 |
132 |
133 | # Run the Checks
134 | check_s34(s34_data, f_shares_out, f_prc_zero, f_duplicates)
135 | check_names(df_sp500, df_names, f_names_missing)
136 | check_s34_coverage(df1, df_sp500, df_names, f_s34_coverage)
137 | check_multiple_cusip(s34_data, f_multiple_cusips, f_multiple_cusips_summary)
138 | check_bigbeta(df1, f_bigbeta_1)
139 | check_bigbeta(dfs, f_bigbeta_2)
140 | check_fundamental_coverage(
141 | df,
142 | df_fund2,
143 | df_names2,
144 | f_missing_betas,
145 | f_missing_atq,
146 | f_missing_segments)
147 |
--------------------------------------------------------------------------------
/code/3_Calculate_Kappas.py:
--------------------------------------------------------------------------------
1 | from our_plot_config import derived_dir, raw_dir
2 | import pandas as pd
3 |
4 | from kappas import process_beta, beta_to_kappa, calc_chhis, fix_scrape_cols
5 | from investors import compute_investor_info, calc_big4, do_one_firm_similarity
6 | from firminfo import regression_merge, firm_info_merge, kappa_in_out
7 |
8 | from utilities.quantiles import weighted_quantile
9 |
10 | # Inputs
11 | # Betas
12 | f_betas = derived_dir / '13f_sp500_frankenbeta.parquet'
13 | f_betas_tr = derived_dir / '13f_sp500_unfiltered.parquet'
14 | f_betas_sc = derived_dir / '13f_scraped.parquet'
15 |
16 | # Other inputs
17 | f_names_expanded = derived_dir / 'expanded_names.parquet'
18 | f_comp_info = derived_dir / 'compustat_info.parquet'
19 | f_big4 = raw_dir / 'big4.csv'
20 |
21 | # Outputs
22 | # main outputs (kappas)
23 | f_kappas = derived_dir / 'official-kappas.parquet'
24 | f_kappas_tr = derived_dir / 'appendix_kappa_tr.parquet'
25 | f_kappas_scrape = derived_dir / 'appendix_kappa_scrape.parquet'
26 | f_kappas_combined = derived_dir / 'appendix_kappa_combined.parquet'
27 |
28 | # Firm and Investor Output
29 | f_investor_info = derived_dir / 'investor-info.parquet'
30 | f_firm_info = derived_dir / 'firm-info.parquet'
31 | f_regression = derived_dir / 'regression_data.parquet'
32 |
33 | # Calculate $\kappa$ for combined $\beta$ (Frankenstein version)
34 | # - Apply the $\kappa$ calculations period by period
35 | # - This includes (L2, L1, Sole/Shared, and various options for gamma)
36 | # - Save the output to a new parquet file
37 | df = process_beta(f_betas)
38 | df_kappa = beta_to_kappa(df)
39 | df_kappa.to_parquet(f_kappas, compression='brotli')
40 |
41 | # Calculate alternate Kappas (these are for Appendix)
42 | # - Apply $\kappa$ calculations period by period
43 | # - Do this for the pure TR data and pure scrape data
44 | total_dft = beta_to_kappa(process_beta(f_betas_tr))
45 | total_dfs = beta_to_kappa(process_beta(f_betas_sc))
46 | final_df = pd.merge(total_dft, fix_scrape_cols(total_dfs),
47 | on=['from', 'to', 'quarter'], how='outer')
48 |
49 | total_dft.to_parquet(f_kappas_tr, compression='brotli')
50 | total_dfs.to_parquet(f_kappas_scrape, compression='brotli')
51 | final_df.to_parquet(f_kappas_combined, compression='brotli')
52 |
53 | # save some memory
54 | del total_dft, total_dfs, final_df
55 |
56 | # Investor Info: How indexed is each manager? (including big4 information)
57 | df_investor = compute_investor_info(df, f_big4)
58 | df_investor.to_parquet(f_investor_info, compression='brotli')
59 |
60 | # Do the Firm-Level Descriptives
61 | # - Build the fundamentals, names, and business segments for all S&P entries
62 | # - Compute the firm level similarity measure
63 | # - Compute CHHI, IHHI from Betas
64 | # - Combine everything in the firm (permno-quarter) info file
65 | # - Write the file for regressions (merged firm info and kappa)
66 |
67 | df_fund2 = pd.read_parquet(f_comp_info)
68 | df_names2 = pd.read_parquet(f_names_expanded)
69 | firm_similarity = df.groupby(['quarter']).apply(
70 | do_one_firm_similarity).reset_index(drop=True)
71 | big4 = calc_big4(df, pd.read_csv(f_big4))
72 | chhi = calc_chhis(df)
73 |
74 | df_firm2 = firm_info_merge(df_names2, df_fund2, firm_similarity, big4, chhi)
75 | df_firm2.to_parquet(f_firm_info, compression='brotli')
76 |
77 | df_reg = regression_merge(df_kappa, df_firm2)
78 | df_reg.to_parquet(f_regression, compression='brotli')
79 |
80 | # add in-bound and outbound kappa --this isn't in final draft of paper
81 | # df_firm3=kappa_in_out(df_kappa,df_firm2)
82 |
--------------------------------------------------------------------------------
/code/firminfo.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
4 | def regression_merge(df_kappas, df_firm):
5 | firm_cols = ['permno', 'quarter', 'saleq', 'cogsq', 'normalized_l2',
6 | 'retail_share', 'market_cap', 'beta_BlackRock', 'beta_Vanguard', 'beta_StateStreet']
7 | keep_cols = ['from', 'to', 'quarter', 'kappa', 'cosine', 'retail_share', 'market_cap', 'marginsq', 'saleq', 'cogsq', 'normalized_l2',
8 | 'big3', 'beta_BlackRock', 'beta_Vanguard', 'beta_StateStreet']
9 |
10 | # Read things in and Merge
11 | df = pd.merge(
12 | df_kappas.loc[(df_kappas['from'] != df_kappas['to']) & (
13 | df_kappas['quarter'] <= '2017-10-01'), ['from', 'to', 'kappa', 'quarter', 'cosine']],
14 | df_firm[firm_cols], left_on=['from', 'quarter'], right_on=['permno', 'quarter'], how='left'
15 | ).reset_index(drop=True)
16 |
17 | # Calculate derived columns
18 | df['big3'] = df['beta_BlackRock'] + \
19 | df['beta_Vanguard'] + df['beta_StateStreet']
20 | df['marginsq'] = (df['saleq'] - df['cogsq']) / df['saleq']
21 | return df[keep_cols]
22 |
23 | # merge it all together
24 |
25 |
26 | def firm_info_merge(df_names2, df_fund2, firm_similarity, big4, chhi):
27 | df_firm2 = pd.merge(pd.merge(pd.merge(pd.merge(
28 | df_names2, df_fund2, on=['permno', 'quarter'], how='inner'),
29 | firm_similarity, on=['permno', 'quarter'], how='left'),
30 | big4, on=['permno', 'quarter'], how='left'),
31 | chhi, on=['permno', 'quarter'], how='left'
32 | )
33 | df_firm2['market_cap'] = df_firm2['shares_outstanding'] * df_firm2['price']
34 | df_firm2[['beta_BlackRock',
35 | 'beta_Vanguard',
36 | 'beta_StateStreet',
37 | 'beta_Fidelity']] = df_firm2[['beta_BlackRock',
38 | 'beta_Vanguard',
39 | 'beta_StateStreet',
40 | 'beta_Fidelity']].fillna(0)
41 | return df_firm2[(df_firm2.quarter >= '1980-01-01') &
42 | (df_firm2.quarter <= '2017-10-01')].drop_duplicates()
43 |
44 |
45 | # This block is for incoming and outgoing kappa
46 | # note: not sure this made it into the paper (keep the code anyway)
47 | def weighted_from(df):
48 | a1 = np.ma.average(df['kappa'].values, weights=df['saleq_x'].values)
49 | return pd.Series({'kappa_in': a1})
50 |
51 |
52 | def weighted_to(df):
53 | a1 = np.ma.average(df['kappa'].values, weights=df['saleq_y'].values)
54 | return pd.Series({'kappa_out': a1})
55 |
56 |
57 | def kappa_in_out(df, df_firm):
58 | dfk = df.loc[df['from'] != df['to'], ['from', 'to', 'quarter', 'kappa']]
59 | tmp = pd.merge(pd.merge(dfk,
60 | df_firm[['permno', 'quarter', 'saleq']], left_on=['from', 'quarter'], right_on=['permno', 'quarter']),
61 | df_firm[['permno', 'quarter', 'saleq']], left_on=['to', 'quarter'], right_on=['permno', 'quarter']
62 | ).fillna(0)
63 |
64 | g1 = tmp.groupby(['quarter', 'to']).apply(weighted_from)
65 | g2 = tmp.groupby(['quarter', 'from']).apply(weighted_to)
66 |
67 | return pd.merge(pd.merge(df_firm,
68 | g1, left_on=['quarter', 'permno'], right_on=['quarter', 'to'], how='left'),
69 | g2, left_on=['quarter', 'permno'], right_on=['quarter', 'from'], how='left')
70 |
--------------------------------------------------------------------------------
/code/investors.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from utilities.matlab_util import matlab_sparse
4 | from sklearn.metrics.pairwise import cosine_similarity, manhattan_distances
5 |
6 |
7 | def compute_investor_info(df, f_big4):
8 | tmp = df.groupby(['quarter']).apply(
9 | do_one_investor_similarity).reset_index(drop=True)
10 | return pd.merge(tmp, pd.read_csv(f_big4), how='left', on=['mgrno'])
11 |
12 |
13 | def calc_big4(df, big4):
14 | df2 = pd.merge(df, big4, on=['mgrno'], how='inner').groupby(
15 | ['quarter', 'permno', 'InvestorName'])['beta'].sum().unstack()
16 | df2.columns = [
17 | 'beta_BlackRock',
18 | 'beta_Fidelity',
19 | 'beta_StateStreet',
20 | 'beta_Vanguard']
21 | return df2[['beta_BlackRock', 'beta_Vanguard',
22 | 'beta_StateStreet', 'beta_Fidelity']].fillna(0)
23 |
24 |
25 | def investor_helper(betas):
26 | # weights for market portfolio
27 | mkt = betas.sum(axis=0) / betas.sum()
28 | # "AUM" weights to aggregate market portfolio
29 | x = betas.sum(axis=1)
30 | aum = x / x.sum()
31 | nbetas = betas / x[:, None]
32 |
33 | # distance to AUM weighted market portfolio
34 | l2 = cosine_similarity(X=betas, Y=np.expand_dims(mkt, axis=0)).flatten()
35 | l1 = 1 - manhattan_distances(X=nbetas,
36 | Y=np.expand_dims(mkt,
37 | axis=0),
38 | sum_over_features=True).flatten() / 2
39 | return(aum, l2, l1)
40 |
41 |
42 | def do_one_investor_similarity(df):
43 | [betas, mgr_keys, permno_keys] = matlab_sparse(
44 | df.mgrno, df.permno, df.beta)
45 | # Market portfolio weights
46 | (aum, l2, l1) = investor_helper(betas)
47 | out_df = pd.DataFrame({'mgrno': mgr_keys.astype(int),
48 | 'aum_weight': aum,
49 | 'l2_similarity': l2,
50 | 'l1_similarity': l1,
51 | 'cov_aum_l1': np.cov(l1,
52 | aum)[1][0]})
53 | out_df['quarter'] = df.quarter.iloc[0]
54 | return out_df
55 |
56 |
57 | def do_one_firm_similarity(df):
58 | [betas, mgr_keys, permno_keys] = matlab_sparse(
59 | df.mgrno, df.permno, df.beta)
60 | (aum, l2, l1) = investor_helper(betas)
61 |
62 | norm_l2 = (l2 @ (betas / betas.sum(0)))
63 | norm_l1 = (l1 @ (betas / betas.sum(0)))
64 | nonnorm_l2 = (l2 @ betas)
65 | nonnorm_l1 = (l1 @ betas)
66 |
67 | out_df = pd.DataFrame({'permno': permno_keys.astype(int),
68 | 'normalized_l1': norm_l1,
69 | 'nonnormalized_l1': nonnorm_l1,
70 | 'normalized_l2': norm_l2,
71 | 'nonnormalized_l2': nonnorm_l2})
72 | out_df['quarter'] = df.quarter.iloc[0]
73 | return out_df
74 |
--------------------------------------------------------------------------------
/code/kappas.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from utilities.matlab_util import matlab_sparse
4 | from sklearn.metrics.pairwise import cosine_similarity, manhattan_distances
5 |
6 |
7 | def fix_scrape_cols(df):
8 | # fix the names in the scraped data
9 | df = df.set_index(['from', 'to', 'quarter'])
10 | df.columns = ['s' + x for x in df.columns]
11 | return df.reset_index()
12 |
13 |
14 | def process_beta(fn):
15 | df = pd.read_parquet(fn)
16 | df['mgrno'] = df['mgrno'].astype(int)
17 | return df[(df.permno_drop == False) & (
18 | df.sharecode_drop == False) & (df.beta < 0.5)]
19 |
20 | # This is the main function
21 | def beta_to_kappa(df):
22 | df = df[(df.quarter >= '1980-01-01')]
23 |
24 | df.loc[df.price < 0, 'price'] = 0
25 | df['mkt_cap'] = df['shares_outstanding'] * df['price']
26 | df_m = df.groupby(['permno', 'quarter'])['mkt_cap'].median()
27 |
28 | total_df = df.groupby(['quarter']).apply(do_one_period)
29 | total_df3 = df[(df.quarter >= '1999-01-01')].groupby(['quarter']).apply(do_one_robustness)
30 |
31 | # merge and clean up missings
32 | total_df = pd.merge(
33 | total_df, total_df3, on=[
34 | 'quarter', 'from', 'to'], how='left')
35 | total_df[['kappa',
36 | 'kappa_CLWY',
37 | 'kappa_pow2',
38 | 'kappa_pow3',
39 | 'kappa_sqrt',
40 | 'cosine',
41 | 'kappa_sole',
42 | 'kappa_soleshared']] = total_df[['kappa',
43 | 'kappa_CLWY',
44 | 'kappa_pow2',
45 | 'kappa_pow3',
46 | 'kappa_sqrt',
47 | 'cosine',
48 | 'kappa_sole',
49 | 'kappa_soleshared']].fillna(0)
50 |
51 | # Add the market cap
52 | total_df = pd.merge(pd.merge(total_df,
53 | df_m, left_on=['from', 'quarter'], right_on=['permno', 'quarter']),
54 | df_m, left_on=['to', 'quarter'], right_on=['permno', 'quarter']
55 | ).rename(columns={'mkt_cap_x': 'mkt_cap_from', 'mkt_cap_y': 'mkt_cap_to'}).reset_index()
56 |
57 | return total_df
58 |
59 |
60 | def do_one_robustness(df):
61 | [betas_soleshared, mgr_keys, permno_keys] = matlab_sparse(
62 | df.mgrno, df.permno, df.beta_soleshared, compress=False)
63 | [betas_sole, mgr_keys, permno_keys] = matlab_sparse(
64 | df.mgrno, df.permno, df.beta_sole, compress=False)
65 |
66 | [betas, mgr_keys, permno_keys] = matlab_sparse(
67 | df.mgrno, df.permno, df.beta, compress=False)
68 |
69 | kappa_sole = raw_kappa(betas, betas_sole)
70 | kappa_soleshared = raw_kappa(betas, betas_soleshared)
71 | kappa_all = raw_kappa(betas, betas)
72 | # kappa_drop=raw_kappa(betas_drop,betas_drop)
73 |
74 | idx = kappa_all.nonzero()
75 | return pd.DataFrame({'from': permno_keys[idx[0]], 'to': permno_keys[idx[1]], 'kappa_all': kappa_all[idx].flatten(),
76 | 'kappa_sole': kappa_sole[idx].flatten(), 'kappa_soleshared': kappa_soleshared[idx].flatten()})
77 |
78 |
79 | def beta_to_kappa_merger_breakup(df):
80 | return df.groupby(['quarter']).apply(do_one_merger_breakup).reset_index(drop=True)
81 |
82 |
83 | def do_one_merger_breakup(df2):
84 | # breakup in three blocks
85 | blockA = df2.loc[~df2['InvestorName'].isnull(), [
86 | 'mgrno', 'permno', 'beta']]
87 | blockB = df2.loc[df2['InvestorName'].isnull(), ['mgrno', 'permno', 'beta']]
88 | blockA.beta = 0.5 * blockA.beta
89 | blockC = blockA.copy()
90 | blockC.mgrno = -blockC.mgrno
91 | df3 = pd.concat([blockA, blockB, blockC], axis=0, ignore_index=True)
92 |
93 | # first do the regular case
94 | [betas, mgr_keys, permno_keys] = matlab_sparse(
95 | df2.mgrno, df2.permno, df2.beta)
96 | k1 = calc_kappa(betas)
97 |
98 | # now do the breakup case using the augmented data
99 | [betas_b, mgr_keys_b, permno_keys_b] = matlab_sparse(
100 | df3.mgrno, df3.permno, df3.beta)
101 | k2 = calc_kappa(betas_b)
102 |
103 | df4 = df2.groupby(['mgrno_merger', 'permno']).sum().reset_index()
104 | # finally do the merger using the merger mgrno's instead of the real ones
105 | [betas_m, mgr_keys_m, permno_keys_m] = matlab_sparse(
106 | df4.mgrno_merger, df4.permno, df4.beta)
107 | k3 = calc_kappa(betas_m)
108 |
109 | # Ignore BlackRock+Vanguard
110 | df4 = df2[~(df2['InvestorName'].isin(['BlackRock', 'Vanguard']))]
111 | [betas_drop, mgr_keys_drop, permno_keys_drop] = matlab_sparse(
112 | df4.mgrno, df4.permno, df4.beta, compress=False)
113 | k4 = calc_kappa(betas_drop)
114 |
115 | # put it all together and return
116 | idx = k1.nonzero()
117 | out_df = pd.DataFrame({'from': permno_keys[idx[0]], 'to': permno_keys[idx[1]], 'kappa': k1[idx].flatten(),
118 | 'kappa_breakup': k2[idx].flatten(), 'kappa_merger': k3[idx].flatten(), 'kappa_drop': k4[idx].flatten()})
119 | out_df['quarter'] = df2.quarter.iloc[0]
120 | return out_df
121 |
122 | # handler for L2 Measures (Rotemberg Weights, CLWY Weights, etc.)
123 | # input: long dataframe of Manager, Firm, Beta_fs
124 | # Output: long dataframe of Quarter, Firm_from, Firm_to, kappa_fg, ihhi_f,
125 | # ihhi_g, cosine_fg
126 |
127 |
128 | def do_one_period(df):
129 | [betas, mgr_keys, permno_keys] = matlab_sparse(
130 | df.mgrno, df.permno, df.beta)
131 | kappa = calc_kappa(betas)
132 | kappa2 = calc_kappa(betas, 2)
133 | kappa3 = calc_kappa(betas, 3)
134 | kappa4 = calc_kappa(betas, 0.5)
135 | kappa5 = calc_kappa(betas, 'CLWY')
136 | cosine = cosine_similarity(betas.transpose())
137 | # this is a bit slow
138 | l1_measure = calc_l1_measure(betas)
139 |
140 | idx = kappa.nonzero()
141 | return pd.DataFrame({'from': permno_keys[idx[0]], 'to': permno_keys[idx[1]], 'kappa': kappa[idx].flatten(),
142 | 'kappa_pow2': kappa2[idx].flatten(), 'kappa_pow3': kappa3[idx].flatten(), 'kappa_sqrt': kappa4[idx].flatten(),
143 | 'kappa_CLWY': kappa5[idx].flatten(), 'cosine': cosine[idx].flatten(), 'l1_measure': l1_measure[idx].flatten()})
144 |
145 | # This does the work for L1 measure
146 | # Input beta: S x F matrix
147 | # Output L1: F x F matrix
148 | # Subtract beta_f from each column of beta and sum of absolute deviations,
149 | # stack for L1.
150 | def calc_l1_measure(betas):
151 | y = manhattan_distances(betas.transpose())
152 | tot = betas.sum(axis=0)
153 | return (-y + tot[np.newaxis, :] + tot[:, np.newaxis]) / 2
154 |
155 | # Calculate Summary Stats of Control Weights
156 | # Compute Convex Power Gamma:
157 | # CHHI: Control HHI
158 | # IHHI: Investor HHI
159 | # Retail Share
160 | #
161 | # This is the main function that takes a DF of betas and calculates all of
162 | # the CHHI measures
163 |
164 | def calc_chhis(df):
165 | # apply to multiple groups here
166 | df['inv_total'] = df.groupby(['mgrno', 'quarter'])['beta'].transform(sum)
167 | y = df[['permno', 'quarter', 'beta', 'inv_total']].groupby(
168 | ['permno', 'quarter']).apply(agg_chhi)
169 | x = df.groupby(['permno', 'quarter']).agg(
170 | {'shares_outstanding': np.max, 'price': np.median})
171 | return pd.merge(x, y, left_index=True, right_index=True, how='outer')
172 |
173 | # this is unitary function that takes in a vector Beta_f that is S x 1
174 | def chhi(beta, power):
175 | gamma = (beta**power)
176 | # scalar adjustment factor
177 | adj = 10000 * ((beta.sum() / gamma.sum())**2)
178 | return (gamma**2).sum() * adj
179 |
180 | # This calculates all of the CHHI measures and returns a (horizontal) series
181 | def agg_chhi(x):
182 | out = [chhi(x['beta'], a) for a in [0.5, 1, 2, 3, 4]]
183 | tmp = x['beta'] / x['inv_total']
184 | clwy = chhi(tmp, 1)
185 | clwy_alt = 10000 * (tmp**2).sum()
186 |
187 | names = {
188 | 'retail_share': 1 - x['beta'].sum(),
189 | 'chhi_05': out[0],
190 | 'ihhi': out[1],
191 | 'chhi_2': out[2],
192 | 'chhi_3': out[3],
193 | 'chhi_4': out[4],
194 | 'chhi_clwy': clwy,
195 | 'chhi_clwy2': clwy_alt
196 | }
197 | return pd.Series(names, index=['retail_share', 'ihhi', 'chhi_05',
198 | 'chhi_2', 'chhi_3', 'chhi_4', 'chhi_clwy', 'chhi_clwy2'])
199 |
200 |
201 | # This calculates profit weights
202 | #
203 | # Input beta: S x F matrix
204 | # Output kappa: F x F matrix
205 | # Options: Gamma 'CLWY', 'default' (Rotemberg), numeric: convexity
206 | # parameter "a" for gamma=beta^a
207 | def calc_kappa(betas, gamma_type='default'):
208 | # CLWY normalize the gammas
209 | if gamma_type == 'CLWY':
210 | gamma = betas / np.maximum(betas.sum(axis=1), 1e-10)[:, None]
211 | elif isinstance(gamma_type, (int, float)):
212 | if gamma_type > 0:
213 | tmp = betas**(gamma_type)
214 | gamma = tmp # *(betas.sum(axis=0)/tmp.sum(axis=0))
215 | else:
216 | print("Must provide Positive Parameter")
217 | # proportional control: do we normalize to sum to one?
218 | else:
219 | gamma = betas # /betas.sum(axis=0)
220 |
221 | return raw_kappa(betas, gamma)
222 |
223 |
224 | # This is the ratio of inner products for kappas: kappa[f, g] = (gamma_f . beta_g) / (gamma_f . beta_f)
225 | def raw_kappa(betas, gamma):
226 | # F x F matrix
227 | numer = gamma.T @ betas
228 | # F x 1 vector
229 | denom = np.diag(numer)
230 | # this is an F x F matrix
231 | return numer / denom[:, None]
232 |
--------------------------------------------------------------------------------
/code/our_plot_config.py:
--------------------------------------------------------------------------------
1 | # For files and paths
2 | import pathlib
3 | import os
4 |
5 |
6 | # File Directories
7 | # cc modified to parent
8 | proj_dir = pathlib.Path.cwd().parent
9 | data_dir = proj_dir / 'data'
10 | raw_dir = data_dir / 'public'
11 | wrds_dir = data_dir / 'wrds'
12 | checks_dir = data_dir / 'checks'
13 | derived_dir = data_dir / 'derived'
14 |
15 | fig_dir = proj_dir / 'figures'
16 | tab_dir = proj_dir / 'tables'
17 |
18 |
19 | # For plotting
20 | #import matplotlib
21 | #import matplotlib.pyplot as plt
22 | #from cycler import cycler
23 | #import seaborn as sns
24 |
25 | # Plot Configuration
26 | def setplotstyle():
27 | from cycler import cycler
28 | import seaborn as sns
29 | import matplotlib
30 | import matplotlib.pyplot as plt
31 | matplotlib.style.use('seaborn-whitegrid')
32 |
33 | matplotlib.rcParams.update({'font.size': 24})
34 | plt.rc('font', size=24) # controls default text sizes
35 | plt.rc('axes', titlesize=24) # fontsize of the axes title
36 | plt.rc('axes', labelsize=24) # fontsize of the x and y labels
37 | plt.rc('xtick', labelsize=24) # fontsize of the tick labels
38 | plt.rc('ytick', labelsize=24) # fontsize of the tick labels
39 | plt.rc('legend', fontsize=24) # legend fontsize
40 | plt.rc('figure', titlesize=24)
41 | plt.rc(
42 | 'axes',
43 | prop_cycle=cycler(
44 | color=[
45 | '#252525',
46 | '#636363',
47 | '#969696',
48 | '#bdbdbd']) *
49 | cycler(
50 | linestyle=[
51 | '-',
52 | ':',
53 | '--',
54 | '-.']))
55 | plt.rc('lines', linewidth=3)
56 |
--------------------------------------------------------------------------------
/code/plots10_kappa_comparison_appendix.py:
--------------------------------------------------------------------------------
1 | # %%
2 | from utilities.matlab_util import coalesce
3 | import pandas as pd
4 | import numpy as np
5 | import pathlib
6 |
7 | import matplotlib
8 | import matplotlib.pyplot as plt
9 | from our_plot_config import derived_dir, fig_dir, setplotstyle
10 |
11 | setplotstyle()
12 |
13 | # %%
14 |
15 |
16 | # Input file
17 | f_kappas = derived_dir / 'appendix_kappa_combined.parquet'
18 | f_firms = derived_dir / 'firm-information.parquet'
19 |
20 | # Figures
21 | f_profitweights_comp1 = fig_dir / 'appfigure_a3.pdf'
22 |
23 | # %%
24 | # ### Read in the (Cleaned) Parquet File
25 | # - Apply the $\kappa$ calculations period by period
26 | # - Save the output to a new parquet file
27 |
28 | total_df = pd.read_parquet(f_kappas)
29 | total_df['tunnel'] = (total_df['skappa'].combine_first(total_df['kappa']) > 1)
30 | total_df = total_df[total_df['from'] != total_df['to']]
31 | qtr_mean = total_df.groupby(['quarter']).mean()
32 |
35 | qtr_mean = qtr_mean[qtr_mean.index < '2019-01-01']
36 |
37 | # %%
38 |
39 |
40 | col_list = [
41 | 'l1_measure',
42 | 'kappa',
43 | 'kappa_pow2',
44 | 'kappa_pow3',
45 | 'kappa_sqrt',
46 | 'kappa_CLWY']
47 | qtr_mean = coalesce(qtr_mean, col_list, 's', method='left')
48 |
49 | # %%
50 |
51 | # ## Make the plots
52 | # ### Comparisons
53 | # - Compare TR Data (Solid) and Scraped 13-F Data (Dashed)
54 |
55 |
56 | qtr_mean[['kappa', 'skappa']].plot(
57 | figsize=(20, 10), style=['-', '--'], color=['navy', 'maroon'])
58 | plt.xlabel("")
59 | plt.ylabel(r"$\kappa$ weight")
60 | plt.legend([r'TR Data', 'Scraped Data', ])
61 | plt.ylim(0, 1.2)
62 | plt.savefig(f_profitweights_comp1, bbox_inches='tight')
63 |
--------------------------------------------------------------------------------
/code/plots11_profit_simulations.py:
--------------------------------------------------------------------------------
1 | # %%
2 | import pandas as pd
3 | import numpy as np
4 | import pathlib
5 | import pyblp
6 | import matplotlib
7 | import matplotlib.pyplot as plt
8 |
9 | from our_plot_config import derived_dir, fig_dir, raw_dir, setplotstyle
10 |
11 | setplotstyle()
12 |
13 |
14 | pyblp.options.collinear_atol = pyblp.options.collinear_rtol = 0
15 | pyblp.options.verbose = False
16 |
17 | # jan and jan markups input
18 | f_jj_markups = raw_dir / 'DLE_markups_fig_v2.csv'
19 |
20 | # temp input
21 | f_quarter_mean = derived_dir / 'tmp-quarter-mean.pickle'
22 | f_markup_out = derived_dir / 'markup-simulations.csv'
23 |
24 | fig_markups = fig_dir / 'macro-simulated-markups.pdf'
25 | fig_markups_jj = fig_dir / 'figure10_markups.pdf'
26 | fig_profits = fig_dir / 'figure11_profits.pdf'
27 |
28 |
29 | def combine64(years, months=1, days=1, weeks=None, hours=None, minutes=None,
30 | seconds=None, milliseconds=None, microseconds=None, nanoseconds=None):
31 | years = np.asarray(years) - 1970
32 | months = np.asarray(months) - 1
33 | days = np.asarray(days) - 1
34 | types = ('