├── .gitattributes
├── .github
│   ├── ISSUE_TEMPLATE.md
│   └── workflows
│       └── main.yml
├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.rst
├── alphalens
│   ├── __init__.py
│   ├── _version.py
│   ├── examples
│   │   ├── .gitattributes
│   │   ├── alphalens_tutorial_on_quantopian.ipynb
│   │   ├── daily_factor_synthetic_data.ipynb
│   │   ├── event_study.ipynb
│   │   ├── event_study_synthetic_data.ipynb
│   │   ├── ic_tear.png
│   │   ├── intraday_factor.ipynb
│   │   ├── intraday_factor_synthetic_data.ipynb
│   │   ├── predictive_vs_non-predictive_factor.ipynb
│   │   ├── pyfolio_integration.ipynb
│   │   ├── returns_tear.png
│   │   ├── sector_tear.png
│   │   ├── table_tear.png
│   │   └── tear_sheet_walk_through.ipynb
│   ├── performance.py
│   ├── plotting.py
│   ├── tears.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── matplotlibrc
│   │   ├── test_performance.py
│   │   ├── test_tears.py
│   │   └── test_utils.py
│   └── utils.py
├── build_and_deploy_docs.sh
├── docs
│   ├── Makefile
│   ├── make.bat
│   └── source
│       ├── alphalens.rst
│       ├── alphalens.tests.rst
│       ├── conf.py
│       ├── index.rst
│       └── modules.rst
├── setup.cfg
├── setup.py
├── tox.ini
└── versioneer.py
/.gitattributes: -------------------------------------------------------------------------------- 1 | alphalens/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Problem Description 2 | 3 | **Please provide a minimal, self-contained, and reproducible example:** 4 | ```python 5 | [Paste code here] 6 | ``` 7 | 8 | **Please provide the full traceback:** 9 | ```python 10 | [Paste traceback here] 11 | ``` 12 | 13 | **Please provide any additional information below:** 14 | 15 | 16 | ## Versions 17 | 18 | * Alphalens version: 19 | * Python version: 20 | * Pandas version: 21 | * Matplotlib version: 22 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | os: [ubuntu-latest] 19 | python-version: [2.7, 3.5, 3.7, 3.8] 20 | include: 21 | - python-version: 2.7 22 | pandas: 0.18.1 23 | numpy: 1.11.3 24 | scipy: 0.17.1 25 | statsmodels: 0.6.1 26 | - python-version: 3.5 27 | pandas: 0.18.1 28 | numpy: 1.11.3 29 | scipy: 0.17.1 30 | statsmodels: 0.6.1 31 | - python-version: 3.7 32 | pandas: 1.0.1 33 | numpy: 1.18.1 34 | scipy: 1.4.1 35 | statsmodels: 0.11.1 36 | - python-version: 3.8 37 | pandas: 1.0.1 38 | numpy: 1.18.1 39 | scipy: 1.4.1 40 | statsmodels: 0.11.1 41 | 42 | steps: 43 | - uses: actions/checkout@v1 44 | - name: Set up Python ${{ matrix.python-version }} 45 | uses: actions/setup-python@v1 46 | with: 47 | python-version: ${{ matrix.python-version }} 48 | - name: Install dependencies 49 | env: 50 | PYTHONWARNINGS: ignore:DEPRECATION::pip._internal.cli.base_command 51 | run: | 52 | python -m pip install --upgrade pip 53 | pip install numpy==${{ matrix.numpy }} 54 | pip install pandas==${{ matrix.pandas }} scipy==${{ matrix.scipy }} statsmodels==${{ matrix.statsmodels }} 55 | pip install -e .[test] 56 | - name: Lint with flake8 57 | run: | 58 | flake8 59 | - name: Test with nose 60 | run: | 61 | MATPLOTLIBRC=alphalens/tests/matplotlibrc nosetests alphalens/tests 62 | --------------------------------------------------------------------------------
/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.xml 3 | 4 | *.iml 5 | 6 | *.pyc 7 | 8 | build/ 9 | docs/build/ 10 | .ipynb_checkpoints 11 | 12 | # Tox puts virtualenvs here by default. 13 | .tox/ 14 | 15 | # coverage.py outputs. 16 | cover 17 | .coverage 18 | 19 | # Intermediate outputs from building distributions for PyPI. 20 | dist 21 | *.egg-info/ 22 | 23 | # Emacs temp files. 24 | *~ 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2018 Quantopian, Inc. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include versioneer.py 2 | include alphalens/_version.py 3 | include LICENSE 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://media.quantopian.com/logos/open_source/alphalens-logo-03.png 2 | :align: center 3 | 4 | Alphalens 5 | ========= 6 | .. 
image:: https://github.com/quantopian/alphalens/workflows/CI/badge.svg 7 | :alt: GitHub Actions status 8 | :target: https://github.com/quantopian/alphalens/actions?query=workflow%3ACI+branch%3Amaster 9 | 10 | Alphalens is a Python library for performance analysis of predictive 11 | (alpha) stock factors. Alphalens works great with the 12 | `Zipline `__ open source backtesting library, and 13 | `Pyfolio `__, which provides 14 | performance and risk analysis of financial portfolios. You can try Alphalens 15 | at `Quantopian `_ -- a free, 16 | community-centered, hosted platform for researching and testing alpha ideas. 17 | Quantopian also offers a `fully managed service for professionals `_ 18 | that includes Zipline, Alphalens, Pyfolio, FactSet data, and more. 19 | 20 | The main function of Alphalens is to surface the most relevant statistics 21 | and plots about an alpha factor, including: 22 | 23 | - Returns Analysis 24 | - Information Coefficient Analysis 25 | - Turnover Analysis 26 | - Grouped Analysis 27 | 28 | Getting started 29 | --------------- 30 | 31 | With a signal and pricing data, creating a factor "tear sheet" is a two-step process: 32 | 33 | .. code:: python 34 | 35 | import alphalens 36 | 37 | # Ingest and format data 38 | factor_data = alphalens.utils.get_clean_factor_and_forward_returns(my_factor, 39 | pricing, 40 | quantiles=5, 41 | groupby=ticker_sector, 42 | groupby_labels=sector_names) 43 | 44 | # Run analysis 45 | alphalens.tears.create_full_tear_sheet(factor_data) 46 | 47 | 48 | Learn more 49 | ---------- 50 | 51 | Check out the `example notebooks `__ for more on how to read and use 52 | the factor tear sheet. A good starting point could be `this `__. 53 | 54 | Installation 55 | ------------ 56 | 57 | Install with pip: 58 | 59 | :: 60 | 61 | pip install alphalens 62 | 63 | Install with conda: 64 | 65 | :: 66 | 67 | conda install -c conda-forge alphalens 68 | 69 | Install from the master branch of the Alphalens repository (development code): 70 | 71 | :: 72 | 73 | pip install git+https://github.com/quantopian/alphalens 74 | 75 | Alphalens depends on: 76 | 77 | - `matplotlib `__ 78 | - `numpy `__ 79 | - `pandas `__ 80 | - `scipy `__ 81 | - `seaborn `__ 82 | - `statsmodels `__ 83 | 84 | Usage 85 | ----- 86 | 87 | A good way to get started is to run the examples in a `Jupyter 88 | notebook `__. 89 | 90 | To get set up with an example, you can: 91 | 92 | Run a Jupyter notebook server via: 93 | 94 | .. code:: bash 95 | 96 | jupyter notebook 97 | 98 | From the notebook list page (usually found at 99 | ``http://localhost:8888/``), navigate over to the examples directory, 100 | and open any file with a .ipynb extension. 101 | 102 | Execute the code in a notebook cell by clicking on it and hitting 103 | Shift+Enter. 104 | 105 | Questions? 106 | ---------- 107 | 108 | If you find a bug, feel free to open an issue on our `GitHub 109 | tracker `__. 110 | 111 | Contribute 112 | ---------- 113 | 114 | If you want to contribute, a great place to start would be the 115 | `help-wanted 116 | issues `__. 117 | 118 | Credits 119 | ------- 120 | 121 | - `Andrew Campbell `__ 122 | - `James Christopher `__ 123 | - `Thomas Wiecki `__ 124 | - `Jonathan Larkin `__ 125 | - Jessica Stauth (jstauth@quantopian.com) 126 | - `Taso Petridis `_ 127 | 128 | For a full list of contributors see the `contributors page. `_ 129 | 130 | Example Tear Sheet 131 | ------------------ 132 | 133 | Example factor courtesy of `ExtractAlpha `_ 134 | 135 | ..
image:: https://github.com/quantopian/alphalens/raw/master/alphalens/examples/table_tear.png 136 | .. image:: https://github.com/quantopian/alphalens/raw/master/alphalens/examples/returns_tear.png 137 | .. image:: https://github.com/quantopian/alphalens/raw/master/alphalens/examples/ic_tear.png 138 | .. image:: https://github.com/quantopian/alphalens/raw/master/alphalens/examples/sector_tear.png 139 | :alt: 140 | -------------------------------------------------------------------------------- /alphalens/__init__.py: -------------------------------------------------------------------------------- 1 | from . import performance 2 | from . import plotting 3 | from . import tears 4 | from . import utils 5 | 6 | from ._version import get_versions 7 | 8 | 9 | __version__ = get_versions()['version'] 10 | del get_versions 11 | 12 | __all__ = ['performance', 'plotting', 'tears', 'utils'] 13 | -------------------------------------------------------------------------------- /alphalens/_version.py: -------------------------------------------------------------------------------- 1 | 2 | # This file helps to compute a version number in source trees obtained from 3 | # git-archive tarball (such as those provided by github's download-from-tag 4 | # feature). Distribution tarballs (built by setup.py sdist) and build 5 | # directories (produced by setup.py build) will contain a much shorter file 6 | # that just contains the computed version number. 7 | 8 | # This file is released into the public domain. Generated by 9 | # versioneer-0.18 (https://github.com/warner/python-versioneer) 10 | 11 | """Git implementation of _version.py.""" 12 | 13 | import errno 14 | import os 15 | import re 16 | import subprocess 17 | import sys 18 | 19 | 20 | def get_keywords(): 21 | """Get the keywords needed to look up the version information.""" 22 | # these strings will be replaced by git during git-archive. 23 | # setup.py/versioneer.py will grep for the variable names, so they must 24 | # each be defined on a line of their own. _version.py will just call 25 | # get_keywords().
26 | git_refnames = " (HEAD -> master)" 27 | git_full = "4979057c6fbd045a7998dba53388654d45a217ef" 28 | git_date = "2021-11-23 00:26:54 +0800" 29 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 30 | return keywords 31 | 32 | 33 | class VersioneerConfig: 34 | """Container for Versioneer configuration parameters.""" 35 | 36 | 37 | def get_config(): 38 | """Create, populate and return the VersioneerConfig() object.""" 39 | # these strings are filled in when 'setup.py versioneer' creates 40 | # _version.py 41 | cfg = VersioneerConfig() 42 | cfg.VCS = "git" 43 | cfg.style = "pep440" 44 | cfg.tag_prefix = "v" 45 | cfg.parentdir_prefix = "alphalens-" 46 | cfg.versionfile_source = "alphalens/_version.py" 47 | cfg.verbose = False 48 | return cfg 49 | 50 | 51 | class NotThisMethod(Exception): 52 | """Exception raised if a method is not valid for the current scenario.""" 53 | 54 | 55 | LONG_VERSION_PY = {} 56 | HANDLERS = {} 57 | 58 | 59 | def register_vcs_handler(vcs, method): # decorator 60 | """Decorator to mark a method as the handler for a particular VCS.""" 61 | def decorate(f): 62 | """Store f in HANDLERS[vcs][method].""" 63 | if vcs not in HANDLERS: 64 | HANDLERS[vcs] = {} 65 | HANDLERS[vcs][method] = f 66 | return f 67 | return decorate 68 | 69 | 70 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 71 | env=None): 72 | """Call the given command(s).""" 73 | assert isinstance(commands, list) 74 | p = None 75 | for c in commands: 76 | try: 77 | dispcmd = str([c] + args) 78 | # remember shell=False, so use git.cmd on windows, not just git 79 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 80 | stdout=subprocess.PIPE, 81 | stderr=(subprocess.PIPE if hide_stderr 82 | else None)) 83 | break 84 | except EnvironmentError: 85 | e = sys.exc_info()[1] 86 | if e.errno == errno.ENOENT: 87 | continue 88 | if verbose: 89 | print("unable to run %s" % dispcmd) 90 | print(e) 91 | return None, None 92 | else: 93 | if verbose: 94 | print("unable to find command, tried %s" % (commands,)) 95 | return None, None 96 | stdout = p.communicate()[0].strip() 97 | if sys.version_info[0] >= 3: 98 | stdout = stdout.decode() 99 | if p.returncode != 0: 100 | if verbose: 101 | print("unable to run %s (error)" % dispcmd) 102 | print("stdout was %s" % stdout) 103 | return None, p.returncode 104 | return stdout, p.returncode 105 | 106 | 107 | def versions_from_parentdir(parentdir_prefix, root, verbose): 108 | """Try to determine the version from the parent directory name. 109 | 110 | Source tarballs conventionally unpack into a directory that includes both 111 | the project name and a version string. 
We will also support searching up 112 | two directory levels for an appropriately named parent directory 113 | """ 114 | rootdirs = [] 115 | 116 | for i in range(3): 117 | dirname = os.path.basename(root) 118 | if dirname.startswith(parentdir_prefix): 119 | return {"version": dirname[len(parentdir_prefix):], 120 | "full-revisionid": None, 121 | "dirty": False, "error": None, "date": None} 122 | else: 123 | rootdirs.append(root) 124 | root = os.path.dirname(root) # up a level 125 | 126 | if verbose: 127 | print("Tried directories %s but none started with prefix %s" % 128 | (str(rootdirs), parentdir_prefix)) 129 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 130 | 131 | 132 | @register_vcs_handler("git", "get_keywords") 133 | def git_get_keywords(versionfile_abs): 134 | """Extract version information from the given file.""" 135 | # the code embedded in _version.py can just fetch the value of these 136 | # keywords. When used from setup.py, we don't want to import _version.py, 137 | # so we do it with a regexp instead. This function is not used from 138 | # _version.py. 139 | keywords = {} 140 | try: 141 | f = open(versionfile_abs, "r") 142 | for line in f.readlines(): 143 | if line.strip().startswith("git_refnames ="): 144 | mo = re.search(r'=\s*"(.*)"', line) 145 | if mo: 146 | keywords["refnames"] = mo.group(1) 147 | if line.strip().startswith("git_full ="): 148 | mo = re.search(r'=\s*"(.*)"', line) 149 | if mo: 150 | keywords["full"] = mo.group(1) 151 | if line.strip().startswith("git_date ="): 152 | mo = re.search(r'=\s*"(.*)"', line) 153 | if mo: 154 | keywords["date"] = mo.group(1) 155 | f.close() 156 | except EnvironmentError: 157 | pass 158 | return keywords 159 | 160 | 161 | @register_vcs_handler("git", "keywords") 162 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 163 | """Get version information from git keywords.""" 164 | if not keywords: 165 | raise NotThisMethod("no keywords at all, weird") 166 | date = keywords.get("date") 167 | if date is not None: 168 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 169 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 170 | # -like" string, which we must then edit to make compliant), because 171 | # it's been around since git-1.5.3, and it's too difficult to 172 | # discover which version we're using, or to work around using an 173 | # older one. 174 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 175 | refnames = keywords["refnames"].strip() 176 | if refnames.startswith("$Format"): 177 | if verbose: 178 | print("keywords are unexpanded, not using") 179 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 180 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 181 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 182 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 183 | TAG = "tag: " 184 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 185 | if not tags: 186 | # Either we're using git < 1.8.3, or there really are no tags. We use 187 | # a heuristic: assume all version tags have a digit. The old git %d 188 | # expansion behaves like git log --decorate=short and strips out the 189 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 190 | # between branches and tags. By ignoring refnames without digits, we 191 | # filter out many common branch names like "release" and 192 | # "stabilization", as well as "HEAD" and "master". 
193 | tags = set([r for r in refs if re.search(r'\d', r)]) 194 | if verbose: 195 | print("discarding '%s', no digits" % ",".join(refs - tags)) 196 | if verbose: 197 | print("likely tags: %s" % ",".join(sorted(tags))) 198 | for ref in sorted(tags): 199 | # sorting will prefer e.g. "2.0" over "2.0rc1" 200 | if ref.startswith(tag_prefix): 201 | r = ref[len(tag_prefix):] 202 | if verbose: 203 | print("picking %s" % r) 204 | return {"version": r, 205 | "full-revisionid": keywords["full"].strip(), 206 | "dirty": False, "error": None, 207 | "date": date} 208 | # no suitable tags, so version is "0+unknown", but full hex is still there 209 | if verbose: 210 | print("no suitable tags, using unknown + full revision id") 211 | return {"version": "0+unknown", 212 | "full-revisionid": keywords["full"].strip(), 213 | "dirty": False, "error": "no suitable tags", "date": None} 214 | 215 | 216 | @register_vcs_handler("git", "pieces_from_vcs") 217 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 218 | """Get version from 'git describe' in the root of the source tree. 219 | 220 | This only gets called if the git-archive 'subst' keywords were *not* 221 | expanded, and _version.py hasn't already been rewritten with a short 222 | version string, meaning we're inside a checked out source tree. 223 | """ 224 | GITS = ["git"] 225 | if sys.platform == "win32": 226 | GITS = ["git.cmd", "git.exe"] 227 | 228 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 229 | hide_stderr=True) 230 | if rc != 0: 231 | if verbose: 232 | print("Directory %s not under git control" % root) 233 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 234 | 235 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 236 | # if there isn't one, this yields HEX[-dirty] (no NUM) 237 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 238 | "--always", "--long", 239 | "--match", "%s*" % tag_prefix], 240 | cwd=root) 241 | # --long was added in git-1.5.5 242 | if describe_out is None: 243 | raise NotThisMethod("'git describe' failed") 244 | describe_out = describe_out.strip() 245 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 246 | if full_out is None: 247 | raise NotThisMethod("'git rev-parse' failed") 248 | full_out = full_out.strip() 249 | 250 | pieces = {} 251 | pieces["long"] = full_out 252 | pieces["short"] = full_out[:7] # maybe improved later 253 | pieces["error"] = None 254 | 255 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 256 | # TAG might have hyphens. 257 | git_describe = describe_out 258 | 259 | # look for -dirty suffix 260 | dirty = git_describe.endswith("-dirty") 261 | pieces["dirty"] = dirty 262 | if dirty: 263 | git_describe = git_describe[:git_describe.rindex("-dirty")] 264 | 265 | # now we have TAG-NUM-gHEX or HEX 266 | 267 | if "-" in git_describe: 268 | # TAG-NUM-gHEX 269 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 270 | if not mo: 271 | # unparseable. Maybe git-describe is misbehaving? 
272 | pieces["error"] = ("unable to parse git-describe output: '%s'" 273 | % describe_out) 274 | return pieces 275 | 276 | # tag 277 | full_tag = mo.group(1) 278 | if not full_tag.startswith(tag_prefix): 279 | if verbose: 280 | fmt = "tag '%s' doesn't start with prefix '%s'" 281 | print(fmt % (full_tag, tag_prefix)) 282 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 283 | % (full_tag, tag_prefix)) 284 | return pieces 285 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 286 | 287 | # distance: number of commits since tag 288 | pieces["distance"] = int(mo.group(2)) 289 | 290 | # commit: short hex revision ID 291 | pieces["short"] = mo.group(3) 292 | 293 | else: 294 | # HEX: no tags 295 | pieces["closest-tag"] = None 296 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 297 | cwd=root) 298 | pieces["distance"] = int(count_out) # total number of commits 299 | 300 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 301 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], 302 | cwd=root)[0].strip() 303 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 304 | 305 | return pieces 306 | 307 | 308 | def plus_or_dot(pieces): 309 | """Return a + if we don't already have one, else return a .""" 310 | if "+" in pieces.get("closest-tag", ""): 311 | return "." 312 | return "+" 313 | 314 | 315 | def render_pep440(pieces): 316 | """Build up version string, with post-release "local version identifier". 317 | 318 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 319 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 320 | 321 | Exceptions: 322 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 323 | """ 324 | if pieces["closest-tag"]: 325 | rendered = pieces["closest-tag"] 326 | if pieces["distance"] or pieces["dirty"]: 327 | rendered += plus_or_dot(pieces) 328 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 329 | if pieces["dirty"]: 330 | rendered += ".dirty" 331 | else: 332 | # exception #1 333 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 334 | pieces["short"]) 335 | if pieces["dirty"]: 336 | rendered += ".dirty" 337 | return rendered 338 | 339 | 340 | def render_pep440_pre(pieces): 341 | """TAG[.post.devDISTANCE] -- No -dirty. 342 | 343 | Exceptions: 344 | 1: no tags. 0.post.devDISTANCE 345 | """ 346 | if pieces["closest-tag"]: 347 | rendered = pieces["closest-tag"] 348 | if pieces["distance"]: 349 | rendered += ".post.dev%d" % pieces["distance"] 350 | else: 351 | # exception #1 352 | rendered = "0.post.dev%d" % pieces["distance"] 353 | return rendered 354 | 355 | 356 | def render_pep440_post(pieces): 357 | """TAG[.postDISTANCE[.dev0]+gHEX] . 358 | 359 | The ".dev0" means dirty. Note that .dev0 sorts backwards 360 | (a dirty tree will appear "older" than the corresponding clean one), 361 | but you shouldn't be releasing software with -dirty anyways. 362 | 363 | Exceptions: 364 | 1: no tags. 
0.postDISTANCE[.dev0] 365 | """ 366 | if pieces["closest-tag"]: 367 | rendered = pieces["closest-tag"] 368 | if pieces["distance"] or pieces["dirty"]: 369 | rendered += ".post%d" % pieces["distance"] 370 | if pieces["dirty"]: 371 | rendered += ".dev0" 372 | rendered += plus_or_dot(pieces) 373 | rendered += "g%s" % pieces["short"] 374 | else: 375 | # exception #1 376 | rendered = "0.post%d" % pieces["distance"] 377 | if pieces["dirty"]: 378 | rendered += ".dev0" 379 | rendered += "+g%s" % pieces["short"] 380 | return rendered 381 | 382 | 383 | def render_pep440_old(pieces): 384 | """TAG[.postDISTANCE[.dev0]] . 385 | 386 | The ".dev0" means dirty. 387 | 388 | Exceptions: 389 | 1: no tags. 0.postDISTANCE[.dev0] 390 | """ 391 | if pieces["closest-tag"]: 392 | rendered = pieces["closest-tag"] 393 | if pieces["distance"] or pieces["dirty"]: 394 | rendered += ".post%d" % pieces["distance"] 395 | if pieces["dirty"]: 396 | rendered += ".dev0" 397 | else: 398 | # exception #1 399 | rendered = "0.post%d" % pieces["distance"] 400 | if pieces["dirty"]: 401 | rendered += ".dev0" 402 | return rendered 403 | 404 | 405 | def render_git_describe(pieces): 406 | """TAG[-DISTANCE-gHEX][-dirty]. 407 | 408 | Like 'git describe --tags --dirty --always'. 409 | 410 | Exceptions: 411 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 412 | """ 413 | if pieces["closest-tag"]: 414 | rendered = pieces["closest-tag"] 415 | if pieces["distance"]: 416 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 417 | else: 418 | # exception #1 419 | rendered = pieces["short"] 420 | if pieces["dirty"]: 421 | rendered += "-dirty" 422 | return rendered 423 | 424 | 425 | def render_git_describe_long(pieces): 426 | """TAG-DISTANCE-gHEX[-dirty]. 427 | 428 | Like 'git describe --tags --dirty --always --long'. 429 | The distance/hash is unconditional. 430 | 431 | Exceptions: 432 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 433 | """ 434 | if pieces["closest-tag"]: 435 | rendered = pieces["closest-tag"] 436 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 437 | else: 438 | # exception #1 439 | rendered = pieces["short"] 440 | if pieces["dirty"]: 441 | rendered += "-dirty" 442 | return rendered 443 | 444 | 445 | def render(pieces, style): 446 | """Render the given version pieces into the requested style.""" 447 | if pieces["error"]: 448 | return {"version": "unknown", 449 | "full-revisionid": pieces.get("long"), 450 | "dirty": None, 451 | "error": pieces["error"], 452 | "date": None} 453 | 454 | if not style or style == "default": 455 | style = "pep440" # the default 456 | 457 | if style == "pep440": 458 | rendered = render_pep440(pieces) 459 | elif style == "pep440-pre": 460 | rendered = render_pep440_pre(pieces) 461 | elif style == "pep440-post": 462 | rendered = render_pep440_post(pieces) 463 | elif style == "pep440-old": 464 | rendered = render_pep440_old(pieces) 465 | elif style == "git-describe": 466 | rendered = render_git_describe(pieces) 467 | elif style == "git-describe-long": 468 | rendered = render_git_describe_long(pieces) 469 | else: 470 | raise ValueError("unknown style '%s'" % style) 471 | 472 | return {"version": rendered, "full-revisionid": pieces["long"], 473 | "dirty": pieces["dirty"], "error": None, 474 | "date": pieces.get("date")} 475 | 476 | 477 | def get_versions(): 478 | """Get version information or return default if unable to do so.""" 479 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 480 | # __file__, we can work backwards from there to the root.
Some 481 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 482 | # case we can only use expanded keywords. 483 | 484 | cfg = get_config() 485 | verbose = cfg.verbose 486 | 487 | try: 488 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 489 | verbose) 490 | except NotThisMethod: 491 | pass 492 | 493 | try: 494 | root = os.path.realpath(__file__) 495 | # versionfile_source is the relative path from the top of the source 496 | # tree (where the .git directory might live) to this file. Invert 497 | # this to find the root from __file__. 498 | for i in cfg.versionfile_source.split('/'): 499 | root = os.path.dirname(root) 500 | except NameError: 501 | return {"version": "0+unknown", "full-revisionid": None, 502 | "dirty": None, 503 | "error": "unable to find root of source tree", 504 | "date": None} 505 | 506 | try: 507 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 508 | return render(pieces, cfg.style) 509 | except NotThisMethod: 510 | pass 511 | 512 | try: 513 | if cfg.parentdir_prefix: 514 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 515 | except NotThisMethod: 516 | pass 517 | 518 | return {"version": "0+unknown", "full-revisionid": None, 519 | "dirty": None, 520 | "error": "unable to compute version", "date": None} 521 | -------------------------------------------------------------------------------- /alphalens/examples/.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb binary 2 | -------------------------------------------------------------------------------- /alphalens/examples/ic_tear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ckend/alphalens/4979057c6fbd045a7998dba53388654d45a217ef/alphalens/examples/ic_tear.png -------------------------------------------------------------------------------- /alphalens/examples/returns_tear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ckend/alphalens/4979057c6fbd045a7998dba53388654d45a217ef/alphalens/examples/returns_tear.png -------------------------------------------------------------------------------- /alphalens/examples/sector_tear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ckend/alphalens/4979057c6fbd045a7998dba53388654d45a217ef/alphalens/examples/sector_tear.png -------------------------------------------------------------------------------- /alphalens/examples/table_tear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ckend/alphalens/4979057c6fbd045a7998dba53388654d45a217ef/alphalens/examples/table_tear.png -------------------------------------------------------------------------------- /alphalens/plotting.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2017 Quantopian, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import numpy as np 17 | import pandas as pd 18 | from scipy import stats 19 | import statsmodels.api as sm 20 | 21 | import seaborn as sns 22 | import matplotlib.cm as cm 23 | import matplotlib.pyplot as plt 24 | from matplotlib.ticker import ScalarFormatter 25 | 26 | from functools import wraps 27 | 28 | from . import utils 29 | from . import performance as perf 30 | 31 | DECIMAL_TO_BPS = 10000 32 | 33 | 34 | def customize(func): 35 | """ 36 | Decorator to set plotting context and axes style during function call. 37 | """ 38 | @wraps(func) 39 | def call_w_context(*args, **kwargs): 40 | set_context = kwargs.pop('set_context', True) 41 | if set_context: 42 | color_palette = sns.color_palette('colorblind') 43 | with plotting_context(), axes_style(), color_palette: 44 | sns.despine(left=True) 45 | return func(*args, **kwargs) 46 | else: 47 | return func(*args, **kwargs) 48 | return call_w_context 49 | 50 | 51 | def plotting_context(context='notebook', font_scale=1.5, rc=None): 52 | """ 53 | Create alphalens default plotting style context. 54 | 55 | Under the hood, calls and returns seaborn.plotting_context() with 56 | some custom settings. Usually you would use it in a with-context. 57 | 58 | Parameters 59 | ---------- 60 | context : str, optional 61 | Name of seaborn context. 62 | font_scale : float, optional 63 | Scale font by factor font_scale. 64 | rc : dict, optional 65 | Config flags. 66 | By default, {'lines.linewidth': 1.5} 67 | is being used and will be added to any 68 | rc passed in, unless explicitly overridden. 69 | 70 | Returns 71 | ------- 72 | seaborn plotting context 73 | 74 | Example 75 | ------- 76 | with alphalens.plotting.plotting_context(font_scale=2): 77 | alphalens.create_full_tear_sheet(..., set_context=False) 78 | 79 | See also 80 | -------- 81 | For more information, see seaborn.plotting_context(). 82 | """ 83 | if rc is None: 84 | rc = {} 85 | 86 | rc_default = {'lines.linewidth': 1.5} 87 | 88 | # Add defaults if they do not exist 89 | for name, val in rc_default.items(): 90 | rc.setdefault(name, val) 91 | 92 | return sns.plotting_context(context=context, font_scale=font_scale, rc=rc) 93 | 94 | 95 | def axes_style(style='darkgrid', rc=None): 96 | """Create alphalens default axes style context. 97 | 98 | Under the hood, calls and returns seaborn.axes_style() with 99 | some custom settings. Usually you would use it in a with-context. 100 | 101 | Parameters 102 | ---------- 103 | style : str, optional 104 | Name of seaborn style. 105 | rc : dict, optional 106 | Config flags. 107 | 108 | Returns 109 | ------- 110 | seaborn axes style context 111 | 112 | Example 113 | ------- 114 | with alphalens.plotting.axes_style(style='whitegrid'): 115 | alphalens.create_full_tear_sheet(..., set_context=False) 116 | 117 | See also 118 | -------- 119 | For more information, see seaborn.axes_style().
120 | 121 | """ 122 | if rc is None: 123 | rc = {} 124 | 125 | rc_default = {} 126 | 127 | # Add defaults if they do not exist 128 | for name, val in rc_default.items(): 129 | rc.setdefault(name, val) 130 | 131 | return sns.axes_style(style=style, rc=rc) 132 | 133 | 134 | def plot_returns_table(alpha_beta, 135 | mean_ret_quantile, 136 | mean_ret_spread_quantile): 137 | returns_table = pd.DataFrame() 138 | returns_table = returns_table.append(alpha_beta) 139 | returns_table.loc["Mean Period Wise Return Top Quantile (bps)"] = \ 140 | mean_ret_quantile.iloc[-1] * DECIMAL_TO_BPS 141 | returns_table.loc["Mean Period Wise Return Bottom Quantile (bps)"] = \ 142 | mean_ret_quantile.iloc[0] * DECIMAL_TO_BPS 143 | returns_table.loc["Mean Period Wise Spread (bps)"] = \ 144 | mean_ret_spread_quantile.mean() * DECIMAL_TO_BPS 145 | 146 | print("Returns Analysis") 147 | utils.print_table(returns_table.apply(lambda x: x.round(3))) 148 | 149 | 150 | def plot_turnover_table(autocorrelation_data, quantile_turnover): 151 | turnover_table = pd.DataFrame() 152 | for period in sorted(quantile_turnover.keys()): 153 | for quantile, p_data in quantile_turnover[period].iteritems(): 154 | turnover_table.loc["Quantile {} Mean Turnover ".format(quantile), 155 | "{}D".format(period)] = p_data.mean() 156 | auto_corr = pd.DataFrame() 157 | for period, p_data in autocorrelation_data.iteritems(): 158 | auto_corr.loc["Mean Factor Rank Autocorrelation", 159 | "{}D".format(period)] = p_data.mean() 160 | 161 | print("Turnover Analysis") 162 | utils.print_table(turnover_table.apply(lambda x: x.round(3))) 163 | utils.print_table(auto_corr.apply(lambda x: x.round(3))) 164 | 165 | 166 | def plot_information_table(ic_data): 167 | ic_summary_table = pd.DataFrame() 168 | ic_summary_table["IC Mean"] = ic_data.mean() 169 | ic_summary_table["IC Std."] = ic_data.std() 170 | ic_summary_table["Risk-Adjusted IC"] = \ 171 | ic_data.mean() / ic_data.std() 172 | t_stat, p_value = stats.ttest_1samp(ic_data, 0) 173 | ic_summary_table["t-stat(IC)"] = t_stat 174 | ic_summary_table["p-value(IC)"] = p_value 175 | ic_summary_table["IC Skew"] = stats.skew(ic_data) 176 | ic_summary_table["IC Kurtosis"] = stats.kurtosis(ic_data) 177 | 178 | print("Information Analysis") 179 | utils.print_table(ic_summary_table.apply(lambda x: x.round(3)).T) 180 | 181 | 182 | def plot_quantile_statistics_table(factor_data): 183 | quantile_stats = factor_data.groupby('factor_quantile') \ 184 | .agg(['min', 'max', 'mean', 'std', 'count'])['factor'] 185 | quantile_stats['count %'] = quantile_stats['count'] \ 186 | / quantile_stats['count'].sum() * 100. 187 | 188 | print("Quantiles Statistics") 189 | utils.print_table(quantile_stats) 190 | 191 | 192 | def plot_ic_ts(ic, ax=None): 193 | """ 194 | Plots Spearman Rank Information Coefficient and IC moving 195 | average for a given factor. 196 | 197 | Parameters 198 | ---------- 199 | ic : pd.DataFrame 200 | DataFrame indexed by date, with IC for each forward return. 201 | ax : matplotlib.Axes, optional 202 | Axes upon which to plot. 203 | 204 | Returns 205 | ------- 206 | ax : matplotlib.Axes 207 | The axes that were plotted on. 
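
Example
-------
A minimal usage sketch, assuming factor_data was built with
utils.get_clean_factor_and_forward_returns:

    from alphalens import performance, plotting
    ic = performance.factor_information_coefficient(factor_data)
    plotting.plot_ic_ts(ic)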
208 | """ 209 | ic = ic.copy() 210 | 211 | num_plots = len(ic.columns) 212 | if ax is None: 213 | f, ax = plt.subplots(num_plots, 1, figsize=(18, num_plots * 7)) 214 | ax = np.asarray([ax]).flatten() 215 | 216 | ymin, ymax = (None, None) 217 | for a, (period_num, ic) in zip(ax, ic.iteritems()): 218 | ic.plot(alpha=0.7, ax=a, lw=0.7, color='steelblue') 219 | ic.rolling(window=22).mean().plot( 220 | ax=a, 221 | color='forestgreen', 222 | lw=2, 223 | alpha=0.8 224 | ) 225 | 226 | a.set(ylabel='IC', xlabel="") 227 | a.set_title( 228 | "{} Period Forward Return Information Coefficient (IC)" 229 | .format(period_num)) 230 | a.axhline(0.0, linestyle='-', color='black', lw=1, alpha=0.8) 231 | a.legend(['IC', '1 month moving avg'], loc='upper right') 232 | a.text(.05, .95, "Mean %.3f \n Std. %.3f" % (ic.mean(), ic.std()), 233 | fontsize=16, 234 | bbox={'facecolor': 'white', 'alpha': 1, 'pad': 5}, 235 | transform=a.transAxes, 236 | verticalalignment='top') 237 | 238 | curr_ymin, curr_ymax = a.get_ylim() 239 | ymin = curr_ymin if ymin is None else min(ymin, curr_ymin) 240 | ymax = curr_ymax if ymax is None else max(ymax, curr_ymax) 241 | 242 | for a in ax: 243 | a.set_ylim([ymin, ymax]) 244 | 245 | return ax 246 | 247 | 248 | def plot_ic_hist(ic, ax=None): 249 | """ 250 | Plots Spearman Rank Information Coefficient histogram for a given factor. 251 | 252 | Parameters 253 | ---------- 254 | ic : pd.DataFrame 255 | DataFrame indexed by date, with IC for each forward return. 256 | ax : matplotlib.Axes, optional 257 | Axes upon which to plot. 258 | 259 | Returns 260 | ------- 261 | ax : matplotlib.Axes 262 | The axes that were plotted on. 263 | """ 264 | 265 | ic = ic.copy() 266 | 267 | num_plots = len(ic.columns) 268 | 269 | v_spaces = ((num_plots - 1) // 3) + 1 270 | 271 | if ax is None: 272 | f, ax = plt.subplots(v_spaces, 3, figsize=(18, v_spaces * 6)) 273 | ax = ax.flatten() 274 | 275 | for a, (period_num, ic) in zip(ax, ic.iteritems()): 276 | sns.distplot(ic.replace(np.nan, 0.), norm_hist=True, ax=a) 277 | a.set(title="%s Period IC" % period_num, xlabel='IC') 278 | a.set_xlim([-1, 1]) 279 | a.text(.05, .95, "Mean %.3f \n Std. %.3f" % (ic.mean(), ic.std()), 280 | fontsize=16, 281 | bbox={'facecolor': 'white', 'alpha': 1, 'pad': 5}, 282 | transform=a.transAxes, 283 | verticalalignment='top') 284 | a.axvline(ic.mean(), color='w', linestyle='dashed', linewidth=2) 285 | 286 | if num_plots < len(ax): 287 | ax[-1].set_visible(False) 288 | 289 | return ax 290 | 291 | 292 | def plot_ic_qq(ic, theoretical_dist=stats.norm, ax=None): 293 | """ 294 | Plots Spearman Rank Information Coefficient "Q-Q" plot relative to 295 | a theoretical distribution. 296 | 297 | Parameters 298 | ---------- 299 | ic : pd.DataFrame 300 | DataFrame indexed by date, with IC for each forward return. 301 | theoretical_dist : scipy.stats._continuous_distns 302 | Continuous distribution generator. scipy.stats.norm and 303 | scipy.stats.t are popular options. 304 | ax : matplotlib.Axes, optional 305 | Axes upon which to plot. 306 | 307 | Returns 308 | ------- 309 | ax : matplotlib.Axes 310 | The axes that were plotted on. 
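
Example
-------
A minimal sketch, assuming factor_data from
utils.get_clean_factor_and_forward_returns; here the IC distribution
is compared against a t-distribution instead of the normal default:

    from scipy import stats
    from alphalens import performance, plotting
    ic = performance.factor_information_coefficient(factor_data)
    plotting.plot_ic_qq(ic, theoretical_dist=stats.t)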
311 | """ 312 | 313 | ic = ic.copy() 314 | 315 | num_plots = len(ic.columns) 316 | 317 | v_spaces = ((num_plots - 1) // 3) + 1 318 | 319 | if ax is None: 320 | f, ax = plt.subplots(v_spaces, 3, figsize=(18, v_spaces * 6)) 321 | ax = ax.flatten() 322 | 323 | if isinstance(theoretical_dist, stats.norm.__class__): 324 | dist_name = 'Normal' 325 | elif isinstance(theoretical_dist, stats.t.__class__): 326 | dist_name = 'T' 327 | else: 328 | dist_name = 'Theoretical' 329 | 330 | for a, (period_num, ic) in zip(ax, ic.iteritems()): 331 | sm.qqplot(ic.replace(np.nan, 0.).values, theoretical_dist, fit=True, 332 | line='45', ax=a) 333 | a.set(title="{} Period IC {} Dist. Q-Q".format( 334 | period_num, dist_name), 335 | ylabel='Observed Quantile', 336 | xlabel='{} Distribution Quantile'.format(dist_name)) 337 | 338 | return ax 339 | 340 | 341 | def plot_quantile_returns_bar(mean_ret_by_q, 342 | by_group=False, 343 | ylim_percentiles=None, 344 | ax=None): 345 | """ 346 | Plots mean period wise returns for factor quantiles. 347 | 348 | Parameters 349 | ---------- 350 | mean_ret_by_q : pd.DataFrame 351 | DataFrame with quantile, (group) and mean period wise return values. 352 | by_group : bool 353 | Disaggregated figures by group. 354 | ylim_percentiles : tuple of integers 355 | Percentiles of observed data to use as y limits for plot. 356 | ax : matplotlib.Axes, optional 357 | Axes upon which to plot. 358 | 359 | Returns 360 | ------- 361 | ax : matplotlib.Axes 362 | The axes that were plotted on. 363 | """ 364 | 365 | mean_ret_by_q = mean_ret_by_q.copy() 366 | 367 | if ylim_percentiles is not None: 368 | ymin = (np.nanpercentile(mean_ret_by_q.values, 369 | ylim_percentiles[0]) * DECIMAL_TO_BPS) 370 | ymax = (np.nanpercentile(mean_ret_by_q.values, 371 | ylim_percentiles[1]) * DECIMAL_TO_BPS) 372 | else: 373 | ymin = None 374 | ymax = None 375 | 376 | if by_group: 377 | num_group = len( 378 | mean_ret_by_q.index.get_level_values('group').unique()) 379 | 380 | if ax is None: 381 | v_spaces = ((num_group - 1) // 2) + 1 382 | f, ax = plt.subplots(v_spaces, 2, sharex=False, 383 | sharey=True, figsize=(18, 6 * v_spaces)) 384 | ax = ax.flatten() 385 | 386 | for a, (sc, cor) in zip(ax, mean_ret_by_q.groupby(level='group')): 387 | (cor.xs(sc, level='group') 388 | .multiply(DECIMAL_TO_BPS) 389 | .plot(kind='bar', title=sc, ax=a)) 390 | 391 | a.set(xlabel='', ylabel='Mean Return (bps)', 392 | ylim=(ymin, ymax)) 393 | 394 | if num_group < len(ax): 395 | ax[-1].set_visible(False) 396 | 397 | return ax 398 | 399 | else: 400 | if ax is None: 401 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 402 | 403 | (mean_ret_by_q.multiply(DECIMAL_TO_BPS) 404 | .plot(kind='bar', 405 | title="Mean Period Wise Return By Factor Quantile", ax=ax)) 406 | ax.set(xlabel='', ylabel='Mean Return (bps)', 407 | ylim=(ymin, ymax)) 408 | 409 | return ax 410 | 411 | 412 | def plot_quantile_returns_violin(return_by_q, 413 | ylim_percentiles=None, 414 | ax=None): 415 | """ 416 | Plots a violin box plot of period wise returns for factor quantiles. 417 | 418 | Parameters 419 | ---------- 420 | return_by_q : pd.DataFrame - MultiIndex 421 | DataFrame with date and quantile as rows MultiIndex, 422 | forward return windows as columns, returns as values. 423 | ylim_percentiles : tuple of integers 424 | Percentiles of observed data to use as y limits for plot. 425 | ax : matplotlib.Axes, optional 426 | Axes upon which to plot. 427 | 428 | Returns 429 | ------- 430 | ax : matplotlib.Axes 431 | The axes that were plotted on. 
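
Example
-------
A minimal sketch, assuming factor_data from
utils.get_clean_factor_and_forward_returns; the violin plot needs
per-date quantile returns, hence by_date=True:

    from alphalens import performance, plotting
    mean_ret_by_date, _ = performance.mean_return_by_quantile(
        factor_data, by_date=True)
    plotting.plot_quantile_returns_violin(mean_ret_by_date,
                                          ylim_percentiles=(1, 99))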
432 | """ 433 | 434 | return_by_q = return_by_q.copy() 435 | 436 | if ylim_percentiles is not None: 437 | ymin = (np.nanpercentile(return_by_q.values, 438 | ylim_percentiles[0]) * DECIMAL_TO_BPS) 439 | ymax = (np.nanpercentile(return_by_q.values, 440 | ylim_percentiles[1]) * DECIMAL_TO_BPS) 441 | else: 442 | ymin = None 443 | ymax = None 444 | 445 | if ax is None: 446 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 447 | 448 | unstacked_dr = (return_by_q 449 | .multiply(DECIMAL_TO_BPS)) 450 | unstacked_dr.columns = unstacked_dr.columns.set_names('forward_periods') 451 | unstacked_dr = unstacked_dr.stack() 452 | unstacked_dr.name = 'return' 453 | unstacked_dr = unstacked_dr.reset_index() 454 | 455 | sns.violinplot(data=unstacked_dr, 456 | x='factor_quantile', 457 | hue='forward_periods', 458 | y='return', 459 | orient='v', 460 | cut=0, 461 | inner='quartile', 462 | ax=ax) 463 | ax.set(xlabel='', ylabel='Return (bps)', 464 | title="Period Wise Return By Factor Quantile", 465 | ylim=(ymin, ymax)) 466 | 467 | ax.axhline(0.0, linestyle='-', color='black', lw=0.7, alpha=0.6) 468 | 469 | return ax 470 | 471 | 472 | def plot_mean_quantile_returns_spread_time_series(mean_returns_spread, 473 | std_err=None, 474 | bandwidth=1, 475 | ax=None): 476 | """ 477 | Plots the period wise mean return spread between two factor quantiles over time. 478 | 479 | Parameters 480 | ---------- 481 | mean_returns_spread : pd.Series 482 | Series with difference between quantile mean returns by period. 483 | std_err : pd.Series 484 | Series with standard error of difference between quantile 485 | mean returns each period. 486 | bandwidth : float 487 | Width of displayed error bands in standard deviations. 488 | ax : matplotlib.Axes, optional 489 | Axes upon which to plot. 490 | 491 | Returns 492 | ------- 493 | ax : matplotlib.Axes 494 | The axes that were plotted on.
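
Example
-------
A minimal sketch, assuming factor_data from
utils.get_clean_factor_and_forward_returns with quantiles=5:

    from alphalens import performance, plotting
    mean_ret, std_err = performance.mean_return_by_quantile(
        factor_data, by_date=True)
    spread, spread_std = performance.compute_mean_returns_spread(
        mean_ret, upper_quant=5, lower_quant=1, std_err=std_err)
    plotting.plot_mean_quantile_returns_spread_time_series(
        spread, std_err=spread_std)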
495 | """ 496 | 497 | if isinstance(mean_returns_spread, pd.DataFrame): 498 | if ax is None: 499 | ax = [None for a in mean_returns_spread.columns] 500 | 501 | ymin, ymax = (None, None) 502 | for (i, a), (name, fr_column) in zip(enumerate(ax), 503 | mean_returns_spread.iteritems()): 504 | stdn = None if std_err is None else std_err[name] 505 | a = plot_mean_quantile_returns_spread_time_series(fr_column, 506 | std_err=stdn, 507 | ax=a) 508 | ax[i] = a 509 | curr_ymin, curr_ymax = a.get_ylim() 510 | ymin = curr_ymin if ymin is None else min(ymin, curr_ymin) 511 | ymax = curr_ymax if ymax is None else max(ymax, curr_ymax) 512 | 513 | for a in ax: 514 | a.set_ylim([ymin, ymax]) 515 | 516 | return ax 517 | 518 | if mean_returns_spread.isnull().all(): 519 | return ax 520 | 521 | periods = mean_returns_spread.name 522 | title = ('Top Minus Bottom Quantile Mean Return ({} Period Forward Return)' 523 | .format(periods if periods is not None else "")) 524 | 525 | if ax is None: 526 | f, ax = plt.subplots(figsize=(18, 6)) 527 | 528 | mean_returns_spread_bps = mean_returns_spread * DECIMAL_TO_BPS 529 | 530 | mean_returns_spread_bps.plot(alpha=0.4, ax=ax, lw=0.7, color='forestgreen') 531 | mean_returns_spread_bps.rolling(window=22).mean().plot( 532 | color='orangered', 533 | alpha=0.7, 534 | ax=ax 535 | ) 536 | ax.legend(['mean returns spread', '1 month moving avg'], loc='upper right') 537 | 538 | if std_err is not None: 539 | std_err_bps = std_err * DECIMAL_TO_BPS 540 | upper = mean_returns_spread_bps.values + (std_err_bps * bandwidth) 541 | lower = mean_returns_spread_bps.values - (std_err_bps * bandwidth) 542 | ax.fill_between(mean_returns_spread.index, 543 | lower, 544 | upper, 545 | alpha=0.3, 546 | color='steelblue') 547 | 548 | ylim = np.nanpercentile(abs(mean_returns_spread_bps.values), 95) 549 | ax.set(ylabel='Difference In Quantile Mean Return (bps)', 550 | xlabel='', 551 | title=title, 552 | ylim=(-ylim, ylim)) 553 | ax.axhline(0.0, linestyle='-', color='black', lw=1, alpha=0.8) 554 | 555 | return ax 556 | 557 | 558 | def plot_ic_by_group(ic_group, ax=None): 559 | """ 560 | Plots Spearman Rank Information Coefficient for a given factor over 561 | provided forward returns. Separates by group. 562 | 563 | Parameters 564 | ---------- 565 | ic_group : pd.DataFrame 566 | group-wise mean information coefficient per period. 567 | ax : matplotlib.Axes, optional 568 | Axes upon which to plot. 569 | 570 | Returns 571 | ------- 572 | ax : matplotlib.Axes 573 | The axes that were plotted on. 574 | """ 575 | if ax is None: 576 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 577 | ic_group.plot(kind='bar', ax=ax) 578 | 579 | ax.set(title="Information Coefficient By Group", xlabel="") 580 | ax.set_xticklabels(ic_group.index, rotation=45) 581 | 582 | return ax 583 | 584 | 585 | def plot_factor_rank_auto_correlation(factor_autocorrelation, 586 | period=1, 587 | ax=None): 588 | """ 589 | Plots factor rank autocorrelation over time. 590 | See factor_rank_autocorrelation for more details. 591 | 592 | Parameters 593 | ---------- 594 | factor_autocorrelation : pd.Series 595 | Rolling 1 period (defined by time_rule) autocorrelation 596 | of factor values. 597 | period: int, optional 598 | Period over which the autocorrelation is calculated. 599 | ax : matplotlib.Axes, optional 600 | Axes upon which to plot. 601 | 602 | Returns 603 | ------- 604 | ax : matplotlib.Axes 605 | The axes that were plotted on.
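
Example
-------
A minimal sketch, assuming factor_data from
utils.get_clean_factor_and_forward_returns:

    from alphalens import performance, plotting
    autocorr = performance.factor_rank_autocorrelation(factor_data,
                                                       period=1)
    plotting.plot_factor_rank_auto_correlation(autocorr, period=1)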
606 | """ 607 | if ax is None: 608 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 609 | 610 | factor_autocorrelation.plot(title='{}D Period Factor Rank Autocorrelation' 611 | .format(period), ax=ax) 612 | ax.set(ylabel='Autocorrelation Coefficient', xlabel='') 613 | ax.axhline(0.0, linestyle='-', color='black', lw=1) 614 | ax.text(.05, .95, "Mean %.3f" % factor_autocorrelation.mean(), 615 | fontsize=16, 616 | bbox={'facecolor': 'white', 'alpha': 1, 'pad': 5}, 617 | transform=ax.transAxes, 618 | verticalalignment='top') 619 | 620 | return ax 621 | 622 | 623 | def plot_top_bottom_quantile_turnover(quantile_turnover, period=1, ax=None): 624 | """ 625 | Plots period wise top and bottom quantile factor turnover. 626 | 627 | Parameters 628 | ---------- 629 | quantile_turnover: pd.Dataframe 630 | Quantile turnover (each DataFrame column a quantile). 631 | period: int, optional 632 | Period over which to calculate the turnover. 633 | ax : matplotlib.Axes, optional 634 | Axes upon which to plot. 635 | 636 | Returns 637 | ------- 638 | ax : matplotlib.Axes 639 | The axes that were plotted on. 640 | """ 641 | if ax is None: 642 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 643 | 644 | max_quantile = quantile_turnover.columns.max() 645 | min_quantile = quantile_turnover.columns.min() 646 | turnover = pd.DataFrame() 647 | turnover['top quantile turnover'] = quantile_turnover[max_quantile] 648 | turnover['bottom quantile turnover'] = quantile_turnover[min_quantile] 649 | turnover.plot(title='{}D Period Top and Bottom Quantile Turnover' 650 | .format(period), ax=ax, alpha=0.6, lw=0.8) 651 | ax.set(ylabel='Proportion Of Names New To Quantile', xlabel="") 652 | 653 | return ax 654 | 655 | 656 | def plot_monthly_ic_heatmap(mean_monthly_ic, ax=None): 657 | """ 658 | Plots a heatmap of the information coefficient or returns by month. 659 | 660 | Parameters 661 | ---------- 662 | mean_monthly_ic : pd.DataFrame 663 | The mean monthly IC for N periods forward. 664 | 665 | Returns 666 | ------- 667 | ax : matplotlib.Axes 668 | The axes that were plotted on. 669 | """ 670 | 671 | mean_monthly_ic = mean_monthly_ic.copy() 672 | 673 | num_plots = len(mean_monthly_ic.columns) 674 | 675 | v_spaces = ((num_plots - 1) // 3) + 1 676 | 677 | if ax is None: 678 | f, ax = plt.subplots(v_spaces, 3, figsize=(18, v_spaces * 6)) 679 | ax = ax.flatten() 680 | 681 | new_index_year = [] 682 | new_index_month = [] 683 | for date in mean_monthly_ic.index: 684 | new_index_year.append(date.year) 685 | new_index_month.append(date.month) 686 | 687 | mean_monthly_ic.index = pd.MultiIndex.from_arrays( 688 | [new_index_year, new_index_month], 689 | names=["year", "month"]) 690 | 691 | for a, (periods_num, ic) in zip(ax, mean_monthly_ic.iteritems()): 692 | 693 | sns.heatmap( 694 | ic.unstack(), 695 | annot=True, 696 | alpha=1.0, 697 | center=0.0, 698 | annot_kws={"size": 7}, 699 | linewidths=0.01, 700 | linecolor='white', 701 | cmap=cm.coolwarm_r, 702 | cbar=False, 703 | ax=a) 704 | a.set(ylabel='', xlabel='') 705 | 706 | a.set_title("Monthly Mean {} Period IC".format(periods_num)) 707 | 708 | if num_plots < len(ax): 709 | ax[-1].set_visible(False) 710 | 711 | return ax 712 | 713 | 714 | def plot_cumulative_returns(factor_returns, 715 | period, 716 | freq=None, 717 | title=None, 718 | ax=None): 719 | """ 720 | Plots the cumulative returns of the returns series passed in. 721 | 722 | Parameters 723 | ---------- 724 | factor_returns : pd.Series 725 | Period wise returns of dollar neutral portfolio weighted by factor 726 | value. 
727 |     period : pandas.Timedelta or string
728 |         Length of period for which the returns are computed (e.g. 1 day)
729 |         if 'period' is a string it must follow pandas.Timedelta constructor
730 |         format (e.g. '1 days', '1D', '30m', '3h', '1D1h', etc)
731 |     freq : pandas DateOffset
732 |         Used to specify a particular trading calendar e.g. BusinessDay or Day
733 |         Usually this is inferred from utils.infer_trading_calendar, which is
734 |         called by either get_clean_factor_and_forward_returns or
735 |         compute_forward_returns
736 |     title: string, optional
737 |         Custom title
738 |     ax : matplotlib.Axes, optional
739 |         Axes upon which to plot.
740 | 
741 |     Returns
742 |     -------
743 |     ax : matplotlib.Axes
744 |         The axes that were plotted on.
745 |     """
746 |     if ax is None:
747 |         f, ax = plt.subplots(1, 1, figsize=(18, 6))
748 | 
749 |     factor_returns = perf.cumulative_returns(factor_returns)
750 | 
751 |     factor_returns.plot(ax=ax, lw=3, color='forestgreen', alpha=0.6)
752 |     ax.set(ylabel='Cumulative Returns',
753 |            title=("Portfolio Cumulative Return ({} Fwd Period)".format(period)
754 |                   if title is None else title),
755 |            xlabel='')
756 |     ax.axhline(1.0, linestyle='-', color='black', lw=1)
757 | 
758 |     return ax
759 | 
760 | 
761 | def plot_cumulative_returns_by_quantile(quantile_returns,
762 |                                         period,
763 |                                         freq=None,
764 |                                         ax=None):
765 |     """
766 |     Plots the cumulative returns of various factor quantiles.
767 | 
768 |     Parameters
769 |     ----------
770 |     quantile_returns : pd.DataFrame
771 |         Returns by factor quantile
772 |     period : pandas.Timedelta or string
773 |         Length of period for which the returns are computed (e.g. 1 day)
774 |         if 'period' is a string it must follow pandas.Timedelta constructor
775 |         format (e.g. '1 days', '1D', '30m', '3h', '1D1h', etc)
776 |     freq : pandas DateOffset
777 |         Used to specify a particular trading calendar e.g. BusinessDay or Day
778 |         Usually this is inferred from utils.infer_trading_calendar, which is
779 |         called by either get_clean_factor_and_forward_returns or
780 |         compute_forward_returns
781 |     ax : matplotlib.Axes, optional
782 |         Axes upon which to plot.
783 | 
784 |     Returns
785 |     -------
786 |     ax : matplotlib.Axes
787 |     """
788 | 
789 |     if ax is None:
790 |         f, ax = plt.subplots(1, 1, figsize=(18, 6))
791 | 
792 |     ret_wide = quantile_returns.unstack('factor_quantile')
793 | 
794 |     cum_ret = ret_wide.apply(perf.cumulative_returns)
795 | 
796 |     cum_ret = cum_ret.loc[:, ::-1]  # we want negative quantiles as 'red'
797 | 
798 |     cum_ret.plot(lw=2, ax=ax, cmap=cm.coolwarm)
799 |     ax.legend()
800 |     ymin, ymax = cum_ret.min().min(), cum_ret.max().max()
801 |     ax.set(ylabel='Log Cumulative Returns',
802 |            title='''Cumulative Return by Quantile
803 |                     ({} Period Forward Return)'''.format(period),
804 |            xlabel='',
805 |            yscale='symlog',
806 |            yticks=np.linspace(ymin, ymax, 5),
807 |            ylim=(ymin, ymax))
808 | 
809 |     ax.yaxis.set_major_formatter(ScalarFormatter())
810 |     ax.axhline(1.0, linestyle='-', color='black', lw=1)
811 | 
812 |     return ax
813 | 
814 | 
815 | def plot_quantile_average_cumulative_return(avg_cumulative_returns,
816 |                                             by_quantile=False,
817 |                                             std_bar=False,
818 |                                             title=None,
819 |                                             ax=None):
820 |     """
821 |     Plots the average cumulative returns by factor quantile in the
822 |     periods before and after the factor (event) date.
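For the one-argument form of perf.cumulative_returns used in this file, compounding simple period returns is a plain cumulative product; a hand-checked sketch (the helper itself may differ in detail):

```python
import pandas as pd

# Compounding simple period returns into a cumulative growth factor.
returns = pd.Series([0.01, -0.02, 0.03])
cumulative = (1 + returns).cumprod()
print(cumulative.round(6))  # 1.010000, 0.989800, 1.019494
```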
823 | 
824 |     Parameters
825 |     ----------
826 |     avg_cumulative_returns : pd.DataFrame
827 |         The format is the one returned by
828 |         performance.average_cumulative_return_by_quantile
829 |     by_quantile : boolean, optional
830 |         Disaggregate figures by quantile (useful to clearly see std dev bars)
831 |     std_bar : boolean, optional
832 |         Plot standard deviation bars
833 |     title: string, optional
834 |         Custom title
835 |     ax : matplotlib.Axes, optional
836 |         Axes upon which to plot.
837 | 
838 |     Returns
839 |     -------
840 |     ax : matplotlib.Axes
841 |     """
842 | 
843 |     avg_cumulative_returns = avg_cumulative_returns.multiply(DECIMAL_TO_BPS)
844 |     quantiles = len(avg_cumulative_returns.index.levels[0].unique())
845 |     palette = [cm.coolwarm(i) for i in np.linspace(0, 1, quantiles)]
846 |     palette = palette[::-1]  # we want negative quantiles as 'red'
847 | 
848 |     if by_quantile:
849 | 
850 |         if ax is None:
851 |             v_spaces = ((quantiles - 1) // 2) + 1
852 |             f, ax = plt.subplots(v_spaces, 2, sharex=False,
853 |                                  sharey=False, figsize=(18, 6 * v_spaces))
854 |             ax = ax.flatten()
855 | 
856 |         for i, (quantile, q_ret) in enumerate(avg_cumulative_returns
857 |                                               .groupby(level='factor_quantile')
858 |                                               ):
859 | 
860 |             mean = q_ret.loc[(quantile, 'mean')]
861 |             mean.name = 'Quantile ' + str(quantile)
862 |             mean.plot(ax=ax[i], color=palette[i])
863 |             ax[i].set_ylabel('Mean Return (bps)')
864 | 
865 |             if std_bar:
866 |                 std = q_ret.loc[(quantile, 'std')]
867 |                 ax[i].errorbar(std.index, mean, yerr=std,
868 |                                fmt='none', ecolor=palette[i], label='none')
869 | 
870 |             ax[i].axvline(x=0, color='k', linestyle='--')
871 |             ax[i].legend()
872 | 
873 | 
874 |     else:
875 | 
876 |         if ax is None:
877 |             f, ax = plt.subplots(1, 1, figsize=(18, 6))
878 | 
879 |         for i, (quantile, q_ret) in enumerate(avg_cumulative_returns
880 |                                               .groupby(level='factor_quantile')
881 |                                               ):
882 | 
883 |             mean = q_ret.loc[(quantile, 'mean')]
884 |             mean.name = 'Quantile ' + str(quantile)
885 |             mean.plot(ax=ax, color=palette[i])
886 | 
887 |             if std_bar:
888 |                 std = q_ret.loc[(quantile, 'std')]
889 |                 ax.errorbar(std.index, mean, yerr=std,
890 |                             fmt='none', ecolor=palette[i], label='none')
891 | 
892 | 
893 |         ax.axvline(x=0, color='k', linestyle='--')
894 |         ax.legend()
895 |         ax.set(ylabel='Mean Return (bps)',
896 |                title=("Average Cumulative Returns by Quantile"
897 |                       if title is None else title),
898 |                xlabel='Periods')
899 | 
900 |     return ax
901 | 
902 | 
903 | def plot_events_distribution(events, num_bars=50, ax=None):
904 |     """
905 |     Plots the distribution of events in time.
906 | 
907 |     Parameters
908 |     ----------
909 |     events : pd.Series
910 |         A pd.Series whose index contains at least a 'date' level.
911 |     num_bars : integer, optional
912 |         Number of bars to plot
913 |     ax : matplotlib.Axes, optional
914 |         Axes upon which to plot.
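plot_quantile_average_cumulative_return above expects the (factor_quantile, statistic) layout produced by performance.average_cumulative_return_by_quantile; a synthetic frame showing that access pattern:

```python
import numpy as np
import pandas as pd

# Rows are keyed by (factor_quantile, 'mean'/'std'); columns are period
# offsets around the event, negative before and positive after.
offsets = range(-2, 3)
idx = pd.MultiIndex.from_product([[1, 2], ['mean', 'std']],
                                 names=['factor_quantile', None])
avg_cum_ret = pd.DataFrame(np.random.uniform(-0.01, 0.01, (4, 5)),
                           index=idx, columns=offsets)
for quantile, q_ret in avg_cum_ret.groupby(level='factor_quantile'):
    mean = q_ret.loc[(quantile, 'mean')]  # a Series over the offsets
    std = q_ret.loc[(quantile, 'std')]
    print(quantile, float(mean[0]), float(std[0]))
```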
915 | 
916 |     Returns
917 |     -------
918 |     ax : matplotlib.Axes
919 |     """
920 | 
921 |     if ax is None:
922 |         f, ax = plt.subplots(1, 1, figsize=(18, 6))
923 | 
924 |     start = events.index.get_level_values('date').min()
925 |     end = events.index.get_level_values('date').max()
926 |     group_interval = (end - start) / num_bars
927 |     grouper = pd.Grouper(level='date', freq=group_interval)
928 |     events.groupby(grouper).count().plot(kind="bar", grid=False, ax=ax)
929 |     ax.set(ylabel='Number of events',
930 |            title='Distribution of events in time',
931 |            xlabel='Date')
932 | 
933 |     return ax
934 | 
--------------------------------------------------------------------------------
/alphalens/tears.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2017 Quantopian, Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import matplotlib.gridspec as gridspec
17 | import matplotlib.pyplot as plt
18 | import pandas as pd
19 | import warnings
20 | 
21 | from . import plotting
22 | from . import performance as perf
23 | from . import utils
24 | 
25 | 
26 | class GridFigure(object):
27 |     """
28 |     Helper class for laying out tear sheet plots on a grid of subplots.
29 |     """
30 | 
31 |     def __init__(self, rows, cols):
32 |         self.rows = rows
33 |         self.cols = cols
34 |         self.fig = plt.figure(figsize=(14, rows * 7))
35 |         self.gs = gridspec.GridSpec(rows, cols, wspace=0.4, hspace=0.3)
36 |         self.curr_row = 0
37 |         self.curr_col = 0
38 | 
39 |     def next_row(self):
40 |         if self.curr_col != 0:
41 |             self.curr_row += 1
42 |             self.curr_col = 0
43 |         subplt = plt.subplot(self.gs[self.curr_row, :])
44 |         self.curr_row += 1
45 |         return subplt
46 | 
47 |     def next_cell(self):
48 |         if self.curr_col >= self.cols:
49 |             self.curr_row += 1
50 |             self.curr_col = 0
51 |         subplt = plt.subplot(self.gs[self.curr_row, self.curr_col])
52 |         self.curr_col += 1
53 |         return subplt
54 | 
55 |     def close(self):
56 |         plt.close(self.fig)
57 |         self.fig = None
58 |         self.gs = None
59 | 
60 | 
61 | @plotting.customize
62 | def create_summary_tear_sheet(
63 |     factor_data, long_short=True, group_neutral=False
64 | ):
65 |     """
66 |     Creates a small summary tear sheet with returns, information, and turnover
67 |     analysis.
68 | 
69 |     Parameters
70 |     ----------
71 |     factor_data : pd.DataFrame - MultiIndex
72 |         A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
73 |         containing the values for a single alpha factor, forward returns for
74 |         each period, the factor quantile/bin that factor value belongs to, and
75 |         (optionally) the group the asset belongs to.
76 |         - See full explanation in utils.get_clean_factor_and_forward_returns
77 |     long_short : bool
78 |         Should this computation happen on a long short portfolio? if so, then
79 |         mean quantile returns will be demeaned across the factor universe.
80 |     group_neutral : bool
81 |         Should this computation happen on a group neutral portfolio? if so,
82 |         returns demeaning will occur on the group level.
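A minimal usage sketch for the GridFigure helper defined above: full-width rows come from next_row(), side-by-side panels from next_cell().

```python
import matplotlib.pyplot as plt
from alphalens.tears import GridFigure

gf = GridFigure(rows=2, cols=2)
ax_top = gf.next_row()     # spans both columns of row 0
ax_left = gf.next_cell()   # row 1, column 0
ax_right = gf.next_cell()  # row 1, column 1
ax_top.plot([1, 2, 3])
plt.show()
gf.close()  # releases the figure, mirroring the tear sheet functions
```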
83 | """ 84 | 85 | # Returns Analysis 86 | mean_quant_ret, std_quantile = perf.mean_return_by_quantile( 87 | factor_data, 88 | by_group=False, 89 | demeaned=long_short, 90 | group_adjust=group_neutral, 91 | ) 92 | 93 | mean_quant_rateret = mean_quant_ret.apply( 94 | utils.rate_of_return, axis=0, base_period=mean_quant_ret.columns[0] 95 | ) 96 | 97 | mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile( 98 | factor_data, 99 | by_date=True, 100 | by_group=False, 101 | demeaned=long_short, 102 | group_adjust=group_neutral, 103 | ) 104 | 105 | mean_quant_rateret_bydate = mean_quant_ret_bydate.apply( 106 | utils.rate_of_return, 107 | axis=0, 108 | base_period=mean_quant_ret_bydate.columns[0], 109 | ) 110 | 111 | compstd_quant_daily = std_quant_daily.apply( 112 | utils.std_conversion, axis=0, base_period=std_quant_daily.columns[0] 113 | ) 114 | 115 | alpha_beta = perf.factor_alpha_beta( 116 | factor_data, demeaned=long_short, group_adjust=group_neutral 117 | ) 118 | 119 | mean_ret_spread_quant, std_spread_quant = perf.compute_mean_returns_spread( 120 | mean_quant_rateret_bydate, 121 | factor_data["factor_quantile"].max(), 122 | factor_data["factor_quantile"].min(), 123 | std_err=compstd_quant_daily, 124 | ) 125 | 126 | periods = utils.get_forward_returns_columns(factor_data.columns) 127 | periods = list(map(lambda p: pd.Timedelta(p).days, periods)) 128 | 129 | fr_cols = len(periods) 130 | vertical_sections = 2 + fr_cols * 3 131 | gf = GridFigure(rows=vertical_sections, cols=1) 132 | 133 | plotting.plot_quantile_statistics_table(factor_data) 134 | 135 | plotting.plot_returns_table( 136 | alpha_beta, mean_quant_rateret, mean_ret_spread_quant 137 | ) 138 | 139 | plotting.plot_quantile_returns_bar( 140 | mean_quant_rateret, 141 | by_group=False, 142 | ylim_percentiles=None, 143 | ax=gf.next_row(), 144 | ) 145 | 146 | # Information Analysis 147 | ic = perf.factor_information_coefficient(factor_data) 148 | plotting.plot_information_table(ic) 149 | 150 | # Turnover Analysis 151 | quantile_factor = factor_data["factor_quantile"] 152 | 153 | quantile_turnover = { 154 | p: pd.concat( 155 | [ 156 | perf.quantile_turnover(quantile_factor, q, p) 157 | for q in range(1, int(quantile_factor.max()) + 1) 158 | ], 159 | axis=1, 160 | ) 161 | for p in periods 162 | } 163 | 164 | autocorrelation = pd.concat( 165 | [ 166 | perf.factor_rank_autocorrelation(factor_data, period) 167 | for period in periods 168 | ], 169 | axis=1, 170 | ) 171 | 172 | plotting.plot_turnover_table(autocorrelation, quantile_turnover) 173 | 174 | plt.show() 175 | gf.close() 176 | 177 | 178 | @plotting.customize 179 | def create_returns_tear_sheet( 180 | factor_data, long_short=True, group_neutral=False, by_group=False 181 | ): 182 | """ 183 | Creates a tear sheet for returns analysis of a factor. 184 | 185 | Parameters 186 | ---------- 187 | factor_data : pd.DataFrame - MultiIndex 188 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1), 189 | containing the values for a single alpha factor, forward returns for 190 | each period, the factor quantile/bin that factor value belongs to, 191 | and (optionally) the group the asset belongs to. 192 | - See full explanation in utils.get_clean_factor_and_forward_returns 193 | long_short : bool 194 | Should this computation happen on a long short portfolio? if so, then 195 | mean quantile returns will be demeaned across the factor universe. 
196 | Additionally factor values will be demeaned across the factor universe 197 | when factor weighting the portfolio for cumulative returns plots 198 | group_neutral : bool 199 | Should this computation happen on a group neutral portfolio? if so, 200 | returns demeaning will occur on the group level. 201 | Additionally each group will weight the same in cumulative returns 202 | plots 203 | by_group : bool 204 | If True, display graphs separately for each group. 205 | """ 206 | 207 | factor_returns = perf.factor_returns( 208 | factor_data, long_short, group_neutral 209 | ) 210 | 211 | mean_quant_ret, std_quantile = perf.mean_return_by_quantile( 212 | factor_data, 213 | by_group=False, 214 | demeaned=long_short, 215 | group_adjust=group_neutral, 216 | ) 217 | 218 | mean_quant_rateret = mean_quant_ret.apply( 219 | utils.rate_of_return, axis=0, base_period=mean_quant_ret.columns[0] 220 | ) 221 | 222 | mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile( 223 | factor_data, 224 | by_date=True, 225 | by_group=False, 226 | demeaned=long_short, 227 | group_adjust=group_neutral, 228 | ) 229 | 230 | mean_quant_rateret_bydate = mean_quant_ret_bydate.apply( 231 | utils.rate_of_return, 232 | axis=0, 233 | base_period=mean_quant_ret_bydate.columns[0], 234 | ) 235 | 236 | compstd_quant_daily = std_quant_daily.apply( 237 | utils.std_conversion, axis=0, base_period=std_quant_daily.columns[0] 238 | ) 239 | 240 | alpha_beta = perf.factor_alpha_beta( 241 | factor_data, factor_returns, long_short, group_neutral 242 | ) 243 | 244 | mean_ret_spread_quant, std_spread_quant = perf.compute_mean_returns_spread( 245 | mean_quant_rateret_bydate, 246 | factor_data["factor_quantile"].max(), 247 | factor_data["factor_quantile"].min(), 248 | std_err=compstd_quant_daily, 249 | ) 250 | 251 | fr_cols = len(factor_returns.columns) 252 | vertical_sections = 2 + fr_cols * 3 253 | gf = GridFigure(rows=vertical_sections, cols=1) 254 | 255 | plotting.plot_returns_table( 256 | alpha_beta, mean_quant_rateret, mean_ret_spread_quant 257 | ) 258 | 259 | plotting.plot_quantile_returns_bar( 260 | mean_quant_rateret, 261 | by_group=False, 262 | ylim_percentiles=None, 263 | ax=gf.next_row(), 264 | ) 265 | 266 | plotting.plot_quantile_returns_violin( 267 | mean_quant_rateret_bydate, ylim_percentiles=(1, 99), ax=gf.next_row() 268 | ) 269 | 270 | trading_calendar = factor_data.index.levels[0].freq 271 | if trading_calendar is None: 272 | trading_calendar = pd.tseries.offsets.BDay() 273 | warnings.warn( 274 | "'freq' not set in factor_data index: assuming business day", 275 | UserWarning, 276 | ) 277 | 278 | # Compute cumulative returns from daily simple returns, if '1D' 279 | # returns are provided. 
280 | if "1D" in factor_returns: 281 | title = ( 282 | "Factor Weighted " 283 | + ("Group Neutral " if group_neutral else "") 284 | + ("Long/Short " if long_short else "") 285 | + "Portfolio Cumulative Return (1D Period)" 286 | ) 287 | 288 | plotting.plot_cumulative_returns( 289 | factor_returns["1D"], period="1D", title=title, ax=gf.next_row() 290 | ) 291 | 292 | plotting.plot_cumulative_returns_by_quantile( 293 | mean_quant_ret_bydate["1D"], period="1D", ax=gf.next_row() 294 | ) 295 | 296 | ax_mean_quantile_returns_spread_ts = [ 297 | gf.next_row() for x in range(fr_cols) 298 | ] 299 | plotting.plot_mean_quantile_returns_spread_time_series( 300 | mean_ret_spread_quant, 301 | std_err=std_spread_quant, 302 | bandwidth=0.5, 303 | ax=ax_mean_quantile_returns_spread_ts, 304 | ) 305 | 306 | plt.show() 307 | gf.close() 308 | 309 | if by_group: 310 | ( 311 | mean_return_quantile_group, 312 | mean_return_quantile_group_std_err, 313 | ) = perf.mean_return_by_quantile( 314 | factor_data, 315 | by_date=False, 316 | by_group=True, 317 | demeaned=long_short, 318 | group_adjust=group_neutral, 319 | ) 320 | 321 | mean_quant_rateret_group = mean_return_quantile_group.apply( 322 | utils.rate_of_return, 323 | axis=0, 324 | base_period=mean_return_quantile_group.columns[0], 325 | ) 326 | 327 | num_groups = len( 328 | mean_quant_rateret_group.index.get_level_values("group").unique() 329 | ) 330 | 331 | vertical_sections = 1 + (((num_groups - 1) // 2) + 1) 332 | gf = GridFigure(rows=vertical_sections, cols=2) 333 | 334 | ax_quantile_returns_bar_by_group = [ 335 | gf.next_cell() for _ in range(num_groups) 336 | ] 337 | plotting.plot_quantile_returns_bar( 338 | mean_quant_rateret_group, 339 | by_group=True, 340 | ylim_percentiles=(5, 95), 341 | ax=ax_quantile_returns_bar_by_group, 342 | ) 343 | plt.show() 344 | gf.close() 345 | 346 | 347 | @plotting.customize 348 | def create_information_tear_sheet( 349 | factor_data, group_neutral=False, by_group=False 350 | ): 351 | """ 352 | Creates a tear sheet for information analysis of a factor. 353 | 354 | Parameters 355 | ---------- 356 | factor_data : pd.DataFrame - MultiIndex 357 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1), 358 | containing the values for a single alpha factor, forward returns for 359 | each period, the factor quantile/bin that factor value belongs to, and 360 | (optionally) the group the asset belongs to. 361 | - See full explanation in utils.get_clean_factor_and_forward_returns 362 | group_neutral : bool 363 | Demean forward returns by group before computing IC. 364 | by_group : bool 365 | If True, display graphs separately for each group. 
366 | """ 367 | 368 | ic = perf.factor_information_coefficient(factor_data, group_neutral) 369 | 370 | plotting.plot_information_table(ic) 371 | 372 | columns_wide = 2 373 | fr_cols = len(ic.columns) 374 | rows_when_wide = ((fr_cols - 1) // columns_wide) + 1 375 | vertical_sections = fr_cols + 3 * rows_when_wide + 2 * fr_cols 376 | gf = GridFigure(rows=vertical_sections, cols=columns_wide) 377 | 378 | ax_ic_ts = [gf.next_row() for _ in range(fr_cols)] 379 | plotting.plot_ic_ts(ic, ax=ax_ic_ts) 380 | 381 | ax_ic_hqq = [gf.next_cell() for _ in range(fr_cols * 2)] 382 | plotting.plot_ic_hist(ic, ax=ax_ic_hqq[::2]) 383 | plotting.plot_ic_qq(ic, ax=ax_ic_hqq[1::2]) 384 | 385 | if not by_group: 386 | 387 | mean_monthly_ic = perf.mean_information_coefficient( 388 | factor_data, 389 | group_adjust=group_neutral, 390 | by_group=False, 391 | by_time="M", 392 | ) 393 | ax_monthly_ic_heatmap = [gf.next_cell() for x in range(fr_cols)] 394 | plotting.plot_monthly_ic_heatmap( 395 | mean_monthly_ic, ax=ax_monthly_ic_heatmap 396 | ) 397 | 398 | if by_group: 399 | mean_group_ic = perf.mean_information_coefficient( 400 | factor_data, group_adjust=group_neutral, by_group=True 401 | ) 402 | 403 | plotting.plot_ic_by_group(mean_group_ic, ax=gf.next_row()) 404 | 405 | plt.show() 406 | gf.close() 407 | 408 | 409 | @plotting.customize 410 | def create_turnover_tear_sheet(factor_data, turnover_periods=None): 411 | """ 412 | Creates a tear sheet for analyzing the turnover properties of a factor. 413 | 414 | Parameters 415 | ---------- 416 | factor_data : pd.DataFrame - MultiIndex 417 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1), 418 | containing the values for a single alpha factor, forward returns for 419 | each period, the factor quantile/bin that factor value belongs to, and 420 | (optionally) the group the asset belongs to. 421 | - See full explanation in utils.get_clean_factor_and_forward_returns 422 | turnover_periods : sequence[string], optional 423 | Periods to compute turnover analysis on. By default periods in 424 | 'factor_data' are used but custom periods can provided instead. This 425 | can be useful when periods in 'factor_data' are not multiples of the 426 | frequency at which factor values are computed i.e. 
427 |         are 2h and 4h and the factor is computed daily, so values like
428 |         ['1D', '2D'] could be used instead.
429 |     """
430 | 
431 |     if turnover_periods is None:
432 |         input_periods = utils.get_forward_returns_columns(
433 |             factor_data.columns, require_exact_day_multiple=True,
434 |         ).to_numpy()
435 |         turnover_periods = utils.timedelta_strings_to_integers(input_periods)
436 |     else:
437 |         turnover_periods = utils.timedelta_strings_to_integers(
438 |             turnover_periods,
439 |         )
440 | 
441 |     quantile_factor = factor_data["factor_quantile"]
442 | 
443 |     quantile_turnover = {
444 |         p: pd.concat(
445 |             [
446 |                 perf.quantile_turnover(quantile_factor, q, p)
447 |                 for q in quantile_factor.sort_values().unique().tolist()
448 |             ],
449 |             axis=1,
450 |         )
451 |         for p in turnover_periods
452 |     }
453 | 
454 |     autocorrelation = pd.concat(
455 |         [
456 |             perf.factor_rank_autocorrelation(factor_data, period)
457 |             for period in turnover_periods
458 |         ],
459 |         axis=1,
460 |     )
461 | 
462 |     plotting.plot_turnover_table(autocorrelation, quantile_turnover)
463 | 
464 |     fr_cols = len(turnover_periods)
465 |     columns_wide = 1
466 |     rows_when_wide = ((fr_cols - 1) // 1) + 1
467 |     vertical_sections = fr_cols + 3 * rows_when_wide + 2 * fr_cols
468 |     gf = GridFigure(rows=vertical_sections, cols=columns_wide)
469 | 
470 |     for period in turnover_periods:
471 |         if quantile_turnover[period].isnull().all().all():
472 |             continue
473 |         plotting.plot_top_bottom_quantile_turnover(
474 |             quantile_turnover[period], period=period, ax=gf.next_row()
475 |         )
476 | 
477 |     for period in autocorrelation:
478 |         if autocorrelation[period].isnull().all():
479 |             continue
480 |         plotting.plot_factor_rank_auto_correlation(
481 |             autocorrelation[period], period=period, ax=gf.next_row()
482 |         )
483 | 
484 |     plt.show()
485 |     gf.close()
486 | 
487 | 
488 | @plotting.customize
489 | def create_full_tear_sheet(factor_data,
490 |                            long_short=True,
491 |                            group_neutral=False,
492 |                            by_group=False):
493 |     """
494 |     Creates a full tear sheet for analyzing and evaluating a single
495 |     return predicting (alpha) factor.
496 | 
497 |     Parameters
498 |     ----------
499 |     factor_data : pd.DataFrame - MultiIndex
500 |         A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
501 |         containing the values for a single alpha factor, forward returns for
502 |         each period, the factor quantile/bin that factor value belongs to, and
503 |         (optionally) the group the asset belongs to.
504 |         - See full explanation in utils.get_clean_factor_and_forward_returns
505 |     long_short : bool
506 |         Should this computation happen on a long short portfolio?
507 |         - See tears.create_returns_tear_sheet for details on how this flag
508 |         affects returns analysis
509 |     group_neutral : bool
510 |         Should this computation happen on a group neutral portfolio?
511 |         - See tears.create_returns_tear_sheet for details on how this flag
512 |         affects returns analysis
513 |         - See tears.create_information_tear_sheet for details on how this
514 |         flag affects information analysis
515 |     by_group : bool
516 |         If True, display graphs separately for each group.
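The quantile_turnover figures collected in the turnover tear sheet above reduce to a simple set difference per date; by hand (a sketch of the definition, not the library code):

```python
# Share of names in the top quantile today that were not in it one
# period earlier.
top_today = {'A', 'B', 'C'}
top_one_period_ago = {'B', 'C', 'D'}
turnover = len(top_today - top_one_period_ago) / len(top_today)
print(turnover)  # 0.333..., only 'A' is new to the quantile
```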
517 | """ 518 | 519 | plotting.plot_quantile_statistics_table(factor_data) 520 | create_returns_tear_sheet( 521 | factor_data, long_short, group_neutral, by_group, set_context=False 522 | ) 523 | create_information_tear_sheet( 524 | factor_data, group_neutral, by_group, set_context=False 525 | ) 526 | create_turnover_tear_sheet(factor_data, set_context=False) 527 | 528 | 529 | @plotting.customize 530 | def create_event_returns_tear_sheet(factor_data, 531 | returns, 532 | avgretplot=(5, 15), 533 | long_short=True, 534 | group_neutral=False, 535 | std_bar=True, 536 | by_group=False): 537 | """ 538 | Creates a tear sheet to view the average cumulative returns for a 539 | factor within a window (pre and post event). 540 | 541 | Parameters 542 | ---------- 543 | factor_data : pd.DataFrame - MultiIndex 544 | A MultiIndex Series indexed by date (level 0) and asset (level 1), 545 | containing the values for a single alpha factor, the factor 546 | quantile/bin that factor value belongs to and (optionally) the group 547 | the asset belongs to. 548 | - See full explanation in utils.get_clean_factor_and_forward_returns 549 | returns : pd.DataFrame 550 | A DataFrame indexed by date with assets in the columns containing daily 551 | returns. 552 | - See full explanation in utils.get_clean_factor_and_forward_returns 553 | avgretplot: tuple (int, int) - (before, after) 554 | If not None, plot quantile average cumulative returns 555 | long_short : bool 556 | Should this computation happen on a long short portfolio? if so then 557 | factor returns will be demeaned across the factor universe 558 | group_neutral : bool 559 | Should this computation happen on a group neutral portfolio? if so, 560 | returns demeaning will occur on the group level. 561 | std_bar : boolean, optional 562 | Show plots with standard deviation bars, one for each quantile 563 | by_group : bool 564 | If True, display graphs separately for each group. 
565 | """ 566 | 567 | before, after = avgretplot 568 | 569 | avg_cumulative_returns = perf.average_cumulative_return_by_quantile( 570 | factor_data, 571 | returns, 572 | periods_before=before, 573 | periods_after=after, 574 | demeaned=long_short, 575 | group_adjust=group_neutral, 576 | ) 577 | 578 | num_quantiles = int(factor_data["factor_quantile"].max()) 579 | 580 | vertical_sections = 1 581 | if std_bar: 582 | vertical_sections += ((num_quantiles - 1) // 2) + 1 583 | cols = 2 if num_quantiles != 1 else 1 584 | gf = GridFigure(rows=vertical_sections, cols=cols) 585 | plotting.plot_quantile_average_cumulative_return( 586 | avg_cumulative_returns, 587 | by_quantile=False, 588 | std_bar=False, 589 | ax=gf.next_row(), 590 | ) 591 | if std_bar: 592 | ax_avg_cumulative_returns_by_q = [ 593 | gf.next_cell() for _ in range(num_quantiles) 594 | ] 595 | plotting.plot_quantile_average_cumulative_return( 596 | avg_cumulative_returns, 597 | by_quantile=True, 598 | std_bar=True, 599 | ax=ax_avg_cumulative_returns_by_q, 600 | ) 601 | 602 | plt.show() 603 | gf.close() 604 | 605 | if by_group: 606 | groups = factor_data["group"].unique() 607 | num_groups = len(groups) 608 | vertical_sections = ((num_groups - 1) // 2) + 1 609 | gf = GridFigure(rows=vertical_sections, cols=2) 610 | 611 | avg_cumret_by_group = perf.average_cumulative_return_by_quantile( 612 | factor_data, 613 | returns, 614 | periods_before=before, 615 | periods_after=after, 616 | demeaned=long_short, 617 | group_adjust=group_neutral, 618 | by_group=True, 619 | ) 620 | 621 | for group, avg_cumret in avg_cumret_by_group.groupby(level="group"): 622 | avg_cumret.index = avg_cumret.index.droplevel("group") 623 | plotting.plot_quantile_average_cumulative_return( 624 | avg_cumret, 625 | by_quantile=False, 626 | std_bar=False, 627 | title=group, 628 | ax=gf.next_cell(), 629 | ) 630 | 631 | plt.show() 632 | gf.close() 633 | 634 | 635 | @plotting.customize 636 | def create_event_study_tear_sheet(factor_data, 637 | returns, 638 | avgretplot=(5, 15), 639 | rate_of_ret=True, 640 | n_bars=50): 641 | """ 642 | Creates an event study tear sheet for analysis of a specific event. 643 | 644 | Parameters 645 | ---------- 646 | factor_data : pd.DataFrame - MultiIndex 647 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1), 648 | containing the values for a single event, forward returns for each 649 | period, the factor quantile/bin that factor value belongs to, and 650 | (optionally) the group the asset belongs to. 651 | returns : pd.DataFrame, required only if 'avgretplot' is provided 652 | A DataFrame indexed by date with assets in the columns containing daily 653 | returns. 654 | - See full explanation in utils.get_clean_factor_and_forward_returns 655 | avgretplot: tuple (int, int) - (before, after), optional 656 | If not None, plot event style average cumulative returns within a 657 | window (pre and post event). 
658 | rate_of_ret : bool, optional 659 | Display rate of return instead of simple return in 'Mean Period Wise 660 | Return By Factor Quantile' and 'Period Wise Return By Factor Quantile' 661 | plots 662 | n_bars : int, optional 663 | Number of bars in event distribution plot 664 | """ 665 | 666 | long_short = False 667 | 668 | plotting.plot_quantile_statistics_table(factor_data) 669 | 670 | gf = GridFigure(rows=1, cols=1) 671 | plotting.plot_events_distribution( 672 | events=factor_data["factor"], num_bars=n_bars, ax=gf.next_row() 673 | ) 674 | plt.show() 675 | gf.close() 676 | 677 | if returns is not None and avgretplot is not None: 678 | 679 | create_event_returns_tear_sheet( 680 | factor_data=factor_data, 681 | returns=returns, 682 | avgretplot=avgretplot, 683 | long_short=long_short, 684 | group_neutral=False, 685 | std_bar=True, 686 | by_group=False, 687 | ) 688 | 689 | factor_returns = perf.factor_returns( 690 | factor_data, demeaned=False, equal_weight=True 691 | ) 692 | 693 | mean_quant_ret, std_quantile = perf.mean_return_by_quantile( 694 | factor_data, by_group=False, demeaned=long_short 695 | ) 696 | if rate_of_ret: 697 | mean_quant_ret = mean_quant_ret.apply( 698 | utils.rate_of_return, axis=0, base_period=mean_quant_ret.columns[0] 699 | ) 700 | 701 | mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile( 702 | factor_data, by_date=True, by_group=False, demeaned=long_short 703 | ) 704 | if rate_of_ret: 705 | mean_quant_ret_bydate = mean_quant_ret_bydate.apply( 706 | utils.rate_of_return, 707 | axis=0, 708 | base_period=mean_quant_ret_bydate.columns[0], 709 | ) 710 | 711 | fr_cols = len(factor_returns.columns) 712 | vertical_sections = 2 + fr_cols * 1 713 | gf = GridFigure(rows=vertical_sections + 1, cols=1) 714 | 715 | plotting.plot_quantile_returns_bar( 716 | mean_quant_ret, by_group=False, ylim_percentiles=None, ax=gf.next_row() 717 | ) 718 | 719 | plotting.plot_quantile_returns_violin( 720 | mean_quant_ret_bydate, ylim_percentiles=(1, 99), ax=gf.next_row() 721 | ) 722 | 723 | trading_calendar = factor_data.index.levels[0].freq 724 | if trading_calendar is None: 725 | trading_calendar = pd.tseries.offsets.BDay() 726 | warnings.warn( 727 | "'freq' not set in factor_data index: assuming business day", 728 | UserWarning, 729 | ) 730 | 731 | plt.show() 732 | gf.close() 733 | -------------------------------------------------------------------------------- /alphalens/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ckend/alphalens/4979057c6fbd045a7998dba53388654d45a217ef/alphalens/tests/__init__.py -------------------------------------------------------------------------------- /alphalens/tests/matplotlibrc: -------------------------------------------------------------------------------- 1 | backend : Agg -------------------------------------------------------------------------------- /alphalens/tests/test_tears.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2017 Quantopian, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
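The event-style factor consumed by create_event_study_tear_sheet above is a sparse (date, asset) Series, much like the fixtures the test file below constructs; a toy version:

```python
import numpy as np
import pandas as pd

# Mostly-NaN event markers; stack() keeps only the event rows.
dates = pd.date_range('2020-01-01', periods=4, freq='B', name='date')
events = pd.DataFrame([[1, np.nan],
                       [np.nan, np.nan],
                       [np.nan, 1],
                       [1, np.nan]],
                      index=dates, columns=['A', 'B']).stack()
print(events)  # three rows, one per (date, asset) event
```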
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import division 17 | from unittest import TestCase 18 | from parameterized import parameterized 19 | from numpy import nan 20 | from pandas import (DataFrame, date_range, Timedelta, concat) 21 | 22 | from .. tears import (create_returns_tear_sheet, 23 | create_information_tear_sheet, 24 | create_turnover_tear_sheet, 25 | create_summary_tear_sheet, 26 | create_full_tear_sheet, 27 | create_event_returns_tear_sheet, 28 | create_event_study_tear_sheet) 29 | 30 | from .. utils import get_clean_factor_and_forward_returns 31 | 32 | 33 | class TearsTestCase(TestCase): 34 | 35 | tickers = ['A', 'B', 'C', 'D', 'E', 'F'] 36 | 37 | factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2} 38 | 39 | price_data = [[1.25**i, 1.50**i, 1.00**i, 0.50**i, 1.50**i, 1.00**i] 40 | for i in range(1, 51)] 41 | 42 | factor_data = [[3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 43 | [3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 44 | [3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 45 | [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2], 46 | [3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 47 | [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2], 48 | [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2], 49 | [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2], 50 | [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2], 51 | [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2], 52 | [3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 53 | [3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 54 | [3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 55 | [3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 56 | [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2]] 57 | 58 | event_data = [[1, nan, nan, nan, nan, nan], 59 | [4, nan, nan, 7, nan, nan], 60 | [nan, nan, nan, nan, nan, nan], 61 | [nan, 3, nan, 2, nan, nan], 62 | [1, nan, nan, nan, nan, nan], 63 | [nan, nan, 2, nan, nan, nan], 64 | [nan, nan, nan, 2, nan, nan], 65 | [nan, nan, nan, 1, nan, nan], 66 | [2, nan, nan, nan, nan, nan], 67 | [nan, nan, nan, nan, 5, nan], 68 | [nan, nan, nan, 2, nan, nan], 69 | [nan, nan, nan, nan, nan, nan], 70 | [2, nan, nan, nan, nan, nan], 71 | [nan, nan, nan, nan, nan, 5], 72 | [nan, nan, nan, 1, nan, nan], 73 | [nan, nan, nan, nan, 4, nan], 74 | [5, nan, nan, 4, nan, nan], 75 | [nan, nan, nan, 3, nan, nan], 76 | [nan, nan, nan, 4, nan, nan], 77 | [nan, nan, 2, nan, nan, nan], 78 | [5, nan, nan, nan, nan, nan], 79 | [nan, 1, nan, nan, nan, nan], 80 | [nan, nan, nan, nan, 4, nan], 81 | [0, nan, nan, nan, nan, nan], 82 | [nan, 5, nan, nan, nan, 4], 83 | [nan, nan, nan, nan, nan, nan], 84 | [nan, nan, 5, nan, nan, 3], 85 | [nan, nan, 1, 2, 3, nan], 86 | [nan, nan, nan, 5, nan, nan], 87 | [nan, nan, 1, nan, 3, nan]] 88 | 89 | # 90 | # business days calendar 91 | # 92 | bprice_index = date_range(start='2015-1-10', end='2015-3-22', freq='B') 93 | bprice_index.name = 'date' 94 | bprices = DataFrame(index=bprice_index, columns=tickers, data=price_data) 95 | 96 | bfactor_index = date_range(start='2015-1-15', end='2015-2-25', freq='B') 97 | bfactor_index.name = 'date' 98 | bfactor = DataFrame(index=bfactor_index, 
columns=tickers, 99 | data=factor_data).stack() 100 | 101 | # 102 | # full calendar 103 | # 104 | price_index = date_range(start='2015-1-10', end='2015-2-28') 105 | price_index.name = 'date' 106 | prices = DataFrame(index=price_index, columns=tickers, data=price_data) 107 | 108 | factor_index = date_range(start='2015-1-15', end='2015-2-13') 109 | factor_index.name = 'date' 110 | factor = DataFrame(index=factor_index, columns=tickers, 111 | data=factor_data).stack() 112 | 113 | # 114 | # intraday factor 115 | # 116 | today_open = DataFrame(index=price_index+Timedelta('9h30m'), 117 | columns=tickers, data=price_data) 118 | today_open_1h = DataFrame(index=price_index+Timedelta('10h30m'), 119 | columns=tickers, data=price_data) 120 | today_open_1h += today_open_1h*0.001 121 | today_open_3h = DataFrame(index=price_index+Timedelta('12h30m'), 122 | columns=tickers, data=price_data) 123 | today_open_3h -= today_open_3h*0.002 124 | intraday_prices = concat([today_open, today_open_1h, today_open_3h]) \ 125 | .sort_index() 126 | 127 | intraday_factor = DataFrame(index=factor_index+Timedelta('9h30m'), 128 | columns=tickers, data=factor_data).stack() 129 | 130 | # 131 | # event factor 132 | # 133 | bevent_factor = DataFrame(index=bfactor_index, columns=tickers, 134 | data=event_data).stack() 135 | 136 | event_factor = DataFrame(index=factor_index, columns=tickers, 137 | data=event_data).stack() 138 | 139 | all_prices = [prices, bprices] 140 | all_factors = [factor, bfactor] 141 | all_events = [event_factor, bevent_factor] 142 | 143 | def __localize_prices_and_factor(self, prices, factor, tz): 144 | if tz is not None: 145 | factor = factor.unstack() 146 | factor.index = factor.index.tz_localize(tz) 147 | factor = factor.stack() 148 | prices = prices.copy() 149 | prices.index = prices.index.tz_localize(tz) 150 | return prices, factor 151 | 152 | @parameterized.expand([(2, (1, 5, 10), None), 153 | (3, (2, 4, 6), 20)]) 154 | def test_create_returns_tear_sheet( 155 | self, 156 | quantiles, 157 | periods, 158 | filter_zscore): 159 | """ 160 | Test no exceptions are thrown 161 | """ 162 | factor_data = get_clean_factor_and_forward_returns( 163 | self.factor, 164 | self.prices, 165 | quantiles=quantiles, 166 | periods=periods, 167 | filter_zscore=filter_zscore) 168 | 169 | create_returns_tear_sheet( 170 | factor_data, long_short=False, group_neutral=False, by_group=False) 171 | 172 | @parameterized.expand([(1, (1, 5, 10), None), 173 | (4, (1, 2, 3, 7), 20)]) 174 | def test_create_information_tear_sheet( 175 | self, quantiles, periods, filter_zscore): 176 | """ 177 | Test no exceptions are thrown 178 | """ 179 | factor_data = get_clean_factor_and_forward_returns( 180 | self.factor, 181 | self.prices, 182 | quantiles=quantiles, 183 | periods=periods, 184 | filter_zscore=filter_zscore) 185 | 186 | create_information_tear_sheet( 187 | factor_data, group_neutral=False, by_group=False) 188 | 189 | @parameterized.expand([ 190 | (2, (2, 3, 6), None, 20), 191 | (4, (1, 2, 3, 7), None, None), 192 | (2, (2, 3, 6), ['1D', '2D'], 20), 193 | (4, (1, 2, 3, 7), ['1D'], None), 194 | ]) 195 | def test_create_turnover_tear_sheet( 196 | self, 197 | quantiles, 198 | periods, 199 | turnover_periods, 200 | filter_zscore): 201 | """ 202 | Test no exceptions are thrown 203 | """ 204 | factor_data = get_clean_factor_and_forward_returns( 205 | self.factor, 206 | self.prices, 207 | quantiles=quantiles, 208 | periods=periods, 209 | filter_zscore=filter_zscore) 210 | 211 | create_turnover_tear_sheet(factor_data, turnover_periods) 212 | 213 
| @parameterized.expand([(2, (1, 5, 10), None), 214 | (3, (1, 2, 3, 7), 20)]) 215 | def test_create_summary_tear_sheet( 216 | self, 217 | quantiles, 218 | periods, 219 | filter_zscore): 220 | """ 221 | Test no exceptions are thrown 222 | """ 223 | factor_data = get_clean_factor_and_forward_returns( 224 | self.factor, 225 | self.prices, 226 | quantiles=quantiles, 227 | periods=periods, 228 | filter_zscore=filter_zscore) 229 | 230 | create_summary_tear_sheet( 231 | factor_data, long_short=True, group_neutral=False) 232 | create_summary_tear_sheet( 233 | factor_data, long_short=False, group_neutral=False) 234 | 235 | @parameterized.expand([ 236 | (2, (1, 5, 10), None, None), 237 | (3, (2, 4, 6), 20, 'US/Eastern'), 238 | (4, (1, 8), 20, None), 239 | (4, (1, 2, 3, 7), None, 'US/Eastern'), 240 | ]) 241 | def test_create_full_tear_sheet( 242 | self, 243 | quantiles, 244 | periods, 245 | filter_zscore, 246 | tz): 247 | """ 248 | Test no exceptions are thrown 249 | """ 250 | for factor, prices in zip(self.all_factors, self.all_prices): 251 | 252 | prices, factor = self.__localize_prices_and_factor(prices, 253 | factor, 254 | tz) 255 | factor_data = get_clean_factor_and_forward_returns( 256 | factor, 257 | prices, 258 | groupby=self.factor_groups, 259 | quantiles=quantiles, 260 | periods=periods, 261 | filter_zscore=filter_zscore) 262 | 263 | create_full_tear_sheet(factor_data, long_short=False, 264 | group_neutral=False, by_group=False) 265 | create_full_tear_sheet(factor_data, long_short=True, 266 | group_neutral=False, by_group=True) 267 | create_full_tear_sheet(factor_data, long_short=True, 268 | group_neutral=True, by_group=True) 269 | 270 | @parameterized.expand([(2, (1, 5, 10), None, None), 271 | (3, (2, 4, 6), 20, None), 272 | (4, (3, 4), None, 'US/Eastern'), 273 | (1, (2, 3, 6, 9), 20, 'US/Eastern')]) 274 | def test_create_event_returns_tear_sheet( 275 | self, quantiles, periods, filter_zscore, tz): 276 | """ 277 | Test no exceptions are thrown 278 | """ 279 | for factor, prices in zip(self.all_factors, self.all_prices): 280 | 281 | prices, factor = self.__localize_prices_and_factor(prices, 282 | factor, 283 | tz) 284 | factor_data = get_clean_factor_and_forward_returns( 285 | factor, 286 | prices, 287 | groupby=self.factor_groups, 288 | quantiles=quantiles, 289 | periods=periods, 290 | filter_zscore=filter_zscore) 291 | 292 | create_event_returns_tear_sheet(factor_data, prices, avgretplot=( 293 | 5, 11), long_short=False, group_neutral=False, by_group=False) 294 | create_event_returns_tear_sheet(factor_data, prices, avgretplot=( 295 | 5, 11), long_short=True, group_neutral=False, by_group=False) 296 | create_event_returns_tear_sheet(factor_data, prices, avgretplot=( 297 | 5, 11), long_short=False, group_neutral=True, by_group=False) 298 | create_event_returns_tear_sheet(factor_data, prices, avgretplot=( 299 | 5, 11), long_short=False, group_neutral=False, by_group=True) 300 | create_event_returns_tear_sheet(factor_data, prices, avgretplot=( 301 | 5, 11), long_short=True, group_neutral=False, by_group=True) 302 | create_event_returns_tear_sheet(factor_data, prices, avgretplot=( 303 | 5, 11), long_short=False, group_neutral=True, by_group=True) 304 | 305 | @parameterized.expand([((6, 8), None, None), 306 | ((6, 8), None, None), 307 | ((6, 3), 20, None), 308 | ((6, 3), 20, 'US/Eastern'), 309 | ((0, 3), None, None), 310 | ((3, 0), 20, 'US/Eastern')]) 311 | def test_create_event_study_tear_sheet( 312 | self, avgretplot, filter_zscore, tz): 313 | """ 314 | Test no exceptions are thrown 315 | """ 
316 |         for factor, prices in zip(self.all_events, self.all_prices):
317 | 
318 |             prices, factor = self.__localize_prices_and_factor(prices,
319 |                                                                factor,
320 |                                                                tz)
321 |             factor_data = get_clean_factor_and_forward_returns(
322 |                 factor, prices, bins=1, quantiles=None, periods=(
323 |                     1, 2), filter_zscore=filter_zscore)
324 | 
325 |             create_event_study_tear_sheet(
326 |                 factor_data, prices, avgretplot=avgretplot)
327 | 
--------------------------------------------------------------------------------
/alphalens/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2018 Quantopian, Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import division
17 | from unittest import TestCase
18 | from parameterized import parameterized
19 | from numpy import (nan)
20 | 
21 | from pandas import (
22 |     Series,
23 |     DataFrame,
24 |     date_range,
25 |     MultiIndex,
26 |     Timedelta,
27 |     Timestamp,
28 |     concat,
29 | )
30 | from pandas.util.testing import (assert_frame_equal,
31 |                                  assert_series_equal)
32 | 
33 | from .. utils import (get_clean_factor_and_forward_returns,
34 |                       compute_forward_returns,
35 |                       quantize_factor)
36 | 
37 | 
38 | class UtilsTestCase(TestCase):
39 |     dr = date_range(start='2015-1-1', end='2015-1-2')
40 |     dr.name = 'date'
41 |     tickers = ['A', 'B', 'C', 'D']
42 | 
43 |     factor = DataFrame(index=dr,
44 |                        columns=tickers,
45 |                        data=[[1, 2, 3, 4],
46 |                              [4, 3, 2, 1]]).stack()
47 |     factor.index = factor.index.set_names(['date', 'asset'])
48 |     factor.name = 'factor'
49 |     factor_data = DataFrame()
50 |     factor_data['factor'] = factor
51 |     factor_data['group'] = Series(index=factor.index,
52 |                                   data=[1, 1, 2, 2, 1, 1, 2, 2],
53 |                                   dtype="category")
54 | 
55 |     biased_factor = DataFrame(index=dr,
56 |                               columns=tickers + ['E', 'F', 'G', 'H'],  # list concat; extend() returns None
57 |                               data=[[-1, 3, -2, 4, -5, 7, -6, 8],
58 |                                     [-4, 2, -3, 1, -8, 6, -7, 5]]).stack()
59 |     biased_factor.index = biased_factor.index.set_names(['date', 'asset'])
60 |     biased_factor.name = 'factor'
61 |     biased_factor_data = DataFrame()
62 |     biased_factor_data['factor'] = biased_factor
63 |     biased_factor_data['group'] = Series(index=biased_factor.index,
64 |                                          data=[1, 1, 2, 2, 1, 1, 2, 2,
65 |                                                1, 1, 2, 2, 1, 1, 2, 2],
66 |                                          dtype="category")
67 | 
68 |     def test_compute_forward_returns(self):
69 |         dr = date_range(start='2015-1-1', end='2015-1-3')
70 |         prices = DataFrame(index=dr, columns=['A', 'B'],
71 |                            data=[[1, 1], [1, 2], [2, 1]])
72 |         factor = prices.stack()
73 | 
74 |         fp = compute_forward_returns(factor, prices, periods=[1, 2])
75 | 
76 |         ix = MultiIndex.from_product([dr, ['A', 'B']],
77 |                                      names=['date', 'asset'])
78 |         expected = DataFrame(index=ix, columns=['1D', '2D'])
79 |         expected['1D'] = [0., 1., 1., -0.5, nan, nan]
80 |         expected['2D'] = [1., 0., nan, nan, nan, nan]
81 | 
82 |         assert_frame_equal(fp, expected)
83 | 
84 |     def test_compute_forward_returns_index_out_of_bound(self):
85 |         dr = date_range(start='2014-12-29', end='2015-1-3')
86 | 
prices = DataFrame(index=dr, columns=['A', 'B'], 87 | data=[[nan, nan], [nan, nan], [nan, nan], 88 | [1, 1], [1, 2], [2, 1]]) 89 | 90 | dr = date_range(start='2015-1-1', end='2015-1-3') 91 | factor = DataFrame(index=dr, columns=['A', 'B'], 92 | data=[[1, 1], [1, 2], [2, 1]]) 93 | factor = factor.stack() 94 | 95 | fp = compute_forward_returns(factor, prices, periods=[1, 2]) 96 | 97 | ix = MultiIndex.from_product([dr, ['A', 'B']], 98 | names=['date', 'asset']) 99 | expected = DataFrame(index=ix, columns=['1D', '2D']) 100 | expected['1D'] = [0., 1., 1., -0.5, nan, nan] 101 | expected['2D'] = [1., 0., nan, nan, nan, nan] 102 | 103 | assert_frame_equal(fp, expected) 104 | 105 | def test_compute_forward_returns_non_cum(self): 106 | dr = date_range(start='2015-1-1', end='2015-1-3') 107 | prices = DataFrame(index=dr, columns=['A', 'B'], 108 | data=[[1, 1], [1, 2], [2, 1]]) 109 | factor = prices.stack() 110 | 111 | fp = compute_forward_returns(factor, prices, periods=[1, 2], 112 | cumulative_returns=False) 113 | 114 | ix = MultiIndex.from_product([dr, ['A', 'B']], 115 | names=['date', 'asset']) 116 | expected = DataFrame(index=ix, columns=['1D', '2D']) 117 | expected['1D'] = [0., 1., 1., -0.5, nan, nan] 118 | expected['2D'] = [1., -0.5, nan, nan, nan, nan] 119 | 120 | assert_frame_equal(fp, expected) 121 | 122 | @parameterized.expand([(factor_data, 4, None, False, False, 123 | [1, 2, 3, 4, 4, 3, 2, 1]), 124 | (factor_data, 2, None, False, False, 125 | [1, 1, 2, 2, 2, 2, 1, 1]), 126 | (factor_data, 2, None, True, False, 127 | [1, 2, 1, 2, 2, 1, 2, 1]), 128 | (biased_factor_data, 4, None, False, True, 129 | [2, 3, 2, 3, 1, 4, 1, 4, 2, 3, 2, 3, 1, 4, 1, 4]), 130 | (biased_factor_data, 2, None, False, True, 131 | [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]), 132 | (biased_factor_data, 2, None, True, True, 133 | [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]), 134 | (biased_factor_data, None, 4, False, True, 135 | [2, 3, 2, 3, 1, 4, 1, 4, 2, 3, 2, 3, 1, 4, 1, 4]), 136 | (biased_factor_data, None, 2, False, True, 137 | [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]), 138 | (biased_factor_data, None, 2, True, True, 139 | [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]), 140 | (factor_data, [0, .25, .5, .75, 1.], None, False, 141 | False, [1, 2, 3, 4, 4, 3, 2, 1]), 142 | (factor_data, [0, .5, .75, 1.], None, False, False, 143 | [1, 1, 2, 3, 3, 2, 1, 1]), 144 | (factor_data, [0, .25, .5, 1.], None, False, False, 145 | [1, 2, 3, 3, 3, 3, 2, 1]), 146 | (factor_data, [0, .5, 1.], None, False, False, 147 | [1, 1, 2, 2, 2, 2, 1, 1]), 148 | (factor_data, [.25, .5, .75], None, False, False, 149 | [nan, 1, 2, nan, nan, 2, 1, nan]), 150 | (factor_data, [0, .5, 1.], None, True, False, 151 | [1, 2, 1, 2, 2, 1, 2, 1]), 152 | (factor_data, [.5, 1.], None, True, False, 153 | [nan, 1, nan, 1, 1, nan, 1, nan]), 154 | (factor_data, [0, 1.], None, True, False, 155 | [1, 1, 1, 1, 1, 1, 1, 1]), 156 | (factor_data, None, 4, False, False, 157 | [1, 2, 3, 4, 4, 3, 2, 1]), 158 | (factor_data, None, 2, False, False, 159 | [1, 1, 2, 2, 2, 2, 1, 1]), 160 | (factor_data, None, 3, False, False, 161 | [1, 1, 2, 3, 3, 2, 1, 1]), 162 | (factor_data, None, 8, False, False, 163 | [1, 3, 6, 8, 8, 6, 3, 1]), 164 | (factor_data, None, [0, 1, 2, 3, 5], False, False, 165 | [1, 2, 3, 4, 4, 3, 2, 1]), 166 | (factor_data, None, [1, 2, 3], False, False, 167 | [nan, 1, 2, nan, nan, 2, 1, nan]), 168 | (factor_data, None, [0, 2, 5], False, False, 169 | [1, 1, 2, 2, 2, 2, 1, 1]), 170 | (factor_data, None, [0.5, 2.5, 4.5], False, False, 171 | 
[1, 1, 2, 2, 2, 2, 1, 1]), 172 | (factor_data, None, [0.5, 2.5], True, False, 173 | [1, 1, nan, nan, nan, nan, 1, 1]), 174 | (factor_data, None, 2, True, False, 175 | [1, 2, 1, 2, 2, 1, 2, 1])]) 176 | def test_quantize_factor(self, factor, quantiles, bins, by_group, 177 | zero_aware, expected_vals): 178 | quantized_factor = quantize_factor(factor, 179 | quantiles=quantiles, 180 | bins=bins, 181 | by_group=by_group, 182 | zero_aware=zero_aware) 183 | expected = Series(index=factor.index, 184 | data=expected_vals, 185 | name='factor_quantile').dropna() 186 | assert_series_equal(quantized_factor, expected) 187 | 188 | def test_get_clean_factor_and_forward_returns_1(self): 189 | """ 190 | Test get_clean_factor_and_forward_returns with a daily factor 191 | """ 192 | tickers = ['A', 'B', 'C', 'D', 'E', 'F'] 193 | 194 | factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2} 195 | 196 | price_data = [[1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i] 197 | for i in range(1, 7)] # 6 days = 3 + 3 fwd returns 198 | 199 | factor_data = [[3, 4, 2, 1, nan, nan], 200 | [3, nan, nan, 1, 4, 2], 201 | [3, 4, 2, 1, nan, nan]] # 3 days 202 | 203 | start = '2015-1-11' 204 | factor_end = '2015-1-13' 205 | price_end = '2015-1-16' # 3D fwd returns 206 | 207 | price_index = date_range(start=start, end=price_end) 208 | price_index.name = 'date' 209 | prices = DataFrame(index=price_index, columns=tickers, data=price_data) 210 | 211 | factor_index = date_range(start=start, end=factor_end) 212 | factor_index.name = 'date' 213 | factor = DataFrame(index=factor_index, columns=tickers, 214 | data=factor_data).stack() 215 | 216 | factor_data = get_clean_factor_and_forward_returns( 217 | factor, prices, 218 | groupby=factor_groups, 219 | quantiles=4, 220 | periods=(1, 2, 3)) 221 | 222 | expected_idx = factor.index.rename(['date', 'asset']) 223 | expected_cols = ['1D', '2D', '3D', 224 | 'factor', 'group', 'factor_quantile'] 225 | expected_data = [[0.1, 0.21, 0.331, 3.0, 1, 3], 226 | [-0.5, -0.75, -0.875, 4.0, 2, 4], 227 | [2.0, 8.00, 26.000, 2.0, 1, 2], 228 | [-0.1, -0.19, -0.271, 1.0, 2, 1], 229 | [0.1, 0.21, 0.331, 3.0, 1, 3], 230 | [-0.1, -0.19, -0.271, 1.0, 2, 1], 231 | [-0.5, -0.75, -0.875, 4.0, 1, 4], 232 | [0.0, 0.00, 0.000, 2.0, 2, 2], 233 | [0.1, 0.21, 0.331, 3.0, 1, 3], 234 | [-0.5, -0.75, -0.875, 4.0, 2, 4], 235 | [2.0, 8.00, 26.000, 2.0, 1, 2], 236 | [-0.1, -0.19, -0.271, 1.0, 2, 1]] 237 | expected = DataFrame(index=expected_idx, 238 | columns=expected_cols, data=expected_data) 239 | expected['group'] = expected['group'].astype('category') 240 | 241 | assert_frame_equal(factor_data, expected) 242 | 243 | def test_get_clean_factor_and_forward_returns_2(self): 244 | """ 245 | Test get_clean_factor_and_forward_returns with a daily factor 246 | on a business day calendar 247 | """ 248 | tickers = ['A', 'B', 'C', 'D', 'E', 'F'] 249 | 250 | factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2} 251 | 252 | price_data = [[1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i] 253 | for i in range(1, 7)] # 6 days = 3 + 3 fwd returns 254 | 255 | factor_data = [[3, 4, 2, 1, nan, nan], 256 | [3, nan, nan, 1, 4, 2], 257 | [3, 4, 2, 1, nan, nan]] # 3 days 258 | 259 | start = '2017-1-12' 260 | factor_end = '2017-1-16' 261 | price_end = '2017-1-19' # 3D fwd returns 262 | 263 | price_index = date_range(start=start, end=price_end, freq='B') 264 | price_index.name = 'date' 265 | prices = DataFrame(index=price_index, columns=tickers, data=price_data) 266 | 267 | factor_index = date_range(start=start, 
end=factor_end, freq='B') 268 | factor_index.name = 'date' 269 | factor = DataFrame(index=factor_index, columns=tickers, 270 | data=factor_data).stack() 271 | 272 | factor_data = get_clean_factor_and_forward_returns( 273 | factor, prices, 274 | groupby=factor_groups, 275 | quantiles=4, 276 | periods=(1, 2, 3)) 277 | 278 | expected_idx = factor.index.rename(['date', 'asset']) 279 | expected_cols = ['1D', '2D', '3D', 280 | 'factor', 'group', 'factor_quantile'] 281 | expected_data = [[0.1, 0.21, 0.331, 3.0, 1, 3], 282 | [-0.5, -0.75, -0.875, 4.0, 2, 4], 283 | [2.0, 8.00, 26.000, 2.0, 1, 2], 284 | [-0.1, -0.19, -0.271, 1.0, 2, 1], 285 | [0.1, 0.21, 0.331, 3.0, 1, 3], 286 | [-0.1, -0.19, -0.271, 1.0, 2, 1], 287 | [-0.5, -0.75, -0.875, 4.0, 1, 4], 288 | [0.0, 0.00, 0.000, 2.0, 2, 2], 289 | [0.1, 0.21, 0.331, 3.0, 1, 3], 290 | [-0.5, -0.75, -0.875, 4.0, 2, 4], 291 | [2.0, 8.00, 26.000, 2.0, 1, 2], 292 | [-0.1, -0.19, -0.271, 1.0, 2, 1]] 293 | expected = DataFrame(index=expected_idx, 294 | columns=expected_cols, data=expected_data) 295 | expected['group'] = expected['group'].astype('category') 296 | 297 | assert_frame_equal(factor_data, expected) 298 | 299 | def test_get_clean_factor_and_forward_returns_3(self): 300 | """ 301 | Test get_clean_factor_and_forward_returns with and intraday factor 302 | """ 303 | tickers = ['A', 'B', 'C', 'D', 'E', 'F'] 304 | 305 | factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2} 306 | 307 | price_data = [[1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i] 308 | for i in range(1, 5)] # 4 days = 3 + 1 fwd returns 309 | 310 | factor_data = [[3, 4, 2, 1, nan, nan], 311 | [3, nan, nan, 1, 4, 2], 312 | [3, 4, 2, 1, nan, nan]] # 3 days 313 | 314 | start = '2017-1-12' 315 | factor_end = '2017-1-16' 316 | price_end = '2017-1-17' # 1D fwd returns 317 | 318 | price_index = date_range(start=start, end=price_end, freq='B') 319 | price_index.name = 'date' 320 | today_open = DataFrame(index=price_index + Timedelta('9h30m'), 321 | columns=tickers, data=price_data) 322 | today_open_1h = DataFrame(index=price_index + Timedelta('10h30m'), 323 | columns=tickers, data=price_data) 324 | today_open_1h += today_open_1h * 0.001 325 | today_open_3h = DataFrame(index=price_index + Timedelta('12h30m'), 326 | columns=tickers, data=price_data) 327 | today_open_3h -= today_open_3h * 0.002 328 | prices = concat([today_open, today_open_1h, today_open_3h]) \ 329 | .sort_index() 330 | 331 | factor_index = date_range(start=start, end=factor_end, freq='B') 332 | factor_index.name = 'date' 333 | factor = DataFrame(index=factor_index + Timedelta('9h30m'), 334 | columns=tickers, data=factor_data).stack() 335 | 336 | factor_data = get_clean_factor_and_forward_returns( 337 | factor, prices, 338 | groupby=factor_groups, 339 | quantiles=4, 340 | periods=(1, 2, 3)) 341 | 342 | expected_idx = factor.index.rename(['date', 'asset']) 343 | expected_cols = ['1h', '3h', '1D', 344 | 'factor', 'group', 'factor_quantile'] 345 | expected_data = [[0.001, -0.002, 0.1, 3.0, 1, 3], 346 | [0.001, -0.002, -0.5, 4.0, 2, 4], 347 | [0.001, -0.002, 2.0, 2.0, 1, 2], 348 | [0.001, -0.002, -0.1, 1.0, 2, 1], 349 | [0.001, -0.002, 0.1, 3.0, 1, 3], 350 | [0.001, -0.002, -0.1, 1.0, 2, 1], 351 | [0.001, -0.002, -0.5, 4.0, 1, 4], 352 | [0.001, -0.002, 0.0, 2.0, 2, 2], 353 | [0.001, -0.002, 0.1, 3.0, 1, 3], 354 | [0.001, -0.002, -0.5, 4.0, 2, 4], 355 | [0.001, -0.002, 2.0, 2.0, 1, 2], 356 | [0.001, -0.002, -0.1, 1.0, 2, 1]] 357 | expected = DataFrame(index=expected_idx, 358 | columns=expected_cols, data=expected_data) 359 
359 | expected['group'] = expected['group'].astype('category')
360 |
361 | assert_frame_equal(factor_data, expected)
362 |
363 | def test_get_clean_factor_and_forward_returns_4(self):
364 | """
365 | Test get_clean_factor_and_forward_returns on an event
366 | """
367 | tickers = ['A', 'B', 'C', 'D', 'E', 'F']
368 |
369 | factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2}
370 |
371 | price_data = [[1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i]
372 | for i in range(1, 9)]
373 |
374 | factor_data = [[1, nan, nan, nan, nan, 6],
375 | [4, nan, nan, 7, nan, nan],
376 | [nan, nan, nan, nan, nan, nan],
377 | [nan, 3, nan, 2, nan, nan],
378 | [nan, nan, 1, nan, 3, nan]]
379 |
380 | price_index = date_range(start='2017-1-12', end='2017-1-23', freq='B')
381 | price_index.name = 'date'
382 | prices = DataFrame(index=price_index, columns=tickers, data=price_data)
383 |
384 | factor_index = date_range(start='2017-1-12', end='2017-1-18', freq='B')
385 | factor_index.name = 'date'
386 | factor = DataFrame(index=factor_index, columns=tickers,
387 | data=factor_data).stack()
388 |
389 | factor_data = get_clean_factor_and_forward_returns(
390 | factor, prices,
391 | groupby=factor_groups,
392 | quantiles=4,
393 | periods=(1, 2, 3))
394 |
395 | expected_idx = factor.index.rename(['date', 'asset'])
396 | expected_cols = ['1D', '2D', '3D',
397 | 'factor', 'group', 'factor_quantile']
398 | expected_data = [[0.1, 0.21, 0.331, 1.0, 1, 1],
399 | [0.0, 0.00, 0.000, 6.0, 2, 4],
400 | [0.1, 0.21, 0.331, 4.0, 1, 1],
401 | [-0.1, -0.19, -0.271, 7.0, 2, 4],
402 | [-0.5, -0.75, -0.875, 3.0, 2, 4],
403 | [-0.1, -0.19, -0.271, 2.0, 2, 1],
404 | [2.0, 8.00, 26.000, 1.0, 1, 1],
405 | [-0.5, -0.75, -0.875, 3.0, 1, 4]]
406 | expected = DataFrame(index=expected_idx,
407 | columns=expected_cols, data=expected_data)
408 | expected['group'] = expected['group'].astype('category')
409 |
410 | assert_frame_equal(factor_data, expected)
411 |
412 | def test_get_clean_factor_and_forward_returns_5(self):
413 | """
414 | Test get_clean_factor_and_forward_returns with an intraday factor
415 | and holidays
416 | """
417 | tickers = ['A', 'B', 'C', 'D', 'E', 'F']
418 |
419 | factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2}
420 |
421 | price_data = [[1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i]
422 | for i in range(1, 20)] # 19 days = 18 + 1 fwd returns
423 |
424 | factor_data = [[3, 4, 2, 1, nan, nan],
425 | [3, nan, nan, 1, 4, 2],
426 | [3, 4, 2, 1, nan, nan]] * 6 # 18 days
427 |
428 | start = '2017-1-12'
429 | factor_end = '2017-2-10'
430 | price_end = '2017-2-13' # 1D (business day) fwd returns
431 | holidays = ['2017-1-13', '2017-1-18', '2017-1-30', '2017-2-7']
432 | holidays = [Timestamp(d) for d in holidays]
433 |
434 | price_index = date_range(start=start, end=price_end, freq='B')
435 | price_index.name = 'date'
436 | price_index = price_index.drop(holidays)
437 |
438 | today_open = DataFrame(index=price_index + Timedelta('9h30m'),
439 | columns=tickers, data=price_data)
440 | today_open_1h = DataFrame(index=price_index + Timedelta('10h30m'),
441 | columns=tickers, data=price_data)
442 | today_open_1h += today_open_1h * 0.001
443 | today_open_3h = DataFrame(index=price_index + Timedelta('12h30m'),
444 | columns=tickers, data=price_data)
445 | today_open_3h -= today_open_3h * 0.002
446 | prices = concat([today_open, today_open_1h, today_open_3h]) \
447 | .sort_index()
448 |
449 | factor_index = date_range(start=start, end=factor_end, freq='B')
450 | factor_index.name = 'date'
451 |
factor_index = factor_index.drop(holidays) 452 | factor = DataFrame(index=factor_index + Timedelta('9h30m'), 453 | columns=tickers, data=factor_data).stack() 454 | 455 | factor_data = get_clean_factor_and_forward_returns( 456 | factor, prices, 457 | groupby=factor_groups, 458 | quantiles=4, 459 | periods=(1, 2, 3)) 460 | 461 | expected_idx = factor.index.rename(['date', 'asset']) 462 | expected_cols = ['1h', '3h', '1D', 463 | 'factor', 'group', 'factor_quantile'] 464 | expected_data = [[0.001, -0.002, 0.1, 3.0, 1, 3], 465 | [0.001, -0.002, -0.5, 4.0, 2, 4], 466 | [0.001, -0.002, 2.0, 2.0, 1, 2], 467 | [0.001, -0.002, -0.1, 1.0, 2, 1], 468 | [0.001, -0.002, 0.1, 3.0, 1, 3], 469 | [0.001, -0.002, -0.1, 1.0, 2, 1], 470 | [0.001, -0.002, -0.5, 4.0, 1, 4], 471 | [0.001, -0.002, 0.0, 2.0, 2, 2], 472 | [0.001, -0.002, 0.1, 3.0, 1, 3], 473 | [0.001, -0.002, -0.5, 4.0, 2, 4], 474 | [0.001, -0.002, 2.0, 2.0, 1, 2], 475 | [0.001, -0.002, -0.1, 1.0, 2, 1]] * 6 # 18 days 476 | expected = DataFrame(index=expected_idx, 477 | columns=expected_cols, data=expected_data) 478 | expected['group'] = expected['group'].astype('category') 479 | 480 | assert_frame_equal(factor_data, expected) 481 | 482 | inferred_holidays = factor_data.index.levels[0].freq.holidays 483 | assert sorted(holidays) == sorted(inferred_holidays) 484 | 485 | def test_get_clean_factor_and_forward_returns_6(self): 486 | """ 487 | Test get_clean_factor_and_forward_returns with a daily factor 488 | on a business day calendar and holidays 489 | """ 490 | tickers = ['A', 'B', 'C', 'D', 'E', 'F'] 491 | 492 | factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2} 493 | 494 | price_data = [[1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i] 495 | for i in range(1, 22)] # 21 days = 18 + 3 fwd returns 496 | 497 | factor_data = [[3, 4, 2, 1, nan, nan], 498 | [3, nan, nan, 1, 4, 2], 499 | [3, 4, 2, 1, nan, nan]] * 6 # 18 days 500 | 501 | start = '2017-1-12' 502 | factor_end = '2017-2-10' 503 | price_end = '2017-2-15' # 3D (business day) fwd returns 504 | holidays = ['2017-1-13', '2017-1-18', '2017-1-30', '2017-2-7'] 505 | holidays = [Timestamp(d) for d in holidays] 506 | 507 | price_index = date_range(start=start, end=price_end, freq='B') 508 | price_index.name = 'date' 509 | price_index = price_index.drop(holidays) 510 | prices = DataFrame(index=price_index, columns=tickers, data=price_data) 511 | 512 | factor_index = date_range(start=start, end=factor_end, freq='B') 513 | factor_index.name = 'date' 514 | factor_index = factor_index.drop(holidays) 515 | factor = DataFrame(index=factor_index, columns=tickers, 516 | data=factor_data).stack() 517 | 518 | factor_data = get_clean_factor_and_forward_returns( 519 | factor, prices, 520 | groupby=factor_groups, 521 | quantiles=4, 522 | periods=(1, 2, 3)) 523 | 524 | expected_idx = factor.index.rename(['date', 'asset']) 525 | expected_cols = ['1D', '2D', '3D', 526 | 'factor', 'group', 'factor_quantile'] 527 | expected_data = [[0.1, 0.21, 0.331, 3.0, 1, 3], 528 | [-0.5, -0.75, -0.875, 4.0, 2, 4], 529 | [2.0, 8.00, 26.000, 2.0, 1, 2], 530 | [-0.1, -0.19, -0.271, 1.0, 2, 1], 531 | [0.1, 0.21, 0.331, 3.0, 1, 3], 532 | [-0.1, -0.19, -0.271, 1.0, 2, 1], 533 | [-0.5, -0.75, -0.875, 4.0, 1, 4], 534 | [0.0, 0.00, 0.000, 2.0, 2, 2], 535 | [0.1, 0.21, 0.331, 3.0, 1, 3], 536 | [-0.5, -0.75, -0.875, 4.0, 2, 4], 537 | [2.0, 8.00, 26.000, 2.0, 1, 2], 538 | [-0.1, -0.19, -0.271, 1.0, 2, 1]] * 6 # 18 days 539 | expected = DataFrame(index=expected_idx, 540 | columns=expected_cols, data=expected_data) 541 | 
expected['group'] = expected['group'].astype('category')
542 |
543 | assert_frame_equal(factor_data, expected)
544 |
545 | inferred_holidays = factor_data.index.levels[0].freq.holidays
546 | assert sorted(holidays) == sorted(inferred_holidays)
547 |
-------------------------------------------------------------------------------- /alphalens/utils.py: --------------------------------------------------------------------------------
1 | #
2 | # Copyright 2018 Quantopian, Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import pandas as pd
17 | import numpy as np
18 | import re
19 | import warnings
20 |
21 | from IPython.display import display
22 | from pandas.tseries.offsets import CustomBusinessDay, Day, BusinessDay
23 | from scipy.stats import mode
24 |
25 |
26 | class NonMatchingTimezoneError(Exception):
27 | pass
28 |
29 |
30 | class MaxLossExceededError(Exception):
31 | pass
32 |
33 |
34 | def rethrow(exception, additional_message):
35 | """
36 | Re-raise the last exception that was active in the current scope
37 | without losing the stacktrace but adding an additional message.
38 | This is hacky because it has to be compatible with both Python 2 and 3
39 | """
40 | e = exception
41 | m = additional_message
42 | if not e.args:
43 | e.args = (m,)
44 | else:
45 | e.args = (e.args[0] + m,) + e.args[1:]
46 | raise e
47 |
48 |
49 | def non_unique_bin_edges_error(func):
50 | """
51 | Give user a more informative error in case it is not possible
52 | to properly calculate quantiles on the input dataframe (factor)
53 | """
54 | message = """
55 |
56 | An error occurred while computing bins/quantiles on the input provided.
57 | This usually happens when the input contains too many identical
58 | values and they span more than one quantile. The quantiles are chosen
59 | to have the same number of records each, but the same value cannot span
60 | multiple quantiles. Possible workarounds are:
61 | 1 - Decrease the number of quantiles
62 | 2 - Specify a custom quantiles range, e.g. [0, .50, .75, 1.] to get unequal
63 | number of records per quantile
64 | 3 - Use 'bins' option instead of 'quantiles', 'bins' chooses the
65 | buckets to be evenly spaced according to the values themselves, while
66 | 'quantiles' forces the buckets to have the same number of records.
67 | 4 - For factors with discrete values use the 'bins' option with custom
68 | ranges and create a range for each discrete value
69 | Please see utils.get_clean_factor_and_forward_returns documentation for
70 | full documentation of 'bins' and 'quantiles' options.
71 |
72 | """
73 |
74 | def dec(*args, **kwargs):
75 | try:
76 | return func(*args, **kwargs)
77 | except ValueError as e:
78 | if 'Bin edges must be unique' in str(e):
79 | rethrow(e, message)
80 | raise
81 | return dec
82 |
83 |
84 | @non_unique_bin_edges_error
85 | def quantize_factor(factor_data,
86 | quantiles=5,
87 | bins=None,
88 | by_group=False,
89 | no_raise=False,
90 | zero_aware=False):
91 | """
92 | Computes period wise factor quantiles.
93 |
94 | Parameters
95 | ----------
96 | factor_data : pd.DataFrame - MultiIndex
97 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
98 | containing the values for a single alpha factor, forward returns for
99 | each period, the factor quantile/bin that factor value belongs to, and
100 | (optionally) the group the asset belongs to.
101 |
102 | - See full explanation in utils.get_clean_factor_and_forward_returns
103 |
104 | quantiles : int or sequence[float]
105 | Number of equal-sized quantile buckets to use in factor bucketing.
106 | Alternately sequence of quantiles, allowing non-equal-sized buckets
107 | e.g. [0, .10, .5, .90, 1.] or [.05, .5, .95]
108 | Only one of 'quantiles' or 'bins' can be not-None
109 | bins : int or sequence[float]
110 | Number of equal-width (valuewise) bins to use in factor bucketing.
111 | Alternately sequence of bin edges allowing for non-uniform bin width
112 | e.g. [-4, -2, -0.5, 0, 10]
113 | Only one of 'quantiles' or 'bins' can be not-None
114 | by_group : bool, optional
115 | If True, compute quantile buckets separately for each group.
116 | no_raise : bool, optional
117 | If True, no exceptions are thrown and the values for which the
118 | exception would have been thrown are set to np.NaN
119 | zero_aware : bool, optional
120 | If True, compute quantile buckets separately for positive and negative
121 | signal values. This is useful if your signal is centered and zero is
122 | the separation between long and short signals.
123 |
124 | Returns
125 | -------
126 | factor_quantile : pd.Series
127 | Factor quantiles indexed by date and asset.
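
    Examples
    --------
    A minimal sketch (``factor_data`` here is a hypothetical DataFrame in
    the MultiIndex format described above):

    >>> quintiles = quantize_factor(factor_data, quantiles=5)
    >>> halves = quantize_factor(factor_data, quantiles=None,
    ...                          bins=[-10, 0, 10])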
128 | """
129 | if not ((quantiles is not None and bins is None) or
130 | (quantiles is None and bins is not None)):
131 | raise ValueError('Either quantiles or bins should be provided')
132 |
133 | if zero_aware and not (isinstance(quantiles, int)
134 | or isinstance(bins, int)):
135 | msg = ("zero_aware should only be True when quantiles or bins is an"
136 | " integer")
137 | raise ValueError(msg)
138 |
139 | def quantile_calc(x, _quantiles, _bins, _zero_aware, _no_raise):
140 | try:
141 | if _quantiles is not None and _bins is None and not _zero_aware:
142 | return pd.qcut(x, _quantiles, labels=False) + 1
143 | elif _quantiles is not None and _bins is None and _zero_aware:
144 | pos_quantiles = pd.qcut(x[x >= 0], _quantiles // 2,
145 | labels=False) + _quantiles // 2 + 1
146 | neg_quantiles = pd.qcut(x[x < 0], _quantiles // 2,
147 | labels=False) + 1
148 | return pd.concat([pos_quantiles, neg_quantiles]).sort_index()
149 | elif _bins is not None and _quantiles is None and not _zero_aware:
150 | return pd.cut(x, _bins, labels=False) + 1
151 | elif _bins is not None and _quantiles is None and _zero_aware:
152 | pos_bins = pd.cut(x[x >= 0], _bins // 2,
153 | labels=False) + _bins // 2 + 1
154 | neg_bins = pd.cut(x[x < 0], _bins // 2,
155 | labels=False) + 1
156 | return pd.concat([pos_bins, neg_bins]).sort_index()
157 | except Exception as e:
158 | if _no_raise:
159 | return pd.Series(index=x.index)
160 | raise e
161 |
162 | grouper = [factor_data.index.get_level_values('date')]
163 | if by_group:
164 | grouper.append('group')
165 |
166 | factor_quantile = factor_data.groupby(grouper)['factor'] \
167 | .apply(quantile_calc, quantiles, bins, zero_aware, no_raise)
168 | factor_quantile.name = 'factor_quantile'
169 |
170 | return factor_quantile.dropna()
171 |
172 |
173 | def infer_trading_calendar(factor_idx, prices_idx):
174 | """
175 | Infer the trading calendar from factor and price information.
176 |
177 | Parameters
178 | ----------
179 | factor_idx : pd.DatetimeIndex
180 | The factor datetimes for which we are computing the forward returns
181 | prices_idx : pd.DatetimeIndex
182 | The prices datetimes associated with the factor data
183 |
184 | Returns
185 | -------
186 | calendar : pd.DateOffset
187 | """
188 | full_idx = factor_idx.union(prices_idx)
189 |
190 | traded_weekdays = []
191 | holidays = []
192 |
193 | days_of_the_week = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
194 | for day, day_str in enumerate(days_of_the_week):
195 |
196 | weekday_mask = (full_idx.dayofweek == day)
197 |
198 | # drop days of the week that are not traded at all
199 | if not weekday_mask.any():
200 | continue
201 | traded_weekdays.append(day_str)
202 |
203 | # look for holidays
204 | used_weekdays = full_idx[weekday_mask].normalize()
205 | all_weekdays = pd.date_range(full_idx.min(), full_idx.max(),
206 | freq=CustomBusinessDay(weekmask=day_str)
207 | ).normalize()
208 | _holidays = all_weekdays.difference(used_weekdays)
209 | _holidays = [timestamp.date() for timestamp in _holidays]
210 | holidays.extend(_holidays)
211 |
212 | traded_weekdays = ' '.join(traded_weekdays)
213 | return CustomBusinessDay(weekmask=traded_weekdays, holidays=holidays)
214 |
215 |
216 | def compute_forward_returns(factor,
217 | prices,
218 | periods=(1, 5, 10),
219 | filter_zscore=None,
220 | cumulative_returns=True):
221 | """
222 | Finds the N period forward returns (as percent change) for each asset
223 | provided.
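
    Ignoring the trading-calendar alignment performed below, the cumulative
    forward return for period ``p`` at date ``t`` is essentially
    ``prices.pct_change(p).shift(-p)`` evaluated at ``t``, i.e.
    ``prices[t + p] / prices[t] - 1``; with ``cumulative_returns=False`` the
    single-period ``prices.pct_change().shift(-p)`` is used instead.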
224 |
225 | Parameters
226 | ----------
227 | factor : pd.Series - MultiIndex
228 | A MultiIndex Series indexed by timestamp (level 0) and asset
229 | (level 1), containing the values for a single alpha factor.
230 |
231 | - See full explanation in utils.get_clean_factor_and_forward_returns
232 |
233 | prices : pd.DataFrame
234 | Pricing data to use in forward price calculation.
235 | Assets as columns, dates as index. Pricing data must
236 | span the factor analysis time period plus an additional buffer window
237 | that is greater than the maximum number of expected periods
238 | in the forward returns calculations.
239 | periods : sequence[int]
240 | periods to compute forward returns on.
241 | filter_zscore : int or float, optional
242 | Sets forward returns greater than X standard deviations
243 | from the mean to nan. Set it to 'None' to avoid filtering.
244 | Caution: this outlier filtering incorporates lookahead bias.
245 | cumulative_returns : bool, optional
246 | If True, forward returns columns will contain cumulative returns.
247 | Setting this to False is useful if you want to analyze how predictive
248 | a factor is for a single forward day.
249 |
250 | Returns
251 | -------
252 | forward_returns : pd.DataFrame - MultiIndex
253 | A MultiIndex DataFrame indexed by timestamp (level 0) and asset
254 | (level 1), containing the forward returns for assets.
255 | Forward returns column names follow the format accepted by
256 | pd.Timedelta (e.g. '1D', '30m', '3h15m', '1D1h', etc).
257 | 'date' index freq property (forward_returns.index.levels[0].freq)
258 | will be set to a trading calendar (pandas DateOffset) inferred
259 | from the input data (see infer_trading_calendar for more details).
260 | """
261 |
262 | factor_dateindex = factor.index.levels[0]
263 | if factor_dateindex.tz != prices.index.tz:
264 | raise NonMatchingTimezoneError("The timezone of 'factor' is not the "
265 | "same as the timezone of 'prices'. See "
266 | "the pandas methods tz_localize and "
267 | "tz_convert.")
268 |
269 | freq = infer_trading_calendar(factor_dateindex, prices.index)
270 |
271 | factor_dateindex = factor_dateindex.intersection(prices.index)
272 |
273 | if len(factor_dateindex) == 0:
274 | raise ValueError("Factor and prices indices don't match: make sure "
275 | "they have the same convention in terms of datetimes "
276 | "and symbol-names")
277 |
278 | # chop prices down to only the assets we care about (= unique assets in
279 | # `factor`). we could modify `prices` in place, but that might confuse
280 | # the caller.
281 | prices = prices.filter(items=factor.index.levels[1])
282 |
283 | raw_values_dict = {}
284 | column_list = []
285 |
286 | for period in sorted(periods):
287 | if cumulative_returns:
288 | returns = prices.pct_change(period)
289 | else:
290 | returns = prices.pct_change()
291 |
292 | forward_returns = \
293 | returns.shift(-period).reindex(factor_dateindex)
294 |
295 | if filter_zscore is not None:
296 | mask = abs(
297 | forward_returns - forward_returns.mean()
298 | ) > (filter_zscore * forward_returns.std())
299 | forward_returns[mask] = np.nan
300 |
301 | #
302 | # Find the period length, which will be the column name. We'll test
303 | # several entries in order to find out the most likely period length
304 | # (in case the user passed inconsistent data)
305 | #
306 | days_diffs = []
307 | for i in range(30):
308 | if i >= len(forward_returns.index):
309 | break
310 | p_idx = prices.index.get_loc(forward_returns.index[i])
311 | if p_idx is None or p_idx < 0 or (
312 | p_idx + period) >= len(prices.index):
313 | continue
314 | start = prices.index[p_idx]
315 | end = prices.index[p_idx + period]
316 | period_len = diff_custom_calendar_timedeltas(start, end, freq)
317 | days_diffs.append(period_len.components.days)
318 |
319 | delta_days = period_len.components.days - mode(days_diffs).mode[0]
320 | period_len -= pd.Timedelta(days=delta_days)
321 | label = timedelta_to_string(period_len)
322 |
323 | column_list.append(label)
324 |
325 | raw_values_dict[label] = np.concatenate(forward_returns.values)
326 |
327 | df = pd.DataFrame.from_dict(raw_values_dict)
328 | df.set_index(
329 | pd.MultiIndex.from_product(
330 | [factor_dateindex, prices.columns],
331 | names=['date', 'asset']
332 | ),
333 | inplace=True
334 | )
335 | df = df.reindex(factor.index)
336 |
337 | # now set the columns correctly
338 | df = df[column_list]
339 |
340 | df.index.levels[0].freq = freq
341 | df.index.set_names(['date', 'asset'], inplace=True)
342 |
343 | return df
344 |
345 |
346 | def backshift_returns_series(series, N):
347 | """Shift a multi-indexed series backwards by N observations in
348 | the first level.
349 |
350 | This can be used to convert backward-looking returns into a
351 | forward-returns series.
352 | """
353 | ix = series.index
354 | dates, sids = ix.levels
355 | date_labels, sid_labels = map(np.array, ix.labels)
356 |
357 | # Output date labels will contain all but the last N dates.
358 | new_dates = dates[:-N]
359 |
360 | # Output data will remove the first M rows, where M is the number of
361 | # records whose date is among the first N dates.
362 | cutoff = date_labels.searchsorted(N)
363 | new_date_labels = date_labels[cutoff:] - N
364 | new_sid_labels = sid_labels[cutoff:]
365 | new_values = series.values[cutoff:]
366 |
367 | assert new_date_labels[0] == 0
368 |
369 | new_index = pd.MultiIndex(
370 | levels=[new_dates, sids],
371 | labels=[new_date_labels, new_sid_labels],
372 | sortorder=1,
373 | names=ix.names,
374 | )
375 |
376 | return pd.Series(data=new_values, index=new_index)
377 |
378 |
379 | def demean_forward_returns(factor_data, grouper=None):
380 | """
381 | Convert forward returns to returns relative to mean
382 | period wise all-universe or group returns.
383 | Group-wise normalization incorporates the assumption of a
384 | group neutral portfolio constraint and thus allows the
385 | factor to be evaluated across groups.
386 |
387 | For example, if AAPL's 5 period return is 0.1% and the mean 5 period
388 | return for the Technology stocks in our universe was 0.5% in the
389 | same period, the group adjusted 5 period return for AAPL in this
390 | period is -0.4%.
391 |
392 | Parameters
393 | ----------
394 | factor_data : pd.DataFrame - MultiIndex
395 | Forward returns indexed by date and asset.
396 | Separate column for each forward return window.
397 | grouper : list, optional
398 | Grouper(s) to pass to pandas groupby. If None, demean by date
399 | only, i.e. relative to the whole universe on each date.
400 |
401 | Returns
402 | -------
403 | adjusted_forward_returns : pd.DataFrame - MultiIndex
404 | DataFrame of the same format as the input, but with each
405 | security's returns normalized by group.
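
    Examples
    --------
    A minimal sketch (``factor_data`` is a hypothetical DataFrame in the
    standard Alphalens format, including a 'group' column):

    >>> market_neutral = demean_forward_returns(factor_data)
    >>> dates = factor_data.index.get_level_values('date')
    >>> group_neutral = demean_forward_returns(factor_data,
    ...                                        grouper=[dates, 'group'])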
405 | """ 406 | 407 | factor_data = factor_data.copy() 408 | 409 | if not grouper: 410 | grouper = factor_data.index.get_level_values('date') 411 | 412 | cols = get_forward_returns_columns(factor_data.columns) 413 | factor_data[cols] = factor_data.groupby(grouper)[cols] \ 414 | .transform(lambda x: x - x.mean()) 415 | 416 | return factor_data 417 | 418 | 419 | def print_table(table, name=None, fmt=None): 420 | """ 421 | Pretty print a pandas DataFrame. 422 | 423 | Uses HTML output if running inside Jupyter Notebook, otherwise 424 | formatted text output. 425 | 426 | Parameters 427 | ---------- 428 | table : pd.Series or pd.DataFrame 429 | Table to pretty-print. 430 | name : str, optional 431 | Table name to display in upper left corner. 432 | fmt : str, optional 433 | Formatter to use for displaying table elements. 434 | E.g. '{0:.2f}%' for displaying 100 as '100.00%'. 435 | Restores original setting after displaying. 436 | """ 437 | if isinstance(table, pd.Series): 438 | table = pd.DataFrame(table) 439 | 440 | if isinstance(table, pd.DataFrame): 441 | table.columns.name = name 442 | 443 | prev_option = pd.get_option('display.float_format') 444 | if fmt is not None: 445 | pd.set_option('display.float_format', lambda x: fmt.format(x)) 446 | 447 | display(table) 448 | 449 | if fmt is not None: 450 | pd.set_option('display.float_format', prev_option) 451 | 452 | 453 | def get_clean_factor(factor, 454 | forward_returns, 455 | groupby=None, 456 | binning_by_group=False, 457 | quantiles=5, 458 | bins=None, 459 | groupby_labels=None, 460 | max_loss=0.35, 461 | zero_aware=False): 462 | """ 463 | Formats the factor data, forward return data, and group mappings into a 464 | DataFrame that contains aligned MultiIndex indices of timestamp and asset. 465 | The returned data will be formatted to be suitable for Alphalens functions. 466 | 467 | It is safe to skip a call to this function and still make use of Alphalens 468 | functionalities as long as the factor data conforms to the format returned 469 | from get_clean_factor_and_forward_returns and documented here 470 | 471 | Parameters 472 | ---------- 473 | factor : pd.Series - MultiIndex 474 | A MultiIndex Series indexed by timestamp (level 0) and asset 475 | (level 1), containing the values for a single alpha factor. 476 | :: 477 | ----------------------------------- 478 | date | asset | 479 | ----------------------------------- 480 | | AAPL | 0.5 481 | ----------------------- 482 | | BA | -1.1 483 | ----------------------- 484 | 2014-01-01 | CMG | 1.7 485 | ----------------------- 486 | | DAL | -0.1 487 | ----------------------- 488 | | LULU | 2.7 489 | ----------------------- 490 | 491 | forward_returns : pd.DataFrame - MultiIndex 492 | A MultiIndex DataFrame indexed by timestamp (level 0) and asset 493 | (level 1), containing the forward returns for assets. 494 | Forward returns column names must follow the format accepted by 495 | pd.Timedelta (e.g. '1D', '30m', '3h15m', '1D1h', etc). 496 | 'date' index freq property must be set to a trading calendar 497 | (pandas DateOffset), see infer_trading_calendar for more details. 
498 | This information is currently used only in cumulative returns
499 | computation
500 | ::
501 | ---------------------------------------
502 | | | 1D | 5D | 10D
503 | ---------------------------------------
504 | date | asset | | |
505 | ---------------------------------------
506 | | AAPL | 0.09|-0.01|-0.079
507 | ----------------------------
508 | | BA | 0.02| 0.06| 0.020
509 | ----------------------------
510 | 2014-01-01 | CMG | 0.03| 0.09| 0.036
511 | ----------------------------
512 | | DAL |-0.02|-0.06|-0.029
513 | ----------------------------
514 | | LULU |-0.03| 0.05|-0.009
515 | ----------------------------
516 |
517 | groupby : pd.Series - MultiIndex or dict
518 | Either a MultiIndex Series indexed by date and asset,
519 | containing the period wise group codes for each asset, or
520 | a dict of asset to group mappings. If a dict is passed,
521 | it is assumed that group mappings are unchanged for the
522 | entire time period of the passed factor data.
523 | binning_by_group : bool
524 | If True, compute quantile buckets separately for each group.
525 | This is useful when the range of factor values varies considerably
526 | across groups, so that it is wise to make the binning group relative.
527 | You should probably enable this if the factor is intended
528 | to be analyzed for a group neutral portfolio
529 | quantiles : int or sequence[float]
530 | Number of equal-sized quantile buckets to use in factor bucketing.
531 | Alternately sequence of quantiles, allowing non-equal-sized buckets
532 | e.g. [0, .10, .5, .90, 1.] or [.05, .5, .95]
533 | Only one of 'quantiles' or 'bins' can be not-None
534 | bins : int or sequence[float]
535 | Number of equal-width (valuewise) bins to use in factor bucketing.
536 | Alternately sequence of bin edges allowing for non-uniform bin width
537 | e.g. [-4, -2, -0.5, 0, 10]
538 | Chooses the buckets to be evenly spaced according to the values
539 | themselves. Useful when the factor contains discrete values.
540 | Only one of 'quantiles' or 'bins' can be not-None
541 | groupby_labels : dict
542 | A dictionary keyed by group code with values corresponding
543 | to the display name for each group.
544 | max_loss : float, optional
545 | Maximum percentage (0.00 to 1.00) of factor data dropping allowed,
546 | computed comparing the number of items in the input factor index and
547 | the number of items in the output DataFrame index.
548 | Factor data can be partially dropped due to being flawed itself
549 | (e.g. NaNs), not having provided enough price data to compute
550 | forward returns for all factor values, or because it is not possible
551 | to perform binning.
552 | Set max_loss=0 to disable Exception suppression.
553 | zero_aware : bool, optional
554 | If True, compute quantile buckets separately for positive and negative
555 | signal values. This is useful if your signal is centered and zero is
556 | the separation between long and short signals.
557 |
558 |
559 | Returns
560 | -------
561 | merged_data : pd.DataFrame - MultiIndex
562 | A MultiIndex Series indexed by date (level 0) and asset (level 1),
563 | containing the values for a single alpha factor, forward returns for
564 | each period, the factor quantile/bin that factor value belongs to, and
565 | (optionally) the group the asset belongs to.
566 |
567 | - forward returns column names follow the format accepted by
568 | pd.Timedelta (e.g. '1D', '30m', '3h15m', '1D1h', etc)
569 |
570 | - 'date' index freq property (merged_data.index.levels[0].freq) is the
571 | same as that of the input forward returns data. This is currently
572 | used only in cumulative returns computation
573 | ::
574 | -------------------------------------------------------------------
575 | | | 1D | 5D | 10D |factor|group|factor_quantile
576 | -------------------------------------------------------------------
577 | date | asset | | | | | |
578 | -------------------------------------------------------------------
579 | | AAPL | 0.09|-0.01|-0.079| 0.5 | G1 | 3
580 | --------------------------------------------------------
581 | | BA | 0.02| 0.06| 0.020| -1.1 | G2 | 5
582 | --------------------------------------------------------
583 | 2014-01-01 | CMG | 0.03| 0.09| 0.036| 1.7 | G2 | 1
584 | --------------------------------------------------------
585 | | DAL |-0.02|-0.06|-0.029| -0.1 | G3 | 5
586 | --------------------------------------------------------
587 | | LULU |-0.03| 0.05|-0.009| 2.7 | G1 | 2
588 | --------------------------------------------------------
589 | """
590 |
591 | initial_amount = float(len(factor.index))
592 |
593 | factor_copy = factor.copy()
594 | factor_copy.index = factor_copy.index.rename(['date', 'asset'])
595 | factor_copy = factor_copy[np.isfinite(factor_copy)]
596 |
597 | merged_data = forward_returns.copy()
598 | merged_data['factor'] = factor_copy
599 |
600 | if groupby is not None:
601 | if isinstance(groupby, dict):
602 | diff = set(factor_copy.index.get_level_values(
603 | 'asset')) - set(groupby.keys())
604 | if len(diff) > 0:
605 | raise KeyError(
606 | "Assets {} not in group mapping".format(
607 | list(diff)))
608 |
609 | ss = pd.Series(groupby)
610 | groupby = pd.Series(index=factor_copy.index,
611 | data=ss[factor_copy.index.get_level_values(
612 | 'asset')].values)
613 |
614 | if groupby_labels is not None:
615 | diff = set(groupby.values) - set(groupby_labels.keys())
616 | if len(diff) > 0:
617 | raise KeyError(
618 | "groups {} not in passed group names".format(
619 | list(diff)))
620 |
621 | sn = pd.Series(groupby_labels)
622 | groupby = pd.Series(index=groupby.index,
623 | data=sn[groupby.values].values)
624 |
625 | merged_data['group'] = groupby.astype('category')
626 |
627 | merged_data = merged_data.dropna()
628 |
629 | fwdret_amount = float(len(merged_data.index))
630 |
631 | no_raise = False if max_loss == 0 else True
632 | quantile_data = quantize_factor(
633 | merged_data,
634 | quantiles,
635 | bins,
636 | binning_by_group,
637 | no_raise,
638 | zero_aware
639 | )
640 |
641 | merged_data['factor_quantile'] = quantile_data
642 |
643 | merged_data = merged_data.dropna()
644 |
645 | binning_amount = float(len(merged_data.index))
646 |
647 | tot_loss = (initial_amount - binning_amount) / initial_amount
648 | fwdret_loss = (initial_amount - fwdret_amount) / initial_amount
649 | bin_loss = tot_loss - fwdret_loss
650 |
651 | print("Dropped %.1f%% entries from factor data: %.1f%% in forward "
652 | "returns computation and %.1f%% in binning phase "
653 | "(set max_loss=0 to see potentially suppressed Exceptions)." %
654 | (tot_loss * 100, fwdret_loss * 100, bin_loss * 100))
655 |
656 | if tot_loss > max_loss:
657 | message = ("total loss (%.1f%%) exceeded max_loss (%.1f%%), "
658 | "consider increasing max_loss." % (tot_loss * 100, max_loss * 100))
659 | raise MaxLossExceededError(message)
660 | else:
661 | print("max_loss is %.1f%%, not exceeded: OK!" % (max_loss * 100))
662 |
663 | return merged_data
664 |
665 |
666 | def get_clean_factor_and_forward_returns(factor,
667 | prices,
668 | groupby=None,
669 | binning_by_group=False,
670 | quantiles=5,
671 | bins=None,
672 | periods=(1, 5, 10),
673 | filter_zscore=20,
674 | groupby_labels=None,
675 | max_loss=0.35,
676 | zero_aware=False,
677 | cumulative_returns=True):
678 | """
679 | Formats the factor data, pricing data, and group mappings into a DataFrame
680 | that contains aligned MultiIndex indices of timestamp and asset. The
681 | returned data will be formatted to be suitable for Alphalens functions.
682 |
683 | It is safe to skip a call to this function and still make use of Alphalens
684 | functionalities as long as the factor data conforms to the format returned
685 | from get_clean_factor_and_forward_returns and documented here
686 |
687 | Parameters
688 | ----------
689 | factor : pd.Series - MultiIndex
690 | A MultiIndex Series indexed by timestamp (level 0) and asset
691 | (level 1), containing the values for a single alpha factor.
692 | ::
693 | -----------------------------------
694 | date | asset |
695 | -----------------------------------
696 | | AAPL | 0.5
697 | -----------------------
698 | | BA | -1.1
699 | -----------------------
700 | 2014-01-01 | CMG | 1.7
701 | -----------------------
702 | | DAL | -0.1
703 | -----------------------
704 | | LULU | 2.7
705 | -----------------------
706 |
707 | prices : pd.DataFrame
708 | A wide form Pandas DataFrame indexed by timestamp with assets
709 | in the columns.
710 | Pricing data must span the factor analysis time period plus an
711 | additional buffer window that is greater than the maximum number
712 | of expected periods in the forward returns calculations.
713 | It is important to pass the correct pricing data depending on the
714 | point in time your signal was generated, so as to avoid lookahead
715 | bias or delayed calculations.
716 | 'Prices' must contain at least an entry for each timestamp/asset
717 | combination in 'factor'. This entry should reflect the buy price
718 | for the assets and usually it is the next available price after the
719 | factor is computed but it can also be a later price if the factor is
720 | meant to be traded later (e.g. if the factor is computed at market
721 | open but traded 1 hour after market open the price information should
722 | be 1 hour after market open).
723 | 'Prices' must also contain entries for timestamps following each
724 | timestamp/asset combination in 'factor', as many more timestamps
725 | as the maximum value in 'periods'. The asset price after 'period'
726 | timestamps will be considered the sell price for that asset when
727 | computing 'period' forward returns.
728 | ::
729 | ----------------------------------------------------
730 | | AAPL | BA | CMG | DAL | LULU |
731 | ----------------------------------------------------
732 | Date | | | | | |
733 | ----------------------------------------------------
734 | 2014-01-01 |605.12| 24.58| 11.72| 54.43 | 37.14 |
735 | ----------------------------------------------------
736 | 2014-01-02 |604.35| 22.23| 12.21| 52.78 | 33.63 |
737 | ----------------------------------------------------
738 | 2014-01-03 |607.94| 21.68| 14.36| 53.94 | 29.37 |
739 | ----------------------------------------------------
740 |
741 | groupby : pd.Series - MultiIndex or dict
742 | Either a MultiIndex Series indexed by date and asset,
743 | containing the period wise group codes for each asset, or
744 | a dict of asset to group mappings. If a dict is passed,
745 | it is assumed that group mappings are unchanged for the
746 | entire time period of the passed factor data.
747 | binning_by_group : bool
748 | If True, compute quantile buckets separately for each group.
749 | This is useful when the range of factor values varies considerably
750 | across groups, so that it is wise to make the binning group relative.
751 | You should probably enable this if the factor is intended
752 | to be analyzed for a group neutral portfolio
753 | quantiles : int or sequence[float]
754 | Number of equal-sized quantile buckets to use in factor bucketing.
755 | Alternately sequence of quantiles, allowing non-equal-sized buckets
756 | e.g. [0, .10, .5, .90, 1.] or [.05, .5, .95]
757 | Only one of 'quantiles' or 'bins' can be not-None
758 | bins : int or sequence[float]
759 | Number of equal-width (valuewise) bins to use in factor bucketing.
760 | Alternately sequence of bin edges allowing for non-uniform bin width
761 | e.g. [-4, -2, -0.5, 0, 10]
762 | Chooses the buckets to be evenly spaced according to the values
763 | themselves. Useful when the factor contains discrete values.
764 | Only one of 'quantiles' or 'bins' can be not-None
765 | periods : sequence[int]
766 | periods to compute forward returns on.
767 | filter_zscore : int or float, optional
768 | Sets forward returns greater than X standard deviations
769 | from the mean to nan. Set it to 'None' to avoid filtering.
770 | Caution: this outlier filtering incorporates lookahead bias.
771 | groupby_labels : dict
772 | A dictionary keyed by group code with values corresponding
773 | to the display name for each group.
774 | max_loss : float, optional
775 | Maximum percentage (0.00 to 1.00) of factor data dropping allowed,
776 | computed comparing the number of items in the input factor index and
777 | the number of items in the output DataFrame index.
778 | Factor data can be partially dropped due to being flawed itself
779 | (e.g. NaNs), not having provided enough price data to compute
780 | forward returns for all factor values, or because it is not possible
781 | to perform binning.
782 | Set max_loss=0 to disable Exception suppression.
783 | zero_aware : bool, optional
784 | If True, compute quantile buckets separately for positive and negative
785 | signal values. This is useful if your signal is centered and zero is
786 | the separation between long and short signals.
787 | cumulative_returns : bool, optional
788 | If True, forward returns columns will contain cumulative returns.
789 | Setting this to False is useful if you want to analyze how predictive
790 | a factor is for a single forward day.
791 |
792 | Returns
793 | -------
794 | merged_data : pd.DataFrame - MultiIndex
795 | A MultiIndex Series indexed by date (level 0) and asset (level 1),
796 | containing the values for a single alpha factor, forward returns for
797 | each period, the factor quantile/bin that factor value belongs to, and
798 | (optionally) the group the asset belongs to.
799 | - forward returns column names follow the format accepted by
800 | pd.Timedelta (e.g. '1D', '30m', '3h15m', '1D1h', etc)
801 | - 'date' index freq property (merged_data.index.levels[0].freq) will be
802 | set to a trading calendar (pandas DateOffset) inferred from the input
803 | data (see infer_trading_calendar for more details). This is currently
804 | used only in cumulative returns computation
805 | ::
806 | -------------------------------------------------------------------
807 | | | 1D | 5D | 10D |factor|group|factor_quantile
808 | -------------------------------------------------------------------
809 | date | asset | | | | | |
810 | -------------------------------------------------------------------
811 | | AAPL | 0.09|-0.01|-0.079| 0.5 | G1 | 3
812 | --------------------------------------------------------
813 | | BA | 0.02| 0.06| 0.020| -1.1 | G2 | 5
814 | --------------------------------------------------------
815 | 2014-01-01 | CMG | 0.03| 0.09| 0.036| 1.7 | G2 | 1
816 | --------------------------------------------------------
817 | | DAL |-0.02|-0.06|-0.029| -0.1 | G3 | 5
818 | --------------------------------------------------------
819 | | LULU |-0.03| 0.05|-0.009| 2.7 | G1 | 2
820 | --------------------------------------------------------
821 |
822 | See Also
823 | --------
824 | utils.get_clean_factor
825 | For use when forward returns are already available.
826 | """
827 | forward_returns = compute_forward_returns(
828 | factor,
829 | prices,
830 | periods,
831 | filter_zscore,
832 | cumulative_returns,
833 | )
834 |
835 | factor_data = get_clean_factor(factor, forward_returns, groupby=groupby,
836 | groupby_labels=groupby_labels,
837 | quantiles=quantiles, bins=bins,
838 | binning_by_group=binning_by_group,
839 | max_loss=max_loss, zero_aware=zero_aware)
840 |
841 | return factor_data
842 |
843 |
844 | def rate_of_return(period_ret, base_period):
845 | """
846 | Convert returns to 'base_period' rate of returns: that is the value the
847 | returns would have every 'base_period' if they had grown at a steady
848 | rate
849 |
850 | Parameters
851 | ----------
852 | period_ret: pd.DataFrame
853 | DataFrame containing returns values with column headings representing
854 | the return period.
855 | base_period: string
856 | The base period length used in the conversion
857 | It must follow pandas.Timedelta constructor format (e.g. '1 days',
858 | '1D', '30m', '3h', '1D1h', etc)
859 |
860 | Returns
861 | -------
862 | pd.DataFrame
863 | DataFrame in same format as input but with 'base_period' rate of
864 | returns values.
865 | """
866 | period_len = period_ret.name
867 | conversion_factor = (pd.Timedelta(base_period) /
868 | pd.Timedelta(period_len))
869 | return period_ret.add(1).pow(conversion_factor).sub(1)
870 |
871 |
872 | def std_conversion(period_std, base_period):
873 | """
874 | 'base_period' standard deviation (or standard error) approximation
875 |
876 | Parameters
877 | ----------
878 | period_std: pd.DataFrame
879 | DataFrame containing standard deviation or standard error values
880 | with column headings representing the return period.
881 | base_period: string
882 | The base period length used in the conversion
883 | It must follow pandas.Timedelta constructor format (e.g. '1 days',
884 | '1D', '30m', '3h', '1D1h', etc)
885 |
886 | Returns
887 | -------
888 | pd.DataFrame
889 | DataFrame in same format as input but with 'base_period'
890 | standard deviation/error values.
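
    Examples
    --------
    A small worked sketch: by the square-root-of-time rule a standard
    deviation measured over a '4D' window is converted to a '1D' base by
    dividing by ``sqrt(4) = 2``:

    >>> four_day_std = pd.Series([0.02, 0.04], name='4D')
    >>> std_conversion(four_day_std, '1D')
    0    0.01
    1    0.02
    Name: 4D, dtype: float64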
891 | """
892 | period_len = period_std.name
893 | conversion_factor = (pd.Timedelta(period_len) /
894 | pd.Timedelta(base_period))
895 | return period_std / np.sqrt(conversion_factor)
896 |
897 |
898 | def get_forward_returns_columns(columns, require_exact_day_multiple=False):
899 | """
900 | Utility that detects and returns the columns that are forward returns
901 | """
902 |
903 | # If exact day multiples are required in the forward return periods,
904 | # drop all other columns (e.g. drop 3D12h).
905 | if require_exact_day_multiple:
906 | pattern = re.compile(r"^(\d+([D]))+$", re.IGNORECASE)
907 | valid_columns = [(pattern.match(col) is not None) for col in columns]
908 |
909 | if sum(valid_columns) < len(valid_columns):
910 | warnings.warn(
911 | "Skipping return periods that aren't exact multiples"
912 | + " of days."
913 | )
914 | else:
915 | pattern = re.compile(r"^(\d+([Dhms]|ms|us|ns))+$", re.IGNORECASE)
916 | valid_columns = [(pattern.match(col) is not None) for col in columns]
917 |
918 | return columns[valid_columns]
919 |
920 |
921 | def timedelta_to_string(timedelta):
922 | """
923 | Utility that converts a pandas.Timedelta to a string representation
924 | compatible with pandas.Timedelta constructor format
925 |
926 | Parameters
927 | ----------
928 | timedelta: pd.Timedelta
929 |
930 | Returns
931 | -------
932 | string
933 | string representation of 'timedelta'
934 | """
935 | c = timedelta.components
936 | format = ''
937 | if c.days != 0:
938 | format += '%dD' % c.days
939 | if c.hours > 0:
940 | format += '%dh' % c.hours
941 | if c.minutes > 0:
942 | format += '%dm' % c.minutes
943 | if c.seconds > 0:
944 | format += '%ds' % c.seconds
945 | if c.milliseconds > 0:
946 | format += '%dms' % c.milliseconds
947 | if c.microseconds > 0:
948 | format += '%dus' % c.microseconds
949 | if c.nanoseconds > 0:
950 | format += '%dns' % c.nanoseconds
951 | return format
952 |
953 |
954 | def timedelta_strings_to_integers(sequence):
955 | """
956 | Converts pandas string representations of timedeltas into integers of days.
957 |
958 | Parameters
959 | ----------
960 | sequence : iterable
961 | List or array of timedelta string representations, e.g. ['1D', '5D'].
962 |
963 | Returns
964 | -------
965 | sequence : list
966 | Integer days corresponding to the input sequence, e.g. [1, 5].
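
    Examples
    --------
    A quick illustration of the behavior described above:

    >>> timedelta_strings_to_integers(['1D', '5D', '10D'])
    [1, 5, 10]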
967 | """
968 | return list(map(lambda x: pd.Timedelta(x).days, sequence))
969 |
970 |
971 | def add_custom_calendar_timedelta(input, timedelta, freq):
972 | """
973 | Add timedelta to 'input' taking into consideration custom frequency, which
974 | is used to deal with custom calendars, such as a trading calendar
975 |
976 | Parameters
977 | ----------
978 | input : pd.DatetimeIndex or pd.Timestamp
979 | timedelta : pd.Timedelta
980 | freq : pd.DateOffset (CustomBusinessDay, Day or BusinessDay)
981 |
982 | Returns
983 | -------
984 | pd.DatetimeIndex or pd.Timestamp
985 | input + timedelta
986 | """
987 | if not isinstance(freq, (Day, BusinessDay, CustomBusinessDay)):
988 | raise ValueError("freq must be Day, BDay or CustomBusinessDay")
989 | days = timedelta.components.days
990 | offset = timedelta - pd.Timedelta(days=days)
991 | return input + freq * days + offset
992 |
993 |
994 | def diff_custom_calendar_timedeltas(start, end, freq):
995 | """
996 | Compute the difference between two pd.Timestamp taking into consideration
997 | custom frequency, which is used to deal with custom calendars, such as a
998 | trading calendar
999 |
1000 | Parameters
1001 | ----------
1002 | start : pd.Timestamp
1003 | end : pd.Timestamp
1004 | freq : pd.DateOffset (CustomBusinessDay, Day or BusinessDay)
1005 | (see infer_trading_calendar)
1006 |
1007 | Returns
1008 | -------
1009 | pd.Timedelta
1010 | end - start
1011 | """
1012 | if not isinstance(freq, (Day, BusinessDay, CustomBusinessDay)):
1013 | raise ValueError("freq must be Day, BusinessDay or CustomBusinessDay")
1014 |
1015 | weekmask = getattr(freq, 'weekmask', None)
1016 | holidays = getattr(freq, 'holidays', None)
1017 |
1018 | if weekmask is None and holidays is None:
1019 | if isinstance(freq, Day):
1020 | weekmask = 'Mon Tue Wed Thu Fri Sat Sun'
1021 | holidays = []
1022 | elif isinstance(freq, BusinessDay):
1023 | weekmask = 'Mon Tue Wed Thu Fri'
1024 | holidays = []
1025 |
1026 | if weekmask is not None and holidays is not None:
1027 | # we prefer this method as it is faster
1028 | actual_days = np.busday_count(np.array(start).astype('datetime64[D]'),
1029 | np.array(end).astype('datetime64[D]'),
1030 | weekmask, holidays)
1031 | else:
1032 | # default, it is slow
1033 | actual_days = pd.date_range(start, end, freq=freq).shape[0] - 1
1034 | if not freq.onOffset(start):
1035 | actual_days -= 1
1036 |
1037 | timediff = end - start
1038 | delta_days = timediff.components.days - actual_days
1039 | return timediff - pd.Timedelta(days=delta_days)
1040 |
-------------------------------------------------------------------------------- /build_and_deploy_docs.sh: --------------------------------------------------------------------------------
1 | pushd docs
2 | make html
3 | ghp-import -n -p build/html/
4 | popd
5 |
-------------------------------------------------------------------------------- /docs/Makefile: --------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
21 |
22 | .PHONY: help
23 | help:
24 | @echo "Please use \`make <target>' where <target> is one of"
25 | @echo " html to make standalone HTML files"
26 | @echo " dirhtml to make HTML files named index.html in directories"
27 | @echo " singlehtml to make a single large HTML file"
28 | @echo " pickle to make pickle files"
29 | @echo " json to make JSON files"
30 | @echo " htmlhelp to make HTML files and a HTML help project"
31 | @echo " qthelp to make HTML files and a qthelp project"
32 | @echo " applehelp to make an Apple Help Book"
33 | @echo " devhelp to make HTML files and a Devhelp project"
34 | @echo " epub to make an epub"
35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
36 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
38 | @echo " text to make text files"
39 | @echo " man to make manual pages"
40 | @echo " texinfo to make Texinfo files"
41 | @echo " info to make Texinfo files and run them through makeinfo"
42 | @echo " gettext to make PO message catalogs"
43 | @echo " changes to make an overview of all changed/added/deprecated items"
44 | @echo " xml to make Docutils-native XML files"
45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
46 | @echo " linkcheck to check all external links for integrity"
47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
48 | @echo " coverage to run coverage check of the documentation (if enabled)"
49 |
50 | .PHONY: clean
51 | clean:
52 | rm -rf $(BUILDDIR)/*
53 |
54 | .PHONY: html
55 | html:
56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
57 | @echo
58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
59 |
60 | .PHONY: dirhtml
61 | dirhtml:
62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
63 | @echo
64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
65 |
66 | .PHONY: singlehtml
67 | singlehtml:
68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
69 | @echo
70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
71 |
72 | .PHONY: pickle
73 | pickle:
74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
75 | @echo
76 | @echo "Build finished; now you can process the pickle files."
77 |
78 | .PHONY: json
79 | json:
80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
81 | @echo
82 | @echo "Build finished; now you can process the JSON files."
83 |
84 | .PHONY: htmlhelp
85 | htmlhelp:
86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
87 | @echo
88 | @echo "Build finished; now you can run HTML Help Workshop with the" \
89 | ".hhp project file in $(BUILDDIR)/htmlhelp."
90 | 91 | .PHONY: qthelp 92 | qthelp: 93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 94 | @echo 95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Qfactor.qhcp" 98 | @echo "To view the help file:" 99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Qfactor.qhc" 100 | 101 | .PHONY: applehelp 102 | applehelp: 103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 104 | @echo 105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 106 | @echo "N.B. You won't be able to view it unless you put it in" \ 107 | "~/Library/Documentation/Help or install it in your application" \ 108 | "bundle." 109 | 110 | .PHONY: devhelp 111 | devhelp: 112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 113 | @echo 114 | @echo "Build finished." 115 | @echo "To view the help file:" 116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Qfactor" 117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Qfactor" 118 | @echo "# devhelp" 119 | 120 | .PHONY: epub 121 | epub: 122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 123 | @echo 124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 125 | 126 | .PHONY: latex 127 | latex: 128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 129 | @echo 130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 132 | "(use \`make latexpdf' here to do that automatically)." 133 | 134 | .PHONY: latexpdf 135 | latexpdf: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo "Running LaTeX files through pdflatex..." 138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 140 | 141 | .PHONY: latexpdfja 142 | latexpdfja: 143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 144 | @echo "Running LaTeX files through platex and dvipdfmx..." 145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 147 | 148 | .PHONY: text 149 | text: 150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 151 | @echo 152 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 153 | 154 | .PHONY: man 155 | man: 156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 157 | @echo 158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 159 | 160 | .PHONY: texinfo 161 | texinfo: 162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 163 | @echo 164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 165 | @echo "Run \`make' in that directory to run these through makeinfo" \ 166 | "(use \`make info' here to do that automatically)." 167 | 168 | .PHONY: info 169 | info: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo "Running Texinfo files through makeinfo..." 172 | make -C $(BUILDDIR)/texinfo info 173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 174 | 175 | .PHONY: gettext 176 | gettext: 177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 178 | @echo 179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
180 |
181 | .PHONY: changes
182 | changes:
183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
184 | @echo
185 | @echo "The overview file is in $(BUILDDIR)/changes."
186 |
187 | .PHONY: linkcheck
188 | linkcheck:
189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
190 | @echo
191 | @echo "Link check complete; look for any errors in the above output " \
192 | "or in $(BUILDDIR)/linkcheck/output.txt."
193 |
194 | .PHONY: doctest
195 | doctest:
196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
197 | @echo "Testing of doctests in the sources finished, look at the " \
198 | "results in $(BUILDDIR)/doctest/output.txt."
199 |
200 | .PHONY: coverage
201 | coverage:
202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
203 | @echo "Testing of coverage in the sources finished, look at the " \
204 | "results in $(BUILDDIR)/coverage/python.txt."
205 |
206 | .PHONY: xml
207 | xml:
208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
209 | @echo
210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
211 |
212 | .PHONY: pseudoxml
213 | pseudoxml:
214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
215 | @echo
216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
217 |
-------------------------------------------------------------------------------- /docs/make.bat: --------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | REM Command file for Sphinx documentation
4 |
5 | if "%SPHINXBUILD%" == "" (
6 | set SPHINXBUILD=sphinx-build
7 | )
8 | set BUILDDIR=build
9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
10 | set I18NSPHINXOPTS=%SPHINXOPTS% source
11 | if NOT "%PAPER%" == "" (
12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
14 | )
15 |
16 | if "%1" == "" goto help
17 |
18 | if "%1" == "help" (
19 | :help
20 | echo.Please use `make ^<target^>` where ^<target^> is one of
21 | echo. html to make standalone HTML files
22 | echo. dirhtml to make HTML files named index.html in directories
23 | echo. singlehtml to make a single large HTML file
24 | echo. pickle to make pickle files
25 | echo. json to make JSON files
26 | echo. htmlhelp to make HTML files and a HTML help project
27 | echo. qthelp to make HTML files and a qthelp project
28 | echo. devhelp to make HTML files and a Devhelp project
29 | echo. epub to make an epub
30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
31 | echo. text to make text files
32 | echo. man to make manual pages
33 | echo. texinfo to make Texinfo files
34 | echo. gettext to make PO message catalogs
35 | echo. changes to make an overview over all changed/added/deprecated items
36 | echo. xml to make Docutils-native XML files
37 | echo. pseudoxml to make pseudoxml-XML files for display purposes
38 | echo. linkcheck to check all external links for integrity
39 | echo. doctest to run all doctests embedded in the documentation if enabled
40 | echo. coverage to run coverage check of the documentation if enabled
coverage to run coverage check of the documentation if enabled 41 | goto end 42 | ) 43 | 44 | if "%1" == "clean" ( 45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 46 | del /q /s %BUILDDIR%\* 47 | goto end 48 | ) 49 | 50 | 51 | REM Check if sphinx-build is available and fall back to the Python module if not 52 | %SPHINXBUILD% 1>NUL 2>NUL 53 | if errorlevel 9009 goto sphinx_python 54 | goto sphinx_ok 55 | 56 | :sphinx_python 57 | 58 | set SPHINXBUILD=python -m sphinx.__init__ 59 | %SPHINXBUILD% 2> nul 60 | if errorlevel 9009 ( 61 | echo. 62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 63 | echo.installed, then set the SPHINXBUILD environment variable to point 64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 65 | echo.may add the Sphinx directory to PATH. 66 | echo. 67 | echo.If you don't have Sphinx installed, grab it from 68 | echo.http://sphinx-doc.org/ 69 | exit /b 1 70 | ) 71 | 72 | :sphinx_ok 73 | 74 | 75 | if "%1" == "html" ( 76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 80 | goto end 81 | ) 82 | 83 | if "%1" == "dirhtml" ( 84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 85 | if errorlevel 1 exit /b 1 86 | echo. 87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 88 | goto end 89 | ) 90 | 91 | if "%1" == "singlehtml" ( 92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 93 | if errorlevel 1 exit /b 1 94 | echo. 95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 96 | goto end 97 | ) 98 | 99 | if "%1" == "pickle" ( 100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 101 | if errorlevel 1 exit /b 1 102 | echo. 103 | echo.Build finished; now you can process the pickle files. 104 | goto end 105 | ) 106 | 107 | if "%1" == "json" ( 108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 109 | if errorlevel 1 exit /b 1 110 | echo. 111 | echo.Build finished; now you can process the JSON files. 112 | goto end 113 | ) 114 | 115 | if "%1" == "htmlhelp" ( 116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 117 | if errorlevel 1 exit /b 1 118 | echo. 119 | echo.Build finished; now you can run HTML Help Workshop with the ^ 120 | .hhp project file in %BUILDDIR%/htmlhelp. 121 | goto end 122 | ) 123 | 124 | if "%1" == "qthelp" ( 125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 129 | .qhcp project file in %BUILDDIR%/qthelp, like this: 130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Qfactor.qhcp 131 | echo.To view the help file: 132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Qfactor.qhc 133 | goto end 134 | ) 135 | 136 | if "%1" == "devhelp" ( 137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. 141 | goto end 142 | ) 143 | 144 | if "%1" == "epub" ( 145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 149 | goto end 150 | ) 151 | 152 | if "%1" == "latex" ( 153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 
157 | goto end 158 | ) 159 | 160 | if "%1" == "latexpdf" ( 161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 162 | cd %BUILDDIR%/latex 163 | make all-pdf 164 | cd %~dp0 165 | echo. 166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdfja" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf-ja 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "text" ( 181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 182 | if errorlevel 1 exit /b 1 183 | echo. 184 | echo.Build finished. The text files are in %BUILDDIR%/text. 185 | goto end 186 | ) 187 | 188 | if "%1" == "man" ( 189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 190 | if errorlevel 1 exit /b 1 191 | echo. 192 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 193 | goto end 194 | ) 195 | 196 | if "%1" == "texinfo" ( 197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 198 | if errorlevel 1 exit /b 1 199 | echo. 200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 201 | goto end 202 | ) 203 | 204 | if "%1" == "gettext" ( 205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 206 | if errorlevel 1 exit /b 1 207 | echo. 208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 209 | goto end 210 | ) 211 | 212 | if "%1" == "changes" ( 213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 214 | if errorlevel 1 exit /b 1 215 | echo. 216 | echo.The overview file is in %BUILDDIR%/changes. 217 | goto end 218 | ) 219 | 220 | if "%1" == "linkcheck" ( 221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 222 | if errorlevel 1 exit /b 1 223 | echo. 224 | echo.Link check complete; look for any errors in the above output ^ 225 | or in %BUILDDIR%/linkcheck/output.txt. 226 | goto end 227 | ) 228 | 229 | if "%1" == "doctest" ( 230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 231 | if errorlevel 1 exit /b 1 232 | echo. 233 | echo.Testing of doctests in the sources finished; look at the ^ 234 | results in %BUILDDIR%/doctest/output.txt. 235 | goto end 236 | ) 237 | 238 | if "%1" == "coverage" ( 239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 240 | if errorlevel 1 exit /b 1 241 | echo. 242 | echo.Testing of coverage in the sources finished; look at the ^ 243 | results in %BUILDDIR%/coverage/python.txt. 244 | goto end 245 | ) 246 | 247 | if "%1" == "xml" ( 248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 249 | if errorlevel 1 exit /b 1 250 | echo. 251 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 252 | goto end 253 | ) 254 | 255 | if "%1" == "pseudoxml" ( 256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 257 | if errorlevel 1 exit /b 1 258 | echo. 259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 260 | goto end 261 | ) 262 | 263 | :end 264 | -------------------------------------------------------------------------------- /docs/source/alphalens.rst: -------------------------------------------------------------------------------- 1 | 2 | Alphalens 3 | ========= 4 | 5 | Tear Sheets 6 | ----------- 7 | 8 | .. automodule:: alphalens.tears 9 | :members: 10 | :undoc-members: 11 | :show-inheritance: 12 | 13 | Performance 14 | ----------- 15 | 16 | .. 
automodule:: alphalens.performance 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | 21 | Plotting 22 | -------- 23 | 24 | .. automodule:: alphalens.plotting 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | Utilities 30 | --------- 31 | 32 | .. automodule:: alphalens.utils 33 | :members: 34 | :undoc-members: 35 | :show-inheritance: 36 | -------------------------------------------------------------------------------- /docs/source/alphalens.tests.rst: -------------------------------------------------------------------------------- 1 | alphalens.tests package 2 | ======================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | alphalens.tests.test_performance module 8 | --------------------------------------- 9 | 10 | .. automodule:: alphalens.tests.test_performance 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | alphalens.tests.test_utils module 16 | --------------------------------- 17 | 18 | .. automodule:: alphalens.tests.test_utils 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: alphalens.tests 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Alphalens documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Jul 5 15:45:48 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | sys.path.insert(0, os.path.abspath('../..')) 22 | 23 | from alphalens import __version__ as version 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | #needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | 'sphinx.ext.autodoc', 35 | 'numpydoc' 36 | ] 37 | 38 | # Add any paths that contain templates here, relative to this directory. 39 | templates_path = ['_templates'] 40 | 41 | # The suffix(es) of source filenames. 42 | # You can specify multiple suffixes as a list of strings: 43 | # source_suffix = ['.rst', '.md'] 44 | source_suffix = '.rst' 45 | 46 | # The encoding of source files. 47 | #source_encoding = 'utf-8-sig' 48 | 49 | # The master toctree document. 50 | master_doc = 'index' 51 | 52 | # General information about the project. 53 | project = u'Alphalens' 54 | copyright = u'2016, Quantopian, Inc.' 55 | author = u'Quantopian, Inc.' 56 | 57 | # The full version, including alpha/beta/rc tags. 58 | release = version 59 | 60 | # The language for content autogenerated by Sphinx. Refer to documentation 61 | # for a list of supported languages. 
62 | # 63 | # This is also used if you do content translation via gettext catalogs. 64 | # Usually you set "language" from the command line for these cases. 65 | language = None 66 | 67 | # There are two options for replacing |today|: either, you set today to some 68 | # non-false value, then it is used: 69 | #today = '' 70 | # Else, today_fmt is used as the format for a strftime call. 71 | #today_fmt = '%B %d, %Y' 72 | 73 | # List of patterns, relative to source directory, that match files and 74 | # directories to ignore when looking for source files. 75 | exclude_patterns = [] 76 | 77 | # The reST default role (used for this markup: `text`) to use for all 78 | # documents. 79 | #default_role = None 80 | 81 | # If true, '()' will be appended to :func: etc. cross-reference text. 82 | #add_function_parentheses = True 83 | 84 | # If true, the current module name will be prepended to all description 85 | # unit titles (such as .. function::). 86 | #add_module_names = True 87 | 88 | # If true, sectionauthor and moduleauthor directives will be shown in the 89 | # output. They are ignored by default. 90 | #show_authors = False 91 | 92 | # The name of the Pygments (syntax highlighting) style to use. 93 | pygments_style = 'sphinx' 94 | 95 | # A list of ignored prefixes for module index sorting. 96 | #modindex_common_prefix = [] 97 | 98 | # If true, keep warnings as "system message" paragraphs in the built documents. 99 | #keep_warnings = False 100 | 101 | # If true, `todo` and `todoList` produce output, else they produce nothing. 102 | todo_include_todos = False 103 | 104 | 105 | # -- Options for HTML output ---------------------------------------------- 106 | 107 | # The theme to use for HTML and HTML Help pages. See the documentation for 108 | # a list of builtin themes. 109 | html_theme = 'sphinx_rtd_theme' 110 | 111 | # Theme options are theme-specific and customize the look and feel of a theme 112 | # further. For a list of options available for each theme, see the 113 | # documentation. 114 | #html_theme_options = {} 115 | 116 | # Add any paths that contain custom themes here, relative to this directory. 117 | #html_theme_path = [] 118 | 119 | # The name for this set of Sphinx documents. If None, it defaults to 120 | # "<project> v<release> documentation". 121 | #html_title = None 122 | 123 | # A shorter title for the navigation bar. Default is the same as html_title. 124 | #html_short_title = None 125 | 126 | # The name of an image file (relative to this directory) to place at the top 127 | # of the sidebar. 128 | #html_logo = None 129 | 130 | # The name of an image file (within the static path) to use as favicon of the 131 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 132 | # pixels large. 133 | #html_favicon = None 134 | 135 | # Add any paths that contain custom static files (such as style sheets) here, 136 | # relative to this directory. They are copied after the builtin static files, 137 | # so a file named "default.css" will overwrite the builtin "default.css". 138 | html_static_path = ['_static'] 139 | 140 | # Add any extra paths that contain custom files (such as robots.txt or 141 | # .htaccess) here, relative to this directory. These files are copied 142 | # directly to the root of the documentation. 143 | #html_extra_path = [] 144 | 145 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 146 | # using the given strftime format. 
147 | #html_last_updated_fmt = '%b %d, %Y' 148 | 149 | # If true, SmartyPants will be used to convert quotes and dashes to 150 | # typographically correct entities. 151 | #html_use_smartypants = True 152 | 153 | # Custom sidebar templates, maps document names to template names. 154 | #html_sidebars = {} 155 | 156 | # Additional templates that should be rendered to pages, maps page names to 157 | # template names. 158 | #html_additional_pages = {} 159 | 160 | # If false, no module index is generated. 161 | #html_domain_indices = True 162 | 163 | # If false, no index is generated. 164 | #html_use_index = True 165 | 166 | # If true, the index is split into individual pages for each letter. 167 | #html_split_index = False 168 | 169 | # If true, links to the reST sources are added to the pages. 170 | #html_show_sourcelink = True 171 | 172 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 173 | #html_show_sphinx = True 174 | 175 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 176 | #html_show_copyright = True 177 | 178 | # If true, an OpenSearch description file will be output, and all pages will 179 | # contain a <link> tag referring to it. The value of this option must be the 180 | # base URL from which the finished HTML is served. 181 | #html_use_opensearch = '' 182 | 183 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 184 | #html_file_suffix = None 185 | 186 | # Language to be used for generating the HTML full-text search index. 187 | # Sphinx supports the following languages: 188 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 189 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' 190 | #html_search_language = 'en' 191 | 192 | # A dictionary with options for the search language support, empty by default. 193 | # Now only 'ja' uses this config value 194 | #html_search_options = {'type': 'default'} 195 | 196 | # The name of a javascript file (relative to the configuration directory) that 197 | # implements a search results scorer. If empty, the default will be used. 198 | #html_search_scorer = 'scorer.js' 199 | 200 | # Output file base name for HTML help builder. 201 | htmlhelp_basename = 'Alphalensdoc' 202 | 203 | # -- Options for LaTeX output --------------------------------------------- 204 | 205 | latex_elements = { 206 | # The paper size ('letterpaper' or 'a4paper'). 207 | #'papersize': 'letterpaper', 208 | 209 | # The font size ('10pt', '11pt' or '12pt'). 210 | #'pointsize': '10pt', 211 | 212 | # Additional stuff for the LaTeX preamble. 213 | #'preamble': '', 214 | 215 | # Latex figure (float) alignment 216 | #'figure_align': 'htbp', 217 | } 218 | 219 | # Grouping the document tree into LaTeX files. List of tuples 220 | # (source start file, target name, title, 221 | # author, documentclass [howto, manual, or own class]). 222 | latex_documents = [ 223 | (master_doc, 'Alphalens.tex', u'Alphalens Documentation', 224 | u'Quantopian, Inc.', 'manual'), 225 | ] 226 | 227 | # The name of an image file (relative to this directory) to place at the top of 228 | # the title page. 229 | #latex_logo = None 230 | 231 | # For "manual" documents, if this is true, then toplevel headings are parts, 232 | # not chapters. 233 | #latex_use_parts = False 234 | 235 | # If true, show page references after internal links. 236 | #latex_show_pagerefs = False 237 | 238 | # If true, show URL addresses after external links. 239 | #latex_show_urls = False 240 | 241 | # Documents to append as an appendix to all manuals. 
242 | #latex_appendices = [] 243 | 244 | # If false, no module index is generated. 245 | #latex_domain_indices = True 246 | 247 | 248 | # -- Options for manual page output --------------------------------------- 249 | 250 | # One entry per manual page. List of tuples 251 | # (source start file, name, description, authors, manual section). 252 | man_pages = [ 253 | (master_doc, 'alphalens', u'Alphalens Documentation', 254 | [author], 1) 255 | ] 256 | 257 | # If true, show URL addresses after external links. 258 | #man_show_urls = False 259 | 260 | 261 | # -- Options for Texinfo output ------------------------------------------- 262 | 263 | # Grouping the document tree into Texinfo files. List of tuples 264 | # (source start file, target name, title, author, 265 | # dir menu entry, description, category) 266 | texinfo_documents = [ 267 | (master_doc, 'Alphalens', u'Alphalens Documentation', 268 | author, 'Alphalens', 'Performance analysis of predictive (alpha) stock factors.', 269 | 'Miscellaneous'), 270 | ] 271 | 272 | # Documents to append as an appendix to all manuals. 273 | #texinfo_appendices = [] 274 | 275 | # If false, no module index is generated. 276 | #texinfo_domain_indices = True 277 | 278 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 279 | #texinfo_show_urls = 'footnote' 280 | 281 | # If true, do not generate a @detailmenu in the "Top" node's menu. 282 | #texinfo_no_detailmenu = False 283 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../README.rst 2 | 3 | API 4 | --- 5 | 6 | Information on specific functions, classes, or methods. 7 | 8 | - :mod:`alphalens.tears` 9 | - :mod:`alphalens.performance` 10 | - :mod:`alphalens.plotting` 11 | - :mod:`alphalens.utils` 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | alphalens 2 | ========= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | alphalens 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # See the docstring in versioneer.py for instructions. Note that you must 2 | # re-run 'versioneer.py setup' after changing this section, and commit the 3 | # resulting files. 
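# A hedged sketch of that re-run step (it assumes the vendored versioneer.py
# at the repository root, as in this project):
#
#   python versioneer.py setup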
4 | [versioneer] 5 | VCS=git 6 | style=pep440 7 | versionfile_source=alphalens/_version.py 8 | versionfile_build=alphalens/_version.py 9 | tag_prefix= v 10 | parentdir_prefix= alphalens- 11 | 12 | [flake8] 13 | exclude = 14 | .git, 15 | __pycache__, 16 | docs, 17 | versioneer.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from setuptools import setup, find_packages 3 | import versioneer 4 | import sys 5 | 6 | long_description = '' 7 | 8 | if 'upload' in sys.argv: 9 | with open('README.rst') as f: 10 | long_description = f.read() 11 | 12 | install_reqs = [ 13 | 'matplotlib>=1.4.0', 14 | 'numpy>=1.9.1', 15 | 'pandas>=0.18.0', 16 | 'scipy>=0.14.0', 17 | 'seaborn>=0.6.0', 18 | 'statsmodels>=0.6.1', 19 | 'IPython>=3.2.3', 20 | 'empyrical>=0.5.0', 21 | ] 22 | 23 | extra_reqs = { 24 | 'test': [ 25 | "nose>=1.3.7", 26 | "parameterized>=0.5.0", 27 | "tox>=2.3.1", 28 | "flake8>=3.7.9", 29 | ], 30 | } 31 | 32 | if __name__ == "__main__": 33 | setup( 34 | name='alphalens', 35 | version=versioneer.get_version(), 36 | cmdclass=versioneer.get_cmdclass(), 37 | description='Performance analysis of predictive (alpha) stock factors', 38 | author='Quantopian Inc.', 39 | author_email='opensource@quantopian.com', 40 | packages=find_packages(include=['alphalens', 'alphalens.*']), 41 | package_data={ 42 | 'alphalens': ['examples/*'], 43 | }, 44 | long_description=long_description, 45 | classifiers=[ 46 | 'Development Status :: 5 - Production/Stable', 47 | 'Intended Audience :: Developers', 48 | 'License :: OSI Approved :: Apache Software License', 49 | 'Natural Language :: English', 50 | 'Operating System :: OS Independent', 51 | 'Programming Language :: Python :: 2.7', 52 | 'Programming Language :: Python :: 3.4', 53 | 'Programming Language :: Python :: 3.5', 54 | 'Programming Language :: Python', 55 | 'Topic :: Utilities', 56 | 'Topic :: Office/Business :: Financial', 57 | 'Topic :: Scientific/Engineering :: Information Analysis', 58 | ], 59 | url='https://github.com/quantopian/alphalens', 60 | install_requires=install_reqs, 61 | extras_require=extra_reqs, 62 | ) 63 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist=py{27,35}-pandas{16,17,18} 3 | skip_missing_interpreters=True 4 | 5 | [testenv] 6 | commands= 7 | py{27,35}-pandas16: pip install -vv pandas>=0.16,<0.17 8 | py{27,35}-pandas17: pip install -vv pandas>=0.17,<0.18 9 | py{27,35}-pandas18: pip install -vv pandas>=0.18,<0.19 10 | 11 | pip install .[test] 12 | # cd out of the alphalens root so that we actually use what we installed 13 | # with our sdist. Otherwise, our imports will find alphalens' source from 14 | # our CWD. 15 | /bin/bash -c 'cd /tmp && nosetests -P {toxinidir}' 16 | --------------------------------------------------------------------------------
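A minimal sketch of exercising the tox matrix above locally (hedged: it assumes tox and the matching Python interpreters are installed; the environment names come straight from the envlist in tox.ini):

    tox -e py35-pandas18    # a single interpreter/pandas combination
    tox                     # the full py{27,35} x pandas{16,17,18} matrix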