├── .gitattributes
├── .github
│   ├── ISSUE_TEMPLATE.md
│   └── workflows
│       └── main.yml
├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.rst
├── alphalens
│   ├── __init__.py
│   ├── _version.py
│   ├── examples
│   │   ├── .gitattributes
│   │   ├── alphalens_tutorial_on_quantopian.ipynb
│   │   ├── daily_factor_synthetic_data.ipynb
│   │   ├── event_study.ipynb
│   │   ├── event_study_synthetic_data.ipynb
│   │   ├── ic_tear.png
│   │   ├── intraday_factor.ipynb
│   │   ├── intraday_factor_synthetic_data.ipynb
│   │   ├── predictive_vs_non-predictive_factor.ipynb
│   │   ├── pyfolio_integration.ipynb
│   │   ├── returns_tear.png
│   │   ├── sector_tear.png
│   │   ├── table_tear.png
│   │   └── tear_sheet_walk_through.ipynb
│   ├── performance.py
│   ├── plotting.py
│   ├── tears.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── matplotlibrc
│   │   ├── test_performance.py
│   │   ├── test_tears.py
│   │   └── test_utils.py
│   └── utils.py
├── build_and_deploy_docs.sh
├── docs
│   ├── Makefile
│   ├── make.bat
│   └── source
│       ├── alphalens.rst
│       ├── alphalens.tests.rst
│       ├── conf.py
│       ├── index.rst
│       └── modules.rst
├── setup.cfg
├── setup.py
├── tox.ini
└── versioneer.py
/.gitattributes: -------------------------------------------------------------------------------- 1 | alphalens/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Problem Description 2 | 3 | **Please provide a minimal, self-contained, and reproducible example:** 4 | ```python 5 | [Paste code here] 6 | ``` 7 | 8 | **Please provide the full traceback:** 9 | ```python 10 | [Paste traceback here] 11 | ``` 12 | 13 | **Please provide any additional information below:** 14 | 15 | 16 | ## Versions 17 | 18 | * Alphalens version: 19 | * Python version: 20 | * Pandas version: 21 | * Matplotlib version: 22 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | os: [ubuntu-latest] 19 | python-version: [2.7, 3.5, 3.7, 3.8] 20 | include: 21 | - python-version: 2.7 22 | pandas: 0.18.1 23 | numpy: 1.11.3 24 | scipy: 0.17.1 25 | statsmodels: 0.6.1 26 | - python-version: 3.5 27 | pandas: 0.18.1 28 | numpy: 1.11.3 29 | scipy: 0.17.1 30 | statsmodels: 0.6.1 31 | - python-version: 3.7 32 | pandas: 1.0.1 33 | numpy: 1.18.1 34 | scipy: 1.4.1 35 | statsmodels: 0.11.1 36 | - python-version: 3.8 37 | pandas: 1.0.1 38 | numpy: 1.18.1 39 | scipy: 1.4.1 40 | statsmodels: 0.11.1 41 | 42 | steps: 43 | - uses: actions/checkout@v1 44 | - name: Set up Python ${{ matrix.python-version }} 45 | uses: actions/setup-python@v1 46 | with: 47 | python-version: ${{ matrix.python-version }} 48 | - name: Install dependencies 49 | env: 50 | PYTHONWARNINGS: ignore:DEPRECATION::pip._internal.cli.base_command 51 | run: | 52 | python -m pip install --upgrade pip 53 | pip install numpy==${{ matrix.numpy }} 54 | pip install pandas==${{ matrix.pandas }} scipy==${{ matrix.scipy }} statsmodels==${{ matrix.statsmodels }} 55 | pip install -e .[test] 56 | - name: Lint with flake8 57 | run: | 58 | flake8 59 | - name: Test with nose 60 | run: | 61 | MATPLOTLIBRC=alphalens/tests/matplotlibrc nosetests alphalens/tests 62 | --------------------------------------------------------------------------------
/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.xml 3 | 4 | *.iml 5 | 6 | *.pyc 7 | 8 | build/ 9 | docs/build/ 10 | .ipynb_checkpoints 11 | 12 | # Tox puts virtualenvs here by default. 13 | .tox/ 14 | 15 | # coverage.py outputs. 16 | cover 17 | .coverage 18 | 19 | # Intermediate outputs from building distributions for PyPI. 20 | dist 21 | *.egg-info/ 22 | 23 | # Emacs temp files. 24 | *~ 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2018 Quantopian, Inc. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include versioneer.py 2 | include alphalens/_version.py 3 | include LICENSE 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://media.quantopian.com/logos/open_source/alphalens-logo-03.png 2 | :align: center 3 | 4 | Alphalens 5 | ========= 6 | .. 
image:: https://github.com/quantopian/alphalens/workflows/CI/badge.svg 7 | :alt: GitHub Actions status 8 | :target: https://github.com/quantopian/alphalens/actions?query=workflow%3ACI+branch%3Amaster 9 | 10 | Alphalens is a Python library for performance analysis of predictive 11 | (alpha) stock factors. Alphalens works great with the 12 | `Zipline `__ open source backtesting library, and 13 | `Pyfolio `__, which provides 14 | performance and risk analysis of financial portfolios. You can try Alphalens 15 | at `Quantopian `_ -- a free, 16 | community-centered, hosted platform for researching and testing alpha ideas. 17 | Quantopian also offers a `fully managed service for professionals `_ 18 | that includes Zipline, Alphalens, Pyfolio, FactSet data, and more. 19 | 20 | The main function of Alphalens is to surface the most relevant statistics 21 | and plots about an alpha factor, including: 22 | 23 | - Returns Analysis 24 | - Information Coefficient Analysis 25 | - Turnover Analysis 26 | - Grouped Analysis 27 | 28 | Getting started 29 | --------------- 30 | 31 | With a signal and pricing data, creating a factor "tear sheet" is a two-step process: 32 | 33 | .. code:: python 34 | 35 | import alphalens 36 | 37 | # Ingest and format data 38 | factor_data = alphalens.utils.get_clean_factor_and_forward_returns(my_factor, 39 | pricing, 40 | quantiles=5, 41 | groupby=ticker_sector, 42 | groupby_labels=sector_names) 43 | 44 | # Run analysis 45 | alphalens.tears.create_full_tear_sheet(factor_data) 46 | 47 | 48 | Learn more 49 | ---------- 50 | 51 | Check out the `example notebooks `__ for more on how to read and use 52 | the factor tear sheet. A good starting point could be `this `__. 53 | 54 | Installation 55 | ------------ 56 | 57 | Install with pip: 58 | 59 | :: 60 | 61 | pip install alphalens 62 | 63 | Install with conda: 64 | 65 | :: 66 | 67 | conda install -c conda-forge alphalens 68 | 69 | Install from the master branch of the Alphalens repository (development code): 70 | 71 | :: 72 | 73 | pip install git+https://github.com/quantopian/alphalens 74 | 75 | Alphalens depends on: 76 | 77 | - `matplotlib `__ 78 | - `numpy `__ 79 | - `pandas `__ 80 | - `scipy `__ 81 | - `seaborn `__ 82 | - `statsmodels `__ 83 | 84 | Usage 85 | ----- 86 | 87 | A good way to get started is to run the examples in a `Jupyter 88 | notebook `__. 89 | 90 | To get set up with an example, you can: 91 | 92 | Run a Jupyter notebook server via: 93 | 94 | .. code:: bash 95 | 96 | jupyter notebook 97 | 98 | From the notebook list page (usually found at 99 | ``http://localhost:8888/``), navigate over to the examples directory, 100 | and open any file with a .ipynb extension. 101 | 102 | Execute the code in a notebook cell by clicking on it and hitting 103 | Shift+Enter. 104 | 105 | Questions? 106 | ---------- 107 | 108 | If you find a bug, feel free to open an issue on our `GitHub 109 | tracker `__. 110 | 111 | Contribute 112 | ---------- 113 | 114 | If you want to contribute, a great place to start would be the 115 | `help-wanted 116 | issues `__. 117 | 118 | Credits 119 | ------- 120 | 121 | - `Andrew Campbell `__ 122 | - `James Christopher `__ 123 | - `Thomas Wiecki `__ 124 | - `Jonathan Larkin `__ 125 | - Jessica Stauth (jstauth@quantopian.com) 126 | - `Taso Petridis `_ 127 | 128 | For a full list of contributors see the `contributors page. `_ 129 | 130 | Example Tear Sheet 131 | ------------------ 132 | 133 | Example factor courtesy of `ExtractAlpha `_ 134 | 135 | ..
image:: https://github.com/quantopian/alphalens/raw/master/alphalens/examples/table_tear.png 136 | .. image:: https://github.com/quantopian/alphalens/raw/master/alphalens/examples/returns_tear.png 137 | .. image:: https://github.com/quantopian/alphalens/raw/master/alphalens/examples/ic_tear.png 138 | .. image:: https://github.com/quantopian/alphalens/raw/master/alphalens/examples/sector_tear.png 139 | :alt: 140 | -------------------------------------------------------------------------------- /alphalens/__init__.py: -------------------------------------------------------------------------------- 1 | from . import performance 2 | from . import plotting 3 | from . import tears 4 | from . import utils 5 | 6 | from ._version import get_versions 7 | 8 | 9 | __version__ = get_versions()['version'] 10 | del get_versions 11 | 12 | __all__ = ['performance', 'plotting', 'tears', 'utils'] 13 | -------------------------------------------------------------------------------- /alphalens/_version.py: -------------------------------------------------------------------------------- 1 | 2 | # This file helps to compute a version number in source trees obtained from 3 | # git-archive tarball (such as those provided by github's download-from-tag 4 | # feature). Distribution tarballs (built by setup.py sdist) and build 5 | # directories (produced by setup.py build) will contain a much shorter file 6 | # that just contains the computed version number. 7 | 8 | # This file is released into the public domain. Generated by 9 | # versioneer-0.18 (https://github.com/warner/python-versioneer) 10 | 11 | """Git implementation of _version.py.""" 12 | 13 | import errno 14 | import os 15 | import re 16 | import subprocess 17 | import sys 18 | 19 | 20 | def get_keywords(): 21 | """Get the keywords needed to look up the version information.""" 22 | # these strings will be replaced by git during git-archive. 23 | # setup.py/versioneer.py will grep for the variable names, so they must 24 | # each be defined on a line of their own. _version.py will just call 25 | # get_keywords().
26 | git_refnames = " (HEAD -> master)" 27 | git_full = "4979057c6fbd045a7998dba53388654d45a217ef" 28 | git_date = "2021-11-23 00:26:54 +0800" 29 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 30 | return keywords 31 | 32 | 33 | class VersioneerConfig: 34 | """Container for Versioneer configuration parameters.""" 35 | 36 | 37 | def get_config(): 38 | """Create, populate and return the VersioneerConfig() object.""" 39 | # these strings are filled in when 'setup.py versioneer' creates 40 | # _version.py 41 | cfg = VersioneerConfig() 42 | cfg.VCS = "git" 43 | cfg.style = "pep440" 44 | cfg.tag_prefix = "v" 45 | cfg.parentdir_prefix = "alphalens-" 46 | cfg.versionfile_source = "alphalens/_version.py" 47 | cfg.verbose = False 48 | return cfg 49 | 50 | 51 | class NotThisMethod(Exception): 52 | """Exception raised if a method is not valid for the current scenario.""" 53 | 54 | 55 | LONG_VERSION_PY = {} 56 | HANDLERS = {} 57 | 58 | 59 | def register_vcs_handler(vcs, method): # decorator 60 | """Decorator to mark a method as the handler for a particular VCS.""" 61 | def decorate(f): 62 | """Store f in HANDLERS[vcs][method].""" 63 | if vcs not in HANDLERS: 64 | HANDLERS[vcs] = {} 65 | HANDLERS[vcs][method] = f 66 | return f 67 | return decorate 68 | 69 | 70 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 71 | env=None): 72 | """Call the given command(s).""" 73 | assert isinstance(commands, list) 74 | p = None 75 | for c in commands: 76 | try: 77 | dispcmd = str([c] + args) 78 | # remember shell=False, so use git.cmd on windows, not just git 79 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 80 | stdout=subprocess.PIPE, 81 | stderr=(subprocess.PIPE if hide_stderr 82 | else None)) 83 | break 84 | except EnvironmentError: 85 | e = sys.exc_info()[1] 86 | if e.errno == errno.ENOENT: 87 | continue 88 | if verbose: 89 | print("unable to run %s" % dispcmd) 90 | print(e) 91 | return None, None 92 | else: 93 | if verbose: 94 | print("unable to find command, tried %s" % (commands,)) 95 | return None, None 96 | stdout = p.communicate()[0].strip() 97 | if sys.version_info[0] >= 3: 98 | stdout = stdout.decode() 99 | if p.returncode != 0: 100 | if verbose: 101 | print("unable to run %s (error)" % dispcmd) 102 | print("stdout was %s" % stdout) 103 | return None, p.returncode 104 | return stdout, p.returncode 105 | 106 | 107 | def versions_from_parentdir(parentdir_prefix, root, verbose): 108 | """Try to determine the version from the parent directory name. 109 | 110 | Source tarballs conventionally unpack into a directory that includes both 111 | the project name and a version string. 
We will also support searching up 112 | two directory levels for an appropriately named parent directory 113 | """ 114 | rootdirs = [] 115 | 116 | for i in range(3): 117 | dirname = os.path.basename(root) 118 | if dirname.startswith(parentdir_prefix): 119 | return {"version": dirname[len(parentdir_prefix):], 120 | "full-revisionid": None, 121 | "dirty": False, "error": None, "date": None} 122 | else: 123 | rootdirs.append(root) 124 | root = os.path.dirname(root) # up a level 125 | 126 | if verbose: 127 | print("Tried directories %s but none started with prefix %s" % 128 | (str(rootdirs), parentdir_prefix)) 129 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 130 | 131 | 132 | @register_vcs_handler("git", "get_keywords") 133 | def git_get_keywords(versionfile_abs): 134 | """Extract version information from the given file.""" 135 | # the code embedded in _version.py can just fetch the value of these 136 | # keywords. When used from setup.py, we don't want to import _version.py, 137 | # so we do it with a regexp instead. This function is not used from 138 | # _version.py. 139 | keywords = {} 140 | try: 141 | f = open(versionfile_abs, "r") 142 | for line in f.readlines(): 143 | if line.strip().startswith("git_refnames ="): 144 | mo = re.search(r'=\s*"(.*)"', line) 145 | if mo: 146 | keywords["refnames"] = mo.group(1) 147 | if line.strip().startswith("git_full ="): 148 | mo = re.search(r'=\s*"(.*)"', line) 149 | if mo: 150 | keywords["full"] = mo.group(1) 151 | if line.strip().startswith("git_date ="): 152 | mo = re.search(r'=\s*"(.*)"', line) 153 | if mo: 154 | keywords["date"] = mo.group(1) 155 | f.close() 156 | except EnvironmentError: 157 | pass 158 | return keywords 159 | 160 | 161 | @register_vcs_handler("git", "keywords") 162 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 163 | """Get version information from git keywords.""" 164 | if not keywords: 165 | raise NotThisMethod("no keywords at all, weird") 166 | date = keywords.get("date") 167 | if date is not None: 168 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 169 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 170 | # -like" string, which we must then edit to make compliant), because 171 | # it's been around since git-1.5.3, and it's too difficult to 172 | # discover which version we're using, or to work around using an 173 | # older one. 174 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 175 | refnames = keywords["refnames"].strip() 176 | if refnames.startswith("$Format"): 177 | if verbose: 178 | print("keywords are unexpanded, not using") 179 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 180 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 181 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 182 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 183 | TAG = "tag: " 184 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 185 | if not tags: 186 | # Either we're using git < 1.8.3, or there really are no tags. We use 187 | # a heuristic: assume all version tags have a digit. The old git %d 188 | # expansion behaves like git log --decorate=short and strips out the 189 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 190 | # between branches and tags. By ignoring refnames without digits, we 191 | # filter out many common branch names like "release" and 192 | # "stabilization", as well as "HEAD" and "master". 
193 | tags = set([r for r in refs if re.search(r'\d', r)]) 194 | if verbose: 195 | print("discarding '%s', no digits" % ",".join(refs - tags)) 196 | if verbose: 197 | print("likely tags: %s" % ",".join(sorted(tags))) 198 | for ref in sorted(tags): 199 | # sorting will prefer e.g. "2.0" over "2.0rc1" 200 | if ref.startswith(tag_prefix): 201 | r = ref[len(tag_prefix):] 202 | if verbose: 203 | print("picking %s" % r) 204 | return {"version": r, 205 | "full-revisionid": keywords["full"].strip(), 206 | "dirty": False, "error": None, 207 | "date": date} 208 | # no suitable tags, so version is "0+unknown", but full hex is still there 209 | if verbose: 210 | print("no suitable tags, using unknown + full revision id") 211 | return {"version": "0+unknown", 212 | "full-revisionid": keywords["full"].strip(), 213 | "dirty": False, "error": "no suitable tags", "date": None} 214 | 215 | 216 | @register_vcs_handler("git", "pieces_from_vcs") 217 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 218 | """Get version from 'git describe' in the root of the source tree. 219 | 220 | This only gets called if the git-archive 'subst' keywords were *not* 221 | expanded, and _version.py hasn't already been rewritten with a short 222 | version string, meaning we're inside a checked out source tree. 223 | """ 224 | GITS = ["git"] 225 | if sys.platform == "win32": 226 | GITS = ["git.cmd", "git.exe"] 227 | 228 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 229 | hide_stderr=True) 230 | if rc != 0: 231 | if verbose: 232 | print("Directory %s not under git control" % root) 233 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 234 | 235 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 236 | # if there isn't one, this yields HEX[-dirty] (no NUM) 237 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 238 | "--always", "--long", 239 | "--match", "%s*" % tag_prefix], 240 | cwd=root) 241 | # --long was added in git-1.5.5 242 | if describe_out is None: 243 | raise NotThisMethod("'git describe' failed") 244 | describe_out = describe_out.strip() 245 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 246 | if full_out is None: 247 | raise NotThisMethod("'git rev-parse' failed") 248 | full_out = full_out.strip() 249 | 250 | pieces = {} 251 | pieces["long"] = full_out 252 | pieces["short"] = full_out[:7] # maybe improved later 253 | pieces["error"] = None 254 | 255 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 256 | # TAG might have hyphens. 257 | git_describe = describe_out 258 | 259 | # look for -dirty suffix 260 | dirty = git_describe.endswith("-dirty") 261 | pieces["dirty"] = dirty 262 | if dirty: 263 | git_describe = git_describe[:git_describe.rindex("-dirty")] 264 | 265 | # now we have TAG-NUM-gHEX or HEX 266 | 267 | if "-" in git_describe: 268 | # TAG-NUM-gHEX 269 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 270 | if not mo: 271 | # unparseable. Maybe git-describe is misbehaving? 
272 | pieces["error"] = ("unable to parse git-describe output: '%s'" 273 | % describe_out) 274 | return pieces 275 | 276 | # tag 277 | full_tag = mo.group(1) 278 | if not full_tag.startswith(tag_prefix): 279 | if verbose: 280 | fmt = "tag '%s' doesn't start with prefix '%s'" 281 | print(fmt % (full_tag, tag_prefix)) 282 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 283 | % (full_tag, tag_prefix)) 284 | return pieces 285 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 286 | 287 | # distance: number of commits since tag 288 | pieces["distance"] = int(mo.group(2)) 289 | 290 | # commit: short hex revision ID 291 | pieces["short"] = mo.group(3) 292 | 293 | else: 294 | # HEX: no tags 295 | pieces["closest-tag"] = None 296 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 297 | cwd=root) 298 | pieces["distance"] = int(count_out) # total number of commits 299 | 300 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 301 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], 302 | cwd=root)[0].strip() 303 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 304 | 305 | return pieces 306 | 307 | 308 | def plus_or_dot(pieces): 309 | """Return a + if we don't already have one, else return a .""" 310 | if "+" in pieces.get("closest-tag", ""): 311 | return "." 312 | return "+" 313 | 314 | 315 | def render_pep440(pieces): 316 | """Build up version string, with post-release "local version identifier". 317 | 318 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 319 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 320 | 321 | Exceptions: 322 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 323 | """ 324 | if pieces["closest-tag"]: 325 | rendered = pieces["closest-tag"] 326 | if pieces["distance"] or pieces["dirty"]: 327 | rendered += plus_or_dot(pieces) 328 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 329 | if pieces["dirty"]: 330 | rendered += ".dirty" 331 | else: 332 | # exception #1 333 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 334 | pieces["short"]) 335 | if pieces["dirty"]: 336 | rendered += ".dirty" 337 | return rendered 338 | 339 | 340 | def render_pep440_pre(pieces): 341 | """TAG[.post.devDISTANCE] -- No -dirty. 342 | 343 | Exceptions: 344 | 1: no tags. 0.post.devDISTANCE 345 | """ 346 | if pieces["closest-tag"]: 347 | rendered = pieces["closest-tag"] 348 | if pieces["distance"]: 349 | rendered += ".post.dev%d" % pieces["distance"] 350 | else: 351 | # exception #1 352 | rendered = "0.post.dev%d" % pieces["distance"] 353 | return rendered 354 | 355 | 356 | def render_pep440_post(pieces): 357 | """TAG[.postDISTANCE[.dev0]+gHEX] . 358 | 359 | The ".dev0" means dirty. Note that .dev0 sorts backwards 360 | (a dirty tree will appear "older" than the corresponding clean one), 361 | but you shouldn't be releasing software with -dirty anyways. 362 | 363 | Exceptions: 364 | 1: no tags. 
0.postDISTANCE[.dev0] 365 | """ 366 | if pieces["closest-tag"]: 367 | rendered = pieces["closest-tag"] 368 | if pieces["distance"] or pieces["dirty"]: 369 | rendered += ".post%d" % pieces["distance"] 370 | if pieces["dirty"]: 371 | rendered += ".dev0" 372 | rendered += plus_or_dot(pieces) 373 | rendered += "g%s" % pieces["short"] 374 | else: 375 | # exception #1 376 | rendered = "0.post%d" % pieces["distance"] 377 | if pieces["dirty"]: 378 | rendered += ".dev0" 379 | rendered += "+g%s" % pieces["short"] 380 | return rendered 381 | 382 | 383 | def render_pep440_old(pieces): 384 | """TAG[.postDISTANCE[.dev0]] . 385 | 386 | The ".dev0" means dirty. 387 | 388 | Exceptions: 389 | 1: no tags. 0.postDISTANCE[.dev0] 390 | """ 391 | if pieces["closest-tag"]: 392 | rendered = pieces["closest-tag"] 393 | if pieces["distance"] or pieces["dirty"]: 394 | rendered += ".post%d" % pieces["distance"] 395 | if pieces["dirty"]: 396 | rendered += ".dev0" 397 | else: 398 | # exception #1 399 | rendered = "0.post%d" % pieces["distance"] 400 | if pieces["dirty"]: 401 | rendered += ".dev0" 402 | return rendered 403 | 404 | 405 | def render_git_describe(pieces): 406 | """TAG[-DISTANCE-gHEX][-dirty]. 407 | 408 | Like 'git describe --tags --dirty --always'. 409 | 410 | Exceptions: 411 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 412 | """ 413 | if pieces["closest-tag"]: 414 | rendered = pieces["closest-tag"] 415 | if pieces["distance"]: 416 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 417 | else: 418 | # exception #1 419 | rendered = pieces["short"] 420 | if pieces["dirty"]: 421 | rendered += "-dirty" 422 | return rendered 423 | 424 | 425 | def render_git_describe_long(pieces): 426 | """TAG-DISTANCE-gHEX[-dirty]. 427 | 428 | Like 'git describe --tags --dirty --always --long'. 429 | The distance/hash is unconditional. 430 | 431 | Exceptions: 432 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 433 | """ 434 | if pieces["closest-tag"]: 435 | rendered = pieces["closest-tag"] 436 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 437 | else: 438 | # exception #1 439 | rendered = pieces["short"] 440 | if pieces["dirty"]: 441 | rendered += "-dirty" 442 | return rendered 443 | 444 | 445 | def render(pieces, style): 446 | """Render the given version pieces into the requested style.""" 447 | if pieces["error"]: 448 | return {"version": "unknown", 449 | "full-revisionid": pieces.get("long"), 450 | "dirty": None, 451 | "error": pieces["error"], 452 | "date": None} 453 | 454 | if not style or style == "default": 455 | style = "pep440" # the default 456 | 457 | if style == "pep440": 458 | rendered = render_pep440(pieces) 459 | elif style == "pep440-pre": 460 | rendered = render_pep440_pre(pieces) 461 | elif style == "pep440-post": 462 | rendered = render_pep440_post(pieces) 463 | elif style == "pep440-old": 464 | rendered = render_pep440_old(pieces) 465 | elif style == "git-describe": 466 | rendered = render_git_describe(pieces) 467 | elif style == "git-describe-long": 468 | rendered = render_git_describe_long(pieces) 469 | else: 470 | raise ValueError("unknown style '%s'" % style) 471 | 472 | return {"version": rendered, "full-revisionid": pieces["long"], 473 | "dirty": pieces["dirty"], "error": None, 474 | "date": pieces.get("date")} 475 | 476 | 477 | def get_versions(): 478 | """Get version information or return default if unable to do so.""" 479 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 480 | # __file__, we can work backwards from there to the root.
Some 481 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 482 | # case we can only use expanded keywords. 483 | 484 | cfg = get_config() 485 | verbose = cfg.verbose 486 | 487 | try: 488 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 489 | verbose) 490 | except NotThisMethod: 491 | pass 492 | 493 | try: 494 | root = os.path.realpath(__file__) 495 | # versionfile_source is the relative path from the top of the source 496 | # tree (where the .git directory might live) to this file. Invert 497 | # this to find the root from __file__. 498 | for i in cfg.versionfile_source.split('/'): 499 | root = os.path.dirname(root) 500 | except NameError: 501 | return {"version": "0+unknown", "full-revisionid": None, 502 | "dirty": None, 503 | "error": "unable to find root of source tree", 504 | "date": None} 505 | 506 | try: 507 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 508 | return render(pieces, cfg.style) 509 | except NotThisMethod: 510 | pass 511 | 512 | try: 513 | if cfg.parentdir_prefix: 514 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 515 | except NotThisMethod: 516 | pass 517 | 518 | return {"version": "0+unknown", "full-revisionid": None, 519 | "dirty": None, 520 | "error": "unable to compute version", "date": None} 521 | -------------------------------------------------------------------------------- /alphalens/examples/.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb binary 2 | -------------------------------------------------------------------------------- /alphalens/examples/ic_tear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ckend/alphalens/4979057c6fbd045a7998dba53388654d45a217ef/alphalens/examples/ic_tear.png -------------------------------------------------------------------------------- /alphalens/examples/returns_tear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ckend/alphalens/4979057c6fbd045a7998dba53388654d45a217ef/alphalens/examples/returns_tear.png -------------------------------------------------------------------------------- /alphalens/examples/sector_tear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ckend/alphalens/4979057c6fbd045a7998dba53388654d45a217ef/alphalens/examples/sector_tear.png -------------------------------------------------------------------------------- /alphalens/examples/table_tear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ckend/alphalens/4979057c6fbd045a7998dba53388654d45a217ef/alphalens/examples/table_tear.png -------------------------------------------------------------------------------- /alphalens/plotting.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2017 Quantopian, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import numpy as np 17 | import pandas as pd 18 | from scipy import stats 19 | import statsmodels.api as sm 20 | 21 | import seaborn as sns 22 | import matplotlib.cm as cm 23 | import matplotlib.pyplot as plt 24 | from matplotlib.ticker import ScalarFormatter 25 | 26 | from functools import wraps 27 | 28 | from . import utils 29 | from . import performance as perf 30 | 31 | DECIMAL_TO_BPS = 10000 32 | 33 | 34 | def customize(func): 35 | """ 36 | Decorator to set plotting context and axes style during function call. 37 | """ 38 | @wraps(func) 39 | def call_w_context(*args, **kwargs): 40 | set_context = kwargs.pop('set_context', True) 41 | if set_context: 42 | color_palette = sns.color_palette('colorblind') 43 | with plotting_context(), axes_style(), color_palette: 44 | sns.despine(left=True) 45 | return func(*args, **kwargs) 46 | else: 47 | return func(*args, **kwargs) 48 | return call_w_context 49 | 50 | 51 | def plotting_context(context='notebook', font_scale=1.5, rc=None): 52 | """ 53 | Create alphalens default plotting style context. 54 | 55 | Under the hood, calls and returns seaborn.plotting_context() with 56 | some custom settings. Usually you would use it in a with-context. 57 | 58 | Parameters 59 | ---------- 60 | context : str, optional 61 | Name of seaborn context. 62 | font_scale : float, optional 63 | Scale font by factor font_scale. 64 | rc : dict, optional 65 | Config flags. 66 | By default, {'lines.linewidth': 1.5} 67 | is being used and will be added to any 68 | rc passed in, unless explicitly overridden. 69 | 70 | Returns 71 | ------- 72 | seaborn plotting context 73 | 74 | Example 75 | ------- 76 | with alphalens.plotting.plotting_context(font_scale=2): 77 | alphalens.create_full_tear_sheet(..., set_context=False) 78 | 79 | See also 80 | -------- 81 | For more information, see seaborn.plotting_context(). 82 | """ 83 | if rc is None: 84 | rc = {} 85 | 86 | rc_default = {'lines.linewidth': 1.5} 87 | 88 | # Add defaults if they do not exist 89 | for name, val in rc_default.items(): 90 | rc.setdefault(name, val) 91 | 92 | return sns.plotting_context(context=context, font_scale=font_scale, rc=rc) 93 | 94 | 95 | def axes_style(style='darkgrid', rc=None): 96 | """Create alphalens default axes style context. 97 | 98 | Under the hood, calls and returns seaborn.axes_style() with 99 | some custom settings. Usually you would use it in a with-context. 100 | 101 | Parameters 102 | ---------- 103 | style : str, optional 104 | Name of seaborn style. 105 | rc : dict, optional 106 | Config flags. 107 | 108 | Returns 109 | ------- 110 | seaborn axes style context 111 | 112 | Example 113 | ------- 114 | with alphalens.plotting.axes_style(style='whitegrid'): 115 | alphalens.create_full_tear_sheet(..., set_context=False) 116 | 117 | See also 118 | -------- 119 | For more information, see seaborn.axes_style().
120 | 121 | """ 122 | if rc is None: 123 | rc = {} 124 | 125 | rc_default = {} 126 | 127 | # Add defaults if they do not exist 128 | for name, val in rc_default.items(): 129 | rc.setdefault(name, val) 130 | 131 | return sns.axes_style(style=style, rc=rc) 132 | 133 | 134 | def plot_returns_table(alpha_beta, 135 | mean_ret_quantile, 136 | mean_ret_spread_quantile): 137 | returns_table = pd.DataFrame() 138 | returns_table = returns_table.append(alpha_beta) 139 | returns_table.loc["Mean Period Wise Return Top Quantile (bps)"] = \ 140 | mean_ret_quantile.iloc[-1] * DECIMAL_TO_BPS 141 | returns_table.loc["Mean Period Wise Return Bottom Quantile (bps)"] = \ 142 | mean_ret_quantile.iloc[0] * DECIMAL_TO_BPS 143 | returns_table.loc["Mean Period Wise Spread (bps)"] = \ 144 | mean_ret_spread_quantile.mean() * DECIMAL_TO_BPS 145 | 146 | print("Returns Analysis") 147 | utils.print_table(returns_table.apply(lambda x: x.round(3))) 148 | 149 | 150 | def plot_turnover_table(autocorrelation_data, quantile_turnover): 151 | turnover_table = pd.DataFrame() 152 | for period in sorted(quantile_turnover.keys()): 153 | for quantile, p_data in quantile_turnover[period].iteritems(): 154 | turnover_table.loc["Quantile {} Mean Turnover ".format(quantile), 155 | "{}D".format(period)] = p_data.mean() 156 | auto_corr = pd.DataFrame() 157 | for period, p_data in autocorrelation_data.iteritems(): 158 | auto_corr.loc["Mean Factor Rank Autocorrelation", 159 | "{}D".format(period)] = p_data.mean() 160 | 161 | print("Turnover Analysis") 162 | utils.print_table(turnover_table.apply(lambda x: x.round(3))) 163 | utils.print_table(auto_corr.apply(lambda x: x.round(3))) 164 | 165 | 166 | def plot_information_table(ic_data): 167 | ic_summary_table = pd.DataFrame() 168 | ic_summary_table["IC Mean"] = ic_data.mean() 169 | ic_summary_table["IC Std."] = ic_data.std() 170 | ic_summary_table["Risk-Adjusted IC"] = \ 171 | ic_data.mean() / ic_data.std() 172 | t_stat, p_value = stats.ttest_1samp(ic_data, 0) 173 | ic_summary_table["t-stat(IC)"] = t_stat 174 | ic_summary_table["p-value(IC)"] = p_value 175 | ic_summary_table["IC Skew"] = stats.skew(ic_data) 176 | ic_summary_table["IC Kurtosis"] = stats.kurtosis(ic_data) 177 | 178 | print("Information Analysis") 179 | utils.print_table(ic_summary_table.apply(lambda x: x.round(3)).T) 180 | 181 | 182 | def plot_quantile_statistics_table(factor_data): 183 | quantile_stats = factor_data.groupby('factor_quantile') \ 184 | .agg(['min', 'max', 'mean', 'std', 'count'])['factor'] 185 | quantile_stats['count %'] = quantile_stats['count'] \ 186 | / quantile_stats['count'].sum() * 100. 187 | 188 | print("Quantiles Statistics") 189 | utils.print_table(quantile_stats) 190 | 191 | 192 | def plot_ic_ts(ic, ax=None): 193 | """ 194 | Plots Spearman Rank Information Coefficient and IC moving 195 | average for a given factor. 196 | 197 | Parameters 198 | ---------- 199 | ic : pd.DataFrame 200 | DataFrame indexed by date, with IC for each forward return. 201 | ax : matplotlib.Axes, optional 202 | Axes upon which to plot. 203 | 204 | Returns 205 | ------- 206 | ax : matplotlib.Axes 207 | The axes that were plotted on. 
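
Example
-------
A minimal usage sketch, assuming factor_data was built with
utils.get_clean_factor_and_forward_returns:

    from alphalens import performance, plotting
    ic = performance.factor_information_coefficient(factor_data)
    plotting.plot_ic_ts(ic)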
208 | """ 209 | ic = ic.copy() 210 | 211 | num_plots = len(ic.columns) 212 | if ax is None: 213 | f, ax = plt.subplots(num_plots, 1, figsize=(18, num_plots * 7)) 214 | ax = np.asarray([ax]).flatten() 215 | 216 | ymin, ymax = (None, None) 217 | for a, (period_num, ic) in zip(ax, ic.iteritems()): 218 | ic.plot(alpha=0.7, ax=a, lw=0.7, color='steelblue') 219 | ic.rolling(window=22).mean().plot( 220 | ax=a, 221 | color='forestgreen', 222 | lw=2, 223 | alpha=0.8 224 | ) 225 | 226 | a.set(ylabel='IC', xlabel="") 227 | a.set_title( 228 | "{} Period Forward Return Information Coefficient (IC)" 229 | .format(period_num)) 230 | a.axhline(0.0, linestyle='-', color='black', lw=1, alpha=0.8) 231 | a.legend(['IC', '1 month moving avg'], loc='upper right') 232 | a.text(.05, .95, "Mean %.3f \n Std. %.3f" % (ic.mean(), ic.std()), 233 | fontsize=16, 234 | bbox={'facecolor': 'white', 'alpha': 1, 'pad': 5}, 235 | transform=a.transAxes, 236 | verticalalignment='top') 237 | 238 | curr_ymin, curr_ymax = a.get_ylim() 239 | ymin = curr_ymin if ymin is None else min(ymin, curr_ymin) 240 | ymax = curr_ymax if ymax is None else max(ymax, curr_ymax) 241 | 242 | for a in ax: 243 | a.set_ylim([ymin, ymax]) 244 | 245 | return ax 246 | 247 | 248 | def plot_ic_hist(ic, ax=None): 249 | """ 250 | Plots Spearman Rank Information Coefficient histogram for a given factor. 251 | 252 | Parameters 253 | ---------- 254 | ic : pd.DataFrame 255 | DataFrame indexed by date, with IC for each forward return. 256 | ax : matplotlib.Axes, optional 257 | Axes upon which to plot. 258 | 259 | Returns 260 | ------- 261 | ax : matplotlib.Axes 262 | The axes that were plotted on. 263 | """ 264 | 265 | ic = ic.copy() 266 | 267 | num_plots = len(ic.columns) 268 | 269 | v_spaces = ((num_plots - 1) // 3) + 1 270 | 271 | if ax is None: 272 | f, ax = plt.subplots(v_spaces, 3, figsize=(18, v_spaces * 6)) 273 | ax = ax.flatten() 274 | 275 | for a, (period_num, ic) in zip(ax, ic.iteritems()): 276 | sns.distplot(ic.replace(np.nan, 0.), norm_hist=True, ax=a) 277 | a.set(title="%s Period IC" % period_num, xlabel='IC') 278 | a.set_xlim([-1, 1]) 279 | a.text(.05, .95, "Mean %.3f \n Std. %.3f" % (ic.mean(), ic.std()), 280 | fontsize=16, 281 | bbox={'facecolor': 'white', 'alpha': 1, 'pad': 5}, 282 | transform=a.transAxes, 283 | verticalalignment='top') 284 | a.axvline(ic.mean(), color='w', linestyle='dashed', linewidth=2) 285 | 286 | if num_plots < len(ax): 287 | ax[-1].set_visible(False) 288 | 289 | return ax 290 | 291 | 292 | def plot_ic_qq(ic, theoretical_dist=stats.norm, ax=None): 293 | """ 294 | Plots Spearman Rank Information Coefficient "Q-Q" plot relative to 295 | a theoretical distribution. 296 | 297 | Parameters 298 | ---------- 299 | ic : pd.DataFrame 300 | DataFrame indexed by date, with IC for each forward return. 301 | theoretical_dist : scipy.stats._continuous_distns 302 | Continuous distribution generator. scipy.stats.norm and 303 | scipy.stats.t are popular options. 304 | ax : matplotlib.Axes, optional 305 | Axes upon which to plot. 306 | 307 | Returns 308 | ------- 309 | ax : matplotlib.Axes 310 | The axes that were plotted on. 
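
Example
-------
A minimal sketch, assuming factor_data from
utils.get_clean_factor_and_forward_returns; here the IC distribution
is compared against a t-distribution instead of the normal default:

    from scipy import stats
    from alphalens import performance, plotting
    ic = performance.factor_information_coefficient(factor_data)
    plotting.plot_ic_qq(ic, theoretical_dist=stats.t)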
311 | """ 312 | 313 | ic = ic.copy() 314 | 315 | num_plots = len(ic.columns) 316 | 317 | v_spaces = ((num_plots - 1) // 3) + 1 318 | 319 | if ax is None: 320 | f, ax = plt.subplots(v_spaces, 3, figsize=(18, v_spaces * 6)) 321 | ax = ax.flatten() 322 | 323 | if isinstance(theoretical_dist, stats.norm.__class__): 324 | dist_name = 'Normal' 325 | elif isinstance(theoretical_dist, stats.t.__class__): 326 | dist_name = 'T' 327 | else: 328 | dist_name = 'Theoretical' 329 | 330 | for a, (period_num, ic) in zip(ax, ic.iteritems()): 331 | sm.qqplot(ic.replace(np.nan, 0.).values, theoretical_dist, fit=True, 332 | line='45', ax=a) 333 | a.set(title="{} Period IC {} Dist. Q-Q".format( 334 | period_num, dist_name), 335 | ylabel='Observed Quantile', 336 | xlabel='{} Distribution Quantile'.format(dist_name)) 337 | 338 | return ax 339 | 340 | 341 | def plot_quantile_returns_bar(mean_ret_by_q, 342 | by_group=False, 343 | ylim_percentiles=None, 344 | ax=None): 345 | """ 346 | Plots mean period wise returns for factor quantiles. 347 | 348 | Parameters 349 | ---------- 350 | mean_ret_by_q : pd.DataFrame 351 | DataFrame with quantile, (group) and mean period wise return values. 352 | by_group : bool 353 | Disaggregated figures by group. 354 | ylim_percentiles : tuple of integers 355 | Percentiles of observed data to use as y limits for plot. 356 | ax : matplotlib.Axes, optional 357 | Axes upon which to plot. 358 | 359 | Returns 360 | ------- 361 | ax : matplotlib.Axes 362 | The axes that were plotted on. 363 | """ 364 | 365 | mean_ret_by_q = mean_ret_by_q.copy() 366 | 367 | if ylim_percentiles is not None: 368 | ymin = (np.nanpercentile(mean_ret_by_q.values, 369 | ylim_percentiles[0]) * DECIMAL_TO_BPS) 370 | ymax = (np.nanpercentile(mean_ret_by_q.values, 371 | ylim_percentiles[1]) * DECIMAL_TO_BPS) 372 | else: 373 | ymin = None 374 | ymax = None 375 | 376 | if by_group: 377 | num_group = len( 378 | mean_ret_by_q.index.get_level_values('group').unique()) 379 | 380 | if ax is None: 381 | v_spaces = ((num_group - 1) // 2) + 1 382 | f, ax = plt.subplots(v_spaces, 2, sharex=False, 383 | sharey=True, figsize=(18, 6 * v_spaces)) 384 | ax = ax.flatten() 385 | 386 | for a, (sc, cor) in zip(ax, mean_ret_by_q.groupby(level='group')): 387 | (cor.xs(sc, level='group') 388 | .multiply(DECIMAL_TO_BPS) 389 | .plot(kind='bar', title=sc, ax=a)) 390 | 391 | a.set(xlabel='', ylabel='Mean Return (bps)', 392 | ylim=(ymin, ymax)) 393 | 394 | if num_group < len(ax): 395 | ax[-1].set_visible(False) 396 | 397 | return ax 398 | 399 | else: 400 | if ax is None: 401 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 402 | 403 | (mean_ret_by_q.multiply(DECIMAL_TO_BPS) 404 | .plot(kind='bar', 405 | title="Mean Period Wise Return By Factor Quantile", ax=ax)) 406 | ax.set(xlabel='', ylabel='Mean Return (bps)', 407 | ylim=(ymin, ymax)) 408 | 409 | return ax 410 | 411 | 412 | def plot_quantile_returns_violin(return_by_q, 413 | ylim_percentiles=None, 414 | ax=None): 415 | """ 416 | Plots a violin box plot of period wise returns for factor quantiles. 417 | 418 | Parameters 419 | ---------- 420 | return_by_q : pd.DataFrame - MultiIndex 421 | DataFrame with date and quantile as rows MultiIndex, 422 | forward return windows as columns, returns as values. 423 | ylim_percentiles : tuple of integers 424 | Percentiles of observed data to use as y limits for plot. 425 | ax : matplotlib.Axes, optional 426 | Axes upon which to plot. 427 | 428 | Returns 429 | ------- 430 | ax : matplotlib.Axes 431 | The axes that were plotted on. 
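
Example
-------
A minimal sketch, assuming factor_data from
utils.get_clean_factor_and_forward_returns; the violin plot needs
per-date quantile returns, hence by_date=True:

    from alphalens import performance, plotting
    mean_ret_by_date, _ = performance.mean_return_by_quantile(
        factor_data, by_date=True)
    plotting.plot_quantile_returns_violin(mean_ret_by_date,
                                          ylim_percentiles=(1, 99))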
432 | """ 433 | 434 | return_by_q = return_by_q.copy() 435 | 436 | if ylim_percentiles is not None: 437 | ymin = (np.nanpercentile(return_by_q.values, 438 | ylim_percentiles[0]) * DECIMAL_TO_BPS) 439 | ymax = (np.nanpercentile(return_by_q.values, 440 | ylim_percentiles[1]) * DECIMAL_TO_BPS) 441 | else: 442 | ymin = None 443 | ymax = None 444 | 445 | if ax is None: 446 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 447 | 448 | unstacked_dr = (return_by_q 449 | .multiply(DECIMAL_TO_BPS)) 450 | unstacked_dr.columns = unstacked_dr.columns.set_names('forward_periods') 451 | unstacked_dr = unstacked_dr.stack() 452 | unstacked_dr.name = 'return' 453 | unstacked_dr = unstacked_dr.reset_index() 454 | 455 | sns.violinplot(data=unstacked_dr, 456 | x='factor_quantile', 457 | hue='forward_periods', 458 | y='return', 459 | orient='v', 460 | cut=0, 461 | inner='quartile', 462 | ax=ax) 463 | ax.set(xlabel='', ylabel='Return (bps)', 464 | title="Period Wise Return By Factor Quantile", 465 | ylim=(ymin, ymax)) 466 | 467 | ax.axhline(0.0, linestyle='-', color='black', lw=0.7, alpha=0.6) 468 | 469 | return ax 470 | 471 | 472 | def plot_mean_quantile_returns_spread_time_series(mean_returns_spread, 473 | std_err=None, 474 | bandwidth=1, 475 | ax=None): 476 | """ 477 | Plots the period wise mean return spread between two factor quantiles over time. 478 | 479 | Parameters 480 | ---------- 481 | mean_returns_spread : pd.Series 482 | Series with difference between quantile mean returns by period. 483 | std_err : pd.Series 484 | Series with standard error of difference between quantile 485 | mean returns each period. 486 | bandwidth : float 487 | Width of displayed error bands in standard deviations. 488 | ax : matplotlib.Axes, optional 489 | Axes upon which to plot. 490 | 491 | Returns 492 | ------- 493 | ax : matplotlib.Axes 494 | The axes that were plotted on.
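
Example
-------
A minimal sketch, assuming factor_data from
utils.get_clean_factor_and_forward_returns with quantiles=5:

    from alphalens import performance, plotting
    mean_ret, std_err = performance.mean_return_by_quantile(
        factor_data, by_date=True)
    spread, spread_std = performance.compute_mean_returns_spread(
        mean_ret, upper_quant=5, lower_quant=1, std_err=std_err)
    plotting.plot_mean_quantile_returns_spread_time_series(
        spread, std_err=spread_std)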
495 | """ 496 | 497 | if isinstance(mean_returns_spread, pd.DataFrame): 498 | if ax is None: 499 | ax = [None for a in mean_returns_spread.columns] 500 | 501 | ymin, ymax = (None, None) 502 | for (i, a), (name, fr_column) in zip(enumerate(ax), 503 | mean_returns_spread.iteritems()): 504 | stdn = None if std_err is None else std_err[name] 505 | a = plot_mean_quantile_returns_spread_time_series(fr_column, 506 | std_err=stdn, 507 | ax=a) 508 | ax[i] = a 509 | curr_ymin, curr_ymax = a.get_ylim() 510 | ymin = curr_ymin if ymin is None else min(ymin, curr_ymin) 511 | ymax = curr_ymax if ymax is None else max(ymax, curr_ymax) 512 | 513 | for a in ax: 514 | a.set_ylim([ymin, ymax]) 515 | 516 | return ax 517 | 518 | if mean_returns_spread.isnull().all(): 519 | return ax 520 | 521 | periods = mean_returns_spread.name 522 | title = ('Top Minus Bottom Quantile Mean Return ({} Period Forward Return)' 523 | .format(periods if periods is not None else "")) 524 | 525 | if ax is None: 526 | f, ax = plt.subplots(figsize=(18, 6)) 527 | 528 | mean_returns_spread_bps = mean_returns_spread * DECIMAL_TO_BPS 529 | 530 | mean_returns_spread_bps.plot(alpha=0.4, ax=ax, lw=0.7, color='forestgreen') 531 | mean_returns_spread_bps.rolling(window=22).mean().plot( 532 | color='orangered', 533 | alpha=0.7, 534 | ax=ax 535 | ) 536 | ax.legend(['mean returns spread', '1 month moving avg'], loc='upper right') 537 | 538 | if std_err is not None: 539 | std_err_bps = std_err * DECIMAL_TO_BPS 540 | upper = mean_returns_spread_bps.values + (std_err_bps * bandwidth) 541 | lower = mean_returns_spread_bps.values - (std_err_bps * bandwidth) 542 | ax.fill_between(mean_returns_spread.index, 543 | lower, 544 | upper, 545 | alpha=0.3, 546 | color='steelblue') 547 | 548 | ylim = np.nanpercentile(abs(mean_returns_spread_bps.values), 95) 549 | ax.set(ylabel='Difference In Quantile Mean Return (bps)', 550 | xlabel='', 551 | title=title, 552 | ylim=(-ylim, ylim)) 553 | ax.axhline(0.0, linestyle='-', color='black', lw=1, alpha=0.8) 554 | 555 | return ax 556 | 557 | 558 | def plot_ic_by_group(ic_group, ax=None): 559 | """ 560 | Plots Spearman Rank Information Coefficient for a given factor over 561 | provided forward returns. Separates by group. 562 | 563 | Parameters 564 | ---------- 565 | ic_group : pd.DataFrame 566 | group-wise mean information coefficient per period. 567 | ax : matplotlib.Axes, optional 568 | Axes upon which to plot. 569 | 570 | Returns 571 | ------- 572 | ax : matplotlib.Axes 573 | The axes that were plotted on. 574 | """ 575 | if ax is None: 576 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 577 | ic_group.plot(kind='bar', ax=ax) 578 | 579 | ax.set(title="Information Coefficient By Group", xlabel="") 580 | ax.set_xticklabels(ic_group.index, rotation=45) 581 | 582 | return ax 583 | 584 | 585 | def plot_factor_rank_auto_correlation(factor_autocorrelation, 586 | period=1, 587 | ax=None): 588 | """ 589 | Plots factor rank autocorrelation over time. 590 | See factor_rank_autocorrelation for more details. 591 | 592 | Parameters 593 | ---------- 594 | factor_autocorrelation : pd.Series 595 | Rolling 1 period (defined by time_rule) autocorrelation 596 | of factor values. 597 | period: int, optional 598 | Period over which the autocorrelation is calculated. 599 | ax : matplotlib.Axes, optional 600 | Axes upon which to plot. 601 | 602 | Returns 603 | ------- 604 | ax : matplotlib.Axes 605 | The axes that were plotted on.
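
Example
-------
A minimal sketch, assuming factor_data from
utils.get_clean_factor_and_forward_returns:

    from alphalens import performance, plotting
    autocorr = performance.factor_rank_autocorrelation(factor_data,
                                                       period=1)
    plotting.plot_factor_rank_auto_correlation(autocorr, period=1)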
606 | """ 607 | if ax is None: 608 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 609 | 610 | factor_autocorrelation.plot(title='{}D Period Factor Rank Autocorrelation' 611 | .format(period), ax=ax) 612 | ax.set(ylabel='Autocorrelation Coefficient', xlabel='') 613 | ax.axhline(0.0, linestyle='-', color='black', lw=1) 614 | ax.text(.05, .95, "Mean %.3f" % factor_autocorrelation.mean(), 615 | fontsize=16, 616 | bbox={'facecolor': 'white', 'alpha': 1, 'pad': 5}, 617 | transform=ax.transAxes, 618 | verticalalignment='top') 619 | 620 | return ax 621 | 622 | 623 | def plot_top_bottom_quantile_turnover(quantile_turnover, period=1, ax=None): 624 | """ 625 | Plots period wise top and bottom quantile factor turnover. 626 | 627 | Parameters 628 | ---------- 629 | quantile_turnover: pd.Dataframe 630 | Quantile turnover (each DataFrame column a quantile). 631 | period: int, optional 632 | Period over which to calculate the turnover. 633 | ax : matplotlib.Axes, optional 634 | Axes upon which to plot. 635 | 636 | Returns 637 | ------- 638 | ax : matplotlib.Axes 639 | The axes that were plotted on. 640 | """ 641 | if ax is None: 642 | f, ax = plt.subplots(1, 1, figsize=(18, 6)) 643 | 644 | max_quantile = quantile_turnover.columns.max() 645 | min_quantile = quantile_turnover.columns.min() 646 | turnover = pd.DataFrame() 647 | turnover['top quantile turnover'] = quantile_turnover[max_quantile] 648 | turnover['bottom quantile turnover'] = quantile_turnover[min_quantile] 649 | turnover.plot(title='{}D Period Top and Bottom Quantile Turnover' 650 | .format(period), ax=ax, alpha=0.6, lw=0.8) 651 | ax.set(ylabel='Proportion Of Names New To Quantile', xlabel="") 652 | 653 | return ax 654 | 655 | 656 | def plot_monthly_ic_heatmap(mean_monthly_ic, ax=None): 657 | """ 658 | Plots a heatmap of the information coefficient or returns by month. 659 | 660 | Parameters 661 | ---------- 662 | mean_monthly_ic : pd.DataFrame 663 | The mean monthly IC for N periods forward. 664 | 665 | Returns 666 | ------- 667 | ax : matplotlib.Axes 668 | The axes that were plotted on. 669 | """ 670 | 671 | mean_monthly_ic = mean_monthly_ic.copy() 672 | 673 | num_plots = len(mean_monthly_ic.columns) 674 | 675 | v_spaces = ((num_plots - 1) // 3) + 1 676 | 677 | if ax is None: 678 | f, ax = plt.subplots(v_spaces, 3, figsize=(18, v_spaces * 6)) 679 | ax = ax.flatten() 680 | 681 | new_index_year = [] 682 | new_index_month = [] 683 | for date in mean_monthly_ic.index: 684 | new_index_year.append(date.year) 685 | new_index_month.append(date.month) 686 | 687 | mean_monthly_ic.index = pd.MultiIndex.from_arrays( 688 | [new_index_year, new_index_month], 689 | names=["year", "month"]) 690 | 691 | for a, (periods_num, ic) in zip(ax, mean_monthly_ic.iteritems()): 692 | 693 | sns.heatmap( 694 | ic.unstack(), 695 | annot=True, 696 | alpha=1.0, 697 | center=0.0, 698 | annot_kws={"size": 7}, 699 | linewidths=0.01, 700 | linecolor='white', 701 | cmap=cm.coolwarm_r, 702 | cbar=False, 703 | ax=a) 704 | a.set(ylabel='', xlabel='') 705 | 706 | a.set_title("Monthly Mean {} Period IC".format(periods_num)) 707 | 708 | if num_plots < len(ax): 709 | ax[-1].set_visible(False) 710 | 711 | return ax 712 | 713 | 714 | def plot_cumulative_returns(factor_returns, 715 | period, 716 | freq=None, 717 | title=None, 718 | ax=None): 719 | """ 720 | Plots the cumulative returns of the returns series passed in. 721 | 722 | Parameters 723 | ---------- 724 | factor_returns : pd.Series 725 | Period wise returns of dollar neutral portfolio weighted by factor 726 | value. 
727 |     period : pandas.Timedelta or string
728 |         Length of period for which the returns are computed (e.g. 1 day)
729 |         if 'period' is a string it must follow pandas.Timedelta constructor
730 |         format (e.g. '1 days', '1D', '30m', '3h', '1D1h', etc)
731 |     freq : pandas DateOffset
732 |         Used to specify a particular trading calendar e.g. BusinessDay or Day
733 |         Usually this is inferred from utils.infer_trading_calendar, which is
734 |         called by either get_clean_factor_and_forward_returns or
735 |         compute_forward_returns
736 |     title: string, optional
737 |         Custom title
738 |     ax : matplotlib.Axes, optional
739 |         Axes upon which to plot.
740 | 
741 |     Returns
742 |     -------
743 |     ax : matplotlib.Axes
744 |         The axes that were plotted on.
745 |     """
746 |     if ax is None:
747 |         f, ax = plt.subplots(1, 1, figsize=(18, 6))
748 | 
749 |     factor_returns = perf.cumulative_returns(factor_returns)
750 | 
751 |     factor_returns.plot(ax=ax, lw=3, color='forestgreen', alpha=0.6)
752 |     ax.set(ylabel='Cumulative Returns',
753 |            title=("Portfolio Cumulative Return ({} Fwd Period)".format(period)
754 |                   if title is None else title),
755 |            xlabel='')
756 |     ax.axhline(1.0, linestyle='-', color='black', lw=1)
757 | 
758 |     return ax
759 | 
760 | 
761 | def plot_cumulative_returns_by_quantile(quantile_returns,
762 |                                         period,
763 |                                         freq=None,
764 |                                         ax=None):
765 |     """
766 |     Plots the cumulative returns of various factor quantiles.
767 | 
768 |     Parameters
769 |     ----------
770 |     quantile_returns : pd.DataFrame
771 |         Returns by factor quantile
772 |     period : pandas.Timedelta or string
773 |         Length of period for which the returns are computed (e.g. 1 day)
774 |         if 'period' is a string it must follow pandas.Timedelta constructor
775 |         format (e.g. '1 days', '1D', '30m', '3h', '1D1h', etc)
776 |     freq : pandas DateOffset
777 |         Used to specify a particular trading calendar e.g. BusinessDay or Day
778 |         Usually this is inferred from utils.infer_trading_calendar, which is
779 |         called by either get_clean_factor_and_forward_returns or
780 |         compute_forward_returns
781 |     ax : matplotlib.Axes, optional
782 |         Axes upon which to plot.
783 | 
784 |     Returns
785 |     -------
786 |     ax : matplotlib.Axes
787 |     """
788 | 
789 |     if ax is None:
790 |         f, ax = plt.subplots(1, 1, figsize=(18, 6))
791 | 
792 |     ret_wide = quantile_returns.unstack('factor_quantile')
793 | 
794 |     cum_ret = ret_wide.apply(perf.cumulative_returns)
795 | 
796 |     cum_ret = cum_ret.loc[:, ::-1]  # we want negative quantiles as 'red'
797 | 
798 |     cum_ret.plot(lw=2, ax=ax, cmap=cm.coolwarm)
799 |     ax.legend()
800 |     ymin, ymax = cum_ret.min().min(), cum_ret.max().max()
801 |     ax.set(ylabel='Log Cumulative Returns',
802 |            title='''Cumulative Return by Quantile
803 |                     ({} Period Forward Return)'''.format(period),
804 |            xlabel='',
805 |            yscale='symlog',
806 |            yticks=np.linspace(ymin, ymax, 5),
807 |            ylim=(ymin, ymax))
808 | 
809 |     ax.yaxis.set_major_formatter(ScalarFormatter())
810 |     ax.axhline(1.0, linestyle='-', color='black', lw=1)
811 | 
812 |     return ax
813 | 
814 | 
815 | def plot_quantile_average_cumulative_return(avg_cumulative_returns,
816 |                                             by_quantile=False,
817 |                                             std_bar=False,
818 |                                             title=None,
819 |                                             ax=None):
820 |     """
821 |     Plots the average cumulative returns by factor quantile in the
822 |     periods before and after the factor (event) date.
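For the one-argument form of perf.cumulative_returns used in this file, compounding simple period returns is a plain cumulative product; a hand-checked sketch (the helper itself may differ in detail):

```python
import pandas as pd

# Compounding simple period returns into a cumulative growth factor.
returns = pd.Series([0.01, -0.02, 0.03])
cumulative = (1 + returns).cumprod()
print(cumulative.round(6))  # 1.010000, 0.989800, 1.019494
```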
823 | 
824 |     Parameters
825 |     ----------
826 |     avg_cumulative_returns : pd.DataFrame
827 |         The format is the one returned by
828 |         performance.average_cumulative_return_by_quantile
829 |     by_quantile : boolean, optional
830 |         Disaggregate figures by quantile (useful to clearly see std dev bars)
831 |     std_bar : boolean, optional
832 |         Plot standard deviation bars
833 |     title: string, optional
834 |         Custom title
835 |     ax : matplotlib.Axes, optional
836 |         Axes upon which to plot.
837 | 
838 |     Returns
839 |     -------
840 |     ax : matplotlib.Axes
841 |     """
842 | 
843 |     avg_cumulative_returns = avg_cumulative_returns.multiply(DECIMAL_TO_BPS)
844 |     quantiles = len(avg_cumulative_returns.index.levels[0].unique())
845 |     palette = [cm.coolwarm(i) for i in np.linspace(0, 1, quantiles)]
846 |     palette = palette[::-1]  # we want negative quantiles as 'red'
847 | 
848 |     if by_quantile:
849 | 
850 |         if ax is None:
851 |             v_spaces = ((quantiles - 1) // 2) + 1
852 |             f, ax = plt.subplots(v_spaces, 2, sharex=False,
853 |                                  sharey=False, figsize=(18, 6 * v_spaces))
854 |             ax = ax.flatten()
855 | 
856 |         for i, (quantile, q_ret) in enumerate(avg_cumulative_returns
857 |                                               .groupby(level='factor_quantile')
858 |                                               ):
859 | 
860 |             mean = q_ret.loc[(quantile, 'mean')]
861 |             mean.name = 'Quantile ' + str(quantile)
862 |             mean.plot(ax=ax[i], color=palette[i])
863 |             ax[i].set_ylabel('Mean Return (bps)')
864 | 
865 |             if std_bar:
866 |                 std = q_ret.loc[(quantile, 'std')]
867 |                 ax[i].errorbar(std.index, mean, yerr=std,
868 |                                fmt='none', ecolor=palette[i], label='none')
869 | 
870 |             ax[i].axvline(x=0, color='k', linestyle='--')
871 |             ax[i].legend()
872 | 
873 | 
874 |     else:
875 | 
876 |         if ax is None:
877 |             f, ax = plt.subplots(1, 1, figsize=(18, 6))
878 | 
879 |         for i, (quantile, q_ret) in enumerate(avg_cumulative_returns
880 |                                               .groupby(level='factor_quantile')
881 |                                               ):
882 | 
883 |             mean = q_ret.loc[(quantile, 'mean')]
884 |             mean.name = 'Quantile ' + str(quantile)
885 |             mean.plot(ax=ax, color=palette[i])
886 | 
887 |             if std_bar:
888 |                 std = q_ret.loc[(quantile, 'std')]
889 |                 ax.errorbar(std.index, mean, yerr=std,
890 |                             fmt='none', ecolor=palette[i], label='none')
891 | 
892 | 
893 |         ax.axvline(x=0, color='k', linestyle='--')
894 |         ax.legend()
895 |         ax.set(ylabel='Mean Return (bps)',
896 |                title=("Average Cumulative Returns by Quantile"
897 |                       if title is None else title),
898 |                xlabel='Periods')
899 | 
900 |     return ax
901 | 
902 | 
903 | def plot_events_distribution(events, num_bars=50, ax=None):
904 |     """
905 |     Plots the distribution of events in time.
906 | 
907 |     Parameters
908 |     ----------
909 |     events : pd.Series
910 |         A pd.Series whose index contains at least a 'date' level.
911 |     num_bars : integer, optional
912 |         Number of bars to plot
913 |     ax : matplotlib.Axes, optional
914 |         Axes upon which to plot.
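plot_quantile_average_cumulative_return above expects the (factor_quantile, statistic) layout produced by performance.average_cumulative_return_by_quantile; a synthetic frame showing that access pattern:

```python
import numpy as np
import pandas as pd

# Rows are keyed by (factor_quantile, 'mean'/'std'); columns are period
# offsets around the event, negative before and positive after.
offsets = range(-2, 3)
idx = pd.MultiIndex.from_product([[1, 2], ['mean', 'std']],
                                 names=['factor_quantile', None])
avg_cum_ret = pd.DataFrame(np.random.uniform(-0.01, 0.01, (4, 5)),
                           index=idx, columns=offsets)
for quantile, q_ret in avg_cum_ret.groupby(level='factor_quantile'):
    mean = q_ret.loc[(quantile, 'mean')]  # a Series over the offsets
    std = q_ret.loc[(quantile, 'std')]
    print(quantile, float(mean[0]), float(std[0]))
```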
915 | 
916 |     Returns
917 |     -------
918 |     ax : matplotlib.Axes
919 |     """
920 | 
921 |     if ax is None:
922 |         f, ax = plt.subplots(1, 1, figsize=(18, 6))
923 | 
924 |     start = events.index.get_level_values('date').min()
925 |     end = events.index.get_level_values('date').max()
926 |     group_interval = (end - start) / num_bars
927 |     grouper = pd.Grouper(level='date', freq=group_interval)
928 |     events.groupby(grouper).count().plot(kind="bar", grid=False, ax=ax)
929 |     ax.set(ylabel='Number of events',
930 |            title='Distribution of events in time',
931 |            xlabel='Date')
932 | 
933 |     return ax
934 | 
--------------------------------------------------------------------------------
/alphalens/tears.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2017 Quantopian, Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import matplotlib.gridspec as gridspec
17 | import matplotlib.pyplot as plt
18 | import pandas as pd
19 | import warnings
20 | 
21 | from . import plotting
22 | from . import performance as perf
23 | from . import utils
24 | 
25 | 
26 | class GridFigure(object):
27 |     """
28 |     Helper class for laying out tear sheet plots on a grid of subplots.
29 |     """
30 | 
31 |     def __init__(self, rows, cols):
32 |         self.rows = rows
33 |         self.cols = cols
34 |         self.fig = plt.figure(figsize=(14, rows * 7))
35 |         self.gs = gridspec.GridSpec(rows, cols, wspace=0.4, hspace=0.3)
36 |         self.curr_row = 0
37 |         self.curr_col = 0
38 | 
39 |     def next_row(self):
40 |         if self.curr_col != 0:
41 |             self.curr_row += 1
42 |             self.curr_col = 0
43 |         subplt = plt.subplot(self.gs[self.curr_row, :])
44 |         self.curr_row += 1
45 |         return subplt
46 | 
47 |     def next_cell(self):
48 |         if self.curr_col >= self.cols:
49 |             self.curr_row += 1
50 |             self.curr_col = 0
51 |         subplt = plt.subplot(self.gs[self.curr_row, self.curr_col])
52 |         self.curr_col += 1
53 |         return subplt
54 | 
55 |     def close(self):
56 |         plt.close(self.fig)
57 |         self.fig = None
58 |         self.gs = None
59 | 
60 | 
61 | @plotting.customize
62 | def create_summary_tear_sheet(
63 |     factor_data, long_short=True, group_neutral=False
64 | ):
65 |     """
66 |     Creates a small summary tear sheet with returns, information, and turnover
67 |     analysis.
68 | 
69 |     Parameters
70 |     ----------
71 |     factor_data : pd.DataFrame - MultiIndex
72 |         A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
73 |         containing the values for a single alpha factor, forward returns for
74 |         each period, the factor quantile/bin that factor value belongs to, and
75 |         (optionally) the group the asset belongs to.
76 |         - See full explanation in utils.get_clean_factor_and_forward_returns
77 |     long_short : bool
78 |         Should this computation happen on a long short portfolio? if so, then
79 |         mean quantile returns will be demeaned across the factor universe.
80 |     group_neutral : bool
81 |         Should this computation happen on a group neutral portfolio? if so,
82 |         returns demeaning will occur on the group level.
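A minimal usage sketch for the GridFigure helper defined above: full-width rows come from next_row(), side-by-side panels from next_cell().

```python
import matplotlib.pyplot as plt
from alphalens.tears import GridFigure

gf = GridFigure(rows=2, cols=2)
ax_top = gf.next_row()     # spans both columns of row 0
ax_left = gf.next_cell()   # row 1, column 0
ax_right = gf.next_cell()  # row 1, column 1
ax_top.plot([1, 2, 3])
plt.show()
gf.close()  # releases the figure, mirroring the tear sheet functions
```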
83 | """ 84 | 85 | # Returns Analysis 86 | mean_quant_ret, std_quantile = perf.mean_return_by_quantile( 87 | factor_data, 88 | by_group=False, 89 | demeaned=long_short, 90 | group_adjust=group_neutral, 91 | ) 92 | 93 | mean_quant_rateret = mean_quant_ret.apply( 94 | utils.rate_of_return, axis=0, base_period=mean_quant_ret.columns[0] 95 | ) 96 | 97 | mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile( 98 | factor_data, 99 | by_date=True, 100 | by_group=False, 101 | demeaned=long_short, 102 | group_adjust=group_neutral, 103 | ) 104 | 105 | mean_quant_rateret_bydate = mean_quant_ret_bydate.apply( 106 | utils.rate_of_return, 107 | axis=0, 108 | base_period=mean_quant_ret_bydate.columns[0], 109 | ) 110 | 111 | compstd_quant_daily = std_quant_daily.apply( 112 | utils.std_conversion, axis=0, base_period=std_quant_daily.columns[0] 113 | ) 114 | 115 | alpha_beta = perf.factor_alpha_beta( 116 | factor_data, demeaned=long_short, group_adjust=group_neutral 117 | ) 118 | 119 | mean_ret_spread_quant, std_spread_quant = perf.compute_mean_returns_spread( 120 | mean_quant_rateret_bydate, 121 | factor_data["factor_quantile"].max(), 122 | factor_data["factor_quantile"].min(), 123 | std_err=compstd_quant_daily, 124 | ) 125 | 126 | periods = utils.get_forward_returns_columns(factor_data.columns) 127 | periods = list(map(lambda p: pd.Timedelta(p).days, periods)) 128 | 129 | fr_cols = len(periods) 130 | vertical_sections = 2 + fr_cols * 3 131 | gf = GridFigure(rows=vertical_sections, cols=1) 132 | 133 | plotting.plot_quantile_statistics_table(factor_data) 134 | 135 | plotting.plot_returns_table( 136 | alpha_beta, mean_quant_rateret, mean_ret_spread_quant 137 | ) 138 | 139 | plotting.plot_quantile_returns_bar( 140 | mean_quant_rateret, 141 | by_group=False, 142 | ylim_percentiles=None, 143 | ax=gf.next_row(), 144 | ) 145 | 146 | # Information Analysis 147 | ic = perf.factor_information_coefficient(factor_data) 148 | plotting.plot_information_table(ic) 149 | 150 | # Turnover Analysis 151 | quantile_factor = factor_data["factor_quantile"] 152 | 153 | quantile_turnover = { 154 | p: pd.concat( 155 | [ 156 | perf.quantile_turnover(quantile_factor, q, p) 157 | for q in range(1, int(quantile_factor.max()) + 1) 158 | ], 159 | axis=1, 160 | ) 161 | for p in periods 162 | } 163 | 164 | autocorrelation = pd.concat( 165 | [ 166 | perf.factor_rank_autocorrelation(factor_data, period) 167 | for period in periods 168 | ], 169 | axis=1, 170 | ) 171 | 172 | plotting.plot_turnover_table(autocorrelation, quantile_turnover) 173 | 174 | plt.show() 175 | gf.close() 176 | 177 | 178 | @plotting.customize 179 | def create_returns_tear_sheet( 180 | factor_data, long_short=True, group_neutral=False, by_group=False 181 | ): 182 | """ 183 | Creates a tear sheet for returns analysis of a factor. 184 | 185 | Parameters 186 | ---------- 187 | factor_data : pd.DataFrame - MultiIndex 188 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1), 189 | containing the values for a single alpha factor, forward returns for 190 | each period, the factor quantile/bin that factor value belongs to, 191 | and (optionally) the group the asset belongs to. 192 | - See full explanation in utils.get_clean_factor_and_forward_returns 193 | long_short : bool 194 | Should this computation happen on a long short portfolio? if so, then 195 | mean quantile returns will be demeaned across the factor universe. 
196 | Additionally factor values will be demeaned across the factor universe 197 | when factor weighting the portfolio for cumulative returns plots 198 | group_neutral : bool 199 | Should this computation happen on a group neutral portfolio? if so, 200 | returns demeaning will occur on the group level. 201 | Additionally each group will weight the same in cumulative returns 202 | plots 203 | by_group : bool 204 | If True, display graphs separately for each group. 205 | """ 206 | 207 | factor_returns = perf.factor_returns( 208 | factor_data, long_short, group_neutral 209 | ) 210 | 211 | mean_quant_ret, std_quantile = perf.mean_return_by_quantile( 212 | factor_data, 213 | by_group=False, 214 | demeaned=long_short, 215 | group_adjust=group_neutral, 216 | ) 217 | 218 | mean_quant_rateret = mean_quant_ret.apply( 219 | utils.rate_of_return, axis=0, base_period=mean_quant_ret.columns[0] 220 | ) 221 | 222 | mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile( 223 | factor_data, 224 | by_date=True, 225 | by_group=False, 226 | demeaned=long_short, 227 | group_adjust=group_neutral, 228 | ) 229 | 230 | mean_quant_rateret_bydate = mean_quant_ret_bydate.apply( 231 | utils.rate_of_return, 232 | axis=0, 233 | base_period=mean_quant_ret_bydate.columns[0], 234 | ) 235 | 236 | compstd_quant_daily = std_quant_daily.apply( 237 | utils.std_conversion, axis=0, base_period=std_quant_daily.columns[0] 238 | ) 239 | 240 | alpha_beta = perf.factor_alpha_beta( 241 | factor_data, factor_returns, long_short, group_neutral 242 | ) 243 | 244 | mean_ret_spread_quant, std_spread_quant = perf.compute_mean_returns_spread( 245 | mean_quant_rateret_bydate, 246 | factor_data["factor_quantile"].max(), 247 | factor_data["factor_quantile"].min(), 248 | std_err=compstd_quant_daily, 249 | ) 250 | 251 | fr_cols = len(factor_returns.columns) 252 | vertical_sections = 2 + fr_cols * 3 253 | gf = GridFigure(rows=vertical_sections, cols=1) 254 | 255 | plotting.plot_returns_table( 256 | alpha_beta, mean_quant_rateret, mean_ret_spread_quant 257 | ) 258 | 259 | plotting.plot_quantile_returns_bar( 260 | mean_quant_rateret, 261 | by_group=False, 262 | ylim_percentiles=None, 263 | ax=gf.next_row(), 264 | ) 265 | 266 | plotting.plot_quantile_returns_violin( 267 | mean_quant_rateret_bydate, ylim_percentiles=(1, 99), ax=gf.next_row() 268 | ) 269 | 270 | trading_calendar = factor_data.index.levels[0].freq 271 | if trading_calendar is None: 272 | trading_calendar = pd.tseries.offsets.BDay() 273 | warnings.warn( 274 | "'freq' not set in factor_data index: assuming business day", 275 | UserWarning, 276 | ) 277 | 278 | # Compute cumulative returns from daily simple returns, if '1D' 279 | # returns are provided. 
280 | if "1D" in factor_returns: 281 | title = ( 282 | "Factor Weighted " 283 | + ("Group Neutral " if group_neutral else "") 284 | + ("Long/Short " if long_short else "") 285 | + "Portfolio Cumulative Return (1D Period)" 286 | ) 287 | 288 | plotting.plot_cumulative_returns( 289 | factor_returns["1D"], period="1D", title=title, ax=gf.next_row() 290 | ) 291 | 292 | plotting.plot_cumulative_returns_by_quantile( 293 | mean_quant_ret_bydate["1D"], period="1D", ax=gf.next_row() 294 | ) 295 | 296 | ax_mean_quantile_returns_spread_ts = [ 297 | gf.next_row() for x in range(fr_cols) 298 | ] 299 | plotting.plot_mean_quantile_returns_spread_time_series( 300 | mean_ret_spread_quant, 301 | std_err=std_spread_quant, 302 | bandwidth=0.5, 303 | ax=ax_mean_quantile_returns_spread_ts, 304 | ) 305 | 306 | plt.show() 307 | gf.close() 308 | 309 | if by_group: 310 | ( 311 | mean_return_quantile_group, 312 | mean_return_quantile_group_std_err, 313 | ) = perf.mean_return_by_quantile( 314 | factor_data, 315 | by_date=False, 316 | by_group=True, 317 | demeaned=long_short, 318 | group_adjust=group_neutral, 319 | ) 320 | 321 | mean_quant_rateret_group = mean_return_quantile_group.apply( 322 | utils.rate_of_return, 323 | axis=0, 324 | base_period=mean_return_quantile_group.columns[0], 325 | ) 326 | 327 | num_groups = len( 328 | mean_quant_rateret_group.index.get_level_values("group").unique() 329 | ) 330 | 331 | vertical_sections = 1 + (((num_groups - 1) // 2) + 1) 332 | gf = GridFigure(rows=vertical_sections, cols=2) 333 | 334 | ax_quantile_returns_bar_by_group = [ 335 | gf.next_cell() for _ in range(num_groups) 336 | ] 337 | plotting.plot_quantile_returns_bar( 338 | mean_quant_rateret_group, 339 | by_group=True, 340 | ylim_percentiles=(5, 95), 341 | ax=ax_quantile_returns_bar_by_group, 342 | ) 343 | plt.show() 344 | gf.close() 345 | 346 | 347 | @plotting.customize 348 | def create_information_tear_sheet( 349 | factor_data, group_neutral=False, by_group=False 350 | ): 351 | """ 352 | Creates a tear sheet for information analysis of a factor. 353 | 354 | Parameters 355 | ---------- 356 | factor_data : pd.DataFrame - MultiIndex 357 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1), 358 | containing the values for a single alpha factor, forward returns for 359 | each period, the factor quantile/bin that factor value belongs to, and 360 | (optionally) the group the asset belongs to. 361 | - See full explanation in utils.get_clean_factor_and_forward_returns 362 | group_neutral : bool 363 | Demean forward returns by group before computing IC. 364 | by_group : bool 365 | If True, display graphs separately for each group. 
366 | """ 367 | 368 | ic = perf.factor_information_coefficient(factor_data, group_neutral) 369 | 370 | plotting.plot_information_table(ic) 371 | 372 | columns_wide = 2 373 | fr_cols = len(ic.columns) 374 | rows_when_wide = ((fr_cols - 1) // columns_wide) + 1 375 | vertical_sections = fr_cols + 3 * rows_when_wide + 2 * fr_cols 376 | gf = GridFigure(rows=vertical_sections, cols=columns_wide) 377 | 378 | ax_ic_ts = [gf.next_row() for _ in range(fr_cols)] 379 | plotting.plot_ic_ts(ic, ax=ax_ic_ts) 380 | 381 | ax_ic_hqq = [gf.next_cell() for _ in range(fr_cols * 2)] 382 | plotting.plot_ic_hist(ic, ax=ax_ic_hqq[::2]) 383 | plotting.plot_ic_qq(ic, ax=ax_ic_hqq[1::2]) 384 | 385 | if not by_group: 386 | 387 | mean_monthly_ic = perf.mean_information_coefficient( 388 | factor_data, 389 | group_adjust=group_neutral, 390 | by_group=False, 391 | by_time="M", 392 | ) 393 | ax_monthly_ic_heatmap = [gf.next_cell() for x in range(fr_cols)] 394 | plotting.plot_monthly_ic_heatmap( 395 | mean_monthly_ic, ax=ax_monthly_ic_heatmap 396 | ) 397 | 398 | if by_group: 399 | mean_group_ic = perf.mean_information_coefficient( 400 | factor_data, group_adjust=group_neutral, by_group=True 401 | ) 402 | 403 | plotting.plot_ic_by_group(mean_group_ic, ax=gf.next_row()) 404 | 405 | plt.show() 406 | gf.close() 407 | 408 | 409 | @plotting.customize 410 | def create_turnover_tear_sheet(factor_data, turnover_periods=None): 411 | """ 412 | Creates a tear sheet for analyzing the turnover properties of a factor. 413 | 414 | Parameters 415 | ---------- 416 | factor_data : pd.DataFrame - MultiIndex 417 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1), 418 | containing the values for a single alpha factor, forward returns for 419 | each period, the factor quantile/bin that factor value belongs to, and 420 | (optionally) the group the asset belongs to. 421 | - See full explanation in utils.get_clean_factor_and_forward_returns 422 | turnover_periods : sequence[string], optional 423 | Periods to compute turnover analysis on. By default periods in 424 | 'factor_data' are used but custom periods can provided instead. This 425 | can be useful when periods in 'factor_data' are not multiples of the 426 | frequency at which factor values are computed i.e. 
427 |         are 2h and 4h and the factor is computed daily, so values like
428 |         ['1D', '2D'] could be used instead.
429 |     """
430 | 
431 |     if turnover_periods is None:
432 |         input_periods = utils.get_forward_returns_columns(
433 |             factor_data.columns, require_exact_day_multiple=True,
434 |         ).to_numpy()
435 |         turnover_periods = utils.timedelta_strings_to_integers(input_periods)
436 |     else:
437 |         turnover_periods = utils.timedelta_strings_to_integers(
438 |             turnover_periods,
439 |         )
440 | 
441 |     quantile_factor = factor_data["factor_quantile"]
442 | 
443 |     quantile_turnover = {
444 |         p: pd.concat(
445 |             [
446 |                 perf.quantile_turnover(quantile_factor, q, p)
447 |                 for q in quantile_factor.sort_values().unique().tolist()
448 |             ],
449 |             axis=1,
450 |         )
451 |         for p in turnover_periods
452 |     }
453 | 
454 |     autocorrelation = pd.concat(
455 |         [
456 |             perf.factor_rank_autocorrelation(factor_data, period)
457 |             for period in turnover_periods
458 |         ],
459 |         axis=1,
460 |     )
461 | 
462 |     plotting.plot_turnover_table(autocorrelation, quantile_turnover)
463 | 
464 |     fr_cols = len(turnover_periods)
465 |     columns_wide = 1
466 |     rows_when_wide = ((fr_cols - 1) // 1) + 1
467 |     vertical_sections = fr_cols + 3 * rows_when_wide + 2 * fr_cols
468 |     gf = GridFigure(rows=vertical_sections, cols=columns_wide)
469 | 
470 |     for period in turnover_periods:
471 |         if quantile_turnover[period].isnull().all().all():
472 |             continue
473 |         plotting.plot_top_bottom_quantile_turnover(
474 |             quantile_turnover[period], period=period, ax=gf.next_row()
475 |         )
476 | 
477 |     for period in autocorrelation:
478 |         if autocorrelation[period].isnull().all():
479 |             continue
480 |         plotting.plot_factor_rank_auto_correlation(
481 |             autocorrelation[period], period=period, ax=gf.next_row()
482 |         )
483 | 
484 |     plt.show()
485 |     gf.close()
486 | 
487 | 
488 | @plotting.customize
489 | def create_full_tear_sheet(factor_data,
490 |                            long_short=True,
491 |                            group_neutral=False,
492 |                            by_group=False):
493 |     """
494 |     Creates a full tear sheet for analyzing and evaluating a single
495 |     return predicting (alpha) factor.
496 | 
497 |     Parameters
498 |     ----------
499 |     factor_data : pd.DataFrame - MultiIndex
500 |         A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
501 |         containing the values for a single alpha factor, forward returns for
502 |         each period, the factor quantile/bin that factor value belongs to, and
503 |         (optionally) the group the asset belongs to.
504 |         - See full explanation in utils.get_clean_factor_and_forward_returns
505 |     long_short : bool
506 |         Should this computation happen on a long short portfolio?
507 |         - See tears.create_returns_tear_sheet for details on how this flag
508 |         affects returns analysis
509 |     group_neutral : bool
510 |         Should this computation happen on a group neutral portfolio?
511 |         - See tears.create_returns_tear_sheet for details on how this flag
512 |         affects returns analysis
513 |         - See tears.create_information_tear_sheet for details on how this
514 |         flag affects information analysis
515 |     by_group : bool
516 |         If True, display graphs separately for each group.
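The quantile_turnover figures collected in the turnover tear sheet above reduce to a simple set difference per date; by hand (a sketch of the definition, not the library code):

```python
# Share of names in the top quantile today that were not in it one
# period earlier.
top_today = {'A', 'B', 'C'}
top_one_period_ago = {'B', 'C', 'D'}
turnover = len(top_today - top_one_period_ago) / len(top_today)
print(turnover)  # 0.333..., only 'A' is new to the quantile
```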
517 | """ 518 | 519 | plotting.plot_quantile_statistics_table(factor_data) 520 | create_returns_tear_sheet( 521 | factor_data, long_short, group_neutral, by_group, set_context=False 522 | ) 523 | create_information_tear_sheet( 524 | factor_data, group_neutral, by_group, set_context=False 525 | ) 526 | create_turnover_tear_sheet(factor_data, set_context=False) 527 | 528 | 529 | @plotting.customize 530 | def create_event_returns_tear_sheet(factor_data, 531 | returns, 532 | avgretplot=(5, 15), 533 | long_short=True, 534 | group_neutral=False, 535 | std_bar=True, 536 | by_group=False): 537 | """ 538 | Creates a tear sheet to view the average cumulative returns for a 539 | factor within a window (pre and post event). 540 | 541 | Parameters 542 | ---------- 543 | factor_data : pd.DataFrame - MultiIndex 544 | A MultiIndex Series indexed by date (level 0) and asset (level 1), 545 | containing the values for a single alpha factor, the factor 546 | quantile/bin that factor value belongs to and (optionally) the group 547 | the asset belongs to. 548 | - See full explanation in utils.get_clean_factor_and_forward_returns 549 | returns : pd.DataFrame 550 | A DataFrame indexed by date with assets in the columns containing daily 551 | returns. 552 | - See full explanation in utils.get_clean_factor_and_forward_returns 553 | avgretplot: tuple (int, int) - (before, after) 554 | If not None, plot quantile average cumulative returns 555 | long_short : bool 556 | Should this computation happen on a long short portfolio? if so then 557 | factor returns will be demeaned across the factor universe 558 | group_neutral : bool 559 | Should this computation happen on a group neutral portfolio? if so, 560 | returns demeaning will occur on the group level. 561 | std_bar : boolean, optional 562 | Show plots with standard deviation bars, one for each quantile 563 | by_group : bool 564 | If True, display graphs separately for each group. 
565 | """ 566 | 567 | before, after = avgretplot 568 | 569 | avg_cumulative_returns = perf.average_cumulative_return_by_quantile( 570 | factor_data, 571 | returns, 572 | periods_before=before, 573 | periods_after=after, 574 | demeaned=long_short, 575 | group_adjust=group_neutral, 576 | ) 577 | 578 | num_quantiles = int(factor_data["factor_quantile"].max()) 579 | 580 | vertical_sections = 1 581 | if std_bar: 582 | vertical_sections += ((num_quantiles - 1) // 2) + 1 583 | cols = 2 if num_quantiles != 1 else 1 584 | gf = GridFigure(rows=vertical_sections, cols=cols) 585 | plotting.plot_quantile_average_cumulative_return( 586 | avg_cumulative_returns, 587 | by_quantile=False, 588 | std_bar=False, 589 | ax=gf.next_row(), 590 | ) 591 | if std_bar: 592 | ax_avg_cumulative_returns_by_q = [ 593 | gf.next_cell() for _ in range(num_quantiles) 594 | ] 595 | plotting.plot_quantile_average_cumulative_return( 596 | avg_cumulative_returns, 597 | by_quantile=True, 598 | std_bar=True, 599 | ax=ax_avg_cumulative_returns_by_q, 600 | ) 601 | 602 | plt.show() 603 | gf.close() 604 | 605 | if by_group: 606 | groups = factor_data["group"].unique() 607 | num_groups = len(groups) 608 | vertical_sections = ((num_groups - 1) // 2) + 1 609 | gf = GridFigure(rows=vertical_sections, cols=2) 610 | 611 | avg_cumret_by_group = perf.average_cumulative_return_by_quantile( 612 | factor_data, 613 | returns, 614 | periods_before=before, 615 | periods_after=after, 616 | demeaned=long_short, 617 | group_adjust=group_neutral, 618 | by_group=True, 619 | ) 620 | 621 | for group, avg_cumret in avg_cumret_by_group.groupby(level="group"): 622 | avg_cumret.index = avg_cumret.index.droplevel("group") 623 | plotting.plot_quantile_average_cumulative_return( 624 | avg_cumret, 625 | by_quantile=False, 626 | std_bar=False, 627 | title=group, 628 | ax=gf.next_cell(), 629 | ) 630 | 631 | plt.show() 632 | gf.close() 633 | 634 | 635 | @plotting.customize 636 | def create_event_study_tear_sheet(factor_data, 637 | returns, 638 | avgretplot=(5, 15), 639 | rate_of_ret=True, 640 | n_bars=50): 641 | """ 642 | Creates an event study tear sheet for analysis of a specific event. 643 | 644 | Parameters 645 | ---------- 646 | factor_data : pd.DataFrame - MultiIndex 647 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1), 648 | containing the values for a single event, forward returns for each 649 | period, the factor quantile/bin that factor value belongs to, and 650 | (optionally) the group the asset belongs to. 651 | returns : pd.DataFrame, required only if 'avgretplot' is provided 652 | A DataFrame indexed by date with assets in the columns containing daily 653 | returns. 654 | - See full explanation in utils.get_clean_factor_and_forward_returns 655 | avgretplot: tuple (int, int) - (before, after), optional 656 | If not None, plot event style average cumulative returns within a 657 | window (pre and post event). 
658 | rate_of_ret : bool, optional 659 | Display rate of return instead of simple return in 'Mean Period Wise 660 | Return By Factor Quantile' and 'Period Wise Return By Factor Quantile' 661 | plots 662 | n_bars : int, optional 663 | Number of bars in event distribution plot 664 | """ 665 | 666 | long_short = False 667 | 668 | plotting.plot_quantile_statistics_table(factor_data) 669 | 670 | gf = GridFigure(rows=1, cols=1) 671 | plotting.plot_events_distribution( 672 | events=factor_data["factor"], num_bars=n_bars, ax=gf.next_row() 673 | ) 674 | plt.show() 675 | gf.close() 676 | 677 | if returns is not None and avgretplot is not None: 678 | 679 | create_event_returns_tear_sheet( 680 | factor_data=factor_data, 681 | returns=returns, 682 | avgretplot=avgretplot, 683 | long_short=long_short, 684 | group_neutral=False, 685 | std_bar=True, 686 | by_group=False, 687 | ) 688 | 689 | factor_returns = perf.factor_returns( 690 | factor_data, demeaned=False, equal_weight=True 691 | ) 692 | 693 | mean_quant_ret, std_quantile = perf.mean_return_by_quantile( 694 | factor_data, by_group=False, demeaned=long_short 695 | ) 696 | if rate_of_ret: 697 | mean_quant_ret = mean_quant_ret.apply( 698 | utils.rate_of_return, axis=0, base_period=mean_quant_ret.columns[0] 699 | ) 700 | 701 | mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile( 702 | factor_data, by_date=True, by_group=False, demeaned=long_short 703 | ) 704 | if rate_of_ret: 705 | mean_quant_ret_bydate = mean_quant_ret_bydate.apply( 706 | utils.rate_of_return, 707 | axis=0, 708 | base_period=mean_quant_ret_bydate.columns[0], 709 | ) 710 | 711 | fr_cols = len(factor_returns.columns) 712 | vertical_sections = 2 + fr_cols * 1 713 | gf = GridFigure(rows=vertical_sections + 1, cols=1) 714 | 715 | plotting.plot_quantile_returns_bar( 716 | mean_quant_ret, by_group=False, ylim_percentiles=None, ax=gf.next_row() 717 | ) 718 | 719 | plotting.plot_quantile_returns_violin( 720 | mean_quant_ret_bydate, ylim_percentiles=(1, 99), ax=gf.next_row() 721 | ) 722 | 723 | trading_calendar = factor_data.index.levels[0].freq 724 | if trading_calendar is None: 725 | trading_calendar = pd.tseries.offsets.BDay() 726 | warnings.warn( 727 | "'freq' not set in factor_data index: assuming business day", 728 | UserWarning, 729 | ) 730 | 731 | plt.show() 732 | gf.close() 733 | -------------------------------------------------------------------------------- /alphalens/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ckend/alphalens/4979057c6fbd045a7998dba53388654d45a217ef/alphalens/tests/__init__.py -------------------------------------------------------------------------------- /alphalens/tests/matplotlibrc: -------------------------------------------------------------------------------- 1 | backend : Agg -------------------------------------------------------------------------------- /alphalens/tests/test_tears.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2017 Quantopian, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
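The event-style factor consumed by create_event_study_tear_sheet above is a sparse (date, asset) Series, much like the fixtures the test file below constructs; a toy version:

```python
import numpy as np
import pandas as pd

# Mostly-NaN event markers; stack() keeps only the event rows.
dates = pd.date_range('2020-01-01', periods=4, freq='B', name='date')
events = pd.DataFrame([[1, np.nan],
                       [np.nan, np.nan],
                       [np.nan, 1],
                       [1, np.nan]],
                      index=dates, columns=['A', 'B']).stack()
print(events)  # three rows, one per (date, asset) event
```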
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import division 17 | from unittest import TestCase 18 | from parameterized import parameterized 19 | from numpy import nan 20 | from pandas import (DataFrame, date_range, Timedelta, concat) 21 | 22 | from .. tears import (create_returns_tear_sheet, 23 | create_information_tear_sheet, 24 | create_turnover_tear_sheet, 25 | create_summary_tear_sheet, 26 | create_full_tear_sheet, 27 | create_event_returns_tear_sheet, 28 | create_event_study_tear_sheet) 29 | 30 | from .. utils import get_clean_factor_and_forward_returns 31 | 32 | 33 | class TearsTestCase(TestCase): 34 | 35 | tickers = ['A', 'B', 'C', 'D', 'E', 'F'] 36 | 37 | factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2} 38 | 39 | price_data = [[1.25**i, 1.50**i, 1.00**i, 0.50**i, 1.50**i, 1.00**i] 40 | for i in range(1, 51)] 41 | 42 | factor_data = [[3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 43 | [3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 44 | [3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 45 | [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2], 46 | [3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 47 | [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2], 48 | [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2], 49 | [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2], 50 | [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2], 51 | [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2], 52 | [3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 53 | [3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 54 | [3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 55 | [3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], 56 | [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2]] 57 | 58 | event_data = [[1, nan, nan, nan, nan, nan], 59 | [4, nan, nan, 7, nan, nan], 60 | [nan, nan, nan, nan, nan, nan], 61 | [nan, 3, nan, 2, nan, nan], 62 | [1, nan, nan, nan, nan, nan], 63 | [nan, nan, 2, nan, nan, nan], 64 | [nan, nan, nan, 2, nan, nan], 65 | [nan, nan, nan, 1, nan, nan], 66 | [2, nan, nan, nan, nan, nan], 67 | [nan, nan, nan, nan, 5, nan], 68 | [nan, nan, nan, 2, nan, nan], 69 | [nan, nan, nan, nan, nan, nan], 70 | [2, nan, nan, nan, nan, nan], 71 | [nan, nan, nan, nan, nan, 5], 72 | [nan, nan, nan, 1, nan, nan], 73 | [nan, nan, nan, nan, 4, nan], 74 | [5, nan, nan, 4, nan, nan], 75 | [nan, nan, nan, 3, nan, nan], 76 | [nan, nan, nan, 4, nan, nan], 77 | [nan, nan, 2, nan, nan, nan], 78 | [5, nan, nan, nan, nan, nan], 79 | [nan, 1, nan, nan, nan, nan], 80 | [nan, nan, nan, nan, 4, nan], 81 | [0, nan, nan, nan, nan, nan], 82 | [nan, 5, nan, nan, nan, 4], 83 | [nan, nan, nan, nan, nan, nan], 84 | [nan, nan, 5, nan, nan, 3], 85 | [nan, nan, 1, 2, 3, nan], 86 | [nan, nan, nan, 5, nan, nan], 87 | [nan, nan, 1, nan, 3, nan]] 88 | 89 | # 90 | # business days calendar 91 | # 92 | bprice_index = date_range(start='2015-1-10', end='2015-3-22', freq='B') 93 | bprice_index.name = 'date' 94 | bprices = DataFrame(index=bprice_index, columns=tickers, data=price_data) 95 | 96 | bfactor_index = date_range(start='2015-1-15', end='2015-2-25', freq='B') 97 | bfactor_index.name = 'date' 98 | bfactor = DataFrame(index=bfactor_index, 
columns=tickers, 99 | data=factor_data).stack() 100 | 101 | # 102 | # full calendar 103 | # 104 | price_index = date_range(start='2015-1-10', end='2015-2-28') 105 | price_index.name = 'date' 106 | prices = DataFrame(index=price_index, columns=tickers, data=price_data) 107 | 108 | factor_index = date_range(start='2015-1-15', end='2015-2-13') 109 | factor_index.name = 'date' 110 | factor = DataFrame(index=factor_index, columns=tickers, 111 | data=factor_data).stack() 112 | 113 | # 114 | # intraday factor 115 | # 116 | today_open = DataFrame(index=price_index+Timedelta('9h30m'), 117 | columns=tickers, data=price_data) 118 | today_open_1h = DataFrame(index=price_index+Timedelta('10h30m'), 119 | columns=tickers, data=price_data) 120 | today_open_1h += today_open_1h*0.001 121 | today_open_3h = DataFrame(index=price_index+Timedelta('12h30m'), 122 | columns=tickers, data=price_data) 123 | today_open_3h -= today_open_3h*0.002 124 | intraday_prices = concat([today_open, today_open_1h, today_open_3h]) \ 125 | .sort_index() 126 | 127 | intraday_factor = DataFrame(index=factor_index+Timedelta('9h30m'), 128 | columns=tickers, data=factor_data).stack() 129 | 130 | # 131 | # event factor 132 | # 133 | bevent_factor = DataFrame(index=bfactor_index, columns=tickers, 134 | data=event_data).stack() 135 | 136 | event_factor = DataFrame(index=factor_index, columns=tickers, 137 | data=event_data).stack() 138 | 139 | all_prices = [prices, bprices] 140 | all_factors = [factor, bfactor] 141 | all_events = [event_factor, bevent_factor] 142 | 143 | def __localize_prices_and_factor(self, prices, factor, tz): 144 | if tz is not None: 145 | factor = factor.unstack() 146 | factor.index = factor.index.tz_localize(tz) 147 | factor = factor.stack() 148 | prices = prices.copy() 149 | prices.index = prices.index.tz_localize(tz) 150 | return prices, factor 151 | 152 | @parameterized.expand([(2, (1, 5, 10), None), 153 | (3, (2, 4, 6), 20)]) 154 | def test_create_returns_tear_sheet( 155 | self, 156 | quantiles, 157 | periods, 158 | filter_zscore): 159 | """ 160 | Test no exceptions are thrown 161 | """ 162 | factor_data = get_clean_factor_and_forward_returns( 163 | self.factor, 164 | self.prices, 165 | quantiles=quantiles, 166 | periods=periods, 167 | filter_zscore=filter_zscore) 168 | 169 | create_returns_tear_sheet( 170 | factor_data, long_short=False, group_neutral=False, by_group=False) 171 | 172 | @parameterized.expand([(1, (1, 5, 10), None), 173 | (4, (1, 2, 3, 7), 20)]) 174 | def test_create_information_tear_sheet( 175 | self, quantiles, periods, filter_zscore): 176 | """ 177 | Test no exceptions are thrown 178 | """ 179 | factor_data = get_clean_factor_and_forward_returns( 180 | self.factor, 181 | self.prices, 182 | quantiles=quantiles, 183 | periods=periods, 184 | filter_zscore=filter_zscore) 185 | 186 | create_information_tear_sheet( 187 | factor_data, group_neutral=False, by_group=False) 188 | 189 | @parameterized.expand([ 190 | (2, (2, 3, 6), None, 20), 191 | (4, (1, 2, 3, 7), None, None), 192 | (2, (2, 3, 6), ['1D', '2D'], 20), 193 | (4, (1, 2, 3, 7), ['1D'], None), 194 | ]) 195 | def test_create_turnover_tear_sheet( 196 | self, 197 | quantiles, 198 | periods, 199 | turnover_periods, 200 | filter_zscore): 201 | """ 202 | Test no exceptions are thrown 203 | """ 204 | factor_data = get_clean_factor_and_forward_returns( 205 | self.factor, 206 | self.prices, 207 | quantiles=quantiles, 208 | periods=periods, 209 | filter_zscore=filter_zscore) 210 | 211 | create_turnover_tear_sheet(factor_data, turnover_periods) 212 | 213 
| @parameterized.expand([(2, (1, 5, 10), None), 214 | (3, (1, 2, 3, 7), 20)]) 215 | def test_create_summary_tear_sheet( 216 | self, 217 | quantiles, 218 | periods, 219 | filter_zscore): 220 | """ 221 | Test no exceptions are thrown 222 | """ 223 | factor_data = get_clean_factor_and_forward_returns( 224 | self.factor, 225 | self.prices, 226 | quantiles=quantiles, 227 | periods=periods, 228 | filter_zscore=filter_zscore) 229 | 230 | create_summary_tear_sheet( 231 | factor_data, long_short=True, group_neutral=False) 232 | create_summary_tear_sheet( 233 | factor_data, long_short=False, group_neutral=False) 234 | 235 | @parameterized.expand([ 236 | (2, (1, 5, 10), None, None), 237 | (3, (2, 4, 6), 20, 'US/Eastern'), 238 | (4, (1, 8), 20, None), 239 | (4, (1, 2, 3, 7), None, 'US/Eastern'), 240 | ]) 241 | def test_create_full_tear_sheet( 242 | self, 243 | quantiles, 244 | periods, 245 | filter_zscore, 246 | tz): 247 | """ 248 | Test no exceptions are thrown 249 | """ 250 | for factor, prices in zip(self.all_factors, self.all_prices): 251 | 252 | prices, factor = self.__localize_prices_and_factor(prices, 253 | factor, 254 | tz) 255 | factor_data = get_clean_factor_and_forward_returns( 256 | factor, 257 | prices, 258 | groupby=self.factor_groups, 259 | quantiles=quantiles, 260 | periods=periods, 261 | filter_zscore=filter_zscore) 262 | 263 | create_full_tear_sheet(factor_data, long_short=False, 264 | group_neutral=False, by_group=False) 265 | create_full_tear_sheet(factor_data, long_short=True, 266 | group_neutral=False, by_group=True) 267 | create_full_tear_sheet(factor_data, long_short=True, 268 | group_neutral=True, by_group=True) 269 | 270 | @parameterized.expand([(2, (1, 5, 10), None, None), 271 | (3, (2, 4, 6), 20, None), 272 | (4, (3, 4), None, 'US/Eastern'), 273 | (1, (2, 3, 6, 9), 20, 'US/Eastern')]) 274 | def test_create_event_returns_tear_sheet( 275 | self, quantiles, periods, filter_zscore, tz): 276 | """ 277 | Test no exceptions are thrown 278 | """ 279 | for factor, prices in zip(self.all_factors, self.all_prices): 280 | 281 | prices, factor = self.__localize_prices_and_factor(prices, 282 | factor, 283 | tz) 284 | factor_data = get_clean_factor_and_forward_returns( 285 | factor, 286 | prices, 287 | groupby=self.factor_groups, 288 | quantiles=quantiles, 289 | periods=periods, 290 | filter_zscore=filter_zscore) 291 | 292 | create_event_returns_tear_sheet(factor_data, prices, avgretplot=( 293 | 5, 11), long_short=False, group_neutral=False, by_group=False) 294 | create_event_returns_tear_sheet(factor_data, prices, avgretplot=( 295 | 5, 11), long_short=True, group_neutral=False, by_group=False) 296 | create_event_returns_tear_sheet(factor_data, prices, avgretplot=( 297 | 5, 11), long_short=False, group_neutral=True, by_group=False) 298 | create_event_returns_tear_sheet(factor_data, prices, avgretplot=( 299 | 5, 11), long_short=False, group_neutral=False, by_group=True) 300 | create_event_returns_tear_sheet(factor_data, prices, avgretplot=( 301 | 5, 11), long_short=True, group_neutral=False, by_group=True) 302 | create_event_returns_tear_sheet(factor_data, prices, avgretplot=( 303 | 5, 11), long_short=False, group_neutral=True, by_group=True) 304 | 305 | @parameterized.expand([((6, 8), None, None), 306 | ((6, 8), None, None), 307 | ((6, 3), 20, None), 308 | ((6, 3), 20, 'US/Eastern'), 309 | ((0, 3), None, None), 310 | ((3, 0), 20, 'US/Eastern')]) 311 | def test_create_event_study_tear_sheet( 312 | self, avgretplot, filter_zscore, tz): 313 | """ 314 | Test no exceptions are thrown 315 | """ 
316 |         for factor, prices in zip(self.all_events, self.all_prices):
317 | 
318 |             prices, factor = self.__localize_prices_and_factor(prices,
319 |                                                                factor,
320 |                                                                tz)
321 |             factor_data = get_clean_factor_and_forward_returns(
322 |                 factor, prices, bins=1, quantiles=None, periods=(
323 |                     1, 2), filter_zscore=filter_zscore)
324 | 
325 |             create_event_study_tear_sheet(
326 |                 factor_data, prices, avgretplot=avgretplot)
327 | 
--------------------------------------------------------------------------------
/alphalens/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2018 Quantopian, Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import division
17 | from unittest import TestCase
18 | from parameterized import parameterized
19 | from numpy import (nan)
20 | 
21 | from pandas import (
22 |     Series,
23 |     DataFrame,
24 |     date_range,
25 |     MultiIndex,
26 |     Timedelta,
27 |     Timestamp,
28 |     concat,
29 | )
30 | from pandas.util.testing import (assert_frame_equal,
31 |                                  assert_series_equal)
32 | 
33 | from .. utils import (get_clean_factor_and_forward_returns,
34 |                       compute_forward_returns,
35 |                       quantize_factor)
36 | 
37 | 
38 | class UtilsTestCase(TestCase):
39 |     dr = date_range(start='2015-1-1', end='2015-1-2')
40 |     dr.name = 'date'
41 |     tickers = ['A', 'B', 'C', 'D']
42 | 
43 |     factor = DataFrame(index=dr,
44 |                        columns=tickers,
45 |                        data=[[1, 2, 3, 4],
46 |                              [4, 3, 2, 1]]).stack()
47 |     factor.index = factor.index.set_names(['date', 'asset'])
48 |     factor.name = 'factor'
49 |     factor_data = DataFrame()
50 |     factor_data['factor'] = factor
51 |     factor_data['group'] = Series(index=factor.index,
52 |                                   data=[1, 1, 2, 2, 1, 1, 2, 2],
53 |                                   dtype="category")
54 | 
55 |     biased_factor = DataFrame(index=dr,
56 |                               columns=tickers + ['E', 'F', 'G', 'H'],  # list concat; extend() returns None
57 |                               data=[[-1, 3, -2, 4, -5, 7, -6, 8],
58 |                                     [-4, 2, -3, 1, -8, 6, -7, 5]]).stack()
59 |     biased_factor.index = biased_factor.index.set_names(['date', 'asset'])
60 |     biased_factor.name = 'factor'
61 |     biased_factor_data = DataFrame()
62 |     biased_factor_data['factor'] = biased_factor
63 |     biased_factor_data['group'] = Series(index=biased_factor.index,
64 |                                          data=[1, 1, 2, 2, 1, 1, 2, 2,
65 |                                                1, 1, 2, 2, 1, 1, 2, 2],
66 |                                          dtype="category")
67 | 
68 |     def test_compute_forward_returns(self):
69 |         dr = date_range(start='2015-1-1', end='2015-1-3')
70 |         prices = DataFrame(index=dr, columns=['A', 'B'],
71 |                            data=[[1, 1], [1, 2], [2, 1]])
72 |         factor = prices.stack()
73 | 
74 |         fp = compute_forward_returns(factor, prices, periods=[1, 2])
75 | 
76 |         ix = MultiIndex.from_product([dr, ['A', 'B']],
77 |                                      names=['date', 'asset'])
78 |         expected = DataFrame(index=ix, columns=['1D', '2D'])
79 |         expected['1D'] = [0., 1., 1., -0.5, nan, nan]
80 |         expected['2D'] = [1., 0., nan, nan, nan, nan]
81 | 
82 |         assert_frame_equal(fp, expected)
83 | 
84 |     def test_compute_forward_returns_index_out_of_bound(self):
85 |         dr = date_range(start='2014-12-29', end='2015-1-3')
86 | 
prices = DataFrame(index=dr, columns=['A', 'B'], 87 | data=[[nan, nan], [nan, nan], [nan, nan], 88 | [1, 1], [1, 2], [2, 1]]) 89 | 90 | dr = date_range(start='2015-1-1', end='2015-1-3') 91 | factor = DataFrame(index=dr, columns=['A', 'B'], 92 | data=[[1, 1], [1, 2], [2, 1]]) 93 | factor = factor.stack() 94 | 95 | fp = compute_forward_returns(factor, prices, periods=[1, 2]) 96 | 97 | ix = MultiIndex.from_product([dr, ['A', 'B']], 98 | names=['date', 'asset']) 99 | expected = DataFrame(index=ix, columns=['1D', '2D']) 100 | expected['1D'] = [0., 1., 1., -0.5, nan, nan] 101 | expected['2D'] = [1., 0., nan, nan, nan, nan] 102 | 103 | assert_frame_equal(fp, expected) 104 | 105 | def test_compute_forward_returns_non_cum(self): 106 | dr = date_range(start='2015-1-1', end='2015-1-3') 107 | prices = DataFrame(index=dr, columns=['A', 'B'], 108 | data=[[1, 1], [1, 2], [2, 1]]) 109 | factor = prices.stack() 110 | 111 | fp = compute_forward_returns(factor, prices, periods=[1, 2], 112 | cumulative_returns=False) 113 | 114 | ix = MultiIndex.from_product([dr, ['A', 'B']], 115 | names=['date', 'asset']) 116 | expected = DataFrame(index=ix, columns=['1D', '2D']) 117 | expected['1D'] = [0., 1., 1., -0.5, nan, nan] 118 | expected['2D'] = [1., -0.5, nan, nan, nan, nan] 119 | 120 | assert_frame_equal(fp, expected) 121 | 122 | @parameterized.expand([(factor_data, 4, None, False, False, 123 | [1, 2, 3, 4, 4, 3, 2, 1]), 124 | (factor_data, 2, None, False, False, 125 | [1, 1, 2, 2, 2, 2, 1, 1]), 126 | (factor_data, 2, None, True, False, 127 | [1, 2, 1, 2, 2, 1, 2, 1]), 128 | (biased_factor_data, 4, None, False, True, 129 | [2, 3, 2, 3, 1, 4, 1, 4, 2, 3, 2, 3, 1, 4, 1, 4]), 130 | (biased_factor_data, 2, None, False, True, 131 | [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]), 132 | (biased_factor_data, 2, None, True, True, 133 | [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]), 134 | (biased_factor_data, None, 4, False, True, 135 | [2, 3, 2, 3, 1, 4, 1, 4, 2, 3, 2, 3, 1, 4, 1, 4]), 136 | (biased_factor_data, None, 2, False, True, 137 | [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]), 138 | (biased_factor_data, None, 2, True, True, 139 | [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]), 140 | (factor_data, [0, .25, .5, .75, 1.], None, False, 141 | False, [1, 2, 3, 4, 4, 3, 2, 1]), 142 | (factor_data, [0, .5, .75, 1.], None, False, False, 143 | [1, 1, 2, 3, 3, 2, 1, 1]), 144 | (factor_data, [0, .25, .5, 1.], None, False, False, 145 | [1, 2, 3, 3, 3, 3, 2, 1]), 146 | (factor_data, [0, .5, 1.], None, False, False, 147 | [1, 1, 2, 2, 2, 2, 1, 1]), 148 | (factor_data, [.25, .5, .75], None, False, False, 149 | [nan, 1, 2, nan, nan, 2, 1, nan]), 150 | (factor_data, [0, .5, 1.], None, True, False, 151 | [1, 2, 1, 2, 2, 1, 2, 1]), 152 | (factor_data, [.5, 1.], None, True, False, 153 | [nan, 1, nan, 1, 1, nan, 1, nan]), 154 | (factor_data, [0, 1.], None, True, False, 155 | [1, 1, 1, 1, 1, 1, 1, 1]), 156 | (factor_data, None, 4, False, False, 157 | [1, 2, 3, 4, 4, 3, 2, 1]), 158 | (factor_data, None, 2, False, False, 159 | [1, 1, 2, 2, 2, 2, 1, 1]), 160 | (factor_data, None, 3, False, False, 161 | [1, 1, 2, 3, 3, 2, 1, 1]), 162 | (factor_data, None, 8, False, False, 163 | [1, 3, 6, 8, 8, 6, 3, 1]), 164 | (factor_data, None, [0, 1, 2, 3, 5], False, False, 165 | [1, 2, 3, 4, 4, 3, 2, 1]), 166 | (factor_data, None, [1, 2, 3], False, False, 167 | [nan, 1, 2, nan, nan, 2, 1, nan]), 168 | (factor_data, None, [0, 2, 5], False, False, 169 | [1, 1, 2, 2, 2, 2, 1, 1]), 170 | (factor_data, None, [0.5, 2.5, 4.5], False, False, 171 | 
[1, 1, 2, 2, 2, 2, 1, 1]), 172 | (factor_data, None, [0.5, 2.5], True, False, 173 | [1, 1, nan, nan, nan, nan, 1, 1]), 174 | (factor_data, None, 2, True, False, 175 | [1, 2, 1, 2, 2, 1, 2, 1])]) 176 | def test_quantize_factor(self, factor, quantiles, bins, by_group, 177 | zero_aware, expected_vals): 178 | quantized_factor = quantize_factor(factor, 179 | quantiles=quantiles, 180 | bins=bins, 181 | by_group=by_group, 182 | zero_aware=zero_aware) 183 | expected = Series(index=factor.index, 184 | data=expected_vals, 185 | name='factor_quantile').dropna() 186 | assert_series_equal(quantized_factor, expected) 187 | 188 | def test_get_clean_factor_and_forward_returns_1(self): 189 | """ 190 | Test get_clean_factor_and_forward_returns with a daily factor 191 | """ 192 | tickers = ['A', 'B', 'C', 'D', 'E', 'F'] 193 | 194 | factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2} 195 | 196 | price_data = [[1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i] 197 | for i in range(1, 7)] # 6 days = 3 + 3 fwd returns 198 | 199 | factor_data = [[3, 4, 2, 1, nan, nan], 200 | [3, nan, nan, 1, 4, 2], 201 | [3, 4, 2, 1, nan, nan]] # 3 days 202 | 203 | start = '2015-1-11' 204 | factor_end = '2015-1-13' 205 | price_end = '2015-1-16' # 3D fwd returns 206 | 207 | price_index = date_range(start=start, end=price_end) 208 | price_index.name = 'date' 209 | prices = DataFrame(index=price_index, columns=tickers, data=price_data) 210 | 211 | factor_index = date_range(start=start, end=factor_end) 212 | factor_index.name = 'date' 213 | factor = DataFrame(index=factor_index, columns=tickers, 214 | data=factor_data).stack() 215 | 216 | factor_data = get_clean_factor_and_forward_returns( 217 | factor, prices, 218 | groupby=factor_groups, 219 | quantiles=4, 220 | periods=(1, 2, 3)) 221 | 222 | expected_idx = factor.index.rename(['date', 'asset']) 223 | expected_cols = ['1D', '2D', '3D', 224 | 'factor', 'group', 'factor_quantile'] 225 | expected_data = [[0.1, 0.21, 0.331, 3.0, 1, 3], 226 | [-0.5, -0.75, -0.875, 4.0, 2, 4], 227 | [2.0, 8.00, 26.000, 2.0, 1, 2], 228 | [-0.1, -0.19, -0.271, 1.0, 2, 1], 229 | [0.1, 0.21, 0.331, 3.0, 1, 3], 230 | [-0.1, -0.19, -0.271, 1.0, 2, 1], 231 | [-0.5, -0.75, -0.875, 4.0, 1, 4], 232 | [0.0, 0.00, 0.000, 2.0, 2, 2], 233 | [0.1, 0.21, 0.331, 3.0, 1, 3], 234 | [-0.5, -0.75, -0.875, 4.0, 2, 4], 235 | [2.0, 8.00, 26.000, 2.0, 1, 2], 236 | [-0.1, -0.19, -0.271, 1.0, 2, 1]] 237 | expected = DataFrame(index=expected_idx, 238 | columns=expected_cols, data=expected_data) 239 | expected['group'] = expected['group'].astype('category') 240 | 241 | assert_frame_equal(factor_data, expected) 242 | 243 | def test_get_clean_factor_and_forward_returns_2(self): 244 | """ 245 | Test get_clean_factor_and_forward_returns with a daily factor 246 | on a business day calendar 247 | """ 248 | tickers = ['A', 'B', 'C', 'D', 'E', 'F'] 249 | 250 | factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2} 251 | 252 | price_data = [[1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i] 253 | for i in range(1, 7)] # 6 days = 3 + 3 fwd returns 254 | 255 | factor_data = [[3, 4, 2, 1, nan, nan], 256 | [3, nan, nan, 1, 4, 2], 257 | [3, 4, 2, 1, nan, nan]] # 3 days 258 | 259 | start = '2017-1-12' 260 | factor_end = '2017-1-16' 261 | price_end = '2017-1-19' # 3D fwd returns 262 | 263 | price_index = date_range(start=start, end=price_end, freq='B') 264 | price_index.name = 'date' 265 | prices = DataFrame(index=price_index, columns=tickers, data=price_data) 266 | 267 | factor_index = date_range(start=start, 
end=factor_end, freq='B') 268 | factor_index.name = 'date' 269 | factor = DataFrame(index=factor_index, columns=tickers, 270 | data=factor_data).stack() 271 | 272 | factor_data = get_clean_factor_and_forward_returns( 273 | factor, prices, 274 | groupby=factor_groups, 275 | quantiles=4, 276 | periods=(1, 2, 3)) 277 | 278 | expected_idx = factor.index.rename(['date', 'asset']) 279 | expected_cols = ['1D', '2D', '3D', 280 | 'factor', 'group', 'factor_quantile'] 281 | expected_data = [[0.1, 0.21, 0.331, 3.0, 1, 3], 282 | [-0.5, -0.75, -0.875, 4.0, 2, 4], 283 | [2.0, 8.00, 26.000, 2.0, 1, 2], 284 | [-0.1, -0.19, -0.271, 1.0, 2, 1], 285 | [0.1, 0.21, 0.331, 3.0, 1, 3], 286 | [-0.1, -0.19, -0.271, 1.0, 2, 1], 287 | [-0.5, -0.75, -0.875, 4.0, 1, 4], 288 | [0.0, 0.00, 0.000, 2.0, 2, 2], 289 | [0.1, 0.21, 0.331, 3.0, 1, 3], 290 | [-0.5, -0.75, -0.875, 4.0, 2, 4], 291 | [2.0, 8.00, 26.000, 2.0, 1, 2], 292 | [-0.1, -0.19, -0.271, 1.0, 2, 1]] 293 | expected = DataFrame(index=expected_idx, 294 | columns=expected_cols, data=expected_data) 295 | expected['group'] = expected['group'].astype('category') 296 | 297 | assert_frame_equal(factor_data, expected) 298 | 299 | def test_get_clean_factor_and_forward_returns_3(self): 300 | """ 301 | Test get_clean_factor_and_forward_returns with and intraday factor 302 | """ 303 | tickers = ['A', 'B', 'C', 'D', 'E', 'F'] 304 | 305 | factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2} 306 | 307 | price_data = [[1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i] 308 | for i in range(1, 5)] # 4 days = 3 + 1 fwd returns 309 | 310 | factor_data = [[3, 4, 2, 1, nan, nan], 311 | [3, nan, nan, 1, 4, 2], 312 | [3, 4, 2, 1, nan, nan]] # 3 days 313 | 314 | start = '2017-1-12' 315 | factor_end = '2017-1-16' 316 | price_end = '2017-1-17' # 1D fwd returns 317 | 318 | price_index = date_range(start=start, end=price_end, freq='B') 319 | price_index.name = 'date' 320 | today_open = DataFrame(index=price_index + Timedelta('9h30m'), 321 | columns=tickers, data=price_data) 322 | today_open_1h = DataFrame(index=price_index + Timedelta('10h30m'), 323 | columns=tickers, data=price_data) 324 | today_open_1h += today_open_1h * 0.001 325 | today_open_3h = DataFrame(index=price_index + Timedelta('12h30m'), 326 | columns=tickers, data=price_data) 327 | today_open_3h -= today_open_3h * 0.002 328 | prices = concat([today_open, today_open_1h, today_open_3h]) \ 329 | .sort_index() 330 | 331 | factor_index = date_range(start=start, end=factor_end, freq='B') 332 | factor_index.name = 'date' 333 | factor = DataFrame(index=factor_index + Timedelta('9h30m'), 334 | columns=tickers, data=factor_data).stack() 335 | 336 | factor_data = get_clean_factor_and_forward_returns( 337 | factor, prices, 338 | groupby=factor_groups, 339 | quantiles=4, 340 | periods=(1, 2, 3)) 341 | 342 | expected_idx = factor.index.rename(['date', 'asset']) 343 | expected_cols = ['1h', '3h', '1D', 344 | 'factor', 'group', 'factor_quantile'] 345 | expected_data = [[0.001, -0.002, 0.1, 3.0, 1, 3], 346 | [0.001, -0.002, -0.5, 4.0, 2, 4], 347 | [0.001, -0.002, 2.0, 2.0, 1, 2], 348 | [0.001, -0.002, -0.1, 1.0, 2, 1], 349 | [0.001, -0.002, 0.1, 3.0, 1, 3], 350 | [0.001, -0.002, -0.1, 1.0, 2, 1], 351 | [0.001, -0.002, -0.5, 4.0, 1, 4], 352 | [0.001, -0.002, 0.0, 2.0, 2, 2], 353 | [0.001, -0.002, 0.1, 3.0, 1, 3], 354 | [0.001, -0.002, -0.5, 4.0, 2, 4], 355 | [0.001, -0.002, 2.0, 2.0, 1, 2], 356 | [0.001, -0.002, -0.1, 1.0, 2, 1]] 357 | expected = DataFrame(index=expected_idx, 358 | columns=expected_cols, data=expected_data) 359 
359 | expected['group'] = expected['group'].astype('category')
360 |
361 | assert_frame_equal(factor_data, expected)
362 |
363 | def test_get_clean_factor_and_forward_returns_4(self):
364 | """
365 | Test get_clean_factor_and_forward_returns on an event
366 | """
367 | tickers = ['A', 'B', 'C', 'D', 'E', 'F']
368 |
369 | factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2}
370 |
371 | price_data = [[1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i]
372 | for i in range(1, 9)]
373 |
374 | factor_data = [[1, nan, nan, nan, nan, 6],
375 | [4, nan, nan, 7, nan, nan],
376 | [nan, nan, nan, nan, nan, nan],
377 | [nan, 3, nan, 2, nan, nan],
378 | [nan, nan, 1, nan, 3, nan]]
379 |
380 | price_index = date_range(start='2017-1-12', end='2017-1-23', freq='B')
381 | price_index.name = 'date'
382 | prices = DataFrame(index=price_index, columns=tickers, data=price_data)
383 |
384 | factor_index = date_range(start='2017-1-12', end='2017-1-18', freq='B')
385 | factor_index.name = 'date'
386 | factor = DataFrame(index=factor_index, columns=tickers,
387 | data=factor_data).stack()
388 |
389 | factor_data = get_clean_factor_and_forward_returns(
390 | factor, prices,
391 | groupby=factor_groups,
392 | quantiles=4,
393 | periods=(1, 2, 3))
394 |
395 | expected_idx = factor.index.rename(['date', 'asset'])
396 | expected_cols = ['1D', '2D', '3D',
397 | 'factor', 'group', 'factor_quantile']
398 | expected_data = [[0.1, 0.21, 0.331, 1.0, 1, 1],
399 | [0.0, 0.00, 0.000, 6.0, 2, 4],
400 | [0.1, 0.21, 0.331, 4.0, 1, 1],
401 | [-0.1, -0.19, -0.271, 7.0, 2, 4],
402 | [-0.5, -0.75, -0.875, 3.0, 2, 4],
403 | [-0.1, -0.19, -0.271, 2.0, 2, 1],
404 | [2.0, 8.00, 26.000, 1.0, 1, 1],
405 | [-0.5, -0.75, -0.875, 3.0, 1, 4]]
406 | expected = DataFrame(index=expected_idx,
407 | columns=expected_cols, data=expected_data)
408 | expected['group'] = expected['group'].astype('category')
409 |
410 | assert_frame_equal(factor_data, expected)
411 |
412 | def test_get_clean_factor_and_forward_returns_5(self):
413 | """
414 | Test get_clean_factor_and_forward_returns with an intraday factor
415 | and holidays
416 | """
417 | tickers = ['A', 'B', 'C', 'D', 'E', 'F']
418 |
419 | factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2}
420 |
421 | price_data = [[1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i]
422 | for i in range(1, 20)] # 19 days = 18 + 1 fwd returns
423 |
424 | factor_data = [[3, 4, 2, 1, nan, nan],
425 | [3, nan, nan, 1, 4, 2],
426 | [3, 4, 2, 1, nan, nan]] * 6 # 18 days
427 |
428 | start = '2017-1-12'
429 | factor_end = '2017-2-10'
430 | price_end = '2017-2-13' # 1D (business day) fwd returns
431 | holidays = ['2017-1-13', '2017-1-18', '2017-1-30', '2017-2-7']
432 | holidays = [Timestamp(d) for d in holidays]
433 |
434 | price_index = date_range(start=start, end=price_end, freq='B')
435 | price_index.name = 'date'
436 | price_index = price_index.drop(holidays)
437 |
438 | today_open = DataFrame(index=price_index + Timedelta('9h30m'),
439 | columns=tickers, data=price_data)
440 | today_open_1h = DataFrame(index=price_index + Timedelta('10h30m'),
441 | columns=tickers, data=price_data)
442 | today_open_1h += today_open_1h * 0.001
443 | today_open_3h = DataFrame(index=price_index + Timedelta('12h30m'),
444 | columns=tickers, data=price_data)
445 | today_open_3h -= today_open_3h * 0.002
446 | prices = concat([today_open, today_open_1h, today_open_3h]) \
447 | .sort_index()
448 |
449 | factor_index = date_range(start=start, end=factor_end, freq='B')
450 | factor_index.name = 'date'
451 |
factor_index = factor_index.drop(holidays) 452 | factor = DataFrame(index=factor_index + Timedelta('9h30m'), 453 | columns=tickers, data=factor_data).stack() 454 | 455 | factor_data = get_clean_factor_and_forward_returns( 456 | factor, prices, 457 | groupby=factor_groups, 458 | quantiles=4, 459 | periods=(1, 2, 3)) 460 | 461 | expected_idx = factor.index.rename(['date', 'asset']) 462 | expected_cols = ['1h', '3h', '1D', 463 | 'factor', 'group', 'factor_quantile'] 464 | expected_data = [[0.001, -0.002, 0.1, 3.0, 1, 3], 465 | [0.001, -0.002, -0.5, 4.0, 2, 4], 466 | [0.001, -0.002, 2.0, 2.0, 1, 2], 467 | [0.001, -0.002, -0.1, 1.0, 2, 1], 468 | [0.001, -0.002, 0.1, 3.0, 1, 3], 469 | [0.001, -0.002, -0.1, 1.0, 2, 1], 470 | [0.001, -0.002, -0.5, 4.0, 1, 4], 471 | [0.001, -0.002, 0.0, 2.0, 2, 2], 472 | [0.001, -0.002, 0.1, 3.0, 1, 3], 473 | [0.001, -0.002, -0.5, 4.0, 2, 4], 474 | [0.001, -0.002, 2.0, 2.0, 1, 2], 475 | [0.001, -0.002, -0.1, 1.0, 2, 1]] * 6 # 18 days 476 | expected = DataFrame(index=expected_idx, 477 | columns=expected_cols, data=expected_data) 478 | expected['group'] = expected['group'].astype('category') 479 | 480 | assert_frame_equal(factor_data, expected) 481 | 482 | inferred_holidays = factor_data.index.levels[0].freq.holidays 483 | assert sorted(holidays) == sorted(inferred_holidays) 484 | 485 | def test_get_clean_factor_and_forward_returns_6(self): 486 | """ 487 | Test get_clean_factor_and_forward_returns with a daily factor 488 | on a business day calendar and holidays 489 | """ 490 | tickers = ['A', 'B', 'C', 'D', 'E', 'F'] 491 | 492 | factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2} 493 | 494 | price_data = [[1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i] 495 | for i in range(1, 22)] # 21 days = 18 + 3 fwd returns 496 | 497 | factor_data = [[3, 4, 2, 1, nan, nan], 498 | [3, nan, nan, 1, 4, 2], 499 | [3, 4, 2, 1, nan, nan]] * 6 # 18 days 500 | 501 | start = '2017-1-12' 502 | factor_end = '2017-2-10' 503 | price_end = '2017-2-15' # 3D (business day) fwd returns 504 | holidays = ['2017-1-13', '2017-1-18', '2017-1-30', '2017-2-7'] 505 | holidays = [Timestamp(d) for d in holidays] 506 | 507 | price_index = date_range(start=start, end=price_end, freq='B') 508 | price_index.name = 'date' 509 | price_index = price_index.drop(holidays) 510 | prices = DataFrame(index=price_index, columns=tickers, data=price_data) 511 | 512 | factor_index = date_range(start=start, end=factor_end, freq='B') 513 | factor_index.name = 'date' 514 | factor_index = factor_index.drop(holidays) 515 | factor = DataFrame(index=factor_index, columns=tickers, 516 | data=factor_data).stack() 517 | 518 | factor_data = get_clean_factor_and_forward_returns( 519 | factor, prices, 520 | groupby=factor_groups, 521 | quantiles=4, 522 | periods=(1, 2, 3)) 523 | 524 | expected_idx = factor.index.rename(['date', 'asset']) 525 | expected_cols = ['1D', '2D', '3D', 526 | 'factor', 'group', 'factor_quantile'] 527 | expected_data = [[0.1, 0.21, 0.331, 3.0, 1, 3], 528 | [-0.5, -0.75, -0.875, 4.0, 2, 4], 529 | [2.0, 8.00, 26.000, 2.0, 1, 2], 530 | [-0.1, -0.19, -0.271, 1.0, 2, 1], 531 | [0.1, 0.21, 0.331, 3.0, 1, 3], 532 | [-0.1, -0.19, -0.271, 1.0, 2, 1], 533 | [-0.5, -0.75, -0.875, 4.0, 1, 4], 534 | [0.0, 0.00, 0.000, 2.0, 2, 2], 535 | [0.1, 0.21, 0.331, 3.0, 1, 3], 536 | [-0.5, -0.75, -0.875, 4.0, 2, 4], 537 | [2.0, 8.00, 26.000, 2.0, 1, 2], 538 | [-0.1, -0.19, -0.271, 1.0, 2, 1]] * 6 # 18 days 539 | expected = DataFrame(index=expected_idx, 540 | columns=expected_cols, data=expected_data) 541 | 
expected['group'] = expected['group'].astype('category')
542 |
543 | assert_frame_equal(factor_data, expected)
544 |
545 | inferred_holidays = factor_data.index.levels[0].freq.holidays
546 | assert sorted(holidays) == sorted(inferred_holidays)
547 |
-------------------------------------------------------------------------------- /alphalens/utils.py: --------------------------------------------------------------------------------
1 | #
2 | # Copyright 2018 Quantopian, Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import pandas as pd
17 | import numpy as np
18 | import re
19 | import warnings
20 |
21 | from IPython.display import display
22 | from pandas.tseries.offsets import CustomBusinessDay, Day, BusinessDay
23 | from scipy.stats import mode
24 |
25 |
26 | class NonMatchingTimezoneError(Exception):
27 | pass
28 |
29 |
30 | class MaxLossExceededError(Exception):
31 | pass
32 |
33 |
34 | def rethrow(exception, additional_message):
35 | """
36 | Re-raise the last exception that was active in the current scope
37 | without losing the stacktrace but adding an additional message.
38 | This is hacky because it has to be compatible with both Python 2 and 3
39 | """
40 | e = exception
41 | m = additional_message
42 | if not e.args:
43 | e.args = (m,)
44 | else:
45 | e.args = (e.args[0] + m,) + e.args[1:]
46 | raise e
47 |
48 |
49 | def non_unique_bin_edges_error(func):
50 | """
51 | Give user a more informative error in case it is not possible
52 | to properly calculate quantiles on the input dataframe (factor)
53 | """
54 | message = """
55 |
56 | An error occurred while computing bins/quantiles on the input provided.
57 | This usually happens when the input contains too many identical
58 | values and they span more than one quantile. The quantiles are chosen
59 | to have the same number of records each, but the same value cannot span
60 | multiple quantiles. Possible workarounds are:
61 | 1 - Decrease the number of quantiles
62 | 2 - Specify a custom quantiles range, e.g. [0, .50, .75, 1.] to get unequal
63 | number of records per quantile
64 | 3 - Use 'bins' option instead of 'quantiles', 'bins' chooses the
65 | buckets to be evenly spaced according to the values themselves, while
66 | 'quantiles' forces the buckets to have the same number of records.
67 | 4 - For factors with discrete values use the 'bins' option with custom
68 | ranges and create a range for each discrete value
69 | Please see utils.get_clean_factor_and_forward_returns documentation for
70 | full documentation of 'bins' and 'quantiles' options.
71 |
72 | """
73 |
74 | def dec(*args, **kwargs):
75 | try:
76 | return func(*args, **kwargs)
77 | except ValueError as e:
78 | if 'Bin edges must be unique' in str(e):
79 | rethrow(e, message)
80 | raise
81 | return dec
82 |
83 |
84 | @non_unique_bin_edges_error
85 | def quantize_factor(factor_data,
86 | quantiles=5,
87 | bins=None,
88 | by_group=False,
89 | no_raise=False,
90 | zero_aware=False):
91 | """
92 | Computes period wise factor quantiles.
93 |
94 | Parameters
95 | ----------
96 | factor_data : pd.DataFrame - MultiIndex
97 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
98 | containing the values for a single alpha factor, forward returns for
99 | each period, the factor quantile/bin that factor value belongs to, and
100 | (optionally) the group the asset belongs to.
101 |
102 | - See full explanation in utils.get_clean_factor_and_forward_returns
103 |
104 | quantiles : int or sequence[float]
105 | Number of equal-sized quantile buckets to use in factor bucketing.
106 | Alternately sequence of quantiles, allowing non-equal-sized buckets
107 | e.g. [0, .10, .5, .90, 1.] or [.05, .5, .95]
108 | Only one of 'quantiles' or 'bins' can be not-None
109 | bins : int or sequence[float]
110 | Number of equal-width (valuewise) bins to use in factor bucketing.
111 | Alternately sequence of bin edges allowing for non-uniform bin width
112 | e.g. [-4, -2, -0.5, 0, 10]
113 | Only one of 'quantiles' or 'bins' can be not-None
114 | by_group : bool, optional
115 | If True, compute quantile buckets separately for each group.
116 | no_raise : bool, optional
117 | If True, no exceptions are thrown and the values for which the
118 | exception would have been thrown are set to np.NaN
119 | zero_aware : bool, optional
120 | If True, compute quantile buckets separately for positive and negative
121 | signal values. This is useful if your signal is centered and zero is
122 | the separation between long and short signals.
123 |
124 | Returns
125 | -------
126 | factor_quantile : pd.Series
127 | Factor quantiles indexed by date and asset.
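
    Examples
    --------
    A minimal sketch (``factor_data`` here is a hypothetical DataFrame in
    the MultiIndex format described above):

    >>> quintiles = quantize_factor(factor_data, quantiles=5)
    >>> halves = quantize_factor(factor_data, quantiles=None,
    ...                          bins=[-10, 0, 10])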
128 | """
129 | if not ((quantiles is not None and bins is None) or
130 | (quantiles is None and bins is not None)):
131 | raise ValueError('Either quantiles or bins should be provided')
132 |
133 | if zero_aware and not (isinstance(quantiles, int)
134 | or isinstance(bins, int)):
135 | msg = ("zero_aware should only be True when quantiles or bins is an"
136 | " integer")
137 | raise ValueError(msg)
138 |
139 | def quantile_calc(x, _quantiles, _bins, _zero_aware, _no_raise):
140 | try:
141 | if _quantiles is not None and _bins is None and not _zero_aware:
142 | return pd.qcut(x, _quantiles, labels=False) + 1
143 | elif _quantiles is not None and _bins is None and _zero_aware:
144 | pos_quantiles = pd.qcut(x[x >= 0], _quantiles // 2,
145 | labels=False) + _quantiles // 2 + 1
146 | neg_quantiles = pd.qcut(x[x < 0], _quantiles // 2,
147 | labels=False) + 1
148 | return pd.concat([pos_quantiles, neg_quantiles]).sort_index()
149 | elif _bins is not None and _quantiles is None and not _zero_aware:
150 | return pd.cut(x, _bins, labels=False) + 1
151 | elif _bins is not None and _quantiles is None and _zero_aware:
152 | pos_bins = pd.cut(x[x >= 0], _bins // 2,
153 | labels=False) + _bins // 2 + 1
154 | neg_bins = pd.cut(x[x < 0], _bins // 2,
155 | labels=False) + 1
156 | return pd.concat([pos_bins, neg_bins]).sort_index()
157 | except Exception as e:
158 | if _no_raise:
159 | return pd.Series(index=x.index)
160 | raise e
161 |
162 | grouper = [factor_data.index.get_level_values('date')]
163 | if by_group:
164 | grouper.append('group')
165 |
166 | factor_quantile = factor_data.groupby(grouper)['factor'] \
167 | .apply(quantile_calc, quantiles, bins, zero_aware, no_raise)
168 | factor_quantile.name = 'factor_quantile'
169 |
170 | return factor_quantile.dropna()
171 |
172 |
173 | def infer_trading_calendar(factor_idx, prices_idx):
174 | """
175 | Infer the trading calendar from factor and price information.
176 |
177 | Parameters
178 | ----------
179 | factor_idx : pd.DatetimeIndex
180 | The factor datetimes for which we are computing the forward returns
181 | prices_idx : pd.DatetimeIndex
182 | The prices datetimes associated with the factor data
183 |
184 | Returns
185 | -------
186 | calendar : pd.DateOffset
187 | """
188 | full_idx = factor_idx.union(prices_idx)
189 |
190 | traded_weekdays = []
191 | holidays = []
192 |
193 | days_of_the_week = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
194 | for day, day_str in enumerate(days_of_the_week):
195 |
196 | weekday_mask = (full_idx.dayofweek == day)
197 |
198 | # drop days of the week that are not traded at all
199 | if not weekday_mask.any():
200 | continue
201 | traded_weekdays.append(day_str)
202 |
203 | # look for holidays
204 | used_weekdays = full_idx[weekday_mask].normalize()
205 | all_weekdays = pd.date_range(full_idx.min(), full_idx.max(),
206 | freq=CustomBusinessDay(weekmask=day_str)
207 | ).normalize()
208 | _holidays = all_weekdays.difference(used_weekdays)
209 | _holidays = [timestamp.date() for timestamp in _holidays]
210 | holidays.extend(_holidays)
211 |
212 | traded_weekdays = ' '.join(traded_weekdays)
213 | return CustomBusinessDay(weekmask=traded_weekdays, holidays=holidays)
214 |
215 |
216 | def compute_forward_returns(factor,
217 | prices,
218 | periods=(1, 5, 10),
219 | filter_zscore=None,
220 | cumulative_returns=True):
221 | """
222 | Finds the N period forward returns (as percent change) for each asset
223 | provided.
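
    Ignoring the trading-calendar alignment performed below, the cumulative
    forward return for period ``p`` at date ``t`` is essentially
    ``prices.pct_change(p).shift(-p)`` evaluated at ``t``, i.e.
    ``prices[t + p] / prices[t] - 1``; with ``cumulative_returns=False`` the
    single-period ``prices.pct_change().shift(-p)`` is used instead.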
224 |
225 | Parameters
226 | ----------
227 | factor : pd.Series - MultiIndex
228 | A MultiIndex Series indexed by timestamp (level 0) and asset
229 | (level 1), containing the values for a single alpha factor.
230 |
231 | - See full explanation in utils.get_clean_factor_and_forward_returns
232 |
233 | prices : pd.DataFrame
234 | Pricing data to use in forward price calculation.
235 | Assets as columns, dates as index. Pricing data must
236 | span the factor analysis time period plus an additional buffer window
237 | that is greater than the maximum number of expected periods
238 | in the forward returns calculations.
239 | periods : sequence[int]
240 | periods to compute forward returns on.
241 | filter_zscore : int or float, optional
242 | Sets forward returns greater than X standard deviations
243 | from the mean to nan. Set it to 'None' to avoid filtering.
244 | Caution: this outlier filtering incorporates lookahead bias.
245 | cumulative_returns : bool, optional
246 | If True, forward returns columns will contain cumulative returns.
247 | Setting this to False is useful if you want to analyze how predictive
248 | a factor is for a single forward day.
249 |
250 | Returns
251 | -------
252 | forward_returns : pd.DataFrame - MultiIndex
253 | A MultiIndex DataFrame indexed by timestamp (level 0) and asset
254 | (level 1), containing the forward returns for assets.
255 | Forward returns column names follow the format accepted by
256 | pd.Timedelta (e.g. '1D', '30m', '3h15m', '1D1h', etc).
257 | 'date' index freq property (forward_returns.index.levels[0].freq)
258 | will be set to a trading calendar (pandas DateOffset) inferred
259 | from the input data (see infer_trading_calendar for more details).
260 | """
261 |
262 | factor_dateindex = factor.index.levels[0]
263 | if factor_dateindex.tz != prices.index.tz:
264 | raise NonMatchingTimezoneError("The timezone of 'factor' is not the "
265 | "same as the timezone of 'prices'. See "
266 | "the pandas methods tz_localize and "
267 | "tz_convert.")
268 |
269 | freq = infer_trading_calendar(factor_dateindex, prices.index)
270 |
271 | factor_dateindex = factor_dateindex.intersection(prices.index)
272 |
273 | if len(factor_dateindex) == 0:
274 | raise ValueError("Factor and prices indices don't match: make sure "
275 | "they have the same convention in terms of datetimes "
276 | "and symbol-names")
277 |
278 | # chop prices down to only the assets we care about (= unique assets in
279 | # `factor`). we could modify `prices` in place, but that might confuse
280 | # the caller.
281 | prices = prices.filter(items=factor.index.levels[1])
282 |
283 | raw_values_dict = {}
284 | column_list = []
285 |
286 | for period in sorted(periods):
287 | if cumulative_returns:
288 | returns = prices.pct_change(period)
289 | else:
290 | returns = prices.pct_change()
291 |
292 | forward_returns = \
293 | returns.shift(-period).reindex(factor_dateindex)
294 |
295 | if filter_zscore is not None:
296 | mask = abs(
297 | forward_returns - forward_returns.mean()
298 | ) > (filter_zscore * forward_returns.std())
299 | forward_returns[mask] = np.nan
300 |
301 | #
302 | # Find the period length, which will be the column name. We'll test
303 | # several entries in order to find out the most likely period length
304 | # (in case the user passed inconsistent data)
305 | #
306 | days_diffs = []
307 | for i in range(30):
308 | if i >= len(forward_returns.index):
309 | break
310 | p_idx = prices.index.get_loc(forward_returns.index[i])
311 | if p_idx is None or p_idx < 0 or (
312 | p_idx + period) >= len(prices.index):
313 | continue
314 | start = prices.index[p_idx]
315 | end = prices.index[p_idx + period]
316 | period_len = diff_custom_calendar_timedeltas(start, end, freq)
317 | days_diffs.append(period_len.components.days)
318 |
319 | delta_days = period_len.components.days - mode(days_diffs).mode[0]
320 | period_len -= pd.Timedelta(days=delta_days)
321 | label = timedelta_to_string(period_len)
322 |
323 | column_list.append(label)
324 |
325 | raw_values_dict[label] = np.concatenate(forward_returns.values)
326 |
327 | df = pd.DataFrame.from_dict(raw_values_dict)
328 | df.set_index(
329 | pd.MultiIndex.from_product(
330 | [factor_dateindex, prices.columns],
331 | names=['date', 'asset']
332 | ),
333 | inplace=True
334 | )
335 | df = df.reindex(factor.index)
336 |
337 | # now set the columns correctly
338 | df = df[column_list]
339 |
340 | df.index.levels[0].freq = freq
341 | df.index.set_names(['date', 'asset'], inplace=True)
342 |
343 | return df
344 |
345 |
346 | def backshift_returns_series(series, N):
347 | """Shift a multi-indexed series backwards by N observations in
348 | the first level.
349 |
350 | This can be used to convert backward-looking returns into a
351 | forward-returns series.
352 | """
353 | ix = series.index
354 | dates, sids = ix.levels
355 | date_labels, sid_labels = map(np.array, ix.labels)
356 |
357 | # Output date labels will contain all but the last N dates.
358 | new_dates = dates[:-N]
359 |
360 | # Output data will remove the first M rows, where M is the number of
361 | # records whose date is among the first N dates.
362 | cutoff = date_labels.searchsorted(N)
363 | new_date_labels = date_labels[cutoff:] - N
364 | new_sid_labels = sid_labels[cutoff:]
365 | new_values = series.values[cutoff:]
366 |
367 | assert new_date_labels[0] == 0
368 |
369 | new_index = pd.MultiIndex(
370 | levels=[new_dates, sids],
371 | labels=[new_date_labels, new_sid_labels],
372 | sortorder=1,
373 | names=ix.names,
374 | )
375 |
376 | return pd.Series(data=new_values, index=new_index)
377 |
378 |
379 | def demean_forward_returns(factor_data, grouper=None):
380 | """
381 | Convert forward returns to returns relative to mean
382 | period wise all-universe or group returns.
383 | Group-wise normalization incorporates the assumption of a
384 | group neutral portfolio constraint and thus allows the
385 | factor to be evaluated across groups.
386 |
387 | For example, if AAPL's 5 period return is 0.1% and the mean 5 period
388 | return for the Technology stocks in our universe was 0.5% in the
389 | same period, the group adjusted 5 period return for AAPL in this
390 | period is -0.4%.
391 |
392 | Parameters
393 | ----------
394 | factor_data : pd.DataFrame - MultiIndex
395 | Forward returns indexed by date and asset.
396 | Separate column for each forward return window.
397 | grouper : list, optional
398 | Grouper(s) to pass to pandas groupby. If None, demean by date
399 | only, i.e. relative to the whole universe on each date.
400 |
401 | Returns
402 | -------
403 | adjusted_forward_returns : pd.DataFrame - MultiIndex
404 | DataFrame of the same format as the input, but with each
405 | security's returns normalized by group.
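
    Examples
    --------
    A minimal sketch (``factor_data`` is a hypothetical DataFrame in the
    standard Alphalens format, including a 'group' column):

    >>> market_neutral = demean_forward_returns(factor_data)
    >>> dates = factor_data.index.get_level_values('date')
    >>> group_neutral = demean_forward_returns(factor_data,
    ...                                        grouper=[dates, 'group'])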
405 | """ 406 | 407 | factor_data = factor_data.copy() 408 | 409 | if not grouper: 410 | grouper = factor_data.index.get_level_values('date') 411 | 412 | cols = get_forward_returns_columns(factor_data.columns) 413 | factor_data[cols] = factor_data.groupby(grouper)[cols] \ 414 | .transform(lambda x: x - x.mean()) 415 | 416 | return factor_data 417 | 418 | 419 | def print_table(table, name=None, fmt=None): 420 | """ 421 | Pretty print a pandas DataFrame. 422 | 423 | Uses HTML output if running inside Jupyter Notebook, otherwise 424 | formatted text output. 425 | 426 | Parameters 427 | ---------- 428 | table : pd.Series or pd.DataFrame 429 | Table to pretty-print. 430 | name : str, optional 431 | Table name to display in upper left corner. 432 | fmt : str, optional 433 | Formatter to use for displaying table elements. 434 | E.g. '{0:.2f}%' for displaying 100 as '100.00%'. 435 | Restores original setting after displaying. 436 | """ 437 | if isinstance(table, pd.Series): 438 | table = pd.DataFrame(table) 439 | 440 | if isinstance(table, pd.DataFrame): 441 | table.columns.name = name 442 | 443 | prev_option = pd.get_option('display.float_format') 444 | if fmt is not None: 445 | pd.set_option('display.float_format', lambda x: fmt.format(x)) 446 | 447 | display(table) 448 | 449 | if fmt is not None: 450 | pd.set_option('display.float_format', prev_option) 451 | 452 | 453 | def get_clean_factor(factor, 454 | forward_returns, 455 | groupby=None, 456 | binning_by_group=False, 457 | quantiles=5, 458 | bins=None, 459 | groupby_labels=None, 460 | max_loss=0.35, 461 | zero_aware=False): 462 | """ 463 | Formats the factor data, forward return data, and group mappings into a 464 | DataFrame that contains aligned MultiIndex indices of timestamp and asset. 465 | The returned data will be formatted to be suitable for Alphalens functions. 466 | 467 | It is safe to skip a call to this function and still make use of Alphalens 468 | functionalities as long as the factor data conforms to the format returned 469 | from get_clean_factor_and_forward_returns and documented here 470 | 471 | Parameters 472 | ---------- 473 | factor : pd.Series - MultiIndex 474 | A MultiIndex Series indexed by timestamp (level 0) and asset 475 | (level 1), containing the values for a single alpha factor. 476 | :: 477 | ----------------------------------- 478 | date | asset | 479 | ----------------------------------- 480 | | AAPL | 0.5 481 | ----------------------- 482 | | BA | -1.1 483 | ----------------------- 484 | 2014-01-01 | CMG | 1.7 485 | ----------------------- 486 | | DAL | -0.1 487 | ----------------------- 488 | | LULU | 2.7 489 | ----------------------- 490 | 491 | forward_returns : pd.DataFrame - MultiIndex 492 | A MultiIndex DataFrame indexed by timestamp (level 0) and asset 493 | (level 1), containing the forward returns for assets. 494 | Forward returns column names must follow the format accepted by 495 | pd.Timedelta (e.g. '1D', '30m', '3h15m', '1D1h', etc). 496 | 'date' index freq property must be set to a trading calendar 497 | (pandas DateOffset), see infer_trading_calendar for more details. 
498 | This information is currently used only in cumulative returns
499 | computation
500 | ::
501 | ---------------------------------------
502 | | | 1D | 5D | 10D
503 | ---------------------------------------
504 | date | asset | | |
505 | ---------------------------------------
506 | | AAPL | 0.09|-0.01|-0.079
507 | ----------------------------
508 | | BA | 0.02| 0.06| 0.020
509 | ----------------------------
510 | 2014-01-01 | CMG | 0.03| 0.09| 0.036
511 | ----------------------------
512 | | DAL |-0.02|-0.06|-0.029
513 | ----------------------------
514 | | LULU |-0.03| 0.05|-0.009
515 | ----------------------------
516 |
517 | groupby : pd.Series - MultiIndex or dict
518 | Either a MultiIndex Series indexed by date and asset,
519 | containing the period wise group codes for each asset, or
520 | a dict of asset to group mappings. If a dict is passed,
521 | it is assumed that group mappings are unchanged for the
522 | entire time period of the passed factor data.
523 | binning_by_group : bool
524 | If True, compute quantile buckets separately for each group.
525 | This is useful when the range of factor values varies considerably
526 | across groups, so that it is wise to make the binning group relative.
527 | You should probably enable this if the factor is intended
528 | to be analyzed for a group neutral portfolio
529 | quantiles : int or sequence[float]
530 | Number of equal-sized quantile buckets to use in factor bucketing.
531 | Alternately sequence of quantiles, allowing non-equal-sized buckets
532 | e.g. [0, .10, .5, .90, 1.] or [.05, .5, .95]
533 | Only one of 'quantiles' or 'bins' can be not-None
534 | bins : int or sequence[float]
535 | Number of equal-width (valuewise) bins to use in factor bucketing.
536 | Alternately sequence of bin edges allowing for non-uniform bin width
537 | e.g. [-4, -2, -0.5, 0, 10]
538 | Chooses the buckets to be evenly spaced according to the values
539 | themselves. Useful when the factor contains discrete values.
540 | Only one of 'quantiles' or 'bins' can be not-None
541 | groupby_labels : dict
542 | A dictionary keyed by group code with values corresponding
543 | to the display name for each group.
544 | max_loss : float, optional
545 | Maximum percentage (0.00 to 1.00) of factor data dropping allowed,
546 | computed comparing the number of items in the input factor index and
547 | the number of items in the output DataFrame index.
548 | Factor data can be partially dropped due to being flawed itself
549 | (e.g. NaNs), not having provided enough price data to compute
550 | forward returns for all factor values, or because it is not possible
551 | to perform binning.
552 | Set max_loss=0 to disable Exception suppression.
553 | zero_aware : bool, optional
554 | If True, compute quantile buckets separately for positive and negative
555 | signal values. This is useful if your signal is centered and zero is
556 | the separation between long and short signals.
557 |
558 |
559 | Returns
560 | -------
561 | merged_data : pd.DataFrame - MultiIndex
562 | A MultiIndex Series indexed by date (level 0) and asset (level 1),
563 | containing the values for a single alpha factor, forward returns for
564 | each period, the factor quantile/bin that factor value belongs to, and
565 | (optionally) the group the asset belongs to.
566 |
567 | - forward returns column names follow the format accepted by
568 | pd.Timedelta (e.g. '1D', '30m', '3h15m', '1D1h', etc)
569 |
570 | - 'date' index freq property (merged_data.index.levels[0].freq) is the
571 | same as that of the input forward returns data. This is currently
572 | used only in cumulative returns computation
573 | ::
574 | -------------------------------------------------------------------
575 | | | 1D | 5D | 10D |factor|group|factor_quantile
576 | -------------------------------------------------------------------
577 | date | asset | | | | | |
578 | -------------------------------------------------------------------
579 | | AAPL | 0.09|-0.01|-0.079| 0.5 | G1 | 3
580 | --------------------------------------------------------
581 | | BA | 0.02| 0.06| 0.020| -1.1 | G2 | 5
582 | --------------------------------------------------------
583 | 2014-01-01 | CMG | 0.03| 0.09| 0.036| 1.7 | G2 | 1
584 | --------------------------------------------------------
585 | | DAL |-0.02|-0.06|-0.029| -0.1 | G3 | 5
586 | --------------------------------------------------------
587 | | LULU |-0.03| 0.05|-0.009| 2.7 | G1 | 2
588 | --------------------------------------------------------
589 | """
590 |
591 | initial_amount = float(len(factor.index))
592 |
593 | factor_copy = factor.copy()
594 | factor_copy.index = factor_copy.index.rename(['date', 'asset'])
595 | factor_copy = factor_copy[np.isfinite(factor_copy)]
596 |
597 | merged_data = forward_returns.copy()
598 | merged_data['factor'] = factor_copy
599 |
600 | if groupby is not None:
601 | if isinstance(groupby, dict):
602 | diff = set(factor_copy.index.get_level_values(
603 | 'asset')) - set(groupby.keys())
604 | if len(diff) > 0:
605 | raise KeyError(
606 | "Assets {} not in group mapping".format(
607 | list(diff)))
608 |
609 | ss = pd.Series(groupby)
610 | groupby = pd.Series(index=factor_copy.index,
611 | data=ss[factor_copy.index.get_level_values(
612 | 'asset')].values)
613 |
614 | if groupby_labels is not None:
615 | diff = set(groupby.values) - set(groupby_labels.keys())
616 | if len(diff) > 0:
617 | raise KeyError(
618 | "groups {} not in passed group names".format(
619 | list(diff)))
620 |
621 | sn = pd.Series(groupby_labels)
622 | groupby = pd.Series(index=groupby.index,
623 | data=sn[groupby.values].values)
624 |
625 | merged_data['group'] = groupby.astype('category')
626 |
627 | merged_data = merged_data.dropna()
628 |
629 | fwdret_amount = float(len(merged_data.index))
630 |
631 | no_raise = False if max_loss == 0 else True
632 | quantile_data = quantize_factor(
633 | merged_data,
634 | quantiles,
635 | bins,
636 | binning_by_group,
637 | no_raise,
638 | zero_aware
639 | )
640 |
641 | merged_data['factor_quantile'] = quantile_data
642 |
643 | merged_data = merged_data.dropna()
644 |
645 | binning_amount = float(len(merged_data.index))
646 |
647 | tot_loss = (initial_amount - binning_amount) / initial_amount
648 | fwdret_loss = (initial_amount - fwdret_amount) / initial_amount
649 | bin_loss = tot_loss - fwdret_loss
650 |
651 | print("Dropped %.1f%% entries from factor data: %.1f%% in forward "
652 | "returns computation and %.1f%% in binning phase "
653 | "(set max_loss=0 to see potentially suppressed Exceptions)." %
654 | (tot_loss * 100, fwdret_loss * 100, bin_loss * 100))
655 |
656 | if tot_loss > max_loss:
657 | message = ("total loss (%.1f%%) exceeded max_loss (%.1f%%), "
658 | "consider increasing max_loss." % (tot_loss * 100, max_loss * 100))
659 | raise MaxLossExceededError(message)
660 | else:
661 | print("max_loss is %.1f%%, not exceeded: OK!" % (max_loss * 100))
662 |
663 | return merged_data
664 |
665 |
666 | def get_clean_factor_and_forward_returns(factor,
667 | prices,
668 | groupby=None,
669 | binning_by_group=False,
670 | quantiles=5,
671 | bins=None,
672 | periods=(1, 5, 10),
673 | filter_zscore=20,
674 | groupby_labels=None,
675 | max_loss=0.35,
676 | zero_aware=False,
677 | cumulative_returns=True):
678 | """
679 | Formats the factor data, pricing data, and group mappings into a DataFrame
680 | that contains aligned MultiIndex indices of timestamp and asset. The
681 | returned data will be formatted to be suitable for Alphalens functions.
682 |
683 | It is safe to skip a call to this function and still make use of Alphalens
684 | functionalities as long as the factor data conforms to the format returned
685 | from get_clean_factor_and_forward_returns and documented here
686 |
687 | Parameters
688 | ----------
689 | factor : pd.Series - MultiIndex
690 | A MultiIndex Series indexed by timestamp (level 0) and asset
691 | (level 1), containing the values for a single alpha factor.
692 | ::
693 | -----------------------------------
694 | date | asset |
695 | -----------------------------------
696 | | AAPL | 0.5
697 | -----------------------
698 | | BA | -1.1
699 | -----------------------
700 | 2014-01-01 | CMG | 1.7
701 | -----------------------
702 | | DAL | -0.1
703 | -----------------------
704 | | LULU | 2.7
705 | -----------------------
706 |
707 | prices : pd.DataFrame
708 | A wide form Pandas DataFrame indexed by timestamp with assets
709 | in the columns.
710 | Pricing data must span the factor analysis time period plus an
711 | additional buffer window that is greater than the maximum number
712 | of expected periods in the forward returns calculations.
713 | It is important to pass the correct pricing data depending on the
714 | point in time your signal was generated, so as to avoid lookahead
715 | bias or delayed calculations.
716 | 'Prices' must contain at least an entry for each timestamp/asset
717 | combination in 'factor'. This entry should reflect the buy price
718 | for the assets and usually it is the next available price after the
719 | factor is computed but it can also be a later price if the factor is
720 | meant to be traded later (e.g. if the factor is computed at market
721 | open but traded 1 hour after market open the price information should
722 | be 1 hour after market open).
723 | 'Prices' must also contain entries for timestamps following each
724 | timestamp/asset combination in 'factor', as many more timestamps
725 | as the maximum value in 'periods'. The asset price after 'period'
726 | timestamps will be considered the sell price for that asset when
727 | computing 'period' forward returns.
728 | ::
729 | ----------------------------------------------------
730 | | AAPL | BA | CMG | DAL | LULU |
731 | ----------------------------------------------------
732 | Date | | | | | |
733 | ----------------------------------------------------
734 | 2014-01-01 |605.12| 24.58| 11.72| 54.43 | 37.14 |
735 | ----------------------------------------------------
736 | 2014-01-02 |604.35| 22.23| 12.21| 52.78 | 33.63 |
737 | ----------------------------------------------------
738 | 2014-01-03 |607.94| 21.68| 14.36| 53.94 | 29.37 |
739 | ----------------------------------------------------
740 |
741 | groupby : pd.Series - MultiIndex or dict
742 | Either a MultiIndex Series indexed by date and asset,
743 | containing the period wise group codes for each asset, or
744 | a dict of asset to group mappings. If a dict is passed,
745 | it is assumed that group mappings are unchanged for the
746 | entire time period of the passed factor data.
747 | binning_by_group : bool
748 | If True, compute quantile buckets separately for each group.
749 | This is useful when the range of factor values varies considerably
750 | across groups, so that it is wise to make the binning group relative.
751 | You should probably enable this if the factor is intended
752 | to be analyzed for a group neutral portfolio
753 | quantiles : int or sequence[float]
754 | Number of equal-sized quantile buckets to use in factor bucketing.
755 | Alternately sequence of quantiles, allowing non-equal-sized buckets
756 | e.g. [0, .10, .5, .90, 1.] or [.05, .5, .95]
757 | Only one of 'quantiles' or 'bins' can be not-None
758 | bins : int or sequence[float]
759 | Number of equal-width (valuewise) bins to use in factor bucketing.
760 | Alternately sequence of bin edges allowing for non-uniform bin width
761 | e.g. [-4, -2, -0.5, 0, 10]
762 | Chooses the buckets to be evenly spaced according to the values
763 | themselves. Useful when the factor contains discrete values.
764 | Only one of 'quantiles' or 'bins' can be not-None
765 | periods : sequence[int]
766 | periods to compute forward returns on.
767 | filter_zscore : int or float, optional
768 | Sets forward returns greater than X standard deviations
769 | from the mean to nan. Set it to 'None' to avoid filtering.
770 | Caution: this outlier filtering incorporates lookahead bias.
771 | groupby_labels : dict
772 | A dictionary keyed by group code with values corresponding
773 | to the display name for each group.
774 | max_loss : float, optional
775 | Maximum percentage (0.00 to 1.00) of factor data dropping allowed,
776 | computed comparing the number of items in the input factor index and
777 | the number of items in the output DataFrame index.
778 | Factor data can be partially dropped due to being flawed itself
779 | (e.g. NaNs), not having provided enough price data to compute
780 | forward returns for all factor values, or because it is not possible
781 | to perform binning.
782 | Set max_loss=0 to disable Exception suppression.
783 | zero_aware : bool, optional
784 | If True, compute quantile buckets separately for positive and negative
785 | signal values. This is useful if your signal is centered and zero is
786 | the separation between long and short signals.
787 | cumulative_returns : bool, optional
788 | If True, forward returns columns will contain cumulative returns.
789 | Setting this to False is useful if you want to analyze how predictive
790 | a factor is for a single forward day.
791 |
792 | Returns
793 | -------
794 | merged_data : pd.DataFrame - MultiIndex
795 | A MultiIndex Series indexed by date (level 0) and asset (level 1),
796 | containing the values for a single alpha factor, forward returns for
797 | each period, the factor quantile/bin that factor value belongs to, and
798 | (optionally) the group the asset belongs to.
799 | - forward returns column names follow the format accepted by
800 | pd.Timedelta (e.g. '1D', '30m', '3h15m', '1D1h', etc)
801 | - 'date' index freq property (merged_data.index.levels[0].freq) will be
802 | set to a trading calendar (pandas DateOffset) inferred from the input
803 | data (see infer_trading_calendar for more details). This is currently
804 | used only in cumulative returns computation
805 | ::
806 | -------------------------------------------------------------------
807 | | | 1D | 5D | 10D |factor|group|factor_quantile
808 | -------------------------------------------------------------------
809 | date | asset | | | | | |
810 | -------------------------------------------------------------------
811 | | AAPL | 0.09|-0.01|-0.079| 0.5 | G1 | 3
812 | --------------------------------------------------------
813 | | BA | 0.02| 0.06| 0.020| -1.1 | G2 | 5
814 | --------------------------------------------------------
815 | 2014-01-01 | CMG | 0.03| 0.09| 0.036| 1.7 | G2 | 1
816 | --------------------------------------------------------
817 | | DAL |-0.02|-0.06|-0.029| -0.1 | G3 | 5
818 | --------------------------------------------------------
819 | | LULU |-0.03| 0.05|-0.009| 2.7 | G1 | 2
820 | --------------------------------------------------------
821 |
822 | See Also
823 | --------
824 | utils.get_clean_factor
825 | For use when forward returns are already available.
826 | """
827 | forward_returns = compute_forward_returns(
828 | factor,
829 | prices,
830 | periods,
831 | filter_zscore,
832 | cumulative_returns,
833 | )
834 |
835 | factor_data = get_clean_factor(factor, forward_returns, groupby=groupby,
836 | groupby_labels=groupby_labels,
837 | quantiles=quantiles, bins=bins,
838 | binning_by_group=binning_by_group,
839 | max_loss=max_loss, zero_aware=zero_aware)
840 |
841 | return factor_data
842 |
843 |
844 | def rate_of_return(period_ret, base_period):
845 | """
846 | Convert returns to 'base_period' rate of returns: that is the value the
847 | returns would have every 'base_period' if they had grown at a steady
848 | rate
849 |
850 | Parameters
851 | ----------
852 | period_ret: pd.DataFrame
853 | DataFrame containing returns values with column headings representing
854 | the return period.
855 | base_period: string
856 | The base period length used in the conversion
857 | It must follow pandas.Timedelta constructor format (e.g. '1 days',
858 | '1D', '30m', '3h', '1D1h', etc)
859 |
860 | Returns
861 | -------
862 | pd.DataFrame
863 | DataFrame in same format as input but with 'base_period' rate of
864 | returns values.
865 | """
866 | period_len = period_ret.name
867 | conversion_factor = (pd.Timedelta(base_period) /
868 | pd.Timedelta(period_len))
869 | return period_ret.add(1).pow(conversion_factor).sub(1)
870 |
871 |
872 | def std_conversion(period_std, base_period):
873 | """
874 | 'base_period' standard deviation (or standard error) approximation
875 |
876 | Parameters
877 | ----------
878 | period_std: pd.DataFrame
879 | DataFrame containing standard deviation or standard error values
880 | with column headings representing the return period.
881 | base_period: string
882 | The base period length used in the conversion
883 | It must follow pandas.Timedelta constructor format (e.g. '1 days',
884 | '1D', '30m', '3h', '1D1h', etc)
885 |
886 | Returns
887 | -------
888 | pd.DataFrame
889 | DataFrame in same format as input but with 'base_period'
890 | standard deviation/error values.
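
    Examples
    --------
    A small worked sketch: by the square-root-of-time rule a standard
    deviation measured over a '4D' window is converted to a '1D' base by
    dividing by ``sqrt(4) = 2``:

    >>> four_day_std = pd.Series([0.02, 0.04], name='4D')
    >>> std_conversion(four_day_std, '1D')
    0    0.01
    1    0.02
    Name: 4D, dtype: float64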
891 | """
892 | period_len = period_std.name
893 | conversion_factor = (pd.Timedelta(period_len) /
894 | pd.Timedelta(base_period))
895 | return period_std / np.sqrt(conversion_factor)
896 |
897 |
898 | def get_forward_returns_columns(columns, require_exact_day_multiple=False):
899 | """
900 | Utility that detects and returns the columns that are forward returns
901 | """
902 |
903 | # If exact day multiples are required in the forward return periods,
904 | # drop all other columns (e.g. drop 3D12h).
905 | if require_exact_day_multiple:
906 | pattern = re.compile(r"^(\d+([D]))+$", re.IGNORECASE)
907 | valid_columns = [(pattern.match(col) is not None) for col in columns]
908 |
909 | if sum(valid_columns) < len(valid_columns):
910 | warnings.warn(
911 | "Skipping return periods that aren't exact multiples"
912 | + " of days."
913 | )
914 | else:
915 | pattern = re.compile(r"^(\d+([Dhms]|ms|us|ns))+$", re.IGNORECASE)
916 | valid_columns = [(pattern.match(col) is not None) for col in columns]
917 |
918 | return columns[valid_columns]
919 |
920 |
921 | def timedelta_to_string(timedelta):
922 | """
923 | Utility that converts a pandas.Timedelta to a string representation
924 | compatible with pandas.Timedelta constructor format
925 |
926 | Parameters
927 | ----------
928 | timedelta: pd.Timedelta
929 |
930 | Returns
931 | -------
932 | string
933 | string representation of 'timedelta'
934 | """
935 | c = timedelta.components
936 | format = ''
937 | if c.days != 0:
938 | format += '%dD' % c.days
939 | if c.hours > 0:
940 | format += '%dh' % c.hours
941 | if c.minutes > 0:
942 | format += '%dm' % c.minutes
943 | if c.seconds > 0:
944 | format += '%ds' % c.seconds
945 | if c.milliseconds > 0:
946 | format += '%dms' % c.milliseconds
947 | if c.microseconds > 0:
948 | format += '%dus' % c.microseconds
949 | if c.nanoseconds > 0:
950 | format += '%dns' % c.nanoseconds
951 | return format
952 |
953 |
954 | def timedelta_strings_to_integers(sequence):
955 | """
956 | Converts pandas string representations of timedeltas into integers of days.
957 |
958 | Parameters
959 | ----------
960 | sequence : iterable
961 | List or array of timedelta string representations, e.g. ['1D', '5D'].
962 |
963 | Returns
964 | -------
965 | sequence : list
966 | Integer days corresponding to the input sequence, e.g. [1, 5].
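
    Examples
    --------
    A quick illustration of the behavior described above:

    >>> timedelta_strings_to_integers(['1D', '5D', '10D'])
    [1, 5, 10]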
967 | """
968 | return list(map(lambda x: pd.Timedelta(x).days, sequence))
969 |
970 |
971 | def add_custom_calendar_timedelta(input, timedelta, freq):
972 | """
973 | Add timedelta to 'input' taking into consideration custom frequency, which
974 | is used to deal with custom calendars, such as a trading calendar
975 |
976 | Parameters
977 | ----------
978 | input : pd.DatetimeIndex or pd.Timestamp
979 | timedelta : pd.Timedelta
980 | freq : pd.DateOffset (CustomBusinessDay, Day or BusinessDay)
981 |
982 | Returns
983 | -------
984 | pd.DatetimeIndex or pd.Timestamp
985 | input + timedelta
986 | """
987 | if not isinstance(freq, (Day, BusinessDay, CustomBusinessDay)):
988 | raise ValueError("freq must be Day, BDay or CustomBusinessDay")
989 | days = timedelta.components.days
990 | offset = timedelta - pd.Timedelta(days=days)
991 | return input + freq * days + offset
992 |
993 |
994 | def diff_custom_calendar_timedeltas(start, end, freq):
995 | """
996 | Compute the difference between two pd.Timestamp taking into consideration
997 | custom frequency, which is used to deal with custom calendars, such as a
998 | trading calendar
999 |
1000 | Parameters
1001 | ----------
1002 | start : pd.Timestamp
1003 | end : pd.Timestamp
1004 | freq : pd.DateOffset (CustomBusinessDay, Day or BusinessDay)
1005 | (see infer_trading_calendar)
1006 |
1007 | Returns
1008 | -------
1009 | pd.Timedelta
1010 | end - start
1011 | """
1012 | if not isinstance(freq, (Day, BusinessDay, CustomBusinessDay)):
1013 | raise ValueError("freq must be Day, BusinessDay or CustomBusinessDay")
1014 |
1015 | weekmask = getattr(freq, 'weekmask', None)
1016 | holidays = getattr(freq, 'holidays', None)
1017 |
1018 | if weekmask is None and holidays is None:
1019 | if isinstance(freq, Day):
1020 | weekmask = 'Mon Tue Wed Thu Fri Sat Sun'
1021 | holidays = []
1022 | elif isinstance(freq, BusinessDay):
1023 | weekmask = 'Mon Tue Wed Thu Fri'
1024 | holidays = []
1025 |
1026 | if weekmask is not None and holidays is not None:
1027 | # we prefer this method as it is faster
1028 | actual_days = np.busday_count(np.array(start).astype('datetime64[D]'),
1029 | np.array(end).astype('datetime64[D]'),
1030 | weekmask, holidays)
1031 | else:
1032 | # default, it is slow
1033 | actual_days = pd.date_range(start, end, freq=freq).shape[0] - 1
1034 | if not freq.onOffset(start):
1035 | actual_days -= 1
1036 |
1037 | timediff = end - start
1038 | delta_days = timediff.components.days - actual_days
1039 | return timediff - pd.Timedelta(days=delta_days)
1040 |
-------------------------------------------------------------------------------- /build_and_deploy_docs.sh: --------------------------------------------------------------------------------
1 | pushd docs
2 | make html
3 | ghp-import -n -p build/html/
4 | popd
5 |
-------------------------------------------------------------------------------- /docs/Makefile: --------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
21 |
22 | .PHONY: help
23 | help:
24 | @echo "Please use \`make <target>' where <target> is one of"
25 | @echo " html to make standalone HTML files"
26 | @echo " dirhtml to make HTML files named index.html in directories"
27 | @echo " singlehtml to make a single large HTML file"
28 | @echo " pickle to make pickle files"
29 | @echo " json to make JSON files"
30 | @echo " htmlhelp to make HTML files and a HTML help project"
31 | @echo " qthelp to make HTML files and a qthelp project"
32 | @echo " applehelp to make an Apple Help Book"
33 | @echo " devhelp to make HTML files and a Devhelp project"
34 | @echo " epub to make an epub"
35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
36 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
38 | @echo " text to make text files"
39 | @echo " man to make manual pages"
40 | @echo " texinfo to make Texinfo files"
41 | @echo " info to make Texinfo files and run them through makeinfo"
42 | @echo " gettext to make PO message catalogs"
43 | @echo " changes to make an overview of all changed/added/deprecated items"
44 | @echo " xml to make Docutils-native XML files"
45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
46 | @echo " linkcheck to check all external links for integrity"
47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
48 | @echo " coverage to run coverage check of the documentation (if enabled)"
49 |
50 | .PHONY: clean
51 | clean:
52 | rm -rf $(BUILDDIR)/*
53 |
54 | .PHONY: html
55 | html:
56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
57 | @echo
58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
59 |
60 | .PHONY: dirhtml
61 | dirhtml:
62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
63 | @echo
64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
65 |
66 | .PHONY: singlehtml
67 | singlehtml:
68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
69 | @echo
70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
71 |
72 | .PHONY: pickle
73 | pickle:
74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
75 | @echo
76 | @echo "Build finished; now you can process the pickle files."
77 |
78 | .PHONY: json
79 | json:
80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
81 | @echo
82 | @echo "Build finished; now you can process the JSON files."
83 |
84 | .PHONY: htmlhelp
85 | htmlhelp:
86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
87 | @echo
88 | @echo "Build finished; now you can run HTML Help Workshop with the" \
89 | ".hhp project file in $(BUILDDIR)/htmlhelp."
90 | 91 | .PHONY: qthelp 92 | qthelp: 93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 94 | @echo 95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Qfactor.qhcp" 98 | @echo "To view the help file:" 99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Qfactor.qhc" 100 | 101 | .PHONY: applehelp 102 | applehelp: 103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 104 | @echo 105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 106 | @echo "N.B. You won't be able to view it unless you put it in" \ 107 | "~/Library/Documentation/Help or install it in your application" \ 108 | "bundle." 109 | 110 | .PHONY: devhelp 111 | devhelp: 112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 113 | @echo 114 | @echo "Build finished." 115 | @echo "To view the help file:" 116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Qfactor" 117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Qfactor" 118 | @echo "# devhelp" 119 | 120 | .PHONY: epub 121 | epub: 122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 123 | @echo 124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 125 | 126 | .PHONY: latex 127 | latex: 128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 129 | @echo 130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 132 | "(use \`make latexpdf' here to do that automatically)." 133 | 134 | .PHONY: latexpdf 135 | latexpdf: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo "Running LaTeX files through pdflatex..." 138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 140 | 141 | .PHONY: latexpdfja 142 | latexpdfja: 143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 144 | @echo "Running LaTeX files through platex and dvipdfmx..." 145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 147 | 148 | .PHONY: text 149 | text: 150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 151 | @echo 152 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 153 | 154 | .PHONY: man 155 | man: 156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 157 | @echo 158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 159 | 160 | .PHONY: texinfo 161 | texinfo: 162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 163 | @echo 164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 165 | @echo "Run \`make' in that directory to run these through makeinfo" \ 166 | "(use \`make info' here to do that automatically)." 167 | 168 | .PHONY: info 169 | info: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo "Running Texinfo files through makeinfo..." 172 | make -C $(BUILDDIR)/texinfo info 173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 174 | 175 | .PHONY: gettext 176 | gettext: 177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 178 | @echo 179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
180 |
181 | .PHONY: changes
182 | changes:
183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
184 | @echo
185 | @echo "The overview file is in $(BUILDDIR)/changes."
186 |
187 | .PHONY: linkcheck
188 | linkcheck:
189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
190 | @echo
191 | @echo "Link check complete; look for any errors in the above output " \
192 | "or in $(BUILDDIR)/linkcheck/output.txt."
193 |
194 | .PHONY: doctest
195 | doctest:
196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
197 | @echo "Testing of doctests in the sources finished, look at the " \
198 | "results in $(BUILDDIR)/doctest/output.txt."
199 |
200 | .PHONY: coverage
201 | coverage:
202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
203 | @echo "Testing of coverage in the sources finished, look at the " \
204 | "results in $(BUILDDIR)/coverage/python.txt."
205 |
206 | .PHONY: xml
207 | xml:
208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
209 | @echo
210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
211 |
212 | .PHONY: pseudoxml
213 | pseudoxml:
214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
215 | @echo
216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
217 |
-------------------------------------------------------------------------------- /docs/make.bat: --------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | REM Command file for Sphinx documentation
4 |
5 | if "%SPHINXBUILD%" == "" (
6 | set SPHINXBUILD=sphinx-build
7 | )
8 | set BUILDDIR=build
9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
10 | set I18NSPHINXOPTS=%SPHINXOPTS% source
11 | if NOT "%PAPER%" == "" (
12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
14 | )
15 |
16 | if "%1" == "" goto help
17 |
18 | if "%1" == "help" (
19 | :help
20 | echo.Please use `make ^<target^>` where ^<target^> is one of
21 | echo. html to make standalone HTML files
22 | echo. dirhtml to make HTML files named index.html in directories
23 | echo. singlehtml to make a single large HTML file
24 | echo. pickle to make pickle files
25 | echo. json to make JSON files
26 | echo. htmlhelp to make HTML files and a HTML help project
27 | echo. qthelp to make HTML files and a qthelp project
28 | echo. devhelp to make HTML files and a Devhelp project
29 | echo. epub to make an epub
30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
31 | echo. text to make text files
32 | echo. man to make manual pages
33 | echo. texinfo to make Texinfo files
34 | echo. gettext to make PO message catalogs
35 | echo. changes to make an overview over all changed/added/deprecated items
36 | echo. xml to make Docutils-native XML files
37 | echo. pseudoxml to make pseudoxml-XML files for display purposes
38 | echo. linkcheck to check all external links for integrity
39 | echo. doctest to run all doctests embedded in the documentation if enabled
40 | echo. coverage to run coverage check of the documentation if enabled
coverage to run coverage check of the documentation if enabled 41 | goto end 42 | ) 43 | 44 | if "%1" == "clean" ( 45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 46 | del /q /s %BUILDDIR%\* 47 | goto end 48 | ) 49 | 50 | 51 | REM Check if sphinx-build is available and fall back to the Python module if not 52 | %SPHINXBUILD% 1>NUL 2>NUL 53 | if errorlevel 9009 goto sphinx_python 54 | goto sphinx_ok 55 | 56 | :sphinx_python 57 | 58 | set SPHINXBUILD=python -m sphinx.__init__ 59 | %SPHINXBUILD% 2> nul 60 | if errorlevel 9009 ( 61 | echo. 62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 63 | echo.installed, then set the SPHINXBUILD environment variable to point 64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 65 | echo.may add the Sphinx directory to PATH. 66 | echo. 67 | echo.If you don't have Sphinx installed, grab it from 68 | echo.http://sphinx-doc.org/ 69 | exit /b 1 70 | ) 71 | 72 | :sphinx_ok 73 | 74 | 75 | if "%1" == "html" ( 76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 80 | goto end 81 | ) 82 | 83 | if "%1" == "dirhtml" ( 84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 85 | if errorlevel 1 exit /b 1 86 | echo. 87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 88 | goto end 89 | ) 90 | 91 | if "%1" == "singlehtml" ( 92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 93 | if errorlevel 1 exit /b 1 94 | echo. 95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 96 | goto end 97 | ) 98 | 99 | if "%1" == "pickle" ( 100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 101 | if errorlevel 1 exit /b 1 102 | echo. 103 | echo.Build finished; now you can process the pickle files. 104 | goto end 105 | ) 106 | 107 | if "%1" == "json" ( 108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 109 | if errorlevel 1 exit /b 1 110 | echo. 111 | echo.Build finished; now you can process the JSON files. 112 | goto end 113 | ) 114 | 115 | if "%1" == "htmlhelp" ( 116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 117 | if errorlevel 1 exit /b 1 118 | echo. 119 | echo.Build finished; now you can run HTML Help Workshop with the ^ 120 | .hhp project file in %BUILDDIR%/htmlhelp. 121 | goto end 122 | ) 123 | 124 | if "%1" == "qthelp" ( 125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 129 | .qhcp project file in %BUILDDIR%/qthelp, like this: 130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Qfactor.qhcp 131 | echo.To view the help file: 132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Qfactor.qhc 133 | goto end 134 | ) 135 | 136 | if "%1" == "devhelp" ( 137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. 141 | goto end 142 | ) 143 | 144 | if "%1" == "epub" ( 145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 149 | goto end 150 | ) 151 | 152 | if "%1" == "latex" ( 153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 
157 | goto end 158 | ) 159 | 160 | if "%1" == "latexpdf" ( 161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 162 | cd %BUILDDIR%/latex 163 | make all-pdf 164 | cd %~dp0 165 | echo. 166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdfja" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf-ja 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "text" ( 181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 182 | if errorlevel 1 exit /b 1 183 | echo. 184 | echo.Build finished. The text files are in %BUILDDIR%/text. 185 | goto end 186 | ) 187 | 188 | if "%1" == "man" ( 189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 190 | if errorlevel 1 exit /b 1 191 | echo. 192 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 193 | goto end 194 | ) 195 | 196 | if "%1" == "texinfo" ( 197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 198 | if errorlevel 1 exit /b 1 199 | echo. 200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 201 | goto end 202 | ) 203 | 204 | if "%1" == "gettext" ( 205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 206 | if errorlevel 1 exit /b 1 207 | echo. 208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 209 | goto end 210 | ) 211 | 212 | if "%1" == "changes" ( 213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 214 | if errorlevel 1 exit /b 1 215 | echo. 216 | echo.The overview file is in %BUILDDIR%/changes. 217 | goto end 218 | ) 219 | 220 | if "%1" == "linkcheck" ( 221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 222 | if errorlevel 1 exit /b 1 223 | echo. 224 | echo.Link check complete; look for any errors in the above output ^ 225 | or in %BUILDDIR%/linkcheck/output.txt. 226 | goto end 227 | ) 228 | 229 | if "%1" == "doctest" ( 230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 231 | if errorlevel 1 exit /b 1 232 | echo. 233 | echo.Testing of doctests in the sources finished; look at the ^ 234 | results in %BUILDDIR%/doctest/output.txt. 235 | goto end 236 | ) 237 | 238 | if "%1" == "coverage" ( 239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 240 | if errorlevel 1 exit /b 1 241 | echo. 242 | echo.Testing of coverage in the sources finished; look at the ^ 243 | results in %BUILDDIR%/coverage/python.txt. 244 | goto end 245 | ) 246 | 247 | if "%1" == "xml" ( 248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 249 | if errorlevel 1 exit /b 1 250 | echo. 251 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 252 | goto end 253 | ) 254 | 255 | if "%1" == "pseudoxml" ( 256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 257 | if errorlevel 1 exit /b 1 258 | echo. 259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 260 | goto end 261 | ) 262 | 263 | :end 264 | -------------------------------------------------------------------------------- /docs/source/alphalens.rst: -------------------------------------------------------------------------------- 1 | 2 | Alphalens 3 | ========= 4 | 5 | Tear Sheets 6 | ----------- 7 | 8 | .. automodule:: alphalens.tears 9 | :members: 10 | :undoc-members: 11 | :show-inheritance: 12 | 13 | Performance 14 | ----------- 15 | 16 | .. 
automodule:: alphalens.performance 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | 21 | Plotting 22 | -------- 23 | 24 | .. automodule:: alphalens.plotting 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | Utilities 30 | --------- 31 | 32 | .. automodule:: alphalens.utils 33 | :members: 34 | :undoc-members: 35 | :show-inheritance: 36 | -------------------------------------------------------------------------------- /docs/source/alphalens.tests.rst: -------------------------------------------------------------------------------- 1 | alphalens.tests package 2 | ======================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | alphalens.tests.test_performance module 8 | --------------------------------------- 9 | 10 | .. automodule:: alphalens.tests.test_performance 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | alphalens.tests.test_utils module 16 | --------------------------------- 17 | 18 | .. automodule:: alphalens.tests.test_utils 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: alphalens.tests 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Alphalens documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Jul 5 15:45:48 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | sys.path.insert(0, os.path.abspath('../..')) 22 | 23 | from alphalens import __version__ as version 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | #needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | 'sphinx.ext.autodoc', 35 | 'numpydoc' 36 | ] 37 | 38 | # Add any paths that contain templates here, relative to this directory. 39 | templates_path = ['_templates'] 40 | 41 | # The suffix(es) of source filenames. 42 | # You can specify multiple suffixes as a list of strings: 43 | # source_suffix = ['.rst', '.md'] 44 | source_suffix = '.rst' 45 | 46 | # The encoding of source files. 47 | #source_encoding = 'utf-8-sig' 48 | 49 | # The master toctree document. 50 | master_doc = 'index' 51 | 52 | # General information about the project. 53 | project = u'Alphalens' 54 | copyright = u'2016, Quantopian, Inc.' 55 | author = u'Quantopian, Inc.' 56 | 57 | # The full version, including alpha/beta/rc tags. 58 | release = version 59 | 60 | # The language for content autogenerated by Sphinx. Refer to documentation 61 | # for a list of supported languages. 
62 | # 63 | # This is also used if you do content translation via gettext catalogs. 64 | # Usually you set "language" from the command line for these cases. 65 | language = None 66 | 67 | # There are two options for replacing |today|: either, you set today to some 68 | # non-false value, then it is used: 69 | #today = '' 70 | # Else, today_fmt is used as the format for a strftime call. 71 | #today_fmt = '%B %d, %Y' 72 | 73 | # List of patterns, relative to source directory, that match files and 74 | # directories to ignore when looking for source files. 75 | exclude_patterns = [] 76 | 77 | # The reST default role (used for this markup: `text`) to use for all 78 | # documents. 79 | #default_role = None 80 | 81 | # If true, '()' will be appended to :func: etc. cross-reference text. 82 | #add_function_parentheses = True 83 | 84 | # If true, the current module name will be prepended to all description 85 | # unit titles (such as .. function::). 86 | #add_module_names = True 87 | 88 | # If true, sectionauthor and moduleauthor directives will be shown in the 89 | # output. They are ignored by default. 90 | #show_authors = False 91 | 92 | # The name of the Pygments (syntax highlighting) style to use. 93 | pygments_style = 'sphinx' 94 | 95 | # A list of ignored prefixes for module index sorting. 96 | #modindex_common_prefix = [] 97 | 98 | # If true, keep warnings as "system message" paragraphs in the built documents. 99 | #keep_warnings = False 100 | 101 | # If true, `todo` and `todoList` produce output, else they produce nothing. 102 | todo_include_todos = False 103 | 104 | 105 | # -- Options for HTML output ---------------------------------------------- 106 | 107 | # The theme to use for HTML and HTML Help pages. See the documentation for 108 | # a list of builtin themes. 109 | html_theme = 'sphinx_rtd_theme' 110 | 111 | # Theme options are theme-specific and customize the look and feel of a theme 112 | # further. For a list of options available for each theme, see the 113 | # documentation. 114 | #html_theme_options = {} 115 | 116 | # Add any paths that contain custom themes here, relative to this directory. 117 | #html_theme_path = [] 118 | 119 | # The name for this set of Sphinx documents. If None, it defaults to 120 | # "<project> v<release> documentation". 121 | #html_title = None 122 | 123 | # A shorter title for the navigation bar. Default is the same as html_title. 124 | #html_short_title = None 125 | 126 | # The name of an image file (relative to this directory) to place at the top 127 | # of the sidebar. 128 | #html_logo = None 129 | 130 | # The name of an image file (within the static path) to use as favicon of the 131 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 132 | # pixels large. 133 | #html_favicon = None 134 | 135 | # Add any paths that contain custom static files (such as style sheets) here, 136 | # relative to this directory. They are copied after the builtin static files, 137 | # so a file named "default.css" will overwrite the builtin "default.css". 138 | html_static_path = ['_static'] 139 | 140 | # Add any extra paths that contain custom files (such as robots.txt or 141 | # .htaccess) here, relative to this directory. These files are copied 142 | # directly to the root of the documentation. 143 | #html_extra_path = [] 144 | 145 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 146 | # using the given strftime format. 
147 | #html_last_updated_fmt = '%b %d, %Y' 148 | 149 | # If true, SmartyPants will be used to convert quotes and dashes to 150 | # typographically correct entities. 151 | #html_use_smartypants = True 152 | 153 | # Custom sidebar templates, maps document names to template names. 154 | #html_sidebars = {} 155 | 156 | # Additional templates that should be rendered to pages, maps page names to 157 | # template names. 158 | #html_additional_pages = {} 159 | 160 | # If false, no module index is generated. 161 | #html_domain_indices = True 162 | 163 | # If false, no index is generated. 164 | #html_use_index = True 165 | 166 | # If true, the index is split into individual pages for each letter. 167 | #html_split_index = False 168 | 169 | # If true, links to the reST sources are added to the pages. 170 | #html_show_sourcelink = True 171 | 172 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 173 | #html_show_sphinx = True 174 | 175 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 176 | #html_show_copyright = True 177 | 178 | # If true, an OpenSearch description file will be output, and all pages will 179 | # contain a <link> tag referring to it. The value of this option must be the 180 | # base URL from which the finished HTML is served. 181 | #html_use_opensearch = '' 182 | 183 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 184 | #html_file_suffix = None 185 | 186 | # Language to be used for generating the HTML full-text search index. 187 | # Sphinx supports the following languages: 188 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 189 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' 190 | #html_search_language = 'en' 191 | 192 | # A dictionary with options for the search language support, empty by default. 193 | # Now only 'ja' uses this config value 194 | #html_search_options = {'type': 'default'} 195 | 196 | # The name of a javascript file (relative to the configuration directory) that 197 | # implements a search results scorer. If empty, the default will be used. 198 | #html_search_scorer = 'scorer.js' 199 | 200 | # Output file base name for HTML help builder. 201 | htmlhelp_basename = 'Alphalensdoc' 202 | 203 | # -- Options for LaTeX output --------------------------------------------- 204 | 205 | latex_elements = { 206 | # The paper size ('letterpaper' or 'a4paper'). 207 | #'papersize': 'letterpaper', 208 | 209 | # The font size ('10pt', '11pt' or '12pt'). 210 | #'pointsize': '10pt', 211 | 212 | # Additional stuff for the LaTeX preamble. 213 | #'preamble': '', 214 | 215 | # Latex figure (float) alignment 216 | #'figure_align': 'htbp', 217 | } 218 | 219 | # Grouping the document tree into LaTeX files. List of tuples 220 | # (source start file, target name, title, 221 | # author, documentclass [howto, manual, or own class]). 222 | latex_documents = [ 223 | (master_doc, 'Alphalens.tex', u'Alphalens Documentation', 224 | u'Quantopian, Inc.', 'manual'), 225 | ] 226 | 227 | # The name of an image file (relative to this directory) to place at the top of 228 | # the title page. 229 | #latex_logo = None 230 | 231 | # For "manual" documents, if this is true, then toplevel headings are parts, 232 | # not chapters. 233 | #latex_use_parts = False 234 | 235 | # If true, show page references after internal links. 236 | #latex_show_pagerefs = False 237 | 238 | # If true, show URL addresses after external links. 239 | #latex_show_urls = False 240 | 241 | # Documents to append as an appendix to all manuals. 
242 | #latex_appendices = [] 243 | 244 | # If false, no module index is generated. 245 | #latex_domain_indices = True 246 | 247 | 248 | # -- Options for manual page output --------------------------------------- 249 | 250 | # One entry per manual page. List of tuples 251 | # (source start file, name, description, authors, manual section). 252 | man_pages = [ 253 | (master_doc, 'alphalens', u'Alphalens Documentation', 254 | [author], 1) 255 | ] 256 | 257 | # If true, show URL addresses after external links. 258 | #man_show_urls = False 259 | 260 | 261 | # -- Options for Texinfo output ------------------------------------------- 262 | 263 | # Grouping the document tree into Texinfo files. List of tuples 264 | # (source start file, target name, title, author, 265 | # dir menu entry, description, category) 266 | texinfo_documents = [ 267 | (master_doc, 'Alphalens', u'Alphalens Documentation', 268 | author, 'Alphalens', 'Performance analysis of predictive (alpha) stock factors.', 269 | 'Miscellaneous'), 270 | ] 271 | 272 | # Documents to append as an appendix to all manuals. 273 | #texinfo_appendices = [] 274 | 275 | # If false, no module index is generated. 276 | #texinfo_domain_indices = True 277 | 278 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 279 | #texinfo_show_urls = 'footnote' 280 | 281 | # If true, do not generate a @detailmenu in the "Top" node's menu. 282 | #texinfo_no_detailmenu = False 283 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../README.rst 2 | 3 | API 4 | --- 5 | 6 | Information on specific functions, classes, or methods. 7 | 8 | - :mod:`alphalens.tears` 9 | - :mod:`alphalens.performance` 10 | - :mod:`alphalens.plotting` 11 | - :mod:`alphalens.utils` 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | alphalens 2 | ========= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | alphalens 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # See the docstring in versioneer.py for instructions. Note that you must 2 | # re-run 'versioneer.py setup' after changing this section, and commit the 3 | # resulting files. 
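# A hedged sketch of that re-run step (it assumes the vendored versioneer.py
# at the repository root, as in this project):
#
#   python versioneer.py setup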
4 | [versioneer] 5 | VCS=git 6 | style=pep440 7 | versionfile_source=alphalens/_version.py 8 | versionfile_build=alphalens/_version.py 9 | tag_prefix= v 10 | parentdir_prefix= alphalens- 11 | 12 | [flake8] 13 | exclude = 14 | .git, 15 | __pycache__, 16 | docs, 17 | versioneer.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from setuptools import setup, find_packages 3 | import versioneer 4 | import sys 5 | 6 | long_description = '' 7 | 8 | if 'upload' in sys.argv: 9 | with open('README.rst') as f: 10 | long_description = f.read() 11 | 12 | install_reqs = [ 13 | 'matplotlib>=1.4.0', 14 | 'numpy>=1.9.1', 15 | 'pandas>=0.18.0', 16 | 'scipy>=0.14.0', 17 | 'seaborn>=0.6.0', 18 | 'statsmodels>=0.6.1', 19 | 'IPython>=3.2.3', 20 | 'empyrical>=0.5.0', 21 | ] 22 | 23 | extra_reqs = { 24 | 'test': [ 25 | "nose>=1.3.7", 26 | "parameterized>=0.5.0", 27 | "tox>=2.3.1", 28 | "flake8>=3.7.9", 29 | ], 30 | } 31 | 32 | if __name__ == "__main__": 33 | setup( 34 | name='alphalens', 35 | version=versioneer.get_version(), 36 | cmdclass=versioneer.get_cmdclass(), 37 | description='Performance analysis of predictive (alpha) stock factors', 38 | author='Quantopian Inc.', 39 | author_email='opensource@quantopian.com', 40 | packages=find_packages(include=['alphalens', 'alphalens.*']), 41 | package_data={ 42 | 'alphalens': ['examples/*'], 43 | }, 44 | long_description=long_description, 45 | classifiers=[ 46 | 'Development Status :: 5 - Production/Stable', 47 | 'Intended Audience :: Developers', 48 | 'License :: OSI Approved :: Apache Software License', 49 | 'Natural Language :: English', 50 | 'Operating System :: OS Independent', 51 | 'Programming Language :: Python :: 2.7', 52 | 'Programming Language :: Python :: 3.4', 53 | 'Programming Language :: Python :: 3.5', 54 | 'Programming Language :: Python', 55 | 'Topic :: Utilities', 56 | 'Topic :: Office/Business :: Financial', 57 | 'Topic :: Scientific/Engineering :: Information Analysis', 58 | ], 59 | url='https://github.com/quantopian/alphalens', 60 | install_requires=install_reqs, 61 | extras_require=extra_reqs, 62 | ) 63 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist=py{27,35}-pandas{16,17,18} 3 | skip_missing_interpreters=True 4 | 5 | [testenv] 6 | commands= 7 | py{27,35}-pandas16: pip install -vv pandas>=0.16,<0.17 8 | py{27,35}-pandas17: pip install -vv pandas>=0.17,<0.18 9 | py{27,35}-pandas18: pip install -vv pandas>=0.18,<0.19 10 | 11 | pip install .[test] 12 | # cd out of the alphalens root so that we actually use what we installed 13 | # with our sdist. Otherwise, our imports will find alphalens' source from 14 | # our CWD. 15 | /bin/bash -c 'cd /tmp && nosetests -P {toxinidir}' 16 | --------------------------------------------------------------------------------
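A minimal sketch of exercising the tox matrix above locally (hedged: it assumes tox and the matching Python interpreters are installed; the environment names come straight from the envlist in tox.ini):

    tox -e py35-pandas18    # a single interpreter/pandas combination
    tox                     # the full py{27,35} x pandas{16,17,18} matrix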