├── .flake8
├── .gitattributes
├── .github
│   ├── ISSUE_TEMPLATE.md
│   ├── dependabot.yml
│   └── workflows
│       ├── build_wheels.yml
│       ├── conda_package.yml
│       ├── test_wheels.yml
│       └── unit_tests.yml
├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.md
├── build_and_deploy_docs.sh
├── conda
│   └── recipe
│       └── meta.yaml
├── docs
│   ├── Makefile
│   ├── deploy.py
│   ├── make.bat
│   └── source
│       ├── api-reference.rst
│       ├── conf.py
│       ├── index.rst
│       └── notebooks
│           ├── event_study.ipynb
│           ├── intraday_factor.ipynb
│           ├── overview.ipynb
│           └── pyfolio_integration.ipynb
├── pyproject.toml
├── src
│   └── alphalens
│       ├── __init__.py
│       ├── examples
│       │   ├── event_study.ipynb
│       │   ├── intraday_factor.ipynb
│       │   ├── overview.ipynb
│       │   └── pyfolio_integration.ipynb
│       ├── performance.py
│       ├── plotting.py
│       ├── tears.py
│       └── utils.py
└── tests
    ├── __init__.py
    ├── test_performance.py
    ├── test_tears.py
    └── test_utils.py
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude =
3 | .git,
4 | .pytest_cache,
5 | conda,
6 | _sources,
7 | __pycache__,
8 | docs/source/conf.py,
9 | src/alphalens/_version.py
10 | max-line-length = 88
11 | max-complexity = 18
12 | select = B,C,E,F,W,T4,B9
13 | ignore = E203, E266, E501, W503, F403, F401, E231
14 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | alphalens/_version.py export-subst
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ## Problem Description
2 |
3 | **Please provide a minimal, self-contained, and reproducible example:**
4 | ```python
5 | [Paste code here]
6 | ```
7 |
8 | **Please provide the full traceback:**
9 | ```python
10 | [Paste traceback here]
11 | ```
12 |
13 | **Please provide any additional information below:**
14 |
15 |
16 | ## Versions
17 |
18 | * Alphalens version:
19 | * Python version:
20 | * Pandas version:
21 | * Matplotlib version:
22 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5 |
6 | version: 2
7 | updates:
8 | # Maintain dependencies for GitHub Actions
9 | - package-ecosystem: "github-actions"
10 | # Workflow files stored in the default location of `.github/workflows`
11 | directory: "/"
12 | schedule:
13 | interval: "daily"
14 | open-pull-requests-limit: 10
15 |
--------------------------------------------------------------------------------
/.github/workflows/build_wheels.yml:
--------------------------------------------------------------------------------
1 | name: PyPI
2 |
3 | on:
4 | workflow_dispatch:
5 | inputs:
6 | target:
7 | type: choice
8 | description: 'Package Index'
9 | required: true
10 | default: 'PYPI'
11 | options: [ 'TESTPYPI', 'PYPI' ]
12 | version:
13 | description: 'Version to publish'
14 | required: true
15 | default: '0.4.6'
16 |
17 | jobs:
18 | dist:
19 | name: Package source distribution
20 | runs-on: ${{ matrix.os }}
21 | strategy:
22 | fail-fast: false
23 | matrix:
24 | os: [ ubuntu-latest ]
25 | python-version: [ "3.12" ]
26 |
27 | steps:
28 | - name: Checkout alphalens
29 | uses: actions/checkout@v4
30 | with:
31 | fetch-depth: 0
32 | fetch-tags: true
33 | ref: ${{ github.event.inputs.version }}
34 |
35 | - name: Set up Python ${{ matrix.python-version }}
36 | uses: actions/setup-python@v5
37 | with:
38 | python-version: ${{ matrix.python-version }}
39 |
40 | - name: Build wheel
41 | run: pipx run build
42 |
43 | - name: Store artifacts
44 | uses: actions/upload-artifact@v4
45 | with:
46 | path: dist/*
47 |
48 | - name: Check metadata
49 | run: pipx run twine check dist/*
50 |
51 | upload_pypi:
52 | needs: [ dist ]
53 | runs-on: ubuntu-latest
54 | steps:
55 | - uses: actions/download-artifact@v4
56 | with:
57 | name: artifact
58 | path: dist
59 |
60 | - name: Publish to PyPI
61 | if: ${{ github.event.inputs.target == 'PYPI' }}
62 | uses: pypa/gh-action-pypi-publish@release/v1
63 | with:
64 | user: __token__
65 | password: ${{ secrets.PYPI_TOKEN }}
66 |
67 | - name: Publish to PyPI - Test
68 | if: ${{ github.event.inputs.target == 'TESTPYPI' }}
69 | uses: pypa/gh-action-pypi-publish@release/v1
70 | with:
71 | user: __token__
72 | password: ${{ secrets.TESTPYPI_TOKEN }}
73 | repository-url: https://test.pypi.org/legacy/
74 | skip-existing: true
75 | verbose: true
76 |
--------------------------------------------------------------------------------
/.github/workflows/conda_package.yml:
--------------------------------------------------------------------------------
1 | name: Anaconda
2 |
3 | on: workflow_dispatch
4 |
5 | jobs:
6 | build_wheels:
7 | name: py${{ matrix.python }} on ${{ matrix.os }}
8 | runs-on: ${{ matrix.os }}
9 | env:
10 | ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_TOKEN }}
11 | defaults:
12 | run:
13 | shell: bash -l {0}
14 |
15 | strategy:
16 | fail-fast: false
17 | matrix:
18 | os: [ macos-latest, windows-latest, ubuntu-latest ]
19 | python: [ '3.7', '3.8' ,'3.9' ]
20 |
21 | steps:
22 | - name: Checkout alphalens-reloaded
23 | uses: actions/checkout@v4
24 |
25 | - name: Setup miniconda3
26 | uses: conda-incubator/setup-miniconda@v3
27 | with:
28 | miniconda-version: latest
29 | auto-update-conda: true
30 | channel-priority: true
31 | mamba-version: "*"
32 | python-version: ${{ matrix.python }}
33 | activate-environment: recipe
34 | channels: ml4t, conda-forge, defaults, anaconda, ranaroussi
35 |
36 | - name: create uploader
37 | # address broken client under py3.9
38 | if: ${{ matrix.python == '3.9' }}
39 | run: conda create -n up python=3.7 anaconda-client
40 |
41 | - name: conda build for ${{ matrix.os }}
42 | run: |
43 | conda activate recipe
44 | mamba install -n recipe boa conda-verify anaconda-client
45 | conda mambabuild --output-folder . --python ${{ matrix.python }} conda/recipe
46 |
47 | - name: activate uploader
48 | # address broken client under py3.9
49 | if: ${{ matrix.python == '3.9' }}
50 | run: conda activate up
51 |
52 | - name: store windows result
53 | uses: actions/upload-artifact@v4
54 | if: ${{ matrix.os == 'windows-latest' }}
55 | with:
56 | path: win-64/*.tar.bz2
57 |
58 | - name: upload windows
59 | if: ${{ matrix.os == 'windows-latest' }}
60 | run: anaconda upload -l main -u ml4t win-64/*.tar.bz2
61 |
62 | - name: store linux result
63 | uses: actions/upload-artifact@v4
64 | if: ${{ matrix.os == 'ubuntu-latest' }}
65 | with:
66 | path: linux-64/*.tar.bz2
67 |
68 | - name: upload linux
69 | if: ${{ matrix.os == 'ubuntu-latest' }}
70 | run: anaconda upload -l main -u ml4t linux-64/*.tar.bz2
71 |
72 | - name: store macos result
73 | uses: actions/upload-artifact@v4
74 | if: ${{ matrix.os == 'macos-latest' }}
75 | with:
76 | path: osx-64/*.tar.bz2
77 |
78 | - name: upload macos
79 | if: ${{ matrix.os == 'macos-latest' }}
80 | run: anaconda upload -l main -u ml4t osx-64/*.tar.bz2
81 |
--------------------------------------------------------------------------------
/.github/workflows/test_wheels.yml:
--------------------------------------------------------------------------------
1 | name: Test Wheels
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 | test_wheels:
8 | runs-on: ${{ matrix.os }}
9 | strategy:
10 | fail-fast: false
11 | matrix:
12 | os: [ ubuntu-latest, windows-latest, macos-latest ]
13 | python-version: [ 3.7, 3.8, 3.9 ]
14 | steps:
15 | - name: Set up Python ${{ matrix.python-version }}
16 | uses: actions/setup-python@v5
17 | with:
18 | python-version: ${{ matrix.python-version }}
19 |
20 |       - name: Install and test wheel
21 | run: |
22 | pip install -U pip wheel
23 | pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple alphalens-reloaded[test]
24 | pytest alphalens/tests
25 |
--------------------------------------------------------------------------------
/.github/workflows/unit_tests.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 | schedule:
11 | - cron: "30 9 * * 6"
12 | workflow_dispatch:
13 |
14 | jobs:
15 | build:
16 | runs-on: ${{ matrix.os }}
17 | strategy:
18 | fail-fast: false
19 | matrix:
20 | os: [ ubuntu-latest , windows-latest, macos-latest ]
21 | python-version: [ '3.10', '3.11', '3.12', '3.13' ]
22 | steps:
23 | - name: Checkout alphalens
24 | uses: actions/checkout@v4
25 |
26 | - name: Set up Python ${{ matrix.python-version }}
27 | uses: actions/setup-python@v5
28 | with:
29 | python-version: ${{ matrix.python-version }}
30 |
31 | - name: Install alphalens
32 | run: |
33 | python -m pip install --upgrade pip
34 | pip install tox tox-gh-actions
35 | pip install -e .[test]
36 |
37 | - name: Lint with flake8
38 | run: |
39 | flake8
40 |
41 | - name: Unittests with tox & pytest
42 | run: |
43 | tox
44 |
45 | - name: Upload coverage data to coveralls.io
46 | run: coveralls --service=github
47 | env:
48 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
49 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | *.xml
3 |
4 | *.iml
5 |
6 | *.pyc
7 |
8 | build/
9 | docs/build/
10 | .ipynb_checkpoints
11 |
12 | # Tox puts virtualenvs here by default.
13 | .tox/
14 |
15 | # coverage.py outputs.
16 | cover
17 | .coverage
18 |
19 | # Intermediate outputs from building distributions for PyPI.
20 | dist
21 | *.egg-info/
22 |
23 | # Emacs temp files.
24 | *~
25 |
26 | .idea
27 | .noseids
28 | update
29 | .python-version
30 | .pre-commit-config.yaml
31 | conda/recipe
32 | _static
33 | _version.py
34 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2018 Quantopian, Inc.
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include versioneer.py
2 | include alphalens/_version.py
3 | include LICENSE
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | 
8 | [](https://github.com/stefan-jansen/alphalens-reloaded/actions/workflows/conda_package.yml)
9 | [](https://github.com/stefan-jansen/alphalens-reloaded/actions/workflows/unit_tests.yml)
10 | [](https://github.com/stefan-jansen/alphalens-reloaded/actions/workflows/build_wheels.yml)
11 | [](https://coveralls.io/github/stefan-jansen/alphalens-reloaded?branch=main)
12 | 
13 | 
14 | 
15 | 
16 |
17 | Alphalens is a Python library for performance analysis of predictive
18 | (alpha) stock factors. Alphalens works great with the
19 | [Zipline](https://www.zipline.ml4trading.io/) open-source backtesting library and with [Pyfolio](https://github.com/quantopian/pyfolio), which provides performance and risk analysis of financial portfolios.
20 |
21 | The main function of Alphalens is to surface the most relevant statistics and plots about an alpha factor, including:
22 |
23 | - Returns Analysis
24 | - Information Coefficient Analysis
25 | - Turnover Analysis
26 | - Grouped Analysis
27 |
28 | # Getting started
29 |
30 | With a signal and pricing data, creating a factor "tear sheet" is a two-step process:
31 |
32 | ```python
33 | import alphalens
34 |
35 | # Ingest and format data
36 | factor_data = alphalens.utils.get_clean_factor_and_forward_returns(my_factor,
37 | pricing,
38 | quantiles=5,
39 | groupby=ticker_sector,
40 | groupby_labels=sector_names)
41 |
42 | # Run analysis
43 | alphalens.tears.create_full_tear_sheet(factor_data)
44 | ```
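
Each thematic analysis is also available on its own. A minimal sketch, assuming the `factor_data` DataFrame created above:

```python
# Returns-only tear sheet instead of the full report
alphalens.tears.create_returns_tear_sheet(factor_data)

# Or compute individual metrics directly, e.g. the daily information coefficient
ic = alphalens.performance.factor_information_coefficient(factor_data)
```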
45 |
46 | # Learn more
47 |
48 | Check out the [example notebooks](https://github.com/stefan-jansen/alphalens-reloaded/tree/master/alphalens/examples)
49 | for more on how to read and use the factor tear sheet.
50 |
51 | # Installation
52 |
53 | Install with pip:
54 |
55 | pip install alphalens-reloaded
56 |
57 | Install with conda:
58 |
59 | conda install -c ml4t alphalens-reloaded
60 |
61 | Install from the master branch of the Alphalens repository (development code):
62 |
63 | pip install git+https://github.com/stefan-jansen/alphalens-reloaded
64 |
65 | Alphalens depends on:
66 |
67 | - [matplotlib](https://github.com/matplotlib/matplotlib)
68 | - [numpy](https://github.com/numpy/numpy)
69 | - [pandas](https://github.com/pandas-dev/pandas)
70 | - [scipy](https://github.com/scipy/scipy)
71 | - [seaborn](https://github.com/mwaskom/seaborn)
72 | - [statsmodels](https://github.com/statsmodels/statsmodels)
73 |
74 | > Note that NumPy>=2.0 requires pandas>=2.2.2. If you are using an older version of pandas, you may need to upgrade
75 | > it accordingly; otherwise you may encounter compatibility issues.
76 |
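For example, to pull in a compatible combination (the pins below are illustrative, not a strict requirement):

    pip install --upgrade "numpy>=2.0" "pandas>=2.2.2"
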
77 | # Usage
78 |
79 | A good way to get started is to run the examples in a [Jupyter notebook](https://jupyter.org/).
80 |
81 | To get set up with an example, you can:
82 |
83 | Run a Jupyter notebook server via:
84 |
85 | ```bash
86 | jupyter notebook
87 | ```
88 |
89 | From the notebook list page (usually found at `http://localhost:8888/`), navigate to the examples directory and open any file with a .ipynb extension.
90 |
91 | Execute the code in a notebook cell by clicking on it and hitting Shift+Enter.
92 |
93 | # Questions?
94 |
95 | If you find a bug, feel free to open an issue on our [github tracker](https://github.com/stefan-jansen/alphalens-reloaded/issues).
96 |
97 | # Contribute
98 |
99 | If you want to contribute, a great place to start would be the
100 | [help-wanted issues](https://github.com/stefan-jansen/alphalens-reloaded/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22).
101 |
102 | # Credits
103 |
104 | - [Andrew Campbell](https://github.com/a-campbell)
105 | - [James Christopher](https://github.com/jameschristopher)
106 | - [Thomas Wiecki](https://github.com/twiecki)
107 | - [Jonathan Larkin](https://github.com/marketneutral)
108 | - Jessica Stauth
109 | - [Taso Petridis](https://github.com/tasopetridis)
110 |
111 | For a full list of contributors, see the [contributors page](https://github.com/stefan-jansen/alphalens-reloaded/graphs/contributors).
112 |
113 | # Example Tear Sheets
114 |
115 | Example factor courtesy of [ExtractAlpha](https://extractalpha.com/)
116 |
117 | ## Performance Metrics Tables
118 |
119 | 
120 |
121 | ## Returns Tear Sheet
122 |
123 | 
124 |
125 | ## Information Coefficient Tear Sheet
126 |
127 | 
128 |
129 | ## Sector Tear Sheet
130 |
131 | 
132 |
--------------------------------------------------------------------------------
/build_and_deploy_docs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Build the Sphinx docs and publish the HTML to the gh-pages branch.
3 | pushd docs
4 | make html
5 | ghp-import -n -p build/html/
6 | popd
5 |
--------------------------------------------------------------------------------
/conda/recipe/meta.yaml:
--------------------------------------------------------------------------------
1 | {% set name = "alphalens-reloaded" %}
2 | {% set version = "0.4.2" %}
3 |
4 | package:
5 | name: {{ name|lower }}
6 | version: {{ version }}
7 |
8 | source:
9 | url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.tar.gz
10 | md5: fb3a6ab0f6c5fdb95750181a7b9654f0
11 |
12 | build:
13 | number: 0
14 | skip: true # [py<37 or not x86_64]
15 | script: {{ PYTHON }} -m pip install . -vv
16 |
17 | requirements:
18 | build:
19 | - python
20 | - setuptools
21 |
22 | run:
23 | - python
24 | - matplotlib>=1.4.0
25 | - numpy>=1.9.1
26 | - pandas>=1.0.0
27 | - scipy>=0.14.0
28 | - seaborn>=0.6.0
29 | - statsmodels>=0.6.1
30 | - IPython>=3.2.3
31 | - empyrical-reloaded>=0.5.8
32 |
33 | test:
34 | imports:
35 | - alphalens
36 |
37 | about:
38 | home: https://alphalens.ml4trading.io
39 | summary: 'Performance analysis of predictive (alpha) stock factors'
40 | license: Apache 2.0
41 | license_file: LICENSE
42 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
21 |
22 | .PHONY: help
23 | help:
24 | 	@echo "Please use \`make <target>' where <target> is one of"
25 | @echo " html to make standalone HTML files"
26 | @echo " dirhtml to make HTML files named index.html in directories"
27 | @echo " singlehtml to make a single large HTML file"
28 | @echo " pickle to make pickle files"
29 | @echo " json to make JSON files"
30 | @echo " htmlhelp to make HTML files and a HTML help project"
31 | @echo " qthelp to make HTML files and a qthelp project"
32 | @echo " applehelp to make an Apple Help Book"
33 | @echo " devhelp to make HTML files and a Devhelp project"
34 | @echo " epub to make an epub"
35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
36 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
38 | @echo " text to make text files"
39 | @echo " man to make manual pages"
40 | @echo " texinfo to make Texinfo files"
41 | @echo " info to make Texinfo files and run them through makeinfo"
42 | @echo " gettext to make PO message catalogs"
43 | @echo " changes to make an overview of all changed/added/deprecated items"
44 | @echo " xml to make Docutils-native XML files"
45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
46 | @echo " linkcheck to check all external links for integrity"
47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
48 | @echo " coverage to run coverage check of the documentation (if enabled)"
49 |
50 | .PHONY: clean
51 | clean:
52 | rm -rf $(BUILDDIR)/*
53 |
54 | .PHONY: html
55 | html:
56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
57 | @echo
58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
59 |
60 | .PHONY: dirhtml
61 | dirhtml:
62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
63 | @echo
64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
65 |
66 | .PHONY: singlehtml
67 | singlehtml:
68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
69 | @echo
70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
71 |
72 | .PHONY: pickle
73 | pickle:
74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
75 | @echo
76 | @echo "Build finished; now you can process the pickle files."
77 |
78 | .PHONY: json
79 | json:
80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
81 | @echo
82 | @echo "Build finished; now you can process the JSON files."
83 |
84 | .PHONY: htmlhelp
85 | htmlhelp:
86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
87 | @echo
88 | @echo "Build finished; now you can run HTML Help Workshop with the" \
89 | ".hhp project file in $(BUILDDIR)/htmlhelp."
90 |
91 | .PHONY: qthelp
92 | qthelp:
93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
94 | @echo
95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Qfactor.qhcp"
98 | @echo "To view the help file:"
99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Qfactor.qhc"
100 |
101 | .PHONY: applehelp
102 | applehelp:
103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
104 | @echo
105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
106 | @echo "N.B. You won't be able to view it unless you put it in" \
107 | "~/Library/Documentation/Help or install it in your application" \
108 | "bundle."
109 |
110 | .PHONY: devhelp
111 | devhelp:
112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
113 | @echo
114 | @echo "Build finished."
115 | @echo "To view the help file:"
116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Qfactor"
117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Qfactor"
118 | @echo "# devhelp"
119 |
120 | .PHONY: epub
121 | epub:
122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
123 | @echo
124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
125 |
126 | .PHONY: latex
127 | latex:
128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
129 | @echo
130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
132 | "(use \`make latexpdf' here to do that automatically)."
133 |
134 | .PHONY: latexpdf
135 | latexpdf:
136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
137 | @echo "Running LaTeX files through pdflatex..."
138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
140 |
141 | .PHONY: latexpdfja
142 | latexpdfja:
143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
144 | @echo "Running LaTeX files through platex and dvipdfmx..."
145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
147 |
148 | .PHONY: text
149 | text:
150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
151 | @echo
152 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
153 |
154 | .PHONY: man
155 | man:
156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
157 | @echo
158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
159 |
160 | .PHONY: texinfo
161 | texinfo:
162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
163 | @echo
164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
165 | @echo "Run \`make' in that directory to run these through makeinfo" \
166 | "(use \`make info' here to do that automatically)."
167 |
168 | .PHONY: info
169 | info:
170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
171 | @echo "Running Texinfo files through makeinfo..."
172 | make -C $(BUILDDIR)/texinfo info
173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
174 |
175 | .PHONY: gettext
176 | gettext:
177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
178 | @echo
179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
180 |
181 | .PHONY: changes
182 | changes:
183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
184 | @echo
185 | @echo "The overview file is in $(BUILDDIR)/changes."
186 |
187 | .PHONY: linkcheck
188 | linkcheck:
189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
190 | @echo
191 | @echo "Link check complete; look for any errors in the above output " \
192 | "or in $(BUILDDIR)/linkcheck/output.txt."
193 |
194 | .PHONY: doctest
195 | doctest:
196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
197 | @echo "Testing of doctests in the sources finished, look at the " \
198 | "results in $(BUILDDIR)/doctest/output.txt."
199 |
200 | .PHONY: coverage
201 | coverage:
202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
203 | @echo "Testing of coverage in the sources finished, look at the " \
204 | "results in $(BUILDDIR)/coverage/python.txt."
205 |
206 | .PHONY: xml
207 | xml:
208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
209 | @echo
210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
211 |
212 | .PHONY: pseudoxml
213 | pseudoxml:
214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
215 | @echo
216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
217 |
--------------------------------------------------------------------------------
/docs/deploy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 | from contextlib import contextmanager
4 | from glob import glob
5 | import os
6 | from os.path import basename, exists, isfile
7 | from pathlib import Path
8 | from shutil import move, rmtree
9 | from subprocess import check_call
10 |
11 | HERE = Path(__file__).resolve(strict=True).parent
12 | ALPHALENS_ROOT = HERE.parent
13 | TEMP_LOCATION = "/tmp/alphalens-doc"
14 | TEMP_LOCATION_GLOB = TEMP_LOCATION + "/*"
15 |
16 |
17 | @contextmanager
18 | def removing(path):
19 | try:
20 | yield
21 | finally:
22 | rmtree(path)
23 |
24 |
25 | def ensure_not_exists(path):
26 | if not exists(path):
27 | return
28 | if isfile(path):
29 | os.unlink(path)
30 | else:
31 | rmtree(path)
32 |
33 |
34 | def main():
35 | old_dir = Path.cwd()
36 | print("Moving to %s." % HERE)
37 | os.chdir(HERE)
38 |
39 | try:
40 | print("Cleaning docs with 'make clean'")
41 | check_call(["make", "clean"])
42 | print("Building docs with 'make html'")
43 | check_call(["make", "html"])
44 |
45 | print("Clearing temp location '%s'" % TEMP_LOCATION)
46 | rmtree(TEMP_LOCATION, ignore_errors=True)
47 |
48 | with removing(TEMP_LOCATION):
49 | print("Copying built files to temp location.")
50 | move("build/html", TEMP_LOCATION)
51 |
52 | print("Moving to '%s'" % ALPHALENS_ROOT)
53 | os.chdir(ALPHALENS_ROOT)
54 |
55 | print("Checking out gh-pages branch.")
56 | check_call(
57 | [
58 | "git",
59 | "branch",
60 | "-f",
61 | "--track",
62 | "gh-pages",
63 | "origin/gh-pages",
64 | ]
65 | )
66 | check_call(["git", "checkout", "gh-pages"])
67 | check_call(["git", "reset", "--hard", "origin/gh-pages"])
68 |
69 | print("Copying built files:")
70 | for file_ in glob(TEMP_LOCATION_GLOB):
71 | base = basename(file_)
72 |
73 | print("%s -> %s" % (file_, base))
74 | ensure_not_exists(base)
75 | move(file_, ".")
76 | finally:
77 | os.chdir(old_dir)
78 |
79 | print()
80 | print("Updated documentation branch in directory %s" % ALPHALENS_ROOT)
81 | print("If you are happy with these changes, commit and push to gh-pages.")
82 |
83 |
84 | if __name__ == "__main__":
85 | main()
86 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | REM Command file for Sphinx documentation
4 |
5 | if "%SPHINXBUILD%" == "" (
6 | set SPHINXBUILD=sphinx-build
7 | )
8 | set BUILDDIR=build
9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
10 | set I18NSPHINXOPTS=%SPHINXOPTS% source
11 | if NOT "%PAPER%" == "" (
12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
14 | )
15 |
16 | if "%1" == "" goto help
17 |
18 | if "%1" == "help" (
19 | :help
20 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
21 | echo. html to make standalone HTML files
22 | echo. dirhtml to make HTML files named index.html in directories
23 | echo. singlehtml to make a single large HTML file
24 | echo. pickle to make pickle files
25 | echo. json to make JSON files
26 | echo. htmlhelp to make HTML files and a HTML help project
27 | echo. qthelp to make HTML files and a qthelp project
28 | echo. devhelp to make HTML files and a Devhelp project
29 | echo. epub to make an epub
30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
31 | echo. text to make text files
32 | echo. man to make manual pages
33 | echo. texinfo to make Texinfo files
34 | echo. gettext to make PO message catalogs
35 | echo. changes to make an overview over all changed/added/deprecated items
36 | echo. xml to make Docutils-native XML files
37 | echo. pseudoxml to make pseudoxml-XML files for display purposes
38 | echo. linkcheck to check all external links for integrity
39 | echo. doctest to run all doctests embedded in the documentation if enabled
40 | echo. coverage to run coverage check of the documentation if enabled
41 | goto end
42 | )
43 |
44 | if "%1" == "clean" (
45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
46 | del /q /s %BUILDDIR%\*
47 | goto end
48 | )
49 |
50 |
51 | REM Check if sphinx-build is available and fall back to the Python version if not
52 | %SPHINXBUILD% 1>NUL 2>NUL
53 | if errorlevel 9009 goto sphinx_python
54 | goto sphinx_ok
55 |
56 | :sphinx_python
57 |
58 | set SPHINXBUILD=python -m sphinx.__init__
59 | %SPHINXBUILD% 2> nul
60 | if errorlevel 9009 (
61 | echo.
62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
63 | echo.installed, then set the SPHINXBUILD environment variable to point
64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
65 | echo.may add the Sphinx directory to PATH.
66 | echo.
67 | echo.If you don't have Sphinx installed, grab it from
68 | echo.http://sphinx-doc.org/
69 | exit /b 1
70 | )
71 |
72 | :sphinx_ok
73 |
74 |
75 | if "%1" == "html" (
76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
77 | if errorlevel 1 exit /b 1
78 | echo.
79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html.
80 | goto end
81 | )
82 |
83 | if "%1" == "dirhtml" (
84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
85 | if errorlevel 1 exit /b 1
86 | echo.
87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
88 | goto end
89 | )
90 |
91 | if "%1" == "singlehtml" (
92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
93 | if errorlevel 1 exit /b 1
94 | echo.
95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
96 | goto end
97 | )
98 |
99 | if "%1" == "pickle" (
100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
101 | if errorlevel 1 exit /b 1
102 | echo.
103 | echo.Build finished; now you can process the pickle files.
104 | goto end
105 | )
106 |
107 | if "%1" == "json" (
108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
109 | if errorlevel 1 exit /b 1
110 | echo.
111 | echo.Build finished; now you can process the JSON files.
112 | goto end
113 | )
114 |
115 | if "%1" == "htmlhelp" (
116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
117 | if errorlevel 1 exit /b 1
118 | echo.
119 | echo.Build finished; now you can run HTML Help Workshop with the ^
120 | .hhp project file in %BUILDDIR%/htmlhelp.
121 | goto end
122 | )
123 |
124 | if "%1" == "qthelp" (
125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
126 | if errorlevel 1 exit /b 1
127 | echo.
128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^
129 | .qhcp project file in %BUILDDIR%/qthelp, like this:
130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Qfactor.qhcp
131 | echo.To view the help file:
132 | 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Qfactor.qhc
133 | goto end
134 | )
135 |
136 | if "%1" == "devhelp" (
137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
138 | if errorlevel 1 exit /b 1
139 | echo.
140 | echo.Build finished.
141 | goto end
142 | )
143 |
144 | if "%1" == "epub" (
145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
146 | if errorlevel 1 exit /b 1
147 | echo.
148 | echo.Build finished. The epub file is in %BUILDDIR%/epub.
149 | goto end
150 | )
151 |
152 | if "%1" == "latex" (
153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
154 | if errorlevel 1 exit /b 1
155 | echo.
156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
157 | goto end
158 | )
159 |
160 | if "%1" == "latexpdf" (
161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
162 | cd %BUILDDIR%/latex
163 | make all-pdf
164 | cd %~dp0
165 | echo.
166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
167 | goto end
168 | )
169 |
170 | if "%1" == "latexpdfja" (
171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
172 | cd %BUILDDIR%/latex
173 | make all-pdf-ja
174 | cd %~dp0
175 | echo.
176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
177 | goto end
178 | )
179 |
180 | if "%1" == "text" (
181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
182 | if errorlevel 1 exit /b 1
183 | echo.
184 | echo.Build finished. The text files are in %BUILDDIR%/text.
185 | goto end
186 | )
187 |
188 | if "%1" == "man" (
189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
190 | if errorlevel 1 exit /b 1
191 | echo.
192 | echo.Build finished. The manual pages are in %BUILDDIR%/man.
193 | goto end
194 | )
195 |
196 | if "%1" == "texinfo" (
197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
198 | if errorlevel 1 exit /b 1
199 | echo.
200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
201 | goto end
202 | )
203 |
204 | if "%1" == "gettext" (
205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
206 | if errorlevel 1 exit /b 1
207 | echo.
208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
209 | goto end
210 | )
211 |
212 | if "%1" == "changes" (
213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
214 | if errorlevel 1 exit /b 1
215 | echo.
216 | echo.The overview file is in %BUILDDIR%/changes.
217 | goto end
218 | )
219 |
220 | if "%1" == "linkcheck" (
221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
222 | if errorlevel 1 exit /b 1
223 | echo.
224 | echo.Link check complete; look for any errors in the above output ^
225 | or in %BUILDDIR%/linkcheck/output.txt.
226 | goto end
227 | )
228 |
229 | if "%1" == "doctest" (
230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
231 | if errorlevel 1 exit /b 1
232 | echo.
233 | echo.Testing of doctests in the sources finished, look at the ^
234 | results in %BUILDDIR%/doctest/output.txt.
235 | goto end
236 | )
237 |
238 | if "%1" == "coverage" (
239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
240 | if errorlevel 1 exit /b 1
241 | echo.
242 | echo.Testing of coverage in the sources finished, look at the ^
243 | results in %BUILDDIR%/coverage/python.txt.
244 | goto end
245 | )
246 |
247 | if "%1" == "xml" (
248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
249 | if errorlevel 1 exit /b 1
250 | echo.
251 | echo.Build finished. The XML files are in %BUILDDIR%/xml.
252 | goto end
253 | )
254 |
255 | if "%1" == "pseudoxml" (
256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
257 | if errorlevel 1 exit /b 1
258 | echo.
259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
260 | goto end
261 | )
262 |
263 | :end
264 |
--------------------------------------------------------------------------------
/docs/source/api-reference.rst:
--------------------------------------------------------------------------------
1 | .. _api-reference:
2 |
3 | API
4 | ===
5 |
6 | The alphalens API is organized into four modules:
7 |
8 | * Tear Sheets: :mod:`alphalens.tears`
9 | * Performance Metrics: :mod:`alphalens.performance`
10 | * Plotting Functions: :mod:`alphalens.plotting`
11 | * Utilities: :mod:`alphalens.utils`
12 |
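A typical analysis touches all four modules. The sketch below (``factor`` and ``prices`` are placeholder DataFrames you would supply) moves from raw inputs to a summary tear sheet:

.. code-block:: python

    from alphalens.utils import get_clean_factor_and_forward_returns
    from alphalens.tears import create_full_tear_sheet

    # Align the factor with forward returns computed from the price panel
    factor_data = get_clean_factor_and_forward_returns(
        factor, prices, quantiles=5, periods=(1, 5, 10)
    )

    # Generate the combined statistics and plots
    create_full_tear_sheet(factor_data)
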
13 | Tear Sheets
14 | -----------
15 |
16 | Alphalens combines key metrics and plots in thematic and summary tear sheets.
17 |
18 | .. automodule:: alphalens.tears
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
22 |
23 | Performance Metrics
24 | -------------------
25 |
26 | The module :mod:`alphalens.performance` provides performance and risk metrics.
27 |
28 | .. automodule:: alphalens.performance
29 | :members:
30 | :undoc-members:
31 | :show-inheritance:
32 |
33 | Plotting Functions
34 | ------------------
35 |
36 | The module :mod:`alphalens.plotting` facilitates the visualization of performance metrics.
37 |
38 | .. automodule:: alphalens.plotting
39 | :members:
40 | :undoc-members:
41 | :show-inheritance:
42 |
43 | Utilities
44 | ---------
45 |
46 | The module :mod:`alphalens.utils` contains helper functions, e.g. to format factor data into the requisite input format.
47 |
48 | .. automodule:: alphalens.utils
49 | :members:
50 | :undoc-members:
51 | :show-inheritance:
52 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | import os
4 | from pathlib import Path
5 | import pydata_sphinx_theme
6 | from alphalens import __version__ as version
7 |
8 | sys.path.insert(0, Path("../..").resolve(strict=True).as_posix())
9 |
10 | extensions = [
11 | "sphinx.ext.autodoc",
12 | "numpydoc",
13 | "m2r2",
14 | "sphinx_markdown_tables",
15 | "nbsphinx",
16 | "sphinx.ext.mathjax",
17 | "sphinx_copybutton",
18 | ]
19 |
20 | templates_path = ["_templates"]
21 |
22 | source_suffix = {".rst": "restructuredtext", ".md": "markdown"}
23 |
24 | master_doc = "index"
25 |
26 | project = "Alphalens"
27 | copyright = "2016, Quantopian, Inc."
28 | author = "Quantopian, Inc."
29 |
30 | release = version
31 | language = None
32 |
33 | exclude_patterns = []
34 |
35 | highlight_language = "python"
36 |
37 | pygments_style = "sphinx"
38 |
39 | todo_include_todos = False
40 |
41 | html_theme = "pydata_sphinx_theme"
42 | html_theme_path = pydata_sphinx_theme.get_html_theme_path()
43 |
44 | html_theme_options = {
45 | "github_url": "https://github.com/stefan-jansen/alphalens-reloaded",
46 | "twitter_url": "https://twitter.com/ml4trading",
47 | "external_links": [
48 | {"name": "ML for Trading", "url": "https://ml4trading.io"},
49 | {"name": "Community", "url": "https://exchange.ml4trading.io"},
50 | ],
51 | "google_analytics_id": "UA-74956955-3",
52 | "use_edit_page_button": True,
53 | "favicons": [
54 | {
55 | "rel": "icon",
56 | "sizes": "16x16",
57 | "href": "assets/favicon16x16.ico",
58 | },
59 | {
60 | "rel": "icon",
61 | "sizes": "32x32",
62 | "href": "assets/favicon32x32.ico",
63 | },
64 | ],
65 | }
66 |
67 | html_context = {
68 | "github_url": "https://github.com",
69 | "github_user": "stefan-jansen",
70 | "github_repo": "alphalens-reloaded",
71 | "github_version": "main",
72 | "doc_path": "docs/source",
73 | }
74 |
75 | html_static_path = []
76 |
77 | htmlhelp_basename = "Alphalensdoc"
78 |
79 | latex_elements = {}
80 |
81 | latex_documents = [
82 | (
83 | master_doc,
84 | "Alphalens.tex",
85 | "Alphalens Documentation",
86 | "Quantopian, Inc.",
87 | "manual",
88 | )
89 | ]
90 |
91 | man_pages = [(master_doc, "alphalens", "Alphalens Documentation", [author], 1)]
92 |
93 | texinfo_documents = [
94 | (
95 | master_doc,
96 | "Alphalens",
97 | "Alphalens Documentation",
98 | author,
99 | "Alphalens",
100 | "One line description of project.",
101 | "Miscellaneous",
102 | )
103 | ]
104 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. title:: Alphalens
2 |
3 | .. mdinclude:: ../../README.md
4 |
5 | .. toctree::
6 | :maxdepth: 4
7 |
8 | notebooks/overview
9 | notebooks/intraday_factor
10 | notebooks/event_study
11 | notebooks/pyfolio_integration
12 | api-reference
13 |
--------------------------------------------------------------------------------
/docs/source/notebooks/event_study.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Event Study"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "While Alphalens is a tool designed to evaluate a cross-sectional signal which can be used to rank many securities each day, we can still make use of Alphalens returns analysis functions, a subset of Alphalens, to create a meaningful event study.\n",
15 | "\n",
16 | "An event study is a statistical method to assess the impact of a particular event on the value of a stock. In this example we will evalute what happens to stocks whose price fall below 30$"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "# Imports & Settings"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 1,
29 | "metadata": {
30 | "ExecuteTime": {
31 | "end_time": "2021-04-19T18:54:55.486341Z",
32 | "start_time": "2021-04-19T18:54:55.483129Z"
33 | }
34 | },
35 | "source": [
36 | "import warnings\n",
37 | "warnings.filterwarnings('ignore')"
38 | ],
39 | "outputs": []
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 2,
44 | "metadata": {
45 | "ExecuteTime": {
46 | "end_time": "2021-04-19T18:54:56.240797Z",
47 | "start_time": "2021-04-19T18:54:55.491468Z"
48 | }
49 | },
50 | "source": [
51 | "import alphalens\n",
52 | "import pandas as pd"
53 | ],
54 | "outputs": []
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 3,
59 | "metadata": {
60 | "ExecuteTime": {
61 | "end_time": "2021-04-19T18:54:56.246046Z",
62 | "start_time": "2021-04-19T18:54:56.242196Z"
63 | }
64 | },
65 | "source": [
66 | "%matplotlib inline"
67 | ],
68 | "outputs": []
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "# Load Data"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "Below is a simple mapping of tickers to sectors for a universe of 500 large cap stocks."
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 4,
87 | "metadata": {
88 | "ExecuteTime": {
89 | "end_time": "2021-04-19T18:54:56.260987Z",
90 | "start_time": "2021-04-19T18:54:56.247691Z"
91 | }
92 | },
93 | "source": [
94 | "tickers = ['ACN', 'ATVI', 'ADBE', 'AMD', 'AKAM', 'ADS', 'GOOGL', 'GOOG', 'APH', 'ADI', 'ANSS', 'AAPL',\n",
95 | " 'AVGO', 'CA', 'CDNS', 'CSCO', 'CTXS', 'CTSH', 'GLW', 'CSRA', 'DXC', 'EBAY', 'EA', 'FFIV', 'FB',\n",
96 | " 'FLIR', 'IT', 'GPN', 'HRS', 'HPE', 'HPQ', 'INTC', 'IBM', 'INTU', 'JNPR', 'KLAC', 'LRCX', 'MA', 'MCHP',\n",
97 | " 'MSFT', 'MSI', 'NTAP', 'NFLX', 'NVDA', 'ORCL', 'PAYX', 'PYPL', 'QRVO', 'QCOM', 'RHT', 'CRM', 'STX',\n",
98 | " 'AMG', 'AFL', 'ALL', 'AXP', 'AIG', 'AMP', 'AON', 'AJG', 'AIZ', 'BAC', 'BK', 'BBT', 'BRK.B', 'BLK', 'HRB',\n",
99 | " 'BHF', 'COF', 'CBOE', 'SCHW', 'CB', 'CINF', 'C', 'CFG', 'CME', 'CMA', 'DFS', 'ETFC', 'RE', 'FITB', 'BEN',\n",
100 | " 'GS', 'HIG', 'HBAN', 'ICE', 'IVZ', 'JPM', 'KEY', 'LUK', 'LNC', 'L', 'MTB', 'MMC', 'MET', 'MCO', 'MS',\n",
101 | " 'NDAQ', 'NAVI', 'NTRS', 'PBCT', 'PNC', 'PFG', 'PGR', 'PRU', 'RJF', 'RF', 'SPGI', 'STT', 'STI', 'SYF', 'TROW',\n",
102 | " 'ABT', 'ABBV', 'AET', 'A', 'ALXN', 'ALGN', 'AGN', 'ABC', 'AMGN', 'ANTM', 'BCR', 'BAX', 'BDX', 'BIIB', 'BSX',\n",
103 | " 'BMY', 'CAH', 'CELG', 'CNC', 'CERN', 'CI', 'COO', 'DHR', 'DVA', 'XRAY', 'EW', 'EVHC', 'ESRX', 'GILD', 'HCA',\n",
104 | " 'HSIC', 'HOLX', 'HUM', 'IDXX', 'ILMN', 'INCY', 'ISRG', 'IQV', 'JNJ', 'LH', 'LLY', 'MCK', 'MDT', 'MRK', 'MTD',\n",
105 | " 'MYL', 'PDCO', 'PKI', 'PRGO', 'PFE', 'DGX', 'REGN', 'RMD', 'SYK', 'TMO', 'UNH', 'UHS', 'VAR', 'VRTX', 'WAT',\n",
106 | " 'MMM', 'AYI', 'ALK', 'ALLE', 'AAL', 'AME', 'AOS', 'ARNC', 'BA', 'CHRW', 'CAT', 'CTAS', 'CSX', 'CMI', 'DE',\n",
107 | " 'DAL', 'DOV', 'ETN', 'EMR', 'EFX', 'EXPD', 'FAST', 'FDX', 'FLS', 'FLR', 'FTV', 'FBHS', 'GD', 'GE', 'GWW',\n",
108 | " 'HON', 'INFO', 'ITW', 'IR', 'JEC', 'JBHT', 'JCI', 'KSU', 'LLL', 'LMT', 'MAS', 'NLSN', 'NSC', 'NOC', 'PCAR',\n",
109 | " 'PH', 'PNR', 'PWR', 'RTN', 'RSG', 'RHI', 'ROK', 'COL', 'ROP', 'LUV', 'SRCL', 'TXT', 'TDG', 'UNP', 'UAL',\n",
110 | " 'AES', 'LNT', 'AEE', 'AEP', 'AWK', 'CNP', 'CMS', 'ED', 'D', 'DTE', 'DUK', 'EIX', 'ETR', 'ES', 'EXC']"
111 | ],
112 | "outputs": []
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "metadata": {},
117 | "source": [
118 | "## YFinance Download"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 5,
124 | "metadata": {
125 | "ExecuteTime": {
126 | "end_time": "2021-04-19T18:55:06.273472Z",
127 | "start_time": "2021-04-19T18:54:56.262287Z"
128 | }
129 | },
130 | "source": [
131 | "import yfinance as yf\n",
132 | "import pandas_datareader.data as web\n",
133 | "yf.pdr_override()\n",
134 | "\n",
135 | "df = web.get_data_yahoo(tickers, start='2015-06-01', end='2017-01-01')"
136 | ],
137 | "outputs": []
138 | },
139 | {
140 | "cell_type": "markdown",
141 | "metadata": {},
142 | "source": [
143 | "## Data Formatting"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": 6,
149 | "metadata": {
150 | "ExecuteTime": {
151 | "end_time": "2021-04-19T18:55:06.299710Z",
152 | "start_time": "2021-04-19T18:55:06.274655Z"
153 | }
154 | },
155 | "source": [
156 | "df = df.stack()\n",
157 | "df.index.names = ['date', 'asset']\n",
158 | "df = df.tz_localize('UTC', level='date')"
159 | ],
160 | "outputs": []
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 7,
165 | "metadata": {
166 | "ExecuteTime": {
167 | "end_time": "2021-04-19T18:55:06.308533Z",
168 | "start_time": "2021-04-19T18:55:06.300614Z"
169 | }
170 | },
171 | "source": [
172 | "df.info()"
173 | ],
174 | "outputs": []
175 | },
176 | {
177 | "cell_type": "markdown",
178 | "metadata": {},
179 | "source": [
180 | "# Factor Computation"
181 | ]
182 | },
183 | {
184 | "cell_type": "markdown",
185 | "metadata": {},
186 | "source": [
187 | "Now it's time to build the events DataFrame, the input we will pass to Alphalens.\n",
188 | "\n",
189 | "Alphalens calculates statistics for those dates where the input DataFrame has values (not NaN). So to compute the performace analysis on specific dates and securities (like an event study) then we have to make sure the input DataFrame contains valid values only on those date/security combinations where the event happens. All the other values in the DataFrame must be NaN or not present.\n",
190 | "\n",
191 | "Also, make sure the event values are positive (it doesn't matter the value but they must be positive) if you intend to go long on the events and use negative values if you intent to go short. This impacts the cumulative returns plots. \n",
192 | "\n",
193 | "Let's create the event DataFrame where we \"mark\" (any value) each day a security price fall below 30$. "
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": 8,
199 | "metadata": {
200 | "ExecuteTime": {
201 | "end_time": "2021-04-19T18:55:06.332317Z",
202 | "start_time": "2021-04-19T18:55:06.309464Z"
203 | }
204 | },
205 | "source": [
206 | "today_price = df.loc[:, 'Open'].unstack('asset')\n",
207 | "yesterday_price = today_price.shift(1)\n",
208 | "events = today_price[(today_price < 30.0) & (yesterday_price >= 30)]\n",
209 | "events = events.stack()\n",
210 | "events = events.astype(float)\n",
211 | "events"
212 | ],
213 | "outputs": []
214 | },
215 | {
216 | "cell_type": "markdown",
217 | "metadata": {},
218 | "source": [
219 | "The pricing data passed to alphalens should contain the entry price for the assets so it must reflect the next available price after an event was observed at a given timestamp. Those prices must not be used in the calculation of the events for that time. Always double check to ensure you are not introducing lookahead bias to your study.\n",
220 | "\n",
221 | "The pricing data must also contain the exit price for the assets, for period 1 the price at the next timestamp will be used, for period 2 the price after 2 timestats will be used and so on.\n",
222 | "\n",
223 | "While Alphalens is time frequency agnostic, in our example we build 'pricing' DataFrame so that for each event timestamp it contains the assets open price for the next day afer the event is detected, this price will be used as the assets entry price. Also, we are not adding additional prices so the assets exit price will be the following days open prices (how many days depends on 'periods' argument)."
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": 9,
229 | "metadata": {
230 | "ExecuteTime": {
231 | "end_time": "2021-04-19T18:55:06.351638Z",
232 | "start_time": "2021-04-19T18:55:06.334334Z"
233 | }
234 | },
235 | "source": [
236 | "pricing = df.loc[:, 'Open'].iloc[1:].unstack('asset')"
237 | ],
238 | "outputs": []
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": 10,
243 | "metadata": {
244 | "ExecuteTime": {
245 | "end_time": "2021-04-19T18:55:06.368177Z",
246 | "start_time": "2021-04-19T18:55:06.353103Z"
247 | }
248 | },
249 | "source": [
250 | "pricing.info()"
251 | ],
252 | "outputs": []
253 | },
254 | {
255 | "cell_type": "markdown",
256 | "metadata": {},
257 | "source": [
258 | "# Run Event Study"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "metadata": {},
264 | "source": [
265 | "## Configuration"
266 | ]
267 | },
268 | {
269 | "cell_type": "markdown",
270 | "metadata": {
271 | "collapsed": true
272 | },
273 | "source": [
274 | "Before running Alphalens beware of some important options: "
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": 11,
280 | "metadata": {
281 | "ExecuteTime": {
282 | "end_time": "2021-04-19T18:55:06.376011Z",
283 | "start_time": "2021-04-19T18:55:06.369474Z"
284 | }
285 | },
286 | "source": [
287 | "# we don't want any filtering to be done\n",
288 | "\n",
289 | "filter_zscore = None"
290 | ],
291 | "outputs": []
292 | },
293 | {
294 | "cell_type": "code",
295 | "execution_count": 12,
296 | "metadata": {
297 | "ExecuteTime": {
298 | "end_time": "2021-04-19T18:55:06.383869Z",
299 | "start_time": "2021-04-19T18:55:06.377094Z"
300 | }
301 | },
302 | "source": [
303 | "# We want to have only one bin/quantile. So we can either use quantiles=1 or bins=1\n",
304 | "\n",
305 | "quantiles = None\n",
306 | "bins = 1\n",
307 | "\n",
308 | "# Beware that in pandas versions below 0.20.0 there were few bugs in panda.qcut and pandas.cut\n",
309 | "# that resulted in ValueError exception to be thrown when identical values were present in the\n",
310 | "# dataframe and 1 quantile/bin was selected.\n",
311 | "# As a workaroung use the bins custom range option that include all your values. E.g.\n",
312 | "\n",
313 | "quantiles = None\n",
314 | "bins = [-1000000, 1000000]"
315 | ],
316 | "outputs": []
317 | },
318 | {
319 | "cell_type": "code",
320 | "execution_count": 13,
321 | "metadata": {
322 | "ExecuteTime": {
323 | "end_time": "2021-04-19T18:55:06.392541Z",
324 | "start_time": "2021-04-19T18:55:06.384930Z"
325 | }
326 | },
327 | "source": [
328 | "# You don't have to directly set 'long_short' option when running alphalens.tears.create_event_study_tear_sheet\n",
329 | "# But in case you are making use of other Alphalens functions make sure to set 'long_short=False'\n",
330 | "# if you set 'long_short=True' Alphalens will perform forward return demeaning and that makes sense only\n",
331 | "# in a dollar neutral portfolio. With an event style signal you cannot usually create a dollar neutral\n",
332 | "# long/short portfolio\n",
333 | "\n",
334 | "long_short = False"
335 | ],
336 | "outputs": []
337 | },
338 | {
339 | "cell_type": "markdown",
340 | "metadata": {},
341 | "source": [
342 | "## Get Alphalens Input"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": 14,
348 | "metadata": {
349 | "ExecuteTime": {
350 | "end_time": "2021-04-19T18:55:06.605891Z",
351 | "start_time": "2021-04-19T18:55:06.394261Z"
352 | }
353 | },
354 | "source": [
355 | "factor_data = alphalens.utils.get_clean_factor_and_forward_returns(events,\n",
356 | " pricing,\n",
357 | " quantiles=None,\n",
358 | " bins=1,\n",
359 | " periods=(\n",
360 | " 1, 2, 3, 4, 5, 6, 10),\n",
361 | " filter_zscore=None)"
362 | ],
363 | "outputs": []
364 | },
365 | {
366 | "cell_type": "markdown",
367 | "metadata": {},
368 | "source": [
369 | "## Run Event Tearsheet"
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": 15,
375 | "metadata": {
376 | "ExecuteTime": {
377 | "end_time": "2021-04-19T18:55:09.176443Z",
378 | "start_time": "2021-04-19T18:55:06.606958Z"
379 | },
380 | "scrolled": false
381 | },
382 | "source": [
383 | "alphalens.tears.create_event_study_tear_sheet(\n",
384 | " factor_data, pricing, avgretplot=(5, 10))"
385 | ],
386 | "outputs": []
387 | },
388 | {
389 | "cell_type": "markdown",
390 | "metadata": {},
391 | "source": [
392 | "## Short Signal Analysis"
393 | ]
394 | },
395 | {
396 | "cell_type": "markdown",
397 | "metadata": {},
398 | "source": [
399 | "If we wanted to analyze the performance of short signal, we only had to switch from positive to negative event values"
400 | ]
401 | },
402 | {
403 | "cell_type": "code",
404 | "execution_count": 16,
405 | "metadata": {
406 | "ExecuteTime": {
407 | "end_time": "2021-04-19T18:55:09.179414Z",
408 | "start_time": "2021-04-19T18:55:09.177456Z"
409 | }
410 | },
411 | "source": [
412 | "events = -events"
413 | ],
414 | "outputs": []
415 | },
416 | {
417 | "cell_type": "code",
418 | "execution_count": 17,
419 | "metadata": {
420 | "ExecuteTime": {
421 | "end_time": "2021-04-19T18:55:09.375636Z",
422 | "start_time": "2021-04-19T18:55:09.180310Z"
423 | }
424 | },
425 | "source": [
426 | "factor_data = alphalens.utils.get_clean_factor_and_forward_returns(events,\n",
427 | " pricing,\n",
428 | " quantiles=None,\n",
429 | " bins=1,\n",
430 | " periods=(\n",
431 | " 1, 2, 3, 4, 5, 6, 10),\n",
432 | " filter_zscore=None)"
433 | ],
434 | "outputs": []
435 | },
436 | {
437 | "cell_type": "code",
438 | "execution_count": 18,
439 | "metadata": {
440 | "ExecuteTime": {
441 | "end_time": "2021-04-19T18:55:11.822514Z",
442 | "start_time": "2021-04-19T18:55:09.376640Z"
443 | }
444 | },
445 | "source": [
446 | "alphalens.tears.create_event_study_tear_sheet(\n",
447 | " factor_data, pricing, avgretplot=(5, 10))"
448 | ],
449 | "outputs": []
450 | }
451 | ],
452 | "metadata": {
453 | "kernelspec": {
454 | "display_name": "Python 3",
455 | "language": "python",
456 | "name": "python3"
457 | },
458 | "language_info": {
459 | "codemirror_mode": {
460 | "name": "ipython",
461 | "version": 3
462 | },
463 | "file_extension": ".py",
464 | "mimetype": "text/x-python",
465 | "name": "python",
466 | "nbconvert_exporter": "python",
467 | "pygments_lexer": "ipython3",
468 | "version": "3.8.8"
469 | },
470 | "toc": {
471 | "base_numbering": 1,
472 | "nav_menu": {},
473 | "number_sections": true,
474 | "sideBar": true,
475 | "skip_h1_title": false,
476 | "title_cell": "Table of Contents",
477 | "title_sidebar": "Contents",
478 | "toc_cell": false,
479 | "toc_position": {},
480 | "toc_section_display": true,
481 | "toc_window_display": true
482 | }
483 | },
484 | "nbformat": 4,
485 | "nbformat_minor": 1
486 | }
487 |
--------------------------------------------------------------------------------
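
The single-bin workaround configured in the notebook above is easy to verify in isolation. A minimal sketch, assuming only pandas and using made-up event values, showing that an explicit two-edge bin range always yields exactly one bucket even when all values are identical:

```python
import pandas as pd

# Hypothetical event marks: identical values, as often happens with event signals.
events = pd.Series([29.5, 29.5, 29.5])

# pd.qcut(events, 1) could raise ValueError on identical values in old pandas;
# an explicit two-edge bin range always yields exactly one interval:
labels = pd.cut(events, bins=[-1000000, 1000000])
print(labels.value_counts())  # all events fall into the single (-1000000, 1000000] bin
```
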
/docs/source/notebooks/intraday_factor.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Intraday Factor"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "In this notebook we use Alphalens to analyse the performance of an intraday factor, which is computed daily but the stocks are bought at marker open and sold at market close with no overnight positions."
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# Imports & Settings"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 2,
27 | "metadata": {
28 | "ExecuteTime": {
29 | "end_time": "2021-04-19T18:51:14.070853Z",
30 | "start_time": "2021-04-19T18:51:14.066776Z"
31 | },
32 | "pycharm": {
33 | "name": "#%%\n"
34 | }
35 | },
36 | "source": [
37 | "import warnings\n",
38 | "warnings.filterwarnings('ignore')"
39 | ],
40 | "outputs": []
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 3,
45 | "metadata": {
46 | "ExecuteTime": {
47 | "end_time": "2021-04-19T18:51:14.085970Z",
48 | "start_time": "2021-04-19T18:51:14.075670Z"
49 | }
50 | },
51 | "source": [
52 | "import alphalens\n",
53 | "import pandas as pd"
54 | ],
55 | "outputs": []
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 4,
60 | "metadata": {
61 | "ExecuteTime": {
62 | "end_time": "2021-04-19T18:51:14.095404Z",
63 | "start_time": "2021-04-19T18:51:14.087648Z"
64 | }
65 | },
66 | "source": [
67 | "%matplotlib inline"
68 | ],
69 | "outputs": []
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "# Loading Data"
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {},
81 | "source": [
82 | "Below is a simple mapping of tickers to sectors for a small universe of large cap stocks."
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 5,
88 | "metadata": {
89 | "ExecuteTime": {
90 | "end_time": "2021-04-19T18:51:14.105582Z",
91 | "start_time": "2021-04-19T18:51:14.096816Z"
92 | }
93 | },
94 | "source": [
95 | "sector_names = {\n",
96 | " 0 : \"information_technology\",\n",
97 | " 1 : \"financials\",\n",
98 | " 2 : \"health_care\",\n",
99 | " 3 : \"industrials\",\n",
100 | " 4 : \"utilities\", \n",
101 | " 5 : \"real_estate\", \n",
102 | " 6 : \"materials\", \n",
103 | " 7 : \"telecommunication_services\", \n",
104 | " 8 : \"consumer_staples\", \n",
105 | " 9 : \"consumer_discretionary\", \n",
106 | " 10 : \"energy\" \n",
107 | "}\n",
108 | "\n",
109 | "ticker_sector = {\n",
110 | " \"ACN\" : 0, \"ATVI\" : 0, \"ADBE\" : 0, \"AMD\" : 0, \"AKAM\" : 0, \"ADS\" : 0, \"GOOGL\" : 0, \"GOOG\" : 0, \n",
111 | " \"APH\" : 0, \"ADI\" : 0, \"ANSS\" : 0, \"AAPL\" : 0, \"AMAT\" : 0, \"ADSK\" : 0, \"ADP\" : 0, \"AVGO\" : 0,\n",
112 | " \"AMG\" : 1, \"AFL\" : 1, \"ALL\" : 1, \"AXP\" : 1, \"AIG\" : 1, \"AMP\" : 1, \"AON\" : 1, \"AJG\" : 1, \"AIZ\" : 1, \"BAC\" : 1,\n",
113 | " \"BK\" : 1, \"BBT\" : 1, \"BRK.B\" : 1, \"BLK\" : 1, \"HRB\" : 1, \"BHF\" : 1, \"COF\" : 1, \"CBOE\" : 1, \"SCHW\" : 1, \"CB\" : 1,\n",
114 | " \"ABT\" : 2, \"ABBV\" : 2, \"AET\" : 2, \"A\" : 2, \"ALXN\" : 2, \"ALGN\" : 2, \"AGN\" : 2, \"ABC\" : 2, \"AMGN\" : 2, \"ANTM\" : 2,\n",
115 | " \"BCR\" : 2, \"BAX\" : 2, \"BDX\" : 2, \"BIIB\" : 2, \"BSX\" : 2, \"BMY\" : 2, \"CAH\" : 2, \"CELG\" : 2, \"CNC\" : 2, \"CERN\" : 2,\n",
116 | " \"MMM\" : 3, \"AYI\" : 3, \"ALK\" : 3, \"ALLE\" : 3, \"AAL\" : 3, \"AME\" : 3, \"AOS\" : 3, \"ARNC\" : 3, \"BA\" : 3, \"CHRW\" : 3,\n",
117 | " \"CAT\" : 3, \"CTAS\" : 3, \"CSX\" : 3, \"CMI\" : 3, \"DE\" : 3, \"DAL\" : 3, \"DOV\" : 3, \"ETN\" : 3, \"EMR\" : 3, \"EFX\" : 3,\n",
118 | " \"AES\" : 4, \"LNT\" : 4, \"AEE\" : 4, \"AEP\" : 4, \"AWK\" : 4, \"CNP\" : 4, \"CMS\" : 4, \"ED\" : 4, \"D\" : 4, \"DTE\" : 4,\n",
119 | " \"DUK\" : 4, \"EIX\" : 4, \"ETR\" : 4, \"ES\" : 4, \"EXC\" : 4, \"FE\" : 4, \"NEE\" : 4, \"NI\" : 4, \"NRG\" : 4, \"PCG\" : 4,\n",
120 | " \"ARE\" : 5, \"AMT\" : 5, \"AIV\" : 5, \"AVB\" : 5, \"BXP\" : 5, \"CBG\" : 5, \"CCI\" : 5, \"DLR\" : 5, \"DRE\" : 5,\n",
121 | " \"EQIX\" : 5, \"EQR\" : 5, \"ESS\" : 5, \"EXR\" : 5, \"FRT\" : 5, \"GGP\" : 5, \"HCP\" : 5, \"HST\" : 5, \"IRM\" : 5, \"KIM\" : 5,\n",
122 | " \"APD\" : 6, \"ALB\" : 6, \"AVY\" : 6, \"BLL\" : 6, \"CF\" : 6, \"DWDP\" : 6, \"EMN\" : 6, \"ECL\" : 6, \"FMC\" : 6, \"FCX\" : 6,\n",
123 | " \"IP\" : 6, \"IFF\" : 6, \"LYB\" : 6, \"MLM\" : 6, \"MON\" : 6, \"MOS\" : 6, \"NEM\" : 6, \"NUE\" : 6, \"PKG\" : 6, \"PPG\" : 6,\n",
124 | " \"T\" : 7, \"CTL\" : 7, \"VZ\" : 7, \n",
125 | " \"MO\" : 8, \"ADM\" : 8, \"BF.B\" : 8, \"CPB\" : 8, \"CHD\" : 8, \"CLX\" : 8, \"KO\" : 8, \"CL\" : 8, \"CAG\" : 8,\n",
126 | " \"STZ\" : 8, \"COST\" : 8, \"COTY\" : 8, \"CVS\" : 8, \"DPS\" : 8, \"EL\" : 8, \"GIS\" : 8, \"HSY\" : 8, \"HRL\" : 8,\n",
127 | " \"AAP\" : 9, \"AMZN\" : 9, \"APTV\" : 9, \"AZO\" : 9, \"BBY\" : 9, \"BWA\" : 9, \"KMX\" : 9, \"CCL\" : 9, \n",
128 | " \"APC\" : 10, \"ANDV\" : 10, \"APA\" : 10, \"BHGE\" : 10, \"COG\" : 10, \"CHK\" : 10, \"CVX\" : 10, \"XEC\" : 10, \"CXO\" : 10,\n",
129 | " \"COP\" : 10, \"DVN\" : 10, \"EOG\" : 10, \"EQT\" : 10, \"XOM\" : 10, \"HAL\" : 10, \"HP\" : 10, \"HES\" : 10, \"KMI\" : 10\n",
130 | "}"
131 | ],
132 | "outputs": []
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {},
137 | "source": [
138 | "## YFinance Download"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 6,
144 | "metadata": {
145 | "ExecuteTime": {
146 | "end_time": "2021-04-19T18:51:21.241423Z",
147 | "start_time": "2021-04-19T18:51:14.107318Z"
148 | }
149 | },
150 | "source": [
151 | "import yfinance as yf\n",
152 | "import pandas_datareader.data as web\n",
153 | "yf.pdr_override()\n",
154 | "\n",
155 | "tickers = list(ticker_sector.keys())\n",
156 | "df = web.get_data_yahoo(tickers, start='2017-01-01', end='2017-06-01')\n",
157 | "df.index = pd.to_datetime(df.index, utc=True)"
158 | ],
159 | "outputs": []
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 7,
164 | "metadata": {
165 | "ExecuteTime": {
166 | "end_time": "2021-04-19T18:51:21.253717Z",
167 | "start_time": "2021-04-19T18:51:21.242284Z"
168 | }
169 | },
170 | "source": [
171 | "df = df.stack()\n",
172 | "df.index.names = ['date', 'asset']\n",
173 | "df.info()"
174 | ],
175 | "outputs": []
176 | },
177 | {
178 | "cell_type": "markdown",
179 | "metadata": {},
180 | "source": [
181 | "# Factor Computation"
182 | ]
183 | },
184 | {
185 | "cell_type": "markdown",
186 | "metadata": {},
187 | "source": [
188 | "Our example factor ranks the stocks based on their overnight price gap (yesterday close to today open price). We'll see if the factor has some alpha or if it is pure noise."
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": 8,
194 | "metadata": {
195 | "ExecuteTime": {
196 | "end_time": "2021-04-19T18:51:21.259633Z",
197 | "start_time": "2021-04-19T18:51:21.254738Z"
198 | }
199 | },
200 | "source": [
201 | "available_tickers = df.index.unique('asset')\n",
202 | "ticker_sector = {k: v for k, v in ticker_sector.items() if k in available_tickers}"
203 | ],
204 | "outputs": []
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": 9,
209 | "metadata": {
210 | "ExecuteTime": {
211 | "end_time": "2021-04-19T18:51:21.272024Z",
212 | "start_time": "2021-04-19T18:51:21.261385Z"
213 | }
214 | },
215 | "source": [
216 | "today_open = df.loc[:, 'Open'].unstack('asset')\n",
217 | "today_close = df.loc[:, 'Close'].unstack('asset')\n",
218 | "yesterday_close = today_close.shift(1)"
219 | ],
220 | "outputs": []
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 10,
225 | "metadata": {
226 | "ExecuteTime": {
227 | "end_time": "2021-04-19T18:51:21.276157Z",
228 | "start_time": "2021-04-19T18:51:21.273553Z"
229 | }
230 | },
231 | "source": [
232 | "factor = (today_open - yesterday_close) / yesterday_close"
233 | ],
234 | "outputs": []
235 | },
236 | {
237 | "cell_type": "markdown",
238 | "metadata": {},
239 | "source": [
240 | "The pricing data passed to alphalens should contain the entry price for the assets so it must reflect the next available price after a factor value was observed at a given timestamp. Those prices must not be used in the calculation of the factor values for that time. Always double check to ensure you are not introducing lookahead bias to your study.\n",
241 | "\n",
242 | "The pricing data must also contain the exit price for the assets, for period 1 the price at the next timestamp will be used, for period 2 the price after 2 timestamps will be used and so on.\n",
243 | "\n",
244 | "There are no restrinctions/assumptions on the time frequencies a factor should be computed at and neither on the specific time a factor should be traded (trading at the open vs trading at the close vs intraday trading), it is only required that factor and price DataFrames are properly aligned given the rules above.\n",
245 | "\n",
246 | "In our example, we want to buy the stocks at marker open, so the need the open price at the exact timestamps as the factor valules, and we want to sell the stocks at market close so we will add the close prices too, which will be used to compute period 1 forward returns as they appear just after the factor values timestamps. The returns computed by Alphalens will therefore be based on the difference between open to close assets prices.\n",
247 | "\n",
248 | "If we had other prices we could compute other period returns, for example one hour after market open and 2 hours and so on. We could have added those prices right after the open prices and instruct Alphalens to compute 1, 2, 3... periods too and not only period 1 like in this example."
249 | ]
250 | },
251 | {
252 | "cell_type": "markdown",
253 | "metadata": {},
254 | "source": [
255 | "## Data Formatting"
256 | ]
257 | },
258 | {
259 | "cell_type": "markdown",
260 | "metadata": {},
261 | "source": [
262 | "### Time Adjustments"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": 11,
268 | "metadata": {
269 | "ExecuteTime": {
270 | "end_time": "2021-04-19T18:51:21.285341Z",
271 | "start_time": "2021-04-19T18:51:21.277049Z"
272 | }
273 | },
274 | "source": [
275 | "# Fix time as Yahoo doesn't set it\n",
276 | "today_open.index += pd.Timedelta('9h30m')\n",
277 | "today_close.index += pd.Timedelta('16h')\n",
278 | "# pricing will contain both open and close\n",
279 | "pricing = pd.concat([today_open, today_close]).sort_index()"
280 | ],
281 | "outputs": []
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": 12,
286 | "metadata": {
287 | "ExecuteTime": {
288 | "end_time": "2021-04-19T18:51:21.314071Z",
289 | "start_time": "2021-04-19T18:51:21.286210Z"
290 | }
291 | },
292 | "source": [
293 | "pricing.head()"
294 | ],
295 | "outputs": []
296 | },
297 | {
298 | "cell_type": "markdown",
299 | "metadata": {},
300 | "source": [
301 | "### Align Factor & Price"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 13,
307 | "metadata": {
308 | "ExecuteTime": {
309 | "end_time": "2021-04-19T18:51:21.320284Z",
310 | "start_time": "2021-04-19T18:51:21.315441Z"
311 | }
312 | },
313 | "source": [
314 | "# Align factor to open price\n",
315 | "factor.index += pd.Timedelta('9h30m')\n",
316 | "factor = factor.stack()\n",
317 | "factor.index = factor.index.set_names(['date', 'asset'])"
318 | ],
319 | "outputs": []
320 | },
321 | {
322 | "cell_type": "code",
323 | "execution_count": 14,
324 | "metadata": {
325 | "ExecuteTime": {
326 | "end_time": "2021-04-19T18:51:21.352176Z",
327 | "start_time": "2021-04-19T18:51:21.321710Z"
328 | }
329 | },
330 | "source": [
331 | "factor.unstack().head()"
332 | ],
333 | "outputs": []
334 | },
335 | {
336 | "cell_type": "markdown",
337 | "metadata": {},
338 | "source": [
339 | "# Run Alphalens"
340 | ]
341 | },
342 | {
343 | "cell_type": "markdown",
344 | "metadata": {},
345 | "source": [
346 | "Period 1 will show returns from market open to market close while period 2 will show returns from today open to tomorrow open"
347 | ]
348 | },
349 | {
350 | "cell_type": "markdown",
351 | "metadata": {},
352 | "source": [
353 | "## Get Alphalens Input"
354 | ]
355 | },
356 | {
357 | "cell_type": "code",
358 | "execution_count": 15,
359 | "metadata": {
360 | "ExecuteTime": {
361 | "end_time": "2021-04-19T18:51:21.735159Z",
362 | "start_time": "2021-04-19T18:51:21.353187Z"
363 | }
364 | },
365 | "source": [
366 | "non_predictive_factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor, \n",
367 | " pricing, \n",
368 | " periods=(1,2),\n",
369 | " groupby=ticker_sector,\n",
370 | " groupby_labels=sector_names)"
371 | ],
372 | "outputs": []
373 | },
374 | {
375 | "cell_type": "markdown",
376 | "metadata": {},
377 | "source": [
378 | "## Returns Tear Sheet"
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": 16,
384 | "metadata": {
385 | "ExecuteTime": {
386 | "end_time": "2021-04-19T18:51:23.586574Z",
387 | "start_time": "2021-04-19T18:51:21.736402Z"
388 | },
389 | "scrolled": false
390 | },
391 | "source": [
392 | "alphalens.tears.create_returns_tear_sheet(non_predictive_factor_data)"
393 | ],
394 | "outputs": []
395 | },
396 | {
397 | "cell_type": "code",
398 | "execution_count": 17,
399 | "metadata": {
400 | "ExecuteTime": {
401 | "end_time": "2021-04-19T18:51:25.407194Z",
402 | "start_time": "2021-04-19T18:51:23.587527Z"
403 | },
404 | "scrolled": false
405 | },
406 | "source": [
407 | "alphalens.tears.create_event_returns_tear_sheet(non_predictive_factor_data, pricing);"
408 | ],
409 | "outputs": []
410 | }
411 | ],
412 | "metadata": {
413 | "kernelspec": {
414 | "display_name": "Python 3",
415 | "language": "python",
416 | "name": "python3"
417 | },
418 | "language_info": {
419 | "codemirror_mode": {
420 | "name": "ipython",
421 | "version": 3
422 | },
423 | "file_extension": ".py",
424 | "mimetype": "text/x-python",
425 | "name": "python",
426 | "nbconvert_exporter": "python",
427 | "pygments_lexer": "ipython3",
428 | "version": "3.8.8"
429 | },
430 | "toc": {
431 | "base_numbering": 1,
432 | "nav_menu": {},
433 | "number_sections": true,
434 | "sideBar": true,
435 | "skip_h1_title": false,
436 | "title_cell": "Table of Contents",
437 | "title_sidebar": "Contents",
438 | "toc_cell": false,
439 | "toc_position": {},
440 | "toc_section_display": true,
441 | "toc_window_display": false
442 | }
443 | },
444 | "nbformat": 4,
445 | "nbformat_minor": 1
446 | }
447 |
--------------------------------------------------------------------------------
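
The key step in the intraday notebook above is interleaving open and close prices on one index. A minimal sketch with toy prices for a single hypothetical asset 'AAA', showing how stamping opens at 09:30 and closes at 16:00 makes each open's next row that day's close, so period 1 becomes an open-to-close return:

```python
import pandas as pd

# Toy daily bars for one hypothetical asset; Yahoo-style midnight date index.
idx = pd.date_range('2017-01-02', periods=3, freq='B', tz='UTC')
opens = pd.DataFrame({'AAA': [10.0, 10.5, 10.2]}, index=idx)
closes = pd.DataFrame({'AAA': [10.4, 10.1, 10.6]}, index=idx)

# Stamp opens at 09:30 and closes at 16:00, then interleave by sorting:
opens.index += pd.Timedelta('9h30m')
closes.index += pd.Timedelta('16h')
pricing = pd.concat([opens, closes]).sort_index()
print(pricing)  # rows alternate open/close, so the row after each open is that day's close
```
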
/docs/source/notebooks/pyfolio_integration.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Pyfolio Integration"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Alphalens can simulate the performance of a portfolio where the factor values are use to weight stocks. Once the portfolio is built, it can be analyzed by Pyfolio. For details on how this portfolio is built see:\n",
15 | "- alphalens.performance.factor_returns\n",
16 | "- alphalens.performance.cumulative_returns \n",
17 | "- alphalens.performance.create_pyfolio_input"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "## Imports & Settings"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 1,
30 | "metadata": {
31 | "ExecuteTime": {
32 | "end_time": "2021-04-19T18:59:58.095199Z",
33 | "start_time": "2021-04-19T18:59:58.093321Z"
34 | }
35 | },
36 | "source": [
37 | "import warnings\n",
38 | "warnings.filterwarnings('ignore')"
39 | ],
40 | "outputs": []
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 2,
45 | "metadata": {
46 | "ExecuteTime": {
47 | "end_time": "2021-04-19T18:59:58.977581Z",
48 | "start_time": "2021-04-19T18:59:58.097957Z"
49 | },
50 | "scrolled": true
51 | },
52 | "source": [
53 | "import alphalens\n",
54 | "import pyfolio\n",
55 | "import pandas as pd"
56 | ],
57 | "outputs": []
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 3,
62 | "metadata": {
63 | "ExecuteTime": {
64 | "end_time": "2021-04-19T18:59:58.981286Z",
65 | "start_time": "2021-04-19T18:59:58.978628Z"
66 | }
67 | },
68 | "source": [
69 | "%matplotlib inline"
70 | ],
71 | "outputs": []
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "## Load Data"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": [
84 | "First load some stocks data"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 4,
90 | "metadata": {
91 | "ExecuteTime": {
92 | "end_time": "2021-04-19T18:59:59.001541Z",
93 | "start_time": "2021-04-19T18:59:58.982180Z"
94 | }
95 | },
96 | "source": [
97 | "tickers = [ 'ACN', 'ATVI', 'ADBE', 'AMD', 'AKAM', 'ADS', 'GOOGL', 'GOOG', 'APH', 'ADI', 'ANSS', 'AAPL',\n",
98 | "'AVGO', 'CA', 'CDNS', 'CSCO', 'CTXS', 'CTSH', 'GLW', 'CSRA', 'DXC', 'EBAY', 'EA', 'FFIV', 'FB',\n",
99 | "'FLIR', 'IT', 'GPN', 'HRS', 'HPE', 'HPQ', 'INTC', 'IBM', 'INTU', 'JNPR', 'KLAC', 'LRCX', 'MA', 'MCHP',\n",
100 | "'MSFT', 'MSI', 'NTAP', 'NFLX', 'NVDA', 'ORCL', 'PAYX', 'PYPL', 'QRVO', 'QCOM', 'RHT', 'CRM', 'STX',\n",
101 | "'AMG', 'AFL', 'ALL', 'AXP', 'AIG', 'AMP', 'AON', 'AJG', 'AIZ', 'BAC', 'BK', 'BBT', 'BRK.B', 'BLK', 'HRB',\n",
102 | "'BHF', 'COF', 'CBOE', 'SCHW', 'CB', 'CINF', 'C', 'CFG', 'CME', 'CMA', 'DFS', 'ETFC', 'RE', 'FITB', 'BEN',\n",
103 | "'GS', 'HIG', 'HBAN', 'ICE', 'IVZ', 'JPM', 'KEY', 'LUK', 'LNC', 'L', 'MTB', 'MMC', 'MET', 'MCO', 'MS',\n",
104 | "'NDAQ', 'NAVI', 'NTRS', 'PBCT', 'PNC', 'PFG', 'PGR', 'PRU', 'RJF', 'RF', 'SPGI', 'STT', 'STI', 'SYF', 'TROW',\n",
105 | "'ABT', 'ABBV', 'AET', 'A', 'ALXN', 'ALGN', 'AGN', 'ABC', 'AMGN', 'ANTM', 'BCR', 'BAX', 'BDX', 'BIIB', 'BSX',\n",
106 | "'BMY', 'CAH', 'CELG', 'CNC', 'CERN', 'CI', 'COO', 'DHR', 'DVA', 'XRAY', 'EW', 'EVHC', 'ESRX', 'GILD', 'HCA',\n",
107 | "'HSIC', 'HOLX', 'HUM', 'IDXX', 'ILMN', 'INCY', 'ISRG', 'IQV', 'JNJ', 'LH', 'LLY', 'MCK', 'MDT', 'MRK', 'MTD',\n",
108 | "'MYL', 'PDCO', 'PKI', 'PRGO', 'PFE', 'DGX', 'REGN', 'RMD', 'SYK', 'TMO', 'UNH', 'UHS', 'VAR', 'VRTX', 'WAT',\n",
109 | "'MMM', 'AYI', 'ALK', 'ALLE', 'AAL', 'AME', 'AOS', 'ARNC', 'BA', 'CHRW', 'CAT', 'CTAS', 'CSX', 'CMI', 'DE',\n",
110 | "'DAL', 'DOV', 'ETN', 'EMR', 'EFX', 'EXPD', 'FAST', 'FDX', 'FLS', 'FLR', 'FTV', 'FBHS', 'GD', 'GE', 'GWW',\n",
111 | "'HON', 'INFO', 'ITW', 'IR', 'JEC', 'JBHT', 'JCI', 'KSU', 'LLL', 'LMT', 'MAS', 'NLSN', 'NSC', 'NOC', 'PCAR',\n",
112 | "'PH', 'PNR', 'PWR', 'RTN', 'RSG', 'RHI', 'ROK', 'COL', 'ROP', 'LUV', 'SRCL', 'TXT', 'TDG', 'UNP', 'UAL',\n",
113 | "'AES', 'LNT', 'AEE', 'AEP', 'AWK', 'CNP', 'CMS', 'ED', 'D', 'DTE', 'DUK', 'EIX', 'ETR', 'ES', 'EXC']"
114 | ],
115 | "outputs": []
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "metadata": {},
120 | "source": [
121 | "### YFinance Download"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 5,
127 | "metadata": {
128 | "ExecuteTime": {
129 | "end_time": "2021-04-19T19:00:07.983393Z",
130 | "start_time": "2021-04-19T18:59:59.002557Z"
131 | }
132 | },
133 | "source": [
134 | "import yfinance as yf\n",
135 | "import pandas_datareader.data as web\n",
136 | "yf.pdr_override()\n",
137 | "\n",
138 | "df = web.get_data_yahoo(tickers, start='2015-01-01', end='2017-01-01')\n",
139 | "df.index = pd.to_datetime(df.index, utc=True)"
140 | ],
141 | "outputs": []
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "### Data Formatting"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 6,
153 | "metadata": {
154 | "ExecuteTime": {
155 | "end_time": "2021-04-19T19:00:08.021347Z",
156 | "start_time": "2021-04-19T19:00:07.984972Z"
157 | }
158 | },
159 | "source": [
160 | "df = df.stack()\n",
161 | "df.index.names = ['date', 'asset']\n",
162 | "df.info()"
163 | ],
164 | "outputs": []
165 | },
166 | {
167 | "cell_type": "markdown",
168 | "metadata": {},
169 | "source": [
170 | "## Compute Factor"
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {},
176 | "source": [
177 | "We'll compute a simple mean reversion factor looking at recent stocks performance: stocks that performed well in the last 5 days will have high rank and vice versa."
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 7,
183 | "metadata": {
184 | "ExecuteTime": {
185 | "end_time": "2021-04-19T19:00:08.039448Z",
186 | "start_time": "2021-04-19T19:00:08.022458Z"
187 | }
188 | },
189 | "source": [
190 | "factor = df.loc[:,'Open'].unstack('asset')\n",
191 | "factor = -factor.pct_change(5)\n",
192 | "factor = factor.stack()"
193 | ],
194 | "outputs": []
195 | },
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {},
199 | "source": [
200 | "The pricing data passed to alphalens should contain the entry price for the assets so it must reflect the next available price after a factor value was observed at a given timestamp. Those prices must not be used in the calculation of the factor values for that time. Always double check to ensure you are not introducing lookahead bias to your study.\n",
201 | "\n",
202 | "The pricing data must also contain the exit price for the assets, for period 1 the price at the next timestamp will be used, for period 2 the price after 2 timestats will be used and so on.\n",
203 | "\n",
204 | "There are no restrinctions/assumptions on the time frequencies a factor should be computed at and neither on the specific time a factor should be traded (trading at the open vs trading at the close vs intraday trading), it is only required that factor and price DataFrames are properly aligned given the rules above.\n",
205 | "\n",
206 | "In our example, before the trading starts every day, we observe yesterday factor values. The price we pass to alphalens is the next available price after that factor observation: the daily open price that will be used as assets entry price. Also, we are not adding additional prices so the assets exit price will be the following days open prices (how many days depends on 'periods' argument). The retuns computed by Alphalens will therefore based on assets open prices."
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": 8,
212 | "metadata": {
213 | "ExecuteTime": {
214 | "end_time": "2021-04-19T19:00:08.053904Z",
215 | "start_time": "2021-04-19T19:00:08.040394Z"
216 | }
217 | },
218 | "source": [
219 | "pricing = df.loc[:,'Open'].unstack('asset').iloc[1:]"
220 | ],
221 | "outputs": []
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {},
226 | "source": [
227 | "## Run Alphalens Analysis"
228 | ]
229 | },
230 | {
231 | "cell_type": "markdown",
232 | "metadata": {},
233 | "source": [
234 | "### Get Input Data"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": 9,
240 | "metadata": {
241 | "ExecuteTime": {
242 | "end_time": "2021-04-19T19:00:09.358420Z",
243 | "start_time": "2021-04-19T19:00:08.054804Z"
244 | }
245 | },
246 | "source": [
247 | "factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor,\n",
248 | " pricing,\n",
249 | " periods=(1, 3, 5),\n",
250 | " quantiles=5,\n",
251 | " bins=None)"
252 | ],
253 | "outputs": []
254 | },
255 | {
256 | "cell_type": "markdown",
257 | "metadata": {},
258 | "source": [
259 | "### Summary Tear Sheet"
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": 10,
265 | "metadata": {
266 | "ExecuteTime": {
267 | "end_time": "2021-04-19T19:00:22.469819Z",
268 | "start_time": "2021-04-19T19:00:09.359418Z"
269 | },
270 | "scrolled": true
271 | },
272 | "source": [
273 | "alphalens.tears.create_summary_tear_sheet(factor_data)"
274 | ],
275 | "outputs": []
276 | },
277 | {
278 | "cell_type": "markdown",
279 | "metadata": {},
280 | "source": [
281 | "## Run Pyfolio Analysis"
282 | ]
283 | },
284 | {
285 | "cell_type": "markdown",
286 | "metadata": {},
287 | "source": [
288 | "### Get Input Data"
289 | ]
290 | },
291 | {
292 | "cell_type": "markdown",
293 | "metadata": {},
294 | "source": [
295 | "We can see in Alphalens analysis that quantiles 1 and 5 are the most predictive so we'll build a portfolio data using only those quantiles."
296 | ]
297 | },
298 | {
299 | "cell_type": "code",
300 | "execution_count": 11,
301 | "metadata": {
302 | "ExecuteTime": {
303 | "end_time": "2021-04-19T19:00:36.987486Z",
304 | "start_time": "2021-04-19T19:00:22.470808Z"
305 | }
306 | },
307 | "source": [
308 | "pf_returns, pf_positions, pf_benchmark = \\\n",
309 | " alphalens.performance.create_pyfolio_input(factor_data,\n",
310 | " period='1D',\n",
311 | " capital=100000,\n",
312 | " long_short=True,\n",
313 | " group_neutral=False,\n",
314 | " equal_weight=True,\n",
315 | " quantiles=[1,5],\n",
316 | " groups=None,\n",
317 | " benchmark_period='1D')"
318 | ],
319 | "outputs": []
320 | },
321 | {
322 | "cell_type": "markdown",
323 | "metadata": {},
324 | "source": [
325 | "### Pyfolio Tearsheet"
326 | ]
327 | },
328 | {
329 | "cell_type": "markdown",
330 | "metadata": {},
331 | "source": [
332 | "Now that we have prepared the data we can run Pyfolio functions"
333 | ]
334 | },
335 | {
336 | "cell_type": "code",
337 | "execution_count": 12,
338 | "metadata": {
339 | "ExecuteTime": {
340 | "end_time": "2021-04-19T19:00:42.663363Z",
341 | "start_time": "2021-04-19T19:00:36.988417Z"
342 | },
343 | "scrolled": true
344 | },
345 | "source": [
346 | "pyfolio.tears.create_full_tear_sheet(pf_returns,\n",
347 | " positions=pf_positions,\n",
348 | " benchmark_rets=pf_benchmark)"
349 | ],
350 | "outputs": []
351 | },
352 | {
353 | "cell_type": "markdown",
354 | "metadata": {},
355 | "source": [
356 | "## Subset Performance"
357 | ]
358 | },
359 | {
360 | "cell_type": "markdown",
361 | "metadata": {},
362 | "source": [
363 | "### Weekday Analysis"
364 | ]
365 | },
366 | {
367 | "cell_type": "markdown",
368 | "metadata": {},
369 | "source": [
370 | "Sometimes it might be useful to analyze subets of your factor data, for example it could be interesting to see the comparison of your factor in different days of the week. Below we'll see how to select and analyze factor data corresponding to Mondays, the positions will be held the for a period of 5 days"
371 | ]
372 | },
373 | {
374 | "cell_type": "code",
375 | "execution_count": 13,
376 | "metadata": {
377 | "ExecuteTime": {
378 | "end_time": "2021-04-19T19:00:42.675026Z",
379 | "start_time": "2021-04-19T19:00:42.664285Z"
380 | }
381 | },
382 | "source": [
383 | "monday_factor_data = factor_data[ factor_data.index.get_level_values('date').weekday == 0 ]"
384 | ],
385 | "outputs": []
386 | },
387 | {
388 | "cell_type": "code",
389 | "execution_count": 14,
390 | "metadata": {
391 | "ExecuteTime": {
392 | "end_time": "2021-04-19T19:00:45.541885Z",
393 | "start_time": "2021-04-19T19:00:42.675921Z"
394 | }
395 | },
396 | "source": [
397 | "pf_returns, pf_positions, pf_benchmark = \\\n",
398 | " alphalens.performance.create_pyfolio_input(monday_factor_data,\n",
399 | " period='5D',\n",
400 | " capital=100000,\n",
401 | " long_short=True,\n",
402 | " group_neutral=False,\n",
403 | " equal_weight=True,\n",
404 | " quantiles=[1,5],\n",
405 | " groups=None,\n",
406 | " benchmark_period='1D')"
407 | ],
408 | "outputs": []
409 | },
410 | {
411 | "cell_type": "markdown",
412 | "metadata": {},
413 | "source": [
414 | "### Pyfolio Tearsheet"
415 | ]
416 | },
417 | {
418 | "cell_type": "code",
419 | "execution_count": 15,
420 | "metadata": {
421 | "ExecuteTime": {
422 | "end_time": "2021-04-19T19:00:50.905988Z",
423 | "start_time": "2021-04-19T19:00:45.542816Z"
424 | },
425 | "scrolled": true
426 | },
427 | "source": [
428 | "pyfolio.tears.create_full_tear_sheet(pf_returns,\n",
429 | " positions=pf_positions,\n",
430 | " benchmark_rets=pf_benchmark)"
431 | ],
432 | "outputs": []
433 | }
434 | ],
435 | "metadata": {
436 | "kernelspec": {
437 | "display_name": "Python 3",
438 | "language": "python",
439 | "name": "python3"
440 | },
441 | "language_info": {
442 | "codemirror_mode": {
443 | "name": "ipython",
444 | "version": 3
445 | },
446 | "file_extension": ".py",
447 | "mimetype": "text/x-python",
448 | "name": "python",
449 | "nbconvert_exporter": "python",
450 | "pygments_lexer": "ipython3",
451 | "version": "3.8.8"
452 | },
453 | "toc": {
454 | "base_numbering": 1,
455 | "nav_menu": {},
456 | "number_sections": true,
457 | "sideBar": true,
458 | "skip_h1_title": false,
459 | "title_cell": "Table of Contents",
460 | "title_sidebar": "Contents",
461 | "toc_cell": false,
462 | "toc_position": {},
463 | "toc_section_display": true,
464 | "toc_window_display": true
465 | }
466 | },
467 | "nbformat": 4,
468 | "nbformat_minor": 1
469 | }
470 |
--------------------------------------------------------------------------------
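
The sign convention of the mean-reversion factor in the notebook above is worth making concrete: negating the trailing 5-day return puts recent winners in the bottom quantile (short leg) and recent losers in the top quantile (long leg). A tiny sketch with made-up prices:

```python
import pandas as pd

# Made-up prices: one steady winner, one steady loser over five days.
prices = pd.DataFrame({'WINNER': [100, 102, 104, 106, 108, 110],
                       'LOSER':  [100,  99,  98,  97,  96,  95]})

factor = -prices.pct_change(5)  # same construction as in the notebook
print(factor.iloc[-1])  # WINNER -> -0.10 (short leg), LOSER -> +0.05 (long leg)
```
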
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "alphalens-reloaded"
3 | description = "Performance analysis of predictive (alpha) stock factors"
4 | requires-python = '>=3.10'
5 | dynamic = ["version"]
6 | readme = "README.md"
7 | authors = [
8 | { name = 'Quantopian Inc' },
9 | { email = 'pm@ml4trading.io' }
10 | ]
11 | maintainers = [
12 | { name = 'Stefan Jansen' },
13 | { email = 'pm@ml4trading.io' }
14 | ]
15 | license = { file = "LICENSE" }
16 |
17 | classifiers = [
18 | 'Development Status :: 5 - Production/Stable',
19 | 'License :: OSI Approved :: Apache Software License',
20 | 'Natural Language :: English',
21 | 'Programming Language :: Python',
22 | 'Programming Language :: Python :: 3.10',
23 | 'Programming Language :: Python :: 3.11',
24 | 'Programming Language :: Python :: 3.12',
25 | 'Programming Language :: Python :: 3.13',
26 | 'Operating System :: OS Independent',
27 | 'Intended Audience :: Science/Research',
28 | 'Topic :: Office/Business :: Financial :: Investment',
29 | 'Topic :: Scientific/Engineering :: Information Analysis',
30 | ]
31 |
32 | dependencies = [
33 | # following pandas
34 | "numpy>=1.23.5; python_version<'3.12'",
35 | "numpy>=1.26.0; python_version>='3.12'",
36 | "pandas >=1.5.0,<3.0",
37 | "matplotlib >=1.4.0",
38 | "scipy >=0.14.0",
39 | "seaborn >=0.6.0",
40 | "statsmodels >=0.6.1",
41 | "IPython >=3.2.3",
42 | "empyrical-reloaded>=0.5.7"
43 | ]
44 |
45 | [project.urls]
46 | homepage = 'https://ml4trading.io'
47 | repository = 'https://github.com/stefan-jansen/alphalens-reloaded'
48 | documentation = 'https://alphalens.ml4trading.io'
49 |
50 | [build-system]
51 | requires = [
52 | 'setuptools>=54.0.0',
53 | "setuptools_scm[toml]>=6.2",
54 | ]
55 |
56 | build-backend = 'setuptools.build_meta'
57 |
58 |
59 | [project.optional-dependencies]
60 | test = [
61 | "tox >=2.3.1",
62 | "coverage >=4.0.3",
63 | "coveralls ==3.0.1",
64 | "pytest >=6.2",
65 | 'pytest-xdist >=2.5.0',
66 | "pytest-cov >=2.12",
67 | "parameterized >=0.6.1",
68 | "flake8 >=3.9.1",
69 | "black",
70 | ]
71 | dev = [
72 | "flake8 >=3.9.1",
73 | "black",
74 | "pre-commit >=2.12.1",
75 | ]
76 | docs = [
77 | 'Cython',
78 | 'Sphinx >=1.3.2',
79 | 'numpydoc >=0.5.0',
80 | 'sphinx-autobuild >=0.6.0',
81 | 'pydata-sphinx-theme',
82 | 'sphinx-markdown-tables',
83 | "sphinx_copybutton",
84 | 'm2r2'
85 | ]
86 |
87 | [tool.setuptools]
88 | include-package-data = true
89 | zip-safe = false
90 |
91 | [tool.setuptools.packages.find]
92 | where = ['src']
93 | exclude = ['tests*']
94 |
95 | [tool.setuptools_scm]
96 | write_to = "src/alphalens/_version.py"
97 | version_scheme = 'guess-next-dev'
98 | local_scheme = 'dirty-tag'
99 |
100 |
101 | [tool.pytest.ini_options]
102 | pythonpath = ['src']
103 | minversion = "6.0"
104 | testpaths = 'tests'
105 | addopts = '-v'
106 |
107 | [tool.cibuildwheel]
108 | test-extras = "test"
109 | test-command = "pytest -n 2 {package}/tests"
110 | build-verbosity = 3
111 |
112 |
113 | [tool.cibuildwheel.macos]
114 | archs = ["x86_64", "arm64", "universal2"]
115 | test-skip = ["*universal2:arm64"]
116 |
117 |
118 | [tool.cibuildwheel.linux]
119 | archs = ["auto64"]
120 | skip = "*musllinux*"
121 |
122 |
123 | [tool.black]
124 | line-length = 88
125 | target-version = ['py39', 'py310', 'py311', 'py312']
126 | include = '\.pyi?$'
127 | extend-exclude = '''
128 | \(
129 | docs/source/conf.py
130 | \)
131 | '''
132 |
133 |
134 | [tool.tox]
135 | legacy_tox_ini = """
136 | [tox]
137 |
138 | envlist =
139 | py310-pandas{15,20,21,22}-numpy1
140 | py311-pandas{15,20,21,22}-numpy1
141 | py312-pandas{15,20,21,22}-numpy1
142 | py310-pandas222-numpy2{0,1,2}
143 | py311-pandas222-numpy2{0,1,2}
144 | py312-pandas222-numpy2{0,1,2}
145 |
146 | isolated_build = True
147 | skip_missing_interpreters = True
148 | minversion = 3.23.0
149 |
150 | [gh-actions]
151 | python =
152 | 3.10: py310
153 | 3.11: py311
154 | 3.12: py312
155 | 3.13: py313
156 |
157 | [testenv]
158 | usedevelop = True
159 | setenv =
160 | MPLBACKEND = Agg
161 |
162 | changedir = tmp
163 | extras = test
164 | deps =
165 | pandas15: pandas>=1.5.0,<1.6
166 | pandas20: pandas>=2.0,<2.1
167 | pandas21: pandas>=2.1,<2.2
168 | pandas22: pandas>=2.2,<2.3
169 | pandas222: pandas>=2.2.2,<2.3
170 | numpy1: numpy>=1.23.5,<2.0
171 | numpy20: numpy>=2.0.0,<2.1.0
172 | numpy21: numpy>=2.1.0,<2.2.0
173 | numpy22: numpy>=2.2.0,<2.3.0
174 |
175 | commands =
176 | pytest -n 2 --cov={toxinidir}/src --cov-report term --cov-report=xml --cov-report=html:htmlcov {toxinidir}/tests
177 | """
178 |
--------------------------------------------------------------------------------
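
The optional-dependency groups declared in pyproject.toml above can be inspected programmatically. A small sketch, assuming Python 3.11+ (for the stdlib tomllib module) and that it is run from the repository root:

```python
import tomllib  # stdlib since Python 3.11

with open('pyproject.toml', 'rb') as f:  # tomllib requires binary mode
    meta = tomllib.load(f)

# These extras map to the groups installable via `pip install alphalens-reloaded[<extra>]`.
for extra, deps in meta['project']['optional-dependencies'].items():
    print(f"{extra}: {len(deps)} packages")  # test, dev, docs
```
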
/src/alphalens/__init__.py:
--------------------------------------------------------------------------------
1 | from . import performance
2 | from . import plotting
3 | from . import tears
4 | from . import utils
5 |
6 | try:
7 | from ._version import version as __version__
8 | from ._version import version_tuple
9 | except ImportError:
10 | __version__ = "unknown version"
11 | version_tuple = (0, 0, "unknown version")
12 |
13 |
14 | __all__ = ["performance", "plotting", "tears", "utils"]
15 |
--------------------------------------------------------------------------------
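
The try/except in __init__.py above guarantees a version attribute is always available: setuptools_scm writes src/alphalens/_version.py at build time (see the [tool.setuptools_scm] section of pyproject.toml), while a raw source checkout falls back to the placeholder. A usage sketch (the printed values are illustrative):

```python
import alphalens

# From an installed wheel this prints the real version;
# from an un-built source tree it prints 'unknown version'.
print(alphalens.__version__)
print(alphalens.version_tuple)
```
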
/src/alphalens/examples/event_study.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Event Study"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "While Alphalens is a tool designed to evaluate a cross-sectional signal which can be used to rank many securities each day, we can still make use of Alphalens returns analysis functions, a subset of Alphalens, to create a meaningful event study.\n",
15 | "\n",
16 | "An event study is a statistical method to assess the impact of a particular event on the value of a stock. In this example we will evalute what happens to stocks whose price fall below 30$"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "# Imports & Settings"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 1,
29 | "metadata": {
30 | "ExecuteTime": {
31 | "end_time": "2021-09-07T23:18:43.743879Z",
32 | "start_time": "2021-09-07T23:18:43.733479Z"
33 | }
34 | },
35 | "source": [
36 | "import warnings\n",
37 | "warnings.filterwarnings('ignore')"
38 | ],
39 | "outputs": []
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 2,
44 | "metadata": {
45 | "ExecuteTime": {
46 | "end_time": "2021-09-07T23:18:44.563311Z",
47 | "start_time": "2021-09-07T23:18:43.759334Z"
48 | }
49 | },
50 | "source": [
51 | "import alphalens\n",
52 | "import pandas as pd"
53 | ],
54 | "outputs": []
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 3,
59 | "metadata": {
60 | "ExecuteTime": {
61 | "end_time": "2021-09-07T23:18:44.568258Z",
62 | "start_time": "2021-09-07T23:18:44.564789Z"
63 | }
64 | },
65 | "source": [
66 | "%matplotlib inline"
67 | ],
68 | "outputs": []
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "# Load Data"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "Below is a simple mapping of tickers to sectors for a universe of 500 large cap stocks."
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 4,
87 | "metadata": {
88 | "ExecuteTime": {
89 | "end_time": "2021-09-07T23:18:44.583002Z",
90 | "start_time": "2021-09-07T23:18:44.569186Z"
91 | }
92 | },
93 | "source": [
94 | "tickers = ['ACN', 'ATVI', 'ADBE', 'AMD', 'AKAM', 'ADS', 'GOOGL', 'GOOG', 'APH', 'ADI', 'ANSS', 'AAPL',\n",
95 | " 'AVGO', 'CA', 'CDNS', 'CSCO', 'CTXS', 'CTSH', 'GLW', 'CSRA', 'DXC', 'EBAY', 'EA', 'FFIV', 'FB',\n",
96 | " 'FLIR', 'IT', 'GPN', 'HRS', 'HPE', 'HPQ', 'INTC', 'IBM', 'INTU', 'JNPR', 'KLAC', 'LRCX', 'MA', 'MCHP',\n",
97 | " 'MSFT', 'MSI', 'NTAP', 'NFLX', 'NVDA', 'ORCL', 'PAYX', 'PYPL', 'QRVO', 'QCOM', 'RHT', 'CRM', 'STX',\n",
98 | " 'AMG', 'AFL', 'ALL', 'AXP', 'AIG', 'AMP', 'AON', 'AJG', 'AIZ', 'BAC', 'BK', 'BBT', 'BRK.B', 'BLK', 'HRB',\n",
99 | " 'BHF', 'COF', 'CBOE', 'SCHW', 'CB', 'CINF', 'C', 'CFG', 'CME', 'CMA', 'DFS', 'ETFC', 'RE', 'FITB', 'BEN',\n",
100 | " 'GS', 'HIG', 'HBAN', 'ICE', 'IVZ', 'JPM', 'KEY', 'LUK', 'LNC', 'L', 'MTB', 'MMC', 'MET', 'MCO', 'MS',\n",
101 | " 'NDAQ', 'NAVI', 'NTRS', 'PBCT', 'PNC', 'PFG', 'PGR', 'PRU', 'RJF', 'RF', 'SPGI', 'STT', 'STI', 'SYF', 'TROW',\n",
102 | " 'ABT', 'ABBV', 'AET', 'A', 'ALXN', 'ALGN', 'AGN', 'ABC', 'AMGN', 'ANTM', 'BCR', 'BAX', 'BDX', 'BIIB', 'BSX',\n",
103 | " 'BMY', 'CAH', 'CELG', 'CNC', 'CERN', 'CI', 'COO', 'DHR', 'DVA', 'XRAY', 'EW', 'EVHC', 'ESRX', 'GILD', 'HCA',\n",
104 | " 'HSIC', 'HOLX', 'HUM', 'IDXX', 'ILMN', 'INCY', 'ISRG', 'IQV', 'JNJ', 'LH', 'LLY', 'MCK', 'MDT', 'MRK', 'MTD',\n",
105 | " 'MYL', 'PDCO', 'PKI', 'PRGO', 'PFE', 'DGX', 'REGN', 'RMD', 'SYK', 'TMO', 'UNH', 'UHS', 'VAR', 'VRTX', 'WAT',\n",
106 | " 'MMM', 'AYI', 'ALK', 'ALLE', 'AAL', 'AME', 'AOS', 'ARNC', 'BA', 'CHRW', 'CAT', 'CTAS', 'CSX', 'CMI', 'DE',\n",
107 | " 'DAL', 'DOV', 'ETN', 'EMR', 'EFX', 'EXPD', 'FAST', 'FDX', 'FLS', 'FLR', 'FTV', 'FBHS', 'GD', 'GE', 'GWW',\n",
108 | " 'HON', 'INFO', 'ITW', 'IR', 'JEC', 'JBHT', 'JCI', 'KSU', 'LLL', 'LMT', 'MAS', 'NLSN', 'NSC', 'NOC', 'PCAR',\n",
109 | " 'PH', 'PNR', 'PWR', 'RTN', 'RSG', 'RHI', 'ROK', 'COL', 'ROP', 'LUV', 'SRCL', 'TXT', 'TDG', 'UNP', 'UAL',\n",
110 | " 'AES', 'LNT', 'AEE', 'AEP', 'AWK', 'CNP', 'CMS', 'ED', 'D', 'DTE', 'DUK', 'EIX', 'ETR', 'ES', 'EXC']"
111 | ],
112 | "outputs": []
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "metadata": {},
117 | "source": [
118 | "## YFinance Download"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 5,
124 | "metadata": {
125 | "ExecuteTime": {
126 | "end_time": "2021-09-07T23:19:15.663673Z",
127 | "start_time": "2021-09-07T23:18:44.634298Z"
128 | }
129 | },
130 | "source": [
131 | "import yfinance as yf\n",
132 | "import pandas_datareader.data as web\n",
133 | "yf.pdr_override()\n",
134 | "\n",
135 | "df = web.get_data_yahoo(tickers, start='2015-06-01', end='2017-01-01')\n",
136 | "df.index = pd.to_datetime(df.index)"
137 | ],
138 | "outputs": []
139 | },
140 | {
141 | "cell_type": "markdown",
142 | "metadata": {},
143 | "source": [
144 | "## Data Formatting"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 6,
150 | "metadata": {
151 | "ExecuteTime": {
152 | "end_time": "2021-09-07T23:19:18.013078Z",
153 | "start_time": "2021-09-07T23:19:17.894866Z"
154 | }
155 | },
156 | "source": [
157 | "df.info()"
158 | ],
159 | "outputs": []
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 7,
164 | "metadata": {
165 | "ExecuteTime": {
166 | "end_time": "2021-09-07T23:19:21.132186Z",
167 | "start_time": "2021-09-07T23:19:21.084649Z"
168 | }
169 | },
170 | "source": [
171 | "df = df.stack()\n",
172 | "df.index.names = ['date', 'asset']\n",
173 | "df = df.tz_localize('UTC', level='date')"
174 | ],
175 | "outputs": []
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": 8,
180 | "metadata": {
181 | "ExecuteTime": {
182 | "end_time": "2021-09-07T23:19:21.655910Z",
183 | "start_time": "2021-09-07T23:19:21.630676Z"
184 | }
185 | },
186 | "source": [
187 | "df.info()"
188 | ],
189 | "outputs": []
190 | },
191 | {
192 | "cell_type": "markdown",
193 | "metadata": {},
194 | "source": [
195 | "# Factor Computation"
196 | ]
197 | },
198 | {
199 | "cell_type": "markdown",
200 | "metadata": {},
201 | "source": [
202 | "Now it's time to build the events DataFrame, the input we will pass to Alphalens.\n",
203 | "\n",
204 | "Alphalens calculates statistics for those dates where the input DataFrame has values (not NaN). So to compute the performace analysis on specific dates and securities (like an event study) then we have to make sure the input DataFrame contains valid values only on those date/security combinations where the event happens. All the other values in the DataFrame must be NaN or not present.\n",
205 | "\n",
206 | "Also, make sure the event values are positive (it doesn't matter the value but they must be positive) if you intend to go long on the events and use negative values if you intent to go short. This impacts the cumulative returns plots. \n",
207 | "\n",
208 | "Let's create the event DataFrame where we \"mark\" (any value) each day a security price fall below 30$. "
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": 9,
214 | "metadata": {
215 | "ExecuteTime": {
216 | "end_time": "2021-09-07T23:19:27.001181Z",
217 | "start_time": "2021-09-07T23:19:26.963815Z"
218 | }
219 | },
220 | "source": [
221 | "today_price = df.loc[:, 'Open'].unstack('asset')\n",
222 | "yesterday_price = today_price.shift(1)\n",
223 | "events = today_price[(today_price < 30.0) & (yesterday_price >= 30)]\n",
224 | "events = events.stack()\n",
225 | "events = events.astype(float)\n",
226 | "events"
227 | ],
228 | "outputs": []
229 | },
230 | {
231 | "cell_type": "markdown",
232 | "metadata": {},
233 | "source": [
234 | "The pricing data passed to alphalens should contain the entry price for the assets so it must reflect the next available price after an event was observed at a given timestamp. Those prices must not be used in the calculation of the events for that time. Always double check to ensure you are not introducing lookahead bias to your study.\n",
235 | "\n",
236 | "The pricing data must also contain the exit price for the assets, for period 1 the price at the next timestamp will be used, for period 2 the price after 2 timestats will be used and so on.\n",
237 | "\n",
238 | "While Alphalens is time frequency agnostic, in our example we build 'pricing' DataFrame so that for each event timestamp it contains the assets open price for the next day afer the event is detected, this price will be used as the assets entry price. Also, we are not adding additional prices so the assets exit price will be the following days open prices (how many days depends on 'periods' argument)."
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": 10,
244 | "metadata": {
245 | "ExecuteTime": {
246 | "end_time": "2021-09-07T23:19:27.016155Z",
247 | "start_time": "2021-09-07T23:19:27.002467Z"
248 | }
249 | },
250 | "source": [
251 | "pricing = df.loc[:, 'Open'].iloc[1:].unstack('asset')"
252 | ],
253 | "outputs": []
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 11,
258 | "metadata": {
259 | "ExecuteTime": {
260 | "end_time": "2021-09-07T23:19:27.030083Z",
261 | "start_time": "2021-09-07T23:19:27.017147Z"
262 | }
263 | },
264 | "source": [
265 | "pricing.info()"
266 | ],
267 | "outputs": []
268 | },
269 | {
270 | "cell_type": "markdown",
271 | "metadata": {},
272 | "source": [
273 | "# Run Event Study"
274 | ]
275 | },
276 | {
277 | "cell_type": "markdown",
278 | "metadata": {},
279 | "source": [
280 | "## Configuration"
281 | ]
282 | },
283 | {
284 | "cell_type": "markdown",
285 | "metadata": {
286 | "collapsed": true
287 | },
288 | "source": [
289 | "Before running Alphalens beware of some important options: "
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 12,
295 | "metadata": {
296 | "ExecuteTime": {
297 | "end_time": "2021-09-07T23:19:27.036984Z",
298 | "start_time": "2021-09-07T23:19:27.031550Z"
299 | }
300 | },
301 | "source": [
302 | "# we don't want any filtering to be done\n",
303 | "\n",
304 | "filter_zscore = None"
305 | ],
306 | "outputs": []
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": 13,
311 | "metadata": {
312 | "ExecuteTime": {
313 | "end_time": "2021-09-07T23:19:27.045088Z",
314 | "start_time": "2021-09-07T23:19:27.038022Z"
315 | }
316 | },
317 | "source": [
318 | "# We want to have only one bin/quantile. So we can either use quantiles=1 or bins=1\n",
319 | "\n",
320 | "quantiles = None\n",
321 | "bins = 1\n",
322 | "\n",
323 | "# Beware that in pandas versions below 0.20.0 there were few bugs in panda.qcut and pandas.cut\n",
324 | "# that resulted in ValueError exception to be thrown when identical values were present in the\n",
325 | "# dataframe and 1 quantile/bin was selected.\n",
326 | "# As a workaroung use the bins custom range option that include all your values. E.g.\n",
327 | "\n",
328 | "quantiles = None\n",
329 | "bins = [-1000000, 1000000]"
330 | ],
331 | "outputs": []
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 14,
336 | "metadata": {
337 | "ExecuteTime": {
338 | "end_time": "2021-09-07T23:19:27.061765Z",
339 | "start_time": "2021-09-07T23:19:27.046246Z"
340 | }
341 | },
342 | "source": [
343 | "# You don't have to directly set 'long_short' option when running alphalens.tears.create_event_study_tear_sheet\n",
344 | "# But in case you are making use of other Alphalens functions make sure to set 'long_short=False'\n",
345 | "# if you set 'long_short=True' Alphalens will perform forward return demeaning and that makes sense only\n",
346 | "# in a dollar neutral portfolio. With an event style signal you cannot usually create a dollar neutral\n",
347 | "# long/short portfolio\n",
348 | "\n",
349 | "long_short = False"
350 | ],
351 | "outputs": []
352 | },
353 | {
354 | "cell_type": "markdown",
355 | "metadata": {},
356 | "source": [
357 | "## Get Alphalens Input"
358 | ]
359 | },
360 | {
361 | "cell_type": "code",
362 | "execution_count": 15,
363 | "metadata": {
364 | "ExecuteTime": {
365 | "end_time": "2021-09-07T23:19:27.323875Z",
366 | "start_time": "2021-09-07T23:19:27.067595Z"
367 | }
368 | },
369 | "source": [
370 | "factor_data = alphalens.utils.get_clean_factor_and_forward_returns(events,\n",
371 | " pricing,\n",
372 | " quantiles=None,\n",
373 | " bins=1,\n",
374 | " periods=(\n",
375 | " 1, 2, 3, 4, 5, 6, 10),\n",
376 | " filter_zscore=None)"
377 | ],
378 | "outputs": []
379 | },
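    | {
    | "cell_type": "markdown",
    | "metadata": {},
    | "source": [
    | "As a quick sanity check of the input we just built, we can inspect the resulting DataFrame: it contains one column per forward-return period plus the 'factor' and 'factor_quantile' columns."
    | ]
    | },
    | {
    | "cell_type": "code",
    | "execution_count": null,
    | "metadata": {},
    | "source": [
    | "factor_data.head()"
    | ],
    | "outputs": []
    | },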
380 | {
381 | "cell_type": "markdown",
382 | "metadata": {},
383 | "source": [
384 | "## Run Event Tearsheet"
385 | ]
386 | },
387 | {
388 | "cell_type": "code",
389 | "execution_count": 16,
390 | "metadata": {
391 | "ExecuteTime": {
392 | "end_time": "2021-09-07T23:19:29.805689Z",
393 | "start_time": "2021-09-07T23:19:27.324792Z"
394 | },
395 | "scrolled": false
396 | },
397 | "source": [
398 | "alphalens.tears.create_event_study_tear_sheet(factor_data, \n",
399 | " pricing, \n",
400 | " avgretplot=(5, 10));"
401 | ],
402 | "outputs": []
403 | },
404 | {
405 | "cell_type": "markdown",
406 | "metadata": {},
407 | "source": [
408 | "## Short Signal Analysis"
409 | ]
410 | },
411 | {
412 | "cell_type": "markdown",
413 | "metadata": {},
414 | "source": [
415 | "If we wanted to analyze the performance of short signal, we only had to switch from positive to negative event values"
416 | ]
417 | },
418 | {
419 | "cell_type": "code",
420 | "execution_count": 17,
421 | "metadata": {
422 | "ExecuteTime": {
423 | "end_time": "2021-09-07T23:19:29.809273Z",
424 | "start_time": "2021-09-07T23:19:29.806840Z"
425 | }
426 | },
427 | "source": [
428 | "events = -events"
429 | ],
430 | "outputs": []
431 | },
432 | {
433 | "cell_type": "code",
434 | "execution_count": 18,
435 | "metadata": {
436 | "ExecuteTime": {
437 | "end_time": "2021-09-07T23:19:29.985276Z",
438 | "start_time": "2021-09-07T23:19:29.811225Z"
439 | }
440 | },
441 | "source": [
442 | "factor_data = alphalens.utils.get_clean_factor_and_forward_returns(events,\n",
443 | " pricing,\n",
444 | " quantiles=None,\n",
445 | " bins=1,\n",
446 | " periods=(\n",
447 | " 1, 2, 3, 4, 5, 6, 10),\n",
448 | " filter_zscore=None)"
449 | ],
450 | "outputs": []
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": 19,
455 | "metadata": {
456 | "ExecuteTime": {
457 | "end_time": "2021-09-07T23:19:32.434768Z",
458 | "start_time": "2021-09-07T23:19:29.986257Z"
459 | },
460 | "scrolled": false
461 | },
462 | "source": [
463 | "alphalens.tears.create_event_study_tear_sheet(factor_data, \n",
464 | " pricing, \n",
465 | " avgretplot=(5, 10));"
466 | ],
467 | "outputs": []
468 | }
469 | ],
470 | "metadata": {
471 | "kernelspec": {
472 | "display_name": "Python 3 (ipykernel)",
473 | "language": "python",
474 | "name": "python3"
475 | },
476 | "language_info": {
477 | "codemirror_mode": {
478 | "name": "ipython",
479 | "version": 3
480 | },
481 | "file_extension": ".py",
482 | "mimetype": "text/x-python",
483 | "name": "python",
484 | "nbconvert_exporter": "python",
485 | "pygments_lexer": "ipython3",
486 | "version": "3.8.8"
487 | },
488 | "toc": {
489 | "base_numbering": 1,
490 | "nav_menu": {},
491 | "number_sections": true,
492 | "sideBar": true,
493 | "skip_h1_title": false,
494 | "title_cell": "Table of Contents",
495 | "title_sidebar": "Contents",
496 | "toc_cell": false,
497 | "toc_position": {},
498 | "toc_section_display": true,
499 | "toc_window_display": true
500 | }
501 | },
502 | "nbformat": 4,
503 | "nbformat_minor": 1
504 | }
505 |
--------------------------------------------------------------------------------
/src/alphalens/examples/intraday_factor.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Intraday Factor"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "In this notebook we use Alphalens to analyse the performance of an intraday factor, which is computed daily but the stocks are bought at marker open and sold at market close with no overnight positions."
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# Imports & Settings"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 1,
27 | "metadata": {
28 | "ExecuteTime": {
29 | "end_time": "2021-09-07T23:14:22.562637Z",
30 | "start_time": "2021-09-07T23:14:22.560177Z"
31 | },
32 | "pycharm": {
33 | "name": "#%%\n"
34 | }
35 | },
36 | "source": [
37 | "import warnings\n",
38 | "warnings.filterwarnings('ignore')"
39 | ],
40 | "outputs": []
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 2,
45 | "metadata": {
46 | "ExecuteTime": {
47 | "end_time": "2021-09-07T23:14:23.425407Z",
48 | "start_time": "2021-09-07T23:14:22.565593Z"
49 | }
50 | },
51 | "source": [
52 | "import alphalens\n",
53 | "import pandas as pd"
54 | ],
55 | "outputs": []
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 3,
60 | "metadata": {
61 | "ExecuteTime": {
62 | "end_time": "2021-09-07T23:14:23.430548Z",
63 | "start_time": "2021-09-07T23:14:23.426792Z"
64 | }
65 | },
66 | "source": [
67 | "%matplotlib inline"
68 | ],
69 | "outputs": []
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "# Loading Data"
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {},
81 | "source": [
82 | "Below is a simple mapping of tickers to sectors for a small universe of large cap stocks."
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 4,
88 | "metadata": {
89 | "ExecuteTime": {
90 | "end_time": "2021-09-07T23:14:23.445030Z",
91 | "start_time": "2021-09-07T23:14:23.431980Z"
92 | }
93 | },
94 | "source": [
95 | "sector_names = {\n",
96 | " 0 : \"information_technology\",\n",
97 | " 1 : \"financials\",\n",
98 | " 2 : \"health_care\",\n",
99 | " 3 : \"industrials\",\n",
100 | " 4 : \"utilities\", \n",
101 | " 5 : \"real_estate\", \n",
102 | " 6 : \"materials\", \n",
103 | " 7 : \"telecommunication_services\", \n",
104 | " 8 : \"consumer_staples\", \n",
105 | " 9 : \"consumer_discretionary\", \n",
106 | " 10 : \"energy\" \n",
107 | "}\n",
108 | "\n",
109 | "ticker_sector = {\n",
110 | " \"ACN\" : 0, \"ATVI\" : 0, \"ADBE\" : 0, \"AMD\" : 0, \"AKAM\" : 0, \"ADS\" : 0, \"GOOGL\" : 0, \"GOOG\" : 0, \n",
111 | " \"APH\" : 0, \"ADI\" : 0, \"ANSS\" : 0, \"AAPL\" : 0, \"AMAT\" : 0, \"ADSK\" : 0, \"ADP\" : 0, \"AVGO\" : 0,\n",
112 | " \"AMG\" : 1, \"AFL\" : 1, \"ALL\" : 1, \"AXP\" : 1, \"AIG\" : 1, \"AMP\" : 1, \"AON\" : 1, \"AJG\" : 1, \"AIZ\" : 1, \"BAC\" : 1,\n",
113 | " \"BK\" : 1, \"BBT\" : 1, \"BRK.B\" : 1, \"BLK\" : 1, \"HRB\" : 1, \"BHF\" : 1, \"COF\" : 1, \"CBOE\" : 1, \"SCHW\" : 1, \"CB\" : 1,\n",
114 | " \"ABT\" : 2, \"ABBV\" : 2, \"AET\" : 2, \"A\" : 2, \"ALXN\" : 2, \"ALGN\" : 2, \"AGN\" : 2, \"ABC\" : 2, \"AMGN\" : 2, \"ANTM\" : 2,\n",
115 | " \"BCR\" : 2, \"BAX\" : 2, \"BDX\" : 2, \"BIIB\" : 2, \"BSX\" : 2, \"BMY\" : 2, \"CAH\" : 2, \"CELG\" : 2, \"CNC\" : 2, \"CERN\" : 2,\n",
116 | " \"MMM\" : 3, \"AYI\" : 3, \"ALK\" : 3, \"ALLE\" : 3, \"AAL\" : 3, \"AME\" : 3, \"AOS\" : 3, \"ARNC\" : 3, \"BA\" : 3, \"CHRW\" : 3,\n",
117 | " \"CAT\" : 3, \"CTAS\" : 3, \"CSX\" : 3, \"CMI\" : 3, \"DE\" : 3, \"DAL\" : 3, \"DOV\" : 3, \"ETN\" : 3, \"EMR\" : 3, \"EFX\" : 3,\n",
118 | " \"AES\" : 4, \"LNT\" : 4, \"AEE\" : 4, \"AEP\" : 4, \"AWK\" : 4, \"CNP\" : 4, \"CMS\" : 4, \"ED\" : 4, \"D\" : 4, \"DTE\" : 4,\n",
119 | " \"DUK\" : 4, \"EIX\" : 4, \"ETR\" : 4, \"ES\" : 4, \"EXC\" : 4, \"FE\" : 4, \"NEE\" : 4, \"NI\" : 4, \"NRG\" : 4, \"PCG\" : 4,\n",
120 | " \"ARE\" : 5, \"AMT\" : 5, \"AIV\" : 5, \"AVB\" : 5, \"BXP\" : 5, \"CBG\" : 5, \"CCI\" : 5, \"DLR\" : 5, \"DRE\" : 5,\n",
121 | " \"EQIX\" : 5, \"EQR\" : 5, \"ESS\" : 5, \"EXR\" : 5, \"FRT\" : 5, \"GGP\" : 5, \"HCP\" : 5, \"HST\" : 5, \"IRM\" : 5, \"KIM\" : 5,\n",
122 | " \"APD\" : 6, \"ALB\" : 6, \"AVY\" : 6, \"BLL\" : 6, \"CF\" : 6, \"DWDP\" : 6, \"EMN\" : 6, \"ECL\" : 6, \"FMC\" : 6, \"FCX\" : 6,\n",
123 | " \"IP\" : 6, \"IFF\" : 6, \"LYB\" : 6, \"MLM\" : 6, \"MON\" : 6, \"MOS\" : 6, \"NEM\" : 6, \"NUE\" : 6, \"PKG\" : 6, \"PPG\" : 6,\n",
124 | " \"T\" : 7, \"CTL\" : 7, \"VZ\" : 7, \n",
125 | " \"MO\" : 8, \"ADM\" : 8, \"BF.B\" : 8, \"CPB\" : 8, \"CHD\" : 8, \"CLX\" : 8, \"KO\" : 8, \"CL\" : 8, \"CAG\" : 8,\n",
126 | " \"STZ\" : 8, \"COST\" : 8, \"COTY\" : 8, \"CVS\" : 8, \"DPS\" : 8, \"EL\" : 8, \"GIS\" : 8, \"HSY\" : 8, \"HRL\" : 8,\n",
127 | " \"AAP\" : 9, \"AMZN\" : 9, \"APTV\" : 9, \"AZO\" : 9, \"BBY\" : 9, \"BWA\" : 9, \"KMX\" : 9, \"CCL\" : 9, \n",
128 | " \"APC\" : 10, \"ANDV\" : 10, \"APA\" : 10, \"BHGE\" : 10, \"COG\" : 10, \"CHK\" : 10, \"CVX\" : 10, \"XEC\" : 10, \"CXO\" : 10,\n",
129 | " \"COP\" : 10, \"DVN\" : 10, \"EOG\" : 10, \"EQT\" : 10, \"XOM\" : 10, \"HAL\" : 10, \"HP\" : 10, \"HES\" : 10, \"KMI\" : 10\n",
130 | "}"
131 | ],
132 | "outputs": []
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {},
137 | "source": [
138 | "## YFinance Download"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 5,
144 | "metadata": {
145 | "ExecuteTime": {
146 | "end_time": "2021-09-07T23:14:34.135698Z",
147 | "start_time": "2021-09-07T23:14:23.446445Z"
148 | }
149 | },
150 | "source": [
151 | "import yfinance as yf\n",
152 | "import pandas_datareader.data as web\n",
153 | "yf.pdr_override()\n",
154 | "\n",
155 | "tickers = list(ticker_sector.keys())\n",
156 | "df = web.get_data_yahoo(tickers, start='2017-01-01', end='2017-06-01')\n",
157 | "df.index = pd.to_datetime(df.index, utc=True)"
158 | ],
159 | "outputs": []
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 6,
164 | "metadata": {
165 | "ExecuteTime": {
166 | "end_time": "2021-09-07T23:14:34.148420Z",
167 | "start_time": "2021-09-07T23:14:34.136653Z"
168 | }
169 | },
170 | "source": [
171 | "df = df.stack()\n",
172 | "df.index.names = ['date', 'asset']\n",
173 | "df.info()"
174 | ],
175 | "outputs": []
176 | },
177 | {
178 | "cell_type": "markdown",
179 | "metadata": {},
180 | "source": [
181 | "# Factor Computation"
182 | ]
183 | },
184 | {
185 | "cell_type": "markdown",
186 | "metadata": {},
187 | "source": [
188 | "Our example factor ranks the stocks based on their overnight price gap (yesterday close to today open price). We'll see if the factor has some alpha or if it is pure noise."
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": 7,
194 | "metadata": {
195 | "ExecuteTime": {
196 | "end_time": "2021-09-07T23:14:34.153844Z",
197 | "start_time": "2021-09-07T23:14:34.149541Z"
198 | }
199 | },
200 | "source": [
201 | "available_tickers = df.index.unique('asset')\n",
202 | "ticker_sector = {k: v for k, v in ticker_sector.items() if k in available_tickers}"
203 | ],
204 | "outputs": []
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": 8,
209 | "metadata": {
210 | "ExecuteTime": {
211 | "end_time": "2021-09-07T23:14:34.170979Z",
212 | "start_time": "2021-09-07T23:14:34.154778Z"
213 | }
214 | },
215 | "source": [
216 | "today_open = df.loc[:, 'Open'].unstack('asset')\n",
217 | "today_close = df.loc[:, 'Close'].unstack('asset')\n",
218 | "yesterday_close = today_close.shift(1)"
219 | ],
220 | "outputs": []
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 9,
225 | "metadata": {
226 | "ExecuteTime": {
227 | "end_time": "2021-09-07T23:14:34.187472Z",
228 | "start_time": "2021-09-07T23:14:34.171837Z"
229 | }
230 | },
231 | "source": [
232 | "factor = (today_open - yesterday_close) / yesterday_close"
233 | ],
234 | "outputs": []
235 | },
236 | {
237 | "cell_type": "markdown",
238 | "metadata": {},
239 | "source": [
240 | "The pricing data passed to alphalens should contain the entry price for the assets so it must reflect the next available price after a factor value was observed at a given timestamp. Those prices must not be used in the calculation of the factor values for that time. Always double check to ensure you are not introducing lookahead bias to your study.\n",
241 | "\n",
242 | "The pricing data must also contain the exit price for the assets, for period 1 the price at the next timestamp will be used, for period 2 the price after 2 timestamps will be used and so on.\n",
243 | "\n",
244 | "There are no restrinctions/assumptions on the time frequencies a factor should be computed at and neither on the specific time a factor should be traded (trading at the open vs trading at the close vs intraday trading), it is only required that factor and price DataFrames are properly aligned given the rules above.\n",
245 | "\n",
246 | "In our example, we want to buy the stocks at marker open, so the need the open price at the exact timestamps as the factor valules, and we want to sell the stocks at market close so we will add the close prices too, which will be used to compute period 1 forward returns as they appear just after the factor values timestamps. The returns computed by Alphalens will therefore be based on the difference between open to close assets prices.\n",
247 | "\n",
248 | "If we had other prices we could compute other period returns, for example one hour after market open and 2 hours and so on. We could have added those prices right after the open prices and instruct Alphalens to compute 1, 2, 3... periods too and not only period 1 like in this example."
249 | ]
250 | },
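    | {
    | "cell_type": "markdown",
    | "metadata": {},
    | "source": [
    | "As a minimal sketch with made-up numbers of what this alignment implies: once the open and close prices are interleaved (as done in the next section), a factor observed at the 9:30 open gets the same-day close as its period 1 exit and the next day's open as its period 2 exit."
    | ]
    | },
    | {
    | "cell_type": "code",
    | "execution_count": null,
    | "metadata": {},
    | "source": [
    | "# Hypothetical prices for a single asset, in row order:\n",
    | "# [day 1 09:30 open, day 1 16:00 close, day 2 09:30 open]\n",
    | "open_1, close_1, open_2 = 100.0, 102.0, 101.0\n",
    | "\n",
    | "period_1_return = close_1 / open_1 - 1  # open to close: 0.02\n",
    | "period_2_return = open_2 / open_1 - 1   # open to next open: 0.01\n",
    | "period_1_return, period_2_return"
    | ],
    | "outputs": []
    | },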
251 | {
252 | "cell_type": "markdown",
253 | "metadata": {},
254 | "source": [
255 | "## Data Formatting"
256 | ]
257 | },
258 | {
259 | "cell_type": "markdown",
260 | "metadata": {},
261 | "source": [
262 | "### Time Adjustments"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": 10,
268 | "metadata": {
269 | "ExecuteTime": {
270 | "end_time": "2021-09-07T23:14:34.201010Z",
271 | "start_time": "2021-09-07T23:14:34.188492Z"
272 | }
273 | },
274 | "source": [
275 | "# Fix time as Yahoo doesn't set it\n",
276 | "today_open.index += pd.Timedelta('9h30m')\n",
277 | "today_close.index += pd.Timedelta('16h')\n",
278 | "# pricing will contain both open and close\n",
279 | "pricing = pd.concat([today_open, today_close]).sort_index()"
280 | ],
281 | "outputs": []
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": 11,
286 | "metadata": {
287 | "ExecuteTime": {
288 | "end_time": "2021-09-07T23:14:34.226872Z",
289 | "start_time": "2021-09-07T23:14:34.202621Z"
290 | }
291 | },
292 | "source": [
293 | "pricing.head()"
294 | ],
295 | "outputs": []
296 | },
297 | {
298 | "cell_type": "markdown",
299 | "metadata": {},
300 | "source": [
301 | "### Align Factor & Price"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 12,
307 | "metadata": {
308 | "ExecuteTime": {
309 | "end_time": "2021-09-07T23:14:34.233924Z",
310 | "start_time": "2021-09-07T23:14:34.227872Z"
311 | }
312 | },
313 | "source": [
314 | "# Align factor to open price\n",
315 | "factor.index += pd.Timedelta('9h30m')\n",
316 | "factor = factor.stack()\n",
317 | "factor.index = factor.index.set_names(['date', 'asset'])"
318 | ],
319 | "outputs": []
320 | },
321 | {
322 | "cell_type": "code",
323 | "execution_count": 13,
324 | "metadata": {
325 | "ExecuteTime": {
326 | "end_time": "2021-09-07T23:14:34.276802Z",
327 | "start_time": "2021-09-07T23:14:34.235395Z"
328 | }
329 | },
330 | "source": [
331 | "factor.unstack().head()"
332 | ],
333 | "outputs": []
334 | },
335 | {
336 | "cell_type": "markdown",
337 | "metadata": {},
338 | "source": [
339 | "# Run Alphalens"
340 | ]
341 | },
342 | {
343 | "cell_type": "markdown",
344 | "metadata": {},
345 | "source": [
346 | "Period 1 will show returns from market open to market close while period 2 will show returns from today open to tomorrow open"
347 | ]
348 | },
349 | {
350 | "cell_type": "markdown",
351 | "metadata": {},
352 | "source": [
353 | "## Get Alphalens Input"
354 | ]
355 | },
356 | {
357 | "cell_type": "code",
358 | "execution_count": 14,
359 | "metadata": {
360 | "ExecuteTime": {
361 | "end_time": "2021-09-07T23:14:34.593247Z",
362 | "start_time": "2021-09-07T23:14:34.277877Z"
363 | }
364 | },
365 | "source": [
366 | "non_predictive_factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor, \n",
367 | " pricing, \n",
368 | " periods=(1,2),\n",
369 | " groupby=ticker_sector,\n",
370 | " groupby_labels=sector_names)"
371 | ],
372 | "outputs": []
373 | },
374 | {
375 | "cell_type": "markdown",
376 | "metadata": {},
377 | "source": [
378 | "## Returns Tear Sheet"
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": 15,
384 | "metadata": {
385 | "ExecuteTime": {
386 | "end_time": "2021-09-07T23:14:36.498458Z",
387 | "start_time": "2021-09-07T23:14:34.594389Z"
388 | },
389 | "scrolled": false
390 | },
391 | "source": [
392 | "alphalens.tears.create_returns_tear_sheet(non_predictive_factor_data)"
393 | ],
394 | "outputs": []
395 | },
396 | {
397 | "cell_type": "code",
398 | "execution_count": 16,
399 | "metadata": {
400 | "ExecuteTime": {
401 | "end_time": "2021-09-07T23:14:38.294837Z",
402 | "start_time": "2021-09-07T23:14:36.499461Z"
403 | },
404 | "scrolled": false
405 | },
406 | "source": [
407 | "alphalens.tears.create_event_returns_tear_sheet(non_predictive_factor_data, pricing);"
408 | ],
409 | "outputs": []
410 | }
411 | ],
412 | "metadata": {
413 | "kernelspec": {
414 | "display_name": "Python 3 (ipykernel)",
415 | "language": "python",
416 | "name": "python3"
417 | },
418 | "language_info": {
419 | "codemirror_mode": {
420 | "name": "ipython",
421 | "version": 3
422 | },
423 | "file_extension": ".py",
424 | "mimetype": "text/x-python",
425 | "name": "python",
426 | "nbconvert_exporter": "python",
427 | "pygments_lexer": "ipython3",
428 | "version": "3.8.8"
429 | },
430 | "toc": {
431 | "base_numbering": 1,
432 | "nav_menu": {},
433 | "number_sections": true,
434 | "sideBar": true,
435 | "skip_h1_title": false,
436 | "title_cell": "Table of Contents",
437 | "title_sidebar": "Contents",
438 | "toc_cell": false,
439 | "toc_position": {},
440 | "toc_section_display": true,
441 | "toc_window_display": false
442 | }
443 | },
444 | "nbformat": 4,
445 | "nbformat_minor": 1
446 | }
447 |
--------------------------------------------------------------------------------
/src/alphalens/examples/pyfolio_integration.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Pyfolio Integration"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Alphalens can simulate the performance of a portfolio where the factor values are use to weight stocks. Once the portfolio is built, it can be analyzed by Pyfolio. For details on how this portfolio is built see:\n",
15 | "- alphalens.performance.factor_returns\n",
16 | "- alphalens.performance.cumulative_returns \n",
17 | "- alphalens.performance.create_pyfolio_input"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "## Imports & Settings"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 1,
30 | "metadata": {
31 | "ExecuteTime": {
32 | "end_time": "2021-09-07T23:12:08.038529Z",
33 | "start_time": "2021-09-07T23:12:08.030383Z"
34 | }
35 | },
36 | "source": [
37 | "import warnings\n",
38 | "warnings.filterwarnings('ignore')"
39 | ],
40 | "outputs": []
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 2,
45 | "metadata": {
46 | "ExecuteTime": {
47 | "end_time": "2021-09-07T23:12:10.475056Z",
48 | "start_time": "2021-09-07T23:12:09.637898Z"
49 | },
50 | "scrolled": true
51 | },
52 | "source": [
53 | "import alphalens\n",
54 | "import pyfolio\n",
55 | "import pandas as pd"
56 | ],
57 | "outputs": []
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 3,
62 | "metadata": {
63 | "ExecuteTime": {
64 | "end_time": "2021-09-07T23:12:11.858461Z",
65 | "start_time": "2021-09-07T23:12:11.840129Z"
66 | }
67 | },
68 | "source": [
69 | "%matplotlib inline"
70 | ],
71 | "outputs": []
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "## Load Data"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": [
84 | "First load some stocks data"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 5,
90 | "metadata": {
91 | "ExecuteTime": {
92 | "end_time": "2021-09-07T23:12:18.651289Z",
93 | "start_time": "2021-09-07T23:12:18.626646Z"
94 | }
95 | },
96 | "source": [
97 | "tickers = [ 'ACN', 'ATVI', 'ADBE', 'AMD', 'AKAM', 'ADS', 'GOOGL', 'GOOG', 'APH', 'ADI', 'ANSS', 'AAPL',\n",
98 | "'AVGO', 'CA', 'CDNS', 'CSCO', 'CTXS', 'CTSH', 'GLW', 'CSRA', 'DXC', 'EBAY', 'EA', 'FFIV', 'FB',\n",
99 | "'FLIR', 'IT', 'GPN', 'HRS', 'HPE', 'HPQ', 'INTC', 'IBM', 'INTU', 'JNPR', 'KLAC', 'LRCX', 'MA', 'MCHP',\n",
100 | "'MSFT', 'MSI', 'NTAP', 'NFLX', 'NVDA', 'ORCL', 'PAYX', 'PYPL', 'QRVO', 'QCOM', 'RHT', 'CRM', 'STX',\n",
101 | "'AMG', 'AFL', 'ALL', 'AXP', 'AIG', 'AMP', 'AON', 'AJG', 'AIZ', 'BAC', 'BK', 'BBT', 'BRK.B', 'BLK', 'HRB',\n",
102 | "'BHF', 'COF', 'CBOE', 'SCHW', 'CB', 'CINF', 'C', 'CFG', 'CME', 'CMA', 'DFS', 'ETFC', 'RE', 'FITB', 'BEN',\n",
103 | "'GS', 'HIG', 'HBAN', 'ICE', 'IVZ', 'JPM', 'KEY', 'LUK', 'LNC', 'L', 'MTB', 'MMC', 'MET', 'MCO', 'MS',\n",
104 | "'NDAQ', 'NAVI', 'NTRS', 'PBCT', 'PNC', 'PFG', 'PGR', 'PRU', 'RJF', 'RF', 'SPGI', 'STT', 'STI', 'SYF', 'TROW',\n",
105 | "'ABT', 'ABBV', 'AET', 'A', 'ALXN', 'ALGN', 'AGN', 'ABC', 'AMGN', 'ANTM', 'BCR', 'BAX', 'BDX', 'BIIB', 'BSX',\n",
106 | "'BMY', 'CAH', 'CELG', 'CNC', 'CERN', 'CI', 'COO', 'DHR', 'DVA', 'XRAY', 'EW', 'EVHC', 'ESRX', 'GILD', 'HCA',\n",
107 | "'HSIC', 'HOLX', 'HUM', 'IDXX', 'ILMN', 'INCY', 'ISRG', 'IQV', 'JNJ', 'LH', 'LLY', 'MCK', 'MDT', 'MRK', 'MTD',\n",
108 | "'MYL', 'PDCO', 'PKI', 'PRGO', 'PFE', 'DGX', 'REGN', 'RMD', 'SYK', 'TMO', 'UNH', 'UHS', 'VAR', 'VRTX', 'WAT',\n",
109 | "'MMM', 'AYI', 'ALK', 'ALLE', 'AAL', 'AME', 'AOS', 'ARNC', 'BA', 'CHRW', 'CAT', 'CTAS', 'CSX', 'CMI', 'DE',\n",
110 | "'DAL', 'DOV', 'ETN', 'EMR', 'EFX', 'EXPD', 'FAST', 'FDX', 'FLS', 'FLR', 'FTV', 'FBHS', 'GD', 'GE', 'GWW',\n",
111 | "'HON', 'INFO', 'ITW', 'IR', 'JEC', 'JBHT', 'JCI', 'KSU', 'LLL', 'LMT', 'MAS', 'NLSN', 'NSC', 'NOC', 'PCAR',\n",
112 | "'PH', 'PNR', 'PWR', 'RTN', 'RSG', 'RHI', 'ROK', 'COL', 'ROP', 'LUV', 'SRCL', 'TXT', 'TDG', 'UNP', 'UAL',\n",
113 | "'AES', 'LNT', 'AEE', 'AEP', 'AWK', 'CNP', 'CMS', 'ED', 'D', 'DTE', 'DUK', 'EIX', 'ETR', 'ES', 'EXC']"
114 | ],
115 | "outputs": []
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "metadata": {},
120 | "source": [
121 | "### YFinance Download"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 6,
127 | "metadata": {
128 | "ExecuteTime": {
129 | "end_time": "2021-09-07T23:12:31.521200Z",
130 | "start_time": "2021-09-07T23:12:20.933573Z"
131 | }
132 | },
133 | "source": [
134 | "import yfinance as yf\n",
135 | "import pandas_datareader.data as web\n",
136 | "yf.pdr_override()\n",
137 | "\n",
138 | "df = web.get_data_yahoo(tickers, start='2015-01-01', end='2017-01-01')\n",
139 | "df.index = pd.to_datetime(df.index, utc=True)"
140 | ],
141 | "outputs": []
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "### Data Formatting"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 7,
153 | "metadata": {
154 | "ExecuteTime": {
155 | "end_time": "2021-09-07T23:12:43.442139Z",
156 | "start_time": "2021-09-07T23:12:43.379118Z"
157 | }
158 | },
159 | "source": [
160 | "df = df.stack()\n",
161 | "df.index.names = ['date', 'asset']\n",
162 | "df.info()"
163 | ],
164 | "outputs": []
165 | },
166 | {
167 | "cell_type": "markdown",
168 | "metadata": {},
169 | "source": [
170 | "## Compute Factor"
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {},
176 | "source": [
177 | "We'll compute a simple mean reversion factor looking at recent stocks performance: stocks that performed well in the last 5 days will have high rank and vice versa."
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 8,
183 | "metadata": {
184 | "ExecuteTime": {
185 | "end_time": "2021-09-07T23:12:45.413650Z",
186 | "start_time": "2021-09-07T23:12:45.370509Z"
187 | }
188 | },
189 | "source": [
190 | "factor = df.loc[:,'Open'].unstack('asset')\n",
191 | "factor = -factor.pct_change(5)\n",
192 | "factor = factor.stack()"
193 | ],
194 | "outputs": []
195 | },
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {},
199 | "source": [
200 | "The pricing data passed to alphalens should contain the entry price for the assets so it must reflect the next available price after a factor value was observed at a given timestamp. Those prices must not be used in the calculation of the factor values for that time. Always double check to ensure you are not introducing lookahead bias to your study.\n",
201 | "\n",
202 | "The pricing data must also contain the exit price for the assets, for period 1 the price at the next timestamp will be used, for period 2 the price after 2 timestats will be used and so on.\n",
203 | "\n",
204 | "There are no restrinctions/assumptions on the time frequencies a factor should be computed at and neither on the specific time a factor should be traded (trading at the open vs trading at the close vs intraday trading), it is only required that factor and price DataFrames are properly aligned given the rules above.\n",
205 | "\n",
206 | "In our example, before the trading starts every day, we observe yesterday factor values. The price we pass to alphalens is the next available price after that factor observation: the daily open price that will be used as assets entry price. Also, we are not adding additional prices so the assets exit price will be the following days open prices (how many days depends on 'periods' argument). The retuns computed by Alphalens will therefore based on assets open prices."
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": 9,
212 | "metadata": {
213 | "ExecuteTime": {
214 | "end_time": "2021-09-07T23:12:45.706136Z",
215 | "start_time": "2021-09-07T23:12:45.672149Z"
216 | }
217 | },
218 | "source": [
219 | "pricing = df.loc[:,'Open'].unstack('asset').iloc[1:]"
220 | ],
221 | "outputs": []
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {},
226 | "source": [
227 | "## Run Alphalens Analysis"
228 | ]
229 | },
230 | {
231 | "cell_type": "markdown",
232 | "metadata": {},
233 | "source": [
234 | "### Get Input Data"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": 10,
240 | "metadata": {
241 | "ExecuteTime": {
242 | "end_time": "2021-09-07T23:12:48.863288Z",
243 | "start_time": "2021-09-07T23:12:47.653078Z"
244 | }
245 | },
246 | "source": [
247 | "factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor,\n",
248 | " pricing,\n",
249 | " periods=(1, 3, 5),\n",
250 | " quantiles=5,\n",
251 | " bins=None)"
252 | ],
253 | "outputs": []
254 | },
255 | {
256 | "cell_type": "markdown",
257 | "metadata": {},
258 | "source": [
259 | "### Summary Tear Sheet"
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": 11,
265 | "metadata": {
266 | "ExecuteTime": {
267 | "end_time": "2021-09-07T23:13:02.045516Z",
268 | "start_time": "2021-09-07T23:12:48.864382Z"
269 | },
270 | "scrolled": true
271 | },
272 | "source": [
273 | "alphalens.tears.create_summary_tear_sheet(factor_data);"
274 | ],
275 | "outputs": []
276 | },
277 | {
278 | "cell_type": "markdown",
279 | "metadata": {},
280 | "source": [
281 | "## Run Pyfolio Analysis"
282 | ]
283 | },
284 | {
285 | "cell_type": "markdown",
286 | "metadata": {},
287 | "source": [
288 | "### Get Input Data"
289 | ]
290 | },
291 | {
292 | "cell_type": "markdown",
293 | "metadata": {},
294 | "source": [
295 | "We can see in Alphalens analysis that quantiles 1 and 5 are the most predictive so we'll build a portfolio data using only those quantiles."
296 | ]
297 | },
298 | {
299 | "cell_type": "code",
300 | "execution_count": 12,
301 | "metadata": {
302 | "ExecuteTime": {
303 | "end_time": "2021-09-07T23:13:16.537671Z",
304 | "start_time": "2021-09-07T23:13:02.046606Z"
305 | }
306 | },
307 | "source": [
308 | "pf_returns, pf_positions, pf_benchmark = \\\n",
309 | " alphalens.performance.create_pyfolio_input(factor_data,\n",
310 | " period='1D',\n",
311 | " capital=100000,\n",
312 | " long_short=True,\n",
313 | " group_neutral=False,\n",
314 | " equal_weight=True,\n",
315 | " quantiles=[1,5],\n",
316 | " groups=None,\n",
317 | " benchmark_period='1D')"
318 | ],
319 | "outputs": []
320 | },
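    | {
    | "cell_type": "markdown",
    | "metadata": {},
    | "source": [
    | "Before handing the data over to Pyfolio, it can be useful to take a quick look at the simulated portfolio, e.g. the first few daily returns."
    | ]
    | },
    | {
    | "cell_type": "code",
    | "execution_count": null,
    | "metadata": {},
    | "source": [
    | "pf_returns.head()"
    | ],
    | "outputs": []
    | },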
321 | {
322 | "cell_type": "markdown",
323 | "metadata": {},
324 | "source": [
325 | "### Pyfolio Tearsheet"
326 | ]
327 | },
328 | {
329 | "cell_type": "markdown",
330 | "metadata": {},
331 | "source": [
332 | "Now that we have prepared the data we can run Pyfolio functions"
333 | ]
334 | },
335 | {
336 | "cell_type": "code",
337 | "execution_count": 13,
338 | "metadata": {
339 | "ExecuteTime": {
340 | "end_time": "2021-09-07T23:13:22.863699Z",
341 | "start_time": "2021-09-07T23:13:16.538539Z"
342 | },
343 | "scrolled": false
344 | },
345 | "source": [
346 | "pyfolio.tears.create_full_tear_sheet(pf_returns,\n",
347 | " positions=pf_positions,\n",
348 | " benchmark_rets=pf_benchmark)"
349 | ],
350 | "outputs": []
351 | },
352 | {
353 | "cell_type": "markdown",
354 | "metadata": {},
355 | "source": [
356 | "## Subset Performance"
357 | ]
358 | },
359 | {
360 | "cell_type": "markdown",
361 | "metadata": {},
362 | "source": [
363 | "### Weekday Analysis"
364 | ]
365 | },
366 | {
367 | "cell_type": "markdown",
368 | "metadata": {},
369 | "source": [
370 | "Sometimes it might be useful to analyze subets of your factor data, for example it could be interesting to see the comparison of your factor in different days of the week. Below we'll see how to select and analyze factor data corresponding to Mondays, the positions will be held the for a period of 5 days"
371 | ]
372 | },
373 | {
374 | "cell_type": "code",
375 | "execution_count": 14,
376 | "metadata": {
377 | "ExecuteTime": {
378 | "end_time": "2021-09-07T23:13:22.875368Z",
379 | "start_time": "2021-09-07T23:13:22.864898Z"
380 | }
381 | },
382 | "source": [
383 | "monday_factor_data = factor_data[ factor_data.index.get_level_values('date').weekday == 0 ]"
384 | ],
385 | "outputs": []
386 | },
387 | {
388 | "cell_type": "code",
389 | "execution_count": 15,
390 | "metadata": {
391 | "ExecuteTime": {
392 | "end_time": "2021-09-07T23:13:25.711516Z",
393 | "start_time": "2021-09-07T23:13:22.876258Z"
394 | }
395 | },
396 | "source": [
397 | "pf_returns, pf_positions, pf_benchmark = \\\n",
398 | " alphalens.performance.create_pyfolio_input(monday_factor_data,\n",
399 | " period='5D',\n",
400 | " capital=100000,\n",
401 | " long_short=True,\n",
402 | " group_neutral=False,\n",
403 | " equal_weight=True,\n",
404 | " quantiles=[1,5],\n",
405 | " groups=None,\n",
406 | " benchmark_period='1D')"
407 | ],
408 | "outputs": []
409 | },
410 | {
411 | "cell_type": "markdown",
412 | "metadata": {},
413 | "source": [
414 | "### Pyfolio Tearsheet"
415 | ]
416 | },
417 | {
418 | "cell_type": "code",
419 | "execution_count": 16,
420 | "metadata": {
421 | "ExecuteTime": {
422 | "end_time": "2021-09-07T23:13:31.792091Z",
423 | "start_time": "2021-09-07T23:13:25.712423Z"
424 | },
425 | "scrolled": false
426 | },
427 | "source": [
428 | "pyfolio.tears.create_full_tear_sheet(pf_returns,\n",
429 | " positions=pf_positions,\n",
430 | " benchmark_rets=pf_benchmark)"
431 | ],
432 | "outputs": []
433 | }
434 | ],
435 | "metadata": {
436 | "kernelspec": {
437 | "display_name": "Python 3 (ipykernel)",
438 | "language": "python",
439 | "name": "python3"
440 | },
441 | "language_info": {
442 | "codemirror_mode": {
443 | "name": "ipython",
444 | "version": 3
445 | },
446 | "file_extension": ".py",
447 | "mimetype": "text/x-python",
448 | "name": "python",
449 | "nbconvert_exporter": "python",
450 | "pygments_lexer": "ipython3",
451 | "version": "3.8.8"
452 | },
453 | "toc": {
454 | "base_numbering": 1,
455 | "nav_menu": {},
456 | "number_sections": true,
457 | "sideBar": true,
458 | "skip_h1_title": false,
459 | "title_cell": "Table of Contents",
460 | "title_sidebar": "Contents",
461 | "toc_cell": false,
462 | "toc_position": {},
463 | "toc_section_display": true,
464 | "toc_window_display": true
465 | }
466 | },
467 | "nbformat": 4,
468 | "nbformat_minor": 1
469 | }
470 |
--------------------------------------------------------------------------------
/src/alphalens/tears.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2017 Quantopian, Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import warnings
17 | import matplotlib.gridspec as gridspec
18 | import matplotlib.pyplot as plt
19 | import pandas as pd
20 |
21 | from . import plotting
22 | from . import performance as perf
23 | from . import utils
24 |
25 |
26 | class GridFigure(object):
27 | """
28 | Helper class that simplifies the layout of plots on a grid.
29 | """
30 |
31 | def __init__(self, rows, cols):
32 | self.rows = rows
33 | self.cols = cols
34 | self.fig = plt.figure(figsize=(14, rows * 7))
35 | self.gs = gridspec.GridSpec(rows, cols, wspace=0.4, hspace=0.3)
36 | self.curr_row = 0
37 | self.curr_col = 0
38 |
39 | def next_row(self):
40 | if self.curr_col != 0:
41 | self.curr_row += 1
42 | self.curr_col = 0
43 | subplt = plt.subplot(self.gs[self.curr_row, :])
44 | self.curr_row += 1
45 | return subplt
46 |
47 | def next_cell(self):
48 | if self.curr_col >= self.cols:
49 | self.curr_row += 1
50 | self.curr_col = 0
51 | subplt = plt.subplot(self.gs[self.curr_row, self.curr_col])
52 | self.curr_col += 1
53 | return subplt
54 |
55 | def close(self):
56 | plt.close(self.fig)
57 | self.fig = None
58 | self.gs = None
59 |
60 |
61 | @plotting.customize
62 | def create_summary_tear_sheet(factor_data, long_short=True, group_neutral=False):
63 | """
64 | Creates a small summary tear sheet with returns, information, and turnover
65 | analysis.
66 |
67 | Parameters
68 | ----------
69 | factor_data : pd.DataFrame - MultiIndex
70 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
71 | containing the values for a single alpha factor, forward returns for
72 | each period, the factor quantile/bin that factor value belongs to, and
73 | (optionally) the group the asset belongs to.
74 | - See full explanation in utils.get_clean_factor_and_forward_returns
75 | long_short : bool
76 | Should this computation happen on a long short portfolio? If so, then
77 | mean quantile returns will be demeaned across the factor universe.
78 | group_neutral : bool
79 | Should this computation happen on a group neutral portfolio? If so,
80 | returns demeaning will occur on the group level.
81 | """
82 |
83 | # Returns Analysis
84 | mean_quant_ret, std_quantile = perf.mean_return_by_quantile(
85 | factor_data,
86 | by_group=False,
87 | demeaned=long_short,
88 | group_adjust=group_neutral,
89 | )
90 |
91 | mean_quant_rateret = mean_quant_ret.apply(
92 | utils.rate_of_return, axis=0, base_period=mean_quant_ret.columns[0]
93 | )
94 |
95 | mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile(
96 | factor_data,
97 | by_date=True,
98 | by_group=False,
99 | demeaned=long_short,
100 | group_adjust=group_neutral,
101 | )
102 |
103 | mean_quant_rateret_bydate = mean_quant_ret_bydate.apply(
104 | utils.rate_of_return,
105 | axis=0,
106 | base_period=mean_quant_ret_bydate.columns[0],
107 | )
108 |
109 | compstd_quant_daily = std_quant_daily.apply(
110 | utils.std_conversion, axis=0, base_period=std_quant_daily.columns[0]
111 | )
112 |
113 | alpha_beta = perf.factor_alpha_beta(
114 | factor_data, demeaned=long_short, group_adjust=group_neutral
115 | )
116 |
117 | mean_ret_spread_quant, std_spread_quant = perf.compute_mean_returns_spread(
118 | mean_quant_rateret_bydate,
119 | factor_data["factor_quantile"].max(),
120 | factor_data["factor_quantile"].min(),
121 | std_err=compstd_quant_daily,
122 | )
123 |
124 | periods = utils.get_forward_returns_columns(factor_data.columns)
125 | periods = list(map(lambda p: pd.Timedelta(p).days, periods))
126 |
127 | fr_cols = len(periods)
128 | vertical_sections = 2 + fr_cols * 3
129 | gf = GridFigure(rows=vertical_sections, cols=1)
130 |
131 | plotting.plot_quantile_statistics_table(factor_data)
132 |
133 | plotting.plot_returns_table(alpha_beta, mean_quant_rateret, mean_ret_spread_quant)
134 |
135 | plotting.plot_quantile_returns_bar(
136 | mean_quant_rateret,
137 | by_group=False,
138 | ylim_percentiles=None,
139 | ax=gf.next_row(),
140 | )
141 |
142 | # Information Analysis
143 | ic = perf.factor_information_coefficient(factor_data)
144 | plotting.plot_information_table(ic)
145 |
146 | # Turnover Analysis
147 | quantile_factor = factor_data["factor_quantile"]
148 |
149 | quantile_turnover = {
150 | p: pd.concat(
151 | [
152 | perf.quantile_turnover(quantile_factor, q, p)
153 | for q in range(1, int(quantile_factor.max()) + 1)
154 | ],
155 | axis=1,
156 | )
157 | for p in periods
158 | }
159 |
160 | autocorrelation = pd.concat(
161 | [perf.factor_rank_autocorrelation(factor_data, period) for period in periods],
162 | axis=1,
163 | )
164 |
165 | plotting.plot_turnover_table(autocorrelation, quantile_turnover)
166 |
167 | plt.show()
168 | gf.close()
169 |
170 |
171 | @plotting.customize
172 | def create_returns_tear_sheet(
173 | factor_data, long_short=True, group_neutral=False, by_group=False
174 | ):
175 | """
176 | Creates a tear sheet for returns analysis of a factor.
177 |
178 | Parameters
179 | ----------
180 | factor_data : pd.DataFrame - MultiIndex
181 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
182 | containing the values for a single alpha factor, forward returns for
183 | each period, the factor quantile/bin that factor value belongs to,
184 | and (optionally) the group the asset belongs to.
185 | - See full explanation in utils.get_clean_factor_and_forward_returns
186 | long_short : bool
187 | Should this computation happen on a long short portfolio? If so, then
188 | mean quantile returns will be demeaned across the factor universe.
189 | Additionally factor values will be demeaned across the factor universe
190 | when factor weighting the portfolio for cumulative returns plots
191 | group_neutral : bool
192 | Should this computation happen on a group neutral portfolio? If so,
193 | returns demeaning will occur on the group level.
194 | Additionally each group will weight the same in cumulative returns
195 | plots
196 | by_group : bool
197 | If True, display graphs separately for each group.
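    |
    | Examples
    | --------
    | A minimal sketch, assuming `factor_data` was produced by
    | utils.get_clean_factor_and_forward_returns::
    |
    |     create_returns_tear_sheet(factor_data, long_short=True, by_group=False)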
198 | """
199 |
200 | factor_returns = perf.factor_returns(factor_data, long_short, group_neutral)
201 |
202 | mean_quant_ret, std_quantile = perf.mean_return_by_quantile(
203 | factor_data,
204 | by_group=False,
205 | demeaned=long_short,
206 | group_adjust=group_neutral,
207 | )
208 |
209 | mean_quant_rateret = mean_quant_ret.apply(
210 | utils.rate_of_return, axis=0, base_period=mean_quant_ret.columns[0]
211 | )
212 |
213 | mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile(
214 | factor_data,
215 | by_date=True,
216 | by_group=False,
217 | demeaned=long_short,
218 | group_adjust=group_neutral,
219 | )
220 |
221 | mean_quant_rateret_bydate = mean_quant_ret_bydate.apply(
222 | utils.rate_of_return,
223 | axis=0,
224 | base_period=mean_quant_ret_bydate.columns[0],
225 | )
226 |
227 | compstd_quant_daily = std_quant_daily.apply(
228 | utils.std_conversion, axis=0, base_period=std_quant_daily.columns[0]
229 | )
230 |
231 | alpha_beta = perf.factor_alpha_beta(
232 | factor_data, factor_returns, long_short, group_neutral
233 | )
234 |
235 | mean_ret_spread_quant, std_spread_quant = perf.compute_mean_returns_spread(
236 | mean_quant_rateret_bydate,
237 | factor_data["factor_quantile"].max(),
238 | factor_data["factor_quantile"].min(),
239 | std_err=compstd_quant_daily,
240 | )
241 |
242 | fr_cols = len(factor_returns.columns)
243 | vertical_sections = 2 + fr_cols * 3
244 | gf = GridFigure(rows=vertical_sections, cols=1)
245 |
246 | plotting.plot_returns_table(alpha_beta, mean_quant_rateret, mean_ret_spread_quant)
247 |
248 | plotting.plot_quantile_returns_bar(
249 | mean_quant_rateret,
250 | by_group=False,
251 | ylim_percentiles=None,
252 | ax=gf.next_row(),
253 | )
254 |
255 | plotting.plot_quantile_returns_violin(
256 | mean_quant_rateret_bydate, ylim_percentiles=(1, 99), ax=gf.next_row()
257 | )
258 |
259 | trading_calendar = factor_data.index.levels[0].freq
260 | if trading_calendar is None:
261 | trading_calendar = pd.tseries.offsets.BDay()
262 | warnings.warn(
263 | "'freq' not set in factor_data index: assuming business day",
264 | UserWarning,
265 | )
266 |
267 | # Compute cumulative returns from daily simple returns, if '1D'
268 | # returns are provided.
269 | if "1D" in factor_returns:
270 | title = (
271 | "Factor Weighted "
272 | + ("Group Neutral " if group_neutral else "")
273 | + ("Long/Short " if long_short else "")
274 | + "Portfolio Cumulative Return (1D Period)"
275 | )
276 |
277 | plotting.plot_cumulative_returns(
278 | factor_returns["1D"], period="1D", title=title, ax=gf.next_row()
279 | )
280 |
281 | plotting.plot_cumulative_returns_by_quantile(
282 | mean_quant_ret_bydate["1D"], period="1D", ax=gf.next_row()
283 | )
284 |
285 | ax_mean_quantile_returns_spread_ts = [gf.next_row() for x in range(fr_cols)]
286 | plotting.plot_mean_quantile_returns_spread_time_series(
287 | mean_ret_spread_quant,
288 | std_err=std_spread_quant,
289 | bandwidth=0.5,
290 | ax=ax_mean_quantile_returns_spread_ts,
291 | )
292 |
293 | plt.show()
294 | gf.close()
295 |
296 | if by_group:
297 | (
298 | mean_return_quantile_group,
299 | mean_return_quantile_group_std_err,
300 | ) = perf.mean_return_by_quantile(
301 | factor_data,
302 | by_date=False,
303 | by_group=True,
304 | demeaned=long_short,
305 | group_adjust=group_neutral,
306 | )
307 |
308 | mean_quant_rateret_group = mean_return_quantile_group.apply(
309 | utils.rate_of_return,
310 | axis=0,
311 | base_period=mean_return_quantile_group.columns[0],
312 | )
313 |
314 | num_groups = len(
315 | mean_quant_rateret_group.index.get_level_values("group").unique()
316 | )
317 |
318 | vertical_sections = 1 + (((num_groups - 1) // 2) + 1)
319 | gf = GridFigure(rows=vertical_sections, cols=2)
320 |
321 | ax_quantile_returns_bar_by_group = [gf.next_cell() for _ in range(num_groups)]
322 | plotting.plot_quantile_returns_bar(
323 | mean_quant_rateret_group,
324 | by_group=True,
325 | ylim_percentiles=(5, 95),
326 | ax=ax_quantile_returns_bar_by_group,
327 | )
328 | plt.show()
329 | gf.close()
330 |
331 |
332 | @plotting.customize
333 | def create_information_tear_sheet(factor_data, group_neutral=False, by_group=False):
334 | """
335 | Creates a tear sheet for information analysis of a factor.
336 |
337 | Parameters
338 | ----------
339 | factor_data : pd.DataFrame - MultiIndex
340 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
341 | containing the values for a single alpha factor, forward returns for
342 | each period, the factor quantile/bin that factor value belongs to, and
343 | (optionally) the group the asset belongs to.
344 | - See full explanation in utils.get_clean_factor_and_forward_returns
345 | group_neutral : bool
346 | Demean forward returns by group before computing IC.
347 | by_group : bool
348 | If True, display graphs separately for each group.
349 | """
350 |
351 | ic = perf.factor_information_coefficient(factor_data, group_neutral)
352 |
353 | plotting.plot_information_table(ic)
354 |
355 | columns_wide = 2
356 | fr_cols = len(ic.columns)
357 | rows_when_wide = ((fr_cols - 1) // columns_wide) + 1
358 | vertical_sections = fr_cols + 3 * rows_when_wide + 2 * fr_cols
359 | gf = GridFigure(rows=vertical_sections, cols=columns_wide)
360 |
361 | ax_ic_ts = [gf.next_row() for _ in range(fr_cols)]
362 | plotting.plot_ic_ts(ic, ax=ax_ic_ts)
363 |
364 | ax_ic_hqq = [gf.next_cell() for _ in range(fr_cols * 2)]
365 | plotting.plot_ic_hist(ic, ax=ax_ic_hqq[::2])
366 | plotting.plot_ic_qq(ic, ax=ax_ic_hqq[1::2])
367 |
368 | if not by_group:
369 |
370 | mean_monthly_ic = perf.mean_information_coefficient(
371 | factor_data,
372 | group_adjust=group_neutral,
373 | by_group=False,
374 | by_time="M",
375 | )
376 | ax_monthly_ic_heatmap = [gf.next_cell() for x in range(fr_cols)]
377 | plotting.plot_monthly_ic_heatmap(mean_monthly_ic, ax=ax_monthly_ic_heatmap)
378 |
379 | if by_group:
380 | mean_group_ic = perf.mean_information_coefficient(
381 | factor_data, group_adjust=group_neutral, by_group=True
382 | )
383 |
384 | plotting.plot_ic_by_group(mean_group_ic, ax=gf.next_row())
385 |
386 | plt.show()
387 | gf.close()
388 |
389 |
390 | @plotting.customize
391 | def create_turnover_tear_sheet(factor_data, turnover_periods=None):
392 | """
393 | Creates a tear sheet for analyzing the turnover properties of a factor.
394 |
395 | Parameters
396 | ----------
397 | factor_data : pd.DataFrame - MultiIndex
398 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
399 | containing the values for a single alpha factor, forward returns for
400 | each period, the factor quantile/bin that factor value belongs to, and
401 | (optionally) the group the asset belongs to.
402 | - See full explanation in utils.get_clean_factor_and_forward_returns
403 | turnover_periods : sequence[string], optional
404 | Periods to compute turnover analysis on. By default periods in
405 | 'factor_data' are used, but custom periods can be provided instead. This
406 | can be useful when the periods in 'factor_data' are not multiples of the
407 | frequency at which factor values are computed, e.g. the periods
408 | are 2h and 4h while the factor is computed daily, so values like
409 | ['1D', '2D'] could be used instead.
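    |
    | Examples
    | --------
    | A minimal sketch (hypothetical factor_data computed at a 2h/4h
    | frequency but analyzed at daily turnover horizons)::
    |
    |     create_turnover_tear_sheet(factor_data, turnover_periods=['1D', '2D'])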
410 | """
411 |
412 | if turnover_periods is None:
413 | input_periods = utils.get_forward_returns_columns(
414 | factor_data.columns, require_exact_day_multiple=True
415 | ).to_numpy()
416 | turnover_periods = utils.timedelta_strings_to_integers(input_periods)
417 | else:
418 | turnover_periods = utils.timedelta_strings_to_integers(turnover_periods)
419 |
420 | quantile_factor = factor_data["factor_quantile"]
421 |
422 | quantile_turnover = {
423 | p: pd.concat(
424 | [
425 | perf.quantile_turnover(quantile_factor, q, p)
426 | for q in quantile_factor.sort_values().unique().tolist()
427 | ],
428 | axis=1,
429 | )
430 | for p in turnover_periods
431 | }
432 |
433 | autocorrelation = pd.concat(
434 | [
435 | perf.factor_rank_autocorrelation(factor_data, period)
436 | for period in turnover_periods
437 | ],
438 | axis=1,
439 | )
440 |
441 | plotting.plot_turnover_table(autocorrelation, quantile_turnover)
442 |
443 | fr_cols = len(turnover_periods)
444 | columns_wide = 1
445 | rows_when_wide = ((fr_cols - 1) // 1) + 1
446 | vertical_sections = fr_cols + 3 * rows_when_wide + 2 * fr_cols
447 | gf = GridFigure(rows=vertical_sections, cols=columns_wide)
448 |
449 | for period in turnover_periods:
450 | if quantile_turnover[period].isnull().all().all():
451 | continue
452 | plotting.plot_top_bottom_quantile_turnover(
453 | quantile_turnover[period], period=period, ax=gf.next_row()
454 | )
455 |
456 | for period in autocorrelation:
457 | if autocorrelation[period].isnull().all():
458 | continue
459 | plotting.plot_factor_rank_auto_correlation(
460 | autocorrelation[period], period=period, ax=gf.next_row()
461 | )
462 |
463 | plt.show()
464 | gf.close()
465 |
466 |
467 | @plotting.customize
468 | def create_full_tear_sheet(
469 | factor_data, long_short=True, group_neutral=False, by_group=False
470 | ):
471 | """
472 | Creates a full tear sheet for analyzing and evaluating a single
473 | return-predicting (alpha) factor.
474 |
475 | Parameters
476 | ----------
477 | factor_data : pd.DataFrame - MultiIndex
478 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
479 | containing the values for a single alpha factor, forward returns for
480 | each period, the factor quantile/bin that factor value belongs to, and
481 | (optionally) the group the asset belongs to.
482 | - See full explanation in utils.get_clean_factor_and_forward_returns
483 | long_short : bool
484 | Should this computation happen on a long short portfolio?
485 | - See tears.create_returns_tear_sheet for details on how this flag
486 | affects returns analysis
487 | group_neutral : bool
488 | Should this computation happen on a group neutral portfolio?
489 | - See tears.create_returns_tear_sheet for details on how this flag
490 | affects returns analysis
491 | - See tears.create_information_tear_sheet for details on how this
492 | flag affects information analysis
493 | by_group : bool
494 | If True, display graphs separately for each group.
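    |
    | Examples
    | --------
    | A minimal sketch, assuming `factor` and `prices` have already been
    | prepared as described in utils.get_clean_factor_and_forward_returns::
    |
    |     factor_data = utils.get_clean_factor_and_forward_returns(factor, prices)
    |     create_full_tear_sheet(factor_data, long_short=True)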
495 | """
496 |
497 | plotting.plot_quantile_statistics_table(factor_data)
498 | create_returns_tear_sheet(
499 | factor_data, long_short, group_neutral, by_group, set_context=False
500 | )
501 | create_information_tear_sheet(
502 | factor_data, group_neutral, by_group, set_context=False
503 | )
504 | create_turnover_tear_sheet(factor_data, set_context=False)
505 |
506 |
507 | @plotting.customize
508 | def create_event_returns_tear_sheet(
509 | factor_data,
510 | returns,
511 | avgretplot=(5, 15),
512 | long_short=True,
513 | group_neutral=False,
514 | std_bar=True,
515 | by_group=False,
516 | ):
517 | """
518 | Creates a tear sheet to view the average cumulative returns for a
519 | factor within a window (pre and post event).
520 |
521 | Parameters
522 | ----------
523 | factor_data : pd.DataFrame - MultiIndex
524 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
525 | containing the values for a single alpha factor, the factor
526 | quantile/bin that factor value belongs to and (optionally) the group
527 | the asset belongs to.
528 | - See full explanation in utils.get_clean_factor_and_forward_returns
529 | returns : pd.DataFrame
530 | A DataFrame indexed by date with assets in the columns containing daily
531 | returns.
532 | - See full explanation in utils.get_clean_factor_and_forward_returns
533 | avgretplot: tuple (int, int) - (before, after)
534 | If not None, plot quantile average cumulative returns
535 | long_short : bool
536 | Should this computation happen on a long short portfolio? If so, then
537 | factor returns will be demeaned across the factor universe
538 | group_neutral : bool
539 | Should this computation happen on a group neutral portfolio? If so,
540 | returns demeaning will occur on the group level.
541 | std_bar : boolean, optional
542 | Show plots with standard deviation bars, one for each quantile
543 | by_group : bool
544 | If True, display graphs separately for each group.
545 | """
546 |
547 | before, after = avgretplot
548 |
549 | avg_cumulative_returns = perf.average_cumulative_return_by_quantile(
550 | factor_data,
551 | returns,
552 | periods_before=before,
553 | periods_after=after,
554 | demeaned=long_short,
555 | group_adjust=group_neutral,
556 | )
557 |
558 | num_quantiles = int(factor_data["factor_quantile"].max())
559 |
560 | vertical_sections = 1
561 | if std_bar:
562 | vertical_sections += ((num_quantiles - 1) // 2) + 1
563 | cols = 2 if num_quantiles != 1 else 1
564 | gf = GridFigure(rows=vertical_sections, cols=cols)
565 | plotting.plot_quantile_average_cumulative_return(
566 | avg_cumulative_returns,
567 | by_quantile=False,
568 | std_bar=False,
569 | ax=gf.next_row(),
570 | )
571 | if std_bar:
572 | ax_avg_cumulative_returns_by_q = [gf.next_cell() for _ in range(num_quantiles)]
573 | plotting.plot_quantile_average_cumulative_return(
574 | avg_cumulative_returns,
575 | by_quantile=True,
576 | std_bar=True,
577 | ax=ax_avg_cumulative_returns_by_q,
578 | )
579 |
580 | plt.show()
581 | gf.close()
582 |
583 | if by_group:
584 | groups = factor_data["group"].unique()
585 | num_groups = len(groups)
586 | vertical_sections = ((num_groups - 1) // 2) + 1
587 | gf = GridFigure(rows=vertical_sections, cols=2)
588 |
589 | avg_cumret_by_group = perf.average_cumulative_return_by_quantile(
590 | factor_data,
591 | returns,
592 | periods_before=before,
593 | periods_after=after,
594 | demeaned=long_short,
595 | group_adjust=group_neutral,
596 | by_group=True,
597 | )
598 |
599 | for group, avg_cumret in avg_cumret_by_group.groupby(level="group"):
600 | avg_cumret.index = avg_cumret.index.droplevel("group")
601 | plotting.plot_quantile_average_cumulative_return(
602 | avg_cumret,
603 | by_quantile=False,
604 | std_bar=False,
605 | title=group,
606 | ax=gf.next_cell(),
607 | )
608 |
609 | plt.show()
610 | gf.close()
611 |
612 |
613 | @plotting.customize
614 | def create_event_study_tear_sheet(
615 | factor_data, returns, avgretplot=(5, 15), rate_of_ret=True, n_bars=50
616 | ):
617 | """
618 | Creates an event study tear sheet for analysis of a specific event.
619 |
620 | Parameters
621 | ----------
622 | factor_data : pd.DataFrame - MultiIndex
623 | A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
624 | containing the values for a single event, forward returns for each
625 | period, the factor quantile/bin that factor value belongs to, and
626 | (optionally) the group the asset belongs to.
627 | returns : pd.DataFrame, required only if 'avgretplot' is provided
628 | A DataFrame indexed by date with assets in the columns containing daily
629 | returns.
630 | - See full explanation in utils.get_clean_factor_and_forward_returns
631 | avgretplot: tuple (int, int) - (before, after), optional
632 | If not None, plot event style average cumulative returns within a
633 | window (pre and post event).
634 | rate_of_ret : bool, optional
635 | Display rate of return instead of simple return in 'Mean Period Wise
636 | Return By Factor Quantile' and 'Period Wise Return By Factor Quantile'
637 | plots
638 | n_bars : int, optional
639 | Number of bars in event distribution plot
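    |
    | Examples
    | --------
    | A minimal sketch, assuming `events` is a MultiIndex Series holding
    | non-NaN values only on event dates and `pricing` holds the entry
    | prices (as in the event_study example notebook)::
    |
    |     factor_data = utils.get_clean_factor_and_forward_returns(
    |         events, pricing, quantiles=None, bins=1)
    |     create_event_study_tear_sheet(factor_data, pricing, avgretplot=(5, 10))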
640 | """
641 |
642 | long_short = False
643 |
644 | plotting.plot_quantile_statistics_table(factor_data)
645 |
646 | gf = GridFigure(rows=1, cols=1)
647 | plotting.plot_events_distribution(
648 | events=factor_data["factor"], num_bars=n_bars, ax=gf.next_row()
649 | )
650 | plt.show()
651 | gf.close()
652 |
653 | if returns is not None and avgretplot is not None:
654 |
655 | create_event_returns_tear_sheet(
656 | factor_data=factor_data,
657 | returns=returns,
658 | avgretplot=avgretplot,
659 | long_short=long_short,
660 | group_neutral=False,
661 | std_bar=True,
662 | by_group=False,
663 | )
664 |
665 | factor_returns = perf.factor_returns(factor_data, demeaned=False, equal_weight=True)
666 |
667 | mean_quant_ret, std_quantile = perf.mean_return_by_quantile(
668 | factor_data, by_group=False, demeaned=long_short
669 | )
670 | if rate_of_ret:
671 | mean_quant_ret = mean_quant_ret.apply(
672 | utils.rate_of_return, axis=0, base_period=mean_quant_ret.columns[0]
673 | )
674 |
675 | mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile(
676 | factor_data, by_date=True, by_group=False, demeaned=long_short
677 | )
678 | if rate_of_ret:
679 | mean_quant_ret_bydate = mean_quant_ret_bydate.apply(
680 | utils.rate_of_return,
681 | axis=0,
682 | base_period=mean_quant_ret_bydate.columns[0],
683 | )
684 |
685 | fr_cols = len(factor_returns.columns)
686 | vertical_sections = 2 + fr_cols * 1
687 | gf = GridFigure(rows=vertical_sections + 1, cols=1)
688 |
689 | plotting.plot_quantile_returns_bar(
690 | mean_quant_ret, by_group=False, ylim_percentiles=None, ax=gf.next_row()
691 | )
692 |
693 | plotting.plot_quantile_returns_violin(
694 | mean_quant_ret_bydate, ylim_percentiles=(1, 99), ax=gf.next_row()
695 | )
696 |
697 | trading_calendar = factor_data.index.levels[0].freq
698 | if trading_calendar is None:
699 | trading_calendar = pd.tseries.offsets.BDay()
700 | warnings.warn(
701 | "'freq' not set in factor_data index: assuming business day",
702 | UserWarning,
703 | )
704 |
705 | plt.show()
706 | gf.close()
707 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stefan-jansen/alphalens-reloaded/f0a07c22d554e4b4036983cc80320b432714fe7e/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_tears.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2017 Quantopian, Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import warnings
17 |
18 | from unittest import TestCase
19 | from unittest.mock import patch, Mock
20 | from parameterized import parameterized
21 | from numpy import nan
22 | from pandas import DataFrame, date_range, Timedelta, concat
23 |
24 | warnings.filterwarnings("ignore", category=UserWarning)
25 | warnings.filterwarnings("ignore", category=DeprecationWarning)
26 |
27 | from alphalens.tears import ( # noqa: E402
28 | create_returns_tear_sheet,
29 | create_information_tear_sheet,
30 | create_turnover_tear_sheet,
31 | create_summary_tear_sheet,
32 | create_full_tear_sheet,
33 | create_event_returns_tear_sheet,
34 | create_event_study_tear_sheet,
35 | )
36 |
37 | from alphalens.utils import get_clean_factor_and_forward_returns # noqa: E402
38 |
39 |
40 | @patch("matplotlib.pyplot.show", Mock())
41 | class TearsTestCase(TestCase):
42 | tickers = ["A", "B", "C", "D", "E", "F"]
43 |
44 | factor_groups = {"A": 1, "B": 2, "C": 1, "D": 2, "E": 1, "F": 2}
45 |
46 | price_data = [
47 | [1.25**i, 1.50**i, 1.00**i, 0.50**i, 1.50**i, 1.00**i]
48 | for i in range(1, 51)
49 | ]
50 |
51 | factor_data = [
52 | [3, 4, 2, 1, nan, nan],
53 | [3, 4, 2, 1, nan, nan],
54 | [3, 4, 2, 1, nan, nan],
55 | [3, 4, 2, 1, nan, nan],
56 | [3, 4, 2, 1, nan, nan],
57 | [3, 4, 2, 1, nan, nan],
58 | [3, nan, nan, 1, 4, 2],
59 | [3, nan, nan, 1, 4, 2],
60 | [3, 4, 2, 1, nan, nan],
61 | [3, 4, 2, 1, nan, nan],
62 | [3, nan, nan, 1, 4, 2],
63 | [3, nan, nan, 1, 4, 2],
64 | [3, nan, nan, 1, 4, 2],
65 | [3, nan, nan, 1, 4, 2],
66 | [3, nan, nan, 1, 4, 2],
67 | [3, nan, nan, 1, 4, 2],
68 | [3, nan, nan, 1, 4, 2],
69 | [3, nan, nan, 1, 4, 2],
70 | [3, nan, nan, 1, 4, 2],
71 | [3, nan, nan, 1, 4, 2],
72 | [3, 4, 2, 1, nan, nan],
73 | [3, 4, 2, 1, nan, nan],
74 | [3, 4, 2, 1, nan, nan],
75 | [3, 4, 2, 1, nan, nan],
76 | [3, 4, 2, 1, nan, nan],
77 | [3, 4, 2, 1, nan, nan],
78 | [3, 4, 2, 1, nan, nan],
79 | [3, 4, 2, 1, nan, nan],
80 | [3, nan, nan, 1, 4, 2],
81 | [3, nan, nan, 1, 4, 2],
82 | ]
83 |
84 | event_data = [
85 | [1, nan, nan, nan, nan, nan],
86 | [4, nan, nan, 7, nan, nan],
87 | [nan, nan, nan, nan, nan, nan],
88 | [nan, 3, nan, 2, nan, nan],
89 | [1, nan, nan, nan, nan, nan],
90 | [nan, nan, 2, nan, nan, nan],
91 | [nan, nan, nan, 2, nan, nan],
92 | [nan, nan, nan, 1, nan, nan],
93 | [2, nan, nan, nan, nan, nan],
94 | [nan, nan, nan, nan, 5, nan],
95 | [nan, nan, nan, 2, nan, nan],
96 | [nan, nan, nan, nan, nan, nan],
97 | [2, nan, nan, nan, nan, nan],
98 | [nan, nan, nan, nan, nan, 5],
99 | [nan, nan, nan, 1, nan, nan],
100 | [nan, nan, nan, nan, 4, nan],
101 | [5, nan, nan, 4, nan, nan],
102 | [nan, nan, nan, 3, nan, nan],
103 | [nan, nan, nan, 4, nan, nan],
104 | [nan, nan, 2, nan, nan, nan],
105 | [5, nan, nan, nan, nan, nan],
106 | [nan, 1, nan, nan, nan, nan],
107 | [nan, nan, nan, nan, 4, nan],
108 | [0, nan, nan, nan, nan, nan],
109 | [nan, 5, nan, nan, nan, 4],
110 | [nan, nan, nan, nan, nan, nan],
111 | [nan, nan, 5, nan, nan, 3],
112 | [nan, nan, 1, 2, 3, nan],
113 | [nan, nan, nan, 5, nan, nan],
114 | [nan, nan, 1, nan, 3, nan],
115 | ]
116 |
117 | #
118 | # business days calendar
119 | #
120 | bprice_index = date_range(start="2015-1-10", end="2015-3-22", freq="B")
121 | bprice_index.name = "date"
122 | bprices = DataFrame(index=bprice_index, columns=tickers, data=price_data)
123 |
124 | bfactor_index = date_range(start="2015-1-15", end="2015-2-25", freq="B")
125 | bfactor_index.name = "date"
126 | bfactor = DataFrame(index=bfactor_index, columns=tickers, data=factor_data).stack()
127 |
128 | #
129 | # full calendar
130 | #
131 | price_index = date_range(start="2015-1-10", end="2015-2-28")
132 | price_index.name = "date"
133 | prices = DataFrame(index=price_index, columns=tickers, data=price_data)
134 |
135 | factor_index = date_range(start="2015-1-15", end="2015-2-13")
136 | factor_index.name = "date"
137 | factor = DataFrame(index=factor_index, columns=tickers, data=factor_data).stack()
138 |
139 | #
140 | # intraday factor
141 | #
142 | today_open = DataFrame(
143 | index=price_index + Timedelta("9h30m"),
144 | columns=tickers,
145 | data=price_data,
146 | )
147 | today_open_1h = DataFrame(
148 | index=price_index + Timedelta("10h30m"),
149 | columns=tickers,
150 | data=price_data,
151 | )
152 | today_open_1h += today_open_1h * 0.001
153 | today_open_3h = DataFrame(
154 | index=price_index + Timedelta("12h30m"),
155 | columns=tickers,
156 | data=price_data,
157 | )
158 | today_open_3h -= today_open_3h * 0.002
159 | intraday_prices = concat([today_open, today_open_1h, today_open_3h]).sort_index()
160 |
161 | intraday_factor = DataFrame(
162 | index=factor_index + Timedelta("9h30m"),
163 | columns=tickers,
164 | data=factor_data,
165 | ).stack()
166 |
167 | #
168 | # event factor
169 | #
170 | bevent_factor = DataFrame(
171 | index=bfactor_index, columns=tickers, data=event_data
172 | ).stack()
173 |
174 | event_factor = DataFrame(
175 | index=factor_index, columns=tickers, data=event_data
176 | ).stack()
177 |
178 | all_prices = [prices, bprices]
179 | all_factors = [factor, bfactor]
180 | all_events = [event_factor, bevent_factor]
181 |
182 | def __localize_prices_and_factor(self, prices, factor, tz):
183 | if tz is not None:
184 | factor = factor.unstack()
185 | factor.index = factor.index.tz_localize(tz)
186 | factor = factor.stack()
187 | prices = prices.copy()
188 | prices.index = prices.index.tz_localize(tz)
189 | return prices, factor
190 |
191 | @parameterized.expand([(2, (1, 5, 10), None), (3, (2, 4, 6), 20)])
192 | def test_create_returns_tear_sheet(self, quantiles, periods, filter_zscore):
193 | """
194 | Test no exceptions are thrown
195 | """
196 |
197 | factor_data = get_clean_factor_and_forward_returns(
198 | self.factor,
199 | self.prices,
200 | quantiles=quantiles,
201 | periods=periods,
202 | filter_zscore=filter_zscore,
203 | )
204 |
205 | create_returns_tear_sheet(
206 | factor_data, long_short=False, group_neutral=False, by_group=False
207 | )
208 |
209 | @parameterized.expand([(1, (1, 5, 10), None), (4, (1, 2, 3, 7), 20)])
210 | def test_create_information_tear_sheet(self, quantiles, periods, filter_zscore):
211 | """
212 | Test no exceptions are thrown
213 | """
214 | factor_data = get_clean_factor_and_forward_returns(
215 | self.factor,
216 | self.prices,
217 | quantiles=quantiles,
218 | periods=periods,
219 | filter_zscore=filter_zscore,
220 | )
221 |
222 | create_information_tear_sheet(factor_data, group_neutral=False, by_group=False)
223 |
224 | @parameterized.expand(
225 | [
226 | (2, (2, 3, 6), None, 20),
227 | (4, (1, 2, 3, 7), None, None),
228 | (2, (2, 3, 6), ["1D", "2D"], 20),
229 | (4, (1, 2, 3, 7), ["1D"], None),
230 | ]
231 | )
232 | def test_create_turnover_tear_sheet(
233 | self, quantiles, periods, turnover_periods, filter_zscore
234 | ):
235 | """
236 | Test no exceptions are thrown
237 | """
238 | factor_data = get_clean_factor_and_forward_returns(
239 | self.factor,
240 | self.prices,
241 | quantiles=quantiles,
242 | periods=periods,
243 | filter_zscore=filter_zscore,
244 | )
245 |
246 | create_turnover_tear_sheet(factor_data, turnover_periods)
247 |
248 | @parameterized.expand([(2, (1, 5, 10), None), (3, (1, 2, 3, 7), 20)])
249 | def test_create_summary_tear_sheet(self, quantiles, periods, filter_zscore):
250 | """
251 | Test no exceptions are thrown
252 | """
253 | factor_data = get_clean_factor_and_forward_returns(
254 | self.factor,
255 | self.prices,
256 | quantiles=quantiles,
257 | periods=periods,
258 | filter_zscore=filter_zscore,
259 | )
260 |
261 | create_summary_tear_sheet(factor_data, long_short=True, group_neutral=False)
262 | create_summary_tear_sheet(factor_data, long_short=False, group_neutral=False)
263 |
264 | @parameterized.expand(
265 | [
266 | (2, (1, 5, 10), None, None),
267 | (3, (2, 4, 6), 20, "US/Eastern"),
268 | (4, (1, 8), 20, None),
269 | (4, (1, 2, 3, 7), None, "US/Eastern"),
270 | ]
271 | )
272 | def test_create_full_tear_sheet(self, quantiles, periods, filter_zscore, tz):
273 | """
274 | Test no exceptions are thrown
275 | """
276 | for factor, prices in zip(self.all_factors, self.all_prices):
277 | prices, factor = self.__localize_prices_and_factor(prices, factor, tz)
278 | factor_data = get_clean_factor_and_forward_returns(
279 | factor,
280 | prices,
281 | groupby=self.factor_groups,
282 | quantiles=quantiles,
283 | periods=periods,
284 | filter_zscore=filter_zscore,
285 | )
286 |
287 | create_full_tear_sheet(
288 | factor_data,
289 | long_short=False,
290 | group_neutral=False,
291 | by_group=False,
292 | )
293 | create_full_tear_sheet(
294 | factor_data,
295 | long_short=True,
296 | group_neutral=False,
297 | by_group=True,
298 | )
299 | create_full_tear_sheet(
300 | factor_data, long_short=True, group_neutral=True, by_group=True
301 | )
302 |
303 | @parameterized.expand(
304 | [
305 | (2, (1, 5, 10), None, None),
306 | (3, (2, 4, 6), 20, None),
307 | (4, (3, 4), None, "US/Eastern"),
308 | (1, (2, 3, 6, 9), 20, "US/Eastern"),
309 | ]
310 | )
311 | def test_create_event_returns_tear_sheet(
312 | self, quantiles, periods, filter_zscore, tz
313 | ):
314 | """
315 | Test no exceptions are thrown
316 | """
317 | for factor, prices in zip(self.all_factors, self.all_prices):
318 | prices, factor = self.__localize_prices_and_factor(prices, factor, tz)
319 | factor_data = get_clean_factor_and_forward_returns(
320 | factor,
321 | prices,
322 | groupby=self.factor_groups,
323 | quantiles=quantiles,
324 | periods=periods,
325 | filter_zscore=filter_zscore,
326 | )
327 |
328 | create_event_returns_tear_sheet(
329 | factor_data,
330 | prices,
331 | avgretplot=(5, 11),
332 | long_short=False,
333 | group_neutral=False,
334 | by_group=False,
335 | )
336 | create_event_returns_tear_sheet(
337 | factor_data,
338 | prices,
339 | avgretplot=(5, 11),
340 | long_short=True,
341 | group_neutral=False,
342 | by_group=False,
343 | )
344 | create_event_returns_tear_sheet(
345 | factor_data,
346 | prices,
347 | avgretplot=(5, 11),
348 | long_short=False,
349 | group_neutral=True,
350 | by_group=False,
351 | )
352 | create_event_returns_tear_sheet(
353 | factor_data,
354 | prices,
355 | avgretplot=(5, 11),
356 | long_short=False,
357 | group_neutral=False,
358 | by_group=True,
359 | )
360 | create_event_returns_tear_sheet(
361 | factor_data,
362 | prices,
363 | avgretplot=(5, 11),
364 | long_short=True,
365 | group_neutral=False,
366 | by_group=True,
367 | )
368 | create_event_returns_tear_sheet(
369 | factor_data,
370 | prices,
371 | avgretplot=(5, 11),
372 | long_short=False,
373 | group_neutral=True,
374 | by_group=True,
375 | )
376 |
377 | @parameterized.expand(
378 | [
379 | ((6, 8), None, None),
380 | ((6, 8), None, None),
381 | ((6, 3), 20, None),
382 | # ((6, 3), 20, 'US/Eastern'), # TODO: these tests fail
383 | ((0, 3), None, None),
384 | # ((3, 0), 20, 'US/Eastern') # TODO: these tests fail
385 | ]
386 | )
387 | def test_create_event_study_tear_sheet(self, avgretplot, filter_zscore, tz):
388 | """
389 | Test no exceptions are thrown
390 | """
391 | for factor, prices in zip(self.all_events, self.all_prices):
392 | prices, factor = self.__localize_prices_and_factor(prices, factor, tz)
393 | factor_data = get_clean_factor_and_forward_returns(
394 | factor,
395 | prices,
396 | bins=1,
397 | quantiles=None,
398 | periods=(1, 2),
399 | filter_zscore=filter_zscore,
400 | )
401 |
402 | create_event_study_tear_sheet(factor_data, prices, avgretplot=avgretplot)
403 |
--------------------------------------------------------------------------------
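The class-level @patch("matplotlib.pyplot.show", Mock()) above is what keeps
these smoke tests headless: every plt.show() inside the tear-sheet functions
hits a Mock instead of opening a window, so the tests verify only that no
exception is thrown. A standalone sketch of the same pattern, independent of
alphalens:

from unittest import TestCase
from unittest.mock import patch, Mock

import matplotlib

matplotlib.use("Agg")  # non-interactive backend; an extra safety net in CI
import matplotlib.pyplot as plt


@patch("matplotlib.pyplot.show", Mock())  # applied to every test_* method
class PlotSmokeTest(TestCase):
    def test_plot_runs(self):
        # Passes as long as no exception is raised while plotting.
        plt.plot([1, 2, 3])
        plt.show()  # intercepted by the Mock; nothing is displayed
--------------------------------------------------------------------------------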
/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2018 Quantopian, Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | from packaging.version import Version
17 | from unittest import TestCase, skipIf
18 |
19 | import pandas as pd
20 | from numpy import nan
21 | from pandas import (
22 | Series,
23 | DataFrame,
24 | date_range,
25 | MultiIndex,
26 | Timedelta,
27 | Timestamp,
28 | concat,
29 | )
30 | from pandas.testing import assert_frame_equal, assert_series_equal
31 | from parameterized import parameterized
32 |
33 | from alphalens.utils import (
34 | get_clean_factor_and_forward_returns,
35 | compute_forward_returns,
36 | quantize_factor,
37 | )
38 |
39 | pandas_version = Version(pd.__version__)
40 |
41 | pandas_one_point_zero = Version("1.0") <= pandas_version < Version("1.1")
42 |
43 |
44 | class UtilsTestCase(TestCase):
45 | dr = date_range(start="2015-1-1", end="2015-1-2")
46 | dr.name = "date"
47 | tickers = ["A", "B", "C", "D"]
48 |
49 | factor = DataFrame(
50 | index=dr, columns=tickers, data=[[1, 2, 3, 4], [4, 3, 2, 1]]
51 | ).stack()
52 | factor.index = factor.index.set_names(["date", "asset"])
53 | factor.name = "factor"
54 | factor_data = DataFrame()
55 | factor_data["factor"] = factor
56 | factor_data["group"] = Series(
57 | index=factor.index, data=[1, 1, 2, 2, 1, 1, 2, 2], dtype="category"
58 | )
59 |
60 | biased_factor = DataFrame(
61 | index=dr,
62 |         columns=tickers + ["E", "F", "G", "H"],
63 | data=[[-1, 3, -2, 4, -5, 7, -6, 8], [-4, 2, -3, 1, -8, 6, -7, 5]],
64 | ).stack()
65 | biased_factor.index = biased_factor.index.set_names(["date", "asset"])
66 | biased_factor.name = "factor"
67 | biased_factor_data = DataFrame()
68 | biased_factor_data["factor"] = biased_factor
69 | biased_factor_data["group"] = Series(
70 | index=biased_factor.index,
71 | data=[1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2],
72 | dtype="category",
73 | )
74 |
75 | def test_compute_forward_returns(self):
76 | dr = date_range(start="2015-1-1", end="2015-1-3")
77 | prices = DataFrame(index=dr, columns=["A", "B"], data=[[1, 1], [1, 2], [2, 1]])
78 | factor = prices.stack()
79 |
80 | fp = compute_forward_returns(factor, prices, periods=[1, 2])
81 |
82 | ix = MultiIndex.from_product([dr, ["A", "B"]], names=["date", "asset"])
83 | expected = DataFrame(index=ix, columns=["1D", "2D"])
84 | expected["1D"] = [0.0, 1.0, 1.0, -0.5, nan, nan]
85 | expected["2D"] = [1.0, 0.0, nan, nan, nan, nan]
86 |
87 | assert_frame_equal(fp, expected)
88 |
89 | def test_compute_forward_returns_index_out_of_bound(self):
90 | dr = date_range(start="2014-12-29", end="2015-1-3")
91 | prices = DataFrame(
92 | index=dr,
93 | columns=["A", "B"],
94 | data=[[nan, nan], [nan, nan], [nan, nan], [1, 1], [1, 2], [2, 1]],
95 | )
96 |
97 | dr = date_range(start="2015-1-1", end="2015-1-3")
98 | factor = DataFrame(index=dr, columns=["A", "B"], data=[[1, 1], [1, 2], [2, 1]])
99 | factor = factor.stack()
100 |
101 | fp = compute_forward_returns(factor, prices, periods=[1, 2])
102 |
103 | ix = MultiIndex.from_product([dr, ["A", "B"]], names=["date", "asset"])
104 | expected = DataFrame(index=ix, columns=["1D", "2D"])
105 | expected["1D"] = [0.0, 1.0, 1.0, -0.5, nan, nan]
106 | expected["2D"] = [1.0, 0.0, nan, nan, nan, nan]
107 |
108 | assert_frame_equal(fp, expected)
109 |
110 | def test_compute_forward_returns_non_cum(self):
111 | dr = date_range(start="2015-1-1", end="2015-1-3")
112 | prices = DataFrame(index=dr, columns=["A", "B"], data=[[1, 1], [1, 2], [2, 1]])
113 | factor = prices.stack()
114 |
115 | fp = compute_forward_returns(
116 | factor, prices, periods=[1, 2], cumulative_returns=False
117 | )
118 |
119 | ix = MultiIndex.from_product([dr, ["A", "B"]], names=["date", "asset"])
120 | expected = DataFrame(index=ix, columns=["1D", "2D"])
121 | expected["1D"] = [0.0, 1.0, 1.0, -0.5, nan, nan]
122 | expected["2D"] = [1.0, -0.5, nan, nan, nan, nan]
123 |
124 | assert_frame_equal(fp, expected)
125 |
126 | @parameterized.expand(
127 | [
128 | (factor_data, 4, None, False, False, [1, 2, 3, 4, 4, 3, 2, 1]),
129 | (factor_data, 2, None, False, False, [1, 1, 2, 2, 2, 2, 1, 1]),
130 | (factor_data, 2, None, True, False, [1, 2, 1, 2, 2, 1, 2, 1]),
131 | (
132 | biased_factor_data,
133 | 4,
134 | None,
135 | False,
136 | True,
137 | [2, 3, 2, 3, 1, 4, 1, 4, 2, 3, 2, 3, 1, 4, 1, 4],
138 | ),
139 | (
140 | biased_factor_data,
141 | 2,
142 | None,
143 | False,
144 | True,
145 | [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2],
146 | ),
147 | (
148 | biased_factor_data,
149 | 2,
150 | None,
151 | True,
152 | True,
153 | [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2],
154 | ),
155 | (
156 | biased_factor_data,
157 | None,
158 | 4,
159 | False,
160 | True,
161 | [2, 3, 2, 3, 1, 4, 1, 4, 2, 3, 2, 3, 1, 4, 1, 4],
162 | ),
163 | (
164 | biased_factor_data,
165 | None,
166 | 2,
167 | False,
168 | True,
169 | [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2],
170 | ),
171 | (
172 | biased_factor_data,
173 | None,
174 | 2,
175 | True,
176 | True,
177 | [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2],
178 | ),
179 | (
180 | factor_data,
181 | [0, 0.25, 0.5, 0.75, 1.0],
182 | None,
183 | False,
184 | False,
185 | [1, 2, 3, 4, 4, 3, 2, 1],
186 | ),
187 | (
188 | factor_data,
189 | [0, 0.5, 0.75, 1.0],
190 | None,
191 | False,
192 | False,
193 | [1, 1, 2, 3, 3, 2, 1, 1],
194 | ),
195 | (
196 | factor_data,
197 | [0, 0.25, 0.5, 1.0],
198 | None,
199 | False,
200 | False,
201 | [1, 2, 3, 3, 3, 3, 2, 1],
202 | ),
203 | (
204 | factor_data,
205 | [0, 0.5, 1.0],
206 | None,
207 | False,
208 | False,
209 | [1, 1, 2, 2, 2, 2, 1, 1],
210 | ),
211 | (
212 | factor_data,
213 | [0.25, 0.5, 0.75],
214 | None,
215 | False,
216 | False,
217 | [nan, 1, 2, nan, nan, 2, 1, nan],
218 | ),
219 | (
220 | factor_data,
221 | [0, 0.5, 1.0],
222 | None,
223 | True,
224 | False,
225 | [1, 2, 1, 2, 2, 1, 2, 1],
226 | ),
227 | (
228 | factor_data,
229 | [0.5, 1.0],
230 | None,
231 | True,
232 | False,
233 | [nan, 1, nan, 1, 1, nan, 1, nan],
234 | ),
235 | (
236 | factor_data,
237 | [0, 1.0],
238 | None,
239 | True,
240 | False,
241 | [1, 1, 1, 1, 1, 1, 1, 1],
242 | ),
243 | (factor_data, None, 4, False, False, [1, 2, 3, 4, 4, 3, 2, 1]),
244 | (factor_data, None, 2, False, False, [1, 1, 2, 2, 2, 2, 1, 1]),
245 | (factor_data, None, 3, False, False, [1, 1, 2, 3, 3, 2, 1, 1]),
246 | (factor_data, None, 8, False, False, [1, 3, 6, 8, 8, 6, 3, 1]),
247 | (
248 | factor_data,
249 | None,
250 | [0, 1, 2, 3, 5],
251 | False,
252 | False,
253 | [1, 2, 3, 4, 4, 3, 2, 1],
254 | ),
255 | (
256 | factor_data,
257 | None,
258 | [1, 2, 3],
259 | False,
260 | False,
261 | [nan, 1, 2, nan, nan, 2, 1, nan],
262 | ),
263 | (
264 | factor_data,
265 | None,
266 | [0, 2, 5],
267 | False,
268 | False,
269 | [1, 1, 2, 2, 2, 2, 1, 1],
270 | ),
271 | (
272 | factor_data,
273 | None,
274 | [0.5, 2.5, 4.5],
275 | False,
276 | False,
277 | [1, 1, 2, 2, 2, 2, 1, 1],
278 | ),
279 | (
280 | factor_data,
281 | None,
282 | [0.5, 2.5],
283 | True,
284 | False,
285 | [1, 1, nan, nan, nan, nan, 1, 1],
286 | ),
287 | (factor_data, None, 2, True, False, [1, 2, 1, 2, 2, 1, 2, 1]),
288 | ]
289 | )
290 | def test_quantize_factor(
291 | self, factor, quantiles, bins, by_group, zero_aware, expected_vals
292 | ):
293 | quantized_factor = quantize_factor(
294 | factor,
295 | quantiles=quantiles,
296 | bins=bins,
297 | by_group=by_group,
298 | zero_aware=zero_aware,
299 | )
300 | expected = Series(
301 | index=factor.index, data=expected_vals, name="factor_quantile"
302 | ).dropna()
303 | assert_series_equal(quantized_factor, expected)
304 |
305 | def test_get_clean_factor_and_forward_returns_1(self):
306 | """
307 | Test get_clean_factor_and_forward_returns with a daily factor
308 | """
309 | tickers = ["A", "B", "C", "D", "E", "F"]
310 |
311 | factor_groups = {"A": 1, "B": 2, "C": 1, "D": 2, "E": 1, "F": 2}
312 |
313 | price_data = [
314 | [1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i]
315 | for i in range(1, 7)
316 | ] # 6 days = 3 + 3 fwd returns
317 |
318 | factor_data = [
319 | [3, 4, 2, 1, nan, nan],
320 | [3, nan, nan, 1, 4, 2],
321 | [3, 4, 2, 1, nan, nan],
322 | ] # 3 days
323 |
324 | start = "2015-1-11"
325 | factor_end = "2015-1-13"
326 | price_end = "2015-1-16" # 3D fwd returns
327 |
328 | price_index = date_range(start=start, end=price_end)
329 | price_index.name = "date"
330 | prices = DataFrame(index=price_index, columns=tickers, data=price_data)
331 |
332 | factor_index = date_range(start=start, end=factor_end)
333 | factor_index.name = "date"
334 | factor = DataFrame(
335 | index=factor_index, columns=tickers, data=factor_data
336 | ).stack()
337 |
338 | factor_data = get_clean_factor_and_forward_returns(
339 | factor,
340 | prices,
341 | groupby=factor_groups,
342 | quantiles=4,
343 | periods=(1, 2, 3),
344 | )
345 |
346 | expected_idx = factor.index.rename(["date", "asset"])
347 | expected_cols = [
348 | "1D",
349 | "2D",
350 | "3D",
351 | "factor",
352 | "group",
353 | "factor_quantile",
354 | ]
355 | expected_data = [
356 | [0.1, 0.21, 0.331, 3.0, 1, 3],
357 | [-0.5, -0.75, -0.875, 4.0, 2, 4],
358 | [2.0, 8.00, 26.000, 2.0, 1, 2],
359 | [-0.1, -0.19, -0.271, 1.0, 2, 1],
360 | [0.1, 0.21, 0.331, 3.0, 1, 3],
361 | [-0.1, -0.19, -0.271, 1.0, 2, 1],
362 | [-0.5, -0.75, -0.875, 4.0, 1, 4],
363 | [0.0, 0.00, 0.000, 2.0, 2, 2],
364 | [0.1, 0.21, 0.331, 3.0, 1, 3],
365 | [-0.5, -0.75, -0.875, 4.0, 2, 4],
366 | [2.0, 8.00, 26.000, 2.0, 1, 2],
367 | [-0.1, -0.19, -0.271, 1.0, 2, 1],
368 | ]
369 | expected = DataFrame(
370 | index=expected_idx, columns=expected_cols, data=expected_data
371 | )
372 | expected["group"] = expected["group"].astype("category")
373 |
374 | assert_frame_equal(factor_data, expected)
375 |
376 | def test_get_clean_factor_and_forward_returns_2(self):
377 | """
378 | Test get_clean_factor_and_forward_returns with a daily factor
379 | on a business day calendar
380 | """
381 | tickers = ["A", "B", "C", "D", "E", "F"]
382 |
383 | factor_groups = {"A": 1, "B": 2, "C": 1, "D": 2, "E": 1, "F": 2}
384 |
385 | price_data = [
386 | [1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i]
387 | for i in range(1, 7)
388 | ] # 6 days = 3 + 3 fwd returns
389 |
390 | factor_data = [
391 | [3, 4, 2, 1, nan, nan],
392 | [3, nan, nan, 1, 4, 2],
393 | [3, 4, 2, 1, nan, nan],
394 | ] # 3 days
395 |
396 | start = "2017-1-12"
397 | factor_end = "2017-1-16"
398 | price_end = "2017-1-19" # 3D fwd returns
399 |
400 | price_index = date_range(start=start, end=price_end, freq="B")
401 | price_index.name = "date"
402 | prices = DataFrame(index=price_index, columns=tickers, data=price_data)
403 |
404 | factor_index = date_range(start=start, end=factor_end, freq="B")
405 | factor_index.name = "date"
406 | factor = DataFrame(
407 | index=factor_index, columns=tickers, data=factor_data
408 | ).stack()
409 |
410 | factor_data = get_clean_factor_and_forward_returns(
411 | factor,
412 | prices,
413 | groupby=factor_groups,
414 | quantiles=4,
415 | periods=(1, 2, 3),
416 | )
417 |
418 | expected_idx = factor.index.rename(["date", "asset"])
419 | expected_cols = [
420 | "1D",
421 | "2D",
422 | "3D",
423 | "factor",
424 | "group",
425 | "factor_quantile",
426 | ]
427 | expected_data = [
428 | [0.1, 0.21, 0.331, 3.0, 1, 3],
429 | [-0.5, -0.75, -0.875, 4.0, 2, 4],
430 | [2.0, 8.00, 26.000, 2.0, 1, 2],
431 | [-0.1, -0.19, -0.271, 1.0, 2, 1],
432 | [0.1, 0.21, 0.331, 3.0, 1, 3],
433 | [-0.1, -0.19, -0.271, 1.0, 2, 1],
434 | [-0.5, -0.75, -0.875, 4.0, 1, 4],
435 | [0.0, 0.00, 0.000, 2.0, 2, 2],
436 | [0.1, 0.21, 0.331, 3.0, 1, 3],
437 | [-0.5, -0.75, -0.875, 4.0, 2, 4],
438 | [2.0, 8.00, 26.000, 2.0, 1, 2],
439 | [-0.1, -0.19, -0.271, 1.0, 2, 1],
440 | ]
441 | expected = DataFrame(
442 | index=expected_idx, columns=expected_cols, data=expected_data
443 | )
444 | expected["group"] = expected["group"].astype("category")
445 |
446 | assert_frame_equal(factor_data, expected)
447 |
448 | def test_get_clean_factor_and_forward_returns_3(self):
449 | """
450 |         Test get_clean_factor_and_forward_returns with an intraday factor
451 | """
452 | tickers = ["A", "B", "C", "D", "E", "F"]
453 |
454 | factor_groups = {"A": 1, "B": 2, "C": 1, "D": 2, "E": 1, "F": 2}
455 |
456 | price_data = [
457 | [1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i]
458 | for i in range(1, 5)
459 | ] # 4 days = 3 + 1 fwd returns
460 |
461 | factor_data = [
462 | [3, 4, 2, 1, nan, nan],
463 | [3, nan, nan, 1, 4, 2],
464 | [3, 4, 2, 1, nan, nan],
465 | ] # 3 days
466 |
467 | start = "2017-1-12"
468 | factor_end = "2017-1-16"
469 | price_end = "2017-1-17" # 1D fwd returns
470 |
471 | price_index = date_range(start=start, end=price_end, freq="B")
472 | price_index.name = "date"
473 | today_open = DataFrame(
474 | index=price_index + Timedelta("9h30m"),
475 | columns=tickers,
476 | data=price_data,
477 | )
478 | today_open_1h = DataFrame(
479 | index=price_index + Timedelta("10h30m"),
480 | columns=tickers,
481 | data=price_data,
482 | )
483 | today_open_1h += today_open_1h * 0.001
484 | today_open_3h = DataFrame(
485 | index=price_index + Timedelta("12h30m"),
486 | columns=tickers,
487 | data=price_data,
488 | )
489 | today_open_3h -= today_open_3h * 0.002
490 | prices = concat([today_open, today_open_1h, today_open_3h]).sort_index()
491 |
492 | factor_index = date_range(start=start, end=factor_end, freq="B")
493 | factor_index.name = "date"
494 | factor = DataFrame(
495 | index=factor_index + Timedelta("9h30m"),
496 | columns=tickers,
497 | data=factor_data,
498 | ).stack()
499 |
500 | factor_data = get_clean_factor_and_forward_returns(
501 | factor,
502 | prices,
503 | groupby=factor_groups,
504 | quantiles=4,
505 | periods=(1, 2, 3),
506 | )
507 |
508 | expected_idx = factor.index.rename(["date", "asset"])
509 | expected_cols = [
510 | "1h",
511 | "3h",
512 | "1D",
513 | "factor",
514 | "group",
515 | "factor_quantile",
516 | ]
517 | expected_data = [
518 | [0.001, -0.002, 0.1, 3.0, 1, 3],
519 | [0.001, -0.002, -0.5, 4.0, 2, 4],
520 | [0.001, -0.002, 2.0, 2.0, 1, 2],
521 | [0.001, -0.002, -0.1, 1.0, 2, 1],
522 | [0.001, -0.002, 0.1, 3.0, 1, 3],
523 | [0.001, -0.002, -0.1, 1.0, 2, 1],
524 | [0.001, -0.002, -0.5, 4.0, 1, 4],
525 | [0.001, -0.002, 0.0, 2.0, 2, 2],
526 | [0.001, -0.002, 0.1, 3.0, 1, 3],
527 | [0.001, -0.002, -0.5, 4.0, 2, 4],
528 | [0.001, -0.002, 2.0, 2.0, 1, 2],
529 | [0.001, -0.002, -0.1, 1.0, 2, 1],
530 | ]
531 | expected = DataFrame(
532 | index=expected_idx, columns=expected_cols, data=expected_data
533 | )
534 | expected["group"] = expected["group"].astype("category")
535 |
536 | assert_frame_equal(factor_data, expected)
537 |
538 | def test_get_clean_factor_and_forward_returns_4(self):
539 | """
540 | Test get_clean_factor_and_forward_returns on an event
541 | """
542 | tickers = ["A", "B", "C", "D", "E", "F"]
543 |
544 | factor_groups = {"A": 1, "B": 2, "C": 1, "D": 2, "E": 1, "F": 2}
545 |
546 | price_data = [
547 | [1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i]
548 | for i in range(1, 9)
549 | ]
550 |
551 | factor_data = [
552 | [1, nan, nan, nan, nan, 6],
553 | [4, nan, nan, 7, nan, nan],
554 | [nan, nan, nan, nan, nan, nan],
555 | [nan, 3, nan, 2, nan, nan],
556 | [nan, nan, 1, nan, 3, nan],
557 | ]
558 |
559 | price_index = date_range(start="2017-1-12", end="2017-1-23", freq="B")
560 | price_index.name = "date"
561 | prices = DataFrame(index=price_index, columns=tickers, data=price_data)
562 |
563 | factor_index = date_range(start="2017-1-12", end="2017-1-18", freq="B")
564 | factor_index.name = "date"
565 | factor = DataFrame(
566 | index=factor_index, columns=tickers, data=factor_data
567 | ).stack()
568 |
569 | factor_data = get_clean_factor_and_forward_returns(
570 | factor,
571 | prices,
572 | groupby=factor_groups,
573 | quantiles=4,
574 | periods=(1, 2, 3),
575 | )
576 |
577 | expected_idx = factor.index.rename(["date", "asset"])
578 | expected_cols = [
579 | "1D",
580 | "2D",
581 | "3D",
582 | "factor",
583 | "group",
584 | "factor_quantile",
585 | ]
586 | expected_data = [
587 | [0.1, 0.21, 0.331, 1.0, 1, 1],
588 | [0.0, 0.00, 0.000, 6.0, 2, 4],
589 | [0.1, 0.21, 0.331, 4.0, 1, 1],
590 | [-0.1, -0.19, -0.271, 7.0, 2, 4],
591 | [-0.5, -0.75, -0.875, 3.0, 2, 4],
592 | [-0.1, -0.19, -0.271, 2.0, 2, 1],
593 | [2.0, 8.00, 26.000, 1.0, 1, 1],
594 | [-0.5, -0.75, -0.875, 3.0, 1, 4],
595 | ]
596 | expected = DataFrame(
597 | index=expected_idx, columns=expected_cols, data=expected_data
598 | )
599 | expected["group"] = expected["group"].astype("category")
600 |
601 | assert_frame_equal(factor_data, expected)
602 |
603 |     # todo: breaks on 1.0 <= pd.__version__ < 1.1
604 |     @skipIf(pandas_one_point_zero, "Skipping for 1.0 <= pd.__version__ < 1.1")
605 | def test_get_clean_factor_and_forward_returns_5(self):
606 | """
607 | Test get_clean_factor_and_forward_returns with intraday factor
608 | and holidays
609 | """
610 | tickers = ["A", "B", "C", "D", "E", "F"]
611 |
612 | factor_groups = {"A": 1, "B": 2, "C": 1, "D": 2, "E": 1, "F": 2}
613 |
614 | price_data = [
615 | [1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i]
616 | for i in range(1, 20)
617 | ] # 19 days = 18 + 1 fwd returns
618 |
619 | factor_data = [
620 | [3, 4, 2, 1, nan, nan],
621 | [3, nan, nan, 1, 4, 2],
622 | [3, 4, 2, 1, nan, nan],
623 | ] * 6 # 18 days
624 |
625 | start = "2017-1-12"
626 | factor_end = "2017-2-10"
627 | price_end = "2017-2-13" # 1D (business day) fwd returns
628 | holidays = ["2017-1-13", "2017-1-18", "2017-1-30", "2017-2-7"]
629 | holidays = [Timestamp(d) for d in holidays]
630 |
631 | price_index = date_range(start=start, end=price_end, freq="B")
632 | price_index.name = "date"
633 | price_index = price_index.drop(holidays)
634 |
635 | today_open = DataFrame(
636 | index=price_index + Timedelta("9h30m"),
637 | columns=tickers,
638 | data=price_data,
639 | )
640 | today_open_1h = DataFrame(
641 | index=price_index + Timedelta("10h30m"),
642 | columns=tickers,
643 | data=price_data,
644 | )
645 | today_open_1h += today_open_1h * 0.001
646 | today_open_3h = DataFrame(
647 | index=price_index + Timedelta("12h30m"),
648 | columns=tickers,
649 | data=price_data,
650 | )
651 | today_open_3h -= today_open_3h * 0.002
652 | prices = concat([today_open, today_open_1h, today_open_3h]).sort_index()
653 |
654 | factor_index = date_range(start=start, end=factor_end, freq="B", name="date")
655 | factor_index = factor_index.drop(holidays)
656 | factor = DataFrame(
657 | index=factor_index + Timedelta("9h30m"),
658 | columns=tickers,
659 | data=factor_data,
660 | ).stack()
661 |
662 | factor_data = get_clean_factor_and_forward_returns(
663 | factor,
664 | prices,
665 | groupby=factor_groups,
666 | quantiles=4,
667 | periods=(1, 2, 3),
668 | )
669 |
670 | expected_idx = factor.index.rename(["date", "asset"])
671 | expected_cols = [
672 | "1h",
673 | "3h",
674 | "1D",
675 | "factor",
676 | "group",
677 | "factor_quantile",
678 | ]
679 | expected_data = [
680 | [0.001, -0.002, 0.1, 3.0, 1, 3],
681 | [0.001, -0.002, -0.5, 4.0, 2, 4],
682 | [0.001, -0.002, 2.0, 2.0, 1, 2],
683 | [0.001, -0.002, -0.1, 1.0, 2, 1],
684 | [0.001, -0.002, 0.1, 3.0, 1, 3],
685 | [0.001, -0.002, -0.1, 1.0, 2, 1],
686 | [0.001, -0.002, -0.5, 4.0, 1, 4],
687 | [0.001, -0.002, 0.0, 2.0, 2, 2],
688 | [0.001, -0.002, 0.1, 3.0, 1, 3],
689 | [0.001, -0.002, -0.5, 4.0, 2, 4],
690 | [0.001, -0.002, 2.0, 2.0, 1, 2],
691 | [0.001, -0.002, -0.1, 1.0, 2, 1],
692 | ] * 6 # 18 days
693 | expected = DataFrame(
694 | index=expected_idx, columns=expected_cols, data=expected_data
695 | )
696 | expected["group"] = expected["group"].astype("category")
697 |
698 | assert_frame_equal(factor_data, expected)
699 |
700 | inferred_holidays = factor_data.index.levels[0].freq.holidays
701 | inferred_holidays = [
702 | Timestamp(d.astype("datetime64[ns]")) for d in inferred_holidays
703 | ]
704 | assert sorted(holidays) == sorted(inferred_holidays)
705 |
706 |     # todo: breaks on 1.0 <= pd.__version__ < 1.1
707 |     @skipIf(pandas_one_point_zero, "Skipping for 1.0 <= pd.__version__ < 1.1")
708 | def test_get_clean_factor_and_forward_returns_6(self):
709 | """
710 | Test get_clean_factor_and_forward_returns with a daily factor
711 | on a business day calendar and holidays
712 | """
713 | tickers = ["A", "B", "C", "D", "E", "F"]
714 |
715 | factor_groups = {"A": 1, "B": 2, "C": 1, "D": 2, "E": 1, "F": 2}
716 |
717 | price_data = [
718 | [1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i]
719 | for i in range(1, 22)
720 | ] # 21 days = 18 + 3 fwd returns
721 |
722 | factor_data = [
723 | [3, 4, 2, 1, nan, nan],
724 | [3, nan, nan, 1, 4, 2],
725 | [3, 4, 2, 1, nan, nan],
726 | ] * 6 # 18 days
727 |
728 | start = "2017-1-12"
729 | factor_end = "2017-2-10"
730 | price_end = "2017-2-15" # 3D (business day) fwd returns
731 | holidays = ["2017-1-13", "2017-1-18", "2017-1-30", "2017-2-7"]
732 | holidays = [Timestamp(d) for d in holidays]
733 |
734 | price_index = date_range(start=start, end=price_end, freq="B")
735 | price_index.name = "date"
736 | price_index = price_index.drop(holidays)
737 | prices = DataFrame(index=price_index, columns=tickers, data=price_data)
738 |
739 | factor_index = date_range(start=start, end=factor_end, freq="B")
740 | factor_index.name = "date"
741 | factor_index = factor_index.drop(holidays)
742 | factor = DataFrame(
743 | index=factor_index, columns=tickers, data=factor_data
744 | ).stack()
745 |
746 | factor_data = get_clean_factor_and_forward_returns(
747 | factor,
748 | prices,
749 | groupby=factor_groups,
750 | quantiles=4,
751 | periods=(1, 2, 3),
752 | )
753 |
754 | expected_idx = factor.index.rename(["date", "asset"])
755 | expected_cols = [
756 | "1D",
757 | "2D",
758 | "3D",
759 | "factor",
760 | "group",
761 | "factor_quantile",
762 | ]
763 | expected_data = [
764 | [0.1, 0.21, 0.331, 3.0, 1, 3],
765 | [-0.5, -0.75, -0.875, 4.0, 2, 4],
766 | [2.0, 8.00, 26.000, 2.0, 1, 2],
767 | [-0.1, -0.19, -0.271, 1.0, 2, 1],
768 | [0.1, 0.21, 0.331, 3.0, 1, 3],
769 | [-0.1, -0.19, -0.271, 1.0, 2, 1],
770 | [-0.5, -0.75, -0.875, 4.0, 1, 4],
771 | [0.0, 0.00, 0.000, 2.0, 2, 2],
772 | [0.1, 0.21, 0.331, 3.0, 1, 3],
773 | [-0.5, -0.75, -0.875, 4.0, 2, 4],
774 | [2.0, 8.00, 26.000, 2.0, 1, 2],
775 | [-0.1, -0.19, -0.271, 1.0, 2, 1],
776 | ] * 6 # 18 days
777 | expected = DataFrame(
778 | index=expected_idx, columns=expected_cols, data=expected_data
779 | )
780 | expected["group"] = expected["group"].astype("category")
781 |
782 | assert_frame_equal(factor_data, expected)
783 |
784 | inferred_holidays = factor_data.index.levels[0].freq.holidays
785 | inferred_holidays = [
786 | Timestamp(d.astype("datetime64[ns]")) for d in inferred_holidays
787 | ]
788 | assert sorted(holidays) == sorted(inferred_holidays)
789 |
--------------------------------------------------------------------------------
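As a quick cross-check of the first parameter row of test_quantize_factor
above, the standalone snippet below rebuilds the same two-day fixture and
reproduces the quantiles=4 expectation [1, 2, 3, 4, 4, 3, 2, 1] (a sketch;
output dtype may vary across pandas versions):

import pandas as pd
from alphalens.utils import quantize_factor

dr = pd.date_range(start="2015-1-1", end="2015-1-2", name="date")
tickers = ["A", "B", "C", "D"]

factor = pd.DataFrame(
    index=dr, columns=tickers, data=[[1, 2, 3, 4], [4, 3, 2, 1]]
).stack()
factor.index = factor.index.set_names(["date", "asset"])
factor_data = pd.DataFrame({"factor": factor})

quantized = quantize_factor(factor_data, quantiles=4)
print(quantized.tolist())  # expected per the table above: [1, 2, 3, 4, 4, 3, 2, 1]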