├── .github ├── ISSUE_TEMPLATE │ ├── bug-report-template.md │ └── feature-request-template.md └── workflows │ └── tsfracdiff_tests.yml ├── LICENSE ├── README.md ├── docs ├── .nojekyll └── tsfracdiff │ ├── .nojekyll │ ├── index.html │ ├── tsfracdiff.html │ └── unit_root_tests.html ├── examples ├── Example.html └── Example.ipynb ├── pyproject.toml ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests └── test_module.py └── tsfracdiff ├── __init__.py ├── tsfracdiff.py └── unit_root_tests.py /.github/ISSUE_TEMPLATE/bug-report-template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report Template 3 | about: Report a bug 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Version Numbers:** 24 | - OS 25 | - tsfracdiff Version 26 | - Python Version 27 | 28 | **Additional context** 29 | Add any other context about the problem here. 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request-template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request Template 3 | about: Suggest an idea for this project 4 | title: "[FEAT]" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/tsfracdiff_tests.yml: -------------------------------------------------------------------------------- 1 | # Build & unit tests 2 | 3 | name: Unit Tests 4 | 5 | on: 6 | push: 7 | branches: [ master, dev ] 8 | pull_request: 9 | branches: [ master, dev ] 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-latest 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: ["3.7", "3.8", "3.9", "3.10"] 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v3 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies and package 27 | run: | 28 | python -m pip install --upgrade pip 29 | python -m pip install flake8 pytest 30 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 31 | python -m pip install -e . 32 | - name: Lint with flake8 33 | run: | 34 | # Stop the build if there are syntax errors 35 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 36 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 37 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 38 | - name: Run Unit Tests 39 | run: | 40 | python -m pytest 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 adamvvu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build](https://img.shields.io/github/actions/workflow/status/adamvvu/tsfracdiff/tsfracdiff_tests.yml?style=for-the-badge)](https://github.com/adamvvu/tsfracdiff/actions/workflows/tsfracdiff_tests.yml) 2 | [![PyPi](https://img.shields.io/pypi/v/tsfracdiff?style=for-the-badge)](https://pypi.org/project/tsfracdiff/) 3 | [![Downloads](https://img.shields.io/pypi/dm/tsfracdiff?style=for-the-badge)](https://pypi.org/project/tsfracdiff/) 4 | [![License](https://img.shields.io/badge/license-MIT-green?style=for-the-badge)](https://github.com/adamvvu/tsfracdiff/blob/master/LICENSE) 5 | 6 | Efficient and easy-to-use fractional differentiation transformations for 7 | stationarizing time series data in Python. 8 | 9 | ------------------------------------------------------------------------ 10 | 11 | ## **tsfracdiff** 12 | 13 | Data with high persistence, serial correlation, and non-stationarity 14 | pose significant challenges when used directly as predictive signals in 15 | many machine learning and statistical models. A common approach is to 16 | take the first difference as a stationarity transformation, but this 17 | wipes out much of the information available in the data. For datasets 18 | with a low signal-to-noise ratio, such as financial market 19 | data, this effect can be particularly severe. Hosking (1981) introduced 20 | fractional (non-integer) differentiation, valued for its flexibility in modeling 21 | short-term and long-term time series dynamics, and López de Prado (2018) 22 | proposed the use of fractional differentiation as a feature 23 | transformation for financial machine learning applications. This library 24 | is an extension of their ideas, with some modifications for efficiency 25 | and robustness. 
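Conceptually, a first difference replaces each value with `x_t - x_{t-1}`, while a fractional difference of order `d` applies an expanding set of lag weights taken from the binomial expansion of `(1 - B)^d`, where `B` is the backshift (lag) operator. The sketch below shows the weight recursion the library uses internally to build these lags, truncated once the weights become negligible; it is illustrative only, and `fracdiff_weights` is not part of the package API:

``` python
import numpy as np

def fracdiff_weights(d, threshold=1e-4):
    """Lag weights w_k of (1 - B)^d, truncated once |w_k| < threshold."""
    weights = [1.0]
    k = 1
    while True:
        w = -weights[-1] * (d - k + 1) / k  # w_k = -w_{k-1} * (d - k + 1) / k
        if abs(w) < threshold:
            break
        weights.append(w)
        k += 1
    return np.array(weights)

# For d = 1 this reduces to the ordinary first-difference weights [1, -1].
# For d = 0.5 the first weights are [1, -0.5, -0.125, -0.0625, ...]; a
# fractionally differenced value is the dot product of these weights with
# the most recent observations (newest observation first).
print(fracdiff_weights(0.5)[:4])
```

The closer the order is to zero, the closer the transformation is to the identity and the more of the original series' memory is retained, which is why the library searches for the minimum order that achieves stationarity.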
26 | 27 | [Documentation](https://adamvvu.github.io/tsfracdiff/docs/) 28 | 29 | ## Getting Started 30 | 31 | ### Installation 32 | 33 | `pip install tsfracdiff` 34 | 35 | #### Dependencies: 36 | 37 | # Required 38 | python3 # Python 3.7+ 39 | numpy 40 | pandas 41 | arch 42 | 43 | # Suggested 44 | joblib 45 | 46 | ### Usage 47 | 48 | ``` python 49 | # A pandas.DataFrame/np.array with potentially non-stationary time series 50 | df 51 | 52 | # Automatic stationary transformation with minimal information loss 53 | from tsfracdiff import FractionalDifferentiator 54 | fracDiff = FractionalDifferentiator() 55 | df = fracDiff.FitTransform(df) 56 | ``` 57 | 58 | For a more in-depth example, see this 59 | [notebook](https://adamvvu.github.io/tsfracdiff/examples/Example.html). 60 | 61 | ## References 62 | 63 | Hosking, J. R. M. (1981). Fractional Differencing. Biometrika, 68(1), 64 | 165--176. 65 | 66 | López de Prado, Marcos (2018). Advances in Financial Machine Learning. 67 | John Wiley & Sons, Inc. 68 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamvvu/tsfracdiff/fd1816021ac717eb6c761365931279071b66ead6/docs/.nojekyll -------------------------------------------------------------------------------- /docs/tsfracdiff/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamvvu/tsfracdiff/fd1816021ac717eb6c761365931279071b66ead6/docs/tsfracdiff/.nojekyll -------------------------------------------------------------------------------- /docs/tsfracdiff/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | tsfracdiff API documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 |
21 |
22 |

Package tsfracdiff

23 |
24 |
25 |
26 | 27 | Expand source code 28 | 29 |
from .unit_root_tests import *
30 | from .tsfracdiff import *
31 | 
32 | __version__ = "1.0.4"
33 |
34 |
35 |
36 |

Sub-modules

37 |
38 |
tsfracdiff.tsfracdiff
39 |
40 |
41 |
42 |
tsfracdiff.unit_root_tests
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 | 69 |
70 | 73 | 74 | -------------------------------------------------------------------------------- /docs/tsfracdiff/tsfracdiff.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | tsfracdiff.tsfracdiff API documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 |
21 |
22 |

Module tsfracdiff.tsfracdiff

23 |
24 |
25 |
26 | 27 | Expand source code 28 | 29 |
from .unit_root_tests import *
 30 | 
 31 | import pandas as pd
 32 | import numpy as np
 33 | 
 34 | class FractionalDifferentiator:
 35 |     
 36 |     def __init__(self, maxOrderBound=1, significance=0.01, precision=0.01, memoryThreshold=1e-4,
 37 |                        unitRootTest='PP', unitRootTestConfig={}):
 38 |         """
 39 |         Estimates the real-valued order of integration and provides fractional 
 40 |         differentiation data transformations.
 41 |         
 42 |         The available stationarity/unit root tests are:
 43 |         -----------------------------------------------
 44 |             - 'PP'  : Phillips and Perron (1988) [default]
 45 |             - 'ADF' : Augmented Dickey-Fuller (Said & Dickey, 1984)
 46 | 
 47 |         Parameters:
 48 |         -----------
 49 |             maxOrderBound       (float) Maximum real-valued order to search in (0, maxOrderBound)
 50 |             significance        (float) Statistical significance level
 51 |             precision           (float) Precision of estimated order
 52 |             memoryThreshold     (float) Minimum magnitude of weight significance
 53 |             unitRootTest        (str)   Unit-root/stationarity tests: ['PP','ADF']
 54 |             unitRootTestConfig  (dict)  Optional keyword arguments to pass to unit root tests
 55 | 
 56 |         Attributes:
 57 |         -----------
 58 |             orders              (list)  Estimated minimum orders of differentiation
 59 |             numLags             (list)  Number of lags required for transformations
 60 | 
 61 |         Example:
 62 |         --------
 63 |                 # A pandas.DataFrame/np.array with potentially non-stationary time series
 64 |             df 
 65 |         
 66 |                 # Automatic stationary transformation with minimal information loss
 67 |             from tsfracdiff import FractionalDifferentiator
 68 |             fracDiff = FractionalDifferentiator()
 69 |             df = fracDiff.FitTransform(df)
 70 |         """
 71 |         self.maxOrderBound = maxOrderBound
 72 |         self.significance = significance
 73 |         self.precision = precision
 74 |         self.memoryThreshold = memoryThreshold
 75 |         
 76 |         # Critical value checks
 77 |         checkCV = False
 78 |         cv_sig = None
 79 |         if (self.significance in [0.01, 0.05, 0.1]):
 80 |             checkCV = True
 81 |             cv_sig = str(int(self.significance * 100)) + '%'
 82 |         
 83 |         # Unit-root/Stationarity tests
 84 |         if unitRootTest == 'PP':
 85 |             self.UnitRootTest = PhillipsPerron(significance=significance, checkCV=checkCV, cv_sig=cv_sig)
 86 |         elif unitRootTest == 'ADF':
 87 |             self.UnitRootTest = ADFuller(significance=significance, checkCV=checkCV, cv_sig=cv_sig)
 88 |         else:
 89 |             raise Exception('Please specify a valid unit root test.')
 90 |         self.UnitRootTest.config.update( unitRootTestConfig )
 91 | 
 92 |         # States
 93 |         self.isFitted = False
 94 |         self.orders = []
 95 |         self.numLags = None
 96 |         
 97 |     def Fit(self, df, parallel=True):
 98 |         """
 99 |         Estimates the fractional order of integration.
100 |         
101 |         Parameters:
102 |         -----------
103 |             df       (pandas.DataFrame/np.array) Raw data
104 |             parallel (bool) Use multiprocessing if true (default). Requires `joblib`.
105 |         """
106 |         df = pd.DataFrame(df)
107 |         
108 |         # Estimate minimum order of differencing
109 |         if parallel:
110 |             try:
111 |                 import multiprocessing
112 |                 from joblib import Parallel, delayed
113 |                 from functools import partial
114 |             except ImportError:
115 |                 raise Exception('The module `joblib` is required for parallelization.')
116 | 
117 |             def ApplyParallel(df, func, **kwargs):
118 |                 n_jobs = min(df.shape[1], multiprocessing.cpu_count())
119 |                 res = Parallel(n_jobs=n_jobs)( delayed(partial(func, **kwargs))(x) for x in np.array_split(df, df.shape[1], axis=1) )
120 |                 return res
121 |             orders = ApplyParallel(df, self._MinimumOrderSearch, upperOrder=self.maxOrderBound, first_run=True)
122 |         else:
123 |             orders = []
124 |             for j in range(df.shape[1]):
125 |                 orders.append( self._MinimumOrderSearch(df.iloc[:,j], upperOrder=self.maxOrderBound, first_run=True) )
126 |         self.orders = orders
127 |         self.numLags = [ (len(self._GetMemoryWeights(order, memoryThreshold=self.memoryThreshold)) - 1) for order in self.orders ]
128 |         self.isFitted = True
129 | 
130 |         return
131 |         
132 |     def FitTransform(self, df, parallel=True):
133 |         """
134 |         Estimates the fractional order of integration and returns a stationarized dataframe.
135 | 
136 |         Parameters
137 |         ----------
138 |             df       (pandas.DataFrame/np.array) Raw data
139 |             parallel (bool) Use multiprocessing if true (default). Requires `joblib`.
140 |         """
141 |         if not self.isFitted: 
142 |             self.Fit(df, parallel=parallel)
143 |         fracDiffed = self.Transform(df)
144 | 
145 |         return fracDiffed
146 |     
147 |     def Transform(self, df):
148 |         """
149 |         Applies a fractional differentiation transformation based on estimated orders.
150 | 
151 |         Parameters
152 |         ----------
153 |             df  (pandas.DataFrame/np.array) Raw data
154 |         """
155 |         if not self.isFitted: 
156 |             raise Exception('Fit the model first.')
157 |             
158 |         df = pd.DataFrame(df)
159 |         fracDiffed = []
160 |         for j in range(df.shape[1]):
161 |             x = self._FracDiff(df.iloc[:,j], order=self.orders[j])
162 |             fracDiffed.append( x )
163 |         fracDiffed = pd.concat(fracDiffed, axis=1).sort_index()
164 | 
165 |         return fracDiffed
166 |     
167 |     def InverseTransform(self, fracDiffed, lagData):
168 |         """
169 |         Applies a fractional integration transformation by inverting the fractional differentiation. 
170 | 
171 |         Note: The previous `K` values of the original time series are required to invert the transformation.
172 |         For multi-variate time series, `K` will likely vary across columns and you may find `K` with the
173 |         attribute `.numLags`. 
174 |         
175 |         Parameters
176 |         ----------
177 |             fracDiffed (pandas.DataFrame/np.array) Fractionally differentiated data
178 |             lagData    (pandas.DataFrame/np.array) Previous values of time series. See note.
179 | 
180 |         Example
181 |         -------
182 |             # Multi-variate Time Series/DataFrame
183 |             X                                           # Shape (1000, 2)
184 | 
185 |             # Stationarize
186 |             fracDiff = FractionalDifferentiator()
187 |             X_stationary = fracDiff.FitTransform( X )   # Shape (967, 2)
188 | 
189 |             # Estimated orders
190 |             orders = fracDiff.orders                    # [0.5703, 0.9141]
191 | 
192 |             # Required lagged values
193 |             numLags = fracDiff.numLags                  # [155, 33]
194 |             lagData = X.head(max(numLags))
195 | 
196 |             # Fractionally integrate by passing in the first 155 values
197 |             X_reconstructed = fracDiff.InverseTransform( X_stationary, lagData )    # Recovers the original X
198 |         """
199 |         if not self.isFitted: 
200 |             raise Exception('Fit the model first.')
201 | 
202 |         maxLags, minLags = max(self.numLags), min(self.numLags)
203 |         lagData = pd.DataFrame(lagData)
204 |         if lagData.shape[0] != maxLags:
205 |             raise Exception(f'The previous {maxLags} values are required.')
206 |         
207 |         fracDiffed = pd.DataFrame(fracDiffed)
208 |         X = []
209 |         for j in range(fracDiffed.shape[1]):
210 |             memoryWeights = self._GetMemoryWeights(self.orders[j], memoryThreshold=self.memoryThreshold)
211 |             K = self.numLags[j]
212 |             offset = K - minLags
213 | 
214 |             # Initial values
215 |             tsLagData = lagData.iloc[:K, j]
216 |             
217 |             # Transformed values
218 |             X_tilde = fracDiffed.iloc[offset:, j]
219 | 
220 |             # Already stationary: identity transform
221 |             if K == 0:
222 |                 X.append( X_tilde )
223 |                 continue
224 |             
225 |             # Iteratively invert transformation
226 |             X_vals = np.ravel(tsLagData.values)
227 |             X_tilde = np.ravel(X_tilde.values)
228 |             for t in range(len(X_tilde)):
229 |                 x = X_tilde[t] - np.sum( memoryWeights[:-1] * X_vals[-K:] )
230 |                 X_vals = np.append(X_vals, x)
231 |             X_vals = pd.Series(X_vals)
232 |             X.append( X_vals )
233 |         X = pd.concat(X, axis=1).sort_index()
234 |         X.columns = fracDiffed.columns
235 | 
236 |         # Check for duplicate indices
237 |         idx = lagData.index[:minLags].union( fracDiffed.index )
238 |         if len(idx) != X.shape[0]:
239 |             idx = [ t for t in range(X.shape[0]) ]
240 |         X.index = idx
241 | 
242 |         return X
243 | 
244 |     def _GetMemoryWeights(self, order, memoryThreshold=1e-4):
245 |         """
246 |         Returns an array of memory weights for each time lag.
247 | 
248 |         Parameters:
249 |         -----------
250 |             order           (float) Order of fracdiff
251 |             memoryThreshold (float) Minimum magnitude of weight significance
252 |         """
253 |         memoryWeights = [1,]
254 |         k = 1
255 |         while True:
256 |             weight = -memoryWeights[-1] * ( order - k + 1 ) / k # Iteratively generate next lag weight
257 |             if abs(weight) < memoryThreshold:
258 |                 break
259 |             memoryWeights.append(weight)
260 |             k += 1
261 |         return np.array(list(reversed(memoryWeights)))
262 |     
263 |     def _FracDiff(self, ts, order=1, memoryWeights=None):
264 |         """
265 |         Differentiates a time series based on a real-valued order.
266 | 
267 |         Parameters:
268 |         -----------
269 |             ts            (pandas.Series) Univariate time series
270 |             order         (float) Order of differentiation
271 |             memoryWeights (array) Optional pre-computed weights
272 |         """
273 |         if memoryWeights is None:
274 |             memoryWeights = self._GetMemoryWeights(order, memoryThreshold=self.memoryThreshold)
275 | 
276 |         K = len(memoryWeights)
277 |         fracDiffedSeries = ts.rolling(K).apply(lambda x: np.sum( x * memoryWeights ), raw=True)
278 |         fracDiffedSeries = fracDiffedSeries.iloc[(K-1):]
279 |         
280 |         return fracDiffedSeries
281 |     
282 |     def _MinimumOrderSearch(self, ts, lowerOrder=0, upperOrder=1, first_run=False):
283 |         """
284 |         Binary search algorithm for estimating the minimum order of differentiation required for stationarity.
285 |         
286 |         Parameters
287 |         ----------
288 |             ts                   (pandas.Series) Univariate time series
289 |             lowerOrder           (float) Lower bound on order
290 |             upperOrder           (float) Upper bound on order
291 |             first_run            (bool)  For testing endpoints of order bounds
292 |         """  
293 |         ## Convergence criteria
294 |         if abs( upperOrder - lowerOrder ) <= self.precision:
295 |             return upperOrder
296 |         
297 |         ## Initial run: Test endpoints
298 |         if first_run:
299 |             lowerFracDiff = self._FracDiff(ts, order=lowerOrder).dropna()
300 |             upperFracDiff = self._FracDiff(ts, order=upperOrder).dropna()
301 |             
302 |             # Unit root tests
303 |             lowerStationary = self.UnitRootTest.IsStationary( lowerFracDiff )
304 |             upperStationary = self.UnitRootTest.IsStationary( upperFracDiff )
305 | 
306 |             # Series is I(0)
307 |             if lowerStationary:
308 |                 return lowerOrder
309 |             # Series is I(k>>1)
310 |             if not upperStationary:                                                        
311 |                 print('Warning: Time series is explosive. Increase upper bounds.')
312 |                 return upperOrder
313 |             
314 |         ## Binary Search: Test midpoint
315 |         midOrder = ( lowerOrder + upperOrder ) / 2                                      
316 |         midFracDiff = self._FracDiff(ts, order=midOrder).dropna()
317 |         midStationary = self.UnitRootTest.IsStationary( midFracDiff )
318 |         
319 |         # Series is weakly stationary in [lowerOrder, midOrder]
320 |         if midStationary:
321 |             return self._MinimumOrderSearch(ts, lowerOrder=lowerOrder, upperOrder=midOrder)
322 |         # Series is weakly stationary in [midOrder, upperOrder]
323 |         else:
324 |             return self._MinimumOrderSearch(ts, lowerOrder=midOrder, upperOrder=upperOrder)
325 |         
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |

Classes

336 |
337 |
338 | class FractionalDifferentiator 339 | (maxOrderBound=1, significance=0.01, precision=0.01, memoryThreshold=0.0001, unitRootTest='PP', unitRootTestConfig={}) 340 |
341 |
342 |

Estimates the real-valued order of integration and provides fractional 343 | differentiation data transformations.

344 |

The available stationarity/unit root tests are:

345 |
- 'PP'  : Phillips and Perron (1988) [default]
346 | - 'ADF' : Augmented Dickey-Fuller (Said & Dickey, 1984)
347 | 
348 |

Parameters:

349 |
maxOrderBound       (float) Maximum real-valued order to search in (0, maxOrderBound)
350 | significance        (float) Statistical significance level
351 | precision           (float) Precision of estimated order
352 | memoryThreshold     (float) Minimum magnitude of weight significance
353 | unitRootTest        (str)   Unit-root/stationarity tests: ['PP','ADF']
354 | unitRootTestConfig  (dict)  Optional keyword arguments to pass to unit root tests
355 | 
356 |

Attributes:

357 |
orders              (list)  Estimated minimum orders of differentiation
358 | numLags             (list)  Number of lags required for transformations
359 | 
360 |

Example:

361 |
    # A pandas.DataFrame/np.array with potentially non-stationary time series
362 | df
363 | 
364 |     # Automatic stationary transformation with minimal information loss
365 | from tsfracdiff import FractionalDifferentiator
366 | fracDiff = FractionalDifferentiator()
367 | df = fracDiff.FitTransform(df)
368 | 
369 |
370 | 371 | Expand source code 372 | 373 |
class FractionalDifferentiator:
374 |     
375 |     def __init__(self, maxOrderBound=1, significance=0.01, precision=0.01, memoryThreshold=1e-4,
376 |                        unitRootTest='PP', unitRootTestConfig={}):
377 |         """
378 |         Estimates the real-valued order of integration and provides fractional 
379 |         differentiation data transformations.
380 |         
381 |         The available stationarity/unit root tests are:
382 |         -----------------------------------------------
383 |             - 'PP'  : Phillips and Perron (1988) [default]
384 |             - 'ADF' : Augmented Dickey-Fuller (Said & Dickey, 1984)
385 | 
386 |         Parameters:
387 |         -----------
388 |             maxOrderBound       (float) Maximum real-valued order to search in (0, maxOrderBound)
389 |             significance        (float) Statistical significance level
390 |             precision           (float) Precision of estimated order
391 |             memoryThreshold     (float) Minimum magnitude of weight significance
392 |             unitRootTest        (str)   Unit-root/stationarity tests: ['PP','ADF']
393 |             unitRootTestConfig  (dict)  Optional keyword arguments to pass to unit root tests
394 | 
395 |         Attributes:
396 |         -----------
397 |             orders              (list)  Estimated minimum orders of differentiation
398 |             numLags             (list)  Number of lags required for transformations
399 | 
400 |         Example:
401 |         --------
402 |                 # A pandas.DataFrame/np.array with potentially non-stationary time series
403 |             df 
404 |         
405 |                 # Automatic stationary transformation with minimal information loss
406 |             from tsfracdiff import FractionalDifferentiator
407 |             fracDiff = FractionalDifferentiator()
408 |             df = fracDiff.FitTransform(df)
409 |         """
410 |         self.maxOrderBound = maxOrderBound
411 |         self.significance = significance
412 |         self.precision = precision
413 |         self.memoryThreshold = memoryThreshold
414 |         
415 |         # Critical value checks
416 |         checkCV = False
417 |         cv_sig = None
418 |         if (self.significance in [0.01, 0.05, 0.1]):
419 |             checkCV = True
420 |             cv_sig = str(int(self.significance * 100)) + '%'
421 |         
422 |         # Unit-root/Stationarity tests
423 |         if unitRootTest == 'PP':
424 |             self.UnitRootTest = PhillipsPerron(significance=significance, checkCV=checkCV, cv_sig=cv_sig)
425 |         elif unitRootTest == 'ADF':
426 |             self.UnitRootTest = ADFuller(significance=significance, checkCV=checkCV, cv_sig=cv_sig)
427 |         else:
428 |             raise Exception('Please specify a valid unit root test.')
429 |         self.UnitRootTest.config.update( unitRootTestConfig )
430 | 
431 |         # States
432 |         self.isFitted = False
433 |         self.orders = []
434 |         self.numLags = None
435 |         
436 |     def Fit(self, df, parallel=True):
437 |         """
438 |         Estimates the fractional order of integration.
439 |         
440 |         Parameters:
441 |         -----------
442 |             df       (pandas.DataFrame/np.array) Raw data
443 |             parallel (bool) Use multiprocessing if true (default). Requires `joblib`.
444 |         """
445 |         df = pd.DataFrame(df)
446 |         
447 |         # Estimate minimum order of differencing
448 |         if parallel:
449 |             try:
450 |                 import multiprocessing
451 |                 from joblib import Parallel, delayed
452 |                 from functools import partial
453 |             except ImportError:
454 |                 raise Exception('The module `joblib` is required for parallelization.')
455 | 
456 |             def ApplyParallel(df, func, **kwargs):
457 |                 n_jobs = min(df.shape[1], multiprocessing.cpu_count())
458 |                 res = Parallel(n_jobs=n_jobs)( delayed(partial(func, **kwargs))(x) for x in np.array_split(df, df.shape[1], axis=1) )
459 |                 return res
460 |             orders = ApplyParallel(df, self._MinimumOrderSearch, upperOrder=self.maxOrderBound, first_run=True)
461 |         else:
462 |             orders = []
463 |             for j in range(df.shape[1]):
464 |                 orders.append( self._MinimumOrderSearch(df.iloc[:,j], upperOrder=self.maxOrderBound, first_run=True) )
465 |         self.orders = orders
466 |         self.numLags = [ (len(self._GetMemoryWeights(order, memoryThreshold=self.memoryThreshold)) - 1) for order in self.orders ]
467 |         self.isFitted = True
468 | 
469 |         return
470 |         
471 |     def FitTransform(self, df, parallel=True):
472 |         """
473 |         Estimates the fractional order of integration and returns a stationarized dataframe.
474 | 
475 |         Parameters
476 |         ----------
477 |             df       (pandas.DataFrame/np.array) Raw data
478 |             parallel (bool) Use multiprocessing if true (default). Requires `joblib`.
479 |         """
480 |         if not self.isFitted: 
481 |             self.Fit(df, parallel=parallel)
482 |         fracDiffed = self.Transform(df)
483 | 
484 |         return fracDiffed
485 |     
486 |     def Transform(self, df):
487 |         """
488 |         Applies a fractional differentiation transformation based on estimated orders.
489 | 
490 |         Parameters
491 |         ----------
492 |             df  (pandas.DataFrame/np.array) Raw data
493 |         """
494 |         if not self.isFitted: 
495 |             raise Exception('Fit the model first.')
496 |             
497 |         df = pd.DataFrame(df)
498 |         fracDiffed = []
499 |         for j in range(df.shape[1]):
500 |             x = self._FracDiff(df.iloc[:,j], order=self.orders[j])
501 |             fracDiffed.append( x )
502 |         fracDiffed = pd.concat(fracDiffed, axis=1).sort_index()
503 | 
504 |         return fracDiffed
505 |     
506 |     def InverseTransform(self, fracDiffed, lagData):
507 |         """
508 |         Applies a fractional integration transformation by inverting the fractional differentiation. 
509 | 
510 |         Note: The previous `K` values of the original time series are required to invert the transformation.
511 |         For multi-variate time series, `K` will likely vary across columns and you may find `K` with the
512 |         attribute `.numLags`. 
513 |         
514 |         Parameters
515 |         ----------
516 |             fracDiffed (pandas.DataFrame/np.array) Fractionally differentiated data
517 |             lagData    (pandas.DataFrame/np.array) Previous values of time series. See note.
518 | 
519 |         Example
520 |         -------
521 |             # Multi-variate Time Series/DataFrame
522 |             X                                           # Shape (1000, 2)
523 | 
524 |             # Stationarize
525 |             fracDiff = FractionalDifferentiator()
526 |             X_stationary = fracDiff.FitTransform( X )   # Shape (967, 2)
527 | 
528 |             # Estimated orders
529 |             orders = fracDiff.orders                    # [0.5703, 0.9141]
530 | 
531 |             # Required lagged values
532 |             numLags = fracDiff.numLags                  # [155, 33]
533 |             lagData = X.head(max(numLags))
534 | 
535 |             # Fractionally integrate by passing in the first 155 values
536 |             X_reconstructed = fracDiff.InverseTransform( X_stationary, lagData )    # Recovers the original X
537 |         """
538 |         if not self.isFitted: 
539 |             raise Exception('Fit the model first.')
540 | 
541 |         maxLags, minLags = max(self.numLags), min(self.numLags)
542 |         lagData = pd.DataFrame(lagData)
543 |         if lagData.shape[0] != maxLags:
544 |             raise Exception(f'The previous {maxLags} values are required.')
545 |         
546 |         fracDiffed = pd.DataFrame(fracDiffed)
547 |         X = []
548 |         for j in range(fracDiffed.shape[1]):
549 |             memoryWeights = self._GetMemoryWeights(self.orders[j], memoryThreshold=self.memoryThreshold)
550 |             K = self.numLags[j]
551 |             offset = K - minLags
552 | 
553 |             # Initial values
554 |             tsLagData = lagData.iloc[:K, j]
555 |             
556 |             # Transformed values
557 |             X_tilde = fracDiffed.iloc[offset:, j]
558 | 
559 |             # Already stationary: identity transform
560 |             if K == 0:
561 |                 X.append( X_tilde )
562 |                 continue
563 |             
564 |             # Iteratively invert transformation
565 |             X_vals = np.ravel(tsLagData.values)
566 |             X_tilde = np.ravel(X_tilde.values)
567 |             for t in range(len(X_tilde)):
568 |                 x = X_tilde[t] - np.sum( memoryWeights[:-1] * X_vals[-K:] )
569 |                 X_vals = np.append(X_vals, x)
570 |             X_vals = pd.Series(X_vals)
571 |             X.append( X_vals )
572 |         X = pd.concat(X, axis=1).sort_index()
573 |         X.columns = fracDiffed.columns
574 | 
575 |         # Check for duplicate indices
576 |         idx = lagData.index[:minLags].union( fracDiffed.index )
577 |         if len(idx) != X.shape[0]:
578 |             idx = [ t for t in range(X.shape[0]) ]
579 |         X.index = idx
580 | 
581 |         return X
582 | 
583 |     def _GetMemoryWeights(self, order, memoryThreshold=1e-4):
584 |         """
585 |         Returns an array of memory weights for each time lag.
586 | 
587 |         Parameters:
588 |         -----------
589 |             order           (float) Order of fracdiff
590 |             memoryThreshold (float) Minimum magnitude of weight significance
591 |         """
592 |         memoryWeights = [1,]
593 |         k = 1
594 |         while True:
595 |             weight = -memoryWeights[-1] * ( order - k + 1 ) / k # Iteratively generate next lag weight
596 |             if abs(weight) < memoryThreshold:
597 |                 break
598 |             memoryWeights.append(weight)
599 |             k += 1
600 |         return np.array(list(reversed(memoryWeights)))
601 |     
602 |     def _FracDiff(self, ts, order=1, memoryWeights=None):
603 |         """
604 |         Differentiates a time series based on a real-valued order.
605 | 
606 |         Parameters:
607 |         -----------
608 |             ts            (pandas.Series) Univariate time series
609 |             order         (float) Order of differentiation
610 |             memoryWeights (array) Optional pre-computed weights
611 |         """
612 |         if memoryWeights is None:
613 |             memoryWeights = self._GetMemoryWeights(order, memoryThreshold=self.memoryThreshold)
614 | 
615 |         K = len(memoryWeights)
616 |         fracDiffedSeries = ts.rolling(K).apply(lambda x: np.sum( x * memoryWeights ), raw=True)
617 |         fracDiffedSeries = fracDiffedSeries.iloc[(K-1):]
618 |         
619 |         return fracDiffedSeries
620 |     
621 |     def _MinimumOrderSearch(self, ts, lowerOrder=0, upperOrder=1, first_run=False):
622 |         """
623 |         Binary search algorithm for estimating the minimum order of differentiation required for stationarity.
624 |         
625 |         Parameters
626 |         ----------
627 |             ts                   (pandas.Series) Univariate time series
628 |             lowerOrder           (float) Lower bound on order
629 |             upperOrder           (float) Upper bound on order
630 |             first_run            (bool)  For testing endpoints of order bounds
631 |         """  
632 |         ## Convergence criteria
633 |         if abs( upperOrder - lowerOrder ) <= self.precision:
634 |             return upperOrder
635 |         
636 |         ## Initial run: Test endpoints
637 |         if first_run:
638 |             lowerFracDiff = self._FracDiff(ts, order=lowerOrder).dropna()
639 |             upperFracDiff = self._FracDiff(ts, order=upperOrder).dropna()
640 |             
641 |             # Unit root tests
642 |             lowerStationary = self.UnitRootTest.IsStationary( lowerFracDiff )
643 |             upperStationary = self.UnitRootTest.IsStationary( upperFracDiff )
644 | 
645 |             # Series is I(0)
646 |             if lowerStationary:
647 |                 return lowerOrder
648 |             # Series is I(k>>1)
649 |             if not upperStationary:                                                        
650 |                 print('Warning: Time series is explosive. Increase upper bounds.')
651 |                 return upperOrder
652 |             
653 |         ## Binary Search: Test midpoint
654 |         midOrder = ( lowerOrder + upperOrder ) / 2                                      
655 |         midFracDiff = self._FracDiff(ts, order=midOrder).dropna()
656 |         midStationary = self.UnitRootTest.IsStationary( midFracDiff )
657 |         
658 |         # Series is weakly stationary in [lowerOrder, midOrder]
659 |         if midStationary:
660 |             return self._MinimumOrderSearch(ts, lowerOrder=lowerOrder, upperOrder=midOrder)
661 |         # Series is weakly stationary in [midOrder, upperOrder]
662 |         else:
663 |             return self._MinimumOrderSearch(ts, lowerOrder=midOrder, upperOrder=upperOrder)
664 |
665 |

Methods

666 |
667 |
668 | def Fit(self, df, parallel=True) 669 |
670 |
671 |

Estimates the fractional order of integration.

672 |

Parameters:

673 |
df       (pandas.DataFrame/np.array) Raw data
674 | parallel (bool) Use multiprocessing if true (default). Requires <code>joblib</code>.
675 | 
676 |
677 | 678 | Expand source code 679 | 680 |
def Fit(self, df, parallel=True):
681 |     """
682 |     Estimates the fractional order of integration.
683 |     
684 |     Parameters:
685 |     -----------
686 |         df       (pandas.DataFrame/np.array) Raw data
687 |         parallel (bool) Use multiprocessing if true (default). Requires `joblib`.
688 |     """
689 |     df = pd.DataFrame(df)
690 |     
691 |     # Estimate minimum order of differencing
692 |     if parallel:
693 |         try:
694 |             import multiprocessing
695 |             from joblib import Parallel, delayed
696 |             from functools import partial
697 |         except ImportError:
698 |             raise Exception('The module `joblib` is required for parallelization.')
699 | 
700 |         def ApplyParallel(df, func, **kwargs):
701 |             n_jobs = min(df.shape[1], multiprocessing.cpu_count())
702 |             res = Parallel(n_jobs=n_jobs)( delayed(partial(func, **kwargs))(x) for x in np.array_split(df, df.shape[1], axis=1) )
703 |             return res
704 |         orders = ApplyParallel(df, self._MinimumOrderSearch, upperOrder=self.maxOrderBound, first_run=True)
705 |     else:
706 |         orders = []
707 |         for j in range(df.shape[1]):
708 |             orders.append( self._MinimumOrderSearch(df.iloc[:,j], upperOrder=self.maxOrderBound, first_run=True) )
709 |     self.orders = orders
710 |     self.numLags = [ (len(self._GetMemoryWeights(order, memoryThreshold=self.memoryThreshold)) - 1) for order in self.orders ]
711 |     self.isFitted = True
712 | 
713 |     return
714 |
715 |
716 |
717 | def FitTransform(self, df, parallel=True) 718 |
719 |
720 |

Estimates the fractional order of integration and returns a stationarized dataframe.

721 |

Parameters

722 |
df       (pandas.DataFrame/np.array) Raw data
723 | parallel (bool) Use multiprocessing if true (default). Requires <code>joblib</code>.
724 | 
725 |
726 | 727 | Expand source code 728 | 729 |
def FitTransform(self, df, parallel=True):
730 |     """
731 |     Estimates the fractional order of integration and returns a stationarized dataframe.
732 | 
733 |     Parameters
734 |     ----------
735 |         df       (pandas.DataFrame/np.array) Raw data
736 |         parallel (bool) Use multiprocessing if true (default). Requires `joblib`.
737 |     """
738 |     if not self.isFitted: 
739 |         self.Fit(df, parallel=parallel)
740 |     fracDiffed = self.Transform(df)
741 | 
742 |     return fracDiffed
743 |
744 |
745 |
746 | def InverseTransform(self, fracDiffed, lagData) 747 |
748 |
749 |

Applies a fractional integration transformation by inverting the fractional differentiation.

750 |

Note: The previous K values of the original time series are required to invert the transformation. 751 | For multi-variate time series, K will likely vary across columns and you may find K with the 752 | attribute .numLags.

753 |

Parameters

754 |
fracDiffed (pandas.DataFrame/np.array) Fractionally differentiated data
755 | lagData    (pandas.DataFrame/np.array) Previous values of time series. See note.
756 | 
757 |

Example

758 |
# Multi-variate Time Series/DataFrame
759 | X                                           # Shape (1000, 2)
760 | 
761 | # Stationarize
762 | fracDiff = FractionalDifferentiator()
763 | X_stationary = fracDiff.FitTransform( X )   # Shape (967, 2)
764 | 
765 | # Estimated orders
766 | orders = fracDiff.orders                    # [0.5703, 0.9141]
767 | 
768 | # Required lagged values
769 | numLags = fracDiff.numLags                  # [155, 33]
770 | lagData = X.head(max(numLags))
771 | 
772 | # Fractionally integrate by passing in the first 155 values
773 | X_reconstructed = fracDiff.InverseTransform( X_stationary, lagData )    # Recovers the original X
774 | 
775 |
776 | 777 | Expand source code 778 | 779 |
def InverseTransform(self, fracDiffed, lagData):
780 |     """
781 |     Applies a fractional integration transformation by inverting the fractional differentiation. 
782 | 
783 |     Note: The previous `K` values of the original time series are required to invert the transformation.
784 |     For multi-variate time series, `K` will likely vary across columns and you may find `K` with the
785 |     attribute `.numLags`. 
786 |     
787 |     Parameters
788 |     ----------
789 |         fracDiffed (pandas.DataFrame/np.array) Fractionally differentiated data
790 |         lagData    (pandas.DataFrame/np.array) Previous values of time series. See note.
791 | 
792 |     Example
793 |     -------
794 |         # Multi-variate Time Series/DataFrame
795 |         X                                           # Shape (1000, 2)
796 | 
797 |         # Stationarize
798 |         fracDiff = FractionalDifferentiator()
799 |         X_stationary = fracDiff.FitTransform( X )   # Shape (967, 2)
800 | 
801 |         # Estimated orders
802 |         orders = fracDiff.orders                    # [0.5703, 0.9141]
803 | 
804 |         # Required lagged values
805 |         numLags = fracDiff.numLags                  # [155, 33]
806 |         lagData = X.head(max(numLags))
807 | 
808 |         # Fractionally integrate by passing in the first 155 values
809 |         X_reconstructed = fracDiff.InverseTransform( X_stationary, lagData )    # Recovers the original X
810 |     """
811 |     if not self.isFitted: 
812 |         raise Exception('Fit the model first.')
813 | 
814 |     maxLags, minLags = max(self.numLags), min(self.numLags)
815 |     lagData = pd.DataFrame(lagData)
816 |     if lagData.shape[0] != maxLags:
817 |         raise Exception(f'The previous {maxLags} values are required.')
818 |     
819 |     fracDiffed = pd.DataFrame(fracDiffed)
820 |     X = []
821 |     for j in range(fracDiffed.shape[1]):
822 |         memoryWeights = self._GetMemoryWeights(self.orders[j], memoryThreshold=self.memoryThreshold)
823 |         K = self.numLags[j]
824 |         offset = K - minLags
825 | 
826 |         # Initial values
827 |         tsLagData = lagData.iloc[:K, j]
828 |         
829 |         # Transformed values
830 |         X_tilde = fracDiffed.iloc[offset:, j]
831 | 
832 |         # Already stationary: identity transform
833 |         if K == 0:
834 |             X.append( X_tilde )
835 |             continue
836 |         
837 |         # Iteratively invert transformation
838 |         X_vals = np.ravel(tsLagData.values)
839 |         X_tilde = np.ravel(X_tilde.values)
840 |         for t in range(len(X_tilde)):
841 |             x = X_tilde[t] - np.sum( memoryWeights[:-1] * X_vals[-K:] )
842 |             X_vals = np.append(X_vals, x)
843 |         X_vals = pd.Series(X_vals)
844 |         X.append( X_vals )
845 |     X = pd.concat(X, axis=1).sort_index()
846 |     X.columns = fracDiffed.columns
847 | 
848 |     # Check for duplicate indices
849 |     idx = lagData.index[:minLags].union( fracDiffed.index )
850 |     if len(idx) != X.shape[0]:
851 |         idx = [ t for t in range(X.shape[0]) ]
852 |     X.index = idx
853 | 
854 |     return X
855 |
856 |
857 |
858 | def Transform(self, df) 859 |
860 |
861 |

Applies a fractional differentiation transformation based on estimated orders.

862 |

Parameters

863 |
df  (pandas.DataFrame/np.array) Raw data
864 | 
865 |
866 | 867 | Expand source code 868 | 869 |
def Transform(self, df):
870 |     """
871 |     Applies a fractional differentiation transformation based on estimated orders.
872 | 
873 |     Parameters
874 |     ----------
875 |         df  (pandas.DataFrame/np.array) Raw data
876 |     """
877 |     if not self.isFitted: 
878 |         raise Exception('Fit the model first.')
879 |         
880 |     df = pd.DataFrame(df)
881 |     fracDiffed = []
882 |     for j in range(df.shape[1]):
883 |         x = self._FracDiff(df.iloc[:,j], order=self.orders[j])
884 |         fracDiffed.append( x )
885 |     fracDiffed = pd.concat(fracDiffed, axis=1).sort_index()
886 | 
887 |     return fracDiffed
888 |
889 |
890 |
891 |
892 |
893 |
894 |
895 | 921 |
922 | 925 | 926 | -------------------------------------------------------------------------------- /docs/tsfracdiff/unit_root_tests.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | tsfracdiff.unit_root_tests API documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 |
21 |
22 |

Module tsfracdiff.unit_root_tests

23 |
24 |
25 |
26 | 27 | Expand source code 28 | 29 |
import arch
 30 | from arch.unitroot import PhillipsPerron as PP
 31 | from arch.unitroot import ADF
 32 | 
 33 | ## TODO: Ng and Perron (2001)?
 34 | 
 35 | class PhillipsPerron:
 36 |     """
 37 |     Unit root testing via Phillips and Perron (1988). This test is robust to
 38 |     serial correlation and heteroskedasticity.
 39 | 
 40 |     References:
 41 |     -----------
 42 |     Phillips, P. C. B., & Perron, P. (1988). Testing for a unit root in time series regression. 
 43 |     Biometrika, 75(2), 335–346. https://doi.org/10.1093/biomet/75.2.335
 44 |     """
 45 |     
 46 |     def __init__(self, 
 47 |                 config={ 'trend' : 'n', 'test_type' : 'tau'}, 
 48 |                 significance=0.01,
 49 |                 checkCV=False, 
 50 |                 cv_sig=None):
 51 |         self.config = config
 52 |         self.significance = significance
 53 |         self.checkCV = checkCV
 54 |         self.cv_sig = cv_sig
 55 | 
 56 |     def IsStationary(self, ts):
 57 |         """
 58 |         Performs a unit root test.
 59 |         """
 60 | 
 61 |         testResults = PP(ts, trend=self.config['trend'], test_type=self.config['test_type'])
 62 |         pval, cv, stat = testResults.pvalue, testResults.critical_values, testResults.stat
 63 | 
 64 |         result = self.HypothesisTest(pval, cv, stat)
 65 | 
 66 |         return result
 67 | 
 68 |     def HypothesisTest(self, pval, cv, stat):
 69 |         """
 70 |         Null Hypothesis: Time series is integrated of order I(1)
 71 |         Alt Hypothesis: Time series is integrated of order I(k<1)
 72 |         """
 73 |         
 74 |         # Reject the hypothesis
 75 |         if (pval < self.significance) or ( self.checkCV and (stat < cv.get(self.cv_sig, 0)) ):
 76 |             return True
 77 |         # Fail to reject the hypothesis
 78 |         else:
 79 |             return False
 80 | 
 81 | class ADFuller:
 82 |     """
 83 |     Unit root testing via Said and Dickey (1984). This test assumes a parametric
 84 |     ARMA structure to correct for serial correlation but assumes the errors are homoskedastic.
 85 | 
 86 |     References:
 87 |     -----------
 88 |     Said E. Said, & Dickey, D. A. (1984). Testing for Unit Roots in Autoregressive-Moving Average 
 89 |     Models of Unknown Order. Biometrika, 71(3), 599–607. https://doi.org/10.2307/2336570
 90 |     """
 91 |     def __init__(self, 
 92 |                 config={ 'trend' : 'n', 'method' : 'AIC'}, 
 93 |                 significance=0.01,
 94 |                 checkCV=False, 
 95 |                 cv_sig=None):
 96 |         self.config = config
 97 |         self.significance = significance
 98 |         self.checkCV = checkCV
 99 |         self.cv_sig = cv_sig
100 | 
101 |         ## Compatibility workaround //
102 |         #   arch <= 4.17 uses capital letters but newer versions use lowercase
103 |         if (str(arch.__version__) > '4.17'):
104 |             if self.config.get('method') == 'AIC':
105 |                 self.config['method'] = 'aic'
106 |             elif self.config.get('method') == 'BIC':
107 |                 self.config['method'] = 'bic'
108 | 
109 |     def IsStationary(self, ts):
110 |         """
111 |         Performs a unit root test.
112 |         """
113 | 
114 |         testResults = ADF(ts, trend=self.config['trend'], method=self.config['method'])
115 |         pval, cv, stat = testResults.pvalue, testResults.critical_values, testResults.stat
116 | 
117 |         result = self.HypothesisTest(pval, cv, stat)
118 | 
119 |         return result
120 | 
121 |     def HypothesisTest(self, pval, cv, stat):
122 |         """
123 |         Null Hypothesis: Gamma = 0 (Unit root)
124 |         Alt Hypothesis: Gamma < 0
125 |         """
126 |         
127 |         # Reject the hypothesis
128 |         if (pval < self.significance) or ( self.checkCV and (stat < cv.get(self.cv_sig, 0)) ):
129 |             return True
130 |         # Fail to reject the hypothesis
131 |         else:
132 |             return False
133 | 
134 |     
135 | 
136 |     
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |

Classes

147 |
148 |
149 | class ADFuller 150 | (config={'trend': 'n', 'method': 'AIC'}, significance=0.01, checkCV=False, cv_sig=None) 151 |
152 |
153 |

Unit root testing via Said and Dickey (1984). This test assumes a parametric 154 | ARMA structure to correct for serial correlation but assumes the errors are homoskedastic.

155 |

References:

156 |

Said E. Said, & Dickey, D. A. (1984). Testing for Unit Roots in Autoregressive-Moving Average 157 | Models of Unknown Order. Biometrika, 71(3), 599–607. https://doi.org/10.2307/2336570

158 |
159 | 160 | Expand source code 161 | 162 |
class ADFuller:
163 |     """
164 |     Unit root testing via Said and Dickey (1984). This test assumes a parametric
165 |     ARMA structure to correct for serial correlation but assumes the errors are homoskedastic.
166 | 
167 |     References:
168 |     -----------
169 |     Said E. Said, & Dickey, D. A. (1984). Testing for Unit Roots in Autoregressive-Moving Average 
170 |     Models of Unknown Order. Biometrika, 71(3), 599–607. https://doi.org/10.2307/2336570
171 |     """
172 |     def __init__(self, 
173 |                 config={ 'trend' : 'n', 'method' : 'AIC'}, 
174 |                 significance=0.01,
175 |                 checkCV=False, 
176 |                 cv_sig=None):
177 |         self.config = config
178 |         self.significance = significance
179 |         self.checkCV = checkCV
180 |         self.cv_sig = cv_sig
181 | 
182 |         ## Compatibility workaround //
183 |         #   arch <= 4.17 uses capital letters but newer versions use lowercase
184 |         if (str(arch.__version__) > '4.17'):
185 |             if self.config.get('method') == 'AIC':
186 |                 self.config['method'] = 'aic'
187 |             elif self.config.get('method') == 'BIC':
188 |                 self.config['method'] = 'bic'
189 | 
190 |     def IsStationary(self, ts):
191 |         """
192 |         Performs a unit root test.
193 |         """
194 | 
195 |         testResults = ADF(ts, trend=self.config['trend'], method=self.config['method'])
196 |         pval, cv, stat = testResults.pvalue, testResults.critical_values, testResults.stat
197 | 
198 |         result = self.HypothesisTest(pval, cv, stat)
199 | 
200 |         return result
201 | 
202 |     def HypothesisTest(self, pval, cv, stat):
203 |         """
204 |         Null Hypothesis: Gamma = 0 (Unit root)
205 |         Alt Hypothesis: Gamma < 0
206 |         """
207 |         
208 |         # Reject the hypothesis
209 |         if (pval < self.significance) or ( self.checkCV and (stat < cv.get(self.cv_sig, 0)) ):
210 |             return True
211 |         # Fail to reject the hypothesis
212 |         else:
213 |             return False
214 |
215 |

Methods

def HypothesisTest(self, pval, cv, stat)

    Null Hypothesis: Gamma = 0 (Unit root)
    Alt Hypothesis: Gamma < 0

def IsStationary(self, ts)

    Performs a unit root test.

class PhillipsPerron (config={'trend': 'n', 'test_type': 'tau'}, significance=0.01, checkCV=False, cv_sig=None)

    Unit root testing via Phillips and Perron (1988). This test is robust to
    serial correlation and heteroskedasticity.

    References:
    Phillips, P. C. B., & Perron, P. (1988). Testing for a unit root in time series regression.
    Biometrika, 75(2), 335–346. https://doi.org/10.1093/biomet/75.2.335

Methods

def HypothesisTest(self, pval, cv, stat)

    Null Hypothesis: Time series is integrated of order I(1)
    Alt Hypothesis: Time series is integrated of order I(k<1)

def IsStationary(self, ts)

    Performs a unit root test.
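Both wrapper classes documented above reduce an arch unit root test to a single boolean: IsStationary returns True when the null of a unit root is rejected at the configured significance level, and False otherwise. A minimal usage sketch (the simulated series, seed, and variable names below are illustrative, not part of the package):

import numpy as np
import pandas as pd
from tsfracdiff import ADFuller, PhillipsPerron

np.random.seed(0)
noise = pd.Series(np.random.normal(size=500))  # stationary white noise
walk = noise.cumsum()                          # random walk, i.e. a unit root process

pp = PhillipsPerron(significance=0.01)
print(pp.IsStationary(noise))   # True: the unit root null should be rejected
print(pp.IsStationary(walk))    # False: fail to reject the unit root null

adf = ADFuller(significance=0.01)
print(adf.IsStationary(noise))  # should likewise be True under the ADF variant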
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools"]
3 | build-backend = "setuptools.build_meta"
4 | 
5 | [project]
6 | name = "tsfracdiff"
7 | description = "Efficient and easy to use fractional differentiation transformations for stationarizing time series data."
8 | authors = [
9 |     {name = "Adam Wu"},
10 |     {email = "adamwu1@outlook.com"}
11 | ]
12 | readme = "README.md"
13 | license = {file = "LICENSE"}
14 | requires-python = ">=3.7"
15 | dependencies = [
16 |     "numpy",
17 |     "pandas",
18 |     "arch",
19 |     "joblib"
20 | ]
21 | classifiers = [
22 |     'Intended Audience :: Science/Research',
23 |     'Topic :: Scientific/Engineering :: Information Analysis',
24 |     'Programming Language :: Python :: 3 ',
25 |     'Operating System :: OS Independent',
26 |     'License :: OSI Approved :: MIT License'
27 | ]
28 | dynamic = [ "version" ]
29 | 
30 | [tool.setuptools.dynamic]
31 | version = {attr = "tsfracdiff.__version__"}
32 | 
33 | [project.urls]
34 | homepage = "https://github.com/adamvvu/tsfracdiff"
35 | documentation = "https://github.com/adamvvu/tsfracdiff"
36 | repository = "https://github.com/adamvvu/tsfracdiff"
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Required Dependencies
2 | numpy
3 | pandas
4 | arch <= 4.17; python_version == '3.6.*'
5 | arch; python_version >= '3.7'
6 | 
7 | # Suggested/Optional
8 | joblib
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | license_files = LICENSE
3 | version = attr: tsfracdiff.__version__
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, Command
2 | from codecs import open
3 | from os import path
4 | 
5 | currPath = path.abspath(path.dirname(__file__))
6 | 
7 | # Parse README
8 | with open(path.join(currPath, 'README.md'), encoding='utf-8') as f:
9 |     long_description = f.read()
10 | 
11 | # Parse version
12 | with open(path.join(currPath, 'tsfracdiff', '__init__.py')) as f:
13 |     for line in f:
14 |         if line.startswith('__version__'):
15 |             version = line.split('"')[1]
16 | 
17 | setup(
18 |     name='tsfracdiff',
19 |     description='Efficient and easy to use fractional differentiation transformations for stationarizing time series data.',
20 |     version=version,
21 |     long_description=long_description,
22 |     long_description_content_type='text/markdown',
23 |     url='https://github.com/adamvvu/tsfracdiff',
24 |     author='Adam Wu',
25 |     author_email='adamwu1@outlook.com',
26 |     packages=['tsfracdiff'],
27 |     classifiers=[
28 |         'Intended Audience :: Science/Research',
29 |         'Topic :: Scientific/Engineering :: Information Analysis',
30 |         'Programming Language :: Python',
31 |         'Operating System :: OS Independent',
32 |         'License :: OSI Approved :: MIT License'
33 |     ],
34 |     python_requires='>=3.7',
35 |     install_requires=[
36 |         'numpy',
37 |         'pandas',
38 |         'arch',
39 |         'joblib'
40 |     ],
41 |     license_files = ('LICENSE',),
42 | )
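All of the packaging files above resolve the distribution version from a single source: the __version__ string in tsfracdiff/__init__.py (setuptools dynamic metadata in pyproject.toml and setup.cfg, and the manual parse in setup.py). A quick check of the installed version, assuming the package has been installed:

import tsfracdiff
print(tsfracdiff.__version__)   # e.g. "1.0.4", the value defined in tsfracdiff/__init__.py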
--------------------------------------------------------------------------------
/tests/test_module.py:
--------------------------------------------------------------------------------
1 | from tsfracdiff import *
2 | import numpy as np
3 | import pandas as pd
4 | np.random.seed(42)
5 | import pytest
6 | 
7 | def _GenerateData():
8 | 
9 |     T = 1000
10 |     K = 5
11 | 
12 |     df = [ np.array([1 for k in range(K)]) ]
13 |     mu = np.random.normal(0, 0.25, size=(K))
14 |     for t in range(T-1):
15 |         d_t = mu + df[-1] + np.random.normal(0, 1, size=(K))
16 |         df.append( d_t )
17 |     df = pd.DataFrame(np.vstack(df))
18 |     return df
19 | 
20 | def _TestStationary( df_frac, fracDiff ):
21 |     if isinstance(df_frac, pd.DataFrame):
22 |         for k in range(df_frac.shape[1]):
23 |             assert fracDiff.UnitRootTest.IsStationary( df_frac.iloc[:,k].dropna() )
24 |     elif isinstance(df_frac, np.ndarray):
25 |         for k in range(df_frac.shape[1]):
26 |             assert fracDiff.UnitRootTest.IsStationary( pd.Series(df_frac[:,k]).dropna() )
27 |     else:
28 |         raise Exception('Invalid datatype returned.')
29 | 
30 |     return
31 | 
32 | def _TestFracDiff( df, unitRootTest, parallel=True ):
33 |     fracDiff = FractionalDifferentiator(unitRootTest=unitRootTest)
34 |     df_frac = fracDiff.FitTransform( df, parallel=parallel )
35 |     _TestStationary( df_frac, fracDiff )
36 |     return df_frac
37 | 
38 | def _TestAutoFracDiff( df, unitRootTest ):
39 |     """
40 |     Test automatic fit-transform
41 |     """
42 |     df_frac_par = _TestFracDiff( df, unitRootTest=unitRootTest, parallel=True )
43 |     df_frac_seq = _TestFracDiff( df, unitRootTest=unitRootTest, parallel=False )
44 |     assert np.allclose(df_frac_par.values, df_frac_seq.values, equal_nan=True)
45 |     print('AutoFracDiff: OK')
46 |     return
47 | 
48 | def _TestInvTransform( df, unitRootTest ):
49 |     """
50 |     Test inverse-transform
51 |     """
52 |     fracDiff = FractionalDifferentiator(unitRootTest=unitRootTest)
53 |     df_frac = fracDiff.FitTransform( df )
54 |     df_inv = fracDiff.InverseTransform( df_frac, lagData=df.head(max(fracDiff.numLags)) )
55 |     assert np.allclose(df.values, df_inv.values, equal_nan=True)
56 |     print('InvTransform: OK')
57 |     return
58 | 
59 | def test_RunAllTests():
60 | 
61 |     df = _GenerateData()
62 | 
63 |     unitRootTests = ['PP', 'ADF']
64 |     for unitRootTest in unitRootTests:
65 |         print(f'Testing {unitRootTest}')
66 |         _TestAutoFracDiff( df, unitRootTest=unitRootTest )
67 |         _TestInvTransform( df, unitRootTest=unitRootTest )
68 | 
69 |     return
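The test suite above exercises both the parallel and sequential code paths and checks that InverseTransform recovers the original data from a simulated I(1) panel. A condensed, standalone version of the same idea on a single simulated random walk (series, seed, and names are illustrative):

import numpy as np
import pandas as pd
from tsfracdiff import FractionalDifferentiator

np.random.seed(0)
x = pd.Series(np.random.normal(size=1000)).cumsum()   # univariate random walk, I(1) by construction

fracDiff = FractionalDifferentiator()
x_stat = fracDiff.FitTransform(x, parallel=False)

print(fracDiff.orders)   # a single estimated order in (0, 1]; the exact value depends on the draw
assert fracDiff.UnitRootTest.IsStationary(x_stat.iloc[:, 0].dropna())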
--------------------------------------------------------------------------------
/tsfracdiff/__init__.py:
--------------------------------------------------------------------------------
1 | from .unit_root_tests import *
2 | from .tsfracdiff import *
3 | 
4 | __version__ = "1.0.4"
--------------------------------------------------------------------------------
/tsfracdiff/tsfracdiff.py:
--------------------------------------------------------------------------------
1 | from .unit_root_tests import *
2 | 
3 | import pandas as pd
4 | import numpy as np
5 | 
6 | class FractionalDifferentiator:
7 | 
8 |     def __init__(self, maxOrderBound=1, significance=0.01, precision=0.01, memoryThreshold=1e-4,
9 |                  unitRootTest='PP', unitRootTestConfig={}):
10 |         """
11 |         Estimates the real-valued order of integration and provides fractional
12 |         differentiation data transformations.
13 | 
14 |         The available stationarity/unit root tests are:
15 |         -----------------------------------------------
16 |         - 'PP' : Phillips and Perron (1988) [default]
17 |         - 'ADF' : Augmented Dickey-Fuller (Said & Dickey, 1984)
18 | 
19 |         Parameters:
20 |         -----------
21 |         maxOrderBound      (float) Maximum real-valued order to search in (0, maxOrderBound)
22 |         significance       (float) Statistical significance level
23 |         precision          (float) Precision of estimated order
24 |         memoryThreshold    (float) Minimum magnitude of weight significance
25 |         unitRootTest       (str)   Unit-root/stationarity tests: ['PP','ADF']
26 |         unitRootTestConfig (dict)  Optional keyword arguments to pass to unit root tests
27 | 
28 |         Attributes:
29 |         -----------
30 |         orders  (list) Estimated minimum orders of differentiation
31 |         numLags (list) Number of lags required for transformations
32 | 
33 |         Example:
34 |         --------
35 |         # A pandas.DataFrame/np.array with potentially non-stationary time series
36 |         df
37 | 
38 |         # Automatic stationary transformation with minimal information loss
39 |         from tsfracdiff import FractionalDifferentiator
40 |         fracDiff = FractionalDifferentiator()
41 |         df = fracDiff.FitTransform(df)
42 |         """
43 |         self.maxOrderBound = maxOrderBound
44 |         self.significance = significance
45 |         self.precision = precision
46 |         self.memoryThreshold = memoryThreshold
47 | 
48 |         # Critical value checks
49 |         checkCV = False
50 |         cv_sig = None
51 |         if (self.significance in [0.01, 0.05, 0.1]):
52 |             checkCV = True
53 |             cv_sig = str(int(self.significance * 100)) + '%'
54 | 
55 |         # Unit-root/Stationarity tests
56 |         if unitRootTest == 'PP':
57 |             self.UnitRootTest = PhillipsPerron(significance=significance, checkCV=checkCV, cv_sig=cv_sig)
58 |         elif unitRootTest == 'ADF':
59 |             self.UnitRootTest = ADFuller(significance=significance, checkCV=checkCV, cv_sig=cv_sig)
60 |         else:
61 |             raise Exception('Please specify a valid unit root test.')
62 |         self.UnitRootTest.config.update( unitRootTestConfig )
63 | 
64 |         # States
65 |         self.isFitted = False
66 |         self.orders = []
67 |         self.numLags = None
68 | 
69 |     def Fit(self, df, parallel=True):
70 |         """
71 |         Estimates the fractional order of integration.
72 | 
73 |         Parameters:
74 |         -----------
75 |         df       (pandas.DataFrame/np.array) Raw data
76 |         parallel (bool) Use multiprocessing if true (default). Requires `joblib`.
77 |         """
78 |         df = pd.DataFrame(df)
79 | 
80 |         # Estimate minimum order of differencing
81 |         if parallel:
82 |             try:
83 |                 import multiprocessing
84 |                 from joblib import Parallel, delayed
85 |                 from functools import partial
86 |             except ImportError:
87 |                 raise Exception('The module `joblib` is required for parallelization.')
88 | 
89 |             def ApplyParallel(df, func, **kwargs):
90 |                 n_jobs = min(df.shape[1], multiprocessing.cpu_count())
91 |                 res = Parallel(n_jobs=n_jobs)( delayed(partial(func, **kwargs))(x) for x in np.array_split(df, df.shape[1], axis=1) )
92 |                 return res
93 |             orders = ApplyParallel(df, self._MinimumOrderSearch, upperOrder=self.maxOrderBound, first_run=True)
94 |         else:
95 |             orders = []
96 |             for j in range(df.shape[1]):
97 |                 orders.append( self._MinimumOrderSearch(df.iloc[:,j], upperOrder=self.maxOrderBound, first_run=True) )
98 |         self.orders = orders
99 |         self.numLags = [ (len(self._GetMemoryWeights(order, memoryThreshold=self.memoryThreshold)) - 1) for order in self.orders ]
100 |         self.isFitted = True
101 | 
102 |         return
103 | 
104 |     def FitTransform(self, df, parallel=True):
105 |         """
106 |         Estimates the fractional order of integration and returns a stationarized dataframe.
107 | 
108 |         Parameters
109 |         ----------
110 |         df       (pandas.DataFrame/np.array) Raw data
111 |         parallel (bool) Use multiprocessing if true (default). Requires `joblib`.
112 |         """
113 |         if not self.isFitted:
114 |             self.Fit(df, parallel=parallel)
115 |         fracDiffed = self.Transform(df)
116 | 
117 |         return fracDiffed
118 | 
119 |     def Transform(self, df):
120 |         """
121 |         Applies a fractional differentiation transformation based on estimated orders.
122 | 
123 |         Parameters
124 |         ----------
125 |         df (pandas.DataFrame/np.array) Raw data
126 |         """
127 |         if not self.isFitted:
128 |             raise Exception('Fit the model first.')
129 | 
130 |         df = pd.DataFrame(df)
131 |         fracDiffed = []
132 |         for j in range(df.shape[1]):
133 |             x = self._FracDiff(df.iloc[:,j], order=self.orders[j])
134 |             fracDiffed.append( x )
135 |         fracDiffed = pd.concat(fracDiffed, axis=1).sort_index()
136 | 
137 |         return fracDiffed
138 | 
139 |     def InverseTransform(self, fracDiffed, lagData):
140 |         """
141 |         Applies a fractional integration transformation by inverting the fractional differentiation.
142 | 
143 |         Note: The previous `K` values of the original time series are required to invert the transformation.
144 |               For multi-variate time series, `K` will likely vary across columns and you may find `K` with the
145 |               attribute `.numLags`.
146 | 
147 |         Parameters
148 |         ----------
149 |         fracDiffed (pandas.DataFrame/np.array) Fractionally differentiated data
150 |         lagData    (pandas.DataFrame/np.array) Previous values of time series. See note.
151 | 
152 |         Example
153 |         -------
154 |         # Multi-variate Time Series/DataFrame
155 |         X # Shape (1000, 2)
156 | 
157 |         # Stationarize
158 |         fracDiff = FractionalDifferentiator()
159 |         X_stationary = fracDiff.FitTransform( X ) # Shape (967, 2)
160 | 
161 |         # Estimated orders
162 |         orders = fracDiff.orders # [0.5703, 0.9141]
163 | 
164 |         # Required lagged values
165 |         numLags = fracDiff.numLags # [155, 33]
166 |         lagData = X.head(max(numLags))
167 | 
168 |         # Fractionally integrate by passing in the first 155 values
169 |         X_reconstructed = fracDiff.InverseTransform( X_stationary, lagData ) # Recovers the original X
170 |         """
171 |         if not self.isFitted:
172 |             raise Exception('Fit the model first.')
173 | 
174 |         maxLags, minLags = max(self.numLags), min(self.numLags)
175 |         lagData = pd.DataFrame(lagData)
176 |         if lagData.shape[0] != maxLags:
177 |             raise Exception(f'The previous {maxLags} values are required.')
178 | 
179 |         fracDiffed = pd.DataFrame(fracDiffed)
180 |         X = []
181 |         for j in range(fracDiffed.shape[1]):
182 |             memoryWeights = self._GetMemoryWeights(self.orders[j], memoryThreshold=self.memoryThreshold)
183 |             K = self.numLags[j]
184 |             offset = K - minLags
185 | 
186 |             # Initial values
187 |             tsLagData = lagData.iloc[:K, j]
188 | 
189 |             # Transformed values
190 |             X_tilde = fracDiffed.iloc[offset:, j]
191 | 
192 |             # Already stationary: identity transform
193 |             if K == 0:
194 |                 X.append( X_tilde )
195 |                 continue
196 | 
197 |             # Iteratively invert transformation
198 |             X_vals = np.ravel(tsLagData.values)
199 |             X_tilde = np.ravel(X_tilde.values)
200 |             for t in range(len(X_tilde)):
201 |                 x = X_tilde[t] - np.sum( memoryWeights[:-1] * X_vals[-K:] )
202 |                 X_vals = np.append(X_vals, x)
203 |             X_vals = pd.Series(X_vals)
204 |             X.append( X_vals )
205 |         X = pd.concat(X, axis=1).sort_index()
206 |         X.columns = fracDiffed.columns
207 | 
208 |         # Check for duplicate indices
209 |         idx = lagData.index[:minLags].union( fracDiffed.index )
210 |         if len(idx) != X.shape[0]:
211 |             idx = [ t for t in range(X.shape[0]) ]
212 |         X.index = idx
213 | 
214 |         return X
215 | 
216 |     def _GetMemoryWeights(self, order, memoryThreshold=1e-4):
217 |         """
218 |         Returns an array of memory weights for each time lag.
219 | 
220 |         Parameters:
221 |         -----------
222 |         order           (float) Order of fracdiff
223 |         memoryThreshold (float) Minimum magnitude of weight significance
224 |         """
225 |         memoryWeights = [1,]
226 |         k = 1
227 |         while True:
228 |             weight = -memoryWeights[-1] * ( order - k + 1 ) / k # Iteratively generate next lag weight
229 |             if abs(weight) < memoryThreshold:
230 |                 break
231 |             memoryWeights.append(weight)
232 |             k += 1
233 |         return np.array(list(reversed(memoryWeights)))
234 | 
235 |     def _FracDiff(self, ts, order=1, memoryWeights=None):
236 |         """
237 |         Differentiates a time series based on a real-valued order.
238 | 
239 |         Parameters:
240 |         -----------
241 |         ts            (pandas.Series) Univariate time series
242 |         order         (float) Order of differentiation
243 |         memoryWeights (array) Optional pre-computed weights
244 |         """
245 |         if memoryWeights is None:
246 |             memoryWeights = self._GetMemoryWeights(order, memoryThreshold=self.memoryThreshold)
247 | 
248 |         K = len(memoryWeights)
249 |         fracDiffedSeries = ts.rolling(K).apply(lambda x: np.sum( x * memoryWeights ), raw=True)
250 |         fracDiffedSeries = fracDiffedSeries.iloc[(K-1):]
251 | 
252 |         return fracDiffedSeries
253 | 
254 |     def _MinimumOrderSearch(self, ts, lowerOrder=0, upperOrder=1, first_run=False):
255 |         """
256 |         Binary search algorithm for estimating the minimum order of differentiation required for stationarity.
257 | 
258 |         Parameters
259 |         ----------
260 |         ts         (pandas.Series) Univariate time series
261 |         lowerOrder (float) Lower bound on order
262 |         upperOrder (float) Upper bound on order
263 |         first_run  (bool)  For testing endpoints of order bounds
264 |         """
265 |         ## Convergence criteria
266 |         if abs( upperOrder - lowerOrder ) <= self.precision:
267 |             return upperOrder
268 | 
269 |         ## Initial run: Test endpoints
270 |         if first_run:
271 |             lowerFracDiff = self._FracDiff(ts, order=lowerOrder).dropna()
272 |             upperFracDiff = self._FracDiff(ts, order=upperOrder).dropna()
273 | 
274 |             # Unit root tests
275 |             lowerStationary = self.UnitRootTest.IsStationary( lowerFracDiff )
276 |             upperStationary = self.UnitRootTest.IsStationary( upperFracDiff )
277 | 
278 |             # Series is I(0)
279 |             if lowerStationary:
280 |                 return lowerOrder
281 |             # Series is I(k>>1)
282 |             if not upperStationary:
283 |                 print('Warning: Time series is explosive. Increase upper bounds.')
284 |                 return upperOrder
285 | 
286 |         ## Binary Search: Test midpoint
287 |         midOrder = ( lowerOrder + upperOrder ) / 2
288 |         midFracDiff = self._FracDiff(ts, order=midOrder).dropna()
289 |         midStationary = self.UnitRootTest.IsStationary( midFracDiff )
290 | 
291 |         # Series is weakly stationary in [lowerOrder, midOrder]
292 |         if midStationary:
293 |             return self._MinimumOrderSearch(ts, lowerOrder=lowerOrder, upperOrder=midOrder)
294 |         # Series is weakly stationary in [midOrder, upperOrder]
295 |         else:
296 |             return self._MinimumOrderSearch(ts, lowerOrder=midOrder, upperOrder=upperOrder)
297 | 
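The core of the class above is the truncated binomial expansion of (1 - L)^d: _GetMemoryWeights builds the lag weights with the recursion w_0 = 1, w_k = -w_{k-1} * (d - k + 1) / k, stopping once |w_k| < memoryThreshold, and _FracDiff applies them as a rolling dot product. A small sketch that checks this against the (internal) helpers above; the toy series is illustrative:

import numpy as np
import pandas as pd
from tsfracdiff import FractionalDifferentiator

fracDiff = FractionalDifferentiator()

# For d = 0.5 the recursion gives 1, -0.5, -0.125, -0.0625, ...; the returned array is
# reversed, so the weight on the current observation (1) sits at the end.
w = fracDiff._GetMemoryWeights(0.5)
print(w[::-1][:4])   # [ 1.     -0.5    -0.125  -0.0625]

# _FracDiff is a rolling dot product of each length-K window with these weights
ts = pd.Series(np.arange(500, dtype=float))
K = len(w)
manual = ts.rolling(K).apply(lambda x: np.sum(x * w), raw=True).iloc[K - 1:]
assert manual.equals(fracDiff._FracDiff(ts, order=0.5))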
--------------------------------------------------------------------------------
/tsfracdiff/unit_root_tests.py:
--------------------------------------------------------------------------------
1 | import arch
2 | from arch.unitroot import PhillipsPerron as PP
3 | from arch.unitroot import ADF
4 | 
5 | ## TODO: Ng and Perron (2001)?
6 | 
7 | class PhillipsPerron:
8 |     """
9 |     Unit root testing via Phillips and Perron (1988). This test is robust to
10 |     serial correlation and heteroskedasticity.
11 | 
12 |     References:
13 |     -----------
14 |     Phillips, P. C. B., & Perron, P. (1988). Testing for a unit root in time series regression.
15 |     Biometrika, 75(2), 335–346. https://doi.org/10.1093/biomet/75.2.335
16 |     """
17 | 
18 |     def __init__(self, 
19 |                 config={ 'trend' : 'n', 'test_type' : 'tau'}, 
20 |                 significance=0.01,
21 |                 checkCV=False, 
22 |                 cv_sig=None):
23 |         self.config = config
24 |         self.significance = significance
25 |         self.checkCV = checkCV
26 |         self.cv_sig = cv_sig
27 | 
28 |     def IsStationary(self, ts):
29 |         """
30 |         Performs a unit root test.
31 |         """
32 | 
33 |         testResults = PP(ts, trend=self.config['trend'], test_type=self.config['test_type'])
34 |         pval, cv, stat = testResults.pvalue, testResults.critical_values, testResults.stat
35 | 
36 |         result = self.HypothesisTest(pval, cv, stat)
37 | 
38 |         return result
39 | 
40 |     def HypothesisTest(self, pval, cv, stat):
41 |         """
42 |         Null Hypothesis: Time series is integrated of order I(1)
43 |         Alt Hypothesis: Time series is integrated of order I(k<1)
44 |         """
45 | 
46 |         # Reject the hypothesis
47 |         if (pval < self.significance) or ( self.checkCV and (stat < cv.get(self.cv_sig, 0)) ):
48 |             return True
49 |         # Fail to reject the hypothesis
50 |         else:
51 |             return False
52 | 
53 | class ADFuller:
54 |     """
55 |     Unit root testing via Said and Dickey (1984). This test assumes a parametric
56 |     ARMA structure to correct for serial correlation but assumes the errors are homoskedastic.
57 | 
58 |     References:
59 |     -----------
60 |     Said E. Said, & Dickey, D. A. (1984). Testing for Unit Roots in Autoregressive-Moving Average
61 |     Models of Unknown Order. Biometrika, 71(3), 599–607. https://doi.org/10.2307/2336570
62 |     """
63 |     def __init__(self, 
64 |                 config={ 'trend' : 'n', 'method' : 'AIC'}, 
65 |                 significance=0.01,
66 |                 checkCV=False, 
67 |                 cv_sig=None):
68 |         self.config = config
69 |         self.significance = significance
70 |         self.checkCV = checkCV
71 |         self.cv_sig = cv_sig
72 | 
73 |         ## Compatibility workaround //
74 |         # arch <= 4.17 uses capital letters but newer versions use lowercase
75 |         if (str(arch.__version__) > '4.17'):
76 |             if self.config.get('method') == 'AIC':
77 |                 self.config['method'] = 'aic'
78 |             elif self.config.get('method') == 'BIC':
79 |                 self.config['method'] = 'bic'
80 | 
81 |     def IsStationary(self, ts):
82 |         """
83 |         Performs a unit root test.
84 |         """
85 | 
86 |         testResults = ADF(ts, trend=self.config['trend'], method=self.config['method'])
87 |         pval, cv, stat = testResults.pvalue, testResults.critical_values, testResults.stat
88 | 
89 |         result = self.HypothesisTest(pval, cv, stat)
90 | 
91 |         return result
92 | 
93 |     def HypothesisTest(self, pval, cv, stat):
94 |         """
95 |         Null Hypothesis: Gamma = 0 (Unit root)
96 |         Alt Hypothesis: Gamma < 0
97 |         """
98 | 
99 |         # Reject the hypothesis
100 |         if (pval < self.significance) or ( self.checkCV and (stat < cv.get(self.cv_sig, 0)) ):
101 |             return True
102 |         # Fail to reject the hypothesis
103 |         else:
104 |             return False
105 | 
106 | 
107 | 
108 | 
--------------------------------------------------------------------------------
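Both wrappers pass their config entries straight through to the corresponding arch call, and FractionalDifferentiator merges anything supplied as unitRootTestConfig into that dict via config.update. A brief sketch of overriding the defaults; trend='c' (include a constant) is one of the standard arch trend options and is used here purely as an example:

import numpy as np
import pandas as pd
from tsfracdiff import FractionalDifferentiator

np.random.seed(0)
df = pd.DataFrame(np.random.normal(size=(1000, 2))).cumsum()   # two simulated random walks

# Phillips-Perron with a constant term and a 5% significance level instead of the defaults
fracDiff = FractionalDifferentiator(significance=0.05, unitRootTest='PP',
                                    unitRootTestConfig={'trend': 'c'})
df_stat = fracDiff.FitTransform(df)
print(fracDiff.orders, fracDiff.numLags)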