├── .circleci
│   └── config.yml
├── .codecov.yml
├── .coveragerc
├── .github
│   ├── CONTRIBUTING.md
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.yml
│   │   ├── feature_request.yml
│   │   └── question.yml
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── stale.yml
│   ├── utils
│   │   └── get_dependency_releases.py
│   └── workflows
│       ├── badges.yml
│       ├── build_and_deploy.yml
│       ├── nightly_cron.yml
│       └── test_tagging.yml
├── .gitignore
├── AUTHORS.md
├── CITATION.cff
├── CODE_OF_CONDUCT.md
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── benchmarks
│   ├── Benchmarking Seasonality.ipynb
│   ├── bench_autoarima.ipynb
│   ├── benchmark_funcs.py
│   └── item_sales_daily.csv.gz
├── build_tools
│   ├── build_requirements.txt
│   ├── circle
│   │   ├── build_doc.sh
│   │   ├── build_wheel.sh
│   │   ├── deploy.sh
│   │   ├── deploy_doc.sh
│   │   ├── dind
│   │   │   ├── README.md
│   │   │   ├── build_manylinux_wheel.sh
│   │   │   └── install_and_test.sh
│   │   ├── get_version.sh
│   │   └── test_unit.sh
│   ├── get_tag.py
│   └── github
│       ├── deploy.sh
│       ├── get_latest_dependencies.py
│       └── test_version_tagging.sh
├── doc
│   ├── Makefile
│   ├── _static
│   │   ├── css
│   │   │   ├── fields.css
│   │   │   └── gitcontrib.css
│   │   └── js
│   │       ├── contrib.js
│   │       └── jquery.min.js
│   ├── _templates
│   │   ├── class.rst
│   │   ├── class_with_call.rst
│   │   ├── class_without_init.rst
│   │   ├── function.rst
│   │   └── numpydoc_docstring.rst
│   ├── about.rst
│   ├── citing.rst
│   ├── conf.py
│   ├── contributing.rst
│   ├── contributors.rst
│   ├── img
│   │   ├── bad_issue.png
│   │   ├── favicon.ico
│   │   ├── good_issue.png
│   │   ├── lynx_autocorr.png
│   │   ├── m_matters.png
│   │   ├── stock_forecasts.png
│   │   ├── stock_lag_plot.png
│   │   └── sunspots
│   │       ├── bc-transformed.png
│   │       ├── log-transformed.png
│   │       ├── model-1.png
│   │       ├── model-2.png
│   │       └── untransformed.png
│   ├── includes
│   │   └── api_css.rst
│   ├── index.rst
│   ├── make.bat
│   ├── migration-guide.rst
│   ├── modules
│   │   ├── classes.rst
│   │   └── datasets.rst
│   ├── no-successful-model.rst
│   ├── quickstart.rst
│   ├── refreshing.rst
│   ├── rfc
│   │   ├── 372-exog-to-x.rst
│   │   └── index.rst
│   ├── seasonal-differencing-issues.rst
│   ├── serialization.rst
│   ├── setup.rst
│   ├── sg_execution_times.rst
│   ├── sphinxext
│   │   ├── MANIFEST.in
│   │   └── github_link.py
│   ├── tips_and_tricks.rst
│   ├── usecases.rst
│   ├── usecases
│   │   ├── stocks.rst
│   │   └── sun-spots.rst
│   ├── user_guide.rst
│   └── whats_new.rst
├── etc
│   └── downloads_badges.py
├── examples
│   ├── README.txt
│   ├── arima
│   │   ├── README.txt
│   │   ├── example_add_new_samples.py
│   │   ├── example_auto_arima.py
│   │   ├── example_persisting_a_model.py
│   │   └── example_seasonal_decomposition.py
│   ├── datasets
│   │   ├── README.txt
│   │   └── example_load_data.py
│   ├── example_pipeline.py
│   ├── example_simple_fit.py
│   ├── issue12
│   │   ├── dummy_data.csv
│   │   └── issue-12.ipynb
│   ├── model_selection
│   │   ├── README.txt
│   │   ├── example_cross_val_predict.py
│   │   └── example_cross_validation.py
│   ├── preprocessing
│   │   ├── README.txt
│   │   └── example_date_featurizer.py
│   ├── quick_start_example.ipynb
│   ├── quick_start_output.png
│   ├── stock_market_example.ipynb
│   └── utils
│       ├── README.txt
│       ├── example_array_concatenation.py
│       ├── example_array_differencing.py
│       └── example_tsdisplay.py
├── pmdarima
│   ├── __check_build
│   │   ├── __init__.py
│   │   ├── _check_build.pyx
│   │   ├── setup.py
│   │   └── tests
│   │       ├── __init__.py
│   │       └── test_check_build.py
│   ├── __init__.py
│   ├── _build_utils
│   │   ├── __init__.py
│   │   ├── pre_build_helpers.py
│   │   └── tests
│   │       └── __init__.py
│   ├── arima
│   │   ├── __init__.py
│   │   ├── _arima.pyx
│   │   ├── _arima_fast_helpers.h
│   │   ├── _auto_solvers.py
│   │   ├── _context.py
│   │   ├── _doc.py
│   │   ├── _validation.py
│   │   ├── approx.py
│   │   ├── arima.py
│   │   ├── auto.py
│   │   ├── seasonality.py
│   │   ├── setup.py
│   │   ├── stationarity.py
│   │   ├── tests
│   │   │   ├── __init__.py
│   │   │   ├── data
│   │   │   │   └── issue_191.csv
│   │   │   ├── test_approx.py
│   │   │   ├── test_arima.py
│   │   │   ├── test_arima_diagnostics.py
│   │   │   ├── test_auto.py
│   │   │   ├── test_auto_solvers.py
│   │   │   ├── test_c_arima.py
│   │   │   ├── test_context.py
│   │   │   ├── test_seasonality.py
│   │   │   ├── test_stationarity.py
│   │   │   ├── test_utils.py
│   │   │   └── test_validation.py
│   │   └── utils.py
│   ├── base.py
│   ├── compat
│   │   ├── __init__.py
│   │   ├── matplotlib.py
│   │   ├── numpy.py
│   │   ├── pandas.py
│   │   ├── pytest.py
│   │   ├── sklearn.py
│   │   ├── statsmodels.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── test_sklearn.py
│   │       └── test_statsmodels.py
│   ├── context_managers.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── _base.py
│   │   ├── airpassengers.py
│   │   ├── ausbeer.py
│   │   ├── austres.py
│   │   ├── data
│   │   │   ├── dated.tar.gz
│   │   │   ├── msft.tar.gz
│   │   │   └── sunspots.txt.gz
│   │   ├── gasoline.py
│   │   ├── heartrate.py
│   │   ├── lynx.py
│   │   ├── setup.py
│   │   ├── stocks.py
│   │   ├── sunspots.py
│   │   ├── taylor.py
│   │   ├── tests
│   │   │   ├── __init__.py
│   │   │   └── test_load_datasets.py
│   │   ├── wineind.py
│   │   └── woolyrnq.py
│   ├── decorators.py
│   ├── metrics.py
│   ├── model_selection
│   │   ├── __init__.py
│   │   ├── _split.py
│   │   ├── _validation.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── test_split.py
│   │       └── test_validation.py
│   ├── pipeline.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── endog
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   ├── boxcox.py
│   │   │   ├── log.py
│   │   │   └── tests
│   │   │       ├── __init__.py
│   │   │       ├── test_base.py
│   │   │       ├── test_boxcox.py
│   │   │       └── test_log.py
│   │   ├── exog
│   │   │   ├── __init__.py
│   │   │   ├── _fourier.pyx
│   │   │   ├── base.py
│   │   │   ├── dates.py
│   │   │   ├── fourier.py
│   │   │   ├── setup.py
│   │   │   └── tests
│   │   │       ├── __init__.py
│   │   │       ├── test_base.py
│   │   │       ├── test_dates.py
│   │   │       └── test_fourier.py
│   │   ├── setup.py
│   │   └── tests
│   │       ├── __init__.py
│   │       └── test_base.py
│   ├── setup.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── test_context_managers.py
│   │   ├── test_estimators.py
│   │   ├── test_metrics.py
│   │   └── test_pipeline.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── _array.pyx
│   │   ├── _show_versions.py
│   │   ├── array.py
│   │   ├── metaestimators.py
│   │   ├── setup.py
│   │   ├── tests
│   │   │   ├── __init__.py
│   │   │   ├── test_array.py
│   │   │   ├── test_meta.py
│   │   │   ├── test_show_versions.py
│   │   │   ├── test_vis.py
│   │   │   └── test_wrapped.py
│   │   ├── visualization.py
│   │   └── wrapped.py
│   └── warnings.py
├── pyproject.toml
├── requirements.txt
├── setup.cfg
└── setup.py
/.codecov.yml:
--------------------------------------------------------------------------------
1 | # https://gist.github.com/stevepeak/53bee7b2c326b24a9b4a
2 |
3 | # Prevent codecov from commenting
4 | comment: false
5 |
6 | coverage:
7 |   precision: 2
8 |   round: down
9 |
10 |   status:
11 |     project:
12 |       default:
13 |         target: 95%
14 |         informational: true
15 |         branches:
16 |           - master
17 |
18 |     # The patch just adds noise to the PRs. We only really care about overall
19 |     # coverage
20 |     patch: off
21 |
22 | ignore:
23 |   - "**/setup.py"
24 |   - "*/pmdarima/__check_build/*"
25 |   - "*/pmdarima/_build_utils/*"
26 |   - "*/pmdarima/_config.py"
27 |   - "*/pmdarima/__init__.py"
28 |   - "*/pmdarima/compat/matplotlib.py"
29 |   - "*/pmdarima/utils/tests/test_vis.py"
30 |   - "*/pmdarima/utils/visualization.py"
31 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [report]
2 | show_missing = True
3 |
4 | [run]
5 | source = pmdarima
6 | include = */pmdarima/*
7 | omit =
8 |     */setup.py
9 |     */pmdarima/__check_build/*
10 |     */pmdarima/_build_utils/*
11 |     */pmdarima/_config.py
12 |     */pmdarima/setup.py
13 |     */pmdarima/__init__.py
14 |     */pmdarima/compat/matplotlib.py
15 |     */pmdarima/utils/tests/test_vis.py
16 |     */pmdarima/utils/visualization.py
17 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [tgsmith61591, aaronreidsmith]
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
1 | name: Bug Report
2 | description: Create a bug report to help us improve pmdarima
3 | labels: [':beetle: : bug']
4 | body:
5 |   - type: markdown
6 |     attributes:
7 |       value: |
8 |         Thanks for taking the time to file a bug! Please fill in the below information in as much detail as possible.
9 |   - type: textarea
10 |     id: description
11 |     attributes:
12 |       label: Describe the bug
13 |       description: A clear and concise description of what the bug is.
14 |     validations:
15 |       required: true
16 |   - type: textarea
17 |     id: reproduction
18 |     attributes:
19 |       label: To Reproduce
20 |       description: Steps to reproduce the behavior
21 |     validations:
22 |       required: true
23 |   - type: textarea
24 |     id: versions
25 |     attributes:
26 |       label: Versions
27 |       description: |
28 |         Instructions to get necessary versions
29 |
30 |
31 |         Please run the following snippet and paste the output below.
32 |
33 |         ```python
34 |         import pmdarima; pmdarima.show_versions()
35 |
36 |         # For pmdarima versions <1.5.2 use this:
37 |         import platform; print(platform.platform())
38 |         import sys; print("Python", sys.version)
39 |         import pmdarima; print("pmdarima", pmdarima.__version__)
40 |         import numpy; print("NumPy", numpy.__version__)
41 |         import scipy; print("SciPy", scipy.__version__)
42 |         import sklearn; print("Scikit-Learn", sklearn.__version__)
43 |         import statsmodels; print("Statsmodels", statsmodels.__version__)
44 |         ```
45 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
7 |
8 | # Description
9 |
10 | Please include a summary of the change and which issue is fixed.
11 | Please also include relevant motivation and context. List any dependencies
12 | that are required for this change.
13 |
14 | Fixes #(issue)
15 |
16 | ## Type of change
17 |
18 | Please delete options that are not relevant.
19 |
20 | - [ ] Bug fix (non-breaking change which fixes an issue)
21 | - [ ] New feature (non-breaking change which adds functionality)
22 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
23 | - [ ] Documentation change
24 |
25 | # How Has This Been Tested?
26 |
27 | Please describe the tests that you ran to verify your changes.
28 | Provide instructions so we can reproduce. Please also list any relevant details
29 | for your test configuration.
30 |
31 | - [ ] Test A
32 | - [ ] Test B
33 |
34 | # Checklist:
35 |
36 | - [ ] I have performed a self-review of my own code
37 | - [ ] I have commented my code, particularly in hard-to-understand areas
38 | - [ ] I have made corresponding changes to the documentation
39 | - [ ] My changes generate no new warnings
40 | - [ ] I have added tests that prove my fix is effective or that my feature works
41 | - [ ] New and existing unit tests pass locally with my changes
42 |
--------------------------------------------------------------------------------
/.github/stale.yml:
--------------------------------------------------------------------------------
1 | # Configuration for probot-stale - https://github.com/probot/stale
2 |
3 | # Number of days of inactivity before an Issue or Pull Request becomes stale
4 | daysUntilStale: 60
5 |
6 | # Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
7 | # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
8 | daysUntilClose: 14
9 |
10 | # Only issues or pull requests with all of these labels are checked if stale. Defaults to `[]` (disabled)
11 | onlyLabels: []
12 |
13 | # Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
14 | exemptLabels: []
15 |
16 | # Set to true to ignore issues in a project (defaults to false)
17 | exemptProjects: false
18 |
19 | # Set to true to ignore issues in a milestone (defaults to false)
20 | exemptMilestones: false
21 |
22 | # Set to true to ignore issues with an assignee (defaults to false)
23 | exemptAssignees: false
24 |
25 | # Label to use when marking as stale
26 | staleLabel: stale-issue
27 |
28 | # Comment to post when marking as stale. Set to `false` to disable
29 | markComment: >
30 | This issue has been automatically marked as stale because it has not had
31 | recent activity. It will be closed if no further activity occurs. Thank you
32 | for your contributions.
33 |
34 | # Comment to post when removing the stale label.
35 | # unmarkComment: >
36 | # Your comment here.
37 |
38 | # Comment to post when closing a stale Issue or Pull Request.
39 | # closeComment: >
40 | # Your comment here.
41 |
42 | # Limit the number of actions per hour, from 1-30. Default is 30
43 | limitPerRun: 30
44 |
45 | # Limit to only `issues` or `pulls`
46 | only: issues
47 |
48 | # Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
49 | #pulls:
50 | # daysUntilStale: 60
51 | # markComment: >
52 | # This pull request has been automatically marked as stale because it has not had
53 | # recent activity. It will be closed if no further activity occurs. Thank you
54 | # for your contributions.
55 |
56 | # issues:
57 | # exemptLabels:
58 | # - confirmed
--------------------------------------------------------------------------------
/.github/utils/get_dependency_releases.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | import requests
4 | from tabulate import tabulate
5 |
6 | if len(sys.argv) < 2:
7 |     print(f'Usage: python {sys.argv[0]} <package> [<package> ...]')
8 |     sys.exit(1)
9 |
10 | packages = sys.argv[1:]
11 | session = requests.Session()
12 |
13 | releases = []
14 | for package in packages:
15 |     response = session.get(f'https://pypi.org/pypi/{package}/json')
16 |     response.raise_for_status()
17 |     pypi = response.json()
18 |     latest_version = pypi['info']['version']
19 |     latest_release_date = pypi['releases'][latest_version][0]['upload_time']
20 |     releases.append([
21 |         package, latest_version, latest_release_date.replace('T', ' ') + ' UTC']
22 |     )
23 |
24 | session.close()
25 |
26 | table = tabulate(
27 |     sorted(releases, key=lambda entry: entry[2], reverse=True),
28 |     headers=['Package', 'Version', 'Release Date']
29 | )
30 | # Need repr so this is on one line for Slack
31 | print(repr('```\n' + table + '\n```').replace("'", ''))
32 |
--------------------------------------------------------------------------------
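
A quick note on the script above: it leans on pypi.org's public JSON API. A minimal
sketch of that call, using ``numpy`` as an arbitrary example package:

```python
import requests

# The pypi.org JSON endpoint the script queries per package;
# 'numpy' is just an illustrative package name
response = requests.get('https://pypi.org/pypi/numpy/json')
response.raise_for_status()
pypi = response.json()

latest = pypi['info']['version']
print(latest, pypi['releases'][latest][0]['upload_time'])
```

The script then sorts these upload times in descending order, so the most recently
released dependency appears first in the table it posts to Slack.
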
/.github/workflows/badges.yml:
--------------------------------------------------------------------------------
1 | name: Update Downloads Badges
2 |
3 | on:
4 |   schedule:
5 |     - cron: '15 17 * * *' # Every day at 5:15pm UTC
6 |
7 | jobs:
8 |   update:
9 |     name: Update Downloads Badges
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - name: Checkout
13 |         uses: actions/checkout@v3
14 |       - name: Setup Python
15 |         uses: actions/setup-python@v4
16 |         with:
17 |           python-version: '3.11'
18 |           architecture: 'x64'
19 |       - name: Install Requirements
20 |         run: python -m pip install requests
21 |       - name: Run Python Script
22 |         run: python etc/downloads_badges.py
23 |         env:
24 |           ZAPIER_SHA: ${{ secrets.ZAPIER_SHA }}
--------------------------------------------------------------------------------
/.github/workflows/test_tagging.yml:
--------------------------------------------------------------------------------
1 | name: Test VERSION tagging
2 |
3 | on:
4 |   push:
5 |     branches:
6 |       - 'master'
7 |
8 |   pull_request:
9 |     branches:
10 |       - '*'
11 |
12 | # Cancel older runs of the same workflow on the same branch
13 | concurrency:
14 |   group: ${{ github.workflow }}-${{ github.ref }}
15 |   cancel-in-progress: true
16 |
17 | jobs:
18 |   test-tagging:
19 |     name: Test VERSION tagging
20 |
21 |     runs-on: ubuntu-latest
22 |
23 |     steps:
24 |       - name: Checkout
25 |         uses: actions/checkout@v3
26 |
27 |       - name: Setting up Python
28 |         uses: actions/setup-python@v4
29 |         with:
30 |           python-version: '3.11'
31 |           architecture: 'x64'
32 |
33 |       - name: Ensure VERSION tagging works
34 |         run: |
35 |           chmod +x build_tools/github/test_version_tagging.sh
36 |           ./build_tools/github/test_version_tagging.sh
37 |         shell: bash
38 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # VS Code
2 | .vscode/
3 |
4 | # Mac stuff
5 | .DS_Store
6 | .idea/
7 |
8 | # custom extension
9 | .pyramid-cache/
10 | *.pmdpkl
11 |
12 | # Don't want to include scratch stuff
13 | scratch/
14 |
15 | # Models
16 | *.pkl
17 | !pmdarima/datasets/data/*.pkl
18 |
19 | # sdist artifacts
20 | MANIFEST
21 |
22 | # Pypi pws
23 | .pypipws
24 |
25 | # Coverage
26 | coverage
27 |
28 | # Pytest
29 | .pytest_cache
30 |
31 | # OAuth access for automating releases
32 | ACCESS_TOKEN
33 |
34 | # Byte-compiled / optimized / DLL files
35 | __pycache__/
36 | *.py[cod]
37 | *$py.class
38 |
39 | # C extensions and cython reports
40 | *.so
41 | *.c
42 | pmdarima/arima/*.html
43 | pmdarima/preprocessing/exog/*.html
44 | pmdarima/utils/*.html
45 |
46 | # Distribution / packaging
47 | .Python
48 | env/
49 | build/
50 | doc/_build
51 | doc/auto_examples
52 | doc/modules/generated
53 | develop-eggs/
54 | dist/
55 | downloads/
56 | eggs/
57 | .eggs/
58 | lib/
59 | lib64/
60 | parts/
61 | sdist/
62 | var/
63 | *.egg-info/
64 | .installed.cfg
65 | *.egg
66 |
67 | # PyInstaller
68 | # Usually these files are written by a python script from a template
69 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
70 | *.manifest
71 | *.spec
72 |
73 | # Installer logs
74 | pip-log.txt
75 | pip-delete-this-directory.txt
76 |
77 | # Unit test / coverage reports
78 | htmlcov/
79 | .tox/
80 | .coverage
81 | .coverage.*
82 | .cache
83 | coverage.xml
84 | *,cover
85 | .hypothesis/
86 |
87 | # Translations
88 | *.mo
89 | *.pot
90 |
91 | # Django stuff:
92 | *.log
93 | local_settings.py
94 |
95 | # Flask stuff:
96 | instance/
97 | .webassets-cache
98 |
99 | # Scrapy stuff:
100 | .scrapy
101 |
102 | # Sphinx documentation
103 | docs/_build/
104 |
105 | # PyBuilder
106 | target/
107 |
108 | # IPython Notebook
109 | .ipynb_checkpoints
110 |
111 | # pyenv
112 | .python-version
113 |
114 | # celery beat schedule file
115 | celerybeat-schedule
116 |
117 | # dotenv
118 | .env
119 |
120 | # virtualenv
121 | venv/
122 | ENV/
123 |
124 | # Spyder project settings
125 | .spyderproject
126 |
127 | # Rope project settings
128 | .ropeproject
129 |
130 | # We want this to only be created on CI/CD platforms
131 | VERSION
132 |
--------------------------------------------------------------------------------
/AUTHORS.md:
--------------------------------------------------------------------------------
1 | ## Authors
2 |
3 | The following people have been core contributors to `pmdarima`'s development:
4 |
5 | * [Taylor Smith](https://github.com/tgsmith61591)
6 | * [Gary Foreman](https://github.com/garyForeman)
7 | * [Charles Drotar](https://github.com/charlesdrotar)
8 | * [Steven Hoelscher](https://github.com/shoelsch)
9 | * [Aaron Smith](https://github.com/aaronreidsmith)
10 | * [Krishna Sunkara](https://github.com/kpsunkara)
11 | * [Christopher Siewert](https://github.com/christopher-siewert)
12 |
13 | __Please do not email the authors directly with questions or issues.__ Rather, use
14 | the [issues](https://github.com/alkaline-ml/pmdarima/issues) page. Furthermore, issues
15 | or emails specifically related to assistance in learning time series analysis should be
16 | saved for Stack Overflow.
17 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: "If you would like to include pmdarima in your published work, please cite it as follows"
3 | authors:
4 |   - family-names: "Smith"
5 |     given-names: "Taylor G."
6 | keywords:
7 |   - python
8 |   - "machine learning"
9 |   - "time series"
10 |   - econometrics
11 |   - forecasting
12 |   - arima
13 |   - "forecasting models"
14 |   - sarimax
15 | title: "pmdarima"
16 | version: 2.0.4
17 | date-released: 2023-10-23
18 | license: MIT
19 | repository-artifact: https://pypi.org/project/pmdarima
20 | repository-code: https://github.com/alkaline-ml/pmdarima
21 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies within all project spaces, and it also applies when
49 | an individual is representing the project or its community in public spaces.
50 | Examples of representing a project or community include using an official
51 | project e-mail address, posting via an official social media account, or acting
52 | as an appointed representative at an online or offline event. Representation of
53 | a project may be further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at . All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Taylor G Smith
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | # recursive-include doc *
2 | include pmdarima/VERSION
3 | recursive-include examples *
4 | recursive-include pmdarima *
5 | recursive-include pmdarima/__check_build *
6 | recursive-include pmdarima/_build_utils *
7 | recursive-include pmdarima/arima *
8 | recursive-include pmdarima/compat *
9 | recursive-include pmdarima/datasets *
10 | recursive-include pmdarima/datasets/data *
11 | recursive-include pmdarima/model_selection *
12 | recursive-include pmdarima/preprocessing/endog *
13 | recursive-include pmdarima/preprocessing/exog *
14 | recursive-include pmdarima/tests *
15 | recursive-include pmdarima/utils *
16 | include LICENSE
17 | include README.md
18 | include requirements.txt
19 | global-exclude __pycache__
20 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # simple makefile to simplify repetitive build env management tasks under posix
2 | # this is adopted from the sklearn Makefile
3 |
4 | # caution: testing won't work on windows
5 |
6 | PYTHON ?= python
7 | DOCKER ?= docker
8 | HERE = $(shell pwd)
9 |
10 | .PHONY: clean develop test install bdist_wheel version
11 |
12 | clean:
13 | 	$(PYTHON) setup.py clean
14 | 	rm -rf dist
15 | 	rm -rf build
16 | 	rm -rf .pytest_cache
17 | 	rm -rf pmdarima.egg-info
18 | 	rm -f pmdarima/VERSION
19 | 	rm -f conda/meta.yaml
20 | 	rm -rf .coverage.*
21 |
22 | deploy-requirements:
23 | 	$(PYTHON) -m pip install twine readme_renderer[md]
24 |
25 | # Depends on an artifact existing in dist/, and two environment variables
26 | deploy-twine-test: bdist_wheel deploy-requirements
27 | 	$(PYTHON) -m twine upload \
28 | 		--repository-url https://test.pypi.org/legacy/ dist/* \
29 | 		--username ${TWINE_USERNAME} \
30 | 		--password ${TWINE_PASSWORD}
31 |
32 | documentation: version
33 | 	$(DOCKER) run -v $(HERE):/pmdarima -w /pmdarima --rm alkalineml/pmdarima-doc-base:latest /bin/bash -c "make install docker-documentation"
34 |
35 | # This one assumes we are in the docker container, so it can either be called from above (locally), or directly (on CI)
36 | docker-documentation: version
37 | 	@make -C doc clean html EXAMPLES_PATTERN=example_* PMDARIMA_VERSION=$(PMDARIMA_VERSION)
38 |
39 | requirements:
40 | 	$(PYTHON) -m pip install -r requirements.txt
41 |
42 | bdist_wheel: version
43 | 	$(PYTHON) setup.py bdist_wheel
44 |
45 | sdist: version
46 | 	$(PYTHON) setup.py sdist
47 |
48 | develop: version
49 | 	$(PYTHON) setup.py develop
50 |
51 | install: version
52 | 	$(PYTHON) setup.py install
53 |
54 | lint-requirements:
55 | 	$(PYTHON) -m pip install flake8
56 |
57 | testing-requirements:
58 | 	$(PYTHON) -m pip install pytest flake8 matplotlib pytest-mpl coverage pytest-cov codecov
59 |
60 | test-lint:
61 | 	$(PYTHON) -m flake8 pmdarima --filename='*.py' --ignore F401,F403,W293,W504
62 |
63 | test-unit:
64 | 	$(PYTHON) -m pytest -v --durations=20 --cov-config .coveragerc --cov pmdarima -p no:logging
65 |
66 | test: test-unit test-lint
67 | 	# Coverage creates all these random little artifacts we don't want
68 | 	rm .coverage.* || echo "No coverage artifacts to remove"
69 |
70 | twine-check: bdist_wheel deploy-requirements
71 | 	# Check that twine will parse the README acceptably
72 | 	$(PYTHON) -m twine check dist/*
73 |
74 | version: requirements
75 | 	@$(PYTHON) build_tools/get_tag.py
76 |
--------------------------------------------------------------------------------
/benchmarks/benchmark_funcs.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Benchmark various approaches to functions to speed things up.
4 | # ... hopefully.
5 |
6 | import numpy as np
7 |
8 | import time
9 |
10 |
11 | def _do_time(func, n_iter=10, *args, **kwargs):
12 |     times = []
13 |     for _ in range(n_iter):
14 |         start = time.time()
15 |         func(*args, **kwargs)
16 |         times.append(time.time() - start)
17 |
18 |     times = np.asarray(times)
19 |     print("Completed %i iterations (avg=%.6f, min=%.6f, max=%.6f)"
20 |           % (n_iter, times.mean(), times.min(), times.max()))
21 |
22 |
23 | def benchmark_is_constant():
24 |     """This benchmarks the "is_constant" function from ``pmdarima.arima.utils``
25 |     This was added in 0.6.2.
26 |     """
27 |     # WINNER!
28 |     def is_const1(x):
29 |         """This is the version in Pyramid 0.6.2.
30 |
31 |         Parameters
32 |         ----------
33 |         x : np.ndarray
34 |             This is the array.
35 |         """
36 |         return (x == x[0]).all()
37 |
38 |     def is_const2(x):
39 |         """This should ostensibly only take O(N) rather than O(2N) like
40 |         its predecessor. But we'll see...
41 |
42 |         Parameters
43 |         ----------
44 |         x : np.ndarray
45 |             This is the array.
46 |         """
47 |         return np.unique(x).shape[0] == 1
48 |
49 |     x = np.random.choice(np.arange(10), 1000000, replace=True)
50 |     _do_time(is_const1, 25, x)
51 |     _do_time(is_const2, 25, x)
52 |
53 |
54 | if __name__ == '__main__':
55 |     benchmark_is_constant()
56 |
--------------------------------------------------------------------------------
/benchmarks/item_sales_daily.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/benchmarks/item_sales_daily.csv.gz
--------------------------------------------------------------------------------
/build_tools/build_requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.21.2; python_version < '3.10'
2 | numpy==1.21.6; python_version == '3.10' and platform_system != 'Windows'
3 | numpy==1.22.3; python_version == '3.10' and platform_system == 'Windows'
4 | numpy==1.23.2; python_version == '3.11'
5 | numpy==1.26.0; python_version == '3.12'
6 | scipy==1.3.2; python_version <= '3.8' and platform_machine != 'aarch64'
7 | scipy==1.5.3; python_version <= '3.8' and platform_machine == 'aarch64'
8 | scipy==1.5.4; python_version == '3.9'
9 | scipy==1.7.2; python_version == '3.10'
10 | scipy==1.9.3; python_version == '3.11'
11 | scipy==1.11.2; python_version == '3.12'
12 | statsmodels==0.13.2; python_version <= '3.10'
13 | statsmodels==0.13.3; python_version == '3.11'
14 | statsmodels==0.14.0; python_version == '3.12'
15 | cython>=0.29,!=0.29.18,!=0.29.31
16 | scikit-learn>=0.22
17 | pandas>=0.19
18 | patsy
19 | pytest
20 | pytest-mpl
21 | pytest-benchmark
22 | setuptools>=38.6.0,!=50.0.0
23 | packaging>=17.1 # Bundled with setuptools, but want to be explicit
24 | wheel
25 | twine>=1.13.0
26 | readme_renderer
27 | matplotlib
28 | urllib3
29 |
--------------------------------------------------------------------------------
/build_tools/circle/build_doc.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | # this is a hack, but we have to make sure we're only ever running this from
6 | # the top level of the package and not in the subdirectory...
7 | if [[ ! -d pmdarima/__check_build ]]; then
8 |     echo "This must be run from the pmdarima project directory"
9 |     exit 3
10 | fi
11 |
12 | # Set ${version}
13 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
14 | source "${DIR}/get_version.sh"
15 |
16 | make docker-documentation PMDARIMA_VERSION=${version}
17 |
--------------------------------------------------------------------------------
/build_tools/circle/build_wheel.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e -x
4 |
5 | function build_wheel {
6 |     local pyver=$1
7 |     local arch=$2
8 |     local ucs_setting=$3
9 |
10 |     # https://www.python.org/dev/peps/pep-0513/#ucs-2-vs-ucs-4-builds
11 |     ucs_tag=""
12 |     if [ "$ucs_setting" = "ucs4" ]; then
13 |         ucs_tag="${ucs_tag}u"
14 |     fi
15 |
16 |     distutils_version=""
17 |     if [ "$pyver" = "3.12" ]; then
18 |         distutils_version="local"
19 |     else
20 |         distutils_version="stdlib"
21 |     fi
22 |
23 |     ML_PYTHON_VERSION=$(python -c \
24 |         "print('cp{maj}{min}-cp{maj}{min}{ucs}'.format( \
25 |             maj='${pyver}'.split('.')[0], \
26 |             min='${pyver}'.split('.')[1], \
27 |             ucs='${ucs_tag}'))")
28 |
29 |     DOCKER_CONTAINER_NAME=wheel_builder_$(uuidgen)
30 |
31 |     ML_IMAGE="quay.io/pypa/manylinux_2_28_${arch}:2023-10-07-c1e05d1" # `latest` as of 2023-10-09
32 |     PMDARIMA_VERSION=`cat ~/pmdarima/pmdarima/VERSION`
33 |
34 |     docker pull "${ML_IMAGE}"
35 |     # -v "${_root}":/io \
36 |     docker run \
37 |         --name "${DOCKER_CONTAINER_NAME}" \
38 |         -v `pwd`:/io \
39 |         -e "PYTHON_VERSION=${ML_PYTHON_VERSION}" \
40 |         -e "PMDARIMA_VERSION=${PMDARIMA_VERSION}" \
41 |         -e "SETUPTOOLS_USE_DISTUTILS=${distutils_version}" \
42 |         "${ML_IMAGE}" "/io/build_tools/circle/dind/build_manylinux_wheel.sh"
43 |     sudo docker cp "${DOCKER_CONTAINER_NAME}:/io/dist/." "${_root}/dist/"
44 |     docker rm $(docker ps -a -f status=exited -q)
45 | }
46 |
47 | # Create base directory
48 | pushd $(dirname $0) > /dev/null
49 | _root=$(dirname $(dirname $(pwd -P))) # get one directory up from parent to get to root dir
50 | popd > /dev/null
51 |
52 | echo "Building LINUX OS wheels"
53 |
54 | # Positional arg
55 | pyver=$1
56 |
57 | # We no longer explicitly set these... but in the past we did.
58 | if [ -z "$UCS_SETTING" ] || [ "$UCS_SETTING" = "ucs2" ]; then
59 |     build_wheel $pyver "x86_64" "ucs2"
60 | elif [ "$UCS_SETTING" = "ucs4" ]; then
61 |     build_wheel $pyver "x86_64" "ucs4"
62 | else
63 |     echo "Unrecognized UCS_SETTING: ${UCS_SETTING}"
64 | fi
65 |
--------------------------------------------------------------------------------
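
The `ML_PYTHON_VERSION` line above computes the manylinux platform tag inline; a
minimal Python sketch of the same computation (the UCS suffix only applied to old,
pre-Python-3.3 unicode builds):

```python
def manylinux_python_tag(pyver, ucs_tag=''):
    # '3.11' -> 'cp311-cp311'; old UCS-4 CPython builds appended a 'u'
    maj, minor = pyver.split('.')
    return 'cp{0}{1}-cp{0}{1}{2}'.format(maj, minor, ucs_tag)

assert manylinux_python_tag('3.11') == 'cp311-cp311'
assert manylinux_python_tag('2.7', 'u') == 'cp27-cp27u'
```
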
/build_tools/circle/deploy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e -x
4 |
5 | pip install twine wheel
6 |
7 | # Check our VERSION. Basically, if it contains letters, it is a pre-release. Otherwise,
8 | # it has to match X.Y or X.Y.Z
9 | #
10 | # On CircleCI, we look for the `v` at the beginning of the version, since we are looking at the tag
11 | if [[ ${CIRCLE_TAG} =~ ^v?[0-9]+\.[0-9]+\.?[0-9]*[a-zA-Z]+[0-9]*$ ]]; then
12 |     echo 'Uploading to test pypi'
13 |     twine upload --skip-existing --repository-url https://test.pypi.org/legacy/ dist/pmdarima-*
14 | elif [[ ${CIRCLE_TAG} =~ ^v?[0-9]+\.[0-9]+\.?[0-9]*$ ]]; then
15 |     echo 'Uploading to production pypi'
16 |     twine upload --skip-existing dist/pmdarima-*
17 | else
18 |     echo "Malformed tag: ${CIRCLE_TAG}"
19 |     exit 1
20 | fi
21 |
--------------------------------------------------------------------------------
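
To make the tag-routing logic above concrete, here are the same two patterns
re-expressed in Python (the tag strings are illustrative examples):

```python
import re

# Same patterns as the bash script: letters after the numbers mark a
# pre-release (test PyPI); a bare X.Y or X.Y.Z goes to production
PRE_RELEASE = re.compile(r'^v?[0-9]+\.[0-9]+\.?[0-9]*[a-zA-Z]+[0-9]*$')
RELEASE = re.compile(r'^v?[0-9]+\.[0-9]+\.?[0-9]*$')

for tag in ('v2.0.4', 'v2.0.4rc1', '1.8', 'nightly'):
    if PRE_RELEASE.match(tag):
        print(tag, '-> test pypi')
    elif RELEASE.match(tag):
        print(tag, '-> production pypi')
    else:
        print(tag, '-> malformed, deploy fails')
```
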
/build_tools/circle/dind/README.md:
--------------------------------------------------------------------------------
1 | ## `dind` (Docker in Docker)
2 |
3 | Scripts executed in Docker containers from Machine executors on Circle.
4 |
--------------------------------------------------------------------------------
/build_tools/circle/dind/build_manylinux_wheel.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # EXECUTED IN QUAY DOCKER IMAGE WHERE /io IS A MOUNTED VOLUME OF PMDARIMA ROOT
4 |
5 | # Modify permissions on file
6 | set -e -x
7 |
8 | # Compile wheels
9 | PYTHON="/opt/python/${PYTHON_VERSION}/bin/python"
10 | PIP="/opt/python/${PYTHON_VERSION}/bin/pip"
11 |
12 | # We have to use wheel < 0.32 since they inexplicably removed the open_for_csv
13 | # function from the package after 0.31.1 and it fails for Python 3.6?!
14 | ${PIP} install --upgrade pip wheel==0.31.1
15 | ${PIP} install --upgrade "setuptools>=38.6.0,!=50.0.0"
16 |
17 | # NOW we can install requirements
18 | ${PIP} install -r /io/build_tools/build_requirements.txt
19 | ${PIP} install -r /io/requirements.txt
20 | make -C /io/ PYTHON="${PYTHON}"
21 |
22 | # Make sure the VERSION file is present for this. For whatever reason, the
23 | # make -C call removes it
24 | echo ${PMDARIMA_VERSION} > /io/pmdarima/VERSION
25 | ${PIP} wheel /io/ -w /io/dist/
26 |
27 | # Bundle external shared libraries into the wheels.
28 | for whl in /io/dist/*.whl; do
29 |     if [[ "$whl" =~ "pmdarima" ]]; then
30 |         auditwheel repair $whl -w /io/dist/ # repair pmdarima wheel and output to /io/dist
31 |     fi
32 |
33 |     rm $whl # remove wheel
34 | done
35 |
--------------------------------------------------------------------------------
/build_tools/circle/dind/install_and_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # EXECUTED IN A DOCKER CONTAINER
4 |
5 | set -e
6 |
7 | # Make sure we're in the root PMDARIMA dir (mounted at /io)
8 | cd /io
9 |
10 | make develop
11 | make testing-requirements
12 | make test-unit
13 |
14 | # Upload coverage
15 | codecov || echo "codecov upload failed"
16 |
--------------------------------------------------------------------------------
/build_tools/circle/get_version.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # The version is retrieved from the CIRCLE_TAG. If there is no version, we just
4 | # call it 0.0.0, since we won't be pushing anyways (not master and no tag)
5 | if [[ -n ${CIRCLE_TAG} ]]; then
6 |     # We should have the VERSION file on tags now since 'make documentation'
7 |     # gets it. If not, we use 0.0.0. There are two cases we ever deploy:
8 |     # 1. Master (where version is not used, as we use 'develop')
9 |     # 2. Tags (where version IS defined)
10 |     echo "On tag"
11 |     make version
12 |     export version=$(cat pmdarima/VERSION)
13 | else
14 |     echo "Not on tag, will use version=0.0.0"
15 |     export version="0.0.0"
16 | fi
17 |
--------------------------------------------------------------------------------
/build_tools/circle/test_unit.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e -x
4 |
5 | # Create base directory
6 | pushd $(dirname $0) > /dev/null
7 | rootdir=$(dirname $(dirname $(pwd -P))) # get one directory up from parent to get to root dir
8 | popd > /dev/null
9 |
10 | echo "Installing package from whl file"
11 |
12 | # Construct docker image
13 | pyver=$1
14 | pythonimg="python:${pyver}"
15 |
16 | # Mount root as a volume, execute installation + unit tests within the container
17 | env > vars.env
18 | docker run \
19 |     --rm \
20 |     -v `pwd`:/io \
21 |     --env-file vars.env \
22 |     ${pythonimg} \
23 |     sh /io/build_tools/circle/dind/install_and_test.sh
24 |
25 | status=$?
26 | exit $status
27 |
--------------------------------------------------------------------------------
/build_tools/get_tag.py:
--------------------------------------------------------------------------------
1 | import os
2 | from os.path import abspath, dirname
3 |
4 | # This file assumes that our tags are always in this format: vX.X.X.
5 | # In that case, we would only want to write X.X.X
6 |
7 | TOP_LEVEL = abspath(dirname(dirname(__file__)))
8 | OUT_FILE = os.path.join(TOP_LEVEL, 'pmdarima', 'VERSION')
9 | DEFAULT_TAG = '0.0.0'
10 |
11 |
12 | def get_version_from_tag(tag):
13 |     """Handles 1.5.0 or v1.5.0"""
14 |     return tag[1:] if tag.startswith('v') else tag
15 |
16 |
17 | # Circle is easy, since they give us the git tag
18 | if os.getenv('CIRCLECI', False) and os.getenv('CIRCLE_TAG', False):
19 |     print('Tagged commit on Circle CI. Writing to {0}'.format(OUT_FILE))
20 |     with open(OUT_FILE, 'w') as f:
21 |         tag = get_version_from_tag(os.getenv('CIRCLE_TAG'))
22 |         f.write(tag)
23 |
24 | elif os.getenv('GITHUB_REF') and \
25 |         os.getenv('GITHUB_REF').startswith('refs/tags/'):
26 |     print('Tagged commit on Github Actions. Writing to {0}'.format(OUT_FILE))
27 |     with open(OUT_FILE, 'w') as f:
28 |         tag = os.getenv('GITHUB_REF').split('/')[-1]
29 |         f.write(get_version_from_tag(tag))
30 |
31 | # Local or non-tagged commit. setuptools requires a VERSION file, so just write a default one
32 | else:
33 |     print("Not a tagged commit or not on CI. Using default tag")
34 |     with open(OUT_FILE, 'w') as f:
35 |         f.write(DEFAULT_TAG)
36 |
--------------------------------------------------------------------------------
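
The two CI branches above differ only in where the tag comes from; a small sketch
of how each ref resolves, using the same normalization as `get_tag.py`:

```python
def get_version_from_tag(tag):
    # 'v1.5.0' and '1.5.0' both normalize to '1.5.0'
    return tag[1:] if tag.startswith('v') else tag

# Circle CI hands over the tag itself via CIRCLE_TAG
assert get_version_from_tag('v1.5.0') == '1.5.0'

# GitHub Actions hands over a full ref; the script keeps the last segment
ref = 'refs/tags/v0.99.999'  # the ref test_version_tagging.sh exercises
assert get_version_from_tag(ref.split('/')[-1]) == '0.99.999'
```
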
/build_tools/github/deploy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [[ $(cat ${GITHUB_WORKSPACE}/pmdarima/VERSION) =~ ^[0-9]+\.[0-9]+\.?[0-9]*[a-zA-Z]+[0-9]*$ ]]; then
4 |     echo 'Uploading to test pypi'
5 |     python -m twine upload --repository-url https://test.pypi.org/legacy/ --skip-existing dist/pmdarima-*
6 | elif [[ $(cat ${GITHUB_WORKSPACE}/pmdarima/VERSION) =~ ^[0-9]+\.[0-9]+\.?[0-9]*$ ]]; then
7 |     echo 'Uploading to production pypi'
8 |     python -m twine upload --skip-existing dist/pmdarima-*
9 | else
10 |     echo "Malformed tag: $(cat ${GITHUB_WORKSPACE}/pmdarima/VERSION)"
11 |     exit 1
12 | fi
--------------------------------------------------------------------------------
/build_tools/github/get_latest_dependencies.py:
--------------------------------------------------------------------------------
1 | """This script reads our requirements.txt file and removes the pinned versions"""
2 |
3 | import re
4 | import os
5 | from os.path import abspath, dirname
6 |
7 | TOP_LEVEL = abspath(dirname(dirname(dirname(__file__))))
8 | REQUIREMENTS = os.path.join(TOP_LEVEL, 'requirements.txt')
9 | BUILD_REQUIREMENTS = os.path.join(TOP_LEVEL, 'build_tools', 'build_requirements.txt')
10 |
11 |
12 | def find_latest_dependencies(*requirements_files):
13 |     """Given one or more requirements.txt files, strip off any pinned versions
14 |
15 |     Parameters
16 |     ----------
17 |     *requirements_files : str
18 |         One or more paths to requirements.txt files to parse
19 |
20 |     Returns
21 |     -------
22 |     requirements : list
23 |         List of parsed dependencies without their pinned versions
24 |     """
25 |     requirements = []
26 |     for requirements_file in requirements_files:
27 |         with open(requirements_file) as file:
28 |             for line in file:
29 |                 requirement = line.strip()
30 |                 if not requirement or requirement.startswith('#'):
31 |                     continue
32 |                 match = re.match(r'^([A-Za-z\-0-9]+)', requirement)
33 |                 if match.group(0).lower() not in requirements:
34 |                     requirements.append(match.group(0).lower())
35 |
36 |     return requirements
37 |
38 |
39 | requirements = find_latest_dependencies(REQUIREMENTS, BUILD_REQUIREMENTS)
40 | # We print because this is called from a bash script and we need to return a
41 | # space-separated list
42 | print(' '.join(requirements))
43 |
--------------------------------------------------------------------------------
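
For illustration, here is how the name-extracting regex above behaves on a few
representative requirement lines (the samples mirror entries in
build_requirements.txt):

```python
import re

samples = (
    "numpy==1.26.0; python_version == '3.12'",  # pinned, with a marker
    'scikit-learn>=0.22',                       # hyphenated name
    'cython>=0.29,!=0.29.18,!=0.29.31',         # multiple specifiers
)
for line in samples:
    match = re.match(r'^([A-Za-z\-0-9]+)', line.strip())
    print(match.group(0).lower())  # numpy / scikit-learn / cython
```
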
/build_tools/github/test_version_tagging.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 | pip install pathlib
5 |
6 | GITHUB_REF=refs/tags/v0.99.999 python ${GITHUB_WORKSPACE}/build_tools/get_tag.py
7 |
8 | if [[ ! -f ${GITHUB_WORKSPACE}/pmdarima/VERSION ]]; then
9 |     echo "Expected VERSION file"
10 |     exit 4
11 | fi
12 |
--------------------------------------------------------------------------------
/doc/_static/css/fields.css:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | body {
4 | font-family: Helvetica, Arial, sans-serif !important;
5 | font-style: 100%;
6 | }
7 | */
8 |
9 | table.docutils tr {
10 |     border-style: solid none solid none;
11 |     border-width: 1px 0 1px 0;
12 |     border-color: #ddd;
13 | }
14 |
15 | div.body p, div.body dd, div.body li {
16 |     line-height: 1.5em;
17 | }
18 |
19 | div.body {
20 |     font-size: 0.9em;
21 | }
22 |
23 | div.body p {
24 |     margin-top: 1.2em;
25 |     margin-bottom: .1em;
26 | }
27 |
28 | div.body p, div.body dd, div.body li {
29 |     line-height: 1.5em;
30 | }
31 |
32 | table.docutils td, table.docutils th {
33 |     padding: 1px 8px 1px 5px !important;
34 |     border-top: 0;
35 |     border-left: 0;
36 |     border-right: 0;
37 |     border-bottom: 1px solid #aaa;
38 | }
39 |
40 | .field-odd, .field-even {
41 |     background-color: #fff;
42 | }
43 |
44 | .field-name {
45 |     background-color: #F0F7FA;
46 | }
47 |
48 | .field-body {
49 |     background-color: #fff;
50 | }
51 |
52 | th.field-name {
53 |     white-space: nowrap;
54 | }
55 |
56 | .field-odd, .field-even {
57 |     background-color: #fff;
58 | }
59 |
60 | .field-name {
61 |     background-color: #F0F7FA;
62 | }
63 |
64 | .field-body {
65 |     background-color: #fff;
66 | }
67 |
68 | h2 > a.reference {
69 |     font-weight: bold;
70 |     color: #2878A2;
71 |     text-decoration: none;
72 |     word-wrap: break-word;
73 |     font-family: Arial, sans-serif;
74 | }
75 |
76 | a.reference > code.xref {
77 |     background: transparent;
78 |     border: none;
79 |     font-size: 1.1em;
80 |     font-family: monospace;
81 |     padding: 2px 4px;
82 |     color: #2980B9 !important;
83 | }
84 |
--------------------------------------------------------------------------------
/doc/_static/css/gitcontrib.css:
--------------------------------------------------------------------------------
1 | .capped-card {
2 |     font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";
3 |     font-size: 14px;
4 |     line-height: 1.5;
5 |     color: #24292e;
6 |     list-style: none !important;
7 | }
8 |
9 | .contrib-wrapper {
10 |     /* position: absolute; */
11 |     display: block;
12 |     height: 80px;
13 |     width: 450px;
14 | }
15 |
16 | .committer {
17 |     text-decoration: none;
18 |     color: #0366d6;
19 | }
20 |
21 | .contrib-avatar-wrapper {
22 |     position: absolute;
23 |     display: block;
24 |     margin-left: 0px;
25 |     margin-top: 0px;
26 |     height: 80px;
27 |     width: 80px;
28 | }
29 |
30 | .avatar {
31 |     margin-left: 10px;
32 |     margin-top: 10px;
33 | }
34 |
35 | img.avatar {
36 |     width: 60px;
37 |     height: 60px;
38 | }
39 |
40 | .contrib-author-wrapper {
41 |     position: absolute;
42 |     display: block;
43 |     margin-left: 80px;
44 |     margin-top: 0px;
45 |     width: 310px;
46 |     height: 50px;
47 | }
48 |
49 | .contrib-author-wrapper h3 {
50 |     margin-left: 5px;
51 |     margin-top: 5px;
52 |     font-weight: normal;
53 |     font-size: 24px;
54 | }
55 |
56 | .rank {
57 |     float: right;
58 |     font-size: 13px;
59 |     color: #586069;
60 |     margin-right: 10px;
61 |     margin-top: 10px;
62 | }
63 |
64 | .contrib-stats-wrapper {
65 |     position: absolute;
66 |     display: block;
67 |     margin-left: 80px;
68 |     margin-top: 50px;
69 |     width: 370px;
70 |     height: 30px;
71 | }
72 |
73 | .contrib-stats {
74 |     position: absolute;
75 |     display: block;
76 |     margin-left: 80px;
77 |     margin-right: 20px;
78 |     margin-top: 10px;
79 |     width: 350px;
80 |     height: 25px;
81 | }
82 |
83 | .ameta {
84 |     margin-left: 5px;
85 | }
86 |
87 | .cmt {
88 |     color: #586069;
89 |     text-decoration: none;
90 | }
91 |
92 | .a {
93 |     color: #28a745;
94 | }
95 |
96 | .d {
97 |     color: #cb2431;
98 | }
--------------------------------------------------------------------------------
/doc/_static/js/contrib.js:
--------------------------------------------------------------------------------
1 | function commaFmt(x) {
2 |     return x.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
3 | }
4 |
5 | function fetchContributors() {
6 |     $.getJSON("https://api.github.com/repos/alkaline-ml/pmdarima/stats/contributors", function(arr) {
7 |         // sort the array based on total count
8 |         arr.sort(function(a, b) {
9 |             var aTotal = a['total'];
10 |             var bTotal = b['total'];
11 |
12 |             // reverse for desc
13 |             return (aTotal > bTotal) ? -1 : (bTotal > aTotal) ? 1 : 0;
14 |         });
15 |
16 |         $.each(arr, function(i, obj) {
17 |             var total = obj['total'];
18 |             var adds = 0;
19 |             var dels = 0;
20 |
21 |             // get the counts of adds/deletes
22 |             $.each(obj['weeks'], function(wk, weekData) {
23 |                 adds += weekData['a'];
24 |                 dels += weekData['d'];
25 |             });
26 |
27 |             var authorJSON = obj['author'];
28 |             var authorLogin = authorJSON['login'];
29 |             var authorURL = authorJSON['html_url'];
30 |             var avatarURL = authorJSON['avatar_url'] + '&s=60';
31 |             var p = (total > 1) ? 's' : '';
32 |
33 |             // Add HTML elements to the ol element below. (The HTML string
34 |             // built here, spanning original lines 34-55, was garbled in
35 |             // extraction; it assembled the contributor card markup: avatar,
36 |             // author link, rank '#' + (i + 1), and the add/delete stats.)
37 |             var li = $('…');
56 |
57 |             // can only do this once the doc is ready
58 |             $('#contrib').append(li);
59 |         });
60 |     });
61 | }
62 |
--------------------------------------------------------------------------------
/doc/_templates/class.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}==============
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autoclass:: {{ objname }}
7 |
8 | {% block methods %}
9 | .. automethod:: __init__
10 | {% endblock %}
11 |
12 | .. include:: {{module}}.{{objname}}.examples
13 |
14 | .. raw:: html
15 |
16 |
--------------------------------------------------------------------------------
/doc/_templates/class_with_call.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}===============
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autoclass:: {{ objname }}
7 |
8 | {% block methods %}
9 | .. automethod:: __init__
10 | .. automethod:: __call__
11 | {% endblock %}
12 |
13 | .. include:: {{module}}.{{objname}}.examples
14 |
15 | .. raw:: html
16 |
17 |
--------------------------------------------------------------------------------
/doc/_templates/class_without_init.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}==============
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autoclass:: {{ objname }}
7 |
8 | .. include:: {{module}}.{{objname}}.examples
9 |
10 | .. raw:: html
11 |
12 |
--------------------------------------------------------------------------------
/doc/_templates/function.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}====================
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autofunction:: {{ objname }}
7 |
8 | .. include:: {{module}}.{{objname}}.examples
9 |
10 | .. raw:: html
11 |
12 |
--------------------------------------------------------------------------------
/doc/_templates/numpydoc_docstring.rst:
--------------------------------------------------------------------------------
1 | {{index}}
2 | {{summary}}
3 | {{extended_summary}}
4 | {{parameters}}
5 | {{returns}}
6 | {{yields}}
7 | {{other_parameters}}
8 | {{attributes}}
9 | {{raises}}
10 | {{warns}}
11 | {{warnings}}
12 | {{see_also}}
13 | {{notes}}
14 | {{references}}
15 | {{examples}}
16 | {{methods}}
17 |
--------------------------------------------------------------------------------
/doc/about.rst:
--------------------------------------------------------------------------------
1 | .. _about:
2 |
3 | =================
4 | About the project
5 | =================
6 |
7 | ``pmdarima`` is designed to behave as similarly to R's well-known
8 | `auto.arima `_
9 | as possible.
10 |
11 | The project emerged as a result of a long-standing personal debate between
12 | my colleagues and `me `_ about why Python is
13 | vastly superior to R. Since R's forecasting capabilities far surpassed those of Python's
14 | existing libraries, ``pmdarima`` was created to close that gap and give analysts/researchers
15 | one less reason to reach for R when doing practical machine learning.
16 |
17 | *(Of course, take my soapbox speech with a grain of salt... I once was an R addict but am now recovering)*
18 |
19 |
20 | The name...
21 | -----------
22 |
23 | The name "pyramid" originally was the result of an anagram between the "py" prefix and
24 | the characters needed to spell "arima". However, the popular web framework sharing the
25 | same name caused a `namespace collision `_
26 | and the package has since been renamed ``pmdarima``. You may still see it referred to interchangeably
27 | throughout the doc as "pyramid".
28 |
29 |
30 | How it works
31 | ------------
32 |
33 | ``pmdarima`` is essentially a Python & Cython wrapper of several different statistical
34 | and machine learning libraries (statsmodels and scikit-learn), and operates by generalizing
35 | all ARIMA models into a single class (unlike statsmodels).
36 |
37 | It does this by wrapping the respective statsmodels interfaces
38 | (``ARMA``, ``ARIMA`` and ``SARIMAX``) inside the ``pmdarima.ARIMA`` class,
39 | and as a result there is a bit of monkey patching that happens beneath the hood.
40 |
41 | How ``auto_arima`` works
42 | ~~~~~~~~~~~~~~~~~~~~~~~~
43 |
44 | The ``auto_arima`` function itself operates a bit like a grid search, in that it
45 | tries various sets of ``p`` and ``q`` (also ``P`` and ``Q`` for seasonal models)
46 | parameters, selecting the model that minimizes the AIC (or BIC, or whatever
47 | information criterion you select). To select the differencing terms, ``auto_arima``
48 | uses a test of stationarity (such as an augmented Dickey-Fuller test) and seasonality
49 | (such as the Canova-Hansen test) for seasonal models.
50 |
51 | For more in-depth information on the process by which ``auto_arima`` selects
52 | the best model, check out the :ref:`tips_and_tricks` section.
53 |
54 | Feedback
55 | --------
56 |
57 | This is an open-source (read: *FREE*) project. That means several things:
58 |
59 | * It is not infallible
60 | * It's a community effort
61 | * Making demands doesn't go over well
62 |
63 | I know that there are those who have built models with pmdarima as a tool
64 | to support their work. I also know that people can depend on the functionality of
65 | this library in order to do their job well. And for that, I'm committed to
66 | keeping things running smoothly.
67 |
68 | However, as I'm the sole maintainer, things can sometimes stack up.
69 | Please feel free to make pull requests (see :ref:`contrib`), file issues, and
70 | make feature requests. But note the third point: :ref:`contributors` to this
71 | project do it for fun. Let's keep it cordial.
72 |
73 | **If you encounter any issues in the project, please see the** :ref:`filing_bugs` **section for how to file an issue.**
74 |
--------------------------------------------------------------------------------
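
As a concrete companion to the "How ``auto_arima`` works" section above, a minimal
sketch using one of the bundled datasets (``wineind``); the exact model found will
depend on the versions you have installed:

```python
import pmdarima as pm

# auto_arima selects d/D with stationarity and seasonality tests, then
# steps through candidate (p, q)(P, Q) orders, keeping the lowest-AIC fit
y = pm.datasets.load_wineind()
model = pm.auto_arima(y, seasonal=True, m=12, suppress_warnings=True)
print(model.order, model.seasonal_order)
print(model.predict(n_periods=12))  # forecast the next 12 months
```
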
/doc/citing.rst:
--------------------------------------------------------------------------------
1 | .. _citing:
2 |
3 | ======
4 | Citing
5 | ======
6 |
7 | If you would like to include ``pmdarima`` in your published work, please cite it as follows:
8 |
9 | .. raw:: html
10 |
11 |
30 |
31 | BibTeX Entry:
32 |
33 | .. code-block:: tex
34 |
35 |     @MISC {pmdarima,
36 |       author = {Taylor G. Smith and others},
37 |       title = {{pmdarima}: ARIMA estimators for {Python}},
38 |       year = {2017--},
39 |       url = "http://www.alkaline-ml.com/pmdarima",
40 |       note = {[Online; accessed ]}
41 |     }
42 |
--------------------------------------------------------------------------------
/doc/contributors.rst:
--------------------------------------------------------------------------------
1 | .. _contributors:
2 |
3 | ============
4 | Contributors
5 | ============
6 |
7 | Thanks to the following users for their contributions to pmdarima!
8 |
9 | .. raw:: html
10 |
11 |
12 |
13 |
14 |
15 |
16 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/doc/img/bad_issue.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/bad_issue.png
--------------------------------------------------------------------------------
/doc/img/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/favicon.ico
--------------------------------------------------------------------------------
/doc/img/good_issue.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/good_issue.png
--------------------------------------------------------------------------------
/doc/img/lynx_autocorr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/lynx_autocorr.png
--------------------------------------------------------------------------------
/doc/img/m_matters.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/m_matters.png
--------------------------------------------------------------------------------
/doc/img/stock_forecasts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/stock_forecasts.png
--------------------------------------------------------------------------------
/doc/img/stock_lag_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/stock_lag_plot.png
--------------------------------------------------------------------------------
/doc/img/sunspots/bc-transformed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/sunspots/bc-transformed.png
--------------------------------------------------------------------------------
/doc/img/sunspots/log-transformed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/sunspots/log-transformed.png
--------------------------------------------------------------------------------
/doc/img/sunspots/model-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/sunspots/model-1.png
--------------------------------------------------------------------------------
/doc/img/sunspots/model-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/sunspots/model-2.png
--------------------------------------------------------------------------------
/doc/img/sunspots/untransformed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/sunspots/untransformed.png
--------------------------------------------------------------------------------
/doc/includes/api_css.rst:
--------------------------------------------------------------------------------
1 | ..
2 | File to ..include in the API ref document.
3 |
4 | .. raw:: html
5 |
6 |
30 |
--------------------------------------------------------------------------------
/doc/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=python -msphinx
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=pmdarima
13 |
14 | if "%1" == "" goto help
15 |
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | echo.
19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed,
20 | echo.then set the SPHINXBUILD environment variable to point to the full
21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the
22 | echo.Sphinx directory to PATH.
23 | echo.
24 | echo.If you don't have Sphinx installed, grab it from
25 | echo.http://sphinx-doc.org/
26 | exit /b 1
27 | )
28 |
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 |
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 |
35 | :end
36 | popd
37 |
--------------------------------------------------------------------------------
/doc/migration-guide.rst:
--------------------------------------------------------------------------------
1 | .. _migration:
2 |
3 | ============================
4 | ``pmdarima`` Migration guide
5 | ============================
6 |
7 | In `issue #34 `_ we made the
8 | decision to migrate from the ``pyramid-arima`` namespace to the ``pmdarima``
9 | namespace to avoid collisions with the web framework named ``pyramid``.
10 |
11 | Migration is simple:
12 |
13 | .. code-block:: bash
14 |
15 | $ pip install pmdarima
16 |
17 | Rather than importing functions and modules from the ``pyramid`` package, simply
18 | import from ``pmdarima`` instead:
19 |
20 | .. code-block:: python
21 |
22 | from pmdarima.arima import auto_arima
23 |
24 | Or just import it as a namespace:
25 |
26 | .. code-block:: python
27 |
28 | import pmdarima as pm
29 | my_model = pm.auto_arima(my_timeseries)
30 |
31 | For further installation instructions, check out the :ref:`setup` and :ref:`quickstart` guides.
32 |
--------------------------------------------------------------------------------
/doc/no-successful-model.rst:
--------------------------------------------------------------------------------
1 | .. _no_successful_model:
2 |
3 | ===================================
4 | When no viable models can be found
5 | ===================================
6 |
7 | For certain time series, the search may return no viable models::
8 |
9 | Traceback (most recent call last):
10 | File "", line 1, in
11 | "Could not successfully fit a viable ARIMA model "
12 | ValueError: Could not successfully fit a viable ARIMA model to input data.
13 | See http://alkaline-ml.com/pmdarima/no-successful-model.html for more information on why this can happen.
14 |
15 |
16 | This can happen for a number of reasons:
17 |
18 | * Most commonly, the roots of your model may be nearly non-invertible, meaning the inverted roots
19 | lie too close to the unit circle. Here's a good `blog post `_
20 | on the subject. Make sure ``trace`` is truthy in order to see these warnings when fitting your model.
21 |
22 | * Sometimes, your data may not be stationary, which can cause statsmodels to raise errors when fitting. In
23 | this case, the stepwise algorithm will filter out the problem model fits. These errors can arise in a number
24 | of situations, ranging from non-stationarity to actual code errors. Setting ``error_action='trace'`` will log
25 | the stacktraces of any errors encountered during the search.
26 |
27 | * Your input data may not be suitable for ARIMA modeling. For instance, it could be a simple polynomial
28 | or solved by linear regression (i.e., differencing the time series has made it perfectly constant).
29 |
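A minimal sketch combining the diagnostic settings mentioned above (``y`` stands
in for your own series):

.. code-block:: python

    import pmdarima as pm

    # trace=2 prints the fullest search progress, and error_action='trace'
    # logs the stacktrace of every failed candidate fit
    model = pm.auto_arima(y, trace=2, error_action='trace',
                          suppress_warnings=False)
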
30 | Make sure to set ``trace`` to at least 1 in order to see the search progress, and to a value >1 to see the
31 | maximum trace logging available. If you still cannot diagnose why you are getting this error message, consider
32 | :ref:`filing_bugs`.
33 |
--------------------------------------------------------------------------------
/doc/refreshing.rst:
--------------------------------------------------------------------------------
1 | .. _refreshing:
2 |
3 | ============================
4 | Refreshing your ARIMA models
5 | ============================
6 |
7 | There are two ways to keep your models up-to-date with pmdarima:
8 |
9 | 1. Periodically, your ARIMA will need to be refreshed given new observations. See
10 | `this discussion `_
11 | and `this one `_
12 | on either re-using ``auto_arima``-estimated order terms or re-fitting altogether (a sketch of the former follows this list).
13 |
14 | 2. If you're not ready to totally refresh your model parameters, but would like to add observations to
15 | your model (so new forecasts originate from the latest samples) with minor parameter updates, the ARIMA class makes it
16 | possible to `add new samples <./modules/generated/pmdarima.arima.ARIMA.html#pmdarima.arima.ARIMA.update>`_.
17 | See `this example `_
18 | for more info.
19 |
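A sketch of the first approach, re-using previously estimated order terms when
refitting (``fitted`` and ``y_new`` are hypothetical names here):

.. code-block:: python

    import pmdarima as pm

    # `fitted` is a model previously estimated by auto_arima; `y_new` is
    # the refreshed training series
    refreshed = pm.ARIMA(order=fitted.order,
                         seasonal_order=fitted.seasonal_order).fit(y_new)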
20 |
21 | Updating your model with new observations
22 | -----------------------------------------
23 |
24 | The easiest way to keep your model up-to-date without completely refitting it is simply to
25 | update your model with new observations so that future forecasts take the newest observations
26 | into consideration. Assume that you fit the following model:
27 |
28 | .. code-block:: python
29 |
30 | import pmdarima as pm
31 | from pmdarima.datasets import load_wineind
32 |
33 | y = load_wineind()
34 | train, test = y[:125], y[125:]
35 |
36 | # Fit an ARIMA
37 | arima = pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
38 | arima.fit(train)
39 |
40 | After fitting and persisting your model (see :ref:`serializing`), you use your model
41 | to produce forecasts. After a few forecasts, you want to record the *actual* observed
42 | values so your model considers them when making newer forecasts:
43 |
44 | .. code-block:: python
45 |
46 | arima.update(test) # pretend these are the new ones
47 |
48 | Your model will now produce forecasts from the latest *new* observations. Of course,
49 | you'll have to re-persist your ARIMA model after updating it! Internally, this step
50 | starts from the existing parameters and takes a small number of MLE steps, nudging
51 | the parameters slightly toward the new data. You can pass ``maxiter`` to control how
52 | much your model updates.
53 |
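For example (the ``maxiter=10`` below is purely illustrative):

.. code-block:: python

    arima.update(test, maxiter=10)  # cap the number of MLE update steps
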
--------------------------------------------------------------------------------
/doc/rfc/372-exog-to-x.rst:
--------------------------------------------------------------------------------
1 | .. _exog_to_X:
2 |
3 | ===========================
4 | RFC: ``exogenous`` -> ``X``
5 | ===========================
6 |
7 | This RFC proposes the renaming of the ``exogenous`` arg to ``X``. While this would
8 | impact the public API, we would allow the current ``exogenous`` argument to persist
9 | for several minor release cycles with a deprecation warning before completely removing it
10 | in the next major release (2.0).
11 |
12 | Why?
13 | ----
14 |
15 | * **It's typo-prone**. We've received several issues lately with people asking why the ``exogenous``
16 | argument was not doing anything. Upon close inspection, it was evident they were misspelling the
17 | arg as "exogeneous", and the presence of ``**kwargs`` in the function signature allowed
18 | the argument through without raising a ``TypeError``.
19 |
20 | * **It's clunky**. Typing ``exogenous`` when other APIs have simplified this to the ubiquitous
21 | ``X`` used in other scikit-style packages (scikit-learn, scikit-image, sktime) seems like
22 | a slightly annoying, arbitrary difference in signature definitions that keeps us from
23 | matching the signatures of other similar packages.
24 |
25 | * **It can be confusing**. Not all of our user base is familiar with classical statistics
26 | terminology and may not realize what this argument permits them to do. Conversely, nearly all
27 | users are familiar with the idea of what ``X`` allows them to do.
28 |
29 | How?
30 | ----
31 |
32 | For a while, we'd allow the ``exogenous`` argument to be passed in ``**kwargs``, and would simply
33 | warn if it were present. For example:
34 |
35 | .. code-block:: python
36 |
37 | def fit(self, y, X=None, **kwargs):
38 |     if X is None:
39 |         X = kwargs.pop("exogenous", None)
40 |         if X is not None:
41 |             warnings.warn("`exogenous` is deprecated and will raise an error "
42 |                           "in version 2.0 - Use the `X` arg instead",
43 |                           DeprecationWarning)
44 |
45 | This would ensure backwards compatibility for several minor release cycles before the
46 | change was made, and would give sufficient time to users to switch over to the new naming scheme.
47 |
48 | Precedent
49 | ---------
50 |
51 | Scikit-learn has made similar package naming decisions in the name of package consistency and ubiquity,
52 | notably in migrating the ``cross_validation`` namespace to the ``model_selection`` namespace in version
53 | 0.18. This was preceded by several minor releases that warned on imports.
54 |
--------------------------------------------------------------------------------
/doc/rfc/index.rst:
--------------------------------------------------------------------------------
1 | .. _rfc:
2 |
3 | =============
4 | pmdarima RFCs
5 | =============
6 |
7 | An RFC, or "request for comments," is a common practice in open source packages, and
8 | allows users and contributors to weigh in on a proposal that will fundamentally alter
9 | the public API (usually encompassing breaking changes or design decisions). All ``pmdarima``
10 | RFCs will be included for future users of the package to read through so that decision-making
11 | is transparent and makes sense to all users.
12 |
13 | .. toctree::
14 | :maxdepth: 2
15 | :hidden:
16 |
17 | Renaming the "exogenous" argument <./372-exog-to-x.rst>
18 |
--------------------------------------------------------------------------------
/doc/seasonal-differencing-issues.rst:
--------------------------------------------------------------------------------
1 | .. _seasonal_differencing_issues:
2 |
3 | ============================================
4 | Encountering issues in seasonal differencing
5 | ============================================
6 |
7 | For certain time series, the seasonal differencing operation may fail::
8 |
9 | Traceback (most recent call last):
10 | File "", line 1, in
11 | "Could not successfully fit a viable ARIMA model "
12 | There are no more samples after a first-order seasonal differencing. See
13 | http://alkaline-ml.com/pmdarima/seasonal-differencing-issues.html for a
14 | more in-depth explanation and potential work-arounds.
15 |
16 |
17 | In short, the seasonal differencing test has detected your time series could benefit
18 | from a non-zero seasonal differencing term, ``D``, but your data is exhausted after
19 | differencing it by ``m``. Basically, your dataset is too small to be differenced by ``m``.
20 | You have only a few options as a work-around here:
21 |
22 | * Use a larger training set.
23 |
24 | * Determine whether or not you've set the appropriate ``m``. Should it be smaller? See
25 | :ref:`period` for more information on the topic.
26 |
27 | * Manually set ``D=0`` in the :func:`pmdarima.arima.auto_arima` call. This is the least
28 | desirable solution, since it skips a step that could lead to a better model.
29 |
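To see why, note that each seasonal difference consumes ``m`` samples. A quick
sketch with :func:`pmdarima.utils.diff` (the toy array is illustrative):

.. code-block:: python

    import numpy as np
    from pmdarima.utils import diff

    y = np.arange(13.0)                    # only 13 samples with m=12
    print(diff(y, lag=12, differences=1))  # [12.] -- one sample remains
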
30 | The best decision is always to use a larger training set, but sometimes that simply
31 | is not possible. Make sure to set ``trace`` to at least 1 in order to see the search progress, and to a value >1 to see the
32 | maximum trace logging available. If you still cannot diagnose why you are getting this error message, consider
33 | :ref:`filing_bugs`.
34 |
--------------------------------------------------------------------------------
/doc/serialization.rst:
--------------------------------------------------------------------------------
1 | .. _serializing:
2 |
3 | =============================
4 | Serializing your ARIMA models
5 | =============================
6 |
7 | After you've fit your model and you're ready to start making predictions out
8 | in your production environment, it's time to save your ARIMA to disk.
9 | Pmdarima models can be serialized with ``pickle`` or ``joblib``, just as with
10 | most other python objects:
11 |
12 | .. code-block:: python
13 |
14 | from pmdarima.arima import auto_arima
15 | from pmdarima.datasets import load_lynx
16 | import numpy as np
17 |
18 | # For serialization:
19 | import joblib
20 | import pickle
21 |
22 | # Load data and fit a model
23 | y = load_lynx()
24 | arima = auto_arima(y, seasonal=True)
25 |
26 | # Serialize with Pickle
27 | with open('arima.pkl', 'wb') as pkl:
28 | pickle.dump(arima, pkl)
29 |
30 | # You can still make predictions from the model at this point
31 | arima.predict(n_periods=5)
32 |
33 | # Now read it back and make a prediction
34 | with open('arima.pkl', 'rb') as pkl:
35 | pickle_preds = pickle.load(pkl).predict(n_periods=5)
36 |
37 | # Or maybe joblib tickles your fancy
38 | joblib.dump(arima, 'arima.pkl')
39 | joblib_preds = joblib.load('arima.pkl').predict(n_periods=5)
40 |
41 | # show they're the same
42 | np.allclose(pickle_preds, joblib_preds)
43 |
44 |
--------------------------------------------------------------------------------
/doc/setup.rst:
--------------------------------------------------------------------------------
1 | .. _setup:
2 |
3 | =====
4 | Setup
5 | =====
6 |
7 | Pmdarima depends on several prominent Python packages:
8 |
9 | * `Numpy `_ (>=1.17.3)
10 | * `SciPy `_ (>=1.3.2)
11 | * `Scikit-learn `_ (>=0.22)
12 | * `Pandas `_ (>=0.19)
13 | * `Statsmodels `_ (>=0.11)
14 |
15 | Install from PyPI
16 | -----------------
17 |
18 | Pmdarima is on PyPI under the package name ``pmdarima`` and can be
19 | installed via ``pip``:
20 |
21 | .. code-block:: bash
22 |
23 | $ pip install pmdarima
24 |
25 | Pmdarima uses Cython, which means some C source is compiled as part of
26 | the build process. To ensure the package was built correctly, import
27 | the following module in Python:
28 |
29 | .. code-block:: python
30 |
31 | from pmdarima.arima import auto_arima
32 |
33 | If you encounter an ``ImportError``, try updating numpy and re-installing. Outdated
34 | numpy versions have been observed to break the pmdarima build.
35 |
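One possible remedy (a sketch; exact commands depend on your environment):

.. code-block:: bash

    $ pip install --upgrade numpy
    $ pip install --force-reinstall --no-cache-dir pmdarima
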
36 | Install from Conda
37 | ------------------
38 |
39 | Pmdarima is on conda under the channel ``conda-forge`` and package name ``pmdarima``. It can be downloaded via ``conda``
40 | like so:
41 |
42 | .. code-block:: bash
43 |
44 | $ conda config --add channels conda-forge
45 | $ conda config --set channel_priority strict
46 | $ conda install pmdarima
47 |
48 | **Note:** We do not maintain our own Conda binaries; they are maintained at https://github.com/conda-forge/pmdarima-feedstock.
49 | See that repo for further documentation on working with Pmdarima on Conda.
50 |
51 | Build from source
52 | -----------------
53 |
54 | If you'd like to install a development or bleeding edge version of pmdarima,
55 | you can always build it from the git source. First clone it from Git:
56 |
57 | .. code-block:: bash
58 |
59 | $ git clone https://github.com/alkaline-ml/pmdarima.git
60 | $ cd pmdarima
61 |
62 | Building the package will require ``gcc`` (unix) or a Windows equivalent, like
63 | ``MinGW``. To build in development mode (for running unit tests):
64 |
65 | .. code-block:: bash
66 |
67 | $ python setup.py develop
68 |
69 | You can also use the ``Makefile`` if you're on a posix machine:
70 |
71 | .. code-block:: bash
72 |
73 | $ make develop
74 |
75 | Alternatively, to install the package in your ``site-packages``:
76 |
77 | .. code-block:: bash
78 |
79 | $ python setup.py install
80 |
81 | Or, with the ``Makefile``:
82 |
83 | .. code-block:: bash
84 |
85 | $ make install
86 |
--------------------------------------------------------------------------------
/doc/sg_execution_times.rst:
--------------------------------------------------------------------------------
1 |
2 | :orphan:
3 |
4 | .. _sphx_glr_sg_execution_times:
5 |
6 |
7 | Computation times
8 | =================
9 | **00:11.856** total execution time for 13 files **from all galleries**:
10 |
11 | .. container::
12 |
13 | .. raw:: html
14 |
15 |
19 |
20 |
21 |
22 |
27 |
28 | .. list-table::
29 | :header-rows: 1
30 | :class: table table-striped sg-datatable
31 |
32 | * - Example
33 | - Time
34 | - Mem (MB)
35 | * - :ref:`sphx_glr_auto_examples_preprocessing_example_date_featurizer.py` (``../examples/preprocessing/example_date_featurizer.py``)
36 | - 00:03.490
37 | - 0.0
38 | * - :ref:`sphx_glr_auto_examples_example_simple_fit.py` (``../examples/example_simple_fit.py``)
39 | - 00:03.400
40 | - 0.0
41 | * - :ref:`sphx_glr_auto_examples_model_selection_example_cross_val_predict.py` (``../examples/model_selection/example_cross_val_predict.py``)
42 | - 00:01.976
43 | - 0.0
44 | * - :ref:`sphx_glr_auto_examples_example_pipeline.py` (``../examples/example_pipeline.py``)
45 | - 00:00.937
46 | - 0.0
47 | * - :ref:`sphx_glr_auto_examples_model_selection_example_cross_validation.py` (``../examples/model_selection/example_cross_validation.py``)
48 | - 00:00.775
49 | - 0.0
50 | * - :ref:`sphx_glr_auto_examples_arima_example_persisting_a_model.py` (``../examples/arima/example_persisting_a_model.py``)
51 | - 00:00.602
52 | - 0.0
53 | * - :ref:`sphx_glr_auto_examples_utils_example_tsdisplay.py` (``../examples/utils/example_tsdisplay.py``)
54 | - 00:00.233
55 | - 0.0
56 | * - :ref:`sphx_glr_auto_examples_arima_example_add_new_samples.py` (``../examples/arima/example_add_new_samples.py``)
57 | - 00:00.198
58 | - 0.0
59 | * - :ref:`sphx_glr_auto_examples_arima_example_seasonal_decomposition.py` (``../examples/arima/example_seasonal_decomposition.py``)
60 | - 00:00.140
61 | - 0.0
62 | * - :ref:`sphx_glr_auto_examples_arima_example_auto_arima.py` (``../examples/arima/example_auto_arima.py``)
63 | - 00:00.102
64 | - 0.0
65 | * - :ref:`sphx_glr_auto_examples_datasets_example_load_data.py` (``../examples/datasets/example_load_data.py``)
66 | - 00:00.001
67 | - 0.0
68 | * - :ref:`sphx_glr_auto_examples_utils_example_array_differencing.py` (``../examples/utils/example_array_differencing.py``)
69 | - 00:00.001
70 | - 0.0
71 | * - :ref:`sphx_glr_auto_examples_utils_example_array_concatenation.py` (``../examples/utils/example_array_concatenation.py``)
72 | - 00:00.000
73 | - 0.0
74 |
--------------------------------------------------------------------------------
/doc/sphinxext/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include tests *.py
2 | include *.txt
--------------------------------------------------------------------------------
/doc/sphinxext/github_link.py:
--------------------------------------------------------------------------------
1 | from operator import attrgetter
2 | import inspect
3 | import subprocess
4 | import os
5 | import sys
6 | from functools import partial
7 |
8 | REVISION_CMD = 'git rev-parse --short HEAD'
9 |
10 |
11 | def _get_git_revision():
12 | try:
13 | revision = subprocess.check_output(REVISION_CMD.split()).strip()
14 | except (subprocess.CalledProcessError, OSError):
15 | print('Failed to execute git to get revision')
16 | return None
17 | return revision.decode('utf-8')
18 |
19 |
20 | def _linkcode_resolve(domain, info, package, url_fmt, revision):
21 | """Determine a link to online source for a class/method/function
22 |
23 | This is called by sphinx.ext.linkcode
24 |
25 | An example with a long-untouched module that everyone has
26 | >>> _linkcode_resolve('py', {'module': 'tty',
27 | ... 'fullname': 'setraw'},
28 | ... package='tty',
29 | ... url_fmt='http://hg.python.org/cpython/file/'
30 | ... '{revision}/Lib/{package}/{path}#L{lineno}',
31 | ... revision='xxxx')
32 | 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18'
33 | """
34 |
35 | if revision is None:
36 | return
37 | if domain not in ('py', 'pyx'):
38 | return
39 | if not info.get('module') or not info.get('fullname'):
40 | return
41 |
42 | class_name = info['fullname'].split('.')[0]
43 | if type(class_name) != str:
44 | # Python 2 only
45 | class_name = class_name.encode('utf-8')
46 | module = __import__(info['module'], fromlist=[class_name])
47 | obj = attrgetter(info['fullname'])(module)
48 |
49 | try:
50 | fn = inspect.getsourcefile(obj)
51 | except Exception:
52 | fn = None
53 | if not fn:
54 | try:
55 | fn = inspect.getsourcefile(sys.modules[obj.__module__])
56 | except Exception:
57 | fn = None
58 | if not fn:
59 | return
60 |
61 | fn = os.path.relpath(fn,
62 | start=os.path.dirname(__import__(package).__file__))
63 | try:
64 | lineno = inspect.getsourcelines(obj)[1]
65 | except Exception:
66 | lineno = ''
67 | return url_fmt.format(revision=revision, package=package,
68 | path=fn, lineno=lineno)
69 |
70 |
71 | def make_linkcode_resolve(package, url_fmt):
72 | """Returns a linkcode_resolve function for the given URL format
73 |
74 | revision is a git commit reference (hash or name)
75 |
76 | package is the name of the root module of the package
77 |
78 | url_fmt is along the lines of ('https://github.com/USER/PROJECT/'
79 | 'blob/{revision}/{package}/'
80 | '{path}#L{lineno}')
81 | """
82 | revision = _get_git_revision()
83 | return partial(_linkcode_resolve, revision=revision, package=package,
84 | url_fmt=url_fmt)
85 |
--------------------------------------------------------------------------------
/doc/usecases.rst:
--------------------------------------------------------------------------------
1 | .. title:: Use cases
2 |
3 | .. _use_cases:
4 |
5 | =========
6 | Use cases
7 | =========
8 |
9 | Here are some easy-to-follow, common use cases for why you might use pmdarima
10 | in your forecasting work.
11 |
12 | .. raw:: html
13 |
14 |
15 |
16 | .. toctree::
17 |
18 | usecases/stocks.rst
19 | usecases/sun-spots.rst
20 |
21 | .. raw:: html
22 |
23 |
24 |
--------------------------------------------------------------------------------
/doc/user_guide.rst:
--------------------------------------------------------------------------------
1 | .. title:: User guide: contents
2 |
3 | .. _user_guide:
4 |
5 | ==========
6 | User Guide
7 | ==========
8 |
9 | The following guides cover how to get started with a pmdarima distribution. The
10 | easiest solution is simply installing from PyPI, but if you'd like to
11 | contribute you'll need to be able to build from source, as laid out in the
12 | :ref:`setup` section.
13 |
14 | .. raw:: html
15 |
16 |
17 |
18 | .. toctree::
19 | :numbered:
20 | :maxdepth: 2
21 |
22 | about.rst
23 | setup.rst
24 | quickstart.rst
25 | serialization.rst
26 | refreshing.rst
27 | tips_and_tricks.rst
28 | no-successful-model.rst
29 | seasonal-differencing-issues.rst
30 | modules/datasets.rst
31 | usecases.rst
32 | contributing.rst
33 | contributors.rst
34 | citing.rst
35 |
36 | .. raw:: html
37 |
38 |
39 |
--------------------------------------------------------------------------------
/etc/downloads_badges.py:
--------------------------------------------------------------------------------
1 | from datetime import date, timedelta
2 | import json
3 | import math
4 | import os
5 | import requests
6 | from statistics import mean
7 |
8 |
9 | def millify(n):
10 | """Abbreviate a number to nearest thousand, million, etc.
11 |
12 | Adapted from: https://stackoverflow.com/a/3155023/10696164
13 |
14 | Parameters
15 | ----------
16 | n : int
17 | The number to abbreviate
18 |
19 | Returns
20 | -------
21 | millified : str
22 | The number abbreviated to the nearest thousand, million, etc.
23 | """
24 | millnames = ['', 'k', 'M', 'B', 'T']
25 | n = float(n)
26 | millidx = max(
27 | 0,
28 | min(
29 | len(millnames) - 1,
30 | int(math.floor(0 if n == 0 else math.log10(abs(n)) / 3))
31 | )
32 | )
33 | final_num = float(n / 10 ** (3 * millidx))
34 | one_decimal = round(final_num, 1)
35 |
36 | # If the number is in the millions, and has a decimal, we want to show one
37 | # decimal. I.e.:
38 | # - 967123 -> 967k
39 | # - 1000123 -> 1M
40 | # - 1100123 -> 1.1M
41 | final_output = one_decimal if n > 1e6 and not one_decimal.is_integer() else int(round(final_num, 0))
42 |
43 | return f'{final_output}{millnames[millidx]}'
44 |
45 |
46 | def get_default_value(downloads):
47 | """Find the default value (one day's worth of downloads) for a given input
48 |
49 | Parameters
50 | ----------
51 | downloads : dict
52 | A dict of dates and downloads on that day
53 |
54 | Returns
55 | -------
56 | default_value : int
57 | The default value, which is the average of the last 7 days of downloads
58 | that are contained in the input dictionary.
59 | """
60 | last_7_keys = sorted(downloads.keys())[-7:]
61 | default_value = int(mean([downloads[key] for key in last_7_keys]))
62 | return default_value
63 |
64 |
65 | # Used to calculate downloads for the last week
66 | today = date.today()
67 | last_week = today - timedelta(days=7)
68 | DATE_FORMAT = '%Y-%m-%d'
69 |
70 | # Open a session to save time
71 | session = requests.Session()
72 |
73 | # Get the data for both the legacy namespace and our current one
74 | pyramid_arima = json.loads(session.get('https://api.pepy.tech/api/projects/pyramid-arima').text)
75 | pmdarima = json.loads(session.get('https://api.pepy.tech/api/projects/pmdarima').text)
76 |
77 | # Sum up pmdarima and pyramid-arima downloads to the past week
78 | pmdarima_downloads = 0
79 | default_pmdarima_value = get_default_value(pmdarima['downloads'])
80 | for i in range(7):
81 | pmdarima_downloads += pmdarima['downloads'].get(
82 | (last_week + timedelta(days=i)).strftime(DATE_FORMAT),
83 | default_pmdarima_value
84 | )
85 |
86 | pyramid_arima_downloads = 0
87 | default_pyramid_arima_value = get_default_value(pyramid_arima['downloads'])
88 | for i in range(7):
89 | pyramid_arima_downloads += pyramid_arima['downloads'].get(
90 | (last_week + timedelta(days=i)).strftime(DATE_FORMAT),
91 | default_pyramid_arima_value
92 | )
93 |
94 | # Millify the totals
95 | total_downloads = millify(pyramid_arima['total_downloads'] + pmdarima['total_downloads'])
96 | weekly_downloads = millify(pmdarima_downloads + pyramid_arima_downloads)
97 |
98 | data = {
99 | 'total': total_downloads,
100 | 'weekly': weekly_downloads
101 | }
102 |
103 | request = session.post(
104 | url='https://store.zapier.com/api/records',
105 | headers={
106 | 'X-Secret': os.getenv('ZAPIER_SHA')
107 | },
108 | data=json.dumps(data)
109 | )
110 | request.raise_for_status()
111 |
112 | print(f"""
113 | New total downloads: {data['total']}
114 | New weekly downloads: {data['weekly']}
115 | """)
116 |
--------------------------------------------------------------------------------
/examples/README.txt:
--------------------------------------------------------------------------------
1 | .. _general_examples:
2 |
3 | Examples
4 | ========
5 |
6 | General examples
7 | ----------------
8 |
9 | General-purpose and introductory examples for ``pmdarima``. These examples are
10 | designed to introduce you to the package style and layout.
11 |
12 | .. raw:: html
13 |
14 |
15 |
--------------------------------------------------------------------------------
/examples/arima/README.txt:
--------------------------------------------------------------------------------
1 | .. _arima_examples:
2 |
3 | ARIMA examples
4 | --------------
5 |
6 | Examples of how to use the :mod:`pmdarima.arima` module to fit timeseries
7 | models.
8 |
9 | .. raw:: html
10 |
11 |
12 |
--------------------------------------------------------------------------------
/examples/arima/example_add_new_samples.py:
--------------------------------------------------------------------------------
1 | """
2 | =====================================
3 | Adding new observations to your model
4 | =====================================
5 |
6 |
7 | This example demonstrates how to add new ground truth
8 | observations to your model so that forecasting continues
9 | with respect to true, observed values. This also slightly
10 | updates the model parameters, taking several new steps from
11 | the existing model parameters.
12 |
13 | .. raw:: html
14 |
15 |
16 | """
17 | print(__doc__)
18 |
19 | # Author: Taylor Smith
20 |
21 | import pmdarima as pm
22 | from pmdarima import model_selection
23 | import matplotlib.pyplot as plt
24 | import numpy as np
25 |
26 | # #############################################################################
27 | # Load the data and split it into separate pieces
28 | data = pm.datasets.load_lynx()
29 | train, test = model_selection.train_test_split(data, train_size=100)
30 |
31 | # #############################################################################
32 | # Fit with some validation (cv) samples
33 | arima = pm.auto_arima(train, start_p=1, start_q=1, d=0, max_p=5, max_q=5,
34 | out_of_sample_size=10, suppress_warnings=True,
35 | stepwise=True, error_action='ignore')
36 |
37 | # Now plot the results and the forecast for the test set
38 | preds, conf_int = arima.predict(n_periods=test.shape[0],
39 | return_conf_int=True)
40 |
41 | fig, axes = plt.subplots(2, 1, figsize=(12, 8))
42 | x_axis = np.arange(train.shape[0] + preds.shape[0])
43 | axes[0].plot(x_axis[:train.shape[0]], train, alpha=0.75)
44 | axes[0].scatter(x_axis[train.shape[0]:], preds, alpha=0.4, marker='o')
45 | axes[0].scatter(x_axis[train.shape[0]:], test, alpha=0.4, marker='x')
46 | axes[0].fill_between(x_axis[-preds.shape[0]:], conf_int[:, 0], conf_int[:, 1],
47 | alpha=0.1, color='b')
48 |
49 | # fill the section where we "held out" samples (out_of_sample_size=10) in our model fit
50 | axes[0].fill_betweenx(axes[0].get_ylim(), x_axis[90], x_axis[100], alpha=0.1, color='r')
51 | axes[0].set_title("Train samples & forecasted test samples")
52 |
53 | # Now add the actual samples to the model and create NEW forecasts
54 | arima.update(test)
55 | new_preds, new_conf_int = arima.predict(n_periods=10, return_conf_int=True)
56 | new_x_axis = np.arange(data.shape[0] + 10)
57 |
58 | axes[1].plot(new_x_axis[:data.shape[0]], data, alpha=0.75)
59 | axes[1].scatter(new_x_axis[data.shape[0]:], new_preds, alpha=0.4, marker='o')
60 | axes[1].fill_between(new_x_axis[-new_preds.shape[0]:],
61 | new_conf_int[:, 0],
62 | new_conf_int[:, 1],
63 | alpha=0.1, color='g')
64 | axes[1].set_title("Added new observed values with new forecasts")
65 | plt.show()
66 |
--------------------------------------------------------------------------------
/examples/arima/example_auto_arima.py:
--------------------------------------------------------------------------------
1 | """
2 | ===========================
3 | Fitting an auto_arima model
4 | ===========================
5 |
6 |
7 | This example demonstrates how we can use the ``auto_arima`` function to
8 | select an optimal time series model. We'll be fitting our model on the lynx
9 | dataset available in the :ref:`datasets` submodule.
10 |
11 | .. raw:: html
12 |
13 |
14 | """
15 | print(__doc__)
16 |
17 | # Author: Taylor Smith
18 |
19 | import pmdarima as pm
20 | from pmdarima import model_selection
21 | from sklearn.metrics import mean_squared_error
22 | import matplotlib.pyplot as plt
23 | import numpy as np
24 |
25 | # #############################################################################
26 | # Load the data and split it into separate pieces
27 | data = pm.datasets.load_lynx()
28 | train, test = model_selection.train_test_split(data, train_size=90)
29 |
30 | # Fit a simple auto_arima model
31 | modl = pm.auto_arima(train, start_p=1, start_q=1, start_P=1, start_Q=1,
32 | max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True,
33 | stepwise=True, suppress_warnings=True, D=10, max_D=10,
34 | error_action='ignore')
35 |
36 | # Create predictions for the future, evaluate on test
37 | preds, conf_int = modl.predict(n_periods=test.shape[0], return_conf_int=True)
38 |
39 | # Print the error:
40 | print("Test RMSE: %.3f" % np.sqrt(mean_squared_error(test, preds)))
41 |
42 | # #############################################################################
43 | # Plot the points and the forecasts
44 | x_axis = np.arange(train.shape[0] + preds.shape[0])
45 | x_years = x_axis + 1821 # Year starts at 1821
46 |
47 | plt.plot(x_years[x_axis[:train.shape[0]]], train, alpha=0.75)
48 | plt.plot(x_years[x_axis[train.shape[0]:]], preds, alpha=0.75) # Forecasts
49 | plt.scatter(x_years[x_axis[train.shape[0]:]], test,
50 | alpha=0.4, marker='x') # Test data
51 | plt.fill_between(x_years[x_axis[-preds.shape[0]:]],
52 | conf_int[:, 0], conf_int[:, 1],
53 | alpha=0.1, color='b')
54 | plt.title("Lynx forecasts")
55 | plt.xlabel("Year")
56 |
--------------------------------------------------------------------------------
/examples/arima/example_persisting_a_model.py:
--------------------------------------------------------------------------------
1 | """
2 | =========================
3 | Persisting an ARIMA model
4 | =========================
5 |
6 |
7 | This example demonstrates how we can persist an ARIMA model to disk after
8 | fitting it. It can then be loaded back up and used to generate forecasts.
9 |
10 | .. raw:: html
11 |
12 |
13 | """
14 | print(__doc__)
15 |
16 | # Author: Taylor Smith
17 |
18 | import pmdarima as pm
19 | from pmdarima import model_selection
20 | import joblib # for persistence
21 | import os
22 |
23 | # #############################################################################
24 | # Load the data and split it into separate pieces
25 | y = pm.datasets.load_wineind()
26 | train, test = model_selection.train_test_split(y, train_size=125)
27 |
28 | # Fit an ARIMA
29 | arima = pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
30 | arima.fit(train)
31 |
32 | # #############################################################################
33 | # Persist a model and create predictions after re-loading it
34 | pickle_tgt = "arima.pkl"
35 | try:
36 | # Pickle it
37 | joblib.dump(arima, pickle_tgt, compress=3)
38 |
39 | # Load the model up, create predictions
40 | arima_loaded = joblib.load(pickle_tgt)
41 | preds = arima_loaded.predict(n_periods=test.shape[0])
42 | print("Predictions: %r" % preds)
43 |
44 | finally:
45 | # Remove the pickle file at the end of this example
46 | try:
47 | os.unlink(pickle_tgt)
48 | except OSError:
49 | pass
50 |
--------------------------------------------------------------------------------
/examples/arima/example_seasonal_decomposition.py:
--------------------------------------------------------------------------------
1 | """
2 | ==========================================
3 | Seasonal decomposition of your time-series
4 | ==========================================
5 |
6 |
7 | This example demonstrates how we can use the ``decompose`` function to extract
8 | the trend, seasonal, and random components of the time series and then
9 | plot them all using the ``decomposed_plot`` function. We'll be plotting both
10 | ``additive`` and ``multiplicative`` examples of seasonality. To see the R
11 | equivalent that inspired this example go `here `_.
12 |
13 | .. raw:: html
14 |
15 |
16 | """
17 | print(__doc__)
18 |
19 | # Author: Charles Drotar
20 |
21 | from pmdarima import arima
22 | from pmdarima import datasets
23 | from pmdarima import utils
24 |
25 | # #############################################################################
26 |
27 | # So what is happening when we call `decompose`?
28 | # 1) The trend is extracted from the signal via a convolution using either a
29 | # SMA or a user-defined filter.
30 | # 2) We remove the effects of the trend from the original signal by either
31 | # subtracting its effects or dividing out its effects for `additive` or
32 | # 'multiplicative' types of decompositions, respectively. We then take the
33 | # mean across all seasons to get the values for a single season. For m=4, we
34 | # expect 4 values for a single season.
35 | # 3) We then create the seasonal series by replicating the single season
36 | # until it is the same length as the trend signal.
37 | # 4) Lastly, to get the random/noise elements of the signal, we remove the
38 | # effects of both the trend and seasonal series; we are then left with the
39 | # variation of the original signal that is explainable by neither seasonal
40 | # nor trend effects.
41 | #
42 | # This logic produces a named tuple of the original signal, trend, seasonal,
43 | # and random components. It is this named tuple that is passed to
44 | # `decomposed_plot`
45 |
46 | figure_kwargs = {'figsize': (6, 6)} # set figure size for both examples
47 |
48 | #
49 | # ADDITIVE EXAMPLE : ausbeer
50 | #
51 |
52 | # Decompose the ausbeer dataset into trend, seasonal and random parts.
53 | # We subset to a small window of the time series.
54 |
55 | head_index = 17*4+2
56 | tail_index = 17*4-4
57 | first_index = head_index - tail_index
58 | last_index = head_index
59 | ausbeer = datasets.load_ausbeer()
60 | timeserie_beer = ausbeer[first_index:last_index]
61 | decomposed = arima.decompose(timeserie_beer, 'additive', m=4)
62 |
63 | # Plot the decomposed signal of ausbeer as a subplot
64 |
65 | axes = utils.decomposed_plot(decomposed, figure_kwargs=figure_kwargs,
66 | show=False)
67 | axes[0].set_title("Ausbeer Seasonal Decomposition")
68 |
69 |
70 | #
71 | # MULTIPLICATIVE EXAMPLE: airpassengers
72 | #
73 |
74 | # Decompose the airpassengers dataset into trend, seasonal and random parts.
75 | decomposed = arima.decompose(datasets.load_airpassengers(),
76 | 'multiplicative', m=12)
77 |
78 | # Plot the decomposed signal of airpassengers as a subplot
79 |
80 | axes = utils.decomposed_plot(decomposed, figure_kwargs=figure_kwargs,
81 | show=False)
82 | axes[0].set_title("Airpassengers Seasonal Decomposition")
83 |
--------------------------------------------------------------------------------
/examples/datasets/README.txt:
--------------------------------------------------------------------------------
1 | .. _datasets_examples:
2 |
3 | Datasets examples
4 | -----------------
5 |
6 | Examples of how to use the :mod:`pmdarima.datasets` module to conveniently load
7 | toy time series data for model benchmarking and experimentation.
8 |
9 | .. raw:: html
10 |
11 |
12 |
--------------------------------------------------------------------------------
/examples/datasets/example_load_data.py:
--------------------------------------------------------------------------------
1 | """
2 | ===============
3 | Dataset loading
4 | ===============
5 |
6 |
7 | In this example, we demonstrate pyramid's built-in toy datasets that can be
8 | used for benchmarking or experimentation. Pyramid has several built-in datasets
9 | that exhibit seasonality, non-stationarity, and other time series nuances.
10 |
11 | .. raw:: html
12 |
13 |
14 | """
15 | print(__doc__)
16 |
17 | # Author: Taylor Smith
18 |
19 | import pmdarima as pm
20 |
21 | # #############################################################################
22 | # You can load the datasets via load_<dataset_name>
23 | lynx = pm.datasets.load_lynx()
24 | print("Lynx array:")
25 | print(lynx)
26 |
27 | # You can also get a series, if you'd rather
28 | print("\nLynx series head:")
29 | print(pm.datasets.load_lynx(as_series=True).head())
30 |
31 | # Several other datasets:
32 | air_passengers = pm.datasets.load_airpassengers()
33 | austres = pm.datasets.load_austres()
34 | heart_rate = pm.datasets.load_heartrate()
35 | wineind = pm.datasets.load_wineind()
36 | woolyrnq = pm.datasets.load_woolyrnq()
37 |
--------------------------------------------------------------------------------
/examples/example_simple_fit.py:
--------------------------------------------------------------------------------
1 | """
2 | =======================
3 | Simple auto_arima model
4 | =======================
5 |
6 |
7 | This is a simple example of how we can fit an ARIMA model in several lines
8 | without knowing anything about our data or optimal hyperparameters.
9 |
10 | .. raw:: html
11 |
12 |
13 | """
14 | print(__doc__)
15 |
16 | # Author: Taylor Smith
17 |
18 | import pmdarima as pm
19 | from pmdarima import model_selection
20 | import numpy as np
21 | from matplotlib import pyplot as plt
22 |
23 | # #############################################################################
24 | # Load the data and split it into separate pieces
25 | data = pm.datasets.load_wineind()
26 | train, test = model_selection.train_test_split(data, train_size=150)
27 |
28 | # Fit a simple auto_arima model
29 | arima = pm.auto_arima(train, error_action='ignore', trace=True,
30 | suppress_warnings=True, maxiter=5,
31 | seasonal=True, m=12)
32 |
33 | # #############################################################################
34 | # Plot actual test vs. forecasts:
35 | x = np.arange(test.shape[0])
36 | plt.scatter(x, test, marker='x')
37 | plt.plot(x, arima.predict(n_periods=test.shape[0]))
38 | plt.title('Actual test samples vs. forecasts')
39 | plt.show()
40 |
--------------------------------------------------------------------------------
/examples/model_selection/README.txt:
--------------------------------------------------------------------------------
1 | .. _model_selection_examples:
2 |
3 | Cross-validation examples
4 | -------------------------
5 |
6 | Examples of how to use the :mod:`pmdarima.model_selection` module to fit
7 | timeseries models in a cross-validated fashion.
8 |
9 | .. raw:: html
10 |
11 |
12 |
--------------------------------------------------------------------------------
/examples/model_selection/example_cross_val_predict.py:
--------------------------------------------------------------------------------
1 | """
2 | ============================
3 | Cross-validation predictions
4 | ============================
5 |
6 | In addition to computing cross-validation scores, you can use cross-validation
7 | to produce predictions. Unlike traditional cross-validation, where folds are
8 | independent of one another, time-series folds may overlap (particularly in a
9 | sliding window). To account for this, forecasts made for the same time step
10 | across overlapping folds are combined using either the "mean" or "median" (tunable).
11 |
12 | .. raw:: html
13 |
14 |
15 | """
16 | print(__doc__)
17 |
18 | # Author: Taylor Smith
19 |
20 | import numpy as np
21 | import pmdarima as pm
22 | from pmdarima import model_selection
23 | from matplotlib import pyplot as plt
24 |
25 | print("pmdarima version: %s" % pm.__version__)
26 |
27 | # Load the data and set up the estimator and CV strategy
28 | y = pm.datasets.load_wineind()
29 | est = pm.ARIMA(order=(1, 1, 2),
30 | seasonal_order=(0, 1, 1, 12),
31 | suppress_warnings=True)
32 | cv = model_selection.SlidingWindowForecastCV(window_size=150, step=4, h=4)
33 | predictions = model_selection.cross_val_predict(
34 | est, y, cv=cv, verbose=2, averaging="median")
35 |
36 | # plot the predictions over the original series
37 | x_axis = np.arange(y.shape[0])
38 | n_test = predictions.shape[0]
39 |
40 | plt.plot(x_axis, y, alpha=0.75, c='b')
41 | plt.plot(x_axis[-n_test:], predictions, alpha=0.75, c='g') # Forecasts
42 | plt.title("Cross-validated wineind forecasts")
43 | plt.show()
44 |
--------------------------------------------------------------------------------
/examples/model_selection/example_cross_validation.py:
--------------------------------------------------------------------------------
1 | """
2 | ========================================
3 | Cross-validating your time series models
4 | ========================================
5 |
6 |
7 | Like scikit-learn, ``pmdarima`` provides several different strategies for
8 | cross-validating your time series models. The interface was designed to behave
9 | as similarly as possible to that of scikit to make its usage as simple as
10 | possible.
11 |
12 | .. raw:: html
13 |
14 |
15 | """
16 | print(__doc__)
17 |
18 | # Author: Taylor Smith
19 |
20 | import numpy as np
21 | import pmdarima as pm
22 | from pmdarima import model_selection
23 |
24 | print("pmdarima version: %s" % pm.__version__)
25 |
26 | # Load the data and split it into separate pieces
27 | data = pm.datasets.load_wineind()
28 | train, test = model_selection.train_test_split(data, train_size=165)
29 |
30 | # Even though we have a dedicated train/test split, we can (and should) still
31 | # use cross-validation on our training set to get a good estimate of the model
32 | # performance. We can choose which model is better based on how it performs
33 | # over various folds.
34 | model1 = pm.ARIMA(order=(2, 1, 1))
35 | model2 = pm.ARIMA(order=(1, 1, 2),
36 | seasonal_order=(0, 1, 1, 12),
37 | suppress_warnings=True)
38 | cv = model_selection.SlidingWindowForecastCV(window_size=100, step=24, h=1)
39 |
40 | model1_cv_scores = model_selection.cross_val_score(
41 | model1, train, scoring='smape', cv=cv, verbose=2)
42 |
43 | model2_cv_scores = model_selection.cross_val_score(
44 | model2, train, scoring='smape', cv=cv, verbose=2)
45 |
46 | print("Model 1 CV scores: {}".format(model1_cv_scores.tolist()))
47 | print("Model 2 CV scores: {}".format(model2_cv_scores.tolist()))
48 |
49 | # Pick based on which has a lower mean error rate
50 | m1_average_error = np.average(model1_cv_scores)
51 | m2_average_error = np.average(model2_cv_scores)
52 | errors = [m1_average_error, m2_average_error]
53 | models = [model1, model2]
54 |
55 | # print out the answer
56 | better_index = np.argmin(errors) # type: int
57 | print("Lowest average SMAPE: {} (model{})".format(
58 | errors[better_index], better_index + 1))
59 | print("Best model: {}".format(models[better_index]))
60 |
--------------------------------------------------------------------------------
/examples/preprocessing/README.txt:
--------------------------------------------------------------------------------
1 | .. _preprocessing_examples:
2 |
3 | Preprocessing examples
4 | ----------------------
5 |
6 | Examples of how to use the :mod:`pmdarima.preprocessing` module to transform
7 | your time series or exog features inside or outside of a pipeline.
8 |
9 | .. raw:: html
10 |
11 |
12 |
--------------------------------------------------------------------------------
/examples/preprocessing/example_date_featurizer.py:
--------------------------------------------------------------------------------
1 | """
2 | =================================================
3 | Modeling quasi-seasonal trends with date features
4 | =================================================
5 |
6 |
7 | Some trends are common enough to appear seasonal, yet sporadic enough that
8 | approaching them from a seasonal perspective may not be valid. An example of
9 | this is the `"end-of-the-month" effect `_.
10 | In this example, we'll explore how we can create meaningful features that
11 | express seasonal trends without needing to fit a seasonal model.
12 |
13 | .. raw:: html
14 |
15 |
16 | """
17 | print(__doc__)
18 |
19 | # Author: Taylor Smith
20 |
21 | import pmdarima as pm
22 | from pmdarima import arima
23 | from pmdarima import model_selection
24 | from pmdarima import pipeline
25 | from pmdarima import preprocessing
26 | from pmdarima.datasets._base import load_date_example
27 |
28 | import numpy as np
29 | from matplotlib import pyplot as plt
30 |
31 | print(f"pmdarima version: {pm.__version__}")
32 |
33 | # Load the data and split it into separate pieces
34 | y, X = load_date_example()
35 | y_train, y_test, X_train, X_test = \
36 | model_selection.train_test_split(y, X, test_size=20)
37 |
38 | # We can examine traits about the time series:
39 | pm.tsdisplay(y_train, lag_max=10)
40 |
41 | # We can see the ACF increases and decreases rather rapidly, which means we may
42 | # need some differencing. There also does not appear to be an obvious seasonal
43 | # trend.
44 | n_diffs = arima.ndiffs(y_train, max_d=5)
45 |
46 | # Here's what the featurizer will create for us:
47 | date_feat = preprocessing.DateFeaturizer(
48 | column_name="date", # the name of the date feature in the X matrix
49 | with_day_of_week=True,
50 | with_day_of_month=True)
51 |
52 | _, X_train_feats = date_feat.fit_transform(y_train, X_train)
53 | print(f"Head of generated X features:\n{repr(X_train_feats.head())}")
54 |
55 | # We can plug this X featurizer into a pipeline:
56 | pipe = pipeline.Pipeline([
57 | ('date', date_feat),
58 | ('arima', arima.AutoARIMA(d=n_diffs,
59 | trace=3,
60 | stepwise=True,
61 | suppress_warnings=True,
62 | seasonal=False))
63 | ])
64 |
65 | pipe.fit(y_train, X_train)
66 |
67 | # Plot our forecasts
68 | forecasts = pipe.predict(X=X_test)
69 |
70 | fig = plt.figure(figsize=(16, 8))
71 | ax = fig.add_subplot(1, 1, 1)
72 |
73 | n_train = y_train.shape[0]
74 | x = np.arange(n_train + forecasts.shape[0])
75 |
76 | ax.plot(x[:n_train], y_train, color='blue', label='Training Data')
77 | ax.plot(x[n_train:], forecasts, color='green', marker='o',
78 | label='Predicted')
79 | ax.plot(x[n_train:], y_test, color='red', label='Actual')
80 | ax.legend(loc='lower left', borderaxespad=0.5)
81 | ax.set_title('Predicted Foo')
82 | ax.set_ylabel('# Foo')
83 |
84 | plt.show()
85 |
86 | # What next? Try combining different featurizers in your pipeline to enhance
87 | # a model's predictive power.
88 |
--------------------------------------------------------------------------------
/examples/quick_start_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/examples/quick_start_output.png
--------------------------------------------------------------------------------
/examples/utils/README.txt:
--------------------------------------------------------------------------------
1 | .. _utils_examples:
2 |
3 | Utils examples
4 | --------------
5 |
6 | Examples of how to use the :mod:`pmdarima.utils` module to plot timeseries
7 | data, difference arrays, and more.
8 |
9 | .. raw:: html
10 |
11 |
12 |
--------------------------------------------------------------------------------
/examples/utils/example_array_concatenation.py:
--------------------------------------------------------------------------------
1 | """
2 | ===================
3 | Array concatenation
4 | ===================
5 |
6 |
7 | In this example, we demonstrate pyramid's convenient ``c`` function, which is,
8 | in essence, the same as R's. It's nothing more than a convenience function in
9 | the package, but one you should understand if you're contributing.
10 |
11 | .. raw:: html
12 |
13 |
14 | """
15 | print(__doc__)
16 |
17 | # Author: Taylor Smith
18 |
19 | import pmdarima as pm
20 | import numpy as np
21 |
22 | # #############################################################################
23 | # You can use the 'c' function to define an array from *args
24 | array1 = pm.c(1, 2, 3, 4, 5)
25 |
26 | # Or you can define an array from an existing iterable:
27 | array2 = pm.c([1, 2, 3, 4, 5])
28 | assert np.array_equal(array1, array2)
29 |
30 | # You can even use 'c' to flatten arrays:
31 | array_flat = pm.c(1, 2, 3, [4, 5])
32 | assert np.array_equal(array_flat, np.arange(5) + 1)
33 |
--------------------------------------------------------------------------------
/examples/utils/example_array_differencing.py:
--------------------------------------------------------------------------------
1 | """
2 | ==================
3 | Array differencing
4 | ==================
5 |
6 |
7 | In this example, we demonstrate pyramid's array differencing, and how it's used
8 | in conjunction with the ``d`` term to lag a time series.
9 |
10 | .. raw:: html
11 |
12 |
13 | """
14 | print(__doc__)
15 |
16 | # Author: Taylor Smith
17 |
18 | from pmdarima.utils import array
19 |
20 | # Build an array and show first order differencing results
21 | x = array.c(10, 4, 2, 9, 34)
22 | lag_1 = array.diff(x, lag=1, differences=1)
23 |
24 | # The result will be the same as: x[1:] - x[:-1]
25 | print(lag_1) # [-6., -2., 7., 25.]
26 |
27 | # Note that lag and differences are not the same! If we crank `differences`
28 | # up to 2, it performs the lag-1 differencing above TWICE. `lag`, therefore,
29 | # controls the number of steps backward the ts looks when it differences, and
30 | # the `differences` parameter controls how many times that is repeated.
31 | print(array.diff(x, lag=1, differences=2)) # [4., 9., 18.]
32 |
33 | # Conversely, when we set lag to 2, the array looks two steps back for its
34 | # differencing operation (only one).
35 | print(array.diff(x, lag=2, differences=1)) # [-8., 5., 32.]
36 |
37 | # The lag parameter is controlled by `m`, which is the seasonal periodicity of
38 | # a time series. If your series is non-seasonal, lag will typically be 1.
39 |
--------------------------------------------------------------------------------
/examples/utils/example_tsdisplay.py:
--------------------------------------------------------------------------------
1 | """
2 | ====================================
3 | Displaying key timeseries statistics
4 | ====================================
5 |
6 |
7 | Visualizing characteristics of a time series is a key component to effective
8 | forecasting. In this example, we'll look at a very simple method to examine
9 | critical statistics of a time series object.
10 |
11 | .. raw:: html
12 |
13 | <br/>
14 | """
15 | print(__doc__)
16 |
17 | # Author: Taylor Smith
18 |
19 | import pmdarima as pm
20 | from pmdarima import datasets
21 | from pmdarima import preprocessing
22 |
23 | # We'll use the sunspots dataset for this example
24 | y = datasets.load_sunspots(True)
25 | print("Data shape: {}".format(y.shape[0]))
26 | print("Data head:")
27 | print(y.head())
28 |
29 | # Let's look at the series, its ACF plot, and a histogram of its values
30 | pm.tsdisplay(y, lag_max=90, title="Sunspots", show=True)
31 |
32 | # Notice that the histogram is very skewed. This is a prime candidate for
33 | # box-cox transformation
34 | y_bc, _ = preprocessing.BoxCoxEndogTransformer(lmbda2=1e-6).fit_transform(y)
35 | pm.tsdisplay(
36 | y_bc, lag_max=90, title="Sunspots (BoxCox-transformed)", show=True)
37 |
38 | print("""
39 | As evidenced by the more normally distributed values in the transformed series,
40 | using a Box-Cox transformation may prove useful prior to fitting your model.
41 | """)
42 |
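43 | # The transformation is also invertible. A minimal sketch, assuming the
44 | # fitted BoxCoxEndogTransformer keeps the lambda it estimated so that
45 | # inverse_transform can (approximately) recover the original scale:
46 | bc = preprocessing.BoxCoxEndogTransformer(lmbda2=1e-6)
47 | y_bc2, _ = bc.fit_transform(y)
48 | y_inv, _ = bc.inverse_transform(y_bc2)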
--------------------------------------------------------------------------------
/pmdarima/__check_build/__init__.py:
--------------------------------------------------------------------------------
1 | """ Module to give helpful messages to the user that did not
2 | compile the pmdarima lib properly.
3 | """
4 | import os
5 |
6 | INPLACE_MSG = """
7 | It appears that you are importing a local pmdarima source tree. For
8 | this, you need to have an inplace install. Maybe you are in the source
9 | directory and you need to try from another location."""
10 |
11 | STANDARD_MSG = """
12 | If you have used an installer, please check that it is suited for your
13 | Python version, your operating system and your platform."""
14 |
15 |
16 | def raise_build_error(e):
17 | # Raise a comprehensible error and list the contents of the
18 | # directory to help debugging on the mailing list.
19 | local_dir = os.path.split(__file__)[0]
20 | msg = STANDARD_MSG
21 | if local_dir == "pmdarima/__check_build":
22 | # Picking up the local install: this will work only if the
23 | # install is an 'inplace build'
24 | msg = INPLACE_MSG
25 | dir_content = list()
26 | for i, filename in enumerate(os.listdir(local_dir)):
27 | if (i + 1) % 3:
28 | dir_content.append(filename.ljust(26))
29 | else:
30 | dir_content.append(filename + '\n')
31 | raise ImportError("""%s
32 | ___________________________________________________________________________
33 | Contents of %s:
34 | %s
35 | ___________________________________________________________________________
36 | It seems that pmdarima has not been built correctly.
37 | If you have installed pmdarima from source, please do not forget
38 | to build the package before using it: run `python setup.py install` or
39 | `make` from the top-level directory.
40 | %s""" % (e, local_dir, ''.join(dir_content).strip(), msg))
41 |
42 |
43 | try:
44 | from ._check_build import check_build
45 | except ImportError as ie:
46 | raise_build_error(ie)
47 |
--------------------------------------------------------------------------------
/pmdarima/__check_build/_check_build.pyx:
--------------------------------------------------------------------------------
1 | def check_build():
2 | return
3 |
--------------------------------------------------------------------------------
/pmdarima/__check_build/setup.py:
--------------------------------------------------------------------------------
1 | # Author: Virgile Fritsch (originally written
2 | # for sklearn, adapted for pmdarima)
3 | # License: BSD 3 clause
4 |
5 | import numpy as np
6 |
7 |
8 | def configuration(parent_package='', top_path=None):
9 | from numpy.distutils.misc_util import Configuration
10 | config = Configuration('__check_build', parent_package, top_path)
11 | config.add_extension('_check_build',
12 | sources=['_check_build.pyx'],
13 | include_dirs=[np.get_include()])
14 |
15 | return config
16 |
17 |
18 | if __name__ == '__main__':
19 | from numpy.distutils.core import setup
20 | setup(**configuration(top_path='').todict())
21 |
--------------------------------------------------------------------------------
/pmdarima/__check_build/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/pmdarima/__check_build/tests/test_check_build.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from pmdarima.__check_build import raise_build_error
4 |
5 | import pytest
6 |
7 |
8 | def test_raise_build_error():
9 | try:
10 | # Raise a value error to pass into the raise_build_error
11 | # to assert it turns it into an ImportError
12 | raise ValueError("this is a dummy err msg")
13 | except ValueError as v:
14 | with pytest.raises(ImportError):
15 | raise_build_error(v)
16 |
--------------------------------------------------------------------------------
/pmdarima/_build_utils/pre_build_helpers.py:
--------------------------------------------------------------------------------
1 | """
2 | Helpers to check build environment before actual build of pmdarima
3 |
4 | Adapted from: https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/_build_utils/pre_build_helpers.py
5 | """ # noqa
6 |
7 | import os
8 | import sys
9 | import glob
10 | import tempfile
11 | import textwrap
12 | import subprocess
13 |
14 | from distutils.sysconfig import customize_compiler
15 | from numpy.distutils.ccompiler import new_compiler
16 |
17 |
18 | def compile_test_program(code, extra_preargs=[], extra_postargs=[]):
19 | """Check that some C code can be compiled and run"""
20 | ccompiler = new_compiler()
21 | customize_compiler(ccompiler)
22 |
23 | # extra_(pre/post)args can be a callable to make it possible to get its
24 | # value from the compiler
25 | if callable(extra_preargs):
26 | extra_preargs = extra_preargs(ccompiler)
27 | if callable(extra_postargs):
28 | extra_postargs = extra_postargs(ccompiler)
29 |
30 | start_dir = os.path.abspath('.')
31 |
32 | with tempfile.TemporaryDirectory() as tmp_dir:
33 | try:
34 | os.chdir(tmp_dir)
35 |
36 | # Write test program
37 | with open('test_program.c', 'w') as f:
38 | f.write(code)
39 |
40 | os.mkdir('objects')
41 |
42 | # Compile, test program
43 | ccompiler.compile(['test_program.c'], output_dir='objects',
44 | extra_postargs=extra_postargs)
45 |
46 | # Link test program
47 | objects = glob.glob(
48 | os.path.join('objects', '*' + ccompiler.obj_extension))
49 | ccompiler.link_executable(objects, 'test_program',
50 | extra_preargs=extra_preargs,
51 | extra_postargs=extra_postargs)
52 |
53 | if "PYTHON_CROSSENV" not in os.environ:
54 | # Run test program if not cross compiling
55 | # will raise a CalledProcessError if return code was non-zero
56 | output = subprocess.check_output('./test_program')
57 | output = output.decode(sys.stdout.encoding or 'utf-8').splitlines() # noqa
58 | else:
59 | output = []
60 | except Exception:
61 | raise
62 | finally:
63 | os.chdir(start_dir)
64 |
65 | return output
66 |
67 |
68 | def basic_check_build():
69 | """Check basic compilation and linking of C code"""
70 | code = textwrap.dedent(
71 | """\
72 | #include <stdio.h>
73 | int main(void) {
74 | return 0;
75 | }
76 | """)
77 | compile_test_program(code)
78 |
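79 | # Usage sketch: `compile_test_program` returns whatever the test program
80 | # prints, so callers can probe the toolchain. Hypothetical example (not
81 | # executed during the build):
82 | #
83 | # probe = '#include <stdio.h>\nint main(void) { printf("ok\\n"); return 0; }\n'
84 | # assert compile_test_program(probe) == ['ok']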
--------------------------------------------------------------------------------
/pmdarima/_build_utils/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/pmdarima/arima/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Author: Taylor Smith
4 |
5 | from .approx import *
6 | from .arima import *
7 | from .auto import *
8 | from .utils import *
9 |
10 | # These need to be top-level since 0.7.0 for the documentation
11 | from .seasonality import decompose
12 | from .seasonality import CHTest
13 | from .seasonality import OCSBTest
14 | from .stationarity import ADFTest
15 | from .stationarity import KPSSTest
16 | from .stationarity import PPTest
17 |
18 | __all__ = [s for s in dir() if not s.startswith("_")]
19 |
--------------------------------------------------------------------------------
/pmdarima/arima/_arima_fast_helpers.h:
--------------------------------------------------------------------------------
1 | // We cannot directly reuse the npy_isfinite from npy_math.h as numpy
2 | // and pyramid are not necessarily built with the same compiler.
3 | #ifdef _MSC_VER
4 | # include <float.h>
5 | # define pyr_isfinite _finite
6 | #else
7 | # include <npy_math.h>
8 | # define pyr_isfinite npy_isfinite
9 | #endif
--------------------------------------------------------------------------------
/pmdarima/arima/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 |
5 | import numpy
6 | from numpy.distutils.misc_util import Configuration
7 |
8 | from pmdarima._build_utils import get_blas_info
9 |
10 |
11 | def configuration(parent_package="", top_path=None):
12 | cblas_libs, blas_info = get_blas_info()
13 |
14 | # Use this rather than cblas_libs so we don't fail on Windows
15 | libraries = []
16 | if os.name == 'posix':
17 | cblas_libs.append('m')
18 | libraries.append('m')
19 |
20 | config = Configuration("arima", parent_package, top_path)
21 | config.add_extension("_arima",
22 | sources=["_arima.pyx"],
23 | include_dirs=[numpy.get_include(),
24 | # Should this be explicitly included?:
25 | '_arima_fast_helpers.h',
26 | blas_info.pop('include_dirs', [])],
27 | libraries=libraries,
28 | extra_compile_args=blas_info.pop(
29 | 'extra_compile_args', []),
30 | **blas_info)
31 |
32 | config.add_subpackage('tests')
33 | config.add_data_dir('tests/data')
34 |
35 | return config
36 |
37 |
38 | if __name__ == "__main__":
39 | from numpy.distutils.core import setup
40 | setup(**configuration().todict())
41 |
--------------------------------------------------------------------------------
/pmdarima/arima/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/arima/tests/__init__.py
--------------------------------------------------------------------------------
/pmdarima/arima/tests/data/issue_191.csv:
--------------------------------------------------------------------------------
1 | Month,0
2 | 2016-01-01,129.97783044109778
3 | 2016-02-01,306.55148688938147
4 | 2016-03-01,143.46609586423057
5 | 2016-04-01,385.0286675330632
6 | 2016-05-01,80.92959253879673
7 | 2016-06-01,1058.2157327421448
8 | 2016-07-01,1247.051448666004
9 | 2016-08-01,1833.1778915985017
10 | 2016-09-01,3338.9587951991443
11 | 2016-10-01,2855.8336518614783
12 | 2016-11-01,3309.5298524577643
13 | 2016-12-01,1351.2789542083938
14 | 2017-01-01,1920.2101811761734
15 | 2017-02-01,2168.912102232124
16 | 2017-03-01,3910.982302744965
17 | 2017-04-01,3190.3251082433057
18 | 2017-05-01,1374.2227079742736
19 | 2017-06-01,1403.1415360040357
20 | 2017-07-01,953.1645718609441
21 | 2017-08-01,1413.5523140947494
22 | 2017-09-01,2821.320862583547
23 | 2017-10-01,2467.3544074992637
24 | 2017-11-01,2976.3257808230696
25 | 2017-12-01,2918.4881247635467
26 | 2018-01-01,1980.0
27 | 2018-02-01,3962.0
28 | 2018-03-01,6944.0
29 | 2018-04-01,2720.0
30 | 2018-05-01,3172.0
31 | 2018-06-01,3877.0
32 | 2018-07-01,5234.0
33 | 2018-08-01,4493.0
34 | 2018-09-01,9407.0
35 | 2018-10-01,9079.0
36 | 2018-11-01,10435.0
37 | 2018-12-01,4934.0
38 | 2019-01-01,4598.0
39 | 2019-02-01,7364.0
40 | 2019-03-01,10836.0
41 | 2019-04-01,8119.0
42 | 2019-05-01,10854.0
43 | 2019-06-01,5149.256744318752
44 | 2019-07-01,6820.377809726632
45 | 2019-08-01,9176.990725800295
46 | 2019-09-01,15991.129595953533
47 | 2019-10-01,14868.559905791291
48 |
--------------------------------------------------------------------------------
/pmdarima/arima/tests/test_approx.py:
--------------------------------------------------------------------------------
1 | # Test the approximation function
2 |
3 | from pmdarima.arima.approx import approx, _regularize
4 | from pmdarima.utils.array import c
5 | from pmdarima.arima.stationarity import ADFTest
6 |
7 | from numpy.testing import assert_array_almost_equal
8 | import numpy as np
9 |
10 | import pytest
11 |
12 | table = c(0.216, 0.176, 0.146, 0.119)
13 | tablep = c(0.01, 0.025, 0.05, 0.10)
14 | stat = 1.01
15 |
16 |
17 | def test_regularize():
18 | x, y = c(0.5, 0.5, 1.0, 1.5), c(1, 2, 3, 4)
19 | x, y = _regularize(x, y, 'mean')
20 |
21 | assert_array_almost_equal(x, np.array([0.5, 1.0, 1.5]))
22 | assert_array_almost_equal(y, np.array([1.5, 3.0, 4.0]))
23 |
24 |
25 | def test_approx_rule1():
26 | # for rule = 1
27 | x, y = approx(table, tablep, stat, rule=1)
28 | assert_array_almost_equal(x, c(1.01))
29 | assert_array_almost_equal(y, c(np.nan))
30 |
31 |
32 | def test_approx_rule2():
33 | # for rule = 2
34 | x, y = approx(table, tablep, stat, rule=2)
35 | assert_array_almost_equal(x, c(1.01))
36 | assert_array_almost_equal(y, c(0.01))
37 |
38 |
39 | @pytest.mark.parametrize(
40 | 'kwargs', [
41 |
42 | # fails for length differences
43 | dict(x=[1, 2, 3], y=[1, 2], xout=1.0),
44 |
45 | # fails for bad string
46 | dict(x=table, y=table, xout=1.0, method='bad-string'),
47 |
48 | # fails for bad length
49 | dict(x=[], y=[], xout=[], ties='mean'),
50 |
51 | # fails for bad length
52 | dict(x=[], y=[], xout=[], method='constant'),
53 |
54 | # fails for linear when < 2 samples
55 | dict(x=[1], y=[1], xout=[], method='linear', ties='ordered'),
56 |
57 | # fails for bad length
58 | dict(x=[], y=[], xout=[], method='constant'),
59 |
60 | ]
61 | )
62 | def test_corner_errors(kwargs):
63 | with pytest.raises(ValueError):
64 | approx(**kwargs)
65 |
66 |
67 | def test_valid_corner():
68 | # *doesn't* fail for constant when < 2 samples
69 | approx(x=[1], y=[1], xout=[], method='constant', ties='ordered')
70 |
71 |
72 | def test_approx_precision():
73 | # Test an example from R vs. Python to compare the expected values and
74 | # make sure we get as close as possible. This is from an ADFTest where k=1
75 | # and x=austres
76 | tableipl = np.array([[-4.0664],
77 | [-3.7468],
78 | [-3.462],
79 | [-3.1572],
80 | [-1.2128],
81 | [-0.8928],
82 | [-0.6104],
83 | [-0.2704]])
84 |
85 | _, interpol = approx(tableipl, ADFTest.tablep, xout=-1.337233, rule=2)
86 | assert np.allclose(interpol, 0.84880354) # in R we get 0.8488036
87 |
--------------------------------------------------------------------------------
/pmdarima/arima/tests/test_arima_diagnostics.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from pmdarima.datasets import load_lynx
4 | from pmdarima.arima import ARIMA
5 |
6 | from unittest.mock import patch
7 | import pytest
8 |
9 | lynx = load_lynx()
10 |
11 |
12 | class MockMPLFigure:
13 | def __init__(self, fig, figsize):
14 | self.fig = fig
15 | self.figsize = figsize
16 | self.subplots = []
17 |
18 | def add_subplot(self, *args):
19 | ax = MockMPLAxis(*args)
20 | self.subplots.append(ax)
21 | return ax
22 |
23 |
24 | class MockMPLAxis:
25 | def __init__(self, *args):
26 | pass
27 |
28 | def hist(self, *args, **kwargs):
29 | pass
30 |
31 | def hlines(self, *args, **kwargs):
32 | # We can hack our assertion here since we always pass alpha=0.5
33 | for k, v in kwargs.items():
34 | setattr(self, k, v)
35 |
36 | def legend(self):
37 | pass
38 |
39 | def plot(self, x, y, **kwargs):
40 | self.x = x
41 | self.y = y
42 |
43 | def set_title(self, title):
44 | self.title = title
45 |
46 | def set_xlim(self, *args):
47 | if len(args) == 2:
48 | mn, mx = args
49 | else: # len(args) == 1
50 | mn, mx = args[0]
51 |
52 | self.mn = mn
53 | self.mx = mx
54 |
55 | def set_ylim(self, mn, mx):
56 | self.mn = mn
57 | self.mx = mx
58 |
59 |
60 | def mock_qqplot(resid, line, ax):
61 | ax.qqplot_called = True
62 |
63 |
64 | def mock_acf_plot(resid, ax, lags):
65 | ax.acfplot_called = True
66 |
67 |
68 | @pytest.mark.parametrize(
69 | 'model_type,model', [
70 | pytest.param('arma', ARIMA(order=(1, 0, 0), maxiter=50)),
71 | pytest.param('arima', ARIMA(order=(1, 1, 0), maxiter=50)),
72 | pytest.param('sarimax', ARIMA(order=(1, 1, 0),
73 | maxiter=50,
74 | seasonal_order=(1, 0, 0, 12)))
75 | ])
76 | def test_mock_plot_diagnostics(model_type, model):
77 | model.fit(lynx)
78 |
79 | with patch('statsmodels.graphics.utils.create_mpl_fig', MockMPLFigure), \
80 | patch('statsmodels.graphics.gofplots.qqplot', mock_qqplot), \
81 | patch('statsmodels.graphics.tsaplots.plot_acf', mock_acf_plot):
82 |
83 | diag = model.plot_diagnostics(figsize=(10, 12))
84 |
85 | # Asserting on mock attributes to show that we follow the expected
86 | # logical branches
87 | assert diag.figsize == (10, 12)
88 | assert len(diag.subplots) == 4
89 |
90 | # First one should have 'alpha' from the hlines call
91 | assert hasattr(diag.subplots[0], 'alpha') and \
92 | diag.subplots[0].alpha == 0.5
93 |
94 | # Third figure gets QQPLOT called on it
95 | assert hasattr(diag.subplots[2], 'qqplot_called') and \
96 | diag.subplots[2].qqplot_called
97 |
98 | # Fourth figure gets ACF plot call on it
99 | assert hasattr(diag.subplots[3], 'acfplot_called') and \
100 | diag.subplots[3].acfplot_called
101 |
--------------------------------------------------------------------------------
/pmdarima/arima/tests/test_auto_solvers.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from pmdarima.arima import _auto_solvers as solvers
4 | from pmdarima.compat.pytest import pytest_error_str
5 |
6 | import numpy as np
7 | import pytest
8 |
9 |
10 | @pytest.mark.parametrize(
11 | 'models,expected', [
12 |
13 | # No nones, no overlap in IC
14 | pytest.param(
15 | [('foo', 'time', 1.0),
16 | ('bar', 'time', 3.0),
17 | ('baz', 'time', 2.0)],
18 | ['foo', 'baz', 'bar'],
19 | ),
20 |
21 | # we filter out Nones and infs
22 | pytest.param(
23 | [('foo', 'time', 1.0),
24 | ('bar', 'time', 3.0),
25 | ('baz', 'time', np.inf),
26 | (None, 'time', 0.0)],
27 | ['foo', 'bar'],
28 | ),
29 |
30 | ]
31 | )
32 | def test_sort_and_filter_fits_valid(models, expected):
33 | actual = solvers._sort_and_filter_fits(models)
34 | assert tuple(expected) == tuple(actual), \
35 | "\nExpected: %r" \
36 | "\nActual: %r" \
37 | % (expected, actual)
38 |
39 |
40 | def test_sort_and_filter_fits_error():
41 | results = [(None, 'time', 1.0), ('foo', 'time', np.inf)]
42 |
43 | with pytest.raises(ValueError) as ve:
44 | solvers._sort_and_filter_fits(results)
45 | assert "no-successful-model" in pytest_error_str(ve)
46 |
--------------------------------------------------------------------------------
/pmdarima/arima/tests/test_c_arima.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from pmdarima.arima._arima import C_is_not_finite
4 |
5 | import numpy as np
6 |
7 |
8 | def test_not_finite():
9 | assert C_is_not_finite(np.nan)
10 | assert C_is_not_finite(np.inf)
11 | assert not C_is_not_finite(5.)
12 |
--------------------------------------------------------------------------------
/pmdarima/arima/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | import pytest
5 |
6 | from pmdarima.arima import utils as arima_utils
7 | from pmdarima.compat.pytest import pytest_warning_messages, pytest_error_str
8 |
9 |
10 | def test_issue_341():
11 | seas_diffed = np.array([124., -114., -163., -83.])
12 |
13 | with pytest.raises(ValueError) as ve:
14 | arima_utils.ndiffs(seas_diffed, test='adf')
15 |
16 | assert "raised from LinAlgError" in pytest_error_str(ve)
17 |
18 |
19 | def test_issue_351():
20 | y = np.array([
21 | 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 6, 2, 1, 0,
23 | 2, 0, 1, 0, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 6,
24 | 0, 0, 0, 0, 0, 1, 3, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0
25 | ])
26 |
27 | with pytest.warns(UserWarning) as w_list:
28 | D = arima_utils.nsdiffs(y, m=52, max_D=2, test='ocsb')
29 |
30 | assert D == 1
31 |
32 | warnings_messages = pytest_warning_messages(w_list)
33 | assert len(warnings_messages) == 1
34 | assert 'shorter than m' in warnings_messages[0]
35 |
--------------------------------------------------------------------------------
/pmdarima/base.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Base classes and interfaces
4 |
5 | import abc
6 | from abc import ABCMeta
7 |
8 | from sklearn.base import BaseEstimator
9 |
10 | # TODO: change this to base TS model if we ever hope to support more
11 |
12 |
13 | class BaseARIMA(BaseEstimator, metaclass=ABCMeta):
14 | """A base ARIMA class"""
15 |
16 | @abc.abstractmethod
17 | def fit(self, y, X, **fit_args):
18 | """Fit an ARIMA model"""
19 |
20 | def fit_predict(self, y, X=None, n_periods=10, **fit_args):
21 | """Fit an ARIMA to a vector, ``y``, of observations with an
22 | optional matrix of ``exogenous`` variables, and then generate
23 | predictions.
24 |
25 | Parameters
26 | ----------
27 | y : array-like or iterable, shape=(n_samples,)
28 | The time-series to which to fit the ``ARIMA`` estimator. This may
29 | either be a Pandas ``Series`` object (statsmodels can internally
30 | use the dates in the index), or a numpy array. This should be a
31 | one-dimensional array of floats, and should not contain any
32 | ``np.nan`` or ``np.inf`` values.
33 |
34 | X : array-like, shape=[n_obs, n_vars], optional (default=None)
35 | An optional 2-d array of exogenous variables. If provided, these
36 | variables are used as additional features in the regression
37 | operation. This should not include a constant or trend. Note that
38 | if an ``ARIMA`` is fit on exogenous features, it must be provided
39 | exogenous features for making predictions.
40 |
41 | n_periods : int, optional (default=10)
42 | The number of periods in the future to forecast.
43 |
44 | fit_args : dict or kwargs, optional (default=None)
45 | Any keyword args to pass to the fit method.
46 | """
47 | self.fit(y, X, **fit_args)
48 |
49 | # TODO: remove kwargs from call
50 | return self.predict(n_periods=n_periods, X=X, **fit_args)
51 |
52 | # TODO: remove kwargs from all of these
53 |
54 | @abc.abstractmethod
55 | def predict(self, n_periods, X, return_conf_int=False, alpha=0.05,
56 | **kwargs):
57 | """Create forecasts on a fitted model"""
58 |
59 | @abc.abstractmethod
60 | def predict_in_sample(self, X, start, end, dynamic, **kwargs):
61 | """Get in-sample forecasts"""
62 |
63 | @abc.abstractmethod
64 | def update(self, y, X=None, maxiter=None, **kwargs):
65 | """Update an ARIMA model"""
66 |
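67 | # A concrete subclass implements the abstract methods above. A minimal,
68 | # hypothetical sketch (illustration only, not a real estimator):
69 | #
70 | # class LastValueARIMA(BaseARIMA):
71 | #     def fit(self, y, X=None, **fit_args):
72 | #         self.last_ = y[-1]
73 | #         return self
74 | #
75 | #     def predict(self, n_periods, X=None, return_conf_int=False,
76 | #                 alpha=0.05, **kwargs):
77 | #         return [self.last_] * n_periods
78 | #
79 | #     def predict_in_sample(self, X=None, start=None, end=None,
80 | #                           dynamic=False, **kwargs):
81 | #         raise NotImplementedError()
82 | #
83 | #     def update(self, y, X=None, maxiter=None, **kwargs):
84 | #         self.last_ = y[-1]
85 | #         return self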
--------------------------------------------------------------------------------
/pmdarima/compat/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The utilities defined in compat provide compatibility across versions of
3 | our dependencies. Each sub-module is specifically designed not to make
4 | calls out to other portions of pmdarima, to avoid circular dependencies.
5 | """
6 |
7 | from .matplotlib import *
8 | from .pandas import *
9 | from .numpy import *
10 | from .sklearn import *
11 | from .statsmodels import *
12 |
13 | __all__ = [s for s in dir() if not s.startswith('_')]
14 |
--------------------------------------------------------------------------------
/pmdarima/compat/matplotlib.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Author: Taylor G Smith
4 | #
5 | # Patch backend for MPL
6 |
7 | import sys
8 | import os
9 |
10 | __all__ = [
11 | 'get_compatible_pyplot',
12 | 'mpl_hist_arg'
13 | ]
14 |
15 |
16 | def get_compatible_pyplot(backend=None, debug=True):
17 | """Make the backend of MPL compatible.
18 |
19 | In Travis Mac distributions, python is not installed as a framework. This
20 | means that using the TkAgg backend is the best solution (so it doesn't
21 | try to use the mac OS backend by default).
22 |
23 | Parameters
24 | ----------
25 | backend : str, optional (default=None)
26 | The backend to default to.
27 |
28 | debug : bool, optional (default=True)
29 | Whether to log the existing backend to stderr.
30 | """
31 | import matplotlib
32 |
33 | # If the backend provided is None, just default to
34 | # what's already being used.
35 | existing_backend = matplotlib.get_backend()
36 | if backend is not None:
37 | # Can this raise?...
38 | matplotlib.use(backend)
39 |
40 | # Print out the new backend
41 | if debug:
42 | sys.stderr.write("Currently using '%s' MPL backend, "
43 | "switching to '%s' backend%s"
44 | % (existing_backend, backend, os.linesep))
45 |
46 | # If backend is not set via env variable, but debug is
47 | elif debug:
48 | sys.stderr.write("Using '%s' MPL backend%s"
49 | % (existing_backend, os.linesep))
50 |
51 | from matplotlib import pyplot as plt
52 | return plt
53 |
54 |
55 | def mpl_hist_arg(value=True):
56 | """Find the appropriate `density` kwarg for our given matplotlib version.
57 |
58 | This will determine if we should use `normed` or `density`. Additionally,
59 | since this is a kwarg, the user can supply a value (True or False) that
60 | they would like in the output dictionary.
61 |
62 | Parameters
63 | ----------
64 | value : bool, optional (default=True)
65 | The boolean value of density/normed
66 |
67 | Returns
68 | -------
69 | density_kwarg : dict
70 | A dictionary containing the appropriate density kwarg for the
71 | installed matplotlib version, mapped to the provided or default
72 | value
73 | """
74 | import matplotlib
75 | from packaging.version import Version
76 |
77 | # Compare parsed versions; plain string comparison misorders e.g. '10.0'
78 | density_kwarg = 'density' \
79 | if Version(matplotlib.__version__) >= Version('2.1.0') \
80 | else 'normed'
81 | return {density_kwarg: value}
82 |
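83 | # Usage sketch: splat the result into a `hist` call so the same code works
84 | # across matplotlib versions (hypothetical data `x`):
85 | #
86 | # plt.hist(x, bins=25, **mpl_hist_arg(True))  # {'density': True} on 2.1+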
--------------------------------------------------------------------------------
/pmdarima/compat/numpy.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Author: Taylor Smith
4 | #
5 | # Provide numpy compatibility and common variables. Since this
6 | # is a relatively sparse script, I feel I must defend this design
7 | # choice. See the docstring in the __init__: "Each sub-module is specifically
8 | # designed not to make calls out to other portions of pmdarima and to
9 | # remove circular dependencies."
10 | #
11 | # Since DTYPE is used commonly, this removes circular dependencies or
12 | # hard-coding.
13 |
14 | import numpy as np
15 |
16 | # this is going to be the data-type used across pmdarima
17 | DTYPE = np.float64
18 |
--------------------------------------------------------------------------------
/pmdarima/compat/pandas.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # The import path of the plotting tools changed in pandas 0.20
4 | try: # <= v0.19
5 | from pandas.tools import plotting
6 | except ImportError: # 0.20+
7 | from pandas import plotting
8 |
--------------------------------------------------------------------------------
/pmdarima/compat/pytest.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import contextlib
4 | import pytest
5 |
6 |
7 | def pytest_error_str(error):
8 | """Different for different versions of Pytest"""
9 | try:
10 | return str(error.value)
11 | except AttributeError:
12 | return str(error)
13 |
14 |
15 | def pytest_warning_messages(warnings):
16 | """Get the warning messages for captured warnings"""
17 | return [str(w.message) for w in warnings.list]
18 |
19 |
20 | @contextlib.contextmanager
21 | def raises(exception):
22 | """Allows context managers for catching NO errors"""
23 | if exception is None:
24 | yield None
25 |
26 | else:
27 | with pytest.raises(exception) as e:
28 | yield e
29 |
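30 | # Usage sketch: parametrized tests can expect an error or no error through
31 | # one code path (hypothetical example):
32 | #
33 | # @pytest.mark.parametrize('exc', [None, ValueError])
34 | # def test_maybe_raises(exc):
35 | #     with raises(exc):
36 | #         if exc is not None:
37 | #             raise exc("boom")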
--------------------------------------------------------------------------------
/pmdarima/compat/sklearn.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Author: Charles Drotar
4 | #
5 | # Patch backend for sklearn
6 | from packaging.version import Version
7 |
8 | import sklearn
9 | from sklearn.exceptions import NotFittedError
10 |
11 | __all__ = [
12 | 'check_is_fitted',
13 | 'if_delegate_has_method',
14 | 'safe_indexing',
15 | ]
16 |
17 |
18 | def check_is_fitted(estimator, attributes):
19 | """Ensure the model has been fitted
20 |
21 | Typically called at the beginning of an operation on a model that requires
22 | having been fit. Raises a ``NotFittedError`` if the model has not been
23 | fit.
24 |
25 | This is an adaptation of scikit-learn's ``check_is_fitted``, which has been
26 | changed recently in a way that is no longer compatible with our package.
27 |
28 | Parameters
29 | ----------
30 | estimator : estimator instance,
31 | The estimator that will be checked to see if it is fitted.
32 |
33 | attributes : str or iterable
34 | The attributes to check for
35 | """
36 | if isinstance(attributes, str):
37 | attributes = [attributes]
38 | if not hasattr(attributes, "__iter__"):
39 | raise TypeError("attributes must be a string or iterable")
40 | for attr in attributes:
41 | if hasattr(estimator, attr):
42 | return
43 | raise NotFittedError("Model has not been fit!")
44 |
45 |
46 | def safe_indexing(X, indices):
47 | """Slice an array or dataframe. This is deprecated in sklearn"""
48 | if hasattr(X, 'iloc'):
49 | return X.iloc[indices]
50 | # numpy:
51 | # TODO: this does not currently support axis 1
52 | if hasattr(X, 'ndim') and X.ndim == 2:
53 | return X[indices, :]
54 | # list or 1d array
55 | return X[indices]
56 |
57 |
58 | def _estimator_has(attr):
59 | """Checks if the model has a given attribute.
60 |
61 | Meant to be used along with `sklearn.utils.metaestimators.available_if`
62 |
63 | Parameters
64 | ----------
65 | attr : str
66 | The attribute to check the calling object for
67 |
68 | Returns
69 | -------
70 | fn : callable
71 | A function that will either raise an `AttributeError` if the attribute
72 | does not exist, or return True if it does.
73 | """
74 | def check(self):
75 | # raise original `AttributeError` if `attr` does not exist
76 | getattr(self, attr)
77 | return True
78 |
79 | return check
80 |
81 |
82 | def if_delegate_has_method(attr):
83 | """Compat method to replace `sklearn.utils.metaestimators.if_delegate_has`
84 |
85 | Older versions (< 1.0.0) of sklearn support it, but newer versions use
86 | `available_if` instead.
87 |
88 | References
89 | ----------
90 | .. [1] https://git.io/JzKiv
91 | .. [2] https://git.io/JzKiJ
92 | """
93 | if Version(sklearn.__version__) < Version("1.0.0"):
94 | from sklearn.utils.metaestimators import if_delegate_has_method
95 | return if_delegate_has_method(attr)
96 | else:
97 | from sklearn.utils.metaestimators import available_if
98 | return available_if(_estimator_has(attr))
99 |
--------------------------------------------------------------------------------
/pmdarima/compat/statsmodels.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Handle inconsistencies in the statsmodels API versions
4 |
5 | from collections.abc import Iterable
6 | from packaging.version import Version
7 | import statsmodels as sm
8 |
9 | __all__ = [
10 | 'bind_df_model'
11 | ]
12 |
13 | _sm_version = sm.__version__
14 |
15 |
16 | def bind_df_model(model_fit, arima_results):
17 | """Set model degrees of freedom.
18 |
19 | Older versions of statsmodels don't handle this issue. Sets the
20 | model degrees of freedom in place if not already present.
21 |
22 | Parameters
23 | ----------
24 | model_fit : ARMA, ARIMA or SARIMAX
25 | The fitted model.
26 |
27 | arima_results : ModelResultsWrapper
28 | The results wrapper.
29 | """
30 | if not hasattr(arima_results, 'df_model'):
31 | df_model = model_fit.k_exog + model_fit.k_trend + \
32 | model_fit.k_ar + model_fit.k_ma + \
33 | model_fit.k_seasonal_ar + model_fit.k_seasonal_ma
34 | setattr(arima_results, 'df_model', df_model)
35 |
36 |
37 | def check_seasonal_order(order):
38 | """Check the seasonal order
39 |
40 | Statsmodels 0.11.0 introduced a check for seasonal order == 1 that can
41 | raise a ValueError, but some of our old defaults allow for m == 1 in an
42 | otherwise null seasonal order.
43 |
44 | Parameters
45 | ----------
46 | order : tuple
47 | The existing seasonal order
48 | """
49 |
50 | # If order[0] is an iterable, but not a string then we don't perform check.
51 | # Otherwise we perform the check and override order if it satisfies check.
52 | # See issue#370: https://github.com/alkaline-ml/pmdarima/issues/370
53 | if isinstance(order[0], Iterable) and not isinstance(order[0], str):
54 | return order
55 | else:
56 | if sum(order[:3]) == 0 and order[-1] == 1:
57 | order = (0, 0, 0, 0)
58 |
59 | # user's order may be invalid, but we'll let statsmodels' validation
60 | # handle that.
61 | return order
62 |
63 |
64 | def _use_sm13():
65 | return Version(sm.__version__) >= Version("0.13.0")
66 |
--------------------------------------------------------------------------------
/pmdarima/compat/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/compat/tests/__init__.py
--------------------------------------------------------------------------------
/pmdarima/compat/tests/test_sklearn.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from pmdarima.arima import ARIMA
4 | from pmdarima.compat.pytest import pytest_error_str
5 | from pmdarima.compat import sklearn as sk
6 |
7 | import numpy as np
8 | import pandas as pd
9 | from numpy.testing import assert_array_equal
10 | import pytest
11 |
12 |
13 | @pytest.mark.parametrize(
14 | 'x,i,exp', [
15 | pytest.param(np.array([1, 2, 3, 4, 5]), [0, 1], np.array([1, 2])),
16 | pytest.param(pd.Series([1, 2, 3, 4, 5]), [0, 1], np.array([1, 2])),
17 | pytest.param(np.array([[1, 2], [3, 4]]), [0], np.array([[1, 2]])),
18 | ]
19 | )
20 | def test_safe_indexing(x, i, exp):
21 | res = sk.safe_indexing(x, i)
22 | if hasattr(res, "values"): # pd.Series
23 | res = res.values
24 | assert_array_equal(exp, res)
25 |
26 |
27 | def test_check_is_fitted_error():
28 | with pytest.raises(TypeError) as te:
29 | sk.check_is_fitted(None, None)
30 | assert "attributes must be a string or iterable" in pytest_error_str(te)
31 |
32 |
33 | def test_not_fitted_error():
34 | with pytest.raises(sk.NotFittedError) as nfe:
35 | mod = ARIMA((0, 1, 0))
36 | sk.check_is_fitted(mod, "arima_res_")
37 | assert "Model has not been fit!" in pytest_error_str(nfe)
38 |
--------------------------------------------------------------------------------
/pmdarima/compat/tests/test_statsmodels.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from pmdarima.compat.statsmodels import bind_df_model, check_seasonal_order
4 |
5 |
6 | # Test binding the degrees of freedom to a class in place. It's hard to test
7 | # on a potentially non-existent version of statsmodels, so we have to mock the
8 | # class
9 | def test_bind_df_model():
10 | class ModelFit(object):
11 | k_exog = 2
12 | k_trend = 1
13 | k_ar = 3
14 | k_ma = 2
15 | k_seasonal_ar = 1
16 | k_seasonal_ma = 2
17 |
18 | class ARIMAResults(object):
19 | pass
20 |
21 | fit = ModelFit()
22 | res = ARIMAResults()
23 |
24 | # First, there is no 'df_model' in arima res
25 | assert not hasattr(res, 'df_model')
26 | bind_df_model(fit, res)
27 |
28 | # Now it should
29 | assert hasattr(res, 'df_model')
30 | assert res.df_model == 11, res.df_model
31 |
32 |
33 | def test_check_seasonal_order():
34 | # issue370, using an iterable at position 0 returns
35 | order = ([1, 2, 3, 52], 0, 1, 7)
36 | checked_order = check_seasonal_order(order)
37 | assert order == checked_order
38 |
39 | # Special case where we override the seasonal order that is passed in.
40 | order = (0, 0, 0, 1)
41 | checked_order = check_seasonal_order(order)
42 | assert checked_order == (0, 0, 0, 0)
43 |
--------------------------------------------------------------------------------
/pmdarima/context_managers.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import contextlib
4 |
5 | __all__ = ['except_and_reraise']
6 |
7 |
8 | @contextlib.contextmanager
9 | def except_and_reraise(*except_errs, raise_err=None, raise_msg=None):
10 | """Catch a lower-level error and re-raise with a more meaningful message
11 |
12 | In some cases, Numpy linalg errors can be raised in perplexing spots. This
13 | allows us to catch the lower-level errors in spots where we are aware of
14 | them so that we may raise with a more meaningful message.
15 |
16 | Parameters
17 | ----------
18 | *except_errs : var-args, BaseException
19 | A variable list of exceptions to catch
20 |
21 | raise_err : BaseException, Error
22 | The exception to raise
23 |
24 | raise_msg : str
25 | The message to raise
26 | """
27 | if raise_err is None:
28 | raise TypeError("raise_err must be used as a key-word arg")
29 | if raise_msg is None:
30 | raise TypeError("raise_msg must be used as a key-word arg")
31 |
32 | try:
33 | yield
34 | except except_errs as e:
35 | message = "%s (raised from %s: %s)" \
36 | % (raise_msg,
37 | e.__class__.__name__,
38 | str(e))
39 | raise raise_err(message)
40 |
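41 | # Usage sketch: wrap a numerically fragile call so the low-level error
42 | # surfaces with a domain-specific message (hypothetical names):
43 | #
44 | # import numpy as np
45 | # with except_and_reraise(np.linalg.LinAlgError,
46 | #                         raise_err=ValueError,
47 | #                         raise_msg="Test failed to converge"):
48 | #     run_stationarity_test(y)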
--------------------------------------------------------------------------------
/pmdarima/datasets/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .airpassengers import *
3 | from .ausbeer import *
4 | from .austres import *
5 | from .gasoline import *
6 | from .heartrate import *
7 | from .lynx import *
8 | from .stocks import *
9 | from .sunspots import *
10 | from .taylor import *
11 | from .wineind import *
12 | from .woolyrnq import *
13 |
14 | __all__ = [s for s in dir() if not s.startswith("_")]
15 |
--------------------------------------------------------------------------------
/pmdarima/datasets/_base.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | from os.path import abspath, dirname, join, expanduser
5 | import numpy as np
6 | import pandas as pd
7 | import urllib3
8 | import tarfile
9 |
10 | from ..compat.numpy import DTYPE
11 |
12 | # caches anything read from disk to avoid re-reads
13 | _cache = {}
14 | http = urllib3.PoolManager()
15 |
16 |
17 | def get_data_path():
18 | """Get the absolute path to the ``data`` directory"""
19 | dataset_dir = abspath(dirname(__file__))
20 | data_dir = join(dataset_dir, 'data')
21 | return data_dir
22 |
23 |
24 | def get_data_cache_path():
25 | """Get the absolute path to where we cache data from the web"""
26 | return abspath(expanduser(join("~", ".pmdarima-data")))
27 |
28 |
29 | def fetch_from_web_or_disk(url, key, cache=True, dtype=DTYPE):
30 | """Fetch a dataset from the web, and save it in the pmdarima cache"""
31 | if key in _cache:
32 | return _cache[key]
33 |
34 | disk_cache_path = get_data_cache_path()
35 |
36 | # don't ask, just tell. avoid race conditions
37 | os.makedirs(disk_cache_path, exist_ok=True)
38 |
39 | # See if it's already there
40 | data_path = join(disk_cache_path, key + '.csv.gz')
41 | if os.path.exists(data_path):
42 | rslt = np.loadtxt(data_path).ravel()
43 |
44 | else:
45 | r = None
46 | rslt = None
47 | try:
48 | r = http.request('GET', url)
49 | # rank 1 because it's a time series
50 | rslt = np.asarray(
51 | r.data.decode('utf-8').split('\n'), dtype=dtype)
52 |
53 | finally:
54 | if rslt is not None:
55 | try:
56 | r.release_conn()
57 | except Exception:
58 | pass
59 |
60 | # if we got here, rslt is good. We need to save it to disk
61 | np.savetxt(fname=data_path, X=rslt)
62 |
63 | # If we get here, we have rslt.
64 | if cache:
65 | _cache[key] = rslt
66 |
67 | return rslt
68 |
69 |
70 | def _load_tarfile(key):
71 | """Internal method for loading a tar file"""
72 | base_path = abspath(dirname(__file__))
73 | file_path = join(base_path, "data", key)
74 | with tarfile.open(file_path, "r:*") as tar:
75 | csv_path = tar.getnames()[0] # there is only one file per tar
76 | return pd.read_csv(tar.extractfile(csv_path), header=0)
77 |
78 |
79 | def load_date_example():
80 | """Loads a nondescript dated example for internal use"""
81 | X = _load_tarfile("dated.tar.gz")
82 | # make sure it's a date time
83 | X['date'] = pd.to_datetime(X['date'])
84 | y = X.pop('y')
85 | return y, X
86 |
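87 | # Caching sketch: the first call to `fetch_from_web_or_disk` downloads and
88 | # writes ~/.pmdarima-data/<key>.csv.gz; repeat calls in the same session
89 | # are served from the in-memory `_cache` (hypothetical url/key):
90 | #
91 | # y1 = fetch_from_web_or_disk(url, 'mykey')  # hits the network
92 | # y2 = fetch_from_web_or_disk(url, 'mykey')  # returned from _cache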
--------------------------------------------------------------------------------
/pmdarima/datasets/airpassengers.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | import pandas as pd
5 |
6 | from ..compat import DTYPE
7 |
8 | __all__ = [
9 | 'load_airpassengers'
10 | ]
11 |
12 |
13 | def load_airpassengers(as_series=False, dtype=DTYPE):
14 | """Monthly airline passengers.
15 |
16 | The classic Box & Jenkins airline data. Monthly totals of international
17 | airline passengers, 1949 to 1960.
18 |
19 | Parameters
20 | ----------
21 | as_series : bool, optional (default=False)
22 | Whether to return a Pandas series. If False, will return a 1d
23 | numpy array.
24 |
25 | dtype : type, optional (default=np.float64)
26 | The type to return for the array. Default is np.float64, which is used
27 | throughout the package as the default type.
28 |
29 | Returns
30 | -------
31 | rslt : array-like, shape=(n_samples,)
32 | The time series vector.
33 |
34 | Examples
35 | --------
36 | >>> from pmdarima.datasets import load_airpassengers
37 | >>> load_airpassengers() # doctest: +SKIP
38 | np.array([
39 | 112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118,
40 | 115, 126, 141, 135, 125, 149, 170, 170, 158, 133, 114, 140,
41 | 145, 150, 178, 163, 172, 178, 199, 199, 184, 162, 146, 166,
42 | 171, 180, 193, 181, 183, 218, 230, 242, 209, 191, 172, 194,
43 | 196, 196, 236, 235, 229, 243, 264, 272, 237, 211, 180, 201,
44 | 204, 188, 235, 227, 234, 264, 302, 293, 259, 229, 203, 229,
45 | 242, 233, 267, 269, 270, 315, 364, 347, 312, 274, 237, 278,
46 | 284, 277, 317, 313, 318, 374, 413, 405, 355, 306, 271, 306,
47 | 315, 301, 356, 348, 355, 422, 465, 467, 404, 347, 305, 336,
48 | 340, 318, 362, 348, 363, 435, 491, 505, 404, 359, 310, 337,
49 | 360, 342, 406, 396, 420, 472, 548, 559, 463, 407, 362, 405,
50 | 417, 391, 419, 461, 472, 535, 622, 606, 508, 461, 390, 432])
51 |
52 | >>> load_airpassengers(True).head()
53 | 0 112.0
54 | 1 118.0
55 | 2 132.0
56 | 3 129.0
57 | 4 121.0
58 | dtype: float64
59 |
60 | Notes
61 | -----
62 | This is monthly data, so *m* should be set to 12 when using in a seasonal
63 | context.
64 |
65 | References
66 | ----------
67 | .. [1] Box, G. E. P., Jenkins, G. M. and Reinsel, G. C. (1976)
68 | "Time Series Analysis, Forecasting and Control. Third Edition."
69 | Holden-Day. Series G.
70 | """
71 | rslt = np.array([
72 | 112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118,
73 | 115, 126, 141, 135, 125, 149, 170, 170, 158, 133, 114, 140,
74 | 145, 150, 178, 163, 172, 178, 199, 199, 184, 162, 146, 166,
75 | 171, 180, 193, 181, 183, 218, 230, 242, 209, 191, 172, 194,
76 | 196, 196, 236, 235, 229, 243, 264, 272, 237, 211, 180, 201,
77 | 204, 188, 235, 227, 234, 264, 302, 293, 259, 229, 203, 229,
78 | 242, 233, 267, 269, 270, 315, 364, 347, 312, 274, 237, 278,
79 | 284, 277, 317, 313, 318, 374, 413, 405, 355, 306, 271, 306,
80 | 315, 301, 356, 348, 355, 422, 465, 467, 404, 347, 305, 336,
81 | 340, 318, 362, 348, 363, 435, 491, 505, 404, 359, 310, 337,
82 | 360, 342, 406, 396, 420, 472, 548, 559, 463, 407, 362, 405,
83 | 417, 391, 419, 461, 472, 535, 622, 606, 508, 461, 390, 432
84 | ]).astype(dtype)
85 |
86 | if as_series:
87 | return pd.Series(rslt)
88 | return rslt
89 |
--------------------------------------------------------------------------------
/pmdarima/datasets/austres.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | import pandas as pd
5 |
6 | from ..compat import DTYPE
7 |
8 | __all__ = [
9 | 'load_austres'
10 | ]
11 |
12 |
13 | def load_austres(as_series=False, dtype=DTYPE):
14 | """Quarterly residential data.
15 |
16 | Numbers (in thousands) of Australian residents measured quarterly from
17 | March 1971 to March 1994.
18 |
19 | Parameters
20 | ----------
21 | as_series : bool, optional (default=False)
22 | Whether to return a Pandas series. If False, will return a 1d
23 | numpy array.
24 |
25 | dtype : type, optional (default=np.float64)
26 | The type to return for the array. Default is np.float64, which is used
27 | throughout the package as the default type.
28 |
29 | Returns
30 | -------
31 | rslt : array-like, shape=(n_samples,)
32 | The austres vector.
33 |
34 | Examples
35 | --------
36 | >>> from pmdarima.datasets import load_austres
37 | >>> load_austres()
38 | np.array([13067.3, 13130.5, 13198.4, 13254.2, 13303.7, 13353.9,
39 | 13409.3, 13459.2, 13504.5, 13552.6, 13614.3, 13669.5,
40 | 13722.6, 13772.1, 13832.0, 13862.6, 13893.0, 13926.8,
41 | 13968.9, 14004.7, 14033.1, 14066.0, 14110.1, 14155.6,
42 | 14192.2, 14231.7, 14281.5, 14330.3, 14359.3, 14396.6,
43 | 14430.8, 14478.4, 14515.7, 14554.9, 14602.5, 14646.4,
44 | 14695.4, 14746.6, 14807.4, 14874.4, 14923.3, 14988.7,
45 | 15054.1, 15121.7, 15184.2, 15239.3, 15288.9, 15346.2,
46 | 15393.5, 15439.0, 15483.5, 15531.5, 15579.4, 15628.5,
47 | 15677.3, 15736.7, 15788.3, 15839.7, 15900.6, 15961.5,
48 | 16018.3, 16076.9, 16139.0, 16203.0, 16263.3, 16327.9,
49 | 16398.9, 16478.3, 16538.2, 16621.6, 16697.0, 16777.2,
50 | 16833.1, 16891.6, 16956.8, 17026.3, 17085.4, 17106.9,
51 | 17169.4, 17239.4, 17292.0, 17354.2, 17414.2, 17447.3,
52 | 17482.6, 17526.0, 17568.7, 17627.1, 17661.5])
53 |
54 | >>> load_austres(True).head()
55 | 0 13067.3
56 | 1 13130.5
57 | 2 13198.4
58 | 3 13254.2
59 | 4 13303.7
60 | dtype: float64
61 |
62 | Notes
63 | -----
64 | This is quarterly data, so *m* should be set to 4 when using in a seasonal
65 | context.
66 |
67 | References
68 | ----------
69 | .. [1] P. J. Brockwell and R. A. Davis (1996)
70 | "Introduction to Time Series and Forecasting." Springer
71 | """
72 | rslt = np.array([
73 | 13067.3, 13130.5, 13198.4, 13254.2, 13303.7, 13353.9,
74 | 13409.3, 13459.2, 13504.5, 13552.6, 13614.3, 13669.5,
75 | 13722.6, 13772.1, 13832.0, 13862.6, 13893.0, 13926.8,
76 | 13968.9, 14004.7, 14033.1, 14066.0, 14110.1, 14155.6,
77 | 14192.2, 14231.7, 14281.5, 14330.3, 14359.3, 14396.6,
78 | 14430.8, 14478.4, 14515.7, 14554.9, 14602.5, 14646.4,
79 | 14695.4, 14746.6, 14807.4, 14874.4, 14923.3, 14988.7,
80 | 15054.1, 15121.7, 15184.2, 15239.3, 15288.9, 15346.2,
81 | 15393.5, 15439.0, 15483.5, 15531.5, 15579.4, 15628.5,
82 | 15677.3, 15736.7, 15788.3, 15839.7, 15900.6, 15961.5,
83 | 16018.3, 16076.9, 16139.0, 16203.0, 16263.3, 16327.9,
84 | 16398.9, 16478.3, 16538.2, 16621.6, 16697.0, 16777.2,
85 | 16833.1, 16891.6, 16956.8, 17026.3, 17085.4, 17106.9,
86 | 17169.4, 17239.4, 17292.0, 17354.2, 17414.2, 17447.3,
87 | 17482.6, 17526.0, 17568.7, 17627.1, 17661.5]).astype(dtype)
88 |
89 | if as_series:
90 | return pd.Series(rslt)
91 | return rslt
92 |
--------------------------------------------------------------------------------
/pmdarima/datasets/data/dated.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/datasets/data/dated.tar.gz
--------------------------------------------------------------------------------
/pmdarima/datasets/data/msft.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/datasets/data/msft.tar.gz
--------------------------------------------------------------------------------
/pmdarima/datasets/data/sunspots.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/datasets/data/sunspots.txt.gz
--------------------------------------------------------------------------------
/pmdarima/datasets/gasoline.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import pandas as pd
4 |
5 | from ..compat.numpy import DTYPE
6 | from ._base import fetch_from_web_or_disk
7 |
8 | __all__ = [
9 | 'load_gasoline'
10 | ]
11 |
12 | url = 'http://alkaline-ml.com/datasets/gasoline.csv'
13 |
14 |
15 | def load_gasoline(as_series=False, dtype=DTYPE):
16 | """Weekly US finished motor gasoline products
17 |
18 | A weekly time series of US finished motor gasoline products supplied (in
19 | thousands of barrels per day) from February 1991 to May 2005.
20 |
21 | Parameters
22 | ----------
23 | as_series : bool, optional (default=False)
24 | Whether to return a Pandas series. If True, the index will be set to
25 | the observed years/months. If False, will return a 1d numpy array.
26 |
27 | dtype : type, optional (default=np.float64)
28 | The type to return for the array. Default is np.float64, which is used
29 | throughout the package as the default type.
30 |
31 | Notes
32 | -----
33 | The seasonal periodicity of this example is rather difficult, since it's
34 | not an integer. To be exact, the periodicity is ``365.25 / 7``
35 | (~=52.1785714285714). To fit the best possible model to this data, you'll
36 | need to explore using exogenous features.
37 |
38 | See Also
39 | --------
40 | :class:`pmdarima.preprocessing.exog.FourierFeaturizer`
41 |
42 | Examples
43 | --------
44 | >>> from pmdarima.datasets import load_gasoline
45 | >>> load_gasoline()
46 | array([6621. , 6433. , 6582. , ..., 9024. , 9175. , 9269. ])
47 |
48 | >>> load_gasoline(True).head()
49 | 0 6621.0
50 | 1 6433.0
51 | 2 6582.0
52 | 3 7224.0
53 | 4 6875.0
54 | dtype: float64
55 |
56 | References
57 | ----------
58 | .. [1] http://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=wgfupus2&f=W
59 | .. [2] https://robjhyndman.com/hyndsight/forecasting-weekly-data/
60 |
61 | Returns
62 | -------
63 | rslt : array-like, shape=(n_samples,)
64 | The gasoline dataset. There are 745 examples.
65 | """ # noqa
66 | rslt = fetch_from_web_or_disk(url, 'gasoline', cache=True).astype(dtype)
67 | if not as_series:
68 | return rslt
69 |
70 | return pd.Series(rslt)
71 |
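72 | # Modeling sketch: per the Notes above, Fourier terms can handle the
73 | # non-integer periodicity. Assuming pmdarima's FourierFeaturizer API:
74 | #
75 | # from pmdarima.preprocessing import FourierFeaturizer
76 | # y = load_gasoline()
77 | # y_prime, X = FourierFeaturizer(m=365.25 / 7, k=4).fit_transform(y)
78 | # # X holds the Fourier terms to pass to auto_arima as exogenous features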
--------------------------------------------------------------------------------
/pmdarima/datasets/lynx.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Author: Taylor Smith
4 | #
5 | # This is the lynx dataset found in R.
6 |
7 | import numpy as np
8 | import pandas as pd
9 |
10 | from ..compat import DTYPE
11 |
12 | __all__ = [
13 | 'load_lynx'
14 | ]
15 |
16 |
17 | def load_lynx(as_series=False, dtype=DTYPE):
18 | """Annual numbers of lynx trappings for 1821–1934 in Canada.
19 |
20 | This time-series records the number of skins of predators (lynx) that were
21 | collected over several years by the Hudson's Bay Company. The dataset was
22 | taken from Brockwell & Davis (1991) and appears to be the series
23 | considered by Campbell & Walker (1977).
24 |
25 | Parameters
26 | ----------
27 | as_series : bool, optional (default=False)
28 | Whether to return a Pandas series. If True, the index will be set to
29 | the observed years. If False, will return a 1d numpy array.
30 |
31 | dtype : type, optional (default=np.float64)
32 | The type to return for the array. Default is np.float64, which is used
33 | throughout the package as the default type.
34 |
35 | Examples
36 | --------
37 | >>> from pmdarima.datasets import load_lynx
38 | >>> load_lynx()
39 | array([ 269, 321, 585, 871, 1475, 2821, 3928, 5943, 4950, 2577, 523,
40 | 98, 184, 279, 409, 2285, 2685, 3409, 1824, 409, 151, 45,
41 | 68, 213, 546, 1033, 2129, 2536, 957, 361, 377, 225, 360,
42 | 731, 1638, 2725, 2871, 2119, 684, 299, 236, 245, 552, 1623,
43 | 3311, 6721, 4254, 687, 255, 473, 358, 784, 1594, 1676, 2251,
44 | 1426, 756, 299, 201, 229, 469, 736, 2042, 2811, 4431, 2511,
45 | 389, 73, 39, 49, 59, 188, 377, 1292, 4031, 3495, 587,
46 | 105, 153, 387, 758, 1307, 3465, 6991, 6313, 3794, 1836, 345,
47 | 382, 808, 1388, 2713, 3800, 3091, 2985, 3790, 674, 81, 80,
48 | 108, 229, 399, 1132, 2432, 3574, 2935, 1537, 529, 485, 662,
49 | 1000, 1590, 2657, 3396])
50 |
51 | >>> load_lynx(True).head()
52 | 1821 269
53 | 1822 321
54 | 1823 585
55 | 1824 871
56 | 1825 1475
57 | dtype: int64
58 |
59 | Notes
60 | -----
61 | This is annual data and not seasonal in nature (i.e., :math:`m=1`)
62 |
63 | References
64 | ----------
65 | .. [1] Brockwell, P. J. and Davis, R. A. (1991)
66 | Time Series and Forecasting Methods. Second edition.
67 | Springer. Series G (page 557).
68 |
69 | .. [2] https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/lynx.html
70 |
71 | Returns
72 | -------
73 | lynx : array-like, shape=(n_samples,)
74 | The lynx dataset. There are 114 observations.
75 | """ # noqa: E501
76 | rslt = np.array([269, 321, 585, 871, 1475, 2821, 3928, 5943, 4950,
77 | 2577, 523, 98, 184, 279, 409, 2285, 2685, 3409,
78 | 1824, 409, 151, 45, 68, 213, 546, 1033, 2129,
79 | 2536, 957, 361, 377, 225, 360, 731, 1638, 2725,
80 | 2871, 2119, 684, 299, 236, 245, 552, 1623, 3311,
81 | 6721, 4254, 687, 255, 473, 358, 784, 1594, 1676,
82 | 2251, 1426, 756, 299, 201, 229, 469, 736, 2042,
83 | 2811, 4431, 2511, 389, 73, 39, 49, 59, 188,
84 | 377, 1292, 4031, 3495, 587, 105, 153, 387, 758,
85 | 1307, 3465, 6991, 6313, 3794, 1836, 345, 382, 808,
86 | 1388, 2713, 3800, 3091, 2985, 3790, 674, 81, 80,
87 | 108, 229, 399, 1132, 2432, 3574, 2935, 1537, 529,
88 | 485, 662, 1000, 1590, 2657, 3396]).astype(dtype)
89 |
90 | # Set the index if necessary
91 | if as_series:
92 | return pd.Series(rslt, index=range(1821, 1935))
93 | return rslt
94 |
--------------------------------------------------------------------------------
/pmdarima/datasets/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
4 | def configuration(parent_package='', top_path=None):
5 | from numpy.distutils.misc_util import Configuration
6 |
7 | config = Configuration('datasets', parent_package, top_path)
8 | config.add_data_dir('data')
9 | config.add_subpackage('tests')
10 | return config
11 |
12 |
13 | if __name__ == '__main__':
14 | from numpy.distutils.core import setup
15 | setup(**configuration(top_path='').todict())
16 |
--------------------------------------------------------------------------------
/pmdarima/datasets/stocks.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from ._base import _load_tarfile
4 |
5 | __all__ = ['load_msft']
6 |
7 |
8 | def load_msft():
9 | """Load the microsoft stock data
10 |
11 | Financial data for the MSFT stock between the dates of Mar 13, 1986 and
12 | Nov 10, 2017. This data is part of the Kaggle stock dataset [1]. Features
13 | are as follows:
14 |
15 | * Date : datetime
16 | * Open : float32
17 | * High : float32
18 | * Low : float32
19 | * Close : float32
20 | * Volume : long
21 | * OpenInt : int
22 |
23 | References
24 | ----------
25 | .. [1] https://www.kaggle.com/borismarjanovic/price-volume-data-for-all-us-stocks-etfs
26 |
27 | Returns
28 | -------
29 | df : pd.DataFrame, shape=(7983, 7)
30 | A dataframe of endog and exog values.
31 | """ # noqa:E501
32 | return _load_tarfile("msft.tar.gz")
33 |
--------------------------------------------------------------------------------
/pmdarima/datasets/sunspots.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Author: Taylor Smith
4 | #
5 | # This is the sunspots dataset found in R.
6 |
7 | import numpy as np
8 | import pandas as pd
9 |
10 | from os.path import join
11 | import calendar
12 |
13 | from ..compat import DTYPE
14 | from . import _base as base
15 |
16 | __all__ = [
17 | 'load_sunspots'
18 | ]
19 |
20 |
21 | def load_sunspots(as_series=False, dtype=DTYPE):
22 | """Monthly Sunspot Numbers, 1749 - 1983
23 |
24 | Monthly mean relative sunspot numbers from 1749 to 1983. Collected at the
25 | Swiss Federal Observatory, Zurich, until 1960, then at the Tokyo
26 | Astronomical Observatory.
27 |
28 | Parameters
29 | ----------
30 | as_series : bool, optional (default=False)
31 | Whether to return a Pandas series. If True, the index will be set to
32 | the observed years/months. If False, will return a 1d numpy array.
33 |
34 | dtype : type, optional (default=np.float64)
35 | The type to return for the array. Default is np.float64, which is used
36 | throughout the package as the default type.
37 |
38 | Notes
39 | -----
40 | This is monthly data, so *m* should be set to 12 when using in a seasonal
41 | context.
42 |
43 | Examples
44 | --------
45 | >>> from pmdarima.datasets import load_sunspots
46 | >>> load_sunspots()
47 | array([58. , 62.6, 70. , ..., 55.8, 33.3, 33.4])
48 |
49 | >>> load_sunspots(True).head()
50 | Jan 1749 58.0
51 | Feb 1749 62.6
52 | Mar 1749 70.0
53 | Apr 1749 55.7
54 | May 1749 85.0
55 | dtype: float64
56 |
57 | References
58 | ----------
59 | .. [1] https://www.rdocumentation.org/packages/datasets/versions/3.6.1/topics/sunspots
60 |
61 | Returns
62 | -------
63 | rslt : array-like, shape=(n_samples,)
64 | The sunspots dataset. There are 2820 observations.
65 | """ # noqa: E501
66 | rslt = base._cache.get('sunspots', None)
67 | if rslt is None:
68 | data_path = join(base.get_data_path(), 'sunspots.txt.gz')
69 | rslt = np.loadtxt(data_path).ravel()
70 | base._cache['sunspots'] = rslt
71 |
72 | # don't want to cache type conversion
73 | rslt = rslt.astype(dtype)
74 |
75 | if not as_series:
76 | return rslt
77 |
78 | # Otherwise we want a series and have to cleverly create the index
79 | index = [
80 | "%s %i" % (calendar.month_abbr[i + 1], year)
81 | for year in range(1749, 1984)
82 | for i in range(12)
83 | ]
84 |
85 | return pd.Series(rslt, index=index)
86 |
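The index comprehension above pairs each of the 235 years (1749-1983) with 12
month abbreviations, which lines up with the documented 2820 observations. A
sketch of the result:

    >>> from pmdarima.datasets import load_sunspots
    >>> y = load_sunspots(as_series=True)
    >>> y.shape[0]  # 235 years * 12 months
    2820
    >>> y.index[0], y.index[-1]
    ('Jan 1749', 'Dec 1983')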
--------------------------------------------------------------------------------
/pmdarima/datasets/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/datasets/tests/__init__.py
--------------------------------------------------------------------------------
/pmdarima/datasets/tests/test_load_datasets.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from pmdarima.datasets import load_heartrate, load_lynx, load_wineind, \
4 | load_woolyrnq, load_ausbeer, load_austres, load_gasoline, \
5 | load_airpassengers, load_taylor, load_msft, load_sunspots, _base as base
6 |
7 | import numpy as np
8 | import pandas as pd
9 | import os
10 | import shutil
11 |
12 | from numpy.testing import assert_array_equal
13 | import pytest
14 |
15 |
16 | def _inner_load(f):
17 | n = None
18 | for as_series in (True, False):
19 | x = f(as_series=as_series)
20 |
21 | # ensure shape is same for both
22 | if n is None:
23 | n = x.shape[0]
24 | else:
25 | assert x.shape[0] == n
26 |
27 | if as_series:
28 | assert isinstance(x, pd.Series)
29 | else:
30 | assert isinstance(x, np.ndarray)
31 |
32 |
33 | # Simply test loading the datasets and that we get the expected type
34 | @pytest.mark.parametrize(
35 | 'f', [load_heartrate,
36 | load_lynx,
37 | load_wineind,
38 | load_woolyrnq,
39 | load_ausbeer,
40 | load_austres,
41 | load_taylor,
42 | load_airpassengers])
43 | def test_load(f):
44 | _inner_load(f)
45 |
46 |
47 | @pytest.mark.parametrize(
48 | 'f', [load_msft])
49 | def test_df_loads(f):
50 | df = f()
51 | assert isinstance(df, pd.DataFrame)
52 |
53 |
54 | @pytest.mark.parametrize(
55 | 'f, cache_name', [
56 | pytest.param(load_sunspots, 'sunspots'),
57 | ])
58 | def test_load_from_gzip(f, cache_name):
59 | _inner_load(f)
60 | assert cache_name in base._cache
61 |
62 |
63 | @pytest.mark.parametrize(
64 | 'func, key', [
65 | pytest.param(load_gasoline, 'gasoline'),
66 | ]
67 | )
68 | def test_load_from_web(func, key):
69 | # make sure there is no data folder
70 | disk_cache_folder = base.get_data_cache_path()
71 | if os.path.exists(disk_cache_folder):
72 | shutil.rmtree(disk_cache_folder)
73 |
74 | try:
75 | # loads from web
76 | y = func(as_series=False)
77 |
78 | # show the key is in _cache
79 | assert key in base._cache
80 |
81 | # show exists on disk
82 | assert os.path.exists(os.path.join(disk_cache_folder, key + '.csv.gz'))
83 |
84 | # pop from cache so we can load it from disk
85 | base._cache.pop(key)
86 | x = func(as_series=True) # true for coverage
87 |
88 | assert_array_equal(y, x.values)
89 |
90 | finally:
91 | if os.path.exists(disk_cache_folder):
92 | shutil.rmtree(disk_cache_folder)
93 |
--------------------------------------------------------------------------------
/pmdarima/decorators.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import functools
4 | import warnings
5 |
6 | __all__ = ['deprecated']
7 |
8 |
9 | def deprecated(use_instead, notes=None):
10 | """Mark functions as deprecated.
11 |
12 | This decorator will result in a warning being emitted when the decorated
13 | function is used.
14 |
15 | Parameters
16 | ----------
17 | use_instead : str
18 | The name of the function to use instead.
19 |
20 | notes : str, optional (default=None)
21 | Additional notes to add to the warning message.
22 | """
23 | if notes is None:
24 | notes = ""
25 | else:
26 | notes = " " + notes
27 |
28 | def wrapped_func(func):
29 | @functools.wraps(func)
30 | def _inner(*args, **kwargs):
31 | warnings.simplefilter('always', DeprecationWarning) # un-filter
32 | msg = ("{0} is deprecated and will be removed in a future "
33 | "release of pmdarima. Use {1} instead.{2}"
34 | .format(func.__name__, use_instead, notes))
35 |
36 | warnings.warn(
37 | msg,
38 | category=DeprecationWarning,
39 | stacklevel=2)
40 | warnings.simplefilter('default', DeprecationWarning) # re-filter
41 | return func(*args, **kwargs)
42 | return _inner
43 | return wrapped_func
44 |
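A usage sketch for the decorator (the function names here are made up for
illustration):

    >>> import warnings
    >>> from pmdarima.decorators import deprecated
    >>>
    >>> @deprecated(use_instead='new_func', notes='See the changelog.')
    ... def old_func():
    ...     return 42
    ...
    >>> with warnings.catch_warnings(record=True) as w:
    ...     result = old_func()
    >>> result
    42
    >>> issubclass(w[-1].category, DeprecationWarning)
    True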
--------------------------------------------------------------------------------
/pmdarima/metrics.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from .utils import check_endog
4 | import numpy as np
5 |
6 | __all__ = ['smape']
7 |
8 |
9 | def smape(y_true, y_pred):
10 | r"""Compute the Symmetric Mean Absolute Percentage Error.
11 |
12 | The symmetric mean absolute percentage error (SMAPE) is an accuracy measure
13 | based on percentage (or relative) errors. Defined as follows:
14 |
15 | :math:`\frac{100\%}{n}\sum_{t=1}^{n}{\frac{|F_{t}-A_{t}|}{
16 | (|A_{t}|+|F_{t}|)/2}}`
17 |
18 | Where a perfect SMAPE score is 0.0, and a higher score indicates a higher
19 | error rate.
20 |
21 | Parameters
22 | ----------
23 | y_true : array-like, shape=(n_samples,)
24 | The true test values of y.
25 |
26 | y_pred : array-like, shape=(n_samples,)
27 | The forecasted values of y.
28 |
29 | Examples
30 | --------
31 | A typical case:
32 | >>> import numpy as np
33 | >>> y_true = np.array([0.07533, 0.07533, 0.07533, 0.07533,
34 | ... 0.07533, 0.07533, 0.0672, 0.0672])
35 | >>> y_pred = np.array([0.102, 0.107, 0.047, 0.1,
36 | ... 0.032, 0.047, 0.108, 0.089])
37 | >>> smape(y_true, y_pred)
38 | 42.60306631890196
39 |
40 | A perfect score:
41 | >>> smape(y_true, y_true)
42 | 0.0
43 |
44 | References
45 | ----------
46 | .. [1] https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error
47 | """ # noqa: E501
48 | y_true = check_endog(
49 | y_true,
50 | copy=False,
51 | preserve_series=False,
52 | )
53 | y_pred = check_endog(
54 | y_pred,
55 | copy=False,
56 | preserve_series=False,
57 | )
58 | abs_diff = np.abs(y_pred - y_true)
59 | return np.mean((abs_diff * 200 / (np.abs(y_pred) + np.abs(y_true))))
60 |
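To make the factor of 200 in the final line concrete: it folds the "/2" of
the denominator into the "100%" of the numerator. Spelled out for one pair of
points (a sketch, not library code):

    >>> import numpy as np
    >>> from pmdarima.metrics import smape
    >>> # |F - A| / ((|A| + |F|) / 2) * 100  ==  |F - A| * 200 / (|A| + |F|)
    >>> abs(110. - 100.) * 200 / (100. + 110.)
    9.523809523809524
    >>> float(smape(np.array([100.]), np.array([110.])))
    9.523809523809524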
--------------------------------------------------------------------------------
/pmdarima/model_selection/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from ._split import *
4 | from ._validation import *
5 |
6 | __all__ = [s for s in dir() if not s.startswith("_")]
7 |
--------------------------------------------------------------------------------
/pmdarima/model_selection/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/model_selection/tests/__init__.py
--------------------------------------------------------------------------------
/pmdarima/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from .base import *
4 | from .endog import *
5 | from .exog import *
6 |
7 | __all__ = [s for s in dir() if not s.startswith("_")]
8 |
--------------------------------------------------------------------------------
/pmdarima/preprocessing/endog/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from .boxcox import *
4 | from .log import *
5 |
6 | # don't want to accidentally hoist `base` to top-level, since preprocessing has
7 | # its own base
8 | __all__ = [s for s in dir() if not (s.startswith("_") or s == 'base')]
9 |
--------------------------------------------------------------------------------
/pmdarima/preprocessing/endog/base.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import abc
4 |
5 | from ..base import BaseTransformer
6 |
7 |
8 | class BaseEndogTransformer(BaseTransformer, metaclass=abc.ABCMeta):
9 | """A base class for endogenous array transformers"""
10 |
11 | def _check_y_X(self, y, X):
12 | """Check the endog and exog arrays"""
13 | y, X = super(BaseEndogTransformer, self)._check_y_X(y, X)
14 | if y is None:
15 | raise ValueError("y must be non-None for endogenous transformers")
16 | return y, X
17 |
18 | @abc.abstractmethod
19 | def inverse_transform(self, y, X=None):
20 | """Inverse transform a transformed array
21 |
22 | Inverse the transformation on the transformed array.
23 |
24 | Parameters
25 | ----------
26 | y : array-like or None, shape=(n_samples,)
27 | The transformed endogenous (time-series) array.
28 |
29 | X : array-like or None, shape=(n_samples, n_features), optional
30 | The exogenous array of additional covariates. Not used for
31 | endogenous transformers. Default is None, and non-None values will
32 | serve as pass-through arrays.
33 |
34 | Returns
35 | -------
36 | y : array-like or None
37 | The inverse-transformed y array
38 |
39 | X : array-like or None
40 | The inverse-transformed exogenous array
41 | """
42 |
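A sketch of what a concrete subclass looks like (this class is illustrative
only and not part of the package; ``LogEndogTransformer`` and
``BoxCoxEndogTransformer`` are the real implementations):

    import numpy as np

    from pmdarima.preprocessing.endog.base import BaseEndogTransformer


    class SquareEndogTransformer(BaseEndogTransformer):
        """Illustrative transformer that squares a non-negative endog array"""

        def fit(self, y, X=None, **kwargs):
            # nothing to learn for this transformation
            return self

        def transform(self, y, X=None, **kwargs):
            y, X = self._check_y_X(y, X)  # raises if y is None
            return np.square(y), X

        def inverse_transform(self, y, X=None, **kwargs):
            y, X = self._check_y_X(y, X)
            return np.sqrt(y), X

As the tests elsewhere in this package do, you would call ``fit_transform``
on an instance, and ``inverse_transform`` round-trips the result.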
--------------------------------------------------------------------------------
/pmdarima/preprocessing/endog/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/preprocessing/endog/tests/__init__.py
--------------------------------------------------------------------------------
/pmdarima/preprocessing/endog/tests/test_base.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import pytest
4 | from pmdarima.compat.pytest import pytest_error_str
5 | from pmdarima.preprocessing.endog import LogEndogTransformer
6 |
7 |
8 | def test_value_error_on_check():
9 | trans = LogEndogTransformer() # could be anything, just need an instance
10 | with pytest.raises(ValueError) as ve:
11 | trans._check_y_X(None, None)
12 | assert 'non-None' in pytest_error_str(ve)
13 |
--------------------------------------------------------------------------------
/pmdarima/preprocessing/endog/tests/test_boxcox.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | from numpy.testing import assert_array_almost_equal
5 | from scipy import stats
6 | import pytest
7 |
8 | from pmdarima.compat.pytest import pytest_error_str
9 | from pmdarima.preprocessing import BoxCoxEndogTransformer
10 |
11 | loggamma = stats.loggamma.rvs(5, size=500) + 5
12 |
13 |
14 | @pytest.mark.parametrize(
15 | 'X', [
16 | None,
17 | np.random.rand(loggamma.shape[0], 3),
18 | ]
19 | )
20 | def test_invertible(X):
21 | trans = BoxCoxEndogTransformer()
22 | y_t, e_t = trans.fit_transform(loggamma, X)
23 | y_prime, e_prime = trans.inverse_transform(y_t, X=e_t)
24 |
25 | assert_array_almost_equal(loggamma, y_prime)
26 |
27 | # X should all be the same too
28 | if X is None:
29 | assert X is e_t is e_prime is None
30 | else:
31 | assert_array_almost_equal(X, e_t)
32 | assert_array_almost_equal(X, e_prime)
33 |
34 |
35 | def test_invertible_when_lambda_is_0():
36 | y = [1, 2, 3]
37 | trans = BoxCoxEndogTransformer(lmbda=0.)
38 | y_t, _ = trans.fit_transform(y)
39 | y_prime, _ = trans.inverse_transform(y_t)
40 | assert_array_almost_equal(y, y_prime)
41 |
42 |
43 | def test_value_error_on_neg_lambda():
44 | trans = BoxCoxEndogTransformer(lmbda2=-4.)
45 | with pytest.raises(ValueError) as ve:
46 | trans.fit_transform([1, 2, 3])
47 | assert 'lmbda2 must be a non-negative' in pytest_error_str(ve)
48 |
49 |
50 | class TestNonInvertibleBC:
51 | y = [-1., 0., 1.]
52 |
53 | def test_expected_error(self):
54 | y = self.y
55 | trans = BoxCoxEndogTransformer(lmbda=2.)
56 | with pytest.raises(ValueError):
57 | trans.fit_transform(y)
58 |
59 | def test_expected_warning(self):
60 | y = self.y
61 | trans = BoxCoxEndogTransformer(lmbda=2., neg_action="warn")
62 | with pytest.warns(UserWarning):
63 | y_t, _ = trans.fit_transform(y)
64 |
65 | # When we invert, it will not be the same
66 | y_prime, _ = trans.inverse_transform(y_t)
67 | assert not np.allclose(y_prime, y)
68 |
69 | def test_no_warning_on_ignore(self):
70 | y = self.y
71 | trans = BoxCoxEndogTransformer(lmbda=2., neg_action="ignore")
72 | y_t, _ = trans.fit_transform(y)
73 |
74 | # When we invert, it will not be the same
75 | y_prime, _ = trans.inverse_transform(y_t)
76 | assert not np.allclose(y_prime, y)
77 |
78 | def test_invertible_when_lam2(self):
79 | y = self.y
80 | trans = BoxCoxEndogTransformer(lmbda=2., lmbda2=2.)
81 | y_t, _ = trans.fit_transform(y)
82 |
83 | # With lmbda2 shifting the data positive, inversion recovers the original
84 | y_prime, _ = trans.inverse_transform(y_t)
85 | assert_array_almost_equal(y, y_prime)
86 |
--------------------------------------------------------------------------------
/pmdarima/preprocessing/endog/tests/test_log.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from numpy.testing import assert_array_almost_equal
4 | from sklearn.base import clone
5 |
6 | from pmdarima.preprocessing import LogEndogTransformer
7 | from pmdarima.preprocessing import BoxCoxEndogTransformer
8 |
9 |
10 | def test_same():
11 | y = [1, 2, 3]
12 | trans = BoxCoxEndogTransformer(lmbda=0)
13 | log_trans = LogEndogTransformer()
14 | y_t, _ = trans.fit_transform(y)
15 | log_y_t, _ = log_trans.fit_transform(y)
16 | assert_array_almost_equal(log_y_t, y_t)
17 |
18 |
19 | def test_invertible():
20 | y = [1, 2, 3]
21 | trans = LogEndogTransformer()
22 | y_t, _ = trans.fit_transform(y)
23 | y_prime, _ = trans.inverse_transform(y_t)
24 | assert_array_almost_equal(y, y_prime)
25 |
26 |
27 | def test_log_clone_issue_407():
28 | # https://github.com/alkaline-ml/pmdarima/issues/407
29 | log = LogEndogTransformer(lmbda=10)
30 | res, _ = log.fit_transform([0, 10])
31 |
32 | # we swap lmbda2 and lmbda internally
33 | assert log.lmbda2 == 10
34 | assert log.lmbda == 0
35 |
36 | log2 = clone(log)
37 | assert log2.lmbda2 == 10
38 | assert log2.lmbda == 0
39 | res2, _ = log2.fit_transform([0, 10])
40 |
41 | assert_array_almost_equal(res, res2)
42 |
--------------------------------------------------------------------------------
/pmdarima/preprocessing/exog/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from .dates import *
4 | from .fourier import *
5 |
6 | # don't want to accidentally hoist `base` to top-level, since preprocessing has
7 | # its own base
8 | __all__ = [s for s in dir() if not (s.startswith("_") or s == 'base')]
9 |
--------------------------------------------------------------------------------
/pmdarima/preprocessing/exog/_fourier.pyx:
--------------------------------------------------------------------------------
1 | #cython: boundscheck=False
2 | #cython: cdivision=True
3 | #cython: wraparound=False
4 | #cython: nonecheck=False
5 | #cython: language_level=3
6 | #
7 | # Author: Taylor G Smith
8 |
9 | import numpy as np
10 |
11 | from cython.view cimport array as cvarray
12 | from libc.math cimport sin, cos, M_PI
13 | cimport numpy as np
14 | cimport cython
15 |
16 | ctypedef float [:, :] float_array_2d_t
17 | ctypedef double [:, :] double_array_2d_t
18 |
19 | ctypedef np.npy_intp INTP
20 |
21 | np.import_array()
22 |
23 |
24 | cpdef double[:, :] C_fourier_terms(double[:] p, double[:] times):
25 | cdef INTP i, j, k = p.shape[0], n = times.shape[0], m
26 | cdef float v
27 |
28 | cdef double [:, :] X = cvarray(shape=(k * 2, n),
29 | itemsize=sizeof(double),
30 | format="d") # d for double and also DUH
31 |
32 | with nogil:
33 | j = 0
34 | for i in range(0, k * 2, 2):
35 |
36 | # 2 * p[j] * times * PI
37 | v = p[j] * 2 * M_PI
38 |
39 | for m in range(n):
40 | X[i, m] = sin(v * times[m])
41 | X[i + 1, m] = cos(v * times[m])
42 |
43 | j += 1
44 |
45 | return X
46 |
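For readers who don't speak Cython, the nogil loop above is equivalent to
this NumPy sketch (a reference for intuition, not the code the package
actually runs):

    import numpy as np

    def fourier_terms_reference(p, times):
        # rows alternate sin/cos per frequency in p; columns are time points
        p = np.asarray(p, dtype=float)
        times = np.asarray(times, dtype=float)
        X = np.empty((p.shape[0] * 2, times.shape[0]))
        for j, freq in enumerate(p):
            v = 2 * np.pi * freq  # 2 * p[j] * PI, as in the loop above
            X[2 * j] = np.sin(v * times)
            X[2 * j + 1] = np.cos(v * times)
        return X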
--------------------------------------------------------------------------------
/pmdarima/preprocessing/exog/base.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import pandas as pd
4 | import numpy as np
5 | import abc
6 |
7 | from ..base import BaseTransformer
8 |
9 |
10 | class BaseExogTransformer(BaseTransformer, metaclass=abc.ABCMeta):
11 | """A base class for exogenous array transformers"""
12 |
13 | def _check_y_X(self, y, X, null_allowed=False):
14 | """Check the endog and exog arrays"""
15 | y, X = super(BaseExogTransformer, self)._check_y_X(y, X)
16 | if X is None and not null_allowed:
17 | raise ValueError("X must be non-None for exog transformers")
18 | return y, X
19 |
20 |
21 | class BaseExogFeaturizer(BaseExogTransformer, metaclass=abc.ABCMeta):
22 | """Transformers that create new exog features from the endog or exog array
23 |
24 | Parameters
25 | ----------
26 | prefix : str or None, optional (default=None)
27 | The feature prefix
28 | """
29 | def __init__(self, prefix=None):
30 | self.prefix = prefix
31 |
32 | @abc.abstractmethod
33 | def _get_prefix(self):
34 | """Get the feature prefix for when exog is a pd.DataFrame"""
35 |
36 | def _get_feature_names(self, X):
37 | pfx = self._get_prefix()
38 | return ['%s_%i' % (pfx, i) for i in range(X.shape[1])]
39 |
40 | def _safe_hstack(self, X, features):
41 | """H-stack dataframes or np.ndarrays"""
42 | if X is None or isinstance(X, pd.DataFrame):
43 | # the features we're adding may be np.ndarray
44 | if not isinstance(features, pd.DataFrame):
45 | features = pd.DataFrame.from_records(features)
46 |
47 | # subclass may override this
48 | features.columns = self._get_feature_names(features)
49 |
50 | if X is not None:
51 | # ignore_index will remove names, which is a stupid quirk
52 | # of pandas... so manually reset the indices
53 | # https://stackoverflow.com/a/43406062/3015734
54 | X.index = features.index = np.arange(X.shape[0])
55 | return pd.concat([X, features], axis=1)
56 | # if X was None coming in, we'd still like to favor a pd.DF
57 | return features
58 |
59 | return np.hstack([X, features])
60 |
61 | def transform(self, y, X=None, n_periods=0, **kwargs):
62 | """Transform the new array
63 |
64 | Apply the transformation to the array after learning the training set's
65 | characteristics in the ``fit`` method. The transform method for
66 | featurizers behaves slightly differently in that ``n_periods`` may
67 | be required to extrapolate for periods in the future.
68 |
69 | Parameters
70 | ----------
71 | y : array-like or None, shape=(n_samples,)
72 | The endogenous (time-series) array.
73 |
74 | X : array-like or None, shape=(n_samples, n_features)
75 | An array of additional covariates.
76 |
77 | n_periods : int, optional (default=0)
78 | The number of periods in the future to forecast. If ``n_periods``
79 | is 0, will compute the features for the training set.
80 | ``n_periods`` corresponds to the number of samples that will be
81 | returned.
82 |
83 | **kwargs : keyword args
84 | Keyword arguments required by the transform function.
85 |
86 | Returns
87 | -------
88 | y : array-like or None
89 | The transformed y array
90 |
91 | X : array-like or None
92 | The transformed X array
93 | """
94 |
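A sketch of a concrete featurizer (illustrative only; ``FourierFeaturizer``
and ``DateFeaturizer`` are the real subclasses). Defining ``_get_prefix``,
``fit`` and ``transform`` is enough, and ``_safe_hstack`` handles the
DataFrame column naming:

    import numpy as np
    from pmdarima.preprocessing.exog.base import BaseExogFeaturizer

    class IdentityFeaturizer(BaseExogFeaturizer):
        """Illustrative featurizer: adds y itself as a single exog column"""

        def _get_prefix(self):
            return self.prefix or "ID"

        def fit(self, y, X=None, **kwargs):
            return self

        def transform(self, y, X=None, n_periods=0, **kwargs):
            y = np.asarray(y)
            features = y.reshape(-1, 1)
            # with X=None this returns a DataFrame with columns ['ID_0']
            return y, self._safe_hstack(X, features)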
--------------------------------------------------------------------------------
/pmdarima/preprocessing/exog/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 |
5 | import numpy
6 | from numpy.distutils.misc_util import Configuration
7 |
8 | from pmdarima._build_utils import get_blas_info
9 |
10 |
11 | def configuration(parent_package="", top_path=None):
12 | cblas_libs, blas_info = get_blas_info()
13 |
14 | # Use this rather than cblas_libs so we don't fail on Windows
15 | libraries = []
16 | if os.name == 'posix':
17 | cblas_libs.append('m')
18 | libraries.append('m')
19 |
20 | config = Configuration("exog", parent_package, top_path)
21 | config.add_extension("_fourier",
22 | sources=["_fourier.pyx"],
23 | include_dirs=[numpy.get_include(),
24 | blas_info.pop('include_dirs', [])],
25 | libraries=libraries,
26 | extra_compile_args=blas_info.pop(
27 | 'extra_compile_args', []),
28 | **blas_info)
29 |
30 | config.add_subpackage('tests')
31 |
32 | return config
33 |
34 |
35 | if __name__ == "__main__":
36 | from numpy.distutils.core import setup
37 | setup(**configuration().todict())
38 |
--------------------------------------------------------------------------------
/pmdarima/preprocessing/exog/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/pmdarima/preprocessing/exog/tests/test_base.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from pmdarima.preprocessing.exog import base
4 | from pmdarima import datasets
5 | import numpy as np
6 | import pandas as pd
7 |
8 | wineind = datasets.load_wineind()
9 |
10 |
11 | class RandomExogFeaturizer(base.BaseExogFeaturizer):
12 | """Creates random exog features. This is just used to test base func"""
13 |
14 | def _get_prefix(self):
15 | return "RND"
16 |
17 | def fit(self, y, X, **_):
18 | return self
19 |
20 | def transform(self, y, X=None, n_periods=0, **_):
21 | Xt = np.random.rand(y.shape[0], 4)
22 | Xt = self._safe_hstack(X, Xt)
23 | return y, Xt
24 |
25 |
26 | def test_default_get_feature_names():
27 | feat = RandomExogFeaturizer()
28 | y_trans, X = feat.fit_transform(wineind)
29 | assert y_trans is wineind
30 | assert X.columns.tolist() == \
31 | ['RND_0', 'RND_1', 'RND_2', 'RND_3']
32 |
33 |
34 | def test_default_get_feature_names_with_X():
35 | feat = RandomExogFeaturizer()
36 | X = pd.DataFrame.from_records(
37 | np.random.rand(wineind.shape[0], 2), columns=['a', 'b'])
38 | y_trans, X_trans = feat.fit_transform(wineind, X)
39 | assert y_trans is wineind
40 | assert X_trans.columns.tolist() == \
41 | ['a', 'b', 'RND_0', 'RND_1', 'RND_2', 'RND_3']
42 |
--------------------------------------------------------------------------------
/pmdarima/preprocessing/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 |
5 | from numpy.distutils.misc_util import Configuration
6 |
7 | from pmdarima._build_utils import get_blas_info
8 |
9 |
10 | def configuration(parent_package="", top_path=None):
11 | cblas_libs, blas_info = get_blas_info()
12 |
13 | # Use this rather than cblas_libs so we don't fail on Windows
14 | libraries = []
15 | if os.name == 'posix':
16 | cblas_libs.append('m')
17 | libraries.append('m')
18 |
19 | config = Configuration("preprocessing", parent_package, top_path)
20 |
21 | config.add_subpackage('endog')
22 | config.add_subpackage('endog/tests')
23 | config.add_subpackage('exog') # builds src and adds its own tests
24 |
25 | return config
26 |
27 |
28 | if __name__ == "__main__":
29 | from numpy.distutils.core import setup
30 | setup(**configuration().todict())
31 |
--------------------------------------------------------------------------------
/pmdarima/preprocessing/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/preprocessing/tests/__init__.py
--------------------------------------------------------------------------------
/pmdarima/preprocessing/tests/test_base.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import pytest
4 | from pmdarima.preprocessing import base
5 | from pmdarima.compat.pytest import pytest_error_str
6 |
7 |
8 | def test_value_error_on_update_check():
9 | with pytest.raises(ValueError) as ve:
10 | base.UpdatableMixin()._check_endog(None)
11 | assert 'cannot be None' in pytest_error_str(ve)
12 |
--------------------------------------------------------------------------------
/pmdarima/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Author: Taylor Smith
4 | #
5 | # Setup for submodules of pmdarima
6 |
7 | import os
8 | import sys
9 |
10 | from pmdarima._build_utils import cythonize_extensions
11 |
12 |
13 | # DEFINE CONFIG
14 | def configuration(parent_package='', top_path=None):
15 | from numpy.distutils.misc_util import Configuration
16 |
17 | libs = []
18 | if os.name == 'posix':
19 | libs.append('m')
20 |
21 | config = Configuration('pmdarima', parent_package, top_path)
22 |
23 | # build utilities
24 | config.add_subpackage('__check_build')
25 | config.add_subpackage('_build_utils')
26 |
27 | # submodules that do NOT have their own setup.py. manually add their tests
28 | config.add_subpackage('compat')
29 | config.add_subpackage('compat/tests')
30 | config.add_subpackage('datasets')
31 | config.add_subpackage('datasets/tests')
32 | config.add_subpackage('model_selection')
33 | config.add_subpackage('model_selection/tests')
34 |
35 | # the following packages have cython or their own setup.py files.
36 | config.add_subpackage('arima')
37 | config.add_subpackage('preprocessing')
38 | config.add_subpackage('utils')
39 |
40 | # add test directory
41 | config.add_subpackage('tests')
42 |
43 | # Do cythonization, but only if this is not a release tarball, since the
44 | # C/C++ files are not necessarily forward compatible with future versions
45 | # of python.
46 | if 'sdist' not in sys.argv:
47 | cythonize_extensions(top_path, config)
48 |
49 | return config
50 |
51 |
52 | if __name__ == '__main__':
53 | from numpy.distutils.core import setup
54 | setup(**configuration(top_path='').todict())
55 |
--------------------------------------------------------------------------------
/pmdarima/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/tests/__init__.py
--------------------------------------------------------------------------------
/pmdarima/tests/test_context_managers.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from pmdarima import context_managers as ctx
4 | from pmdarima.compat.pytest import pytest_error_str
5 |
6 | import pytest
7 |
8 |
9 | def test_except_and_reraise_do_reraise():
10 | with pytest.raises(KeyError) as ke:
11 | with ctx.except_and_reraise(
12 | ValueError,
13 | raise_err=KeyError,
14 | raise_msg="bar message"
15 | ):
16 | raise ValueError("contains foo message")
17 |
18 | msg = pytest_error_str(ke)
19 | assert "bar message" in msg
20 | assert "raised from ValueError" in msg
21 |
22 |
23 | def test_except_and_reraise_no_reraise():
24 | with pytest.raises(KeyError) as ke:
25 | with ctx.except_and_reraise(
26 | ValueError,
27 | raise_err=TypeError,
28 | raise_msg="bar message"
29 | ):
30 | raise KeyError("foo message")
31 |
32 | assert "foo message" in pytest_error_str(ke)
33 |
34 |
35 | @pytest.mark.parametrize('err', [ValueError, KeyError, TypeError])
36 | def test_multiple(err):
37 |
38 | class FooError(BaseException):
39 | pass
40 |
41 | with pytest.raises(FooError) as fe:
42 | with ctx.except_and_reraise(
43 | ValueError, KeyError, TypeError,
44 | raise_err=FooError,
45 | raise_msg="gotcha, fam",
46 | ):
47 | raise err("Boo!")
48 |
49 | assert "gotcha, fam" in pytest_error_str(fe)
50 |
--------------------------------------------------------------------------------
/pmdarima/tests/test_estimators.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from sklearn.base import clone
4 | from pmdarima.arima import ARIMA, AutoARIMA
5 | from pmdarima.pipeline import Pipeline
6 | from pmdarima.datasets import load_wineind
7 | from pmdarima.preprocessing import FourierFeaturizer
8 | import pytest
9 |
10 | y = load_wineind()
11 |
12 |
13 | @pytest.mark.parametrize(
14 | 'est', [
15 | ARIMA(order=(2, 1, 1)),
16 | AutoARIMA(seasonal=False, maxiter=3),
17 | Pipeline([
18 | ("fourier", FourierFeaturizer(m=12)),
19 | ("arima", AutoARIMA(seasonal=False, stepwise=True,
20 | suppress_warnings=True, d=1, max_p=2, max_q=0,
21 | start_q=0, start_p=1,
22 | maxiter=3, error_action='ignore'))
23 | ])
24 | ]
25 | )
26 | def test_clonable(est):
27 | # fit it, then clone it
28 | est.fit(y)
29 | est2 = clone(est)
30 | assert isinstance(est2, est.__class__)
31 | assert est is not est2
32 |
--------------------------------------------------------------------------------
/pmdarima/tests/test_metrics.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from pmdarima.metrics import smape
4 | import numpy as np
5 | import pytest
6 |
7 |
8 | @pytest.mark.parametrize(
9 | 'actual,forecasted,expected', [
10 | pytest.param([0.07533, 0.07533, 0.07533, 0.07533,
11 | 0.07533, 0.07533, 0.0672, 0.0672],
12 | [0.102, 0.107, 0.047, 0.1,
13 | 0.032, 0.047, 0.108, 0.089], 42.60306631890196),
14 |
15 | # when y_true == y_pred, we get 0 err
16 | pytest.param([0.07533, 0.07533, 0.07533, 0.07533,
17 | 0.07533, 0.07533, 0.0672, 0.0672],
18 | [0.07533, 0.07533, 0.07533, 0.07533,
19 | 0.07533, 0.07533, 0.0672, 0.0672], 0),
20 | ]
21 | )
22 | def test_smape(actual, forecasted, expected):
23 | err = smape(actual, forecasted)
24 | assert np.allclose(expected, err)
25 |
--------------------------------------------------------------------------------
/pmdarima/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Author: Taylor Smith
4 |
5 | from .array import *
6 | from .metaestimators import *
7 | from .visualization import *
8 | from .wrapped import *
9 |
10 |
11 | def get_callable(key, dct):
12 | """Get the callable mapped by a key from a dictionary. This is
13 | necessary for pickling (so we don't try to pickle an unbound method).
14 |
15 | Parameters
16 | ----------
17 | key : str
18 | The key for the ``dct`` dictionary.
19 |
20 | dct : dict
21 | The dictionary of callables.
22 | """
23 | fun = dct.get(key, None)
24 |
25 | if not isinstance(key, str) or fun is None: # ah, that's no fun :(
26 | raise ValueError('key must be one of the strings in %r, but got %r'
27 | % (dct, key))
28 | return fun
29 |
30 |
31 | __all__ = [s for s in dir() if not s.startswith("_")]
32 |
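A quick sketch of the intended use (the mapping here is a toy):

    >>> from pmdarima.utils import get_callable
    >>> solvers = {'lbfgs': min, 'newton': max}
    >>> get_callable('lbfgs', solvers) is min
    True
    >>> # an unmapped (or non-string) key raises a ValueError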
--------------------------------------------------------------------------------
/pmdarima/utils/_array.pyx:
--------------------------------------------------------------------------------
1 | #cython: boundscheck=False
2 | #cython: cdivision=True
3 | #cython: wraparound=False
4 | #cython: nonecheck=False
5 | #cython: language_level=3
6 | #
7 | # This is the Cython translation of the diffinv function R source code
8 | #
9 | # Author: Charles Drotar
10 |
11 | import numpy as np
12 | cimport numpy as np
13 | cimport cython
14 |
15 | ctypedef np.npy_intp INTP
16 |
17 | cdef fused floating1d:
18 | float[::1]
19 | double[::1]
20 |
21 | np.import_array()
22 |
23 |
24 | @cython.boundscheck(False)
25 | @cython.wraparound(False)
26 | cpdef double[:] C_intgrt_vec(floating1d x,
27 | floating1d xi,
28 | INTP lag):
29 | """Inverse diff
30 |
31 | References
32 | ----------
33 | .. [1] https://github.com/wch/r-source/blob/trunk/src/library/stats/R/diffinv.R#L39
34 | .. [2] https://github.com/mirror/r/blob/65a0e33a4b0a119703586fcd1f9742654738ae54/src/library/stats/src/PPsum.c#L46
35 | """
36 | cdef INTP i, n = x.shape[0]
37 | cdef np.ndarray[double, ndim=1, mode='c'] ans = \
38 | np.zeros(n + lag, dtype=np.float64, order='c')
39 |
40 | with nogil:
41 | for i in range(lag, lag + n):
42 | ans[i] = x[i - lag] + ans[i - lag]
43 | return ans
44 |
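The recurrence ``ans[i] = x[i - lag] + ans[i - lag]`` is a lagged cumulative
sum seeded with ``lag`` zeros. A pure-NumPy sketch for intuition (the ``xi``
initial-values argument is omitted here for brevity):

    import numpy as np

    def intgrt_vec_reference(x, lag):
        ans = np.zeros(len(x) + lag)
        for i in range(lag, lag + len(x)):
            ans[i] = x[i - lag] + ans[i - lag]
        return ans

    # for lag=1 this reduces to a cumulative sum with a leading zero:
    # intgrt_vec_reference([1, 2, 3], 1) -> array([0., 1., 3., 6.])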
--------------------------------------------------------------------------------
/pmdarima/utils/_show_versions.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility methods to print system info for debugging
3 |
4 | adapted from ``pandas.show_versions``
5 | adapted from ``sklearn.show_versions``
6 | """
7 | # License: BSD 3 clause
8 |
9 | import platform
10 | import sys
11 |
12 | _pmdarima_deps = (
13 | # setuptools needs to be before pip: https://github.com/pypa/setuptools/issues/3044#issuecomment-1024972548 # noqa:E501
14 | "setuptools",
15 | "pip",
16 | "sklearn",
17 | "statsmodels",
18 | "numpy",
19 | "scipy",
20 | "Cython",
21 | "pandas",
22 | "joblib",
23 | "pmdarima",
24 | )
25 |
26 | # Packages that have a different import name than name on PyPI
27 | _install_mapping = {
28 | "sklearn": "scikit-learn"
29 | }
30 |
31 |
32 | def _get_sys_info():
33 | """System information
34 |
35 | Returns
36 | -------
37 | sys_info : dict
38 | system and Python version information
39 |
40 | """
41 | python = sys.version.replace('\n', ' ')
42 |
43 | blob = [
44 | ("python", python),
45 | ('executable', sys.executable),
46 | ("machine", platform.platform()),
47 | ]
48 |
49 | return dict(blob)
50 |
51 |
52 | def _get_deps_info(deps=_pmdarima_deps):
53 | """Overview of the installed version of main dependencies
54 |
55 | Returns
56 | -------
57 | deps_info: dict
58 | version information on relevant Python libraries
59 | """
60 | def get_version(module):
61 | return module.__version__
62 |
63 | deps_info = {}
64 |
65 | # TODO: We can get rid of this when we deprecate 3.7
66 | if sys.version_info.minor <= 7:
67 | import importlib
68 |
69 | for modname in deps:
70 | try:
71 | if modname in sys.modules:
72 | mod = sys.modules[modname]
73 | else:
74 | mod = importlib.import_module(modname)
75 | ver = get_version(mod)
76 | deps_info[modname] = ver
77 | except ImportError:
78 | deps_info[modname] = None
79 |
80 | else:
81 | from importlib.metadata import PackageNotFoundError, version
82 |
83 | for modname in deps:
84 | try:
85 | deps_info[modname] = version(_install_mapping.get(modname, modname)) # noqa:E501
86 | except PackageNotFoundError:
87 | deps_info[modname] = None
88 |
89 | return deps_info
90 |
91 |
92 | def show_versions():
93 | """Print useful debugging information"""
94 | sys_info = _get_sys_info()
95 | deps_info = _get_deps_info()
96 |
97 | print('\nSystem:')
98 | for k, stat in sys_info.items():
99 | print("{k:>10}: {stat}".format(k=k, stat=stat))
100 |
101 | print('\nPython dependencies:')
102 | for k, stat in deps_info.items():
103 | print("{k:>11}: {stat}".format(k=k, stat=stat))
104 |
--------------------------------------------------------------------------------
/pmdarima/utils/metaestimators.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Author: Taylor Smith
4 | #
5 | # Metaestimators for the ARIMA class. These classes are derived from the
6 | # sklearn metaestimators, but adapted for more specific use with pmdarima.
7 |
8 | from operator import attrgetter
9 | from functools import update_wrapper
10 |
11 | __all__ = [
12 | 'if_has_delegate'
13 | ]
14 |
15 |
16 | class _IffHasDelegate(object):
17 | """Implements a conditional property using the descriptor protocol.
18 | Using this class to create a decorator will raise an ``AttributeError``
19 | if none of the delegates (specified in ``delegate_names``) is an attribute
20 | of the base object or the first found delegate does not have an attribute
21 | ``attribute_name``.
22 |
23 | This allows ducktyping of the decorated method based on
24 | ``delegate.attribute_name``. Here ``delegate`` is the first item in
25 | ``delegate_names`` for which ``hasattr(object, delegate) is True``.
26 |
27 | See https://docs.python.org/3/howto/descriptor.html for an explanation of
28 | descriptors.
29 | """
30 | def __init__(self, fn, delegate_names):
31 | self.fn = fn
32 | self.delegate_names = delegate_names
33 |
34 | # update the docstring of the descriptor
35 | update_wrapper(self, fn)
36 |
37 | def __get__(self, obj, type=None):
38 | # raise an AttributeError if the attribute is not present on the object
39 | if obj is not None:
40 | # delegate only on instances, not the classes.
41 | # this is to allow access to the docstrings.
42 | for delegate_name in self.delegate_names:
43 | try:
44 | attrgetter(delegate_name)(obj)
45 | except AttributeError:
46 | continue
47 | else:
48 | break
49 | else:
50 | attrgetter(self.delegate_names[-1])(obj)
51 |
52 | # lambda, but not partial, allows help() to work with update_wrapper
53 | out = (lambda *args, **kwargs: self.fn(obj, *args, **kwargs))
54 | # update the docstring of the returned function
55 | update_wrapper(out, self.fn)
56 | return out
57 |
58 |
59 | def if_has_delegate(delegate):
60 | """Wrap a delegated instance attribute function.
61 |
62 | Creates a decorator for methods that are delegated in the presence of a
63 | results wrapper. This enables duck-typing by ``hasattr`` returning True
64 | according to the sub-estimator.
65 |
66 | This function was adapted from scikit-learn, which defines
67 | ``if_delegate_has_method``, but operates differently by injecting methods
68 | not based on method presence, but by delegate presence.
69 |
70 | Examples
71 | --------
72 | >>> from pmdarima.utils.metaestimators import if_has_delegate
73 | >>>
74 | >>> class A(object):
75 | ... @if_has_delegate('d')
76 | ... def func(self):
77 | ... return True
78 | >>>
79 | >>> a = A()
80 | >>> # the delegate does not exist yet
81 | >>> assert not hasattr(a, 'func')
82 | >>> # inject the attribute
83 | >>> a.d = None
84 | >>> assert hasattr(a, 'func') and a.func()
85 |
86 | Parameters
87 | ----------
88 | delegate : string, list of strings or tuple of strings
89 | Name of the sub-estimator that can be accessed as an attribute of the
90 | base object. If a list or a tuple of names are provided, the first
91 | sub-estimator that is an attribute of the base object will be used.
92 | """
93 | if isinstance(delegate, list):
94 | delegate = tuple(delegate)
95 | if not isinstance(delegate, tuple):
96 | delegate = (delegate,)
97 |
98 | return lambda fn: _IffHasDelegate(fn, delegate)
99 |
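The list form described under the ``delegate`` parameter resolves to the
first attribute found; any one of the named delegates suffices. A short
sketch extending the docstring example above:

    >>> from pmdarima.utils.metaestimators import if_has_delegate
    >>>
    >>> class B(object):
    ...     @if_has_delegate(['d1', 'd2'])
    ...     def func(self):
    ...         return True
    >>>
    >>> b = B()
    >>> assert not hasattr(b, 'func')
    >>> b.d2 = None  # the second delegate alone is enough
    >>> assert hasattr(b, 'func') and b.func()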
--------------------------------------------------------------------------------
/pmdarima/utils/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 |
5 | import numpy
6 | from numpy.distutils.misc_util import Configuration
7 |
8 | from pmdarima._build_utils import get_blas_info
9 |
10 |
11 | def configuration(parent_package="", top_path=None):
12 | cblas_libs, blas_info = get_blas_info()
13 |
14 | # Use this rather than cblas_libs so we don't fail on Windows
15 | libraries = []
16 | if os.name == 'posix':
17 | cblas_libs.append('m')
18 | libraries.append('m')
19 |
20 | config = Configuration("utils", parent_package, top_path)
21 | config.add_extension("_array",
22 | sources=["_array.pyx"],
23 | include_dirs=[numpy.get_include(),
24 | blas_info.pop('include_dirs', [])],
25 | libraries=libraries,
26 | extra_compile_args=blas_info.pop(
27 | 'extra_compile_args', []),
28 | **blas_info)
29 |
30 | config.add_subpackage('tests')
31 |
32 | return config
33 |
34 |
35 | if __name__ == "__main__":
36 | from numpy.distutils.core import setup
37 | setup(**configuration().todict())
38 |
--------------------------------------------------------------------------------
/pmdarima/utils/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/utils/tests/__init__.py
--------------------------------------------------------------------------------
/pmdarima/utils/tests/test_meta.py:
--------------------------------------------------------------------------------
1 |
2 | from pmdarima.utils.metaestimators import if_has_delegate
3 |
4 |
5 | class _IfHasDelegateTester(object):
6 | def __init__(self):
7 | pass
8 |
9 | def fit(self):
10 | self.a_ = None
11 | return self
12 |
13 | @if_has_delegate('a_')
14 | def predict(self):
15 | return True
16 |
17 | @if_has_delegate(['b_', 'a_'])
18 | def predict2(self):
19 | return True
20 |
21 |
22 | def test_single_delegate():
23 | # show it passes for a "fit"
24 | assert _IfHasDelegateTester().fit().predict()
25 | assert not hasattr(_IfHasDelegateTester(), 'predict')
26 |
27 |
28 | def test_multiple_delegates():
29 | assert _IfHasDelegateTester().fit().predict2()
30 | assert not hasattr(_IfHasDelegateTester(), 'predict2')
31 |
--------------------------------------------------------------------------------
/pmdarima/utils/tests/test_show_versions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import pmdarima as pm
4 | from pmdarima.utils._show_versions import _get_deps_info
5 |
6 |
7 | # Just show it doesn't blow up...
8 | def test_show_versions():
9 | pm.show_versions()
10 |
11 |
12 | def test_show_versions_when_not_present():
13 | deps = ['big-ol-fake-pkg']
14 | assert _get_deps_info(deps=deps)['big-ol-fake-pkg'] is None
15 |
--------------------------------------------------------------------------------
/pmdarima/utils/tests/test_vis.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from unittest.mock import patch
4 | import numpy as np
5 | import pytest
6 |
7 |
8 | class MockACPlot:
9 | def __init__(self, series):
10 | self.series = series
11 | self.showed = False
12 |
13 | def show(self):
14 | self.showed = True
15 | return self
16 |
17 |
18 | class MockPlottable:
19 | def __init__(self):
20 | self.showed = False
21 |
22 | def show(self):
23 | self.showed = True
24 | return self
25 |
26 |
27 | # ACF/PACF
28 | class MockTSAPlots:
29 | plot_acf = plot_pacf = (lambda **kwargs: MockPlottable())
30 |
31 |
32 | # TODO: can we get this to work eventually?
33 | if False:
34 | @pytest.mark.parametrize('show', [True, False])
35 | def test_visualizations(show):
36 | with patch('statsmodels.graphics.tsaplots', MockTSAPlots):
37 |
38 | # Have to import AFTER tha patch, since the pm.__init__ will
39 | # promptly import the visualization suite, which overwrites the
40 | # patch
41 | from pmdarima.utils import visualization
42 | dataset = np.random.RandomState(42).rand(150)
43 |
44 | # ac_plot = pm.autocorr_plot(dataset, show=show)
45 | acf_plot = visualization.plot_acf(dataset, show=show)
46 | pacf_plot = visualization.plot_pacf(dataset, show=show)
47 |
48 | # assert ac_plot.showed is show
49 | assert acf_plot.showed is show
50 | assert pacf_plot.showed is show
51 |
--------------------------------------------------------------------------------
/pmdarima/utils/tests/test_wrapped.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import pmdarima as pm
4 | from pmdarima.utils.wrapped import acf, pacf
5 |
6 | import statsmodels.api as sm
7 | import numpy as np
8 |
9 | import pytest
10 |
11 | y = pm.datasets.load_wineind()
12 |
13 |
14 | @pytest.mark.parametrize(
15 | 'native_func,wrapped_func', [
16 | pytest.param(sm.tsa.stattools.acf, acf),
17 | pytest.param(sm.tsa.stattools.pacf, pacf)
18 | ])
19 | def test_wrapped_functions(native_func, wrapped_func):
20 | sm_res = native_func(y) # type: np.ndarray
21 | pm_res = wrapped_func(y)
22 | assert np.allclose(sm_res, pm_res)
23 |
24 | # Show the docstrings are the same
25 | assert wrapped_func.__doc__ == native_func.__doc__
26 |
--------------------------------------------------------------------------------
/pmdarima/utils/wrapped.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Taylor G Smith
4 | #
5 | # Wrapped functions
6 | from functools import wraps
7 | import warnings
8 |
9 | from statsmodels.tsa.stattools import acf as sm_acf, pacf as sm_pacf
10 |
11 | __all__ = [
12 | 'acf',
13 | 'pacf'
14 | ]
15 |
16 | # TODO: remove all explicit args/kwargs, making them *args, **kwargs
17 |
18 |
19 | def inheritdoc(parent):
20 | """Inherit documentation from a parent
21 |
22 | Parameters
23 | ----------
24 | parent : callable
25 | The parent function or class that contains the sought-after
26 | docstring. If it doesn't have a docstring, this might behave
27 | in unexpected ways.
28 |
29 | Examples
30 | --------
31 | >>> def a(x=1):
32 | ... '''This is documentation'''
33 | ... return x
34 | ...
35 | >>> @inheritdoc(a)
36 | ... def b(x):
37 | ... return 2 * a(x)
38 | ...
39 | >>> print(b.__doc__)
40 | This is documentation
41 |
42 | >>> print(b(2))
43 | 4
44 | """
45 | def wrapper(func):
46 | # Assign the parent docstring to the child
47 | func.__doc__ = parent.__doc__
48 |
49 | @wraps(func)
50 | def caller(*args, **kwargs):
51 | return func(*args, **kwargs)
52 | return caller
53 | return wrapper
54 |
55 |
56 | @inheritdoc(parent=sm_acf)
57 | def acf(
58 | x,
59 | nlags=None,
60 | qstat=False,
61 | fft=None,
62 | alpha=None,
63 | missing='none',
64 | adjusted=False,
65 | ):
66 | return sm_acf(
67 | x=x,
68 | nlags=nlags,
69 | qstat=qstat,
70 | fft=fft,
71 | alpha=alpha,
72 | missing=missing,
73 | adjusted=adjusted,
74 | )
75 |
76 |
77 | @inheritdoc(parent=sm_pacf)
78 | def pacf(x, nlags=None, method='ywadjusted', alpha=None):
79 | # Handle kwarg deprecation in statsmodels 0.13.0
80 | if "unbiased" in method:
81 | warnings.warn(
82 | "The `*unbiased` methods have been deprecated in "
83 | "statsmodels >= 0.13.0. Please use `*adjusted` instead.",
84 | DeprecationWarning
85 | )
86 | method = method.replace("unbiased", "adjusted")
87 | elif method in ("ydu", "ywu", "ldu"):
88 | warnings.warn(
89 | "The `ydu`, `ywu`, and `ldu` methods have been deprecated in "
90 | "statsmodels >= 0.13.0. Please use `yda`, `ywa`, and `lda` "
91 | "instead.",
92 | DeprecationWarning
93 | )
94 | method = method.replace("u", "a")
95 |
96 | return sm_pacf(x=x, nlags=nlags, method=method, alpha=alpha)
97 |
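A sketch of the deprecation shim in action (assumes statsmodels >= 0.13):

    >>> import warnings
    >>> import pmdarima as pm
    >>> from pmdarima.utils.wrapped import pacf
    >>> y = pm.datasets.load_wineind()
    >>> with warnings.catch_warnings(record=True) as w:
    ...     warnings.simplefilter('always')
    ...     res = pacf(y, method='ywunbiased')  # mapped to 'ywadjusted'
    >>> any(issubclass(wi.category, DeprecationWarning) for wi in w)
    True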
--------------------------------------------------------------------------------
/pmdarima/warnings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | __all__ = [
4 | 'ModelFitWarning'
5 | ]
6 |
7 |
8 | class ModelFitWarning(UserWarning):
9 | """Generic warning used for a model fit that might fail. More descriptive
10 | than simply trying to lump everything into a default UserWarning, which
11 | gives the user no insight into the reason for the warning apart from a
12 | (potentially) cryptic message. This allows the user to understand that the
13 | warning emanates from an attempted model fit and originates from within
14 | the pmdarima package.
15 | """
16 | pass
17 |
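Because ``ModelFitWarning`` subclasses ``UserWarning``, callers can filter it
specifically while leaving other warnings alone. A sketch:

    >>> import warnings
    >>> from pmdarima.warnings import ModelFitWarning
    >>> warnings.simplefilter('ignore', ModelFitWarning)  # silence fit noise
    >>> issubclass(ModelFitWarning, UserWarning)
    True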
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | "numpy==1.21.2; python_version < '3.10'",
4 | "numpy==1.21.6; python_version == '3.10' and platform_system != 'Windows'",
5 | "numpy==1.22.3; python_version == '3.10' and platform_system == 'Windows'",
6 | "numpy==1.23.2; python_version == '3.11'",
7 | "numpy==1.26.0; python_version == '3.12'",
8 | "scipy==1.3.2; python_version <= '3.8' and platform_machine != 'aarch64'",
9 | "scipy==1.5.3; python_version <= '3.8' and platform_machine == 'aarch64'",
10 | "scipy==1.5.4; python_version == '3.9'",
11 | "scipy==1.7.2; python_version == '3.10'",
12 | "scipy==1.9.3; python_version == '3.11'",
13 | "scipy==1.11.2; python_version == '3.12'",
14 | "statsmodels==0.13.2; python_version <= '3.10'",
15 | "statsmodels==0.13.3; python_version == '3.11'",
16 | "statsmodels==0.14.0; python_version == '3.12'",
17 | "cython>=0.29,!=0.29.18,!=0.29.31",
18 | "setuptools",
19 | ]
20 | build-backend = "setuptools.build_meta:__legacy__" # TODO: Don't use legacy backend
21 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | joblib>=0.11
2 | Cython>=0.29,!=0.29.18,!=0.29.31
3 | numpy>=1.21.2,<2.0.0
4 | pandas>=0.19
5 | scikit-learn>=0.22
6 | scipy>=1.3.2
7 | statsmodels>=0.13.2
8 | urllib3
9 | setuptools>=38.6.0,!=50.0.0
10 | packaging>=17.1 # Bundled with setuptools, but want to be explicit
11 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | # This is adapted from sklearn's setup cfg.
2 |
3 | [aliases]
4 | test = pytest
5 |
6 | [tool:pytest]
7 | filterwarnings =
8 | # Warnings that we raise:
9 | ignore::UserWarning
10 |
11 | # Warnings that statsmodels raises a lot of
12 | ignore::statsmodels.tools.sm_exceptions.HessianInversionWarning
13 |
14 | # statsmodels warning that doesn't apply to us
15 | ignore:fft=True
16 |
17 | # joblib warning that we can't control (fixed in this commit https://github.com/joblib/joblib/commit/a861f43167ab63fe683c45679e34143751cb976d, but not deployed yet)
18 | # This is the solution from pytest: https://github.com/pytest-dev/pytest/issues/4116#issuecomment-429101898
19 | ignore:tostring.*is deprecated
20 |
21 | # This is fixed in patsy... No idea which dependency is actually calling patsy (https://github.com/pydata/patsy/blob/4c613d0ad3009044ca3aee5a5d70bd56af8f396b/patsy/constraint.py#L13-L16)
22 | ignore:Using or importing the ABCs
23 |
24 | [metadata]
25 | description-file = README.md
26 |
--------------------------------------------------------------------------------