├── .circleci └── config.yml ├── .codecov.yml ├── .coveragerc ├── .github ├── CONTRIBUTING.md ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── feature_request.yml │ └── question.yml ├── PULL_REQUEST_TEMPLATE.md ├── stale.yml ├── utils │ └── get_dependency_releases.py └── workflows │ ├── badges.yml │ ├── build_and_deploy.yml │ ├── nightly_cron.yml │ └── test_tagging.yml ├── .gitignore ├── AUTHORS.md ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── benchmarks ├── Benchmarking Seasonality.ipynb ├── bench_autoarima.ipynb ├── benchmark_funcs.py └── item_sales_daily.csv.gz ├── build_tools ├── build_requirements.txt ├── circle │ ├── build_doc.sh │ ├── build_wheel.sh │ ├── deploy.sh │ ├── deploy_doc.sh │ ├── dind │ │ ├── README.md │ │ ├── build_manylinux_wheel.sh │ │ └── install_and_test.sh │ ├── get_version.sh │ └── test_unit.sh ├── get_tag.py └── github │ ├── deploy.sh │ ├── get_latest_dependencies.py │ └── test_version_tagging.sh ├── doc ├── Makefile ├── _static │ ├── css │ │ ├── fields.css │ │ └── gitcontrib.css │ └── js │ │ ├── contrib.js │ │ └── jquery.min.js ├── _templates │ ├── class.rst │ ├── class_with_call.rst │ ├── class_without_init.rst │ ├── function.rst │ └── numpydoc_docstring.rst ├── about.rst ├── citing.rst ├── conf.py ├── contributing.rst ├── contributors.rst ├── img │ ├── bad_issue.png │ ├── favicon.ico │ ├── good_issue.png │ ├── lynx_autocorr.png │ ├── m_matters.png │ ├── stock_forecasts.png │ ├── stock_lag_plot.png │ └── sunspots │ │ ├── bc-transformed.png │ │ ├── log-transformed.png │ │ ├── model-1.png │ │ ├── model-2.png │ │ └── untransformed.png ├── includes │ └── api_css.rst ├── index.rst ├── make.bat ├── migration-guide.rst ├── modules │ ├── classes.rst │ └── datasets.rst ├── no-successful-model.rst ├── quickstart.rst ├── refreshing.rst ├── rfc │ ├── 372-exog-to-x.rst │ └── index.rst ├── seasonal-differencing-issues.rst ├── serialization.rst ├── setup.rst ├── sg_execution_times.rst ├── sphinxext │ ├── MANIFEST.in │ └── github_link.py ├── tips_and_tricks.rst ├── usecases.rst ├── usecases │ ├── stocks.rst │ └── sun-spots.rst ├── user_guide.rst └── whats_new.rst ├── etc └── downloads_badges.py ├── examples ├── README.txt ├── arima │ ├── README.txt │ ├── example_add_new_samples.py │ ├── example_auto_arima.py │ ├── example_persisting_a_model.py │ └── example_seasonal_decomposition.py ├── datasets │ ├── README.txt │ └── example_load_data.py ├── example_pipeline.py ├── example_simple_fit.py ├── issue12 │ ├── dummy_data.csv │ └── issue-12.ipynb ├── model_selection │ ├── README.txt │ ├── example_cross_val_predict.py │ └── example_cross_validation.py ├── preprocessing │ ├── README.txt │ └── example_date_featurizer.py ├── quick_start_example.ipynb ├── quick_start_output.png ├── stock_market_example.ipynb └── utils │ ├── README.txt │ ├── example_array_concatenation.py │ ├── example_array_differencing.py │ └── example_tsdisplay.py ├── pmdarima ├── __check_build │ ├── __init__.py │ ├── _check_build.pyx │ ├── setup.py │ └── tests │ │ ├── __init__.py │ │ └── test_check_build.py ├── __init__.py ├── _build_utils │ ├── __init__.py │ ├── pre_build_helpers.py │ └── tests │ │ └── __init__.py ├── arima │ ├── __init__.py │ ├── _arima.pyx │ ├── _arima_fast_helpers.h │ ├── _auto_solvers.py │ ├── _context.py │ ├── _doc.py │ ├── _validation.py │ ├── approx.py │ ├── arima.py │ ├── auto.py │ ├── seasonality.py │ ├── setup.py │ ├── stationarity.py │ ├── tests │ │ ├── __init__.py │ │ ├── data │ │ │ └── issue_191.csv │ │ ├── test_approx.py │ 
│ ├── test_arima.py │ │ ├── test_arima_diagnostics.py │ │ ├── test_auto.py │ │ ├── test_auto_solvers.py │ │ ├── test_c_arima.py │ │ ├── test_context.py │ │ ├── test_seasonality.py │ │ ├── test_stationarity.py │ │ ├── test_utils.py │ │ └── test_validation.py │ └── utils.py ├── base.py ├── compat │ ├── __init__.py │ ├── matplotlib.py │ ├── numpy.py │ ├── pandas.py │ ├── pytest.py │ ├── sklearn.py │ ├── statsmodels.py │ └── tests │ │ ├── __init__.py │ │ ├── test_sklearn.py │ │ └── test_statsmodels.py ├── context_managers.py ├── datasets │ ├── __init__.py │ ├── _base.py │ ├── airpassengers.py │ ├── ausbeer.py │ ├── austres.py │ ├── data │ │ ├── dated.tar.gz │ │ ├── msft.tar.gz │ │ └── sunspots.txt.gz │ ├── gasoline.py │ ├── heartrate.py │ ├── lynx.py │ ├── setup.py │ ├── stocks.py │ ├── sunspots.py │ ├── taylor.py │ ├── tests │ │ ├── __init__.py │ │ └── test_load_datasets.py │ ├── wineind.py │ └── woolyrnq.py ├── decorators.py ├── metrics.py ├── model_selection │ ├── __init__.py │ ├── _split.py │ ├── _validation.py │ └── tests │ │ ├── __init__.py │ │ ├── test_split.py │ │ └── test_validation.py ├── pipeline.py ├── preprocessing │ ├── __init__.py │ ├── base.py │ ├── endog │ │ ├── __init__.py │ │ ├── base.py │ │ ├── boxcox.py │ │ ├── log.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── test_base.py │ │ │ ├── test_boxcox.py │ │ │ └── test_log.py │ ├── exog │ │ ├── __init__.py │ │ ├── _fourier.pyx │ │ ├── base.py │ │ ├── dates.py │ │ ├── fourier.py │ │ ├── setup.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── test_base.py │ │ │ ├── test_dates.py │ │ │ └── test_fourier.py │ ├── setup.py │ └── tests │ │ ├── __init__.py │ │ └── test_base.py ├── setup.py ├── tests │ ├── __init__.py │ ├── test_context_managers.py │ ├── test_estimators.py │ ├── test_metrics.py │ └── test_pipeline.py ├── utils │ ├── __init__.py │ ├── _array.pyx │ ├── _show_versions.py │ ├── array.py │ ├── metaestimators.py │ ├── setup.py │ ├── tests │ │ ├── __init__.py │ │ ├── test_array.py │ │ ├── test_meta.py │ │ ├── test_show_versions.py │ │ ├── test_vis.py │ │ └── test_wrapped.py │ ├── visualization.py │ └── wrapped.py └── warnings.py ├── pyproject.toml ├── requirements.txt ├── setup.cfg └── setup.py /.codecov.yml: -------------------------------------------------------------------------------- 1 | # https://gist.github.com/stevepeak/53bee7b2c326b24a9b4a 2 | 3 | # Prevent codecov from commenting 4 | comment: false 5 | 6 | coverage: 7 | precision: 2 8 | round: down 9 | 10 | status: 11 | project: 12 | default: 13 | target: 95% 14 | informational: true 15 | branches: 16 | - master 17 | 18 | # The patch just adds noise to the PRs. 
We only really care about overall 19 | # coverage 20 | patch: off 21 | 22 | ignore: 23 | - "**/setup.py" 24 | - "*/pmdarima/__check_build/*" 25 | - "*/pmdarima/_build_utils/*" 26 | - "*/pmdarima/_config.py" 27 | - "*/pmdarima/__init__.py" 28 | - "*/pmdarima/compat/matplotlib.py" 29 | - "*/pmdarima/utils/tests/test_vis.py" 30 | - "*/pmdarima/utils/visualization.py" 31 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | show_missing = True 3 | 4 | [run] 5 | source = pmdarima 6 | include = */pmdarima/* 7 | omit = 8 | */setup.py 9 | */pmdarima/__check_build/* 10 | */pmdarima/_build_utils/* 11 | */pmdarima/_config.py 12 | */pmdarima/setup.py 13 | */pmdarima/__init__.py 14 | */pmdarima/compat/matplotlib.py 15 | */pmdarima/utils/tests/test_vis.py 16 | */pmdarima/utils/visualization.py 17 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [tgsmith61591, aaronreidsmith] -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Create a bug report to help us improve pmdarima 3 | labels: [':beetle: : bug'] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Thanks for taking the time to file a bug! Please fill in the below information in as much detail as possible. 9 | - type: textarea 10 | id: description 11 | attributes: 12 | label: Describe the bug 13 | description: A clear and concise description of what the bug is. 14 | validations: 15 | required: true 16 | - type: textarea 17 | id: reproduction 18 | attributes: 19 | label: To Reproduce 20 | description: Steps to reproduce the behavior 21 | validations: 22 | required: true 23 | - type: textarea 24 | id: versions 25 | attributes: 26 | label: Versions 27 | description: | 28 |
<details> 29 | <summary>Instructions to get necessary versions</summary> 30 | 31 | Please run the following snippet and paste the output below. 32 | 33 | ```python 34 | import pmdarima; pmdarima.show_versions() 35 | 36 | # For pmdarima versions <1.5.2 use this: 37 | import platform; print(platform.platform()) 38 | import sys; print("Python", sys.version) 39 | import pmdarima; print("pmdarima", pmdarima.__version__) 40 | import numpy; print("NumPy", numpy.__version__) 41 | import scipy; print("SciPy", scipy.__version__) 42 | import sklearn; print("Scikit-Learn", sklearn.__version__) 43 | import statsmodels; print("Statsmodels", statsmodels.__version__) 44 | ``` 45 | 46 | 47 | </details>
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.yml: -------------------------------------------------------------------------------- 25 | <details> 26 | <summary>Instructions to get necessary versions</summary> 27 | 28 | Please run the following snippet and paste the output below. 29 | 30 | ```python 31 | import pmdarima; pmdarima.show_versions() 32 | 33 | # For pmdarima versions <1.5.2 use this: 34 | import platform; print(platform.platform()) 35 | import sys; print("Python", sys.version) 36 | import pmdarima; print("pmdarima", pmdarima.__version__) 37 | import numpy; print("NumPy", numpy.__version__) 38 | import scipy; print("SciPy", scipy.__version__) 39 | import sklearn; print("Scikit-Learn", sklearn.__version__) 40 | import statsmodels; print("Statsmodels", statsmodels.__version__) 41 | ``` 42 | 43 | 44 | </details> -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 7 | 8 | # Description 9 | 10 | Please include a summary of the change and which issue is fixed. 11 | Please also include relevant motivation and context. List any dependencies 12 | that are required for this change. 13 | 14 | Fixes #(issue) 15 | 16 | ## Type of change 17 | 18 | Please delete options that are not relevant. 19 | 20 | - [ ] Bug fix (non-breaking change which fixes an issue) 21 | - [ ] New feature (non-breaking change which adds functionality) 22 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) 23 | - [ ] Documentation change 24 | 25 | # How Has This Been Tested? 26 | 27 | Please describe the tests that you ran to verify your changes. 28 | Provide instructions so we can reproduce. Please also list any relevant details 29 | for your test configuration 30 | 31 | - [ ] Test A 32 | - [ ] Test B 33 | 34 | # Checklist: 35 | 36 | - [ ] I have performed a self-review of my own code 37 | - [ ] I have commented my code, particularly in hard-to-understand areas 38 | - [ ] I have made corresponding changes to the documentation 39 | - [ ] My changes generate no new warnings 40 | - [ ] I have added tests that prove my fix is effective or that my feature works 41 | - [ ] New and existing unit tests pass locally with my changes 42 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Configuration for probot-stale - https://github.com/probot/stale 2 | 3 | # Number of days of inactivity before an Issue or Pull Request becomes stale 4 | daysUntilStale: 60 5 | 6 | # Number of days of inactivity before an Issue or Pull Request with the stale label is closed. 7 | # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. 8 | daysUntilClose: 14 9 | 10 | # Only issues or pull requests with all of these labels are checked if stale. Defaults to `[]` (disabled) 11 | onlyLabels: [] 12 | 13 | # Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable 14 | exemptLabels: [] 15 | 16 | # Set to true to ignore issues in a project (defaults to false) 17 | exemptProjects: false 18 | 19 | # Set to true to ignore issues in a milestone (defaults to false) 20 | exemptMilestones: false 21 | 22 | # Set to true to ignore issues with an assignee (defaults to false) 23 | exemptAssignees: false 24 | 25 | # Label to use when marking as stale 26 | staleLabel: stale-issue 27 | 28 | # Comment to post when marking as stale. Set to `false` to disable 29 | markComment: > 30 | This issue has been automatically marked as stale because it has not had 31 | recent activity. It will be closed if no further activity occurs. Thank you 32 | for your contributions. 33 | 34 | # Comment to post when removing the stale label. 35 | # unmarkComment: > 36 | # Your comment here. 37 | 38 | # Comment to post when closing a stale Issue or Pull Request. 39 | # closeComment: > 40 | # Your comment here. 41 | 42 | # Limit the number of actions per hour, from 1-30. Default is 30 43 | limitPerRun: 30 44 | 45 | # Limit to only `issues` or `pulls` 46 | only: issues 47 | 48 | # Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': 49 | #pulls: 50 | # daysUntilStale: 60 51 | # markComment: > 52 | # This pull request has been automatically marked as stale because it has not had 53 | # recent activity.
It will be closed if no further activity occurs. Thank you 54 | # for your contributions. 55 | 56 | # issues: 57 | # exemptLabels: 58 | # - confirmed -------------------------------------------------------------------------------- /.github/utils/get_dependency_releases.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import requests 4 | from tabulate import tabulate 5 | 6 | if len(sys.argv) < 2: 7 | print(f'Usage: python {sys.argv[0]} ') 8 | sys.exit(1) 9 | 10 | packages = sys.argv[1:] 11 | session = requests.Session() 12 | 13 | releases = [] 14 | for package in packages: 15 | response = session.get(f'https://pypi.org/pypi/{package}/json') 16 | response.raise_for_status() 17 | pypi = response.json() 18 | latest_version = pypi['info']['version'] 19 | latest_release_date = pypi['releases'][latest_version][0]['upload_time'] 20 | releases.append([ 21 | package, latest_version, latest_release_date.replace('T', ' ') + ' UTC'] 22 | ) 23 | 24 | session.close() 25 | 26 | table = tabulate( 27 | sorted(releases, key=lambda entry: entry[2], reverse=True), 28 | headers=['Package', 'Version', 'Release Date'] 29 | ) 30 | # Need repr so this is on one line for Slack 31 | print(repr('```\n' + table + '\n```').replace("'", '')) 32 | -------------------------------------------------------------------------------- /.github/workflows/badges.yml: -------------------------------------------------------------------------------- 1 | name: Update Downloads Badges 2 | 3 | on: 4 | schedule: 5 | - cron: '15 17 * * *' # Every day at 5:15pm UTC 6 | 7 | jobs: 8 | update: 9 | name: Update Downloads Badges 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v3 14 | - name: Setup Python 15 | uses: actions/setup-python@v4 16 | with: 17 | python-version: '3.11' 18 | architecture: 'x64' 19 | - name: Install Requirements 20 | run: python -m pip install requests 21 | - name: Run Python Script 22 | run: python etc/downloads_badges.py 23 | env: 24 | ZAPIER_SHA: ${{ secrets.ZAPIER_SHA }} -------------------------------------------------------------------------------- /.github/workflows/test_tagging.yml: -------------------------------------------------------------------------------- 1 | name: Test VERSION tagging 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'master' 7 | 8 | pull_request: 9 | branches: 10 | - '*' 11 | 12 | # Cancel older runs of the same workflow on the same branch 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | test-tagging: 19 | name: Test VERSION tagging 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - name: Checkout 25 | uses: actions/checkout@v3 26 | 27 | - name: Setting up Python 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: '3.11' 31 | architecture: 'x64' 32 | 33 | - name: Ensure VERSION tagging works 34 | run: | 35 | chmod +x build_tools/github/test_version_tagging.sh 36 | ./build_tools/github/test_version_tagging.sh 37 | shell: bash 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # VS Code 2 | .vscode/ 3 | 4 | # Mac stuff 5 | .DS_Store 6 | .idea/ 7 | 8 | # custom extension 9 | .pyramid-cache/ 10 | *.pmdpkl 11 | 12 | # Don't want to include scratch stuff 13 | scratch/ 14 | 15 | # Models 16 | *.pkl 17 | !pmdarima/datasets/data/*.pkl 18 | 19 | # sdist artifacts 20 | MANIFEST 21 | 22 
| # Pypi pws 23 | .pypipws 24 | 25 | # Coverage 26 | coverage 27 | 28 | # Pytest 29 | .pytest_cache 30 | 31 | # OAuth access for automating releases 32 | ACCESS_TOKEN 33 | 34 | # Byte-compiled / optimized / DLL files 35 | __pycache__/ 36 | *.py[cod] 37 | *$py.class 38 | 39 | # C extensions and cython reports 40 | *.so 41 | *.c 42 | pmdarima/arima/*.html 43 | pmdarima/preprocessing/exog/*.html 44 | pmdarima/utils/*.html 45 | 46 | # Distribution / packaging 47 | .Python 48 | env/ 49 | build/ 50 | doc/_build 51 | doc/auto_examples 52 | doc/modules/generated 53 | develop-eggs/ 54 | dist/ 55 | downloads/ 56 | eggs/ 57 | .eggs/ 58 | lib/ 59 | lib64/ 60 | parts/ 61 | sdist/ 62 | var/ 63 | *.egg-info/ 64 | .installed.cfg 65 | *.egg 66 | 67 | # PyInstaller 68 | # Usually these files are written by a python script from a template 69 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 70 | *.manifest 71 | *.spec 72 | 73 | # Installer logs 74 | pip-log.txt 75 | pip-delete-this-directory.txt 76 | 77 | # Unit test / coverage reports 78 | htmlcov/ 79 | .tox/ 80 | .coverage 81 | .coverage.* 82 | .cache 83 | coverage.xml 84 | *,cover 85 | .hypothesis/ 86 | 87 | # Translations 88 | *.mo 89 | *.pot 90 | 91 | # Django stuff: 92 | *.log 93 | local_settings.py 94 | 95 | # Flask stuff: 96 | instance/ 97 | .webassets-cache 98 | 99 | # Scrapy stuff: 100 | .scrapy 101 | 102 | # Sphinx documentation 103 | docs/_build/ 104 | 105 | # PyBuilder 106 | target/ 107 | 108 | # IPython Notebook 109 | .ipynb_checkpoints 110 | 111 | # pyenv 112 | .python-version 113 | 114 | # celery beat schedule file 115 | celerybeat-schedule 116 | 117 | # dotenv 118 | .env 119 | 120 | # virtualenv 121 | venv/ 122 | ENV/ 123 | 124 | # Spyder project settings 125 | .spyderproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # We want this to only be created on CI/CD platforms 131 | VERSION 132 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | ## Authors 2 | 3 | The following people have been core contributors to `pmdarima`'s development: 4 | 5 | * [Taylor Smith](https://github.com/tgsmith61591) 6 | * [Gary Foreman](https://github.com/garyForeman) 7 | * [Charles Drotar](https://github.com/charlesdrotar) 8 | * [Steven Hoelscher](https://github.com/shoelsch) 9 | * [Aaron Smith](https://github.com/aaronreidsmith) 10 | * [Krishna Sunkara](https://github.com/kpsunkara) 11 | * [Christopher Siewert](https://github.com/christopher-siewert) 12 | 13 | __Please do not email the authors directly with questions or issues.__ Rather, use 14 | the [issues](https://github.com/alkaline-ml/pmdarima/issues) page. Furthermore, issues 15 | or emails specifically related to assistance in learning time series analysis should be 16 | saved for Stack Overflow. 17 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you would like to include pmdarima in your published work, please cite it as follows" 3 | authors: 4 | - family-names: "Smith" 5 | given-names: "Taylor G." 
6 | keywords: 7 | - python 8 | - "machine learning" 9 | - "time series" 10 | - econometrics 11 | - forecasting 12 | - arima 13 | - "forecasting models" 14 | - sarimax 15 | title: "pmdarima" 16 | version: 2.0.4 17 | date-released: 2023-10-23 18 | license: MIT 19 | repository-artifact: https://pypi.org/project/pmdarima 20 | repository-code: https://github.com/alkaline-ml/pmdarima 21 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at . All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. 
The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Taylor G Smith 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # recursive-include doc * 2 | include pmdarima/VERSION 3 | recursive-include examples * 4 | recursive-include pmdarima * 5 | recursive-include pmdarima/__check_build * 6 | recursive-include pmdarima/_build_utils * 7 | recursive-include pmdarima/arima * 8 | recursive-include pmdarima/compat * 9 | recursive-include pmdarima/datasets * 10 | recursive-include pmdarima/datasets/data * 11 | recursive-include pmdarima/model_selection * 12 | recursive-include pmdarima/preprocessing/endog * 13 | recursive-include pmdarima/preprocessing/exog * 14 | recursive-include pmdarima/tests * 15 | recursive-include pmdarima/utils * 16 | include LICENSE 17 | include README.md 18 | include requirements.txt 19 | global-exclude __pycache__ 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # simple makefile to simplify repetitive build env management tasks under posix 2 | # this is adopted from the sklearn Makefile 3 | 4 | # caution: testing won't work on windows 5 | 6 | PYTHON ?= python 7 | DOCKER ?= docker 8 | HERE = $(shell pwd) 9 | 10 | .PHONY: clean develop test install bdist_wheel version 11 | 12 | clean: 13 | $(PYTHON) setup.py clean 14 | rm -rf dist 15 | rm -rf build 16 | rm -rf .pytest_cache 17 | rm -rf pmdarima.egg-info 18 | rm -f pmdarima/VERSION 19 | rm -f conda/meta.yaml 20 | rm -rf .coverage.* 21 | 22 | deploy-requirements: 23 | $(PYTHON) -m pip install twine readme_renderer[md] 24 | 25 | # Depends on an artifact existing in dist/, and two environment variables 26 | deploy-twine-test: bdist_wheel deploy-requirements 27 | $(PYTHON) -m twine upload \ 28 | --repository-url https://test.pypi.org/legacy/ dist/* \ 29 | --username ${TWINE_USERNAME} \ 30 | --password ${TWINE_PASSWORD} 31 | 32 | documentation: version 33 | $(DOCKER) run -v $(HERE):/pmdarima -w /pmdarima --rm alkalineml/pmdarima-doc-base:latest /bin/bash -c "make install docker-documentation" 34 | 35 | # This one assumes we are in the docker container, so it can either be called from above (locally), or directly (on CI) 36 | docker-documentation: version 37 | @make -C doc clean html EXAMPLES_PATTERN=example_* PMDARIMA_VERSION=$(PMDARIMA_VERSION) 38 | 39 | requirements: 40 | $(PYTHON) -m pip install -r requirements.txt 41 | 42 | bdist_wheel: version 43 | $(PYTHON) setup.py bdist_wheel 44 | 45 | sdist: version 46 | $(PYTHON) setup.py sdist 47 | 48 | develop: version 49 | $(PYTHON) setup.py develop 50 | 51 | install: version 52 | $(PYTHON) setup.py install 53 | 54 | lint-requirements: 55 | $(PYTHON) -m pip install flake8 56 | 57 | testing-requirements: 58 | $(PYTHON) -m pip install pytest flake8 matplotlib pytest-mpl coverage pytest-cov codecov 59 | 60 | test-lint: 61 | $(PYTHON) -m flake8 pmdarima --filename='*.py' --ignore F401,F403,W293,W504 62 | 63 | test-unit: 64 | $(PYTHON) -m pytest -v --durations=20 --cov-config .coveragerc --cov pmdarima -p no:logging 65 | 66 | test: test-unit test-lint 67 | # Coverage creates all these random little artifacts we don't want 68 | rm .coverage.* || echo "No coverage artifacts to remove" 69 | 70 | twine-check: bdist_wheel deploy-requirements 71 | # Check that twine will parse the README acceptably 72 | $(PYTHON) -m twine check dist/* 73 | 74 | version: requirements 75 | @$(PYTHON) 
build_tools/get_tag.py 76 | -------------------------------------------------------------------------------- /benchmarks/benchmark_funcs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Benchmark various approaches to functions to speed things up. 4 | # ... hopefully. 5 | 6 | import numpy as np 7 | 8 | import time 9 | 10 | 11 | def _do_time(func, n_iter=10, *args, **kwargs): 12 | times = [] 13 | for _ in range(n_iter): 14 | start = time.time() 15 | func(*args, **kwargs) 16 | times.append(time.time() - start) 17 | 18 | times = np.asarray(times) 19 | print("Completed %i iterations (avg=%.6f, min=%.6f, max=%.6f)" 20 | % (n_iter, times.mean(), times.min(), times.max())) 21 | 22 | 23 | def benchmark_is_constant(): 24 | """This benchmarks the "is_constant" function from ``pmdarima.arima.utils`` 25 | This was added in 0.6.2. 26 | """ 27 | # WINNER! 28 | def is_const1(x): 29 | """This is the version in Pyramid 0.6.2. 30 | 31 | Parameters 32 | ---------- 33 | x : np.ndarray 34 | This is the array. 35 | """ 36 | return (x == x[0]).all() 37 | 38 | def is_const2(x): 39 | """This should ostensibly only take O(N) rather than O(2N) like 40 | its predecessor. But we'll see... 41 | 42 | Parameters 43 | ---------- 44 | x : np.ndarray 45 | This is the array. 46 | """ 47 | return np.unique(x).shape[0] == 1 48 | 49 | x = np.random.choice(np.arange(10), 1000000, replace=True) 50 | _do_time(is_const1, 25, x) 51 | _do_time(is_const2, 25, x) 52 | 53 | 54 | if __name__ == '__main__': 55 | benchmark_is_constant() 56 | -------------------------------------------------------------------------------- /benchmarks/item_sales_daily.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/benchmarks/item_sales_daily.csv.gz -------------------------------------------------------------------------------- /build_tools/build_requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.2; python_version < '3.10' 2 | numpy==1.21.6; python_version == '3.10' and platform_system != 'Windows' 3 | numpy==1.22.3; python_version == '3.10' and platform_system == 'Windows' 4 | numpy==1.23.2; python_version == '3.11' 5 | numpy==1.26.0; python_version == '3.12' 6 | scipy==1.3.2; python_version <= '3.8' and platform_machine != 'aarch64' 7 | scipy==1.5.3; python_version <= '3.8' and platform_machine == 'aarch64' 8 | scipy==1.5.4; python_version == '3.9' 9 | scipy==1.7.2; python_version == '3.10' 10 | scipy==1.9.3; python_version == '3.11' 11 | scipy==1.11.2; python_version == '3.12' 12 | statsmodels==0.13.2; python_version <= '3.10' 13 | statsmodels==0.13.3; python_version == '3.11' 14 | statsmodels==0.14.0; python_version == '3.12' 15 | cython>=0.29,!=0.29.18,!=0.29.31 16 | scikit-learn>=0.22 17 | pandas>=0.19 18 | patsy 19 | pytest 20 | pytest-mpl 21 | pytest-benchmark 22 | setuptools>=38.6.0,!=50.0.0 23 | packaging>=17.1 # Bundled with setuptools, but want to be explicit 24 | wheel 25 | twine>=1.13.0 26 | readme_renderer 27 | matplotlib 28 | urllib3 29 | -------------------------------------------------------------------------------- /build_tools/circle/build_doc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # this is a hack, but we have to make sure we're only ever running this from 6 | # the top level of 
the package and not in the subdirectory... 7 | if [[ ! -d pmdarima/__check_build ]]; then 8 | echo "This must be run from the pmdarima project directory" 9 | exit 3 10 | fi 11 | 12 | # Set ${version} 13 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 14 | source "${DIR}/get_version.sh" 15 | 16 | make docker-documentation PMDARIMA_VERSION=${version} 17 | -------------------------------------------------------------------------------- /build_tools/circle/build_wheel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -x 4 | 5 | function build_wheel { 6 | local pyver=$1 7 | local arch=$2 8 | local ucs_setting=$3 9 | 10 | # https://www.python.org/dev/peps/pep-0513/#ucs-2-vs-ucs-4-builds 11 | ucs_tag="" 12 | if [ "$ucs_setting" = "ucs4" ]; then 13 | ucs_tag="${ucs_tag}u" 14 | fi 15 | 16 | distutils_version="" 17 | if [ "$pyver" = "3.12" ]; then 18 | distutils_version="local" 19 | else 20 | distutils_version="stdlib" 21 | fi 22 | 23 | ML_PYTHON_VERSION=$(python -c \ 24 | "print('cp{maj}{min}-cp{maj}{min}{ucs}'.format( \ 25 | maj='${pyver}'.split('.')[0], \ 26 | min='${pyver}'.split('.')[1], \ 27 | ucs='${ucs_tag}'))") 28 | 29 | DOCKER_CONTAINER_NAME=wheel_builder_$(uuidgen) 30 | 31 | ML_IMAGE="quay.io/pypa/manylinux_2_28_${arch}:2023-10-07-c1e05d1" # `latest` as of 2023-10-09 32 | PMDARIMA_VERSION=`cat ~/pmdarima/pmdarima/VERSION` 33 | 34 | docker pull "${ML_IMAGE}" 35 | # -v "${_root}":/io \ 36 | docker run \ 37 | --name "${DOCKER_CONTAINER_NAME}" \ 38 | -v `pwd`:/io \ 39 | -e "PYTHON_VERSION=${ML_PYTHON_VERSION}" \ 40 | -e "PMDARIMA_VERSION=${PMDARIMA_VERSION}" \ 41 | -e "SETUPTOOLS_USE_DISTUTILS=${distutils_version}" \ 42 | "${ML_IMAGE}" "/io/build_tools/circle/dind/build_manylinux_wheel.sh" 43 | sudo docker cp "${DOCKER_CONTAINER_NAME}:/io/dist/." "${_root}/dist/" 44 | docker rm $(docker ps -a -f status=exited -q) 45 | } 46 | 47 | # Create base directory 48 | pushd $(dirname $0) > /dev/null 49 | _root=$(dirname $(dirname $(pwd -P))) # get one directory up from parent to get to root dir 50 | popd > /dev/null 51 | 52 | echo "Building LINUX OS wheels" 53 | 54 | # Positional arg 55 | pyver=$1 56 | 57 | # We no longer explicitly set these... but in the past we did. 58 | if [ -z "$UCS_SETTING" ] || [ "$UCS_SETTING" = "ucs2" ]; then 59 | build_wheel $pyver "x86_64" "ucs2" 60 | elif [ "$UCS_SETTING" = "ucs4" ]; then 61 | build_wheel $pyver "x86_64" "ucs4" 62 | else 63 | echo "Unrecognized UCS_SETTING: ${UCS_SETTING}" 64 | fi 65 | -------------------------------------------------------------------------------- /build_tools/circle/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -x 4 | 5 | pip install twine wheel 6 | 7 | # Check our VERSION. Basically, if it contains letters, it is a pre-release. 
Otherwise, 8 | # it has to match X.Y or X.Y.Z 9 | # 10 | # On CircleCI, we look for the `v` at the beginning of the version, since we are looking at the tag 11 | if [[ ${CIRCLE_TAG} =~ ^v?[0-9]+\.[0-9]+\.?[0-9]*[a-zA-Z]+[0-9]*$ ]]; then 12 | echo 'Uploading to test pypi' 13 | twine upload --skip-existing --repository-url https://test.pypi.org/legacy/ dist/pmdarima-* 14 | elif [[ ${CIRCLE_TAG} =~ ^v?[0-9]+\.[0-9]+\.?[0-9]*$ ]]; then 15 | echo 'Uploading to production pypi' 16 | twine upload --skip-existing dist/pmdarima-* 17 | else 18 | echo "Malformed tag: ${CIRCLE_TAG}" 19 | exit 1 20 | fi 21 | -------------------------------------------------------------------------------- /build_tools/circle/dind/README.md: -------------------------------------------------------------------------------- 1 | ## `dind` (Docker in Docker) 2 | 3 | Scripts executed in Docker containers from Machine executors on Circle. 4 | -------------------------------------------------------------------------------- /build_tools/circle/dind/build_manylinux_wheel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # EXECUTED IN QUAY DOCKER IMAGE WHERE /io IS A MOUNTED VOLUME OF PMDARIMA ROOT 4 | 5 | # Modify permissions on file 6 | set -e -x 7 | 8 | # Compile wheels 9 | PYTHON="/opt/python/${PYTHON_VERSION}/bin/python" 10 | PIP="/opt/python/${PYTHON_VERSION}/bin/pip" 11 | 12 | # We have to use wheel < 0.32 since they inexplicably removed the open_for_csv 13 | # function from the package after 0.31.1 and it fails for Python 3.6?! 14 | ${PIP} install --upgrade pip wheel==0.31.1 15 | ${PIP} install --upgrade "setuptools>=38.6.0,!=50.0.0" 16 | 17 | # NOW we can install requirements 18 | ${PIP} install -r /io/build_tools/build_requirements.txt 19 | ${PIP} install -r /io/requirements.txt 20 | make -C /io/ PYTHON="${PYTHON}" 21 | 22 | # Make sure the VERSION file is present for this. For whatever reason, the 23 | # make -C call removes it 24 | echo ${PMDARIMA_VERSION} > /io/pmdarima/VERSION 25 | ${PIP} wheel /io/ -w /io/dist/ 26 | 27 | # Bundle external shared libraries into the wheels. 28 | for whl in /io/dist/*.whl; do 29 | if [[ "$whl" =~ "pmdarima" ]]; then 30 | auditwheel repair $whl -w /io/dist/ #repair pmdarima wheel and output to /io/dist 31 | fi 32 | 33 | rm $whl # remove wheel 34 | done 35 | -------------------------------------------------------------------------------- /build_tools/circle/dind/install_and_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # EXECUTED IN A DOCKER CONTAINER 4 | 5 | set -e 6 | 7 | # Make sure we're in the root PMDARIMA dir (mounted at /io) 8 | cd /io 9 | 10 | make develop 11 | make testing-requirements 12 | make test-unit 13 | 14 | # Upload coverage 15 | codecov || echo "codecov upload failed" 16 | -------------------------------------------------------------------------------- /build_tools/circle/get_version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # The version is retrieved from the CIRCLE_TAG. If there is no version, we just 4 | # call it 0.0.0, since we won't be pushing anyways (not master and no tag) 5 | if [[ -n ${CIRCLE_TAG} ]]; then 6 | # We should have the VERSION file on tags now since 'make documentation' 7 | # gets it. If not, we use 0.0.0. There are two cases we ever deploy: 8 | # 1. Master (where version is not used, as we use 'develop' 9 | # 2. 
Tags (where version IS defined) 10 | echo "On tag" 11 | make version 12 | export version=$(cat pmdarima/VERSION) 13 | else 14 | echo "Not on tag, will use version=0.0.0" 15 | export version="0.0.0" 16 | fi 17 | -------------------------------------------------------------------------------- /build_tools/circle/test_unit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -x 4 | 5 | # Create base directory 6 | pushd $(dirname $0) > /dev/null 7 | rootdir=$(dirname $(dirname $(pwd -P))) # get one directory up from parent to get to root dir 8 | popd > /dev/null 9 | 10 | echo "Installing package from whl file" 11 | 12 | # Construct docker image 13 | pyver=$1 14 | pythonimg="python:${pyver}" 15 | 16 | # Mount root as a volume, execute installation + unit tests within the container 17 | env > vars.env 18 | docker run \ 19 | --rm \ 20 | -v `pwd`:/io \ 21 | --env-file vars.env \ 22 | ${pythonimg} \ 23 | sh /io/build_tools/circle/dind/install_and_test.sh 24 | 25 | status=$? 26 | exit $status 27 | -------------------------------------------------------------------------------- /build_tools/get_tag.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import abspath, dirname 3 | 4 | # This file assumes that our tags are always in this format: vX.X.X. 5 | # In that case, we would only want to write X.X.X 6 | 7 | TOP_LEVEL = abspath(dirname(dirname(__file__))) 8 | OUT_FILE = os.path.join(TOP_LEVEL, 'pmdarima', 'VERSION') 9 | DEFAULT_TAG = '0.0.0' 10 | 11 | 12 | def get_version_from_tag(tag): 13 | """Handles 1.5.0 or v1.5.0""" 14 | return tag[1:] if tag.startswith('v') else tag 15 | 16 | 17 | # Circle is easy, since they give us the git tag 18 | if os.getenv('CIRCLECI', False) and os.getenv('CIRCLE_TAG', False): 19 | print('Tagged commit on Circle CI. Writing to {0}'.format(OUT_FILE)) 20 | with open(OUT_FILE, 'w') as f: 21 | tag = get_version_from_tag(os.getenv('CIRCLE_TAG')) 22 | f.write(tag) 23 | 24 | elif os.getenv('GITHUB_REF') and \ 25 | os.getenv('GITHUB_REF').startswith('refs/tags/'): 26 | print('Tagged commit on Github Actions. Writing to {0}'.format(OUT_FILE)) 27 | with open(OUT_FILE, 'w') as f: 28 | tag = os.getenv('GITHUB_REF').split('/')[-1] 29 | f.write(get_version_from_tag(tag)) 30 | 31 | # Local or non-tagged commit. setuptools requires a VERSION file, so just write a default one 32 | else: 33 | print("Not a tagged commit or not on CI. 
Using default tag") 34 | with open(OUT_FILE, 'w') as f: 35 | f.write(DEFAULT_TAG) 36 | -------------------------------------------------------------------------------- /build_tools/github/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ $(cat ${GITHUB_WORKSPACE}/pmdarima/VERSION) =~ ^[0-9]+\.[0-9]+\.?[0-9]*[a-zA-Z]+[0-9]*$ ]]; then 4 | echo 'Uploading to test pypi' 5 | python -m twine upload --repository-url https://test.pypi.org/legacy/ --skip-existing dist/pmdarima-* 6 | elif [[ $(cat ${GITHUB_WORKSPACE}/pmdarima/VERSION) =~ ^[0-9]+\.[0-9]+\.?[0-9]*$ ]]; then 7 | echo 'Uploading to production pypi' 8 | python -m twine upload --skip-existing dist/pmdarima-* 9 | else 10 | echo "Malformed tag: $(cat ${GITHUB_WORKSPACE}/pmdarima/VERSION)" 11 | exit 1 12 | fi -------------------------------------------------------------------------------- /build_tools/github/get_latest_dependencies.py: -------------------------------------------------------------------------------- 1 | """This script reads our requirements.txt file and removes the pinned versions""" 2 | 3 | import re 4 | import os 5 | from os.path import abspath, dirname 6 | 7 | TOP_LEVEL = abspath(dirname(dirname(dirname(__file__)))) 8 | REQUIREMENTS = os.path.join(TOP_LEVEL, 'requirements.txt') 9 | BUILD_REQUIREMENTS = os.path.join(TOP_LEVEL, 'build_tools', 'build_requirements.txt') 10 | 11 | 12 | def find_latest_dependencies(*requirements_files): 13 | """Given one or more requirements.txt files, strip off any pinned versions 14 | 15 | Parameters 16 | ---------- 17 | *requirements_files : str 18 | One or more paths to requirements.txt files to parse 19 | 20 | Returns 21 | ------- 22 | requirements : list 23 | List of parsed dependencies without their pinned versions 24 | """ 25 | requirements = [] 26 | for requirements_file in requirements_files: 27 | with open(requirements_file) as file: 28 | for line in file: 29 | requirement = line.strip() 30 | if line.startswith('#'): 31 | continue 32 | match = re.match(r'^([A-Za-z\-0-9]+)', requirement) 33 | if match.group(0).lower() not in requirements: 34 | requirements.append(match.group(0).lower()) 35 | 36 | return requirements 37 | 38 | 39 | requirements = find_latest_dependencies(REQUIREMENTS, BUILD_REQUIREMENTS) 40 | # We print because this is called from a bash script and we need to return a 41 | # space-separated list 42 | print(' '.join(requirements)) 43 | -------------------------------------------------------------------------------- /build_tools/github/test_version_tagging.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | pip install pathlib 5 | 6 | GITHUB_REF=refs/tags/v0.99.999 python ${GITHUB_WORKSPACE}/build_tools/get_tag.py 7 | 8 | if [[ ! 
-f ${GITHUB_WORKSPACE}/pmdarima/VERSION ]]; then 9 | echo "Expected VERSION file" 10 | exit 4 11 | fi 12 | -------------------------------------------------------------------------------- /doc/_static/css/fields.css: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | body { 4 | font-family: Helvetica, Arial, sans-serif !important; 5 | font-style: 100%; 6 | } 7 | */ 8 | 9 | table.docutils tr { 10 | border-style: solid none solid none; 11 | border-width: 1px 0 1px 0; 12 | border-color: #ddd; 13 | } 14 | 15 | div.body p, div.body dd, div.body li { 16 | line-height: 1.5em; 17 | } 18 | 19 | div.body { 20 | font-size: 0.9em; 21 | } 22 | 23 | div.body p { 24 | margin-top: 1.2em; 25 | margin-bottom: .1em; 26 | } 27 | 28 | div.body p, div.body dd, div.body li { 29 | line-height: 1.5em; 30 | } 31 | 32 | table.docutils td, table.docutils th { 33 | padding: 1px 8px 1px 5px !important; 34 | border-top: 0; 35 | border-left: 0; 36 | border-right: 0; 37 | border-bottom: 1px solid #aaa; 38 | } 39 | 40 | .field-odd, .field-even { 41 | background-color: #fff; 42 | } 43 | 44 | .field-name { 45 | background-color: #F0F7FA; 46 | } 47 | 48 | .field-body { 49 | background-color: #fff; 50 | } 51 | 52 | th.field-name { 53 | white-space: nowrap; 54 | } 55 | 56 | .field-odd, .field-even { 57 | background-color: #fff; 58 | } 59 | 60 | .field-name { 61 | background-color: #F0F7FA; 62 | } 63 | 64 | .field-body { 65 | background-color: #fff; 66 | } 67 | 68 | h2 > a.reference { 69 | font-weight: bold; 70 | color: #2878A2; 71 | text-decoration: none; 72 | word-wrap: break-word; 73 | font-family: Arial, sans-serif; 74 | } 75 | 76 | a.reference > code.xref { 77 | background: transparent; 78 | border: none; 79 | font-size: 1.1em; 80 | font-family: monospace; 81 | padding: 2px 4px; 82 | color: #2980B9 !important; 83 | } 84 | -------------------------------------------------------------------------------- /doc/_static/css/gitcontrib.css: -------------------------------------------------------------------------------- 1 | .capped-card { 2 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"; 3 | font-size: 14px; 4 | line-height: 1.5; 5 | color: #24292e; 6 | list-style: none !important; 7 | } 8 | 9 | .contrib-wrapper { 10 | /* position: absolute; */ 11 | display: block; 12 | height: 80px; 13 | width: 450px; 14 | } 15 | 16 | .committer { 17 | text-decoration: none; 18 | color: #0366d6; 19 | } 20 | 21 | .contrib-avatar-wrapper { 22 | position: absolute; 23 | display: block; 24 | margin-left: 0px; 25 | margin-top: 0px; 26 | height: 80px; 27 | width: 80px; 28 | } 29 | 30 | .avatar { 31 | margin-left: 10px; 32 | margin-top: 10px; 33 | } 34 | 35 | img.avatar { 36 | width: 60px; 37 | height: 60px; 38 | } 39 | 40 | .contrib-author-wrapper { 41 | position: absolute; 42 | display: block; 43 | margin-left: 80px; 44 | margin-top: 0px; 45 | width: 310px; 46 | height: 50px; 47 | } 48 | 49 | .contrib-author-wrapper h3 { 50 | margin-left: 5px; 51 | margin-top: 5px; 52 | font-weight: normal; 53 | font-size: 24px; 54 | } 55 | 56 | .rank { 57 | float: right; 58 | font-size: 13px; 59 | color: #586069; 60 | margin-right: 10px; 61 | margin-top: 10px; 62 | } 63 | 64 | .contrib-stats-wrapper { 65 | position: absolute; 66 | display: block; 67 | margin-left: 80px; 68 | margin-top: 50px; 69 | width: 370px; 70 | height: 30px; 71 | } 72 | 73 | .contrib-stats { 74 | position: absolute; 75 | display: block; 76 | 
margin-left: 80px; 77 | margin-right: 20px; 78 | margin-top: 10px; 79 | width: 350px; 80 | height: 25px; 81 | } 82 | 83 | .ameta { 84 | margin-left: 5px; 85 | } 86 | 87 | .cmt { 88 | color: #586069; 89 | text-decoration: none; 90 | } 91 | 92 | .a { 93 | color: #28a745; 94 | } 95 | 96 | .d { 97 | color: #cb2431; 98 | } -------------------------------------------------------------------------------- /doc/_static/js/contrib.js: -------------------------------------------------------------------------------- 1 | function commaFmt(x) { 2 | return x.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ","); 3 | } 4 | 5 | function fetchContributors() { 6 | $.getJSON("https://api.github.com/repos/alkaline-ml/pmdarima/stats/contributors", function(arr) { 7 | // sort the array based on total count 8 | arr.sort(function(a, b) { 9 | var aTotal = a['total']; 10 | var bTotal = b['total']; 11 | 12 | // reverse for desc 13 | return (aTotal > bTotal) ? -1 : (bTotal > aTotal) ? 1 : 0; 14 | }); 15 | 16 | $.each(arr, function(i, obj) { 17 | var total = obj['total']; 18 | var adds = 0; 19 | var dels = 0; 20 | 21 | // get the counts of adds/deletes 22 | $.each(obj['weeks'], function(wk, weekData) { 23 | adds += weekData['a']; 24 | dels += weekData['d']; 25 | }); 26 | 27 | var authorJSON = obj['author']; 28 | var authorLogin = authorJSON['login']; 29 | var authorURL = authorJSON['html_url']; 30 | var avatarURL = authorJSON['avatar_url'] + '&s=60'; 31 | var p = (total > 1) ? 's' : ''; 32 | 33 | // Add HTML elements to the ol element below 34 | var li = $('
var li = $('<li class="capped-card">' + 35 | '<div class="contrib-wrapper">' + 36 | '<div class="contrib-avatar-wrapper">' + 37 | '<a href="' + authorURL + '">' + 38 | '<img class="avatar" src="' + avatarURL + '"/>' + 39 | '</a></div>' + 40 | '<div class="contrib-author-wrapper"><h3><a class="committer" href="' + authorURL + '">' + authorLogin + '</a></h3>' + 41 | '<span class="rank">' + 42 | '#' + (i + 1).toString() + 43 | '</span>' + 44 | '</div>' + 45 | '<div class="contrib-stats-wrapper">' + 46 | '<div class="contrib-stats">' + 47 | '<span class="ameta">' + 48 | '<span class="cmt">' + commaFmt(total) + ' commit' + p + ' / ' + 49 | '<span class="a">' + commaFmt(adds) + ' ++ / ' + 50 | '<span class="d">' + commaFmt(dels) + ' --' + 51 | '</span></span></span></span>' + 52 | '</div>' + 53 | '</div>' + 54 | '</div>' + 55 | '</li>') 56 | 57 | // can only do this once the doc is ready 58 | $('#contrib').append(li); 59 | }); 60 | }); 61 | } 62 | -------------------------------------------------------------------------------- /doc/_templates/class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | {% endblock %} 11 | 12 | .. include:: {{module}}.{{objname}}.examples 13 | 14 | .. raw:: html 15 | 16 |    <div class="clearer"></div>
-------------------------------------------------------------------------------- /doc/_templates/class_with_call.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}=============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | .. automethod:: __call__ 11 | {% endblock %} 12 | 13 | .. include:: {{module}}.{{objname}}.examples 14 | 15 | .. raw:: html 16 | 17 |    <div class="clearer"></div>
-------------------------------------------------------------------------------- /doc/_templates/class_without_init.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |    <div class="clearer"></div>
-------------------------------------------------------------------------------- /doc/_templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |    <div class="clearer"></div>
    -------------------------------------------------------------------------------- /doc/_templates/numpydoc_docstring.rst: -------------------------------------------------------------------------------- 1 | {{index}} 2 | {{summary}} 3 | {{extended_summary}} 4 | {{parameters}} 5 | {{returns}} 6 | {{yields}} 7 | {{other_parameters}} 8 | {{attributes}} 9 | {{raises}} 10 | {{warns}} 11 | {{warnings}} 12 | {{see_also}} 13 | {{notes}} 14 | {{references}} 15 | {{examples}} 16 | {{methods}} 17 | -------------------------------------------------------------------------------- /doc/about.rst: -------------------------------------------------------------------------------- 1 | .. _about: 2 | 3 | ================= 4 | About the project 5 | ================= 6 | 7 | ``pmdarima`` is designed to behave as similarly to R's well-known 8 | `auto.arima `_ 9 | as possible. 10 | 11 | The project emerged as a result of a long-standing personal debate between 12 | my colleagues and `me `_ about why python is 13 | vastly superior to R. Since R's forecasting capabilities far superseded those of Python's 14 | existing libraries, ``pmdarima`` was created to close that gap and give analysts/researchers 15 | one less reason why R is a viable language for practical machine learning. 16 | 17 | *(Of course, take my soapbox speech with a grain of salt... I once was an R addict but am now recovering)* 18 | 19 | 20 | The name... 21 | ----------- 22 | 23 | The name "pyramid" originally was the result of an anagram between the "py" prefix and 24 | the characters needed to spell "arima". However, the popular web framework sharing the 25 | same name caused a `namespace collision `_ 26 | and the package has since been renamed ``pmdarima``. You may still see it referred to interchangeably 27 | throughout the doc as "pyramid". 28 | 29 | 30 | How it works 31 | ------------ 32 | 33 | ``pmdarima`` is essentially a Python & Cython wrapper of several different statistical 34 | and machine learning libraries (statsmodels and scikit-learn), and operates by generalizing 35 | all ARIMA models into a single class (unlike statsmodels). 36 | 37 | It does this by wrapping the respective statsmodels interfaces 38 | (``ARMA``, ``ARIMA`` and ``SARIMAX``) inside the ``pmdarima.ARIMA`` class, 39 | and as a result there is a bit of monkey patching that happens beneath the hood. 40 | 41 | How ``auto_arima`` works 42 | ~~~~~~~~~~~~~~~~~~~~~~~~ 43 | 44 | The ``auto_arima`` function itself operates a bit like a grid search, in that it 45 | tries various sets of ``p`` and ``q`` (also ``P`` and ``Q`` for seasonal models) 46 | parameters, selecting the model that minimizes the AIC (or BIC, or whatever 47 | information criterion you select). To select the differencing terms, ``auto_arima`` 48 | uses a test of stationarity (such as an augmented Dickey-Fuller test) and seasonality 49 | (such as the Canova-Hansen test) for seasonal models. 50 | 51 | For more in-depth information on the process by which ``auto_arima`` selects 52 | the best model, check out the :ref:`tips_and_tricks` section. 53 | 54 | Feedback 55 | -------- 56 | 57 | This is an open-source (read: *FREE*) project. That means several things: 58 | 59 | * It is not infallible 60 | * It's a community effort 61 | * Making demands doesn't go over well 62 | 63 | I know that there are those who have built models with pmdarima as a tool 64 | to support their work. I also know that people can depend on the functionality of 65 | this library in order to do their job well. 
And for that, I'm committed to 66 | keeping things running smoothly. 67 | 68 | However, as I'm the sole maintainer, things can sometimes stack up. 69 | Please feel free to make pull requests (see :ref:`contrib`), file issues, and 70 | make feature requests. But note the third point: :ref:`contributors` to this 71 | project do it for fun. Let's keep it cordial. 72 | 73 | **If you encounter any issues in the project, please see the** :ref:`filing_bugs` **section for how to file an issue.** 74 | -------------------------------------------------------------------------------- /doc/citing.rst: -------------------------------------------------------------------------------- 1 | .. _citing: 2 | 3 | ====== 4 | Citing 5 | ====== 6 | 7 | If you would like to include ``pmdarima`` in your published work, please cite it as follows: 8 | 9 | .. raw:: html 10 | 11 |
<ul> 12 | <li>Smith, Taylor G., et al. pmdarima: ARIMA estimators for Python, 2017-, 13 | http://www.alkaline-ml.com/pmdarima 14 | [Online; accessed <today>] 28 | </li> 29 | </ul>
    30 | 31 | BibTeX Entry: 32 | 33 | .. code-block:: tex 34 | 35 | @MISC {pmdarima, 36 | author = {Taylor G. Smith and others}, 37 | title = {{pmdarima}: ARIMA estimators for {Python}}, 38 | year = {2017--}, 39 | url = "http://www.alkaline-ml.com/pmdarima", 40 | note = {[Online; accessed ]} 41 | } 42 | -------------------------------------------------------------------------------- /doc/contributors.rst: -------------------------------------------------------------------------------- 1 | .. _contributors: 2 | 3 | ============ 4 | Contributors 5 | ============ 6 | 7 | Thanks to the following users for their contributions to pmdarima! 8 | 9 | .. raw:: html 10 | 11 | 12 | 13 | 14 | 15 | 16 | 22 | 23 | 24 |
      -------------------------------------------------------------------------------- /doc/img/bad_issue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/bad_issue.png -------------------------------------------------------------------------------- /doc/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/favicon.ico -------------------------------------------------------------------------------- /doc/img/good_issue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/good_issue.png -------------------------------------------------------------------------------- /doc/img/lynx_autocorr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/lynx_autocorr.png -------------------------------------------------------------------------------- /doc/img/m_matters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/m_matters.png -------------------------------------------------------------------------------- /doc/img/stock_forecasts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/stock_forecasts.png -------------------------------------------------------------------------------- /doc/img/stock_lag_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/stock_lag_plot.png -------------------------------------------------------------------------------- /doc/img/sunspots/bc-transformed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/sunspots/bc-transformed.png -------------------------------------------------------------------------------- /doc/img/sunspots/log-transformed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/sunspots/log-transformed.png -------------------------------------------------------------------------------- /doc/img/sunspots/model-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/sunspots/model-1.png -------------------------------------------------------------------------------- /doc/img/sunspots/model-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/sunspots/model-2.png -------------------------------------------------------------------------------- 
/doc/img/sunspots/untransformed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/doc/img/sunspots/untransformed.png -------------------------------------------------------------------------------- /doc/includes/api_css.rst: -------------------------------------------------------------------------------- 1 | .. 2 | File to ..include in the API ref document. 3 | 4 | .. raw:: html 5 | 6 | 30 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=python -msphinx 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=pmdarima 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed, 20 | echo.then set the SPHINXBUILD environment variable to point to the full 21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the 22 | echo.Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /doc/migration-guide.rst: -------------------------------------------------------------------------------- 1 | .. _migration: 2 | 3 | ============================ 4 | ``pmdarima`` Migration guide 5 | ============================ 6 | 7 | In `issue #34 `_ we made the 8 | decision to migrate from the ``pyramid-arima`` namespace to the ``pmdarima`` 9 | namespace to avoid collisions with the web framework named ``pyramid``. 10 | 11 | Migration is simple: 12 | 13 | .. code-block:: bash 14 | 15 | $ pip install pmdarima 16 | 17 | Rather than importing functions and modules from the ``pyramid`` package, simply 18 | import from ``pmdarima`` instead: 19 | 20 | .. code-block:: python 21 | 22 | from pmdarima.arima import auto_arima 23 | 24 | Or just import it as a namespace: 25 | 26 | .. code-block:: python 27 | 28 | import pmdarima as pm 29 | my_model = pm.auto_arima(my_timeseries) 30 | 31 | For further installation instructions, check out the :ref:`setup` and :ref:`quickstart` guides. 32 | -------------------------------------------------------------------------------- /doc/no-successful-model.rst: -------------------------------------------------------------------------------- 1 | .. _no_successful_model: 2 | 3 | =================================== 4 | When no viable models can be found 5 | =================================== 6 | 7 | For certain time series, the search may return no viable models:: 8 | 9 | Traceback (most recent call last): 10 | File "<stdin>", line 1, in <module> 11 | "Could not successfully fit a viable ARIMA model " 12 | ValueError: Could not successfully fit a viable ARIMA model to input data. 13 | See http://alkaline-ml.com/pmdarima/no-successful-model.html for more information on why this can happen.
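Before digging into the causes below, it usually pays to re-run the search with maximum diagnostics enabled. A minimal sketch (``y`` stands in for whatever series triggered the error):

.. code-block:: python

    import pmdarima as pm

    # trace values >1 print the most verbose search progress, and
    # error_action='trace' logs the stacktrace of every failed candidate fit
    model = pm.auto_arima(y, trace=2, error_action='trace')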
14 | 15 | 16 | This can happen for a number of reasons: 17 | 18 | * Most commonly, the roots of your model may be nearly non-invertible, meaning the inverted roots 19 | lie too close to the unit circle. Here's a good `blog post `_ 20 | on the subject. Make sure ``trace`` is truthy in order to see these warnings when fitting your model. 21 | 22 | * Sometimes statsmodels raises errors during fitting, which can happen in a number of situations 23 | ranging from non-stationary data to actual code errors. In these cases, the stepwise algorithm 24 | filters out the problematic model fits. Setting ``error_action='trace'`` will log the stacktraces of 25 | any errors encountered during the search. 26 | 27 | * Your input data may not be suitable for ARIMA modeling. For instance, it could be a simple polynomial 28 | or better solved by linear regression (i.e., differencing the time series has made it perfectly constant). 29 | 30 | Make sure to set ``trace`` to at least 1 in order to see the search progress, and to a value >1 to see the 31 | maximum trace logging available. If you still cannot diagnose why you are getting this error message, consider 32 | :ref:`filing_bugs`. 33 | -------------------------------------------------------------------------------- /doc/refreshing.rst: -------------------------------------------------------------------------------- 1 | .. _refreshing: 2 | 3 | ============================ 4 | Refreshing your ARIMA models 5 | ============================ 6 | 7 | There are two ways to keep your models up-to-date with pmdarima: 8 | 9 | 1. Periodically, your ARIMA will need to be refreshed given new observations. See 10 | `this discussion `_ 11 | and `this one `_ 12 | on either re-using ``auto_arima``-estimated order terms or re-fitting altogether. 13 | 14 | 2. If you're not ready to totally refresh your model parameters, but would like to add observations to 15 | your model (so new forecasts originate from the latest samples) with minor parameter updates, the ARIMA class makes it 16 | possible to `add new samples <./modules/generated/pmdarima.arima.ARIMA.html#pmdarima.arima.ARIMA.update>`_. 17 | See `this example `_ 18 | for more info. 19 | 20 | 21 | Updating your model with new observations 22 | ----------------------------------------- 23 | 24 | The easiest way to keep your model up-to-date without completely refitting it is simply to 25 | update your model with new observations so that future forecasts take the newest observations 26 | into consideration. Assume that you fit the following model: 27 | 28 | .. code-block:: python 29 | 30 | import pmdarima as pm 31 | from pmdarima.datasets import load_wineind 32 | 33 | y = load_wineind() 34 | train, test = y[:125], y[125:] 35 | 36 | # Fit an ARIMA on the training set 37 | arima = pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12)) 38 | arima.fit(train) 39 | 40 | After fitting and persisting your model (see :ref:`serializing`), you use your model 41 | to produce forecasts. After a few forecasts, you want to record the *actual* observed 42 | values so your model considers them when making newer forecasts: 43 | 44 | .. code-block:: python 45 | 46 | arima.update(test) # pretend these are the new ones 47 | 48 | Your model will now produce forecasts from the *new* latest observations. Of course, 49 | you'll have to re-persist your ARIMA model after updating it! Internally, this step 50 | re-uses the existing parameters, taking a small number of optimization steps and allowing MLE to 51 | nudge your parameters a small amount. You can pass ``maxiter`` to control how 52 | much your model updates. 53 |
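For instance, an illustrative continuation of the example above (the ``maxiter`` value here is arbitrary):

.. code-block:: python

    # Allow more MLE iterations so the parameters can drift
    # further toward the newly observed samples
    arima.update(test, maxiter=15)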
-------------------------------------------------------------------------------- /doc/rfc/372-exog-to-x.rst: -------------------------------------------------------------------------------- 1 | .. _exog_to_X: 2 | 3 | =========================== 4 | RFC: ``exogenous`` -> ``X`` 5 | =========================== 6 | 7 | This RFC proposes renaming the ``exogenous`` arg to ``X``. While this would 8 | impact the public API, we would allow the current ``exogenous`` argument to persist 9 | for several minor release cycles with a deprecation warning before completely removing it 10 | in the next major release (2.0). 11 | 12 | Why? 13 | ---- 14 | 15 | * **It's typo-prone**. We've received several issues lately with people asking why the ``exogenous`` 16 | argument was not doing anything. Upon close inspection, it was evident they were misspelling the 17 | arg as "exogeneous", and the presence of ``**kwargs`` in the function signature allowed 18 | the argument through without raising a ``TypeError``. 19 | 20 | * **It's clunky**. Other scikit-style packages (scikit-learn, scikit-image, sktime) have simplified 21 | this to the ubiquitous ``X``; typing ``exogenous`` is a slightly annoying, arbitrary difference 22 | in signature definitions that keeps us from matching the signatures of other similar packages. 23 | 24 | * **It can be confusing**. Not all of our user base is familiar with classical statistics 25 | terminology and may not realize what this argument permits them to do. Conversely, nearly all 26 | users are familiar with the idea of what ``X`` allows them. 27 | 28 | How? 29 | ---- 30 | 31 | For a while, we'd allow the ``exogenous`` argument to be passed in ``**kwargs``, and would simply 32 | warn if it were present. For example: 33 | 34 | .. code-block:: python 35 | 36 | def fit(self, y, X=None, **kwargs): 37 | if X is None: 38 | X = kwargs.pop("exogenous", None) 39 | if X is not None: 40 | warnings.warn("`exogenous` is deprecated and will raise an error " 41 | "in version 2.0 - Use the `X` arg instead", 42 | DeprecationWarning) 43 | 44 | This would ensure backwards compatibility for several minor release cycles before the 45 | change was made, and would give users sufficient time to switch over to the new naming scheme. 46 | 47 | Precedent 48 | --------- 49 | 50 | Scikit-learn has made similar naming decisions in the name of package consistency and ubiquity, 51 | notably in migrating the ``cross_validation`` namespace to the ``model_selection`` namespace in version 52 | 0.18. This was preceded by several minor releases that warned on imports. 53 | -------------------------------------------------------------------------------- /doc/rfc/index.rst: -------------------------------------------------------------------------------- 1 | .. _rfc: 2 | 3 | ============= 4 | pmdarima RFCs 5 | ============= 6 | 7 | An RFC, or "request for comments," is a common practice in open-source packages that 8 | allows users and contributors to weigh in on a proposal that will fundamentally alter 9 | the public API (usually encompassing breaking changes or design decisions). All ``pmdarima`` 10 | RFCs will be included for future users of the package to read through so that decision-making 11 | is transparent and makes sense to all users. 12 | 13 | ..
toctree:: 14 | :maxdepth: 2 15 | :hidden: 16 | 17 | Renaming the "exogenous" argument <./372-exog-to-x.rst> 18 | -------------------------------------------------------------------------------- /doc/seasonal-differencing-issues.rst: -------------------------------------------------------------------------------- 1 | .. _seasonal_differencing_issues: 2 | 3 | ============================================ 4 | Encountering issues in seasonal differencing 5 | ============================================ 6 | 7 | For certain time series, the seasonal differencing operation may fail:: 8 | 9 | Traceback (most recent call last): 10 | File "<stdin>", line 1, in <module> 11 | ValueError: There are no more samples after a first-order seasonal differencing. See 12 | http://alkaline-ml.com/pmdarima/seasonal-differencing-issues.html for a 13 | more in-depth explanation and potential work-arounds. 14 | 15 | 16 | In short, the seasonal differencing test has detected that your time series could benefit 17 | from a non-zero seasonal differencing term, ``D``, but your data is exhausted after 18 | differencing it by ``m``; your dataset is simply too small to be differenced by ``m``. 19 | You have only a few options as a work-around here: 20 | 21 | * Use a larger training set. 22 | 23 | * Determine whether or not you've set the appropriate ``m``. Should it be smaller? See 24 | :ref:`period` for more information on the topic. 25 | 26 | * Manually set ``D=0`` in the :func:`pmdarima.arima.auto_arima` call (see the sketch at the 27 | end of this section). This is the least desirable solution, since it skips a step that 28 | could lead to a better model. 29 | 30 | The best decision is always to use a larger training set, but sometimes that simply 31 | is not possible. Make sure to set ``trace`` to at least 1 in order to see the search progress, and to a value >1 to see the 32 | maximum trace logging available. If you still cannot diagnose why you are getting this error message, consider 33 | :ref:`filing_bugs`. 34 |
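A hedged sketch of inspecting ``D`` yourself and of the ``D=0`` work-around (``y`` and ``m=12`` are placeholders for your own series and period):

.. code-block:: python

    import pmdarima as pm
    from pmdarima.arima import nsdiffs

    # See what the seasonal differencing test estimates for D
    D = nsdiffs(y, m=12, test='ch')  # Canova-Hansen test
    print(D)

    # Or skip the seasonal differencing estimation entirely by pinning D=0
    model = pm.auto_arima(y, seasonal=True, m=12, D=0)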
-------------------------------------------------------------------------------- /doc/serialization.rst: -------------------------------------------------------------------------------- 1 | .. _serializing: 2 | 3 | ============================= 4 | Serializing your ARIMA models 5 | ============================= 6 | 7 | After you've fit your model and you're ready to start making predictions out 8 | in your production environment, it's time to save your ARIMA to disk. 9 | Pmdarima models can be serialized with ``pickle`` or ``joblib``, just as with 10 | most other Python objects: 11 | 12 | .. code-block:: python 13 | 14 | from pmdarima.arima import auto_arima 15 | from pmdarima.datasets import load_lynx 16 | import numpy as np 17 | 18 | # For serialization: 19 | import joblib 20 | import pickle 21 | 22 | # Load data and fit a model 23 | y = load_lynx() 24 | arima = auto_arima(y, seasonal=True) 25 | 26 | # Serialize with Pickle 27 | with open('arima.pkl', 'wb') as pkl: 28 | pickle.dump(arima, pkl) 29 | 30 | # You can still make predictions from the model at this point 31 | arima.predict(n_periods=5) 32 | 33 | # Now read it back and make a prediction 34 | with open('arima.pkl', 'rb') as pkl: 35 | pickle_preds = pickle.load(pkl).predict(n_periods=5) 36 | 37 | # Or maybe joblib tickles your fancy 38 | joblib.dump(arima, 'arima.pkl') 39 | joblib_preds = joblib.load('arima.pkl').predict(n_periods=5) 40 | 41 | # Show they're the same 42 | np.allclose(pickle_preds, joblib_preds) 43 | 44 | -------------------------------------------------------------------------------- /doc/setup.rst: -------------------------------------------------------------------------------- 1 | .. _setup: 2 | 3 | ===== 4 | Setup 5 | ===== 6 | 7 | Pmdarima depends on several prominent Python packages: 8 | 9 | * `Numpy `_ (>=1.17.3) 10 | * `SciPy `_ (>=1.3.2) 11 | * `Scikit-learn `_ (>=0.22) 12 | * `Pandas `_ (>=0.19) 13 | * `Statsmodels `_ (>=0.11) 14 | 15 | Install from PyPI 16 | ----------------- 17 | 18 | Pmdarima is on PyPI under the package name ``pmdarima`` and can be 19 | downloaded via ``pip``: 20 | 21 | .. code-block:: bash 22 | 23 | $ pip install pmdarima 24 | 25 | Pmdarima uses Cython, which means there is C source that is compiled as part of 26 | the distribution process. To ensure the package was built correctly, import 27 | the following module in Python: 28 | 29 | .. code-block:: python 30 | 31 | from pmdarima.arima import auto_arima 32 | 33 | If you encounter an ``ImportError``, try updating numpy and re-installing. Outdated 34 | numpy versions have been observed to break the pmdarima build. 35 | 36 | Install from Conda 37 | ------------------ 38 | 39 | Pmdarima is on conda under the channel ``conda-forge`` and package name ``pmdarima``. It can be downloaded via ``conda`` 40 | like so: 41 | 42 | .. code-block:: bash 43 | 44 | $ conda config --add channels conda-forge 45 | $ conda config --set channel_priority strict 46 | $ conda install pmdarima 47 | 48 | **Note:** We do not maintain our own Conda binaries; they are maintained at https://github.com/conda-forge/pmdarima-feedstock. 49 | See that repo for further documentation on working with Pmdarima on Conda. 50 | 51 | Build from source 52 | ----------------- 53 | 54 | If you'd like to install a development or bleeding-edge version of pmdarima, 55 | you can always build it from the git source. First clone it from Git: 56 | 57 | .. code-block:: bash 58 | 59 | $ git clone https://github.com/alkaline-ml/pmdarima.git 60 | $ cd pmdarima 61 | 62 | Building the package will require ``gcc`` (unix) or a Windows equivalent, like 63 | ``MinGW``. To build in development mode (for running unit tests): 64 | 65 | .. code-block:: bash 66 | 67 | $ python setup.py develop 68 | 69 | You can also use the ``Makefile`` if you're on a posix machine: 70 | 71 | .. code-block:: bash 72 | 73 | $ make develop 74 | 75 | Alternatively, to install the package in your ``site-packages``: 76 | 77 | .. code-block:: bash 78 | 79 | $ python setup.py install 80 | 81 | Or, with the ``Makefile``: 82 | 83 | ..
code-block:: bash 84 | 85 | $ make install 86 | -------------------------------------------------------------------------------- /doc/sg_execution_times.rst: -------------------------------------------------------------------------------- 1 | 2 | :orphan: 3 | 4 | .. _sphx_glr_sg_execution_times: 5 | 6 | 7 | Computation times 8 | ================= 9 | **00:11.856** total execution time for 13 files **from all galleries**: 10 | 11 | .. container:: 12 | 13 | .. raw:: html 14 | 15 | 19 | 20 | 21 | 22 | 27 | 28 | .. list-table:: 29 | :header-rows: 1 30 | :class: table table-striped sg-datatable 31 | 32 | * - Example 33 | - Time 34 | - Mem (MB) 35 | * - :ref:`sphx_glr_auto_examples_preprocessing_example_date_featurizer.py` (``../examples/preprocessing/example_date_featurizer.py``) 36 | - 00:03.490 37 | - 0.0 38 | * - :ref:`sphx_glr_auto_examples_example_simple_fit.py` (``../examples/example_simple_fit.py``) 39 | - 00:03.400 40 | - 0.0 41 | * - :ref:`sphx_glr_auto_examples_model_selection_example_cross_val_predict.py` (``../examples/model_selection/example_cross_val_predict.py``) 42 | - 00:01.976 43 | - 0.0 44 | * - :ref:`sphx_glr_auto_examples_example_pipeline.py` (``../examples/example_pipeline.py``) 45 | - 00:00.937 46 | - 0.0 47 | * - :ref:`sphx_glr_auto_examples_model_selection_example_cross_validation.py` (``../examples/model_selection/example_cross_validation.py``) 48 | - 00:00.775 49 | - 0.0 50 | * - :ref:`sphx_glr_auto_examples_arima_example_persisting_a_model.py` (``../examples/arima/example_persisting_a_model.py``) 51 | - 00:00.602 52 | - 0.0 53 | * - :ref:`sphx_glr_auto_examples_utils_example_tsdisplay.py` (``../examples/utils/example_tsdisplay.py``) 54 | - 00:00.233 55 | - 0.0 56 | * - :ref:`sphx_glr_auto_examples_arima_example_add_new_samples.py` (``../examples/arima/example_add_new_samples.py``) 57 | - 00:00.198 58 | - 0.0 59 | * - :ref:`sphx_glr_auto_examples_arima_example_seasonal_decomposition.py` (``../examples/arima/example_seasonal_decomposition.py``) 60 | - 00:00.140 61 | - 0.0 62 | * - :ref:`sphx_glr_auto_examples_arima_example_auto_arima.py` (``../examples/arima/example_auto_arima.py``) 63 | - 00:00.102 64 | - 0.0 65 | * - :ref:`sphx_glr_auto_examples_datasets_example_load_data.py` (``../examples/datasets/example_load_data.py``) 66 | - 00:00.001 67 | - 0.0 68 | * - :ref:`sphx_glr_auto_examples_utils_example_array_differencing.py` (``../examples/utils/example_array_differencing.py``) 69 | - 00:00.001 70 | - 0.0 71 | * - :ref:`sphx_glr_auto_examples_utils_example_array_concatenation.py` (``../examples/utils/example_array_concatenation.py``) 72 | - 00:00.000 73 | - 0.0 74 | -------------------------------------------------------------------------------- /doc/sphinxext/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include tests *.py 2 | include *.txt -------------------------------------------------------------------------------- /doc/sphinxext/github_link.py: -------------------------------------------------------------------------------- 1 | from operator import attrgetter 2 | import inspect 3 | import subprocess 4 | import os 5 | import sys 6 | from functools import partial 7 | 8 | REVISION_CMD = 'git rev-parse --short HEAD' 9 | 10 | 11 | def _get_git_revision(): 12 | try: 13 | revision = subprocess.check_output(REVISION_CMD.split()).strip() 14 | except (subprocess.CalledProcessError, OSError): 15 | print('Failed to execute git to get revision') 16 | return None 17 | return revision.decode('utf-8') 18 | 19 | 20 | 
def _linkcode_resolve(domain, info, package, url_fmt, revision): 21 | """Determine a link to online source for a class/method/function 22 | 23 | This is called by sphinx.ext.linkcode 24 | 25 | An example with a long-untouched module that everyone has 26 | >>> _linkcode_resolve('py', {'module': 'tty', 27 | ... 'fullname': 'setraw'}, 28 | ... package='tty', 29 | ... url_fmt='http://hg.python.org/cpython/file/' 30 | ... '{revision}/Lib/{package}/{path}#L{lineno}', 31 | ... revision='xxxx') 32 | 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18' 33 | """ 34 | 35 | if revision is None: 36 | return 37 | if domain not in ('py', 'pyx'): 38 | return 39 | if not info.get('module') or not info.get('fullname'): 40 | return 41 | 42 | class_name = info['fullname'].split('.')[0] 43 | if type(class_name) != str: 44 | # Python 2 only 45 | class_name = class_name.encode('utf-8') 46 | module = __import__(info['module'], fromlist=[class_name]) 47 | obj = attrgetter(info['fullname'])(module) 48 | 49 | try: 50 | fn = inspect.getsourcefile(obj) 51 | except Exception: 52 | fn = None 53 | if not fn: 54 | try: 55 | fn = inspect.getsourcefile(sys.modules[obj.__module__]) 56 | except Exception: 57 | fn = None 58 | if not fn: 59 | return 60 | 61 | fn = os.path.relpath(fn, 62 | start=os.path.dirname(__import__(package).__file__)) 63 | try: 64 | lineno = inspect.getsourcelines(obj)[1] 65 | except Exception: 66 | lineno = '' 67 | return url_fmt.format(revision=revision, package=package, 68 | path=fn, lineno=lineno) 69 | 70 | 71 | def make_linkcode_resolve(package, url_fmt): 72 | """Returns a linkcode_resolve function for the given URL format 73 | 74 | revision is a git commit reference (hash or name) 75 | 76 | package is the name of the root module of the package 77 | 78 | url_fmt is along the lines of ('https://github.com/USER/PROJECT/' 79 | 'blob/{revision}/{package}/' 80 | '{path}#L{lineno}') 81 | """ 82 | revision = _get_git_revision() 83 | return partial(_linkcode_resolve, revision=revision, package=package, 84 | url_fmt=url_fmt) 85 |
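# A hedged usage sketch (values are illustrative, not taken from conf.py):
# in a Sphinx conf.py, this factory is typically wired into
# sphinx.ext.linkcode roughly as follows:
#
#     linkcode_resolve = make_linkcode_resolve(
#         'pmdarima',
#         'https://github.com/alkaline-ml/pmdarima/blob/'
#         '{revision}/{package}/{path}#L{lineno}')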
-------------------------------------------------------------------------------- /doc/usecases.rst: -------------------------------------------------------------------------------- 1 | .. title:: Use cases 2 | 3 | .. _use_cases: 4 | 5 | ========= 6 | Use cases 7 | ========= 8 | 9 | Here are some easy-to-follow, common use cases for why you might use pmdarima 10 | in your forecasting work. 11 | 12 | .. raw:: html 13 | 14 | 15 | 16 | .. toctree:: 17 | 18 | usecases/stocks.rst 19 | usecases/sun-spots.rst 20 | 21 | .. raw:: html 22 | 23 |
      24 | -------------------------------------------------------------------------------- /doc/user_guide.rst: -------------------------------------------------------------------------------- 1 | .. title:: User guide: contents 2 | 3 | .. _user_guide: 4 | 5 | ========== 6 | User Guide 7 | ========== 8 | 9 | The following guides cover how to get started with a pmdarima distribution. The 10 | easiest solution is simply installing from PyPi, but if you'd like to 11 | contribute you'll need to be able to build from source, as laid out in the 12 | :ref:`setup` section. 13 | 14 | .. raw:: html 15 | 16 |
      17 | 18 | .. toctree:: 19 | :numbered: 20 | :maxdepth: 2 21 | 22 | about.rst 23 | setup.rst 24 | quickstart.rst 25 | serialization.rst 26 | refreshing.rst 27 | tips_and_tricks.rst 28 | no-successful-model.rst 29 | seasonal-differencing-issues.rst 30 | modules/datasets.rst 31 | usecases.rst 32 | contributing.rst 33 | contributors.rst 34 | citing.rst 35 | 36 | .. raw:: html 37 | 38 |
      39 | -------------------------------------------------------------------------------- /etc/downloads_badges.py: -------------------------------------------------------------------------------- 1 | from datetime import date, timedelta 2 | import json 3 | import math 4 | import os 5 | import requests 6 | from statistics import mean 7 | 8 | 9 | def millify(n): 10 | """Abbreviate a number to nearest thousand, million, etc. 11 | 12 | Adapted from: https://stackoverflow.com/a/3155023/10696164 13 | 14 | Parameters 15 | ---------- 16 | n : int 17 | The number to abbreviate 18 | 19 | Returns 20 | ------- 21 | millified : str 22 | The number abbreviated to the nearest thousand, million, etc. 23 | """ 24 | millnames = ['', 'k', 'M', 'B', 'T'] 25 | n = float(n) 26 | millidx = max( 27 | 0, 28 | min( 29 | len(millnames) - 1, 30 | int(math.floor(0 if n == 0 else math.log10(abs(n)) / 3)) 31 | ) 32 | ) 33 | final_num = float(n / 10 ** (3 * millidx)) 34 | one_decimal = round(final_num, 1) 35 | 36 | # If the number is in the millions, and has a decimal, we want to show one 37 | # decimal. I.e.: 38 | # - 967123 -> 967k 39 | # - 1000123 -> 1M 40 | # - 1100123 -> 1.1M 41 | final_output = one_decimal if n > 1e6 and not one_decimal.is_integer() else int(round(final_num, 0)) 42 | 43 | return f'{final_output}{millnames[millidx]}' 44 | 45 | 46 | def get_default_value(downloads): 47 | """Find the default value (one day's worth of downloads) for a given input 48 | 49 | Parameters 50 | ---------- 51 | downloads : dict 52 | A dict of dates and downloads on that day 53 | 54 | Returns 55 | ------- 56 | default_value : int 57 | The default value, which is the average of the last 7 days of downloads 58 | that are contained in the input dictionary. 59 | """ 60 | last_7_keys = sorted(downloads.keys())[-7:] 61 | default_value = int(mean([downloads[key] for key in last_7_keys])) 62 | return default_value 63 | 64 | 65 | # Used to calculate downloads for the last week 66 | today = date.today() 67 | last_week = today - timedelta(days=7) 68 | DATE_FORMAT = '%Y-%m-%d' 69 | 70 | # Open a session to save time 71 | session = requests.Session() 72 | 73 | # Get the data for both the legacy namespace and our current one 74 | pyramid_arima = json.loads(session.get('https://api.pepy.tech/api/projects/pyramid-arima').text) 75 | pmdarima = json.loads(session.get('https://api.pepy.tech/api/projects/pmdarima').text) 76 | 77 | # Sum up pmdarima and pyramid-arima downloads to the past week 78 | pmdarima_downloads = 0 79 | default_pmdarima_value = get_default_value(pmdarima['downloads']) 80 | for i in range(7): 81 | pmdarima_downloads += pmdarima['downloads'].get( 82 | (last_week + timedelta(days=i)).strftime(DATE_FORMAT), 83 | default_pmdarima_value 84 | ) 85 | 86 | pyramid_arima_downloads = 0 87 | default_pyramid_arima_value = get_default_value(pyramid_arima['downloads']) 88 | for i in range(7): 89 | pyramid_arima_downloads += pyramid_arima['downloads'].get( 90 | (last_week + timedelta(days=i)).strftime(DATE_FORMAT), 91 | default_pyramid_arima_value 92 | ) 93 | 94 | # Millify the totals 95 | total_downloads = millify(pyramid_arima['total_downloads'] + pmdarima['total_downloads']) 96 | weekly_downloads = millify(pmdarima_downloads + pyramid_arima_downloads) 97 | 98 | data = { 99 | 'total': total_downloads, 100 | 'weekly': weekly_downloads 101 | } 102 | 103 | request = session.post( 104 | url='https://store.zapier.com/api/records', 105 | headers={ 106 | 'X-Secret': os.getenv('ZAPIER_SHA') 107 | }, 108 | data=json.dumps(data) 109 | ) 110 | 
request.raise_for_status() 111 | 112 | print(f""" 113 | New total downloads: {data['total']} 114 | New weekly downloads: {data['weekly']} 115 | """) 116 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _general_examples: 2 | 3 | Examples 4 | ======== 5 | 6 | General examples 7 | ---------------- 8 | 9 | General-purpose and introductory examples for ``pmdarima``. These examples are 10 | designed to introduce you to the package style and layout. 11 | 12 | .. raw:: html 13 | 14 |
      15 | -------------------------------------------------------------------------------- /examples/arima/README.txt: -------------------------------------------------------------------------------- 1 | .. _arima_examples: 2 | 3 | ARIMA examples 4 | -------------- 5 | 6 | Examples of how to use the :mod:`pmdarima.arima` module to fit timeseries 7 | models. 8 | 9 | .. raw:: html 10 | 11 |
      12 | -------------------------------------------------------------------------------- /examples/arima/example_add_new_samples.py: -------------------------------------------------------------------------------- 1 | """ 2 | ===================================== 3 | Adding new observations to your model 4 | ===================================== 5 | 6 | 7 | This example demonstrates how to add new ground truth 8 | observations to your model so that forecasting continues 9 | with respect to true, observed values. This also slightly 10 | updates the model parameters, taking several new steps from 11 | the existing model parameters. 12 | 13 | .. raw:: html 14 | 15 |
      16 | """ 17 | print(__doc__) 18 | 19 | # Author: Taylor Smith 20 | 21 | import pmdarima as pm 22 | from pmdarima import model_selection 23 | import matplotlib.pyplot as plt 24 | import numpy as np 25 | 26 | # ############################################################################# 27 | # Load the data and split it into separate pieces 28 | data = pm.datasets.load_lynx() 29 | train, test = model_selection.train_test_split(data, train_size=100) 30 | 31 | # ############################################################################# 32 | # Fit with some validation (cv) samples 33 | arima = pm.auto_arima(train, start_p=1, start_q=1, d=0, max_p=5, max_q=5, 34 | out_of_sample_size=10, suppress_warnings=True, 35 | stepwise=True, error_action='ignore') 36 | 37 | # Now plot the results and the forecast for the test set 38 | preds, conf_int = arima.predict(n_periods=test.shape[0], 39 | return_conf_int=True) 40 | 41 | fig, axes = plt.subplots(2, 1, figsize=(12, 8)) 42 | x_axis = np.arange(train.shape[0] + preds.shape[0]) 43 | axes[0].plot(x_axis[:train.shape[0]], train, alpha=0.75) 44 | axes[0].scatter(x_axis[train.shape[0]:], preds, alpha=0.4, marker='o') 45 | axes[0].scatter(x_axis[train.shape[0]:], test, alpha=0.4, marker='x') 46 | axes[0].fill_between(x_axis[-preds.shape[0]:], conf_int[:, 0], conf_int[:, 1], 47 | alpha=0.1, color='b') 48 | 49 | # fill the section where we "held out" samples in our model fit 50 | 51 | axes[0].set_title("Train samples & forecasted test samples") 52 | 53 | # Now add the actual samples to the model and create NEW forecasts 54 | arima.update(test) 55 | new_preds, new_conf_int = arima.predict(n_periods=10, return_conf_int=True) 56 | new_x_axis = np.arange(data.shape[0] + 10) 57 | 58 | axes[1].plot(new_x_axis[:data.shape[0]], data, alpha=0.75) 59 | axes[1].scatter(new_x_axis[data.shape[0]:], new_preds, alpha=0.4, marker='o') 60 | axes[1].fill_between(new_x_axis[-new_preds.shape[0]:], 61 | new_conf_int[:, 0], 62 | new_conf_int[:, 1], 63 | alpha=0.1, color='g') 64 | axes[1].set_title("Added new observed values with new forecasts") 65 | plt.show() 66 | -------------------------------------------------------------------------------- /examples/arima/example_auto_arima.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================== 3 | Fitting an auto_arima model 4 | =========================== 5 | 6 | 7 | This example demonstrates how we can use the ``auto_arima`` function to 8 | select an optimal time series model. We'll be fitting our model on the lynx 9 | dataset available in the :ref:`datasets` submodule. 10 | 11 | .. raw:: html 12 | 13 |
      14 | """ 15 | print(__doc__) 16 | 17 | # Author: Taylor Smith 18 | 19 | import pmdarima as pm 20 | from pmdarima import model_selection 21 | from sklearn.metrics import mean_squared_error 22 | import matplotlib.pyplot as plt 23 | import numpy as np 24 | 25 | # ############################################################################# 26 | # Load the data and split it into separate pieces 27 | data = pm.datasets.load_lynx() 28 | train, test = model_selection.train_test_split(data, train_size=90) 29 | 30 | # Fit a simple auto_arima model 31 | modl = pm.auto_arima(train, start_p=1, start_q=1, start_P=1, start_Q=1, 32 | max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True, 33 | stepwise=True, suppress_warnings=True, D=10, max_D=10, 34 | error_action='ignore') 35 | 36 | # Create predictions for the future, evaluate on test 37 | preds, conf_int = modl.predict(n_periods=test.shape[0], return_conf_int=True) 38 | 39 | # Print the error: 40 | print("Test RMSE: %.3f" % np.sqrt(mean_squared_error(test, preds))) 41 | 42 | # ############################################################################# 43 | # Plot the points and the forecasts 44 | x_axis = np.arange(train.shape[0] + preds.shape[0]) 45 | x_years = x_axis + 1821 # Year starts at 1821 46 | 47 | plt.plot(x_years[x_axis[:train.shape[0]]], train, alpha=0.75) 48 | plt.plot(x_years[x_axis[train.shape[0]:]], preds, alpha=0.75) # Forecasts 49 | plt.scatter(x_years[x_axis[train.shape[0]:]], test, 50 | alpha=0.4, marker='x') # Test data 51 | plt.fill_between(x_years[x_axis[-preds.shape[0]:]], 52 | conf_int[:, 0], conf_int[:, 1], 53 | alpha=0.1, color='b') 54 | plt.title("Lynx forecasts") 55 | plt.xlabel("Year") 56 | -------------------------------------------------------------------------------- /examples/arima/example_persisting_a_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================= 3 | Persisting an ARIMA model 4 | ========================= 5 | 6 | 7 | This example demonstrates how we can persist an ARIMA model to disk after 8 | fitting it. It can then be loaded back up and used to generate forecasts. 9 | 10 | .. raw:: html 11 | 12 |
      13 | """ 14 | print(__doc__) 15 | 16 | # Author: Taylor Smith 17 | 18 | import pmdarima as pm 19 | from pmdarima import model_selection 20 | import joblib # for persistence 21 | import os 22 | 23 | # ############################################################################# 24 | # Load the data and split it into separate pieces 25 | y = pm.datasets.load_wineind() 26 | train, test = model_selection.train_test_split(y, train_size=125) 27 | 28 | # Fit an ARIMA 29 | arima = pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12)) 30 | arima.fit(y) 31 | 32 | # ############################################################################# 33 | # Persist a model and create predictions after re-loading it 34 | pickle_tgt = "arima.pkl" 35 | try: 36 | # Pickle it 37 | joblib.dump(arima, pickle_tgt, compress=3) 38 | 39 | # Load the model up, create predictions 40 | arima_loaded = joblib.load(pickle_tgt) 41 | preds = arima_loaded.predict(n_periods=test.shape[0]) 42 | print("Predictions: %r" % preds) 43 | 44 | finally: 45 | # Remove the pickle file at the end of this example 46 | try: 47 | os.unlink(pickle_tgt) 48 | except OSError: 49 | pass 50 | -------------------------------------------------------------------------------- /examples/arima/example_seasonal_decomposition.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================== 3 | Seasonal decomposition of your time-series 4 | ========================================== 5 | 6 | 7 | This example demonstrates how we can use the ``decompose`` function to extract 8 | the trend, seasonal, and random components of the time series and then 9 | plot them all using the ``decomposed_plot`` function. We'll be plotting both 10 | ``additive`` and ``multiplicative`` examples of seasonality. To see the R 11 | equivalent that inspired this example go `here `_. 12 | 13 | .. raw:: html 14 | 15 |
      16 | """ 17 | print(__doc__) 18 | 19 | # Author: Charles Drotar 20 | 21 | from pmdarima import arima 22 | from pmdarima import datasets 23 | from pmdarima import utils 24 | 25 | # ############################################################################# 26 | 27 | # So what is happening when we call `decomposed`? 28 | # 1) The trend is extracted from the signal via a convolution using either a 29 | # SMA or a user-defined filter. 30 | # 2) We remove the effects of the trend from the original signal by either 31 | # subtracting its effects or dividing out its effects for `additive` or 32 | # 'multiplicative' types of decompositions, respectively. We then take the 33 | # mean across all seasons to get the values for a single season. For m=4, we 34 | # expect 4 values for a single season. 35 | # 3) We then create the seasonal series by replicating the single season 36 | # until it is the same length of the trend signal. 37 | # 4) Lastly to get the random/noise elements of the signal we remove the effects 38 | # of both the trend and seasonal series and we are now left with the 39 | # variation of the original signal that is neither explainable by seasonal 40 | # nor trend effects. 41 | # 42 | # This logic produces a named tuple of the original signal, trend, seasonal, 43 | # and random components. It is this named tuple that is passed to 44 | # `decomposed_plot` 45 | 46 | figure_kwargs = {'figsize': (6, 6)} # set figure size for both examples 47 | 48 | # 49 | # ADDITIVE EXAMPLE : ausbeer 50 | # 51 | 52 | # Decompose the ausbeer dataset into trend, seasonal and random parts. 53 | # We subset to a small window of the time series. 54 | 55 | head_index = 17*4+2 56 | tail_index = 17*4-4 57 | first_index = head_index - tail_index 58 | last_index = head_index 59 | ausbeer = datasets.load_ausbeer() 60 | timeserie_beer = ausbeer[first_index:last_index] 61 | decomposed = arima.decompose(timeserie_beer, 'additive', m=4) 62 | 63 | # Plot the decomposed signal of ausbeer as a subplot 64 | 65 | axes = utils.decomposed_plot(decomposed, figure_kwargs=figure_kwargs, 66 | show=False) 67 | axes[0].set_title("Ausbeer Seasonal Decomposition") 68 | 69 | 70 | # 71 | # MULTIPLICATIVE EXAMPLE: airpassengers 72 | # 73 | 74 | # Decompose the airpassengers dataset into trend, seasonal and random parts. 75 | decomposed = arima.decompose(datasets.load_airpassengers(), 76 | 'multiplicative', m=12) 77 | 78 | # Plot the decomposed signal of airpassengers as a subplot 79 | 80 | axes = utils.decomposed_plot(decomposed, figure_kwargs=figure_kwargs, 81 | show=False) 82 | axes[0].set_title("Airpassengers Seasonal Decomposition") 83 | -------------------------------------------------------------------------------- /examples/datasets/README.txt: -------------------------------------------------------------------------------- 1 | .. _datasets_examples: 2 | 3 | Datasets examples 4 | ----------------- 5 | 6 | Examples of how to use the :mod:`pmdarima.datasets` module to conveniently load 7 | toy time series data for model benchmarking and experimentation. 8 | 9 | .. raw:: html 10 | 11 |
      12 | -------------------------------------------------------------------------------- /examples/datasets/example_load_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | =============== 3 | Dataset loading 4 | =============== 5 | 6 | 7 | In this example, we demonstrate pyramid's built-in toy datasets that can be 8 | used for benchmarking or experimentation. Pyramid has several built-in datasets 9 | that exhibit seasonality, non-stationarity, and other time series nuances. 10 | 11 | .. raw:: html 12 | 13 |
      14 | """ 15 | print(__doc__) 16 | 17 | # Author: Taylor Smith 18 | 19 | import pmdarima as pm 20 | 21 | # ############################################################################# 22 | # You can load the datasets via load_ 23 | lynx = pm.datasets.load_lynx() 24 | print("Lynx array:") 25 | print(lynx) 26 | 27 | # You can also get a series, if you rather 28 | print("\nLynx series head:") 29 | print(pm.datasets.load_lynx(as_series=True).head()) 30 | 31 | # Several other datasets: 32 | air_passengers = pm.datasets.load_airpassengers() 33 | austres = pm.datasets.load_austres() 34 | heart_rate = pm.datasets.load_heartrate() 35 | wineind = pm.datasets.load_wineind() 36 | woolyrnq = pm.datasets.load_woolyrnq() 37 | -------------------------------------------------------------------------------- /examples/example_simple_fit.py: -------------------------------------------------------------------------------- 1 | """ 2 | ======================= 3 | Simple auto_arima model 4 | ======================= 5 | 6 | 7 | This is a simple example of how we can fit an ARIMA model in several lines 8 | without knowing anything about our data or optimal hyper parameters. 9 | 10 | .. raw:: html 11 | 12 |
      13 | """ 14 | print(__doc__) 15 | 16 | # Author: Taylor Smith 17 | 18 | import pmdarima as pm 19 | from pmdarima import model_selection 20 | import numpy as np 21 | from matplotlib import pyplot as plt 22 | 23 | # ############################################################################# 24 | # Load the data and split it into separate pieces 25 | data = pm.datasets.load_wineind() 26 | train, test = model_selection.train_test_split(data, train_size=150) 27 | 28 | # Fit a simple auto_arima model 29 | arima = pm.auto_arima(train, error_action='ignore', trace=True, 30 | suppress_warnings=True, maxiter=5, 31 | seasonal=True, m=12) 32 | 33 | # ############################################################################# 34 | # Plot actual test vs. forecasts: 35 | x = np.arange(test.shape[0]) 36 | plt.scatter(x, test, marker='x') 37 | plt.plot(x, arima.predict(n_periods=test.shape[0])) 38 | plt.title('Actual test samples vs. forecasts') 39 | plt.show() 40 | -------------------------------------------------------------------------------- /examples/model_selection/README.txt: -------------------------------------------------------------------------------- 1 | .. _model_selection_examples: 2 | 3 | Cross-validation examples 4 | ------------------------- 5 | 6 | Examples of how to use the :mod:`pmdarima.model_selection` module to fit 7 | timeseries models in a cross-validated fashion. 8 | 9 | .. raw:: html 10 | 11 |
      12 | -------------------------------------------------------------------------------- /examples/model_selection/example_cross_val_predict.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================ 3 | Cross-validation predictions 4 | ============================ 5 | 6 | In addition to computing cross-validation scores, you can use cross-validation 7 | to produce predictions. Unlike traditional cross-validation, where folds are 8 | independent of one another, time-series folds may overlap (particularly in a 9 | sliding window). To account for this, folds that forecast the same time step 10 | average their forecasts using either a "mean" or "median" (tunable). 11 | 12 | .. raw:: html 13 | 14 |
      15 | """ 16 | print(__doc__) 17 | 18 | # Author: Taylor Smith 19 | 20 | import numpy as np 21 | import pmdarima as pm 22 | from pmdarima import model_selection 23 | from matplotlib import pyplot as plt 24 | 25 | print("pmdarima version: %s" % pm.__version__) 26 | 27 | # Load the data and split it into separate pieces 28 | y = pm.datasets.load_wineind() 29 | est = pm.ARIMA(order=(1, 1, 2), 30 | seasonal_order=(0, 1, 1, 12), 31 | suppress_warnings=True) 32 | cv = model_selection.SlidingWindowForecastCV(window_size=150, step=4, h=4) 33 | predictions = model_selection.cross_val_predict( 34 | est, y, cv=cv, verbose=2, averaging="median") 35 | 36 | # plot the predictions over the original series 37 | x_axis = np.arange(y.shape[0]) 38 | n_test = predictions.shape[0] 39 | 40 | plt.plot(x_axis, y, alpha=0.75, c='b') 41 | plt.plot(x_axis[-n_test:], predictions, alpha=0.75, c='g') # Forecasts 42 | plt.title("Cross-validated wineind forecasts") 43 | plt.show() 44 | -------------------------------------------------------------------------------- /examples/model_selection/example_cross_validation.py: -------------------------------------------------------------------------------- 1 | """ 2 | ======================================== 3 | Cross-validating your time series models 4 | ======================================== 5 | 6 | 7 | Like scikit-learn, ``pmdarima`` provides several different strategies for 8 | cross-validating your time series models. The interface was designed to behave 9 | as similarly as possible to that of scikit to make its usage as simple as 10 | possible. 11 | 12 | .. raw:: html 13 | 14 |
      15 | """ 16 | print(__doc__) 17 | 18 | # Author: Taylor Smith 19 | 20 | import numpy as np 21 | import pmdarima as pm 22 | from pmdarima import model_selection 23 | 24 | print("pmdarima version: %s" % pm.__version__) 25 | 26 | # Load the data and split it into separate pieces 27 | data = pm.datasets.load_wineind() 28 | train, test = model_selection.train_test_split(data, train_size=165) 29 | 30 | # Even though we have a dedicated train/test split, we can (and should) still 31 | # use cross-validation on our training set to get a good estimate of the model 32 | # performance. We can choose which model is better based on how it performs 33 | # over various folds. 34 | model1 = pm.ARIMA(order=(2, 1, 1)) 35 | model2 = pm.ARIMA(order=(1, 1, 2), 36 | seasonal_order=(0, 1, 1, 12), 37 | suppress_warnings=True) 38 | cv = model_selection.SlidingWindowForecastCV(window_size=100, step=24, h=1) 39 | 40 | model1_cv_scores = model_selection.cross_val_score( 41 | model1, train, scoring='smape', cv=cv, verbose=2) 42 | 43 | model2_cv_scores = model_selection.cross_val_score( 44 | model2, train, scoring='smape', cv=cv, verbose=2) 45 | 46 | print("Model 1 CV scores: {}".format(model1_cv_scores.tolist())) 47 | print("Model 2 CV scores: {}".format(model2_cv_scores.tolist())) 48 | 49 | # Pick based on which has a lower mean error rate 50 | m1_average_error = np.average(model1_cv_scores) 51 | m2_average_error = np.average(model2_cv_scores) 52 | errors = [m1_average_error, m2_average_error] 53 | models = [model1, model2] 54 | 55 | # print out the answer 56 | better_index = np.argmin(errors) # type: int 57 | print("Lowest average SMAPE: {} (model{})".format( 58 | errors[better_index], better_index + 1)) 59 | print("Best model: {}".format(models[better_index])) 60 | -------------------------------------------------------------------------------- /examples/preprocessing/README.txt: -------------------------------------------------------------------------------- 1 | .. _preprocessing_examples: 2 | 3 | Preprocessing examples 4 | ---------------------- 5 | 6 | Examples of how to use the :mod:`pmdarima.preprocessing` module to transform 7 | your time series or exog features inside or outside of a pipeline. 8 | 9 | .. raw:: html 10 | 11 |
      12 | -------------------------------------------------------------------------------- /examples/preprocessing/example_date_featurizer.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================================= 3 | Modeling quasi-seasonal trends with date features 4 | ================================================= 5 | 6 | 7 | Some trends are common enough to appear seasonal, yet sporadic enough that 8 | approaching them from a seasonal perspective may not be valid. An example of 9 | this is the `"end-of-the-month" effect `_. 10 | In this example, we'll explore how we can create meaningful features that 11 | express seasonal trends without needing to fit a seasonal model. 12 | 13 | .. raw:: html 14 | 15 |
      16 | """ 17 | print(__doc__) 18 | 19 | # Author: Taylor Smith 20 | 21 | import pmdarima as pm 22 | from pmdarima import arima 23 | from pmdarima import model_selection 24 | from pmdarima import pipeline 25 | from pmdarima import preprocessing 26 | from pmdarima.datasets._base import load_date_example 27 | 28 | import numpy as np 29 | from matplotlib import pyplot as plt 30 | 31 | print(f"pmdarima version: {pm.__version__}") 32 | 33 | # Load the data and split it into separate pieces 34 | y, X = load_date_example() 35 | y_train, y_test, X_train, X_test = \ 36 | model_selection.train_test_split(y, X, test_size=20) 37 | 38 | # We can examine traits about the time series: 39 | pm.tsdisplay(y_train, lag_max=10) 40 | 41 | # We can see the ACF increases and decreases rather rapidly, which means we may 42 | # need some differencing. There also does not appear to be an obvious seasonal 43 | # trend. 44 | n_diffs = arima.ndiffs(y_train, max_d=5) 45 | 46 | # Here's what the featurizer will create for us: 47 | date_feat = preprocessing.DateFeaturizer( 48 | column_name="date", # the name of the date feature in the X matrix 49 | with_day_of_week=True, 50 | with_day_of_month=True) 51 | 52 | _, X_train_feats = date_feat.fit_transform(y_train, X_train) 53 | print(f"Head of generated X features:\n{repr(X_train_feats.head())}") 54 | 55 | # We can plug this X featurizer into a pipeline: 56 | pipe = pipeline.Pipeline([ 57 | ('date', date_feat), 58 | ('arima', arima.AutoARIMA(d=n_diffs, 59 | trace=3, 60 | stepwise=True, 61 | suppress_warnings=True, 62 | seasonal=False)) 63 | ]) 64 | 65 | pipe.fit(y_train, X_train) 66 | 67 | # Plot our forecasts 68 | forecasts = pipe.predict(X=X_test) 69 | 70 | fig = plt.figure(figsize=(16, 8)) 71 | ax = fig.add_subplot(1, 1, 1) 72 | 73 | n_train = y_train.shape[0] 74 | x = np.arange(n_train + forecasts.shape[0]) 75 | 76 | ax.plot(x[:n_train], y_train, color='blue', label='Training Data') 77 | ax.plot(x[n_train:], forecasts, color='green', marker='o', 78 | label='Predicted') 79 | ax.plot(x[n_train:], y_test, color='red', label='Actual') 80 | ax.legend(loc='lower left', borderaxespad=0.5) 81 | ax.set_title('Predicted Foo') 82 | ax.set_ylabel('# Foo') 83 | 84 | plt.show() 85 | 86 | # What next? Try combining different featurizers in your pipeline to enhance 87 | # a model's predictive power. 88 | -------------------------------------------------------------------------------- /examples/quick_start_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/examples/quick_start_output.png -------------------------------------------------------------------------------- /examples/utils/README.txt: -------------------------------------------------------------------------------- 1 | .. _utils_examples: 2 | 3 | Utils examples 4 | -------------- 5 | 6 | Examples of how to use the :mod:`pmdarima.utils` module to plot timeseries 7 | data, difference arrays, and more. 8 | 9 | .. raw:: html 10 | 11 |
      12 | -------------------------------------------------------------------------------- /examples/utils/example_array_concatenation.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================== 3 | Array concatenation 4 | =================== 5 | 6 | 7 | In this example, we demonstrate pyramid's convenient ``c`` function, which is, 8 | in essence, the same as R's. It's nothing more than a convenience function in 9 | the package, but one you should understand if you're contributing. 10 | 11 | .. raw:: html 12 | 13 |
      14 | """ 15 | print(__doc__) 16 | 17 | # Author: Taylor Smith 18 | 19 | import pmdarima as pm 20 | import numpy as np 21 | 22 | # ############################################################################# 23 | # You can use the 'c' function to define an array from *args 24 | array1 = pm.c(1, 2, 3, 4, 5) 25 | 26 | # Or you can define an array from an existing iterable: 27 | array2 = pm.c([1, 2, 3, 4, 5]) 28 | assert np.array_equal(array1, array2) 29 | 30 | # You can even use 'c' to flatten arrays: 31 | array_flat = pm.c(1, 2, 3, [4, 5]) 32 | assert np.array_equal(array_flat, np.arange(5) + 1) 33 | -------------------------------------------------------------------------------- /examples/utils/example_array_differencing.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================== 3 | Array differencing 4 | ================== 5 | 6 | 7 | In this example, we demonstrate pyramid's array differencing, and how it's used 8 | in conjunction with the ``d`` term to lag a time series. 9 | 10 | .. raw:: html 11 | 12 |
      13 | """ 14 | print(__doc__) 15 | 16 | # Author: Taylor Smith 17 | 18 | from pmdarima.utils import array 19 | 20 | # Build an array and show first order differencing results 21 | x = array.c(10, 4, 2, 9, 34) 22 | lag_1 = array.diff(x, lag=1, differences=1) 23 | 24 | # The result will be the same as: x[1:] - x[:-1] 25 | print(lag_1) # [-6., -2., 7., 25.] 26 | 27 | # Note that lag and differences are not the same! If we crank diff up by one, 28 | # it performs the same differencing as above TWICE. Lag, therefore, controls 29 | # the number of steps backward the ts looks when it differences, and the 30 | # `differences` parameter controls how many times to repeat. 31 | print(array.diff(x, lag=1, differences=2)) # [4., 9., 18.] 32 | 33 | # Conversely, when we set lag to 2, the array looks two steps back for its 34 | # differencing operation (only one). 35 | print(array.diff(x, lag=2, differences=1)) # [-8., 5., 32.] 36 | 37 | # The lag parameter is controlled by `m`, which is the seasonal periodicity of 38 | # a time series. If your series is non-seasonal, lag will typically be 1. 39 | -------------------------------------------------------------------------------- /examples/utils/example_tsdisplay.py: -------------------------------------------------------------------------------- 1 | """ 2 | ==================================== 3 | Displaying key timeseries statistics 4 | ==================================== 5 | 6 | 7 | Visualizing characteristics of a time series is a key component to effective 8 | forecasting. In this example, we'll look at a very simple method to examine 9 | critical statistics of a time series object. 10 | 11 | .. raw:: html 12 | 13 |
      14 | """ 15 | print(__doc__) 16 | 17 | # Author: Taylor Smith 18 | 19 | import pmdarima as pm 20 | from pmdarima import datasets 21 | from pmdarima import preprocessing 22 | 23 | # We'll use the sunspots dataset for this example 24 | y = datasets.load_sunspots(True) 25 | print("Data shape: {}".format(y.shape[0])) 26 | print("Data head:") 27 | print(y.head()) 28 | 29 | # Let's look at the series, its ACF plot, and a histogram of its values 30 | pm.tsdisplay(y, lag_max=90, title="Sunspots", show=True) 31 | 32 | # Notice that the histogram is very skewed. This is a prime candidate for 33 | # box-cox transformation 34 | y_bc, _ = preprocessing.BoxCoxEndogTransformer(lmbda2=1e-6).fit_transform(y) 35 | pm.tsdisplay( 36 | y_bc, lag_max=90, title="Sunspots (BoxCox-transformed)", show=True) 37 | 38 | print(""" 39 | As evidenced by the more normally distributed values in the transformed series, 40 | using a Box-Cox transformation may prove useful prior to fitting your model. 41 | """) 42 | -------------------------------------------------------------------------------- /pmdarima/__check_build/__init__.py: -------------------------------------------------------------------------------- 1 | """ Module to give helpful messages to the user that did not 2 | compile the pmdarima lib properly. 3 | """ 4 | import os 5 | 6 | INPLACE_MSG = """ 7 | It appears that you are importing a local pmdarima source tree. For 8 | this, you need to have an inplace install. Maybe you are in the source 9 | directory and you need to try from another location.""" 10 | 11 | STANDARD_MSG = """ 12 | If you have used an installer, please check that it is suited for your 13 | Python version, your operating system and your platform.""" 14 | 15 | 16 | def raise_build_error(e): 17 | # Raise a comprehensible error and list the contents of the 18 | # directory to help debugging on the mailing list. 19 | local_dir = os.path.split(__file__)[0] 20 | msg = STANDARD_MSG 21 | if local_dir == "pmdarima/__check_build": 22 | # Picking up the local install: this will work only if the 23 | # install is an 'inplace build' 24 | msg = INPLACE_MSG 25 | dir_content = list() 26 | for i, filename in enumerate(os.listdir(local_dir)): 27 | if (i + 1) % 3: 28 | dir_content.append(filename.ljust(26)) 29 | else: 30 | dir_content.append(filename + '\n') 31 | raise ImportError("""%s 32 | ___________________________________________________________________________ 33 | Contents of %s: 34 | %s 35 | ___________________________________________________________________________ 36 | It seems that pmdarima has not been built correctly. 37 | If you have installed pmdarima from source, please do not forget 38 | to build the package before using it: run `python setup.py install` or 39 | `make` from the top-level directory. 
40 | %s""" % (e, local_dir, ''.join(dir_content).strip(), msg)) 41 | 42 | 43 | try: 44 | from ._check_build import check_build 45 | except ImportError as ie: 46 | raise_build_error(ie) 47 | -------------------------------------------------------------------------------- /pmdarima/__check_build/_check_build.pyx: -------------------------------------------------------------------------------- 1 | def check_build(): 2 | return 3 | -------------------------------------------------------------------------------- /pmdarima/__check_build/setup.py: -------------------------------------------------------------------------------- 1 | # Author: Virgile Fritsch (originally written 2 | # for sklearn, adapted for pmdarima) 3 | # License: BSD 3 clause 4 | 5 | import numpy as np 6 | 7 | 8 | def configuration(parent_package='', top_path=None): 9 | from numpy.distutils.misc_util import Configuration 10 | config = Configuration('__check_build', parent_package, top_path) 11 | config.add_extension('_check_build', 12 | sources=['_check_build.pyx'], 13 | include_dirs=[np.get_include()]) 14 | 15 | return config 16 | 17 | 18 | if __name__ == '__main__': 19 | from numpy.distutils.core import setup 20 | setup(**configuration(top_path='').todict()) 21 | -------------------------------------------------------------------------------- /pmdarima/__check_build/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /pmdarima/__check_build/tests/test_check_build.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from pmdarima.__check_build import raise_build_error 4 | 5 | import pytest 6 | 7 | 8 | def test_raise_build_error(): 9 | try: 10 | # Raise a value error to pass into the raise_build_error 11 | # to assert it turns it into an ImportError 12 | raise ValueError("this is a dummy err msg") 13 | except ValueError as v: 14 | with pytest.raises(ImportError): 15 | raise_build_error(v) 16 | -------------------------------------------------------------------------------- /pmdarima/_build_utils/pre_build_helpers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helpers to check build environment before actual build of pmdarima 3 | 4 | Adapted from: https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/_build_utils/pre_build_helpers.py 5 | """ # noqa 6 | 7 | import os 8 | import sys 9 | import glob 10 | import tempfile 11 | import textwrap 12 | import subprocess 13 | 14 | from distutils.sysconfig import customize_compiler 15 | from numpy.distutils.ccompiler import new_compiler 16 | 17 | 18 | def compile_test_program(code, extra_preargs=[], extra_postargs=[]): 19 | """Check that some C code can be compiled and run""" 20 | ccompiler = new_compiler() 21 | customize_compiler(ccompiler) 22 | 23 | # extra_(pre/post)args can be a callable to make it possible to get its 24 | # value from the compiler 25 | if callable(extra_preargs): 26 | extra_preargs = extra_preargs(ccompiler) 27 | if callable(extra_postargs): 28 | extra_postargs = extra_postargs(ccompiler) 29 | 30 | start_dir = os.path.abspath('.') 31 | 32 | with tempfile.TemporaryDirectory() as tmp_dir: 33 | try: 34 | os.chdir(tmp_dir) 35 | 36 | # Write test program 37 | with open('test_program.c', 'w') as f: 38 | f.write(code) 39 | 40 | os.mkdir('objects') 41 | 42 | # Compile, test program 43 | 
ccompiler.compile(['test_program.c'], output_dir='objects', 44 | extra_postargs=extra_postargs) 45 | 46 | # Link test program 47 | objects = glob.glob( 48 | os.path.join('objects', '*' + ccompiler.obj_extension)) 49 | ccompiler.link_executable(objects, 'test_program', 50 | extra_preargs=extra_preargs, 51 | extra_postargs=extra_postargs) 52 | 53 | if "PYTHON_CROSSENV" not in os.environ: 54 | # Run test program if not cross compiling 55 | # will raise a CalledProcessError if return code was non-zero 56 | output = subprocess.check_output('./test_program') 57 | output = output.decode(sys.stdout.encoding or 'utf-8').splitlines() # noqa 58 | else: 59 | output = [] 60 | except Exception: 61 | raise 62 | finally: 63 | os.chdir(start_dir) 64 | 65 | return output 66 | 67 | 68 | def basic_check_build(): 69 | """Check basic compilation and linking of C code""" 70 | code = textwrap.dedent( 71 | """\ 72 | #include 73 | int main(void) { 74 | return 0; 75 | } 76 | """) 77 | compile_test_program(code) 78 | -------------------------------------------------------------------------------- /pmdarima/_build_utils/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /pmdarima/arima/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Author: Taylor Smith 4 | 5 | from .approx import * 6 | from .arima import * 7 | from .auto import * 8 | from .utils import * 9 | 10 | # These need to be top-level since 0.7.0 for the documentation 11 | from .seasonality import decompose 12 | from .seasonality import CHTest 13 | from .seasonality import OCSBTest 14 | from .stationarity import ADFTest 15 | from .stationarity import KPSSTest 16 | from .stationarity import PPTest 17 | 18 | __all__ = [s for s in dir() if not s.startswith("_")] 19 | -------------------------------------------------------------------------------- /pmdarima/arima/_arima_fast_helpers.h: -------------------------------------------------------------------------------- 1 | // We cannot directly reuse the npy_isfinite from npy_math.h as numpy 2 | // and pyramid are not necessarily built with the same compiler. 
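// The dispatch below: MSVC has no C99 isfinite, so _finite from <float.h>
// stands in; every other compiler uses npy_isfinite from numpy's
// <numpy/npy_math.h>.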
3 | #ifdef _MSC_VER 4 | # include <float.h> 5 | # define pyr_isfinite _finite 6 | #else 7 | # include <numpy/npy_math.h> 8 | # define pyr_isfinite npy_isfinite 9 | #endif -------------------------------------------------------------------------------- /pmdarima/arima/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | 5 | import numpy 6 | from numpy.distutils.misc_util import Configuration 7 | 8 | from pmdarima._build_utils import get_blas_info 9 | 10 | 11 | def configuration(parent_package="", top_path=None): 12 | cblas_libs, blas_info = get_blas_info() 13 | 14 | # Use this rather than cblas_libs so we don't fail on Windows 15 | libraries = [] 16 | if os.name == 'posix': 17 | cblas_libs.append('m') 18 | libraries.append('m') 19 | 20 | config = Configuration("arima", parent_package, top_path) 21 | config.add_extension("_arima", 22 | sources=["_arima.pyx"], 23 | include_dirs=[numpy.get_include(), 24 | # Should this be explicitly included?: 25 | '_arima_fast_helpers.h', 26 | blas_info.pop('include_dirs', [])], 27 | libraries=libraries, 28 | extra_compile_args=blas_info.pop( 29 | 'extra_compile_args', []), 30 | **blas_info) 31 | 32 | config.add_subpackage('tests') 33 | config.add_data_dir('tests/data') 34 | 35 | return config 36 | 37 | 38 | if __name__ == "__main__": 39 | from numpy.distutils.core import setup 40 | setup(**configuration().todict()) 41 | -------------------------------------------------------------------------------- /pmdarima/arima/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/arima/tests/__init__.py -------------------------------------------------------------------------------- /pmdarima/arima/tests/data/issue_191.csv: -------------------------------------------------------------------------------- 1 | Month,0 2 | 2016-01-01,129.97783044109778 3 | 2016-02-01,306.55148688938147 4 | 2016-03-01,143.46609586423057 5 | 2016-04-01,385.0286675330632 6 | 2016-05-01,80.92959253879673 7 | 2016-06-01,1058.2157327421448 8 | 2016-07-01,1247.051448666004 9 | 2016-08-01,1833.1778915985017 10 | 2016-09-01,3338.9587951991443 11 | 2016-10-01,2855.8336518614783 12 | 2016-11-01,3309.5298524577643 13 | 2016-12-01,1351.2789542083938 14 | 2017-01-01,1920.2101811761734 15 | 2017-02-01,2168.912102232124 16 | 2017-03-01,3910.982302744965 17 | 2017-04-01,3190.3251082433057 18 | 2017-05-01,1374.2227079742736 19 | 2017-06-01,1403.1415360040357 20 | 2017-07-01,953.1645718609441 21 | 2017-08-01,1413.5523140947494 22 | 2017-09-01,2821.320862583547 23 | 2017-10-01,2467.3544074992637 24 | 2017-11-01,2976.3257808230696 25 | 2017-12-01,2918.4881247635467 26 | 2018-01-01,1980.0 27 | 2018-02-01,3962.0 28 | 2018-03-01,6944.0 29 | 2018-04-01,2720.0 30 | 2018-05-01,3172.0 31 | 2018-06-01,3877.0 32 | 2018-07-01,5234.0 33 | 2018-08-01,4493.0 34 | 2018-09-01,9407.0 35 | 2018-10-01,9079.0 36 | 2018-11-01,10435.0 37 | 2018-12-01,4934.0 38 | 2019-01-01,4598.0 39 | 2019-02-01,7364.0 40 | 2019-03-01,10836.0 41 | 2019-04-01,8119.0 42 | 2019-05-01,10854.0 43 | 2019-06-01,5149.256744318752 44 | 2019-07-01,6820.377809726632 45 | 2019-08-01,9176.990725800295 46 | 2019-09-01,15991.129595953533 47 | 2019-10-01,14868.559905791291 48 | -------------------------------------------------------------------------------- /pmdarima/arima/tests/test_approx.py:
-------------------------------------------------------------------------------- 1 | # Test the approximation function 2 | 3 | from pmdarima.arima.approx import approx, _regularize 4 | from pmdarima.utils.array import c 5 | from pmdarima.arima.stationarity import ADFTest 6 | 7 | from numpy.testing import assert_array_almost_equal 8 | import numpy as np 9 | 10 | import pytest 11 | 12 | table = c(0.216, 0.176, 0.146, 0.119) 13 | tablep = c(0.01, 0.025, 0.05, 0.10) 14 | stat = 1.01 15 | 16 | 17 | def test_regularize(): 18 | x, y = c(0.5, 0.5, 1.0, 1.5), c(1, 2, 3, 4) 19 | x, y = _regularize(x, y, 'mean') 20 | 21 | assert_array_almost_equal(x, np.array([0.5, 1.0, 1.5])) 22 | assert_array_almost_equal(y, np.array([1.5, 3.0, 4.0])) 23 | 24 | 25 | def test_approx_rule1(): 26 | # for rule = 1 27 | x, y = approx(table, tablep, stat, rule=1) 28 | assert_array_almost_equal(x, c(1.01)) 29 | assert_array_almost_equal(y, c(np.nan)) 30 | 31 | 32 | def test_approx_rule2(): 33 | # for rule = 2 34 | x, y = approx(table, tablep, stat, rule=2) 35 | assert_array_almost_equal(x, c(1.01)) 36 | assert_array_almost_equal(y, c(0.01)) 37 | 38 | 39 | @pytest.mark.parametrize( 40 | 'kwargs', [ 41 | 42 | # fails for length differences 43 | dict(x=[1, 2, 3], y=[1, 2], xout=1.0), 44 | 45 | # fails for bad string 46 | dict(x=table, y=table, xout=1.0, method='bad-string'), 47 | 48 | # fails for bad length 49 | dict(x=[], y=[], xout=[], ties='mean'), 50 | 51 | # fails for bad length 52 | dict(x=[], y=[], xout=[], method='constant'), 53 | 54 | # fails for linear when < 2 samples 55 | dict(x=[1], y=[1], xout=[], method='linear', ties='ordered'), 56 | 57 | # fails for bad length 58 | dict(x=[], y=[], xout=[], method='constant'), 59 | 60 | ] 61 | ) 62 | def test_corner_errors(kwargs): 63 | with pytest.raises(ValueError): 64 | approx(**kwargs) 65 | 66 | 67 | def test_valid_corner(): 68 | # *doesn't* fail for constant when < 2 samples 69 | approx(x=[1], y=[1], xout=[], method='constant', ties='ordered') 70 | 71 | 72 | def test_approx_precision(): 73 | # Test an example from R vs. Python to compare the expected values and 74 | # make sure we get as close as possible. 
This is from an ADFTest where k=1 75 | # and x=austres 76 | tableipl = np.array([[-4.0664], 77 | [-3.7468], 78 | [-3.462], 79 | [-3.1572], 80 | [-1.2128], 81 | [-0.8928], 82 | [-0.6104], 83 | [-0.2704]]) 84 | 85 | _, interpol = approx(tableipl, ADFTest.tablep, xout=-1.337233, rule=2) 86 | assert np.allclose(interpol, 0.84880354) # in R we get 0.8488036 87 | -------------------------------------------------------------------------------- /pmdarima/arima/tests/test_arima_diagnostics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from pmdarima.datasets import load_lynx 4 | from pmdarima.arima import ARIMA 5 | 6 | from unittest.mock import patch 7 | import pytest 8 | 9 | lynx = load_lynx() 10 | 11 | 12 | class MockMPLFigure: 13 | def __init__(self, fig, figsize): 14 | self.fig = fig 15 | self.figsize = figsize 16 | self.subplots = [] 17 | 18 | def add_subplot(self, *args): 19 | ax = MockMPLAxis(*args) 20 | self.subplots.append(ax) 21 | return ax 22 | 23 | 24 | class MockMPLAxis: 25 | def __init__(self, *args): 26 | pass 27 | 28 | def hist(self, *args, **kwargs): 29 | pass 30 | 31 | def hlines(self, *args, **kwargs): 32 | # We can hack our assertion here since we always pass alpha=0.5 33 | for k, v in kwargs.items(): 34 | setattr(self, k, v) 35 | 36 | def legend(self): 37 | pass 38 | 39 | def plot(self, x, y, **kwargs): 40 | self.x = x 41 | self.y = y 42 | 43 | def set_title(self, title): 44 | self.title = title 45 | 46 | def set_xlim(self, *args): 47 | if len(args) == 2: 48 | mn, mx = args 49 | else: # len(args) == 1 50 | mn, mx = args[0] 51 | 52 | self.mn = mn 53 | self.mx = mx 54 | 55 | def set_ylim(self, mn, mx): 56 | self.mn = mn 57 | self.mx = mx 58 | 59 | 60 | def mock_qqplot(resid, line, ax): 61 | ax.qqplot_called = True 62 | 63 | 64 | def mock_acf_plot(resid, ax, lags): 65 | ax.acfplot_called = True 66 | 67 | 68 | @pytest.mark.parametrize( 69 | 'model_type,model', [ 70 | pytest.param('arma', ARIMA(order=(1, 0, 0), maxiter=50)), 71 | pytest.param('arima', ARIMA(order=(1, 1, 0), maxiter=50)), 72 | pytest.param('sarimax', ARIMA(order=(1, 1, 0), 73 | maxiter=50, 74 | seasonal_order=(1, 0, 0, 12))) 75 | ]) 76 | def test_mock_plot_diagnostics(model_type, model): 77 | model.fit(lynx) 78 | 79 | with patch('statsmodels.graphics.utils.create_mpl_fig', MockMPLFigure), \ 80 | patch('statsmodels.graphics.gofplots.qqplot', mock_qqplot), \ 81 | patch('statsmodels.graphics.tsaplots.plot_acf', mock_acf_plot): 82 | 83 | diag = model.plot_diagnostics(figsize=(10, 12)) 84 | 85 | # Asserting on mock attributes to show that we follow the expected 86 | # logical branches 87 | assert diag.figsize == (10, 12) 88 | assert len(diag.subplots) == 4 89 | 90 | # First one should have 'alpha' from the plot call 91 | assert hasattr(diag.subplots[0], 'alpha') and \ 92 | diag.subplots[0].alpha == 0.5 93 | 94 | # Third figure gets QQPLOT called on it 95 | assert hasattr(diag.subplots[2], 'qqplot_called') and \ 96 | diag.subplots[2].qqplot_called 97 | 98 | # Fourth figure gets ACF plot call on it 99 | assert hasattr(diag.subplots[3], 'acfplot_called') and \ 100 | diag.subplots[3].acfplot_called 101 | -------------------------------------------------------------------------------- /pmdarima/arima/tests/test_auto_solvers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from pmdarima.arima import _auto_solvers as solvers 4 | from pmdarima.compat.pytest import pytest_error_str 5 | 6 
| import numpy as np 7 | import pytest 8 | 9 | 10 | @pytest.mark.parametrize( 11 | 'models,expected', [ 12 | 13 | # No nones, no overlap in IC 14 | pytest.param( 15 | [('foo', 'time', 1.0), 16 | ('bar', 'time', 3.0), 17 | ('baz', 'time', 2.0)], 18 | ['foo', 'baz', 'bar'], 19 | ), 20 | 21 | # we filter out Nones and infs 22 | pytest.param( 23 | [('foo', 'time', 1.0), 24 | ('bar', 'time', 3.0), 25 | ('baz', 'time', np.inf), 26 | (None, 'time', 0.0)], 27 | ['foo', 'bar'], 28 | ), 29 | 30 | ] 31 | ) 32 | def test_sort_and_filter_fits_valid(models, expected): 33 | actual = solvers._sort_and_filter_fits(models) 34 | assert tuple(expected) == tuple(actual), \ 35 | "\nExpected: %r" \ 36 | "\nActual: %r" \ 37 | % (expected, actual) 38 | 39 | 40 | def test_sort_and_filter_fits_error(): 41 | results = [(None, 'time', 1.0), ('foo', 'time', np.inf)] 42 | 43 | with pytest.raises(ValueError) as ve: 44 | solvers._sort_and_filter_fits(results) 45 | assert "no-successful-model" in pytest_error_str(ve) 46 | -------------------------------------------------------------------------------- /pmdarima/arima/tests/test_c_arima.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from pmdarima.arima._arima import C_is_not_finite 4 | 5 | import numpy as np 6 | 7 | 8 | def test_not_finite(): 9 | assert C_is_not_finite(np.nan) 10 | assert C_is_not_finite(np.inf) 11 | assert not C_is_not_finite(5.) 12 | -------------------------------------------------------------------------------- /pmdarima/arima/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from pmdarima.arima import utils as arima_utils 7 | from pmdarima.compat.pytest import pytest_warning_messages, pytest_error_str 8 | 9 | 10 | def test_issue_341(): 11 | seas_diffed = np.array([124., -114., -163., -83.]) 12 | 13 | with pytest.raises(ValueError) as ve: 14 | arima_utils.ndiffs(seas_diffed, test='adf') 15 | 16 | assert "raised from LinAlgError" in pytest_error_str(ve) 17 | 18 | 19 | def test_issue_351(): 20 | y = np.array([ 21 | 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 6, 2, 1, 0, 23 | 2, 0, 1, 0, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 6, 24 | 0, 0, 0, 0, 0, 1, 3, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0 25 | ]) 26 | 27 | with pytest.warns(UserWarning) as w_list: 28 | D = arima_utils.nsdiffs(y, m=52, max_D=2, test='ocsb') 29 | 30 | assert D == 1 31 | 32 | warnings_messages = pytest_warning_messages(w_list) 33 | assert len(warnings_messages) == 1 34 | assert 'shorter than m' in warnings_messages[0] 35 | -------------------------------------------------------------------------------- /pmdarima/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Base classes and interfaces 4 | 5 | import abc 6 | from abc import ABCMeta 7 | 8 | from sklearn.base import BaseEstimator 9 | 10 | # TODO: change this to base TS model if we ever hope to support more 11 | 12 | 13 | class BaseARIMA(BaseEstimator, metaclass=ABCMeta): 14 | """A base ARIMA class""" 15 | 16 | @abc.abstractmethod 17 | def fit(self, y, X, **fit_args): 18 | """Fit an ARIMA model""" 19 | 20 | def fit_predict(self, y, X=None, n_periods=10, **fit_args): 21 | """Fit an ARIMA to a vector, ``y``, of observations with an 22 | optional matrix of 
``exogenous`` variables, and then generate 23 | predictions. 24 | 25 | Parameters 26 | ---------- 27 | y : array-like or iterable, shape=(n_samples,) 28 | The time-series to which to fit the ``ARIMA`` estimator. This may 29 | either be a Pandas ``Series`` object (statsmodels can internally 30 | use the dates in the index), or a numpy array. This should be a 31 | one-dimensional array of floats, and should not contain any 32 | ``np.nan`` or ``np.inf`` values. 33 | 34 | X : array-like, shape=[n_obs, n_vars], optional (default=None) 35 | An optional 2-d array of exogenous variables. If provided, these 36 | variables are used as additional features in the regression 37 | operation. This should not include a constant or trend. Note that 38 | if an ``ARIMA`` is fit on exogenous features, it must be provided 39 | exogenous features for making predictions. 40 | 41 | n_periods : int, optional (default=10) 42 | The number of periods in the future to forecast. 43 | 44 | fit_args : dict or kwargs, optional (default=None) 45 | Any keyword args to pass to the fit method. 46 | """ 47 | self.fit(y, X, **fit_args) 48 | 49 | # TODO: remove kwargs from call 50 | return self.predict(n_periods=n_periods, X=X, **fit_args) 51 | 52 | # TODO: remove kwargs from all of these 53 | 54 | @abc.abstractmethod 55 | def predict(self, n_periods, X, return_conf_int=False, alpha=0.05, 56 | **kwargs): 57 | """Create forecasts on a fitted model""" 58 | 59 | @abc.abstractmethod 60 | def predict_in_sample(self, X, start, end, dynamic, **kwargs): 61 | """Get in-sample forecasts""" 62 | 63 | @abc.abstractmethod 64 | def update(self, y, X=None, maxiter=None, **kwargs): 65 | """Update an ARIMA model""" 66 | -------------------------------------------------------------------------------- /pmdarima/compat/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The variables defined in compat are designed to provide compatibility. 3 | Each sub-module is specifically designed not to make calls out 4 | to other portions of pmdarima and to remove circular dependencies. 5 | """ 6 | 7 | from .matplotlib import * 8 | from .pandas import * 9 | from .numpy import * 10 | from .sklearn import * 11 | from .statsmodels import * 12 | 13 | __all__ = [s for s in dir() if not s.startswith('_')] 14 | -------------------------------------------------------------------------------- /pmdarima/compat/matplotlib.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Author: Taylor G Smith 4 | # 5 | # Patch backend for MPL 6 | 7 | import sys 8 | import os 9 | 10 | __all__ = [ 11 | 'get_compatible_pyplot', 12 | 'mpl_hist_arg' 13 | ] 14 | 15 | 16 | def get_compatible_pyplot(backend=None, debug=True): 17 | """Make the backend of MPL compatible. 18 | 19 | In Travis Mac distributions, python is not installed as a framework. This 20 | means that using the TkAgg backend is the best solution (so it doesn't 21 | try to use the mac OS backend by default). 22 | 23 | Parameters 24 | ---------- 25 | backend : str, optional (default="TkAgg") 26 | The backend to default to. 27 | 28 | debug : bool, optional (default=True) 29 | Whether to log the existing backend to stderr. 30 | """ 31 | import matplotlib 32 | 33 | # If the backend provided is None, just default to 34 | # what's already being used. 35 | existing_backend = matplotlib.get_backend() 36 | if backend is not None: 37 | # Can this raise?... 
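        # In recent matplotlib versions it can: an unrecognized backend name
        # raises a ValueError here, while a recognized backend that fails to
        # load generally only errors later, when pyplot is first imported.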
38 | matplotlib.use(backend) 39 | 40 | # Print out the new backend 41 | if debug: 42 | sys.stderr.write("Currently using '%s' MPL backend, " 43 | "switching to '%s' backend%s" 44 | % (existing_backend, backend, os.linesep)) 45 | 46 | # If backend is not set via env variable, but debug is 47 | elif debug: 48 | sys.stderr.write("Using '%s' MPL backend%s" 49 | % (existing_backend, os.linesep)) 50 | 51 | from matplotlib import pyplot as plt 52 | return plt 53 | 54 | 55 | def mpl_hist_arg(value=True): 56 | """Find the appropriate `density` kwarg for our given matplotlib version. 57 | 58 | This will determine if we should use `normed` or `density`. Additionally, 59 | since this is a kwarg, the user can supply a value (True or False) that 60 | they would like in the output dictionary. 61 | 62 | Parameters 63 | ---------- 64 | value : bool, optional (default=True) 65 | The boolean value of density/normed 66 | 67 | Returns 68 | ------- 69 | density_kwarg : dict 70 | A dictionary containing the appropriate density kwarg for the 71 | installed matplotlib version, mapped to the provided or default 72 | value 73 | """ 74 | import matplotlib 75 | 76 | density_kwarg = 'density' if matplotlib.__version__ >= '2.1.0'\ 77 | else 'normed' 78 | return {density_kwarg: value} 79 | -------------------------------------------------------------------------------- /pmdarima/compat/numpy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Author: Taylor Smith 4 | # 5 | # Provide numpy compatibility and common variables. Since this 6 | # is a relatively sparse script, I feel I must defend this design 7 | # choice. See the docstring in the __init__: "Each sub-module is specifically 8 | # designed not to make calls out to other portions of pmdarima and to 9 | # remove circular dependencies." 10 | # 11 | # Since DTYPE is used commonly, this removes circular dependencies or 12 | # hard-coding. 
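# Typical downstream usage looks something like (a sketch):
#
#     from pmdarima.compat.numpy import DTYPE
#     y = np.asarray(y, dtype=DTYPE)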
13 | 14 | import numpy as np 15 | 16 | # this is going to be the data-type used across pmdarima 17 | DTYPE = np.float64 18 | -------------------------------------------------------------------------------- /pmdarima/compat/pandas.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Importing visualization modules changes in version 0.19 4 | try: # <= v0.19 5 | from pandas.tools import plotting 6 | except ImportError: # 0.20+ 7 | from pandas import plotting 8 | -------------------------------------------------------------------------------- /pmdarima/compat/pytest.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import contextlib 4 | import pytest 5 | 6 | 7 | def pytest_error_str(error): 8 | """Different for different versions of Pytest""" 9 | try: 10 | return str(error.value) 11 | except AttributeError: 12 | return str(error) 13 | 14 | 15 | def pytest_warning_messages(warnings): 16 | """Get the warning messages for captured warnings""" 17 | return [str(w.message) for w in warnings.list] 18 | 19 | 20 | @contextlib.contextmanager 21 | def raises(exception): 22 | """Allows context managers for catching NO errors""" 23 | if exception is None: 24 | yield None 25 | 26 | else: 27 | with pytest.raises(exception) as e: 28 | yield e 29 | -------------------------------------------------------------------------------- /pmdarima/compat/sklearn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Author: Charles Drotar 4 | # 5 | # Patch backend for sklearn 6 | from packaging.version import Version 7 | 8 | import sklearn 9 | from sklearn.exceptions import NotFittedError 10 | 11 | __all__ = [ 12 | 'check_is_fitted', 13 | 'if_delegate_has_method', 14 | 'safe_indexing', 15 | ] 16 | 17 | 18 | def check_is_fitted(estimator, attributes): 19 | """Ensure the model has been fitted 20 | 21 | Typically called at the beginning of an operation on a model that requires 22 | having been fit. Raises a ``NotFittedError`` if the model has not been 23 | fit. 24 | 25 | This is an adaptation of scikit-learn's ``check_is_fitted``, which has been 26 | changed recently in a way that is no longer compatible with our package. 27 | 28 | Parameters 29 | ---------- 30 | estimator : estimator instance, 31 | The estimator that will be checked to see if it is fitted. 32 | 33 | attributes : str or iterable 34 | The attributes to check for 35 | """ 36 | if isinstance(attributes, str): 37 | attributes = [attributes] 38 | if not hasattr(attributes, "__iter__"): 39 | raise TypeError("attributes must be a string or iterable") 40 | for attr in attributes: 41 | if hasattr(estimator, attr): 42 | return 43 | raise NotFittedError("Model has not been fit!") 44 | 45 | 46 | def safe_indexing(X, indices): 47 | """Slice an array or dataframe. This is deprecated in sklearn""" 48 | if hasattr(X, 'iloc'): 49 | return X.iloc[indices] 50 | # numpy: 51 | # TODO: this does not currently support axis 1 52 | if hasattr(X, 'ndim') and X.ndim == 2: 53 | return X[indices, :] 54 | # list or 1d array 55 | return X[indices] 56 | 57 | 58 | def _estimator_has(attr): 59 | """Checks if the model has a given attribute. 
60 | 61 | Meant to be used along with `sklearn.utils.metaestimators.available_if` 62 | 63 | Parameters 64 | ---------- 65 | attr : str 66 | The attribute to check the calling object for 67 | 68 | Returns 69 | ------- 70 | fn : callable 71 | A function that will either raise an `AttributeError` if the attribute 72 | does not exist, or True if it does. 73 | """ 74 | def check(self): 75 | # raise original `AttributeError` if `attr` does not exist 76 | getattr(self, attr) 77 | return True 78 | 79 | return check 80 | 81 | 82 | def if_delegate_has_method(attr): 83 | """Compat method to replace `sklearn.utils.metaestimators.if_delegate_has` 84 | 85 | Older versions (< 1.0.0) of sklearn support it, but newer versions use 86 | `available_if` instead. 87 | 88 | References 89 | ---------- 90 | .. [1] https://git.io/JzKiv 91 | .. [2] https://git.io/JzKiJ 92 | """ 93 | if Version(sklearn.__version__) < Version("1.0.0"): 94 | from sklearn.utils.metaestimators import if_delegate_has_method 95 | return if_delegate_has_method(attr) 96 | else: 97 | from sklearn.utils.metaestimators import available_if 98 | return available_if(_estimator_has(attr)) 99 | -------------------------------------------------------------------------------- /pmdarima/compat/statsmodels.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Handle inconsistencies in the statsmodels API versions 4 | 5 | from collections.abc import Iterable 6 | from packaging.version import Version 7 | import statsmodels as sm 8 | 9 | __all__ = [ 10 | 'bind_df_model' 11 | ] 12 | 13 | _sm_version = sm.__version__ 14 | 15 | 16 | def bind_df_model(model_fit, arima_results): 17 | """Set model degrees of freedom. 18 | 19 | Older versions of statsmodels don't handle this issue. Sets the 20 | model degrees of freedom in place if not already present. 21 | 22 | Parameters 23 | ---------- 24 | model_fit : ARMA, ARIMA or SARIMAX 25 | The fitted model. 26 | 27 | arima_results : ModelResultsWrapper 28 | The results wrapper. 29 | """ 30 | if not hasattr(arima_results, 'df_model'): 31 | df_model = model_fit.k_exog + model_fit.k_trend + \ 32 | model_fit.k_ar + model_fit.k_ma + \ 33 | model_fit.k_seasonal_ar + model_fit.k_seasonal_ma 34 | setattr(arima_results, 'df_model', df_model) 35 | 36 | 37 | def check_seasonal_order(order): 38 | """Check the seasonal order 39 | 40 | Statsmodels 0.11.0 introduced a check for seasonal order == 1 that can 41 | raise a ValueError, but some of our old defaults allow for m == 1 in an 42 | otherwise null seasonal order. 43 | 44 | Parameters 45 | ---------- 46 | order : tuple 47 | The existing seasonal order 48 | """ 49 | 50 | # If order[0] is an iterable, but not a string then we don't perform check. 51 | # Otherwise we perform the check and override order if it satisfies check. 52 | # See issue#370: https://github.com/alkaline-ml/pmdarima/issues/370 53 | if isinstance(order[0], Iterable) and not isinstance(order[0], str): 54 | return order 55 | else: 56 | if sum(order[:3]) == 0 and order[-1] == 1: 57 | order = (0, 0, 0, 0) 58 | 59 | # user's order may be invalid, but we'll let statsmodels' validation 60 | # handle that. 
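        # Concretely:
        #   check_seasonal_order((0, 0, 0, 1))  -> (0, 0, 0, 0)
        #   check_seasonal_order((1, 0, 0, 12)) -> (1, 0, 0, 12)  (untouched)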
61 | return order 62 | 63 | 64 | def _use_sm13(): 65 | return Version(sm.__version__) >= Version("0.13.0") 66 | -------------------------------------------------------------------------------- /pmdarima/compat/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/compat/tests/__init__.py -------------------------------------------------------------------------------- /pmdarima/compat/tests/test_sklearn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from pmdarima.arima import ARIMA 4 | from pmdarima.compat.pytest import pytest_error_str 5 | from pmdarima.compat import sklearn as sk 6 | 7 | import numpy as np 8 | import pandas as pd 9 | from numpy.testing import assert_array_equal 10 | import pytest 11 | 12 | 13 | @pytest.mark.parametrize( 14 | 'x,i,exp', [ 15 | pytest.param(np.array([1, 2, 3, 4, 5]), [0, 1], np.array([1, 2])), 16 | pytest.param(pd.Series([1, 2, 3, 4, 5]), [0, 1], np.array([1, 2])), 17 | pytest.param(np.array([[1, 2], [3, 4]]), [0], np.array([[1, 2]])), 18 | ] 19 | ) 20 | def test_safe_indexing(x, i, exp): 21 | res = sk.safe_indexing(x, i) 22 | if hasattr(res, "values"): # pd.Series 23 | res = res.values 24 | assert_array_equal(exp, res) 25 | 26 | 27 | def test_check_is_fitted_error(): 28 | with pytest.raises(TypeError) as te: 29 | sk.check_is_fitted(None, None) 30 | assert "attributes must be a string or iterable" in pytest_error_str(te) 31 | 32 | 33 | def test_not_fitted_error(): 34 | with pytest.raises(sk.NotFittedError) as nfe: 35 | mod = ARIMA((0, 1, 0)) 36 | sk.check_is_fitted(mod, "arima_res_") 37 | assert "Model has not been fit!" in pytest_error_str(nfe) 38 | -------------------------------------------------------------------------------- /pmdarima/compat/tests/test_statsmodels.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from pmdarima.compat.statsmodels import bind_df_model, check_seasonal_order 4 | 5 | 6 | # Test binding the degrees of freedom to a class in place. It's hard to test 7 | # on a potentially non-existent version of statsmodels, so we have to mock the 8 | # class 9 | def test_bind_df_model(): 10 | class ModelFit(object): 11 | k_exog = 2 12 | k_trend = 1 13 | k_ar = 3 14 | k_ma = 2 15 | k_seasonal_ar = 1 16 | k_seasonal_ma = 2 17 | 18 | class ARIMAResults(object): 19 | pass 20 | 21 | fit = ModelFit() 22 | res = ARIMAResults() 23 | 24 | # First, there is no 'df_model' in arima res 25 | assert not hasattr(res, 'df_model') 26 | bind_df_model(fit, res) 27 | 28 | # Now it should 29 | assert hasattr(res, 'df_model') 30 | assert res.df_model == 11, res.df_model 31 | 32 | 33 | def test_check_seasonal_order(): 34 | # issue370, using an iterable at position 0 returns 35 | order = ([1, 2, 3, 52], 0, 1, 7) 36 | checked_order = check_seasonal_order(order) 37 | assert order == checked_order 38 | 39 | # Special case where we override the seasonal order that is passed in. 
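    # (a null P/D/Q with m == 1 carries no seasonal information, so it is
    # collapsed to the all-zero order)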
40 | order = (0, 0, 0, 1) 41 | checked_order = check_seasonal_order(order) 42 | assert checked_order == (0, 0, 0, 0) 43 | -------------------------------------------------------------------------------- /pmdarima/context_managers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import contextlib 4 | 5 | __all__ = ['except_and_reraise'] 6 | 7 | 8 | @contextlib.contextmanager 9 | def except_and_reraise(*except_errs, raise_err=None, raise_msg=None): 10 | """Catch a lower-level error and re-raise with a more meaningful message 11 | 12 | In some cases, Numpy linalg errors can be raised in perplexing spots. This 13 | allows us to catch the lower-level errors in spots where we are aware of 14 | them so that we may raise with a more meaningful message. 15 | 16 | Parameters 17 | ---------- 18 | *except_errs : var-args, BaseException 19 | A variable list of exceptions to catch 20 | 21 | raise_err : BaseException, Error 22 | The exception to raise 23 | 24 | raise_msg : str 25 | The message to raise 26 | """ 27 | if raise_err is None: 28 | raise TypeError("raise_err must be used as a key-word arg") 29 | if raise_msg is None: 30 | raise TypeError("raise_msg must be used as a key-word arg") 31 | 32 | try: 33 | yield 34 | except except_errs as e: 35 | message = "%s (raised from %s: %s)" \ 36 | % (raise_msg, 37 | e.__class__.__name__, 38 | str(e)) 39 | raise raise_err(message) 40 | -------------------------------------------------------------------------------- /pmdarima/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .airpassengers import * 3 | from .ausbeer import * 4 | from .austres import * 5 | from .gasoline import * 6 | from .heartrate import * 7 | from .lynx import * 8 | from .stocks import * 9 | from .sunspots import * 10 | from .taylor import * 11 | from .wineind import * 12 | from .woolyrnq import * 13 | 14 | __all__ = [s for s in dir() if not s.startswith("_")] 15 | -------------------------------------------------------------------------------- /pmdarima/datasets/_base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | from os.path import abspath, dirname, join, expanduser 5 | import numpy as np 6 | import pandas as pd 7 | import urllib3 8 | import tarfile 9 | 10 | from ..compat.numpy import DTYPE 11 | 12 | # caches anything read from disk to avoid re-reads 13 | _cache = {} 14 | http = urllib3.PoolManager() 15 | 16 | 17 | def get_data_path(): 18 | """Get the absolute path to the ``data`` directory""" 19 | dataset_dir = abspath(dirname(__file__)) 20 | data_dir = join(dataset_dir, 'data') 21 | return data_dir 22 | 23 | 24 | def get_data_cache_path(): 25 | """Get the absolute path to where we cache data from the web""" 26 | return abspath(expanduser(join("~", ".pmdarima-data"))) 27 | 28 | 29 | def fetch_from_web_or_disk(url, key, cache=True, dtype=DTYPE): 30 | """Fetch a dataset from the web, and save it in the pmdarima cache""" 31 | if key in _cache: 32 | return _cache[key] 33 | 34 | disk_cache_path = get_data_cache_path() 35 | 36 | # don't ask, just tell. 
avoid race conditions 37 | os.makedirs(disk_cache_path, exist_ok=True) 38 | 39 | # See if it's already there 40 | data_path = join(disk_cache_path, key + '.csv.gz') 41 | if os.path.exists(data_path): 42 | rslt = np.loadtxt(data_path).ravel() 43 | 44 | else: 45 | r = None 46 | rslt = None 47 | try: 48 | r = http.request('GET', url) 49 | # rank 1 because it's a time series 50 | rslt = np.asarray( 51 | r.data.decode('utf-8').split('\n'), dtype=dtype) 52 | 53 | finally: 54 | if rslt is not None: 55 | try: 56 | r.release_conn() 57 | except Exception: 58 | pass 59 | 60 | # if we got here, rslt is good. We need to save it to disk 61 | np.savetxt(fname=data_path, X=rslt) 62 | 63 | # If we get here, we have rslt. 64 | if cache: 65 | _cache[key] = rslt 66 | 67 | return rslt 68 | 69 | 70 | def _load_tarfile(key): 71 | """Internal method for loading a tar file""" 72 | base_path = abspath(dirname(__file__)) 73 | file_path = join(base_path, "data", key) 74 | with tarfile.open(file_path, "r:*") as tar: 75 | csv_path = tar.getnames()[0] # there is only one file per tar 76 | return pd.read_csv(tar.extractfile(csv_path), header=0) 77 | 78 | 79 | def load_date_example(): 80 | """Loads a nondescript dated example for internal use""" 81 | X = _load_tarfile("dated.tar.gz") 82 | # make sure it's a date time 83 | X['date'] = pd.to_datetime(X['date']) 84 | y = X.pop('y') 85 | return y, X 86 | -------------------------------------------------------------------------------- /pmdarima/datasets/airpassengers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from ..compat import DTYPE 7 | 8 | __all__ = [ 9 | 'load_airpassengers' 10 | ] 11 | 12 | 13 | def load_airpassengers(as_series=False, dtype=DTYPE): 14 | """Monthly airline passengers. 15 | 16 | The classic Box & Jenkins airline data. Monthly totals of international 17 | airline passengers, 1949 to 1960. 18 | 19 | Parameters 20 | ---------- 21 | as_series : bool, optional (default=False) 22 | Whether to return a Pandas series. If False, will return a 1d 23 | numpy array. 24 | 25 | dtype : type, optional (default=np.float64) 26 | The type to return for the array. Default is np.float64, which is used 27 | throughout the package as the default type. 28 | 29 | Returns 30 | ------- 31 | rslt : array-like, shape=(n_samples,) 32 | The time series vector. 
33 | 34 | Examples 35 | -------- 36 | >>> from pmdarima.datasets import load_airpassengers 37 | >>> load_airpassengers() # doctest: +SKIP 38 | np.array([ 39 | 112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118, 40 | 115, 126, 141, 135, 125, 149, 170, 170, 158, 133, 114, 140, 41 | 145, 150, 178, 163, 172, 178, 199, 199, 184, 162, 146, 166, 42 | 171, 180, 193, 181, 183, 218, 230, 242, 209, 191, 172, 194, 43 | 196, 196, 236, 235, 229, 243, 264, 272, 237, 211, 180, 201, 44 | 204, 188, 235, 227, 234, 264, 302, 293, 259, 229, 203, 229, 45 | 242, 233, 267, 269, 270, 315, 364, 347, 312, 274, 237, 278, 46 | 284, 277, 317, 313, 318, 374, 413, 405, 355, 306, 271, 306, 47 | 315, 301, 356, 348, 355, 422, 465, 467, 404, 347, 305, 336, 48 | 340, 318, 362, 348, 363, 435, 491, 505, 404, 359, 310, 337, 49 | 360, 342, 406, 396, 420, 472, 548, 559, 463, 407, 362, 405, 50 | 417, 391, 419, 461, 472, 535, 622, 606, 508, 461, 390, 432]) 51 | 52 | >>> load_airpassengers(True).head() 53 | 0 112.0 54 | 1 118.0 55 | 2 132.0 56 | 3 129.0 57 | 4 121.0 58 | dtype: float64 59 | 60 | Notes 61 | ----- 62 | This is monthly data, so *m* should be set to 12 when using in a seasonal 63 | context. 64 | 65 | References 66 | ---------- 67 | .. [1] Box, G. E. P., Jenkins, G. M. and Reinsel, G. C. (1976) 68 | "Time Series Analysis, Forecasting and Control. Third Edition." 69 | Holden-Day. Series G. 70 | """ 71 | rslt = np.array([ 72 | 112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118, 73 | 115, 126, 141, 135, 125, 149, 170, 170, 158, 133, 114, 140, 74 | 145, 150, 178, 163, 172, 178, 199, 199, 184, 162, 146, 166, 75 | 171, 180, 193, 181, 183, 218, 230, 242, 209, 191, 172, 194, 76 | 196, 196, 236, 235, 229, 243, 264, 272, 237, 211, 180, 201, 77 | 204, 188, 235, 227, 234, 264, 302, 293, 259, 229, 203, 229, 78 | 242, 233, 267, 269, 270, 315, 364, 347, 312, 274, 237, 278, 79 | 284, 277, 317, 313, 318, 374, 413, 405, 355, 306, 271, 306, 80 | 315, 301, 356, 348, 355, 422, 465, 467, 404, 347, 305, 336, 81 | 340, 318, 362, 348, 363, 435, 491, 505, 404, 359, 310, 337, 82 | 360, 342, 406, 396, 420, 472, 548, 559, 463, 407, 362, 405, 83 | 417, 391, 419, 461, 472, 535, 622, 606, 508, 461, 390, 432 84 | ]).astype(dtype) 85 | 86 | if as_series: 87 | return pd.Series(rslt) 88 | return rslt 89 | -------------------------------------------------------------------------------- /pmdarima/datasets/austres.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from ..compat import DTYPE 7 | 8 | __all__ = [ 9 | 'load_austres' 10 | ] 11 | 12 | 13 | def load_austres(as_series=False, dtype=DTYPE): 14 | """Quarterly residential data. 15 | 16 | Numbers (in thousands) of Australian residents measured quarterly from 17 | March 1971 to March 1994. 18 | 19 | Parameters 20 | ---------- 21 | as_series : bool, optional (default=False) 22 | Whether to return a Pandas series. If False, will return a 1d 23 | numpy array. 24 | 25 | dtype : type, optional (default=np.float64) 26 | The type to return for the array. Default is np.float64, which is used 27 | throughout the package as the default type. 28 | 29 | Returns 30 | ------- 31 | rslt : array-like, shape=(n_samples,) 32 | The austres vector. 
33 | 34 | Examples 35 | -------- 36 | >>> from pmdarima.datasets import load_austres 37 | >>> load_austres() 38 | np.array([13067.3, 13130.5, 13198.4, 13254.2, 13303.7, 13353.9, 39 | 13409.3, 13459.2, 13504.5, 13552.6, 13614.3, 13669.5, 40 | 13722.6, 13772.1, 13832.0, 13862.6, 13893.0, 13926.8, 41 | 13968.9, 14004.7, 14033.1, 14066.0, 14110.1, 14155.6, 42 | 14192.2, 14231.7, 14281.5, 14330.3, 14359.3, 14396.6, 43 | 14430.8, 14478.4, 14515.7, 14554.9, 14602.5, 14646.4, 44 | 14695.4, 14746.6, 14807.4, 14874.4, 14923.3, 14988.7, 45 | 15054.1, 15121.7, 15184.2, 15239.3, 15288.9, 15346.2, 46 | 15393.5, 15439.0, 15483.5, 15531.5, 15579.4, 15628.5, 47 | 15677.3, 15736.7, 15788.3, 15839.7, 15900.6, 15961.5, 48 | 16018.3, 16076.9, 16139.0, 16203.0, 16263.3, 16327.9, 49 | 16398.9, 16478.3, 16538.2, 16621.6, 16697.0, 16777.2, 50 | 16833.1, 16891.6, 16956.8, 17026.3, 17085.4, 17106.9, 51 | 17169.4, 17239.4, 17292.0, 17354.2, 17414.2, 17447.3, 52 | 17482.6, 17526.0, 17568.7, 17627.1, 17661.5]) 53 | 54 | >>> load_austres(True).head() 55 | 0 13067.3 56 | 1 13130.5 57 | 2 13198.4 58 | 3 13254.2 59 | 4 13303.7 60 | dtype: float64 61 | 62 | Notes 63 | ----- 64 | This is quarterly data, so *m* should be set to 4 when using in a seasonal 65 | context. 66 | 67 | References 68 | ---------- 69 | .. [1] P. J. Brockwell and R. A. Davis (1996) 70 | "Introduction to Time Series and Forecasting." Springer 71 | """ 72 | rslt = np.array([ 73 | 13067.3, 13130.5, 13198.4, 13254.2, 13303.7, 13353.9, 74 | 13409.3, 13459.2, 13504.5, 13552.6, 13614.3, 13669.5, 75 | 13722.6, 13772.1, 13832.0, 13862.6, 13893.0, 13926.8, 76 | 13968.9, 14004.7, 14033.1, 14066.0, 14110.1, 14155.6, 77 | 14192.2, 14231.7, 14281.5, 14330.3, 14359.3, 14396.6, 78 | 14430.8, 14478.4, 14515.7, 14554.9, 14602.5, 14646.4, 79 | 14695.4, 14746.6, 14807.4, 14874.4, 14923.3, 14988.7, 80 | 15054.1, 15121.7, 15184.2, 15239.3, 15288.9, 15346.2, 81 | 15393.5, 15439.0, 15483.5, 15531.5, 15579.4, 15628.5, 82 | 15677.3, 15736.7, 15788.3, 15839.7, 15900.6, 15961.5, 83 | 16018.3, 16076.9, 16139.0, 16203.0, 16263.3, 16327.9, 84 | 16398.9, 16478.3, 16538.2, 16621.6, 16697.0, 16777.2, 85 | 16833.1, 16891.6, 16956.8, 17026.3, 17085.4, 17106.9, 86 | 17169.4, 17239.4, 17292.0, 17354.2, 17414.2, 17447.3, 87 | 17482.6, 17526.0, 17568.7, 17627.1, 17661.5]).astype(dtype) 88 | 89 | if as_series: 90 | return pd.Series(rslt) 91 | return rslt 92 | -------------------------------------------------------------------------------- /pmdarima/datasets/data/dated.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/datasets/data/dated.tar.gz -------------------------------------------------------------------------------- /pmdarima/datasets/data/msft.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/datasets/data/msft.tar.gz -------------------------------------------------------------------------------- /pmdarima/datasets/data/sunspots.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/datasets/data/sunspots.txt.gz -------------------------------------------------------------------------------- /pmdarima/datasets/gasoline.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pandas as pd 4 | 5 | from ..compat.numpy import DTYPE 6 | from ._base import fetch_from_web_or_disk 7 | 8 | __all__ = [ 9 | 'load_gasoline' 10 | ] 11 | 12 | url = 'http://alkaline-ml.com/datasets/gasoline.csv' 13 | 14 | 15 | def load_gasoline(as_series=False, dtype=DTYPE): 16 | """Weekly US finished motor gasoline products 17 | 18 | A weekly time series of US finished motor gasoline products supplied (in 19 | thousands of barrels per day) from February 1991 to May 2005. 20 | 21 | Parameters 22 | ---------- 23 | as_series : bool, optional (default=False) 24 | Whether to return a Pandas series. If True, the index will be set to 25 | the observed years/months. If False, will return a 1d numpy array. 26 | 27 | dtype : type, optional (default=np.float64) 28 | The type to return for the array. Default is np.float64, which is used 29 | throughout the package as the default type. 30 | 31 | Notes 32 | ----- 33 | The seasonal periodicity of this example is rather difficult, since it's 34 | not an integer. To be exact, the periodicity is ``365.25 / 7`` 35 | (~=52.1785714285714). To fit the best possible model to this data, you'll 36 | need to explore using exogenous features 37 | 38 | See Also 39 | -------- 40 | :class:`pmdarima.preprocessing.exog.FourierFeaturizer` 41 | 42 | Examples 43 | -------- 44 | >>> from pmdarima.datasets import load_gasoline 45 | >>> load_gasoline() 46 | array([6621. , 6433. , 6582. , ..., 9024. , 9175. , 9269. ]) 47 | 48 | >>> load_gasoline(True).head() 49 | 0 6621.0 50 | 1 6433.0 51 | 2 6582.0 52 | 3 7224.0 53 | 4 6875.0 54 | dtype: float64 55 | 56 | References 57 | ---------- 58 | .. [1] http://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=wgfupus2&f=W 59 | .. [2] https://robjhyndman.com/hyndsight/forecasting-weekly-data/ 60 | 61 | Returns 62 | ------- 63 | rslt : array-like, shape=(n_samples,) 64 | The gasoline dataset. There are 745 examples. 65 | """ # noqa 66 | rslt = fetch_from_web_or_disk(url, 'gasoline', cache=True).astype(dtype) 67 | if not as_series: 68 | return rslt 69 | 70 | return pd.Series(rslt) 71 | -------------------------------------------------------------------------------- /pmdarima/datasets/lynx.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Author: Taylor Smith 4 | # 5 | # This is the lynx dataset found in R. 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from ..compat import DTYPE 11 | 12 | __all__ = [ 13 | 'load_lynx' 14 | ] 15 | 16 | 17 | def load_lynx(as_series=False, dtype=DTYPE): 18 | """Annual numbers of lynx trappings for 1821–1934 in Canada. 19 | 20 | This time-series records the number of skins of predators (lynx) that were 21 | collected over several years by the Hudson's Bay Company. The dataset was 22 | taken from Brockwell & Davis (1991) and appears to be the series 23 | considered by Campbell & Walker (1977). 24 | 25 | Parameters 26 | ---------- 27 | as_series : bool, optional (default=False) 28 | Whether to return a Pandas series. If True, the index will be set to 29 | the observed years. If False, will return a 1d numpy array. 30 | 31 | dtype : type, optional (default=np.float64) 32 | The type to return for the array. Default is np.float64, which is used 33 | throughout the package as the default type. 
34 | 35 | Examples 36 | -------- 37 | >>> from pmdarima.datasets import load_lynx 38 | >>> load_lynx() 39 | array([ 269, 321, 585, 871, 1475, 2821, 3928, 5943, 4950, 2577, 523, 40 | 98, 184, 279, 409, 2285, 2685, 3409, 1824, 409, 151, 45, 41 | 68, 213, 546, 1033, 2129, 2536, 957, 361, 377, 225, 360, 42 | 731, 1638, 2725, 2871, 2119, 684, 299, 236, 245, 552, 1623, 43 | 3311, 6721, 4254, 687, 255, 473, 358, 784, 1594, 1676, 2251, 44 | 1426, 756, 299, 201, 229, 469, 736, 2042, 2811, 4431, 2511, 45 | 389, 73, 39, 49, 59, 188, 377, 1292, 4031, 3495, 587, 46 | 105, 153, 387, 758, 1307, 3465, 6991, 6313, 3794, 1836, 345, 47 | 382, 808, 1388, 2713, 3800, 3091, 2985, 3790, 674, 81, 80, 48 | 108, 229, 399, 1132, 2432, 3574, 2935, 1537, 529, 485, 662, 49 | 1000, 1590, 2657, 3396]) 50 | 51 | >>> load_lynx(True).head() 52 | 1821 269 53 | 1822 321 54 | 1823 585 55 | 1824 871 56 | 1825 1475 57 | dtype: int64 58 | 59 | Notes 60 | ----- 61 | This is annual data and not seasonal in nature (i.e., :math:`m=1`) 62 | 63 | References 64 | ---------- 65 | .. [1] Brockwell, P. J. and Davis, R. A. (1991) 66 | Time Series and Forecasting Methods. Second edition. 67 | Springer. Series G (page 557). 68 | 69 | .. [2] https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/lynx.html 70 | 71 | Returns 72 | ------- 73 | lynx : array-like, shape=(n_samples,) 74 | The lynx dataset. There are 114 observations. 75 | """ # noqa: E501 76 | rslt = np.array([269, 321, 585, 871, 1475, 2821, 3928, 5943, 4950, 77 | 2577, 523, 98, 184, 279, 409, 2285, 2685, 3409, 78 | 1824, 409, 151, 45, 68, 213, 546, 1033, 2129, 79 | 2536, 957, 361, 377, 225, 360, 731, 1638, 2725, 80 | 2871, 2119, 684, 299, 236, 245, 552, 1623, 3311, 81 | 6721, 4254, 687, 255, 473, 358, 784, 1594, 1676, 82 | 2251, 1426, 756, 299, 201, 229, 469, 736, 2042, 83 | 2811, 4431, 2511, 389, 73, 39, 49, 59, 188, 84 | 377, 1292, 4031, 3495, 587, 105, 153, 387, 758, 85 | 1307, 3465, 6991, 6313, 3794, 1836, 345, 382, 808, 86 | 1388, 2713, 3800, 3091, 2985, 3790, 674, 81, 80, 87 | 108, 229, 399, 1132, 2432, 3574, 2935, 1537, 529, 88 | 485, 662, 1000, 1590, 2657, 3396]).astype(dtype) 89 | 90 | # Set the index if necessary 91 | if as_series: 92 | return pd.Series(rslt, index=range(1821, 1935)) 93 | return rslt 94 | -------------------------------------------------------------------------------- /pmdarima/datasets/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | def configuration(parent_package='', top_path=None): 5 | from numpy.distutils.misc_util import Configuration 6 | 7 | config = Configuration('datasets', parent_package, top_path) 8 | config.add_data_dir('data') 9 | config.add_subpackage('tests') 10 | return config 11 | 12 | 13 | if __name__ == '__main__': 14 | from numpy.distutils.core import setup 15 | setup(**configuration(top_path='').todict()) 16 | -------------------------------------------------------------------------------- /pmdarima/datasets/stocks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from ._base import _load_tarfile 4 | 5 | __all__ = ['load_msft'] 6 | 7 | 8 | def load_msft(): 9 | """Load the microsoft stock data 10 | 11 | Financial data for the MSFT stock between the dates of Mar 13, 1986 and 12 | Nov 10, 2017. This data is part of the Kaggle stock dataset [1]. 
Features 13 | are as follows: 14 | 15 | * Date : datetime 16 | * Open : float32 17 | * High : float32 18 | * Low : float32 19 | * Close : float32 20 | * Volume : long 21 | * OpenInt : int 22 | 23 | References 24 | ---------- 25 | .. [1] https://www.kaggle.com/borismarjanovic/price-volume-data-for-all-us-stocks-etfs 26 | 27 | Returns 28 | ------- 29 | df : pd.DataFrame, shape=(7983, 7) 30 | A dataframe of endog and exog values. 31 | """ # noqa:E501 32 | return _load_tarfile("msft.tar.gz") 33 | -------------------------------------------------------------------------------- /pmdarima/datasets/sunspots.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Author: Taylor Smith 4 | # 5 | # This is the sunspots dataset found in R. 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from os.path import join 11 | import calendar 12 | 13 | from ..compat import DTYPE 14 | from . import _base as base 15 | 16 | __all__ = [ 17 | 'load_sunspots' 18 | ] 19 | 20 | 21 | def load_sunspots(as_series=False, dtype=DTYPE): 22 | """Monthly Sunspot Numbers, 1749 - 1983 23 | 24 | Monthly mean relative sunspot numbers from 1749 to 1983. Collected at Swiss 25 | Federal Observatory, Zurich until 1960, then Tokyo Astronomical 26 | Observatory. 27 | 28 | Parameters 29 | ---------- 30 | as_series : bool, optional (default=False) 31 | Whether to return a Pandas series. If True, the index will be set to 32 | the observed years/months. If False, will return a 1d numpy array. 33 | 34 | dtype : type, optional (default=np.float64) 35 | The type to return for the array. Default is np.float64, which is used 36 | throughout the package as the default type. 37 | 38 | Notes 39 | ----- 40 | This is monthly data, so *m* should be set to 12 when using in a seasonal 41 | context. 42 | 43 | Examples 44 | -------- 45 | >>> from pmdarima.datasets import load_sunspots 46 | >>> load_sunspots() 47 | array([58. , 62.6, 70. , ..., 55.8, 33.3, 33.4]) 48 | 49 | >>> load_sunspots(True).head() 50 | Jan 1749 58.0 51 | Feb 1749 62.6 52 | Mar 1749 70.0 53 | Apr 1749 55.7 54 | May 1749 85.0 55 | dtype: float64 56 | 57 | References 58 | ---------- 59 | .. [1] https://www.rdocumentation.org/packages/datasets/versions/3.6.1/topics/sunspots 60 | 61 | Returns 62 | ------- 63 | rslt : array-like, shape=(n_samples,) 64 | The sunspots dataset. There are 2820 observations. 
65 | """ # noqa: E501 66 | rslt = base._cache.get('sunspots', None) 67 | if rslt is None: 68 | data_path = join(base.get_data_path(), 'sunspots.txt.gz') 69 | rslt = np.loadtxt(data_path).ravel() 70 | base._cache['sunspots'] = rslt 71 | 72 | # don't want to cache type conversion 73 | rslt = rslt.astype(dtype) 74 | 75 | if not as_series: 76 | return rslt 77 | 78 | # Otherwise we want a series and have to cleverly create the index 79 | index = [ 80 | "%s %i" % (calendar.month_abbr[i + 1], year) 81 | for year in range(1749, 1984) 82 | for i in range(12) 83 | ] 84 | 85 | return pd.Series(rslt, index=index) 86 | -------------------------------------------------------------------------------- /pmdarima/datasets/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/datasets/tests/__init__.py -------------------------------------------------------------------------------- /pmdarima/datasets/tests/test_load_datasets.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from pmdarima.datasets import load_heartrate, load_lynx, load_wineind, \ 4 | load_woolyrnq, load_ausbeer, load_austres, load_gasoline, \ 5 | load_airpassengers, load_taylor, load_msft, load_sunspots, _base as base 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import os 10 | import shutil 11 | 12 | from numpy.testing import assert_array_equal 13 | import pytest 14 | 15 | 16 | def _inner_load(f): 17 | n = None 18 | for as_series in (True, False): 19 | x = f(as_series=as_series) 20 | 21 | # ensure shape is same for both 22 | if n is None: 23 | n = x.shape[0] 24 | else: 25 | assert x.shape[0] == n 26 | 27 | if as_series: 28 | assert isinstance(x, pd.Series) 29 | else: 30 | assert isinstance(x, np.ndarray) 31 | 32 | 33 | # Simply test loading the datasets and that we get the expected type 34 | @pytest.mark.parametrize( 35 | 'f', [load_heartrate, 36 | load_lynx, 37 | load_wineind, 38 | load_woolyrnq, 39 | load_ausbeer, 40 | load_austres, 41 | load_taylor, 42 | load_airpassengers]) 43 | def test_load(f): 44 | _inner_load(f) 45 | 46 | 47 | @pytest.mark.parametrize( 48 | 'f', [load_msft]) 49 | def test_df_loads(f): 50 | df = f() 51 | assert isinstance(df, pd.DataFrame) 52 | 53 | 54 | @pytest.mark.parametrize( 55 | 'f, cache_name', [ 56 | pytest.param(load_sunspots, 'sunspots'), 57 | ]) 58 | def test_load_from_gzip(f, cache_name): 59 | _inner_load(f) 60 | assert cache_name in base._cache 61 | 62 | 63 | @pytest.mark.parametrize( 64 | 'func, key', [ 65 | pytest.param(load_gasoline, 'gasoline'), 66 | ] 67 | ) 68 | def test_load_from_web(func, key): 69 | # make sure there is no data folder 70 | disk_cache_folder = base.get_data_cache_path() 71 | if os.path.exists(disk_cache_folder): 72 | shutil.rmtree(disk_cache_folder) 73 | 74 | try: 75 | # loads from web 76 | y = func(as_series=False) 77 | 78 | # show the key is in _cache 79 | assert key in base._cache 80 | 81 | # show exists on disk 82 | assert os.path.exists(os.path.join(disk_cache_folder, key + '.csv.gz')) 83 | 84 | # pop from cache so we can load it from disk 85 | base._cache.pop(key) 86 | x = func(as_series=True) # true for coverage 87 | 88 | assert_array_equal(y, x.values) 89 | 90 | finally: 91 | if os.path.exists(disk_cache_folder): 92 | shutil.rmtree(disk_cache_folder) 93 | -------------------------------------------------------------------------------- 
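# A minimal usage sketch of the loaders exercised by the tests above
# (illustrative, not part of the test suite): loaders return a numpy array
# by default, or a pandas Series with a meaningful index when as_series=True,
# and gzip-backed loaders such as load_sunspots memoize results in _base._cache.
from pmdarima.datasets import load_sunspots

y = load_sunspots()                 # np.ndarray, shape=(2820,)
s = load_sunspots(as_series=True)   # pd.Series indexed 'Jan 1749', ...
assert y.shape[0] == s.shape[0]
--------------------------------------------------------------------------------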
/pmdarima/decorators.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import functools 4 | import warnings 5 | 6 | __all__ = ['deprecated'] 7 | 8 | 9 | def deprecated(use_instead, notes=None): 10 | """Mark functions as deprecated. 11 | 12 | This decorator will result in a warning being emitted when the decorated 13 | function is used. 14 | 15 | Parameters 16 | ---------- 17 | use_instead : str 18 | The name of the function to use instead. 19 | 20 | notes : str, optional (default=None) 21 | Additional notes to add to the warning message. 22 | """ 23 | if notes is None: 24 | notes = "" 25 | else: 26 | notes = " " + notes 27 | 28 | def wrapped_func(func): 29 | @functools.wraps(func) 30 | def _inner(*args, **kwargs): 31 | warnings.simplefilter('always', DeprecationWarning) # un-filter 32 | msg = ("{0} is deprecated and will be removed in a future " 33 | "release of pmdarima. Use {1} instead.{2}" 34 | .format(func.__name__, use_instead, notes)) 35 | 36 | warnings.warn( 37 | msg, 38 | category=DeprecationWarning, 39 | stacklevel=2) 40 | warnings.simplefilter('default', DeprecationWarning) # re-filter 41 | return func(*args, **kwargs) 42 | return _inner 43 | return wrapped_func 44 | -------------------------------------------------------------------------------- /pmdarima/metrics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .utils import check_endog 4 | import numpy as np 5 | 6 | __all__ = ['smape'] 7 | 8 | 9 | def smape(y_true, y_pred): 10 | r"""Compute the Symmetric Mean Absolute Percentage Error. 11 | 12 | The symmetric mean absolute percentage error (SMAPE) is an accuracy measure 13 | based on percentage (or relative) errors. Defined as follows: 14 | 15 | :math:`\frac{100\%}{n}\sum_{t=1}^{n}{\frac{|F_{t}-A_{t}|}{ 16 | (|A_{t}|+|F_{t}|)/2}}` 17 | 18 | Where a perfect SMAPE score is 0.0, and a higher score indicates a higher 19 | error rate. 20 | 21 | Parameters 22 | ---------- 23 | y_true : array-like, shape=(n_samples,) 24 | The true test values of y. 25 | 26 | y_pred : array-like, shape=(n_samples,) 27 | The forecasted values of y. 28 | 29 | Examples 30 | -------- 31 | A typical case: 32 | >>> import numpy as np 33 | >>> y_true = np.array([0.07533, 0.07533, 0.07533, 0.07533, 34 | ... 0.07533, 0.07533, 0.0672, 0.0672]) 35 | >>> y_pred = np.array([0.102, 0.107, 0.047, 0.1, 36 | ... 0.032, 0.047, 0.108, 0.089]) 37 | >>> smape(y_true, y_pred) 38 | 42.60306631890196 39 | 40 | A perfect score: 41 | >>> smape(y_true, y_true) 42 | 0.0 43 | 44 | References 45 | ---------- 46 | .. 
[1] https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error 47 | """ # noqa: E501 48 | y_true = check_endog( 49 | y_true, 50 | copy=False, 51 | preserve_series=False, 52 | ) 53 | y_pred = check_endog( 54 | y_pred, 55 | copy=False, 56 | preserve_series=False, 57 | ) 58 | abs_diff = np.abs(y_pred - y_true) 59 | return np.mean((abs_diff * 200 / (np.abs(y_pred) + np.abs(y_true)))) 60 | -------------------------------------------------------------------------------- /pmdarima/model_selection/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from ._split import * 4 | from ._validation import * 5 | 6 | __all__ = [s for s in dir() if not s.startswith("_")] 7 | -------------------------------------------------------------------------------- /pmdarima/model_selection/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/model_selection/tests/__init__.py -------------------------------------------------------------------------------- /pmdarima/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .base import * 4 | from .endog import * 5 | from .exog import * 6 | 7 | __all__ = [s for s in dir() if not s.startswith("_")] 8 | -------------------------------------------------------------------------------- /pmdarima/preprocessing/endog/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .boxcox import * 4 | from .log import * 5 | 6 | # don't want to accidentally hoist `base` to top-level, since preprocessing has 7 | # its own base 8 | __all__ = [s for s in dir() if not (s.startswith("_") or s == 'base')] 9 | -------------------------------------------------------------------------------- /pmdarima/preprocessing/endog/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import abc 4 | 5 | from ..base import BaseTransformer 6 | 7 | 8 | class BaseEndogTransformer(BaseTransformer, metaclass=abc.ABCMeta): 9 | """A base class for endogenous array transformers""" 10 | 11 | def _check_y_X(self, y, X): 12 | """Check the endog and exog arrays""" 13 | y, X = super(BaseEndogTransformer, self)._check_y_X(y, X) 14 | if y is None: 15 | raise ValueError("y must be non-None for endogenous transformers") 16 | return y, X 17 | 18 | @abc.abstractmethod 19 | def inverse_transform(self, y, X=None): 20 | """Inverse transform a transformed array 21 | 22 | Invert the transformation on the transformed array. 23 | 24 | Parameters 25 | ---------- 26 | y : array-like or None, shape=(n_samples,) 27 | The transformed endogenous (time-series) array. 28 | 29 | X : array-like or None, shape=(n_samples, n_features), optional 30 | The exogenous array of additional covariates. Not used for 31 | endogenous transformers. Default is None, and non-None values will 32 | serve as pass-through arrays.
33 | 34 | Returns 35 | ------- 36 | y : array-like or None 37 | The inverse-transformed y array 38 | 39 | X : array-like or None 40 | The inverse-transformed exogenous array 41 | """ 42 | -------------------------------------------------------------------------------- /pmdarima/preprocessing/endog/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/preprocessing/endog/tests/__init__.py -------------------------------------------------------------------------------- /pmdarima/preprocessing/endog/tests/test_base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | from pmdarima.compat.pytest import pytest_error_str 5 | from pmdarima.preprocessing.endog import LogEndogTransformer 6 | 7 | 8 | def test_value_error_on_check(): 9 | trans = LogEndogTransformer() # could be anything, just need an instance 10 | with pytest.raises(ValueError) as ve: 11 | trans._check_y_X(None, None) 12 | assert 'non-None' in pytest_error_str(ve) 13 | -------------------------------------------------------------------------------- /pmdarima/preprocessing/endog/tests/test_boxcox.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | from numpy.testing import assert_array_almost_equal 5 | from scipy import stats 6 | import pytest 7 | 8 | from pmdarima.compat.pytest import pytest_error_str 9 | from pmdarima.preprocessing import BoxCoxEndogTransformer 10 | 11 | loggamma = stats.loggamma.rvs(5, size=500) + 5 12 | 13 | 14 | @pytest.mark.parametrize( 15 | 'X', [ 16 | None, 17 | np.random.rand(loggamma.shape[0], 3), 18 | ] 19 | ) 20 | def test_invertible(X): 21 | trans = BoxCoxEndogTransformer() 22 | y_t, e_t = trans.fit_transform(loggamma, X) 23 | y_prime, e_prime = trans.inverse_transform(y_t, X=e_t) 24 | 25 | assert_array_almost_equal(loggamma, y_prime) 26 | 27 | # X should all be the same too 28 | if X is None: 29 | assert X is e_t is e_prime is None 30 | else: 31 | assert_array_almost_equal(X, e_t) 32 | assert_array_almost_equal(X, e_prime) 33 | 34 | 35 | def test_invertible_when_lambda_is_0(): 36 | y = [1, 2, 3] 37 | trans = BoxCoxEndogTransformer(lmbda=0.) 38 | y_t, _ = trans.fit_transform(y) 39 | y_prime, _ = trans.inverse_transform(y_t) 40 | assert_array_almost_equal(y, y_prime) 41 | 42 | 43 | def test_value_error_on_neg_lambda(): 44 | trans = BoxCoxEndogTransformer(lmbda2=-4.) 45 | with pytest.raises(ValueError) as ve: 46 | trans.fit_transform([1, 2, 3]) 47 | assert 'lmbda2 must be a non-negative' in pytest_error_str(ve) 48 | 49 | 50 | class TestNonInvertibleBC: 51 | y = [-1., 0., 1.] 52 | 53 | def test_expected_error(self): 54 | y = self.y 55 | trans = BoxCoxEndogTransformer(lmbda=2.) 
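# Box-Cox requires strictly positive inputs; with no lmbda2 shift and the default neg_action='raise', fitting y = [-1., 0., 1.] should raise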
56 | with pytest.raises(ValueError): 57 | trans.fit_transform(y) 58 | 59 | def test_expected_warning(self): 60 | y = self.y 61 | trans = BoxCoxEndogTransformer(lmbda=2., neg_action="warn") 62 | with pytest.warns(UserWarning): 63 | y_t, _ = trans.fit_transform(y) 64 | 65 | # When we invert, it will not be the same 66 | y_prime, _ = trans.inverse_transform(y_t) 67 | assert not np.allclose(y_prime, y) 68 | 69 | def test_no_warning_on_ignore(self): 70 | y = self.y 71 | trans = BoxCoxEndogTransformer(lmbda=2., neg_action="ignore") 72 | y_t, _ = trans.fit_transform(y) 73 | 74 | # When we invert, it will not be the same 75 | y_prime, _ = trans.inverse_transform(y_t) 76 | assert not np.allclose(y_prime, y) 77 | 78 | def test_invertible_when_lam2(self): 79 | y = self.y 80 | trans = BoxCoxEndogTransformer(lmbda=2., lmbda2=2.) 81 | y_t, _ = trans.fit_transform(y) 82 | 83 | # With the lmbda2 shift, inverting recovers the original data 84 | y_prime, _ = trans.inverse_transform(y_t) 85 | assert_array_almost_equal(y, y_prime) 86 | -------------------------------------------------------------------------------- /pmdarima/preprocessing/endog/tests/test_log.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from numpy.testing import assert_array_almost_equal 4 | from sklearn.base import clone 5 | 6 | from pmdarima.preprocessing import LogEndogTransformer 7 | from pmdarima.preprocessing import BoxCoxEndogTransformer 8 | 9 | 10 | def test_same(): 11 | y = [1, 2, 3] 12 | trans = BoxCoxEndogTransformer(lmbda=0) 13 | log_trans = LogEndogTransformer() 14 | y_t, _ = trans.fit_transform(y) 15 | log_y_t, _ = log_trans.fit_transform(y) 16 | assert_array_almost_equal(log_y_t, y_t) 17 | 18 | 19 | def test_invertible(): 20 | y = [1, 2, 3] 21 | trans = LogEndogTransformer() 22 | y_t, _ = trans.fit_transform(y) 23 | y_prime, _ = trans.inverse_transform(y_t) 24 | assert_array_almost_equal(y, y_prime) 25 | 26 | 27 | def test_log_clone_issue_407(): 28 | # https://github.com/alkaline-ml/pmdarima/issues/407 29 | log = LogEndogTransformer(lmbda=10) 30 | res, _ = log.fit_transform([0, 10]) 31 | 32 | # we swap lmbda2 and lmbda internally 33 | assert log.lmbda2 == 10 34 | assert log.lmbda == 0 35 | 36 | log2 = clone(log) 37 | assert log2.lmbda2 == 10 38 | assert log2.lmbda == 0 39 | res2, _ = log2.fit_transform([0, 10]) 40 | 41 | assert_array_almost_equal(res, res2) 42 | -------------------------------------------------------------------------------- /pmdarima/preprocessing/exog/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .dates import * 4 | from .fourier import * 5 | 6 | # don't want to accidentally hoist `base` to top-level, since preprocessing has 7 | # its own base 8 | __all__ = [s for s in dir() if not (s.startswith("_") or s == 'base')] 9 | -------------------------------------------------------------------------------- /pmdarima/preprocessing/exog/_fourier.pyx: -------------------------------------------------------------------------------- 1 | #cython: boundscheck=False 2 | #cython: cdivision=True 3 | #cython: wraparound=False 4 | #cython: nonecheck=False 5 | #cython: language_level=3 6 | # 7 | # Author: Taylor G Smith 8 | 9 | import numpy as np 10 | 11 | from cython.view cimport array as cvarray 12 | from libc.math cimport sin, cos, M_PI 13 | cimport numpy as np 14 | cimport cython 15 | 16 | ctypedef float [:, :] float_array_2d_t 17 | ctypedef double [:, :] double_array_2d_t 18 | 19
| ctypedef np.npy_intp INTP 20 | 21 | np.import_array() 22 | 23 | 24 | cpdef double[:, :] C_fourier_terms(double[:] p, double[:] times): 25 | cdef INTP i, j, k = p.shape[0], n = times.shape[0], m 26 | cdef float v 27 | 28 | cdef double [:, :] X = cvarray(shape=(k * 2, n), 29 | itemsize=sizeof(double), 30 | format="d") # d for double and also DUH 31 | 32 | with nogil: 33 | j = 0 34 | for i in range(0, k * 2, 2): 35 | 36 | # 2 * p[j] * times * PI 37 | v = p[j] * 2 * M_PI 38 | 39 | for m in range(n): 40 | X[i, m] = sin(v * times[m]) 41 | X[i + 1, m] = cos(v * times[m]) 42 | 43 | j += 1 44 | 45 | return X 46 | -------------------------------------------------------------------------------- /pmdarima/preprocessing/exog/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pandas as pd 4 | import numpy as np 5 | import abc 6 | 7 | from ..base import BaseTransformer 8 | 9 | 10 | class BaseExogTransformer(BaseTransformer, metaclass=abc.ABCMeta): 11 | """A base class for exogenous array transformers""" 12 | 13 | def _check_y_X(self, y, X, null_allowed=False): 14 | """Check the endog and exog arrays""" 15 | y, X = super(BaseExogTransformer, self)._check_y_X(y, X) 16 | if X is None and not null_allowed: 17 | raise ValueError("X must be non-None for exog transformers") 18 | return y, X 19 | 20 | 21 | class BaseExogFeaturizer(BaseExogTransformer, metaclass=abc.ABCMeta): 22 | """Transformers that create new exog features from the endog or exog array 23 | 24 | Parameters 25 | ---------- 26 | prefix : str or None, optional (default=None) 27 | The feature prefix 28 | """ 29 | def __init__(self, prefix=None): 30 | self.prefix = prefix 31 | 32 | @abc.abstractmethod 33 | def _get_prefix(self): 34 | """Get the feature prefix for when exog is a pd.DataFrame""" 35 | 36 | def _get_feature_names(self, X): 37 | pfx = self._get_prefix() 38 | return ['%s_%i' % (pfx, i) for i in range(X.shape[1])] 39 | 40 | def _safe_hstack(self, X, features): 41 | """H-stack dataframes or np.ndarrays""" 42 | if X is None or isinstance(X, pd.DataFrame): 43 | # the features we're adding may be np.ndarray 44 | if not isinstance(features, pd.DataFrame): 45 | features = pd.DataFrame.from_records(features) 46 | 47 | # subclass may override this 48 | features.columns = self._get_feature_names(features) 49 | 50 | if X is not None: 51 | # ignore_index will remove names, which is a stupid quirk 52 | # of pandas... so manually reset the indices 53 | # https://stackoverflow.com/a/43406062/3015734 54 | X.index = features.index = np.arange(X.shape[0]) 55 | return pd.concat([X, features], axis=1) 56 | # if X was None coming in, we'd still like to favor a pd.DF 57 | return features 58 | 59 | return np.hstack([X, features]) 60 | 61 | def transform(self, y, X=None, n_periods=0, **kwargs): 62 | """Transform the new array 63 | 64 | Apply the transformation to the array after learning the training set's 65 | characteristics in the ``fit`` method. The transform method for 66 | featurizers behaves slightly differently in that the ``n_periods`` argument may 67 | be required to extrapolate for periods in the future. 68 | 69 | Parameters 70 | ---------- 71 | y : array-like or None, shape=(n_samples,) 72 | The endogenous (time-series) array. 73 | 74 | X : array-like or None, shape=(n_samples, n_features) 75 | An array of additional covariates. 76 | 77 | n_periods : int, optional (default=0) 78 | The number of periods in the future to forecast.
If ``n_periods`` 79 | is 0, will compute the features for the training set. 80 | ``n_periods`` corresponds to the number of samples that will be 81 | returned. 82 | 83 | **kwargs : keyword args 84 | Keyword arguments required by the transform function. 85 | 86 | Returns 87 | ------- 88 | y : array-like or None 89 | The transformed y array 90 | 91 | X : array-like or None 92 | The transformed X array 93 | """ 94 | -------------------------------------------------------------------------------- /pmdarima/preprocessing/exog/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | 5 | import numpy 6 | from numpy.distutils.misc_util import Configuration 7 | 8 | from pmdarima._build_utils import get_blas_info 9 | 10 | 11 | def configuration(parent_package="", top_path=None): 12 | cblas_libs, blas_info = get_blas_info() 13 | 14 | # Use this rather than cblas_libs so we don't fail on Windows 15 | libraries = [] 16 | if os.name == 'posix': 17 | cblas_libs.append('m') 18 | libraries.append('m') 19 | 20 | config = Configuration("exog", parent_package, top_path) 21 | config.add_extension("_fourier", 22 | sources=["_fourier.pyx"], 23 | include_dirs=[numpy.get_include(), 24 | blas_info.pop('include_dirs', [])], 25 | libraries=libraries, 26 | extra_compile_args=blas_info.pop( 27 | 'extra_compile_args', []), 28 | **blas_info) 29 | 30 | config.add_subpackage('tests') 31 | 32 | return config 33 | 34 | 35 | if __name__ == "__main__": 36 | from numpy.distutils.core import setup 37 | setup(**configuration().todict()) 38 | -------------------------------------------------------------------------------- /pmdarima/preprocessing/exog/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /pmdarima/preprocessing/exog/tests/test_base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from pmdarima.preprocessing.exog import base 4 | from pmdarima import datasets 5 | import numpy as np 6 | import pandas as pd 7 | 8 | wineind = datasets.load_wineind() 9 | 10 | 11 | class RandomExogFeaturizer(base.BaseExogFeaturizer): 12 | """Creates random exog features. 
This is just used to test base func""" 13 | 14 | def _get_prefix(self): 15 | return "RND" 16 | 17 | def fit(self, y, X, **_): 18 | return self 19 | 20 | def transform(self, y, X=None, n_periods=0, **_): 21 | Xt = np.random.rand(y.shape[0], 4) 22 | Xt = self._safe_hstack(X, Xt) 23 | return y, Xt 24 | 25 | 26 | def test_default_get_feature_names(): 27 | feat = RandomExogFeaturizer() 28 | y_trans, X = feat.fit_transform(wineind) 29 | assert y_trans is wineind 30 | assert X.columns.tolist() == \ 31 | ['RND_0', 'RND_1', 'RND_2', 'RND_3'] 32 | 33 | 34 | def test_default_get_feature_names_with_X(): 35 | feat = RandomExogFeaturizer() 36 | X = pd.DataFrame.from_records( 37 | np.random.rand(wineind.shape[0], 2), columns=['a', 'b']) 38 | y_trans, X_trans = feat.fit_transform(wineind, X) 39 | assert y_trans is wineind 40 | assert X_trans.columns.tolist() == \ 41 | ['a', 'b', 'RND_0', 'RND_1', 'RND_2', 'RND_3'] 42 | -------------------------------------------------------------------------------- /pmdarima/preprocessing/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | 5 | from numpy.distutils.misc_util import Configuration 6 | 7 | from pmdarima._build_utils import get_blas_info 8 | 9 | 10 | def configuration(parent_package="", top_path=None): 11 | cblas_libs, blas_info = get_blas_info() 12 | 13 | # Use this rather than cblas_libs so we don't fail on Windows 14 | libraries = [] 15 | if os.name == 'posix': 16 | cblas_libs.append('m') 17 | libraries.append('m') 18 | 19 | config = Configuration("preprocessing", parent_package, top_path) 20 | 21 | config.add_subpackage('endog') 22 | config.add_subpackage('endog/tests') 23 | config.add_subpackage('exog') # builds src and adds its own tests 24 | 25 | return config 26 | 27 | 28 | if __name__ == "__main__": 29 | from numpy.distutils.core import setup 30 | setup(**configuration().todict()) 31 | -------------------------------------------------------------------------------- /pmdarima/preprocessing/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/preprocessing/tests/__init__.py -------------------------------------------------------------------------------- /pmdarima/preprocessing/tests/test_base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | from pmdarima.preprocessing import base 5 | from pmdarima.compat.pytest import pytest_error_str 6 | 7 | 8 | def test_value_error_on_update_check(): 9 | with pytest.raises(ValueError) as ve: 10 | base.UpdatableMixin()._check_endog(None) 11 | assert 'cannot be None' in pytest_error_str(ve) 12 | -------------------------------------------------------------------------------- /pmdarima/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Author: Taylor Smith 4 | # 5 | # Setup for submodules of pmdarima 6 | 7 | import os 8 | import sys 9 | 10 | from pmdarima._build_utils import cythonize_extensions 11 | 12 | 13 | # DEFINE CONFIG 14 | def configuration(parent_package='', top_path=None): 15 | from numpy.distutils.misc_util import Configuration 16 | 17 | libs = [] 18 | if os.name == 'posix': 19 | libs.append('m') 20 | 21 | config = Configuration('pmdarima', parent_package, top_path) 22 | 23 | # build utilities 24 | 
config.add_subpackage('__check_build') 25 | config.add_subpackage('_build_utils') 26 | 27 | # submodules that do NOT have their own setup.py. manually add their tests 28 | config.add_subpackage('compat') 29 | config.add_subpackage('compat/tests') 30 | config.add_subpackage('datasets') 31 | config.add_subpackage('datasets/tests') 32 | config.add_subpackage('model_selection') 33 | config.add_subpackage('model_selection/tests') 34 | 35 | # the following packages have cython or their own setup.py files. 36 | config.add_subpackage('arima') 37 | config.add_subpackage('preprocessing') 38 | config.add_subpackage('utils') 39 | 40 | # add test directory 41 | config.add_subpackage('tests') 42 | 43 | # Do cythonization, but only if this is not a release tarball, since the 44 | # C/C++ files are not necessarily forward compatible with future versions 45 | # of python. 46 | if 'sdist' not in sys.argv: 47 | cythonize_extensions(top_path, config) 48 | 49 | return config 50 | 51 | 52 | if __name__ == '__main__': 53 | from numpy.distutils.core import setup 54 | setup(**configuration(top_path='').todict()) 55 | -------------------------------------------------------------------------------- /pmdarima/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/tests/__init__.py -------------------------------------------------------------------------------- /pmdarima/tests/test_context_managers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from pmdarima import context_managers as ctx 4 | from pmdarima.compat.pytest import pytest_error_str 5 | 6 | import pytest 7 | 8 | 9 | def test_except_and_reraise_do_reraise(): 10 | with pytest.raises(KeyError) as ke: 11 | with ctx.except_and_reraise( 12 | ValueError, 13 | raise_err=KeyError, 14 | raise_msg="bar message" 15 | ): 16 | raise ValueError("contains foo message") 17 | 18 | msg = pytest_error_str(ke) 19 | assert "bar message" in msg 20 | assert "raised from ValueError" in msg 21 | 22 | 23 | def test_except_and_reraise_no_reraise(): 24 | with pytest.raises(KeyError) as ke: 25 | with ctx.except_and_reraise( 26 | ValueError, 27 | raise_err=TypeError, 28 | raise_msg="bar message" 29 | ): 30 | raise KeyError("foo message") 31 | 32 | assert "foo message" in pytest_error_str(ke) 33 | 34 | 35 | @pytest.mark.parametrize('err', [ValueError, KeyError, TypeError]) 36 | def test_multiple(err): 37 | 38 | class FooError(BaseException): 39 | pass 40 | 41 | with pytest.raises(FooError) as fe: 42 | with ctx.except_and_reraise( 43 | ValueError, KeyError, TypeError, 44 | raise_err=FooError, 45 | raise_msg="gotcha, fam", 46 | ): 47 | raise err("Boo!") 48 | 49 | assert "gotcha, fam" in pytest_error_str(fe) 50 | -------------------------------------------------------------------------------- /pmdarima/tests/test_estimators.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from sklearn.base import clone 4 | from pmdarima.arima import ARIMA, AutoARIMA 5 | from pmdarima.pipeline import Pipeline 6 | from pmdarima.datasets import load_wineind 7 | from pmdarima.preprocessing import FourierFeaturizer 8 | import pytest 9 | 10 | y = load_wineind() 11 | 12 | 13 | @pytest.mark.parametrize( 14 | 'est', [ 15 | ARIMA(order=(2, 1, 1)), 16 | AutoARIMA(seasonal=False, maxiter=3), 17 | Pipeline([ 18 | ("fourier", 
FourierFeaturizer(m=12)), 19 | ("arima", AutoARIMA(seasonal=False, stepwise=True, 20 | suppress_warnings=True, d=1, max_p=2, max_q=0, 21 | start_q=0, start_p=1, 22 | maxiter=3, error_action='ignore')) 23 | ]) 24 | ] 25 | ) 26 | def test_clonable(est): 27 | # fit it, then clone it 28 | est.fit(y) 29 | est2 = clone(est) 30 | assert isinstance(est2, est.__class__) 31 | assert est is not est2 32 | -------------------------------------------------------------------------------- /pmdarima/tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from pmdarima.metrics import smape 4 | import numpy as np 5 | import pytest 6 | 7 | 8 | @pytest.mark.parametrize( 9 | 'actual,forecasted,expected', [ 10 | pytest.param([0.07533, 0.07533, 0.07533, 0.07533, 11 | 0.07533, 0.07533, 0.0672, 0.0672], 12 | [0.102, 0.107, 0.047, 0.1, 13 | 0.032, 0.047, 0.108, 0.089], 42.60306631890196), 14 | 15 | # when y_true == y_pred, we get 0 err 16 | pytest.param([0.07533, 0.07533, 0.07533, 0.07533, 17 | 0.07533, 0.07533, 0.0672, 0.0672], 18 | [0.07533, 0.07533, 0.07533, 0.07533, 19 | 0.07533, 0.07533, 0.0672, 0.0672], 0), 20 | ] 21 | ) 22 | def test_smape(actual, forecasted, expected): 23 | err = smape(actual, forecasted) 24 | assert np.allclose(expected, err) 25 | -------------------------------------------------------------------------------- /pmdarima/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Author: Taylor Smith 4 | 5 | from .array import * 6 | from .metaestimators import * 7 | from .visualization import * 8 | from .wrapped import * 9 | 10 | 11 | def get_callable(key, dct): 12 | """Get the callable mapped by a key from a dictionary. This is 13 | necessary for pickling (so we don't try to pickle an unbound method). 14 | 15 | Parameters 16 | ---------- 17 | key : str 18 | The key for the ``dct`` dictionary. 19 | 20 | dct : dict 21 | The dictionary of callables. 22 | """ 23 | fun = dct.get(key, None) 24 | 25 | if not isinstance(key, str) or fun is None: # ah, that's no fun :( 26 | raise ValueError('key must be a string in one of %r, but got %r' 27 | % (dct, key)) 28 | return fun 29 | 30 | 31 | __all__ = [s for s in dir() if not s.startswith("_")] 32 | -------------------------------------------------------------------------------- /pmdarima/utils/_array.pyx: -------------------------------------------------------------------------------- 1 | #cython: boundscheck=False 2 | #cython: cdivision=True 3 | #cython: wraparound=False 4 | #cython: nonecheck=False 5 | #cython: language_level=3 6 | # 7 | # This is the Cython translation of the diffinv function R source code 8 | # 9 | # Author: Charles Drotar 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | cimport cython 14 | 15 | ctypedef np.npy_intp INTP 16 | 17 | cdef fused floating1d: 18 | float[::1] 19 | double[::1] 20 | 21 | np.import_array() 22 | 23 | 24 | @cython.boundscheck(False) 25 | @cython.wraparound(False) 26 | cpdef double[:] C_intgrt_vec(floating1d x, 27 | floating1d xi, 28 | INTP lag): 29 | """Inverse diff 30 | 31 | References 32 | ---------- 33 | .. [1] https://github.com/wch/r-source/blob/trunk/src/library/stats/R/diffinv.R#L39 34 | ..
[2] https://github.com/mirror/r/blob/65a0e33a4b0a119703586fcd1f9742654738ae54/src/library/stats/src/PPsum.c#L46 35 | """ 36 | cdef INTP i, n = x.shape[0] 37 | cdef np.ndarray[double, ndim=1, mode='c'] ans = \ 38 | np.zeros(n + lag, dtype=np.float64, order='c') 39 | 40 | with nogil: 41 | for i in range(lag, lag + n): 42 | ans[i] = x[i - lag] + ans[i - lag] 43 | return ans 44 | -------------------------------------------------------------------------------- /pmdarima/utils/_show_versions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility methods to print system info for debugging 3 | 4 | adapted from ``pandas.show_versions`` 5 | adapted from ``sklearn.show_versions`` 6 | """ 7 | # License: BSD 3 clause 8 | 9 | import platform 10 | import sys 11 | 12 | _pmdarima_deps = ( 13 | # setuptools needs to be before pip: https://github.com/pypa/setuptools/issues/3044#issuecomment-1024972548 # noqa:E501 14 | "setuptools", 15 | "pip", 16 | "sklearn", 17 | "statsmodels", 18 | "numpy", 19 | "scipy", 20 | "Cython", 21 | "pandas", 22 | "joblib", 23 | "pmdarima", 24 | ) 25 | 26 | # Packages that have a different import name than name on PyPI 27 | _install_mapping = { 28 | "sklearn": "scikit-learn" 29 | } 30 | 31 | 32 | def _get_sys_info(): 33 | """System information 34 | 35 | Return 36 | ------ 37 | sys_info : dict 38 | system and Python version information 39 | 40 | """ 41 | python = sys.version.replace('\n', ' ') 42 | 43 | blob = [ 44 | ("python", python), 45 | ('executable', sys.executable), 46 | ("machine", platform.platform()), 47 | ] 48 | 49 | return dict(blob) 50 | 51 | 52 | def _get_deps_info(deps=_pmdarima_deps): 53 | """Overview of the installed version of main dependencies 54 | 55 | Returns 56 | ------- 57 | deps_info: dict 58 | version information on relevant Python libraries 59 | """ 60 | def get_version(module): 61 | return module.__version__ 62 | 63 | deps_info = {} 64 | 65 | # TODO: We can get rid of this when we deprecate 3.7 66 | if sys.version_info.minor <= 7: 67 | import importlib 68 | 69 | for modname in deps: 70 | try: 71 | if modname in sys.modules: 72 | mod = sys.modules[modname] 73 | else: 74 | mod = importlib.import_module(modname) 75 | ver = get_version(mod) 76 | deps_info[modname] = ver 77 | except ImportError: 78 | deps_info[modname] = None 79 | 80 | else: 81 | from importlib.metadata import PackageNotFoundError, version 82 | 83 | for modname in deps: 84 | try: 85 | deps_info[modname] = version(_install_mapping.get(modname, modname)) # noqa:E501 86 | except PackageNotFoundError: 87 | deps_info[modname] = None 88 | 89 | return deps_info 90 | 91 | 92 | def show_versions(): 93 | """Print useful debugging information""" 94 | sys_info = _get_sys_info() 95 | deps_info = _get_deps_info() 96 | 97 | print('\nSystem:') 98 | for k, stat in sys_info.items(): 99 | print("{k:>10}: {stat}".format(k=k, stat=stat)) 100 | 101 | print('\nPython dependencies:') 102 | for k, stat in deps_info.items(): 103 | print("{k:>11}: {stat}".format(k=k, stat=stat)) 104 | -------------------------------------------------------------------------------- /pmdarima/utils/metaestimators.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Author: Taylor Smith 4 | # 5 | # Metaestimators for the ARIMA class. These classes are derived from the 6 | # sklearn metaestimators, but adapted for more specific use with pmdarima. 
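# For example (see tests/test_meta.py below), a method decorated with
# @if_has_delegate('a_') is invisible to hasattr() until fit() has set
# self.a_ on the instance, so attribute presence doubles as a capability probe.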
7 | 8 | from operator import attrgetter 9 | from functools import update_wrapper 10 | 11 | __all__ = [ 12 | 'if_has_delegate' 13 | ] 14 | 15 | 16 | class _IffHasDelegate(object): 17 | """Implements a conditional property using the descriptor protocol. 18 | Using this class to create a decorator will raise an ``AttributeError`` 19 | if none of the delegates (specified in ``delegate_names``) is an attribute 20 | of the base object or the first found delegate does not have an attribute 21 | ``attribute_name``. 22 | 23 | This allows ducktyping of the decorated method based on 24 | ``delegate.attribute_name``. Here ``delegate`` is the first item in 25 | ``delegate_names`` for which ``hasattr(object, delegate) is True``. 26 | 27 | See https://docs.python.org/3/howto/descriptor.html for an explanation of 28 | descriptors. 29 | """ 30 | def __init__(self, fn, delegate_names): 31 | self.fn = fn 32 | self.delegate_names = delegate_names 33 | 34 | # update the docstring of the descriptor 35 | update_wrapper(self, fn) 36 | 37 | def __get__(self, obj, type=None): 38 | # raise an AttributeError if the attribute is not present on the object 39 | if obj is not None: 40 | # delegate only on instances, not the classes. 41 | # this is to allow access to the docstrings. 42 | for delegate_name in self.delegate_names: 43 | try: 44 | attrgetter(delegate_name)(obj) 45 | except AttributeError: 46 | continue 47 | else: 48 | break 49 | else: 50 | attrgetter(self.delegate_names[-1])(obj) 51 | 52 | # lambda, but not partial, allows help() to work with update_wrapper 53 | out = (lambda *args, **kwargs: self.fn(obj, *args, **kwargs)) 54 | # update the docstring of the returned function 55 | update_wrapper(out, self.fn) 56 | return out 57 | 58 | 59 | def if_has_delegate(delegate): 60 | """Wrap a delegated instance attribute function. 61 | 62 | Creates a decorator for methods that are delegated in the presence of a 63 | results wrapper. This enables duck-typing by ``hasattr`` returning True 64 | according to the sub-estimator. 65 | 66 | This function was adapted from scikit-learn, which defines 67 | ``if_delegate_has_method``, but operates differently by injecting methods 68 | not based on method presence, but by delegate presence. 69 | 70 | Examples 71 | -------- 72 | >>> from pmdarima.utils.metaestimators import if_has_delegate 73 | >>> 74 | >>> class A(object): 75 | ... @if_has_delegate('d') 76 | ... def func(self): 77 | ... return True 78 | >>> 79 | >>> a = A() 80 | >>> # the delegate does not exist yet 81 | >>> assert not hasattr(a, 'func') 82 | >>> # inject the attribute 83 | >>> a.d = None 84 | >>> assert hasattr(a, 'func') and a.func() 85 | 86 | Parameters 87 | ---------- 88 | delegate : string, list of strings or tuple of strings 89 | Name of the sub-estimator that can be accessed as an attribute of the 90 | base object. If a list or a tuple of names are provided, the first 91 | sub-estimator that is an attribute of the base object will be used. 
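Returns ------- decorator : callable A decorator that wraps the target function in a ``_IffHasDelegate`` descriptor bound to the given delegate name(s).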
92 | """ 93 | if isinstance(delegate, list): 94 | delegate = tuple(delegate) 95 | if not isinstance(delegate, tuple): 96 | delegate = (delegate,) 97 | 98 | return lambda fn: _IffHasDelegate(fn, delegate) 99 | -------------------------------------------------------------------------------- /pmdarima/utils/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | 5 | import numpy 6 | from numpy.distutils.misc_util import Configuration 7 | 8 | from pmdarima._build_utils import get_blas_info 9 | 10 | 11 | def configuration(parent_package="", top_path=None): 12 | cblas_libs, blas_info = get_blas_info() 13 | 14 | # Use this rather than cblas_libs so we don't fail on Windows 15 | libraries = [] 16 | if os.name == 'posix': 17 | cblas_libs.append('m') 18 | libraries.append('m') 19 | 20 | config = Configuration("utils", parent_package, top_path) 21 | config.add_extension("_array", 22 | sources=["_array.pyx"], 23 | include_dirs=[numpy.get_include(), 24 | blas_info.pop('include_dirs', [])], 25 | libraries=libraries, 26 | extra_compile_args=blas_info.pop( 27 | 'extra_compile_args', []), 28 | **blas_info) 29 | 30 | config.add_subpackage('tests') 31 | 32 | return config 33 | 34 | 35 | if __name__ == "__main__": 36 | from numpy.distutils.core import setup 37 | setup(**configuration().todict()) 38 | -------------------------------------------------------------------------------- /pmdarima/utils/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alkaline-ml/pmdarima/64cee30970eec69ff689e1c68a2db41613baf0c3/pmdarima/utils/tests/__init__.py -------------------------------------------------------------------------------- /pmdarima/utils/tests/test_meta.py: -------------------------------------------------------------------------------- 1 | 2 | from pmdarima.utils.metaestimators import if_has_delegate 3 | 4 | 5 | class _IfHasDelegateTester(object): 6 | def __init__(self): 7 | pass 8 | 9 | def fit(self): 10 | self.a_ = None 11 | return self 12 | 13 | @if_has_delegate('a_') 14 | def predict(self): 15 | return True 16 | 17 | @if_has_delegate(['b_', 'a_']) 18 | def predict2(self): 19 | return True 20 | 21 | 22 | def test_single_delegate(): 23 | # show it passes for a "fit" 24 | assert _IfHasDelegateTester().fit().predict() 25 | assert not hasattr(_IfHasDelegateTester(), 'predict') 26 | 27 | 28 | def test_multiple_delegates(): 29 | assert _IfHasDelegateTester().fit().predict2() 30 | assert not hasattr(_IfHasDelegateTester(), 'predict2') 31 | -------------------------------------------------------------------------------- /pmdarima/utils/tests/test_show_versions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pmdarima as pm 4 | from pmdarima.utils._show_versions import _get_deps_info 5 | 6 | 7 | # Just show it doesn't blow up... 
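# (it prints a 'System:' block followed by a 'Python dependencies:' block, per show_versions above)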
8 | def test_show_versions(): 9 | pm.show_versions() 10 | 11 | 12 | def test_show_versions_when_not_present(): 13 | deps = ['big-ol-fake-pkg'] 14 | assert _get_deps_info(deps=deps)['big-ol-fake-pkg'] is None 15 | -------------------------------------------------------------------------------- /pmdarima/utils/tests/test_vis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from unittest.mock import patch 4 | import numpy as np 5 | import pytest 6 | 7 | 8 | class MockACPlot: 9 | def __init__(self, series): 10 | self.series = series 11 | self.showed = False 12 | 13 | def show(self): 14 | self.showed = True 15 | return self 16 | 17 | 18 | class MockPlottable: 19 | def __init__(self): 20 | self.showed = False 21 | 22 | def show(self): 23 | self.showed = True 24 | return self 25 | 26 | 27 | # ACF/PACF 28 | class MockTSAPlots: 29 | plot_acf = plot_pacf = (lambda **kwargs: MockPlottable()) 30 | 31 | 32 | # TODO: can we get this to work eventually? 33 | if False: 34 | @pytest.mark.parametrize('show', [True, False]) 35 | def test_visualizations(show): 36 | with patch('statsmodels.graphics.tsaplots', MockTSAPlots): 37 | 38 | # Have to import AFTER the patch, since the pm.__init__ will 39 | # promptly import the visualization suite, which overwrites the 40 | # patch 41 | from pmdarima.utils import visualization 42 | dataset = np.random.RandomState(42).rand(150) 43 | 44 | # ac_plot = pm.autocorr_plot(dataset, show=show) 45 | acf_plot = visualization.plot_acf(dataset, show=show) 46 | pacf_plot = visualization.plot_pacf(dataset, show=show) 47 | 48 | # assert ac_plot.showed is show 49 | assert acf_plot.showed is show 50 | assert pacf_plot.showed is show 51 | -------------------------------------------------------------------------------- /pmdarima/utils/tests/test_wrapped.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pmdarima as pm 4 | from pmdarima.utils.wrapped import acf, pacf 5 | 6 | import statsmodels.api as sm 7 | import numpy as np 8 | 9 | import pytest 10 | 11 | y = pm.datasets.load_wineind() 12 | 13 | 14 | @pytest.mark.parametrize( 15 | 'wrapped_func,native_func', [ 16 | pytest.param(sm.tsa.stattools.acf, acf), 17 | pytest.param(sm.tsa.stattools.pacf, pacf) 18 | ]) 19 | def test_wrapped_functions(wrapped_func, native_func): 20 | sm_res = wrapped_func(y) # type: np.ndarray 21 | pm_res = native_func(y) 22 | assert np.allclose(sm_res, pm_res) 23 | 24 | # Show the docstrings are the same 25 | assert wrapped_func.__doc__ == native_func.__doc__ 26 | -------------------------------------------------------------------------------- /pmdarima/utils/wrapped.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Taylor G Smith 4 | # 5 | # Wrapped functions 6 | from functools import wraps 7 | import warnings 8 | 9 | from statsmodels.tsa.stattools import acf as sm_acf, pacf as sm_pacf 10 | 11 | __all__ = [ 12 | 'acf', 13 | 'pacf' 14 | ] 15 | 16 | # TODO: remove all explicit args/kwargs, making them *args, **kwargs 17 | 18 | 19 | def inheritdoc(parent): 20 | """Inherit documentation from a parent 21 | 22 | Parameters 23 | ---------- 24 | parent : callable 25 | The parent function or class that contains the sought-after 26 | docstring. If it doesn't have a docstring, this might behave 27 | in unexpected ways. 28 | 29 | Examples 30 | -------- 31 | >>> def a(x=1): 32 | ...
'''This is documentation''' 33 | ... return x 34 | ... 35 | >>> @inheritdoc(a) 36 | ... def b(x): 37 | ... return 2 * a(x) 38 | ... 39 | >>> print(b.__doc__) 40 | This is documentation 41 | 42 | >>> print(b(2)) 43 | 4 44 | """ 45 | def wrapper(func): 46 | # Assign the parent docstring to the child 47 | func.__doc__ = parent.__doc__ 48 | 49 | @wraps(func) 50 | def caller(*args, **kwargs): 51 | return func(*args, **kwargs) 52 | return caller 53 | return wrapper 54 | 55 | 56 | @inheritdoc(parent=sm_acf) 57 | def acf( 58 | x, 59 | nlags=None, 60 | qstat=False, 61 | fft=None, 62 | alpha=None, 63 | missing='none', 64 | adjusted=False, 65 | ): 66 | return sm_acf( 67 | x=x, 68 | nlags=nlags, 69 | qstat=qstat, 70 | fft=fft, 71 | alpha=alpha, 72 | missing=missing, 73 | adjusted=adjusted, 74 | ) 75 | 76 | 77 | @inheritdoc(parent=sm_pacf) 78 | def pacf(x, nlags=None, method='ywadjusted', alpha=None): 79 | # Handle kwarg deprecation in statsmodels 0.13.0 80 | if "unbiased" in method: 81 | warnings.warn( 82 | "The `*unbiased` methods have been deprecated in " 83 | "statsmodels >= 0.13.0. Please use `*adjusted` instead.", 84 | DeprecationWarning 85 | ) 86 | method = method.replace("unbiased", "adjusted") 87 | elif method in ("ydu", "ywu", "ldu"): 88 | warnings.warn( 89 | "The `ydu`, `ywu`, and `ldu` methods have been deprecated in " 90 | "statsmodels >= 0.13.0. Please use `yda`, `ywa`, and `lda` " 91 | "instead.", 92 | DeprecationWarning 93 | ) 94 | method = method.replace("u", "a") 95 | 96 | return sm_pacf(x=x, nlags=nlags, method=method, alpha=alpha) 97 | -------------------------------------------------------------------------------- /pmdarima/warnings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __all__ = [ 4 | 'ModelFitWarning' 5 | ] 6 | 7 | 8 | class ModelFitWarning(UserWarning): 9 | """Generic warning used for a model fit that might fail. More descriptive 10 | than simply trying to lump everything into a default UserWarning, which 11 | gives the user no insight into the reason for the warning apart from a 12 | (potentially) cryptic message. This allows the user to understand the 13 | warning emanates from an attempted model fit and originates from within 14 | the pmdarima package. 
15 | """ 16 | pass 17 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "numpy==1.21.2; python_version < '3.10'", 4 | "numpy==1.21.6; python_version == '3.10' and platform_system != 'Windows'", 5 | "numpy==1.22.3; python_version == '3.10' and platform_system == 'Windows'", 6 | "numpy==1.23.2; python_version == '3.11'", 7 | "numpy==1.26.0; python_version == '3.12'", 8 | "scipy==1.3.2; python_version <= '3.8' and platform_machine != 'aarch64'", 9 | "scipy==1.5.3; python_version <= '3.8' and platform_machine == 'aarch64'", 10 | "scipy==1.5.4; python_version == '3.9'", 11 | "scipy==1.7.2; python_version == '3.10'", 12 | "scipy==1.9.3; python_version == '3.11'", 13 | "scipy==1.11.2; python_version == '3.12'", 14 | "statsmodels==0.13.2; python_version <= '3.10'", 15 | "statsmodels==0.13.3; python_version == '3.11'", 16 | "statsmodels==0.14.0; python_version == '3.12'", 17 | "cython>=0.29,!=0.29.18,!=0.29.31", 18 | "setuptools", 19 | ] 20 | build-backend = "setuptools.build_meta:__legacy__" # TODO: Don't use legacy backend 21 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | joblib>=0.11 2 | Cython>=0.29,!=0.29.18,!=0.29.31 3 | numpy>=1.21.2,<2.0.0 4 | pandas>=0.19 5 | scikit-learn>=0.22 6 | scipy>=1.3.2 7 | statsmodels>=0.13.2 8 | urllib3 9 | setuptools>=38.6.0,!=50.0.0 10 | packaging>=17.1 # Bundled with setuptools, but want to be explicit 11 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # This is adapted from sklearn's setup cfg. 2 | 3 | [aliases] 4 | test = pytest 5 | 6 | [tool:pytest] 7 | filterwarnings = 8 | # Warnings that we raise: 9 | ignore::UserWarning 10 | 11 | # Warnings that statsmodels raises a lot of 12 | ignore::statsmodels.tools.sm_exceptions.HessianInversionWarning 13 | 14 | # statsmodels warning that doesn't apply to us 15 | ignore:fft=True 16 | 17 | # joblib warning that we can't control (fixed in this commit https://github.com/joblib/joblib/commit/a861f43167ab63fe683c45679e34143751cb976d, but not deployed yet) 18 | # This is the solution from pytest: https://github.com/pytest-dev/pytest/issues/4116#issuecomment-429101898 19 | ignore:tostring.*is deprecated 20 | 21 | # This is fixed in patsy... No idea which dependency is actually calling patsy (https://github.com/pydata/patsy/blob/4c613d0ad3009044ca3aee5a5d70bd56af8f396b/patsy/constraint.py#L13-L16) 22 | ignore:Using or importing the ABCs 23 | 24 | [metadata] 25 | description-file = README.md 26 | --------------------------------------------------------------------------------
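# A minimal environment check, assuming pmdarima is installed against the
# pins above (requirements.txt / pyproject.toml): show_versions() prints the
# system and dependency info gathered in pmdarima/utils/_show_versions.py.
import pmdarima as pm

pm.show_versions()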