├── .circleci └── config.yml ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ ├── config.yml │ ├── documentation-issue.yml │ └── feature-request.yml ├── dependabot.yml ├── release-drafter.yml └── workflows │ ├── build-docs.yaml │ ├── ci.yml │ ├── lint.yml │ ├── no-response.yaml │ └── python-publish.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── action_files ├── imports_with_code.py └── test_models │ ├── requirements.txt │ └── src │ ├── data.py │ ├── evaluation.py │ ├── evaluation_temporal.py │ ├── models.py │ └── models_temporal.py ├── environment.yml ├── experiments ├── hierarchical_baselines │ ├── README.md │ ├── environment.yml │ ├── nbs │ │ ├── run_favorita_baselines.ipynb │ │ ├── run_favorita_hiere2e.ipynb │ │ └── run_hiere2e.ipynb │ └── src │ │ ├── run_baselines.py │ │ └── run_favorita_baselines.py └── libs-comparison │ ├── README.md │ ├── environment.yml │ ├── nbs │ └── parse-results.ipynb │ ├── results.png │ └── src │ ├── data.py │ ├── fable.R │ ├── sktime.py │ └── statsforecast.py ├── hierarchicalforecast ├── __init__.py ├── _modidx.py ├── _nbdev.py ├── core.py ├── evaluation.py ├── methods.py ├── probabilistic_methods.py └── utils.py ├── nbs ├── .gitattributes ├── .gitignore ├── _quarto.yml ├── custom.yml ├── examples │ ├── .nodoc │ ├── .notest │ ├── AustralianDomesticTourism-Bootstraped-Intervals.ipynb │ ├── AustralianDomesticTourism-Intervals.ipynb │ ├── AustralianDomesticTourism-Permbu-Intervals.ipynb │ ├── AustralianDomesticTourism.ipynb │ ├── AustralianDomesticTourismCrossTemporal.ipynb │ ├── AustralianDomesticTourismTemporal.ipynb │ ├── AustralianPrisonPopulation.ipynb │ ├── Installation.ipynb │ ├── Introduction.ipynb │ ├── LocalGlobalAggregation.ipynb │ ├── M3withThief.ipynb │ ├── MLFrameworksExample.ipynb │ ├── NonNegativeReconciliation.ipynb │ ├── TourismLarge-Evaluation.ipynb │ ├── TourismSmall.ipynb │ ├── TourismSmallPolars.ipynb │ ├── imgs │ │ ├── AustralianDomesticTourism-results-fable.png │ │ ├── AustralianPrisonPopulation-results-fable.png │ │ ├── hierarchical_motivation1.png │ │ └── hierarchical_motivation2.png │ └── index.qmd ├── favicon_png.png ├── index.ipynb ├── mint.json ├── nbdev.yml ├── sidebar.yml ├── src │ ├── core.ipynb │ ├── evaluation.ipynb │ ├── methods.ipynb │ ├── probabilistic_methods.ipynb │ └── utils.ipynb └── styles.css ├── pyproject.toml ├── settings.ini ├── setup.py └── tests ├── __init__.py └── test_benchmark.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | jobs: 3 | nbdev-tests: 4 | resource_class: xlarge 5 | docker: 6 | - image: python:3.10-slim 7 | steps: 8 | - checkout 9 | - run: 10 | name: Install dependencies 11 | command: | 12 | pip install uv 13 | uv venv --python 3.10 14 | - run: 15 | name: Run nbdev tests 16 | command: | 17 | source .venv/bin/activate 18 | uv pip install ".[dev]" 19 | nbdev_test --do_print --timing 20 | test-model-performance: 21 | resource_class: large 22 | docker: 23 | - image: python:3.10-slim 24 | steps: 25 | - checkout 26 | - run: 27 | name: Install dependencies 28 | command: | 29 | pip install uv 30 | uv venv --python 3.10 31 | - run: 32 | name: Run model performance tests 33 | command: | 34 | source .venv/bin/activate 35 | uv pip install ".[dev]" 36 | cd ./action_files/test_models/ 37 | uv pip install -r requirements.txt 38 | python -m src.models 39 | python -m src.evaluation 40 | cd ../../ 41 | - 
store_artifacts:
 42 |           path: ./action_files/test_models/data/evaluation.csv
 43 |           destination: evaluation.csv
 44 |   test-model-performance-temporal:
 45 |     resource_class: large
 46 |     docker:
 47 |       - image: python:3.10-slim
 48 |     steps:
 49 |       - checkout
 50 |       - run:
 51 |           name: Install dependencies
 52 |           command: |
 53 |             pip install uv
 54 |             uv venv --python 3.10
 55 |       - run:
 56 |           name: Run model performance tests
 57 |           command: |
 58 |             source .venv/bin/activate
 59 |             uv pip install ".[dev]"
 60 |             cd ./action_files/test_models/
 61 |             uv pip install -r requirements.txt
 62 |             python -m src.models_temporal
 63 |             python -m src.evaluation_temporal
 64 |             cd ../../
 65 |       - store_artifacts:
 66 |           path: ./action_files/test_models/data/evaluation.csv
 67 |           destination: evaluation.csv
 68 | workflows:
 69 |   sample:
 70 |     jobs:
 71 |       - nbdev-tests
 72 |       - test-model-performance
 73 |       - test-model-performance-temporal
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | *.ipynb merge=nbdev-merge
 2 | nbs/** linguist-documentation
 3 | experiments/** linguist-documentation
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report.yml:
--------------------------------------------------------------------------------
 1 | name: Bug report
 2 | title: "[] "
 3 | description: Problems and issues with the library's code
 4 | labels: [bug]
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: |
 9 |         Thank you for reporting the problem!
 10 |         Please make sure what you are reporting is a bug with reproducible steps. To ask questions
 11 |         or share ideas, please post on our [Slack community](https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1h77esh5y-iL1m8N0F7qV1HmH~0KYeAQ) instead.
 12 | 
 13 |   - type: textarea
 14 |     attributes:
 15 |       label: What happened + What you expected to happen
 16 |       description: Describe 1. the bug 2. expected behavior 3. useful information (e.g., logs)
 17 |       placeholder: >
 18 |         Please provide the context in which the problem occurred and explain what happened. Further,
 19 |         please also explain why you think the behaviour is erroneous. It is extremely helpful if you can
 20 |         copy and paste the fragment of logs showing the exact error messages or wrong behaviour here.
 21 | 
 22 |         **NOTE**: please copy and paste text instead of taking screenshots of it for easy future search.
 23 |     validations:
 24 |       required: true
 25 | 
 26 |   - type: textarea
 27 |     attributes:
 28 |       label: Versions / Dependencies
 29 |       description: Please specify the versions of the library, Python, OS, and other libraries that are used.
 30 |       placeholder: >
 31 |         Please specify the versions of dependencies.
 32 |     validations:
 33 |       required: true
 34 | 
 35 |   - type: textarea
 36 |     attributes:
 37 |       label: Reproduction script
 38 |       description: >
 39 |         Please provide a reproducible script. Providing a narrow reproduction (minimal / no external dependencies) will
 40 |         help us triage and address issues in a timely manner!
 41 |       placeholder: >
 42 |         Please provide a short code snippet (less than 50 lines if possible) that can be copy-pasted to
 43 |         reproduce the issue. The snippet should have **no external library dependencies**
 44 |         (i.e., use fake or mock data / environments).
 45 | 
 46 |         **NOTE**: If the code snippet cannot be run by itself, the issue will be marked as "needs-repro-script"
 47 |         until the repro instruction is updated.
 48 |     validations:
 49 |       required: true
 50 | 
 51 |   - type: dropdown
 52 |     attributes:
 53 |       label: Issue Severity
 54 |       description: |
 55 |         How does this issue affect your experience as a user?
 56 |       multiple: false
 57 |       options:
 58 |         - "Low: It annoys or frustrates me."
 59 |         - "Medium: It is a significant difficulty but I can work around it."
 60 |         - "High: It blocks me from completing my task."
 61 |     validations:
 62 |       required: false
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
 1 | blank_issues_enabled: true
 2 | contact_links:
 3 |   - name: Ask a question or get support
 4 |     url: https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1h77esh5y-iL1m8N0F7qV1HmH~0KYeAQ
 5 |     about: Ask a question or request support for using a library of the nixtlaverse
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentation-issue.yml:
--------------------------------------------------------------------------------
 1 | name: Documentation
 2 | title: "[] "
 3 | description: Report an issue with the library documentation
 4 | labels: [documentation]
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: Thank you for helping us improve the library documentation!
 9 | 
 10 |   - type: textarea
 11 |     attributes:
 12 |       label: Description
 13 |       description: |
 14 |         Tell us about the change you'd like to see. For example, "I'd like to
 15 |         see more examples of how to use `cross_validation`."
 16 |     validations:
 17 |       required: true
 18 | 
 19 |   - type: textarea
 20 |     attributes:
 21 |       label: Link
 22 |       description: |
 23 |         If the problem is related to an existing section, please add a link to
 24 |         the section.
 25 |     validations:
 26 |       required: false
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.yml:
--------------------------------------------------------------------------------
 1 | name: Library feature request
 2 | description: Suggest an idea for the project
 3 | title: "[] "
 4 | labels: [enhancement, feature]
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: |
 9 |         Thank you for finding the time to propose a new feature!
 10 |         We really appreciate the community's efforts to improve the nixtlaverse.
 11 | 
 12 |   - type: textarea
 13 |     attributes:
 14 |       label: Description
 15 |       description: A short description of your feature
 16 | 
 17 |   - type: textarea
 18 |     attributes:
 19 |       label: Use case
 20 |       description: >
 21 |         Describe the use case of your feature request. It will help us understand and
 22 |         prioritize the feature request.
 23 |       placeholder: >
 24 |         Rather than telling us how you might implement this feature, try to take a
 25 |         step back and describe what you are trying to achieve.
26 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: weekly 7 | groups: 8 | ci-dependencies: 9 | patterns: ["*"] 10 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$NEXT_PATCH_VERSION' 2 | tag-template: 'v$NEXT_PATCH_VERSION' 3 | categories: 4 | - title: 'New Features' 5 | label: 'feature' 6 | - title: 'Enhancement' 7 | label: 'enhancement' 8 | - title: 'Bug Fixes' 9 | label: 'fix' 10 | - title: 'Breaking Change' 11 | label: 'breaking change' 12 | - title: 'Documentation' 13 | label: 'documentation' 14 | - title: 'Dependencies' 15 | label: 'dependencies' 16 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 17 | template: | 18 | ## Changes 19 | $CHANGES 20 | -------------------------------------------------------------------------------- /.github/workflows/build-docs.yaml: -------------------------------------------------------------------------------- 1 | name: "build-docs" 2 | on: 3 | release: 4 | types: [released] 5 | pull_request: 6 | branches: ["main"] 7 | workflow_dispatch: 8 | 9 | defaults: 10 | run: 11 | shell: bash 12 | 13 | jobs: 14 | build-docs: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Clone repo 18 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 19 | - name: Clone docs repo 20 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 21 | with: 22 | repository: Nixtla/docs 23 | ref: scripts 24 | path: docs-scripts 25 | - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # 5.6.0 26 | with: 27 | python-version: '3.10' 28 | cache-dependency-path: settings.ini 29 | 30 | - name: Install dependencies 31 | run: pip install uv && uv pip install ".[dev]" --system 32 | 33 | - name: Build docs 34 | run: | 35 | mkdir nbs/_extensions 36 | cp -r docs-scripts/mintlify/ nbs/_extensions/ 37 | python docs-scripts/update-quarto.py 38 | nbdev_docs 39 | 40 | - name: Apply final formats 41 | run: bash ./docs-scripts/docs-final-formatting.bash 42 | 43 | - name: Copy over necessary assets 44 | run: | 45 | cp nbs/mint.json _docs/mint.json 46 | cp docs-scripts/imgs/* _docs/ 47 | 48 | - name: Deploy to Mintlify Docs 49 | if: | 50 | github.event_name == 'release' || 51 | github.event_name == 'workflow_dispatch' 52 | uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 53 | with: 54 | github_token: ${{ secrets.GITHUB_TOKEN }} 55 | publish_branch: docs 56 | publish_dir: ./_docs 57 | user_name: github-actions[bot] 58 | user_email: 41898282+github-actions[bot]@users.noreply.github.com 59 | 60 | - name: Trigger mintlify workflow 61 | if: | 62 | github.event_name == 'release' || 63 | github.event_name == 'workflow_dispatch' 64 | uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 65 | with: 66 | github-token: ${{ secrets.DOCS_WORKFLOW_TOKEN }} 67 | script: | 68 | await github.rest.actions.createWorkflowDispatch({ 69 | owner: 'nixtla', 70 | repo: 'docs', 71 | workflow_id: 'mintlify-action.yml', 72 | ref: 'main', 73 | }); 74 | 75 | - name: Configure redirects for gh-pages 76 | run: python docs-scripts/configure-redirects.py hierarchicalforecast 77 | 78 | - name: Deploy to 
GitHub Pages
 79 |       if: |
 80 |         github.event_name == 'release' ||
 81 |         github.event_name == 'workflow_dispatch'
 82 |       uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0
 83 |       with:
 84 |         github_token: ${{ secrets.GITHUB_TOKEN }}
 85 |         publish_branch: gh-pages
 86 |         publish_dir: ./gh-pages
 87 |         user_name: github-actions[bot]
 88 |         user_email: 41898282+github-actions[bot]@users.noreply.github.com
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | concurrency:
 10 |   group: ${{ github.workflow }}-${{ github.ref }}
 11 |   cancel-in-progress: true
 12 | 
 13 | jobs:
 14 |   run-tests:
 15 |     runs-on: ${{ matrix.os }}
 16 |     strategy:
 17 |       fail-fast: false
 18 |       matrix:
 19 |         os: [ubuntu-latest, macos-latest, windows-latest]
 20 |         python-version: ["3.9", "3.10", "3.11", "3.12"]
 21 |     steps:
 22 |       - name: Clone repo
 23 |         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 24 | 
 25 |       - name: Set up environment
 26 |         uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
 27 |         with:
 28 |           python-version: ${{ matrix.python-version }}
 29 | 
 30 |       - name: Install pip requirements
 31 |         run: pip install uv && uv pip install --system ".[dev]"
 32 | 
 33 |       - name: Tests
 34 |         run: nbdev_test --do_print --timing --n_workers 0
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
 1 | name: Lint
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | jobs:
 10 |   lint:
 11 |     runs-on: ubuntu-latest
 12 |     steps:
 13 |       - name: Clone repo
 14 |         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 15 | 
 16 |       - name: Set up python
 17 |         uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
 18 |         with:
 19 |           python-version: 3.9
 20 | 
 21 |       - name: Install dependencies
 22 |         run: pip install black nbdev pre-commit
 23 | 
 24 |       - name: Run pre-commit
 25 |         run: pre-commit run --files hierarchicalforecast/*
--------------------------------------------------------------------------------
/.github/workflows/no-response.yaml:
--------------------------------------------------------------------------------
 1 | name: No Response Bot
 2 | 
 3 | on:
 4 |   issue_comment:
 5 |     types: [created]
 6 |   schedule:
 7 |     - cron: '0 4 * * *'
 8 | 
 9 | jobs:
 10 |   noResponse:
 11 |     runs-on: ubuntu-latest
 12 |     steps:
 13 |       - uses: lee-dohm/no-response@9bb0a4b5e6a45046f00353d5de7d90fb8bd773bb # v0.5.0
 14 |         with:
 15 |           closeComment: >
 16 |             This issue has been automatically closed because it has been awaiting a response for too long.
 17 |             When you have time to work with the maintainers to resolve this issue, please post a new comment and it will be re-opened.
 18 |             If the issue has been locked for editing by the time you return to it, please open a new issue and reference this one.
19 | daysUntilClose: 30 20 | responseRequiredLabel: awaiting response 21 | token: ${{ github.token }} 22 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 25 | - name: Set up Python 26 | uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # 5.6.0 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | - name: Build package 34 | run: python -m build 35 | - name: Publish package 36 | uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.PYPI_API_TOKEN }} 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | .last_checked 31 | .gitattributes 32 | .gitconfig 33 | .DS_Store 34 | quarto-macos.pkg 35 | action_files/test_models/data/ 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | *.csv 139 | *.xls 140 | *.p 141 | *.zip 142 | */data/* 143 | 144 | Gemfile.lock 145 | _docs/ 146 | sidebar.yml 147 | _proc/ 148 | 149 | # VS Code project settings 150 | .vscode 151 | /tmp -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "docs/assets"] 2 | path = docs/assets 3 | url = https://github.com/Nixtla/styles.git 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: true 2 | 3 | repos: 4 | - repo: local 5 | hooks: 6 | - id: imports_with_code 7 | name: Cells with imports and code 8 | entry: python action_files/imports_with_code.py 9 | language: system 10 | - repo: https://github.com/fastai/nbdev 11 | rev: 2.2.10 12 | hooks: 13 | - id: nbdev_clean 14 | - id: nbdev_export 15 | - repo: https://github.com/astral-sh/ruff-pre-commit 16 | rev: v0.2.1 17 | hooks: 18 | - id: ruff 19 | - repo: https://github.com/pre-commit/mirrors-mypy 20 | rev: v1.8.0 21 | hooks: 22 | - id: mypy 23 | args: [--ignore-missing-imports] 24 | exclude: (experiments|setup.py) -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | ops@nixtla.io. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. 
Violating these terms may lead to a temporary or
 93 | permanent ban.
 94 | 
 95 | ### 3. Temporary Ban
 96 | 
 97 | **Community Impact**: A serious violation of community standards, including
 98 | sustained inappropriate behavior.
 99 | 
 100 | **Consequence**: A temporary ban from any sort of interaction or public
 101 | communication with the community for a specified period of time. No public or
 102 | private interaction with the people involved, including unsolicited interaction
 103 | with those enforcing the Code of Conduct, is allowed during this period.
 104 | Violating these terms may lead to a permanent ban.
 105 | 
 106 | ### 4. Permanent Ban
 107 | 
 108 | **Community Impact**: Demonstrating a pattern of violation of community
 109 | standards, including sustained inappropriate behavior, harassment of an
 110 | individual, or aggression toward or disparagement of classes of individuals.
 111 | 
 112 | **Consequence**: A permanent ban from any sort of public interaction within
 113 | the community.
 114 | 
 115 | ## Attribution
 116 | 
 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
 118 | version 2.0, available at
 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
 120 | 
 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
 122 | enforcement ladder](https://github.com/mozilla/diversity).
 123 | 
 124 | [homepage]: https://www.contributor-covenant.org
 125 | 
 126 | For answers to common questions about this code of conduct, see the FAQ at
 127 | https://www.contributor-covenant.org/faq. Translations are available at
 128 | https://www.contributor-covenant.org/translations.
 129 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # How to contribute
 2 | 
 3 | ## Did you find a bug?
 4 | 
 5 | * Ensure the bug was not already reported by searching on GitHub under Issues.
 6 | * If you're unable to find an open issue addressing the problem, open a new one. Be sure to include a title and clear description, as much relevant information as possible, and a code sample or an executable test case demonstrating the expected behavior that is not occurring.
 7 | * Be sure to add the complete error messages.
 8 | 
 9 | ## Do you have a feature request?
 10 | 
 11 | * Ensure that it hasn't already been implemented in the `main` branch of the repository and that there isn't an issue requesting it yet.
 12 | * Open a new issue and make sure to describe it clearly, mention how it improves the project and why it's useful.
 13 | 
 14 | ## Do you want to fix a bug or implement a feature?
 15 | 
 16 | Bug fixes and features are added through pull requests (PRs).
 17 | 
 18 | ## PR submission guidelines
 19 | 
 20 | * Keep each PR focused. While it's more convenient, do not combine several unrelated fixes together. Create as many branches as needed to keep each PR focused.
 21 | * Do not mix style changes/fixes with "functional" changes. It's very difficult to review such PRs, and they will most likely get rejected.
 22 | * Do not add/remove vertical whitespace. Preserve the original style of the file you edit as much as you can.
 23 | * Do not turn an already submitted PR into your development playground. If, after you submit a PR, you discover that more work is needed, close the PR, do the required work, and then submit a new PR. Otherwise each of your commits requires attention from the maintainers of the project.
 24 | * If, however, you submitted a PR and received a request for changes, you should proceed with commits inside that PR, so that the maintainer can see the incremental fixes and won't need to review the whole PR again. In the exceptional case where you realize it will take many commits to complete the requests, it's probably best to close the PR, do the work, and then submit it again. Use common sense where you'd choose one way over another.
 25 | 
 26 | ### Local setup for working on a PR
 27 | 
 28 | #### Clone the repository
 29 | * HTTPS: `git clone https://github.com/Nixtla/hierarchicalforecast.git`
 30 | * SSH: `git clone git@github.com:Nixtla/hierarchicalforecast.git`
 31 | * GitHub CLI: `gh repo clone Nixtla/hierarchicalforecast`
 32 | 
 33 | #### Set up a conda environment
 34 | The repo comes with an `environment.yml` file which contains the libraries needed to run all the tests. In order to set up the environment you must have `conda` installed; we recommend [miniconda](https://docs.conda.io/en/latest/miniconda.html).
 35 | 
 36 | Once you have `conda`, go to the top-level directory of the repository and run the following lines:
 37 | ```
 38 | conda create -n hierarchicalforecast python=3.10
 39 | conda activate hierarchicalforecast
 40 | ```
 41 | Then, run the following command:
 42 | ```
 43 | conda env update -f environment.yml
 44 | ```
 45 | 
 46 | #### Install the library
 47 | Once you have your environment set up, activate it using `conda activate hierarchicalforecast` and then install the library in editable mode using `pip install -e ".[dev]"`.
 48 | 
 49 | #### Install git hooks
 50 | Before making any changes to the code, please install the git hooks and checks that run automatic scripts during each commit and merge to strip the notebooks of superfluous metadata (and avoid merge conflicts).
 51 | ```
 52 | nbdev_install_hooks
 53 | pre-commit install
 54 | ```
 55 | 
 56 | ### Preview Changes
 57 | You can preview changes in your local browser before pushing by using `nbdev_preview`.
 58 | 
 59 | ### Build the library
 60 | The library is built using the notebooks contained in the `nbs` folder. If you want to make any changes to the library, you have to find the relevant notebook, make your changes, and then call
 61 | ```
 62 | nbdev_export
 63 | ```
 64 | 
 65 | ### Run tests
 66 | If you're working on the local interface, you can just use `nbdev_test --n_workers 1 --do_print --timing`.
 67 | 
 68 | ### Clean the notebooks' outputs
 69 | Since the notebooks' output cells can vary from run to run (even if they produce the same outputs), the notebooks are cleaned before committing them. Please make sure to run `nbdev_clean --clear_all` before committing your changes. If you clean the library's notebooks with this command, please revert the changes it makes to the example notebooks with `git checkout nbs/examples`, unless you intend to change the examples.
 70 | 
 71 | 
 72 | ## Do you want to contribute to the documentation?
 73 | 
 74 | * Docs are automatically created from the notebooks in the `nbs` folder.
 75 | * In order to modify the documentation:
 76 |   1. Find the relevant notebook.
 77 |   2. Make your changes.
 78 |   3. Run all cells.
 79 |   4. If you are modifying library notebooks (not in `nbs/examples`), clean all outputs using `Edit > Clear All Outputs`.
 80 |   5. Run `nbdev_preview`.
 81 |   6. Clean the notebook metadata using `nbdev_clean`.
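
For quick reference, a typical local iteration combines the commands described in the sections above. This is a sketch of one common workflow, not a prescribed sequence; exact flags may vary with your nbdev version:
```
nbdev_export                                  # sync notebook changes into the hierarchicalforecast/ modules
nbdev_test --n_workers 1 --do_print --timing  # run the test suite locally
nbdev_clean --clear_all                       # strip outputs/metadata before committing
git checkout nbs/examples                     # restore example notebooks if you did not intend to change them
```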
82 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2022 Nixtla 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include settings.ini 2 | include LICENSE 3 | include CONTRIBUTING.md 4 | include README.md 5 | recursive-exclude * __pycache__ 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Nixtla  [![Slack](https://img.shields.io/badge/Slack-4A154B?&logo=slack&logoColor=white)](https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1pmhan9j5-F54XR20edHk0UtYAPcW4KQ) 2 | 3 |
 4 | # Hierarchical Forecast 👑
 5 | ### Probabilistic hierarchical forecasting with statistical and econometric methods
 6 | 
 7 | 
 8 | [![CI](https://github.com/Nixtla/hierarchicalforecast/actions/workflows/ci.yml/badge.svg)](https://github.com/Nixtla/hierarchicalforecast/actions/workflows/ci.yml)
 9 | [![Python](https://img.shields.io/pypi/pyversions/hierarchicalforecast)](https://pypi.org/project/hierarchicalforecast/)
 10 | [![PyPi](https://img.shields.io/pypi/v/hierarchicalforecast?color=blue)](https://pypi.org/project/hierarchicalforecast/)
 11 | [![conda-nixtla](https://img.shields.io/conda/vn/conda-forge/hierarchicalforecast?color=seagreen&label=conda)](https://anaconda.org/conda-forge/hierarchicalforecast)
 12 | [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://github.com/Nixtla/hierarchicalforecast/blob/main/LICENSE)
 13 | 
 14 | **HierarchicalForecast** offers a collection of cross-sectional and temporal reconciliation methods, including `BottomUp`, `TopDown`, `MiddleOut`, `MinTrace` and `ERM`, as well as probabilistic coherent prediction methods such as `Normality`, `Bootstrap`, and `PERMBU`.
 15 | 
 16 | 
 17 | ## 📚 Intro
 18 | Many time series datasets are organized into structures with different levels or hierarchies of aggregation. Examples include cross-sectional aggregations such as categories, brands, or geographical groupings, and temporal aggregations such as weeks, months, or years. Coherent forecasts across levels are necessary for consistent decision-making and planning. HierarchicalForecast offers reconciliation methods that render coherent forecasts across cross-sectional and temporal hierarchies.
 19 | 
 20 | ## 🎊 Features
 21 | 
 22 | * Classic reconciliation methods:
 23 |     - `BottomUp`: Simple addition of the bottom-level forecasts up to the upper levels (a toy sketch of this idea follows the list).
 24 |     - `TopDown`: Distributes the top-level forecasts through the hierarchy.
 25 | * Alternative reconciliation methods:
 26 |     - `MiddleOut`: Anchors the base predictions at a middle level. Levels above the middle level use a bottom-up approach, while levels below use a top-down approach.
 27 |     - `MinTrace`: Minimizes the total forecast variance over the space of coherent forecasts, using Minimum Trace reconciliation.
 28 |     - `ERM`: Optimizes the reconciliation matrix by minimizing an L1-regularized objective.
 29 | * Probabilistic coherent methods:
 30 |     - `Normality`: Uses the MinTrace closed-form variance-covariance matrix under a normality assumption.
 31 |     - `Bootstrap`: Generates a distribution of hierarchically reconciled predictions using Gamakumara's bootstrap approach.
 32 |     - `PERMBU`: Reconciles independent sample predictions by reinjecting multivariate dependence with estimated rank-permutation copulas, then performing a bottom-up aggregation.
 33 | * Temporal reconciliation methods:
 34 |     - All reconciliation methods (except the insample methods) are also available for temporal hierarchies.
 35 | 
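To make "coherent" concrete: the point-forecast methods above can roughly be viewed as computing `reconciled = S @ P @ base`, where `S` is the summing matrix of the hierarchy and `P` maps base forecasts to bottom-level forecasts. Below is a minimal, self-contained sketch of this idea with made-up numbers and plain NumPy; it illustrates the math only and is not the library's API:

```python
import numpy as np

# Toy two-level hierarchy: Total = A + B.
# The summing matrix S maps the 2 bottom series to all 3 series.
S = np.array([[1, 1],   # Total
              [1, 0],   # A
              [0, 1]])  # B

# Base forecasts are incoherent: 40 + 55 != 105.
y_hat = np.array([105.0, 40.0, 55.0])

# BottomUp reconciliation: P simply selects the bottom-level
# forecasts, and S re-aggregates them up the hierarchy.
P = np.array([[0, 1, 0],
              [0, 0, 1]])
y_rec = S @ P @ y_hat
print(y_rec)  # [95. 40. 55.] -> coherent, since 95 = 40 + 55
```

The methods differ mainly in how `P` is constructed (e.g., `MinTrace` chooses it to minimize forecast error variance), while the probabilistic methods extend the same idea to predictive distributions.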
 36 | Missing something? Please open an issue or write to us on [![Slack](https://img.shields.io/badge/Slack-4A154B?&logo=slack&logoColor=white)](https://join.slack.com/t/nixtlaworkspace/shared_invite/zt-135dssye9-fWTzMpv2WBthq8NK0Yvu6A)
 37 | 
 38 | ## 📖 Why?
 39 | 
 40 | **Short**: We want to contribute to the ML field by providing reliable baselines and benchmarks for the hierarchical forecasting task in industry and academia. Here's the complete [paper](https://arxiv.org/abs/2207.03517).
 41 | 
 42 | **Verbose**: `HierarchicalForecast` integrates publicly available processed datasets, evaluation metrics, and a curated set of standard statistical baselines. In this library we provide usage examples and references to extensive experiments where we showcase the baselines' use and evaluate the accuracy of their predictions. With this work, we hope to contribute to Machine Learning forecasting by bridging the gap to statistical and econometric modeling, as well as providing tools for the development of novel hierarchical forecasting algorithms rooted in a thorough comparison of these well-established models. We intend to continue maintaining and growing the repository, promoting collaboration across the forecasting community.
 43 | 
 44 | ## 💻 Installation
 45 | 
 46 | You can install `HierarchicalForecast` from the Python package index with [pip](https://pypi.org):
 47 | 
 48 | ```bash
 49 | pip install hierarchicalforecast
 50 | ```
 51 | 
 52 | You can also install `HierarchicalForecast` from [conda](https://anaconda.org) with:
 53 | 
 54 | ```bash
 55 | conda install -c conda-forge hierarchicalforecast
 56 | ```
 57 | 
 58 | 
 59 | ## 🧬 How to use
 60 | 
 61 | The following example needs `statsforecast` and `datasetsforecast` as additional packages. If not installed, install them via your preferred method, e.g. `pip install statsforecast datasetsforecast`.
 62 | The `datasetsforecast` library allows us to download hierarchical datasets, and we will use `statsforecast` to compute the base forecasts to be reconciled.
 63 | 
 64 | You can open a complete example in Colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nixtla/hierarchicalforecast/blob/main/nbs/examples/TourismSmall.ipynb)
 65 | 
 66 | Minimal Example:
 67 | ```python
 68 | # !pip install -U numba statsforecast datasetsforecast
 69 | import numpy as np
 70 | import pandas as pd
 71 | 
 72 | # obtain hierarchical dataset
 73 | from datasetsforecast.hierarchical import HierarchicalData
 74 | 
 75 | # compute base forecasts (not yet coherent)
 76 | from statsforecast.core import StatsForecast
 77 | from statsforecast.models import AutoARIMA, Naive
 78 | 
 79 | # obtain hierarchical reconciliation methods and evaluation
 80 | from hierarchicalforecast.core import HierarchicalReconciliation
 81 | from hierarchicalforecast.evaluation import evaluate
 82 | from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut
 83 | from utilsforecast.losses import mse
 84 | 
 85 | # Load TourismSmall dataset
 86 | Y_df, S, tags = HierarchicalData.load('./data', 'TourismSmall')
 87 | Y_df['ds'] = pd.to_datetime(Y_df['ds'])
 88 | S = S.reset_index(names="unique_id")
 89 | 
 90 | # split train/test sets
 91 | Y_test_df = Y_df.groupby('unique_id').tail(4)
 92 | Y_train_df = Y_df.drop(Y_test_df.index)
 93 | 
 94 | # Compute base auto-ARIMA predictions
 95 | fcst = StatsForecast(models=[AutoARIMA(season_length=4), Naive()],
 96 |                      freq='QE', n_jobs=-1)
 97 | Y_hat_df = fcst.forecast(df=Y_train_df, h=4)
 98 | 
 99 | # Reconcile the base predictions
 100 | reconcilers = [
 101 |     BottomUp(),
 102 |     TopDown(method='forecast_proportions'),
 103 |     MiddleOut(middle_level='Country/Purpose/State',
 104 |               top_down_method='forecast_proportions')
 105 | ]
 106 | hrec = HierarchicalReconciliation(reconcilers=reconcilers)
 107 | Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df,
 108 |                           S=S, tags=tags)
 109 | ```
 110 | 
 111 | ### Evaluation
 112 | Assumes you have a test dataframe.
 113 | 
 114 | ```python
 115 | df = Y_rec_df.merge(Y_test_df, on=['unique_id', 'ds'])
 116 | evaluation = evaluate(df = df,
 117 |                       tags = tags,
 118 |                       metrics = [mse],
 119 |                       benchmark = "Naive")
 120 | ```
 121 | 
 122 | ## 📖 Documentation (WIP)
 123 | Here is a link to the [documentation](https://nixtlaverse.nixtla.io/hierarchicalforecast/index.html).
 124 | 
 125 | ## 📃 License
 126 | This project is licensed under the Apache 2.0 License - see the [LICENSE](https://github.com/Nixtla/hierarchicalforecast/blob/main/LICENSE) file for details.
 127 | 
 128 | ## 🏟 HTS projects
 129 | In the R ecosystem, we recommend checking out [fable](http://fable.tidyverts.org/) and the now-retired [hts](https://github.com/earowang/hts).
 130 | In Python, we want to acknowledge the following libraries: [hiere2e](https://github.com/rshyamsundar/gluonts-hierarchical-ICML-2021), [hierts](https://github.com/elephaint/hierts), [sktime](https://github.com/sktime/sktime-tutorial-pydata-berlin-2022), [darts](https://github.com/unit8co/darts), [pyhts](https://github.com/AngelPone/pyhts), [scikit-hts](https://github.com/carlomazzaferro/scikit-hts).
 131 | 
 132 | ## 📚 References and Acknowledgements
 133 | This work is highly influenced by the fantastic work of previous contributors and other scholars who previously proposed the reconciliation methods presented here.
We want to highlight the work of Rob Hyndman, George Athanasopoulos, Shanika L. Wickramasuriya, Souhaib Ben Taieb, and Bonsoo Koo. For a full reference link, please visit the Reference section of this [paper](https://arxiv.org/pdf/2207.03517.pdf). 134 | We encourage users to explore this [literature review](https://otexts.com/fpp3/hierarchical-reading.html). 135 | 136 | ## 🙏 How to cite 137 | If you enjoy or benefit from using these Python implementations, a citation to this [hierarchical forecasting reference paper](https://arxiv.org/abs/2207.03517) will be greatly appreciated. 138 | ```bibtex 139 | @article{olivares2024hierarchicalforecastreferenceframeworkhierarchical, 140 | title={HierarchicalForecast: A Reference Framework for Hierarchical Forecasting in Python}, 141 | author={Kin G. Olivares and Azul Garza and David Luo and Cristian Challú and Max Mergenthaler and Souhaib Ben Taieb and Shanika L. Wickramasuriya and Artur Dubrawski}, 142 | year={2024}, 143 | eprint={2207.03517}, 144 | archivePrefix={arXiv}, 145 | primaryClass={stat.ML}, 146 | url={https://arxiv.org/abs/2207.03517}, 147 | } 148 | ``` 149 | -------------------------------------------------------------------------------- /action_files/imports_with_code.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import warnings 3 | from pathlib import Path 4 | 5 | from nbdev.processors import NBProcessor, _do_eval 6 | 7 | 8 | def check_nb(nb_path: str) -> None: 9 | with warnings.catch_warnings(record=True) as issued_warnings: 10 | NBProcessor(nb_path, _do_eval, process=True) 11 | if any( 12 | "Found cells containing imports and other code" in str(w) 13 | for w in issued_warnings 14 | ): 15 | print(f"{nb_path} has cells containing imports and code.") 16 | sys.exit(1) 17 | 18 | 19 | if __name__ == "__main__": 20 | repo_root = Path(__file__).parents[1] 21 | for nb_path in (repo_root / "nbs").glob("*.ipynb"): 22 | check_nb(str(nb_path)) 23 | -------------------------------------------------------------------------------- /action_files/test_models/requirements.txt: -------------------------------------------------------------------------------- 1 | fire 2 | datasetsforecast 3 | -------------------------------------------------------------------------------- /action_files/test_models/src/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fire 3 | import pickle 4 | import pandas as pd 5 | 6 | from statsforecast.models import AutoETS 7 | from statsforecast.core import StatsForecast 8 | 9 | from hierarchicalforecast.utils import aggregate 10 | 11 | 12 | def get_data(): 13 | # If data exists read it 14 | if os.path.isfile('data/Y_test.csv'): 15 | Y_test_df = pd.read_csv('data/Y_test.csv') 16 | Y_train_df = pd.read_csv('data/Y_train.csv') 17 | Y_hat_df = pd.read_csv('data/Y_hat.csv') 18 | Y_fitted_df = pd.read_csv('data/Y_fitted.csv') 19 | S_df = pd.read_csv('data/S.csv') 20 | 21 | with open('data/tags.pickle', 'rb') as handle: 22 | tags = pickle.load(handle) 23 | 24 | return Y_train_df, Y_test_df, Y_hat_df, Y_fitted_df, S_df, tags 25 | 26 | # Read and Parse Data 27 | Y_df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv') 28 | Y_df = Y_df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1) 29 | Y_df.insert(0, 'Country', 'Australia') 30 | Y_df = Y_df[['Country', 'Region', 'State', 'Purpose', 'ds', 'y']] 31 | Y_df['ds'] = Y_df['ds'].str.replace(r'(\d+) (Q\d)', r'\1-\2', 
regex=True) 32 | Y_df['ds'] = pd.to_datetime(Y_df['ds']) 33 | 34 | # Hierarchical Aggregation 35 | spec = [ 36 | ['Country'], 37 | ['Country', 'State'], 38 | ['Country', 'State', 'Region'], 39 | ['Country', 'State', 'Region', 'Purpose'] 40 | ] 41 | 42 | Y_df, S_df, tags = aggregate(Y_df, spec) 43 | 44 | # Train/Test Splits 45 | Y_test_df = Y_df.groupby('unique_id').tail(8) 46 | Y_train_df = Y_df.drop(Y_test_df.index) 47 | 48 | sf = StatsForecast(models=[AutoETS(season_length=4, model='ZZA')], 49 | freq='QS', n_jobs=-1) 50 | Y_hat_df = sf.forecast(df=Y_train_df, h=8, fitted=True) 51 | Y_fitted_df = sf.forecast_fitted_values() 52 | 53 | # Save Data 54 | if not os.path.exists('./data'): 55 | os.makedirs('./data') 56 | 57 | Y_test_df.to_csv('./data/Y_test.csv', index=False) 58 | Y_train_df.to_csv('./data/Y_train.csv', index=False) 59 | 60 | Y_hat_df.to_csv('./data/Y_hat.csv', index=False) 61 | Y_fitted_df.to_csv('./data/Y_fitted.csv', index=False) 62 | S_df.to_csv('./data/S.csv', index=False) 63 | 64 | with open('./data/tags.pickle', 'wb') as handle: 65 | pickle.dump(tags, handle, protocol=pickle.HIGHEST_PROTOCOL) 66 | 67 | return Y_train_df, Y_test_df, Y_hat_df, Y_fitted_df, S_df, tags 68 | 69 | def save_data(): 70 | Y_train_df, Y_test_df, Y_hat_df, Y_fitted_df, S_df, tags = get_data() 71 | 72 | Y_test_df.to_csv('./data/Y_test.csv', index=False) 73 | Y_train_df.to_csv('./data/Y_train.csv', index=False) 74 | 75 | Y_hat_df.to_csv('./data/Y_hat.csv', index=False) 76 | Y_fitted_df.to_csv('./data/Y_fitted.csv', index=False) 77 | S_df.to_csv('./data/S.csv', index=False) 78 | 79 | with open('./data/tags.pickle', 'wb') as handle: 80 | pickle.dump(tags, handle, protocol=pickle.HIGHEST_PROTOCOL) 81 | 82 | 83 | if __name__=="__main__": 84 | fire.Fire(save_data) 85 | -------------------------------------------------------------------------------- /action_files/test_models/src/evaluation.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from hierarchicalforecast.evaluation import HierarchicalEvaluation 6 | 7 | def rmse(y, y_hat): 8 | return np.mean(np.sqrt(np.mean((y-y_hat)**2, axis=1))) 9 | 10 | def mase(y, y_hat, y_insample, seasonality=4): 11 | errors = np.mean(np.abs(y - y_hat), axis=1) 12 | scale = np.mean(np.abs(y_insample[:, seasonality:] - y_insample[:, :-seasonality]), axis=1) 13 | return np.mean(errors / scale) 14 | 15 | 16 | def evaluate(): 17 | execution_times = pd.read_csv('data/execution_times.csv') 18 | models = [f"{x[0]} ({x[1]:.2f} secs)" for x in execution_times.values] 19 | 20 | Y_rec_df = pd.read_csv('data/Y_rec.csv') 21 | Y_test_df = pd.read_csv('data/Y_test.csv') 22 | Y_train_df = pd.read_csv('data/Y_train.csv') 23 | 24 | with open('data/tags.pickle', 'rb') as handle: 25 | tags = pickle.load(handle) 26 | 27 | eval_tags = {} 28 | eval_tags['Total'] = tags['Country'] 29 | eval_tags['State'] = tags['Country/State'] 30 | eval_tags['Regions'] = tags['Country/State/Region'] 31 | eval_tags['Bottom'] = tags['Country/State/Region/Purpose'] 32 | eval_tags['All'] = np.concatenate(list(tags.values())) 33 | 34 | evaluator = HierarchicalEvaluation(evaluators=[mase]) 35 | evaluation = evaluator.evaluate( 36 | Y_hat_df=Y_rec_df, Y_test_df=Y_test_df, 37 | tags=eval_tags, Y_df=Y_train_df 38 | ) 39 | evaluation = evaluation.query("level != 'Overall'").set_index(['level', 'metric']) 40 | 41 | evaluation.columns = ['Base'] + models 42 | evaluation = evaluation.map('{:.2f}'.format) 43 | return 
evaluation 44 | 45 | 46 | if __name__ == '__main__': 47 | evaluation = evaluate() 48 | evaluation.to_csv('./data/evaluation.csv') 49 | print(evaluation.T) 50 | -------------------------------------------------------------------------------- /action_files/test_models/src/evaluation_temporal.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import pandas as pd 4 | 5 | import hierarchicalforecast.evaluation as hfe 6 | from utilsforecast.losses import mae, rmse 7 | 8 | def evaluate(): 9 | execution_times = pd.read_csv('data/execution_times.csv') 10 | models = [f"{x[0]} ({x[1]:.2f} secs)" for x in execution_times.values] 11 | 12 | Y_rec_df = pd.read_csv('data/Y_rec.csv') 13 | Y_test_df = pd.read_csv('data/Y_test.csv') 14 | 15 | with open('data/tags.pickle', 'rb') as handle: 16 | tags = pickle.load(handle) 17 | 18 | Y_hat_df=Y_rec_df.merge(Y_test_df, on=["ds", "unique_id", "temporal_id"], how="left") 19 | 20 | evaluation = hfe.evaluate( 21 | df=Y_hat_df.drop(columns="unique_id"), 22 | tags=tags, 23 | metrics=[mae, rmse], 24 | id_col='temporal_id' 25 | ) 26 | numeric_cols = evaluation.select_dtypes(include="number").columns 27 | evaluation[numeric_cols] = evaluation[numeric_cols].map('{:.3}'.format).astype(np.float64) 28 | evaluation.columns = ['level', 'metric', 'Base'] + models 29 | return evaluation 30 | 31 | if __name__ == '__main__': 32 | evaluation = evaluate() 33 | evaluation.to_csv('./data/evaluation.csv') 34 | print(evaluation.T) 35 | -------------------------------------------------------------------------------- /action_files/test_models/src/models.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fire 3 | import pandas as pd 4 | 5 | from hierarchicalforecast.core import HierarchicalReconciliation 6 | from hierarchicalforecast.methods import ( 7 | BottomUp, BottomUpSparse, TopDown, TopDownSparse, MiddleOut, MiddleOutSparse, 8 | MinTrace, 9 | MinTraceSparse, 10 | OptimalCombination, 11 | ERM, 12 | ) 13 | 14 | from src.data import get_data 15 | 16 | 17 | def main(): 18 | Y_train_df, Y_test_df, Y_hat_df, Y_fitted_df, S_df, tags = get_data() 19 | 20 | reconcilers = [BottomUp(), 21 | BottomUpSparse(), 22 | TopDown(method="forecast_proportions"), 23 | TopDownSparse(method="forecast_proportions"), 24 | TopDown(method="average_proportions"), 25 | TopDownSparse(method="average_proportions"), 26 | TopDown(method="proportion_averages"), 27 | TopDownSparse(method="proportion_averages"), 28 | MiddleOut(middle_level="Country/State", top_down_method="average_proportions"), 29 | MiddleOutSparse(middle_level="Country/State", top_down_method="average_proportions"), 30 | MinTrace(method='ols'), 31 | MinTrace(method='wls_struct'), 32 | MinTrace(method='wls_var'), 33 | MinTrace(method='mint_cov'), 34 | MinTrace(method='mint_shrink'), 35 | MinTraceSparse(method='ols'), 36 | MinTraceSparse(method='wls_struct'), 37 | MinTraceSparse(method='wls_var'), 38 | OptimalCombination(method='ols'), 39 | OptimalCombination(method='wls_struct'), 40 | ERM(method='closed'), 41 | ] 42 | hrec = HierarchicalReconciliation(reconcilers=reconcilers) 43 | Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, 44 | Y_df=Y_fitted_df, S=S_df, tags=tags) 45 | 46 | execution_times = pd.Series(hrec.execution_times).reset_index() 47 | 48 | if not os.path.exists('./data'): 49 | os.makedirs('./data') 50 | Y_rec_df.to_csv('./data/Y_rec.csv', index=False) 51 | execution_times.to_csv('./data/execution_times.csv', 
index=False) 52 | 53 | 54 | if __name__ == '__main__': 55 | fire.Fire(main) 56 | -------------------------------------------------------------------------------- /action_files/test_models/src/models_temporal.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fire 3 | import pandas as pd 4 | import pickle 5 | 6 | from hierarchicalforecast.utils import aggregate_temporal 7 | from hierarchicalforecast.core import HierarchicalReconciliation 8 | from hierarchicalforecast.methods import ( 9 | BottomUp, BottomUpSparse, TopDown, 10 | TopDownSparse, 11 | MiddleOut, 12 | MiddleOutSparse, 13 | MinTrace, 14 | MinTraceSparse, 15 | OptimalCombination, 16 | # ERM, 17 | ) 18 | 19 | from statsforecast.models import AutoETS 20 | from statsforecast.core import StatsForecast 21 | 22 | def main(): 23 | 24 | # Read data 25 | Y_df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv') 26 | Y_df = Y_df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1) 27 | Y_df.insert(0, 'Country', 'Australia') 28 | Y_df = Y_df[['Country', 'Region', 'State', 'Purpose', 'ds', 'y']] 29 | Y_df['ds'] = Y_df['ds'].str.replace(r'(\d+) (Q\d)', r'\1-\2', regex=True) 30 | Y_df['ds'] = pd.PeriodIndex(Y_df["ds"], freq='Q').to_timestamp() 31 | Y_df["unique_id"] = Y_df["Country"] + "/" + Y_df["State"] + "/" + Y_df["Region"] + "/" + Y_df["Purpose"] 32 | 33 | # split data into train and test 34 | horizon = 8 35 | Y_test_df = Y_df.groupby("unique_id", as_index=False).tail(horizon) 36 | Y_train_df = Y_df.drop(Y_test_df.index) 37 | 38 | # Temporal Hierarchical Aggregation 39 | spec_temporal = {"year": 4, "semiannual": 2, "quarter": 1} 40 | Y_train_df, S_train_df, tags_train = aggregate_temporal(df=Y_train_df, spec=spec_temporal) 41 | Y_test_df, S_test_df, tags_test = aggregate_temporal(df=Y_test_df, spec=spec_temporal) 42 | 43 | # Create forecasts 44 | Y_hat_dfs = [] 45 | id_cols = ["unique_id", "temporal_id", "ds"] 46 | # We will train a model for each temporal level 47 | for level, temporal_ids_train in tags_train.items(): 48 | # Filter the data for the level 49 | Y_level_train = Y_train_df.query("temporal_id in @temporal_ids_train") 50 | temporal_ids_test = tags_test[level] # noqa F841 51 | Y_level_test = Y_test_df.query("temporal_id in @temporal_ids_test") 52 | # For each temporal level we have a different frequency and forecast horizon 53 | freq_level = pd.infer_freq(Y_level_train["ds"].unique()) 54 | horizon_level = Y_level_test["ds"].nunique() 55 | # Train a model and create forecasts 56 | fcst = StatsForecast(models=[AutoETS(model='ZZZ')], freq=freq_level, n_jobs=-1) 57 | Y_hat_df_level = fcst.forecast(df=Y_level_train[["ds", "unique_id", "y"]], h=horizon_level) 58 | # Add the test set to the forecast 59 | Y_hat_df_level = Y_hat_df_level.merge(Y_level_test.drop(columns="y"), on=["ds", "unique_id"], how="left") 60 | # Put cols in the right order (for readability) 61 | Y_hat_cols = id_cols + [col for col in Y_hat_df_level.columns if col not in id_cols] 62 | Y_hat_df_level = Y_hat_df_level[Y_hat_cols] 63 | # Append the forecast to the list 64 | Y_hat_dfs.append(Y_hat_df_level) 65 | 66 | Y_hat_df = pd.concat(Y_hat_dfs, ignore_index=True) 67 | 68 | reconcilers = [ 69 | BottomUp(), 70 | BottomUpSparse(), 71 | TopDown(method="forecast_proportions"), 72 | TopDownSparse(method="forecast_proportions"), 73 | # TopDown(method="average_proportions"), 74 | # TopDownSparse(method="average_proportions"), 75 | # 
TopDown(method="proportion_averages"), 76 | # TopDownSparse(method="proportion_averages"), 77 | MiddleOut(middle_level="semiannual", top_down_method="forecast_proportions"), 78 | MiddleOutSparse(middle_level="semiannual", top_down_method="forecast_proportions"), 79 | MinTrace(method='ols'), 80 | MinTrace(method='wls_struct'), 81 | # MinTrace(method='wls_var'), 82 | # MinTrace(method='mint_cov'), 83 | # MinTrace(method='mint_shrink'), 84 | MinTraceSparse(method='ols'), 85 | MinTraceSparse(method='wls_struct'), 86 | # MinTraceSparse(method='wls_var'), 87 | OptimalCombination(method='ols'), 88 | OptimalCombination(method='wls_struct'), 89 | # ERM(method='closed'), 90 | ] 91 | hrec = HierarchicalReconciliation(reconcilers=reconcilers) 92 | 93 | Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, 94 | S=S_test_df, 95 | tags=tags_test, 96 | temporal=True, 97 | ) 98 | 99 | execution_times = pd.Series(hrec.execution_times).reset_index() 100 | 101 | if not os.path.exists('./data'): 102 | os.makedirs('./data') 103 | Y_rec_df.to_csv('./data/Y_rec.csv', index=False) 104 | Y_test_df.to_csv('./data/Y_test.csv', index=False) 105 | Y_train_df.to_csv('./data/Y_train.csv', index=False) 106 | with open('./data/tags.pickle', 'wb') as handle: 107 | pickle.dump(tags_test, handle, protocol=pickle.HIGHEST_PROTOCOL) 108 | 109 | execution_times.to_csv('./data/execution_times.csv', index=False) 110 | 111 | if __name__ == '__main__': 112 | fire.Fire(main) 113 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: hierarchicalforecast 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - numpy 6 | - numba 7 | - pandas 8 | - scikit-learn 9 | - quadprog 10 | - clarabel 11 | - matplotlib 12 | - pip 13 | - pip: 14 | - nbdev 15 | - ipython<=8.32.0 -------------------------------------------------------------------------------- /experiments/hierarchical_baselines/README.md: -------------------------------------------------------------------------------- 1 | # HierarchicalForecast Baselines 2 | 3 | This study showcases the HierarchicalForecast library's statistical baselines, following established experimental protocols from previous research Rangapuram et al. [2021], Olivares et al. [2023] and Kamarthi et al. [2022]. The benchmark datasets utilized include Australian Monthly Labour, SF Bay Area daily Traffic, Quarterly Australian Tourism Visits, Monthly Australian Tourism visits, and daily Wikipedia article views. 4 | 5 | In our experiment, we compare the predictions of several state-of-the-art probabilistic coherent methods. The statistical baselines encompass different variants, such as (1) BOOTSTRAP [Panagiotelis et al., 2023], (2) NORMALITY [Wickramasuriya et al., 2022], and (3) PERMBU probabilistic reconciliation [Ben Taieb et al., 2017], combined with mean reconcilers including BottomUp [Orcutt et al., 1968], TopDown [Gross and Sohl, 1990], and MinTrace [Wickramasuriya et al., 2019]. 6 | 7 | ## Probabilistic Coherent Forecast Accuracy 8 | 9 | To evaluate the accuracy of probabilistic coherent forecasts, we employ the Mean scaled Continuous Ranked Probability Scores (sCRPS) integral. This evaluation metric utilizes a Riemann approximation to the sCRPS, considering quantile intervals of 1 percent denoted as $dq$. We report here the best performing method across the BottomUp, TopDown and MinTrace reconciliations. 
10 | ```math 11 | \mathrm{sCRPS}(\mathbb{P}, \mathbf{y}_{[i],\tau}) = \frac{2}{|[i\,]|} \sum_{i} 12 | \frac{\int^{1}_{0} \mathrm{QL}(\mathbb{P}_{i,\tau}, y_{i,\tau})_{q} dq }{\sum_{i} | y_{i,\tau} |} 13 | ``` 14 | 15 | | | **BOOTSTRAP** ||| **NORMALITY** ||| **PERMBU** ||| 16 | |--------------|----------|---------|----------|----------|---------|----------|----------|---------|----------| 17 | | **Dataset** | BottomUp | TopDown | MinTrace | BottomUp | TopDown | MinTrace | BottomUp | TopDown | MinTrace | 18 | | Labour | 0.0078 | 0.0668 | 0.0073 | 0.0076 | 0.0656 | 0.0069 | 0.0077 | 0.0623 | 0.0069 | 19 | | Traffic | 0.0736 | 0.0741 | 0.0608 | 0.0845 | 0.0738 | 0.0630 | 0.0849 | 0.0708 | 0.0651 | 20 | | TourismS | 0.0682 | 0.1040 | 0.0703 | 0.0649 | 0.1000 | 0.6830 | 0.0649 | 0.0898 | 0.0680 | 21 | | TourismL | 0.1375 | - | 0.1313 | 0.1620 | - | 0.1338 | - | - | - | 22 | | Wiki2 | 0.2894 | 0.3231 | 0.2808 | 0.3914 | 0.3385 | 0.3385 | 0.3920 | 0.4269 | 0.3821 | 23 |
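For reference, below is a minimal NumPy sketch of this Riemann approximation. It is a simplified stand-in for the `scaled_crps` utility that `src/run_baselines.py` imports from `hierarchicalforecast.evaluation`; the array shapes and the exact normalization convention are illustrative assumptions, not the library's implementation.

```python
import numpy as np

def scaled_crps_sketch(y, y_hat, quantiles):
    # y: observed values, shape (n_series, horizon)
    # y_hat: quantile forecasts, shape (n_series, horizon, n_quantiles)
    # quantiles: uniform grid in (0, 1), e.g. np.arange(1, 100) / 100 for 1% steps
    q = quantiles[None, None, :]
    delta = y[:, :, None] - y_hat
    quantile_loss = np.maximum(q * delta, (q - 1) * delta)  # pinball loss QL(P, y)_q
    integral = quantile_loss.mean(axis=-1)  # mean over the grid approximates the dq integral
    # 2 / |[i]| factor and sum(|y|) scaling, following the formula above
    return 2 * integral.sum() / (y.shape[0] * np.abs(y).sum())
```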
24 | 
25 | 
26 | ## Hierarchically Coherent Forecast Accuracy
27 | 
28 | To assess the accuracy of hierarchically coherent mean forecasts, we employ the relative mean squared error, which compares the reconciled StatsForecast base AutoARIMA forecasts against a Naive benchmark forecast.
29 | ```math
30 | \mathrm{relMSE}(\mathbf{y}_{[i]}, \hat{\mathbf{y}}_{[i]}, \mathbf{\check{y}}_{[i]}) =
31 | \frac{\mathrm{MSE}(\mathbf{y}_{[i]}, \mathbf{\hat{y}}_{[i]})}{\mathrm{MSE}(\mathbf{y}_{[i]}, \mathbf{\check{y}}_{[i]})}
32 | ```
33 | 
34 | | Dataset  | BottomUp | TopDown | MinTrace |
35 | |----------|----------|---------|----------|
36 | | Labour   | 0.5382   | 16.8204 | 0.3547   |
37 | | Traffic  | 0.1394   | 0.0614  | 0.0744   |
38 | | TourismS | 0.1002   | 0.1919  | 0.1235   |
39 | | TourismL | 0.3070   | -       | 0.1375   |
40 | | Wiki2    | 1.0163   | 1.4482  | 1.0068   |
41 | 
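Below is a minimal sketch of this ratio, mirroring the `rel_mse(y, y_hat, y_train)` call signature used in `src/run_baselines.py` and assuming a last-value Naive benchmark; the library's internal benchmark construction may differ.

```python
import numpy as np

def rel_mse_sketch(y, y_hat, y_train):
    # y, y_hat: test values and reconciled forecasts, shape (n_series, horizon)
    # y_train: in-sample values, shape (n_series, n_insample)
    mse_model = np.mean((y - y_hat) ** 2)
    # Naive benchmark: carry each series' last in-sample value across the horizon
    y_naive = np.repeat(y_train[:, [-1]], y.shape[1], axis=1)
    mse_naive = np.mean((y - y_naive) ** 2)
    return mse_model / mse_naive
```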
42 | 
43 | ## Reproducibility
44 | 
45 | 1. Create a conda environment `hierarchical_baselines` using the `environment.yml` file.
46 | ```shell
47 | conda env create -f environment.yml
48 | ```
49 | 
50 | 2. Activate the conda environment using
51 | ```shell
52 | conda activate hierarchical_baselines
53 | ```
54 | 
55 | 3. Run the experiments for each dataset and each model with
56 |    - `--intervals_method` parameter in `['bootstrap', 'normality', 'permbu']`
57 |    - `--dataset` parameter in `['Labour', 'Traffic', 'OldTraffic', 'TourismSmall', 'TourismLarge', 'OldTourismLarge', 'Wikitwo']`
58 | ```shell
59 | python src/run_baselines.py --intervals_method 'bootstrap' --dataset 'OldTourismLarge'
60 | ```
61 | 
62 | You can access the final reconciled forecasts from the `./data/{dataset}/{intervals_method}_rec.csv` file. Example: `./data/TourismLarge/bootstrap_rec.csv`.
63 | The file gives you access to the mean and quantile forecasts for all series in the dataset, for example `AutoARIMA/BottomUp`, `AutoARIMA/BottomUp-lo-90`, and `AutoARIMA/BottomUp-hi-90`, which correspond to the mean, q5, and q95 forecasts; see the snippet below.
64 | 
65 | 
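For instance, a short pandas snippet to inspect the `BottomUp` mean and 90% interval forecasts; the `seed` column indexes the bootstrap replicates written by `bootstrap_reconcile` in `src/run_baselines.py`.

```python
import pandas as pd

# Load the reconciled forecasts and select one method's mean and 90% interval
rec = pd.read_csv('./data/TourismLarge/bootstrap_rec.csv')
cols = ['seed', 'unique_id', 'ds', 'AutoARIMA/BottomUp',
        'AutoARIMA/BottomUp-lo-90', 'AutoARIMA/BottomUp-hi-90']
print(rec[cols].head())
```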

66 | 
67 | ## References
68 | - [Kin G. Olivares, Federico Garza, David Luo, Cristian Challú, Max Mergenthaler, Souhaib Ben Taieb, Shanika Wickramasuriya, and Artur Dubrawski (2023). "HierarchicalForecast: A reference framework for hierarchical forecasting". Journal of Machine Learning Research, submitted. URL https://arxiv.org/abs/2207.03517](https://arxiv.org/abs/2207.03517)
69 | 
70 | ### Datasets
71 | - [Australian Bureau of Statistics. Labour force, Australia. Accessed Online, 2019. URL https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/6202.0Dec%202019?OpenDocument.](https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/6202.0Dec%202019?OpenDocument)
72 | - [Dheeru Dua and Casey Graff. Traffic dataset. UCI Machine Learning Repository, 2017. URL http://archive.ics.uci.edu/ml.](http://archive.ics.uci.edu/ml.)
73 | - [Tourism Australia, Canberra. Tourism Research Australia (2005), Travel by Australians. URL https://www.kaggle.com/luisblanche/quarterly-tourism-in-australia/](https://www.kaggle.com/luisblanche/quarterly-tourism-in-australia/)
74 | - [Tourism Australia, Canberra. Detailed tourism Australia (2005), Travel by Australians, Sep 2019. URL https://robjhyndman.com/publications/hierarchical-tourism/.](https://robjhyndman.com/publications/hierarchical-tourism/)
75 | - [Oren Anava, Vitaly Kuznetsov, and (Google Inc. Sponsorship). Web traffic time series forecasting, forecast future traffic to Wikipedia pages. Kaggle Competition, 2018. URL https://www.kaggle.com/c/web-traffic-time-series-forecasting/.](https://www.kaggle.com/c/web-traffic-time-series-forecasting/)
76 | 
77 | ### Baselines
78 | - [BOOTSTRAP: Anastasios Panagiotelis, Puwasala Gamakumara, George Athanasopoulos, and Rob J. Hyndman (2022). "Probabilistic forecast reconciliation: Properties, evaluation and score optimisation". European Journal of Operational Research, 306(2):693–706, 2023. ISSN 0377-2217. doi: https://doi.org/10.1016/j.ejor.2022.07.040. URL https://www.sciencedirect.com/science/article/pii/S0377221722006087.](https://www.sciencedirect.com/science/article/pii/S0377221722006087)
79 | - [PERMBU: Souhaib Ben Taieb, James W. Taylor, and Rob J. Hyndman. Coherent probabilistic forecasts for hierarchical time series. In Doina Precup and Yee Whye Teh, editors, Proceedings of the 34th International Conference on Machine Learning, volume 70 of Proceedings of Machine Learning Research, pages 3348–3357. PMLR, 06–11 Aug 2017. URL http://proceedings.mlr.press/v70/taieb17a.html.](http://proceedings.mlr.press/v70/taieb17a.html)
80 | - [NORMALITY: Shanika L. Wickramasuriya. Probabilistic forecast reconciliation under the Gaussian framework. Accepted at Journal of Business and Economic Statistics, 2023.]()
81 | - [Orcutt, G.H., Watts, H.W., & Edwards, J.B. (1968). "Data aggregation and information loss". The American Economic Review, 58, 773–787.](http://www.jstor.org/stable/1815532)
82 | - [C.W. Gross and J.E. Sohl (1990). "Disaggregation methods to expedite product line forecasting". Journal of Forecasting, 9, 233–254. doi:10.1002/for.3980090304](https://onlinelibrary.wiley.com/doi/abs/10.1002/for.3980090304)
83 | - [Wickramasuriya, S.L., Turlach, B.A. & Hyndman, R.J. (2020). "Optimal non-negative forecast reconciliation". Stat Comput 30, 1167–1182. https://doi.org/10.1007/s11222-020-09930-0](https://robjhyndman.com/publications/nnmint/)
84 | -------------------------------------------------------------------------------- /experiments/hierarchical_baselines/environment.yml: -------------------------------------------------------------------------------- 1 | name: hierarchical_baselines 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - numpy<1.24 6 | - quadprog 7 | - pip 8 | - pip: 9 | - statsforecast 10 | - "git+https://github.com/Nixtla/datasetsforecast.git" 11 | - hierarchicalforecast -------------------------------------------------------------------------------- /experiments/hierarchical_baselines/src/run_baselines.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import matplotlib.pyplot as plt 7 | 8 | from statsforecast.core import StatsForecast 9 | from statsforecast.models import AutoARIMA, Naive 10 | 11 | from hierarchicalforecast.core import HierarchicalReconciliation 12 | from hierarchicalforecast.evaluation import HierarchicalEvaluation 13 | from hierarchicalforecast.methods import BottomUp, TopDown, MinTrace, ERM 14 | 15 | from hierarchicalforecast.utils import is_strictly_hierarchical 16 | from hierarchicalforecast.utils import HierarchicalPlot, CodeTimer 17 | from hierarchicalforecast.evaluation import scaled_crps, rel_mse, msse 18 | 19 | from datasetsforecast.hierarchical import HierarchicalData, HierarchicalInfo 20 | 21 | import warnings 22 | # Avoid pandas fragmentation warning and positive definite warning 23 | warnings.filterwarnings("ignore") 24 | 25 | 26 | class HierarchicalDataset(object): 27 | # Class with loading, processing and 28 | # prediction evaluation methods for hierarchical data 29 | 30 | available_datasets = ['Labour','Traffic', 31 | 'TourismSmall','TourismLarge','Wiki2', 32 | 'OldTraffic', 'OldTourismLarge'] 33 | 34 | @staticmethod 35 | def _get_hierarchical_scrps(hier_idxs, Y, Yq_hat, q_to_pred): 36 | # We use the indexes obtained from the aggregation tags 37 | # to compute scaled CRPS across the hierarchy levels 38 | scrps_list = [] 39 | for idxs in hier_idxs: 40 | y = Y[idxs, :] 41 | yq_hat = Yq_hat[idxs, :, :] 42 | level_scrps = scaled_crps(y, yq_hat, q_to_pred) 43 | scrps_list.append(level_scrps) 44 | return scrps_list 45 | 46 | @staticmethod 47 | def _get_hierarchical_msse(hier_idxs, Y, Y_hat, Y_train): 48 | # We use the indexes obtained from the aggregation tags 49 | # to compute MS scaled Error across the hierarchy levels 50 | msse_list = [] 51 | for idxs in hier_idxs: 52 | y = Y[idxs, :] 53 | y_hat = Y_hat[idxs, :] 54 | y_train = Y_train[idxs, :] 55 | level_msse = msse(y, y_hat, y_train) 56 | msse_list.append(level_msse) 57 | return msse_list 58 | 59 | @staticmethod 60 | def _get_hierarchical_rel_mse(hier_idxs, Y, Y_hat, Y_train): 61 | # We use the indexes obtained from the aggregation tags 62 | # to compute relative MSE across the hierarchy levels 63 | rel_mse_list = [] 64 | for idxs in hier_idxs: 65 | y = Y[idxs, :] 66 | y_hat = Y_hat[idxs, :] 67 | y_train = Y_train[idxs, :] 68 | level_rel_mse = rel_mse(y, y_hat, y_train) 69 | rel_mse_list.append(level_rel_mse) 70 | return rel_mse_list 71 | 72 | @staticmethod 73 | def _sort_hier_df(Y_df, S_df): 74 | # NeuralForecast core, sorts unique_id lexicographically 75 | # deviating from S_df, this class matches S_df and Y_hat_df order. 
76 | Y_df.unique_id = Y_df.unique_id.astype('category') 77 | Y_df.unique_id = Y_df.unique_id.cat.set_categories(S_df.index) 78 | Y_df = Y_df.sort_values(by=['unique_id', 'ds']) 79 | return Y_df 80 | 81 | @staticmethod 82 | def _nonzero_indexes_by_row(M): 83 | return [np.nonzero(M[row,:])[0] for row in range(len(M))] 84 | 85 | @staticmethod 86 | def load_process_data(dataset, directory='./data'): 87 | # Load data 88 | data_info = HierarchicalInfo[dataset] 89 | Y_df, S_df, tags = HierarchicalData.load(directory=directory, 90 | group=dataset) 91 | 92 | # Parse and augment data 93 | Y_df['ds'] = pd.to_datetime(Y_df['ds']) 94 | Y_df = HierarchicalDataset._sort_hier_df(Y_df=Y_df, S_df=S_df) 95 | 96 | # Obtain indexes for plots and evaluation 97 | hier_levels = ['Overall'] + list(tags.keys()) 98 | hier_idxs = [np.arange(len(S_df))] +\ 99 | [S_df.index.get_indexer(tags[level]) for level in list(tags.keys())] 100 | hier_linked_idxs = HierarchicalDataset._nonzero_indexes_by_row(S_df.values.T) 101 | 102 | # Final output 103 | data = dict(Y_df=Y_df, S_df=S_df, tags=tags, 104 | # Hierarchical idxs 105 | hier_idxs=hier_idxs, 106 | hier_levels=hier_levels, 107 | hier_linked_idxs=hier_linked_idxs, 108 | # Dataset Properties 109 | horizon=data_info.papers_horizon, 110 | freq=data_info.freq, 111 | seasonality=data_info.seasonality) 112 | return data 113 | 114 | 115 | def run_baselines(dataset, intervals_method, verbose=False, seed=0): 116 | with CodeTimer('Read and Parse data ', verbose): 117 | data = HierarchicalDataset.load_process_data(dataset=dataset) 118 | Y_df = data['Y_df'][["unique_id", 'ds', 'y']] 119 | S_df, tags = data['S_df'], data['tags'] 120 | horizon = data['horizon'] 121 | seasonality = data['seasonality'] 122 | freq = data['freq'] 123 | 124 | # Train/Test Splits 125 | Y_test_df = Y_df.groupby('unique_id').tail(horizon) 126 | Y_train_df = Y_df.drop(Y_test_df.index) 127 | Y_test_df = Y_test_df.set_index('unique_id') 128 | Y_train_df = Y_train_df.set_index('unique_id') 129 | 130 | dataset_str = f'{dataset}, h={horizon} ' 131 | dataset_str += f'n_series={len(S_df)}, n_bottom={len(S_df.columns)} \n' 132 | dataset_str += f'test ds=[{min(Y_test_df.ds), max(Y_test_df.ds)}] ' 133 | print(dataset_str) 134 | 135 | with CodeTimer('Fit/Predict Model ', verbose): 136 | # Read to avoid unnecesary AutoARIMA computation 137 | yhat_file = f'./data/{dataset}/Y_hat.csv' 138 | yfitted_file = f'./data/{dataset}/Y_fitted.csv' 139 | yrec_file = f'./data/{dataset}/{intervals_method}_rec.csv' 140 | 141 | if os.path.exists(yhat_file): 142 | Y_hat_df = pd.read_csv(yhat_file) 143 | Y_fitted_df = pd.read_csv(yfitted_file) 144 | 145 | else: 146 | if not os.path.exists(f'./data/{dataset}'): 147 | os.makedirs(f'./data/{dataset}') 148 | fcst = StatsForecast( 149 | df=Y_train_df, 150 | models=[AutoARIMA(season_length=seasonality)], 151 | fallback_model=[Naive()], 152 | freq=freq, 153 | n_jobs=-1 154 | ) 155 | Y_hat_df = fcst.forecast(h=horizon, fitted=True, level=LEVEL) 156 | Y_fitted_df = fcst.forecast_fitted_values() 157 | 158 | Y_hat_df = Y_hat_df.reset_index() 159 | Y_fitted_df = Y_fitted_df.reset_index() 160 | Y_hat_df.to_csv(yhat_file, index=False) 161 | Y_fitted_df.to_csv(yfitted_file, index=False) 162 | 163 | Y_hat_df = Y_hat_df.set_index('unique_id') 164 | Y_fitted_df = Y_fitted_df.set_index('unique_id') 165 | 166 | with CodeTimer('Reconcile Predictions ', verbose): 167 | if is_strictly_hierarchical(S=S_df.values.astype(np.float32), 168 | tags={key: S_df.index.get_indexer(val) for key, val in tags.items()}): 169 | 
reconcilers = [ 170 | BottomUp(), 171 | TopDown(method='average_proportions'), 172 | TopDown(method='proportion_averages'), 173 | MinTrace(method='ols'), 174 | MinTrace(method='wls_var'), 175 | MinTrace(method='mint_shrink'), 176 | #ERM(method='reg_bu', lambda_reg=100) # Extremely inneficient 177 | ERM(method='closed') 178 | ] 179 | else: 180 | reconcilers = [ 181 | BottomUp(), 182 | MinTrace(method='ols'), 183 | MinTrace(method='wls_var'), 184 | MinTrace(method='mint_shrink'), 185 | #ERM(method='reg_bu', lambda_reg=100) # Extremely inneficient 186 | ERM(method='closed') 187 | ] 188 | 189 | hrec = HierarchicalReconciliation(reconcilers=reconcilers) 190 | Y_rec_df = hrec.bootstrap_reconcile(Y_hat_df=Y_hat_df, 191 | Y_df=Y_fitted_df, 192 | S_df=S_df, tags=tags, 193 | level=LEVEL, 194 | intervals_method=intervals_method, 195 | num_samples=10, num_seeds=10) 196 | 197 | # Matching Y_test/Y_rec/S index ordering 198 | Y_test_df = Y_test_df.reset_index() 199 | Y_test_df.unique_id = Y_test_df.unique_id.astype('category') 200 | Y_test_df.unique_id = Y_test_df.unique_id.cat.set_categories(S_df.index) 201 | Y_test_df = Y_test_df.sort_values(by=['unique_id', 'ds']) 202 | 203 | Y_rec_df = Y_rec_df.reset_index() 204 | Y_rec_df.unique_id = Y_rec_df.unique_id.astype('category') 205 | Y_rec_df.unique_id = Y_rec_df.unique_id.cat.set_categories(S_df.index) 206 | Y_rec_df = Y_rec_df.sort_values(by=['seed', 'unique_id', 'ds']) 207 | 208 | Y_rec_df.to_csv(yrec_file, index=False) 209 | 210 | # Parsing model level columns 211 | flat_cols = list(hrec.level_names.keys()) 212 | for model in hrec.level_names: 213 | flat_cols += hrec.level_names[model] 214 | for model in hrec.sample_names: 215 | flat_cols += hrec.sample_names[model] 216 | y_rec = Y_rec_df[flat_cols] 217 | model_columns = y_rec.columns 218 | 219 | n_series = len(S_df) 220 | n_seeds = len(Y_rec_df.seed.unique()) 221 | y_rec = y_rec.values.reshape(n_seeds, n_series, horizon, len(model_columns)) 222 | y_test = Y_test_df['y'].values.reshape(n_series, horizon) 223 | y_train = Y_train_df['y'].values.reshape(n_series, -1) 224 | 225 | with CodeTimer('Evaluate Base Forecasts ', verbose): 226 | crps_results = {'Dataset': [dataset] * len(['Overall'] + list(tags.keys())), 227 | 'Level': ['Overall'] + list(tags.keys()),} 228 | relmse_results = {'Dataset': [dataset] * len(['Overall'] + list(tags.keys())), 229 | 'Level': ['Overall'] + list(tags.keys()),} 230 | msse_results = {'Dataset': [dataset] * len(['Overall'] + list(tags.keys())), 231 | 'Level': ['Overall'] + list(tags.keys()),} 232 | Y_hat_quantiles = Y_hat_df.drop(columns=['ds', 'AutoARIMA']) 233 | y_hat_quantiles_np = Y_hat_quantiles.values.reshape(n_series, horizon, len(QUANTILES)) 234 | y_hat_np = Y_hat_df['AutoARIMA'].values.reshape(n_series, -1) 235 | 236 | crps_results['AutoARIMA'] = HierarchicalDataset._get_hierarchical_scrps( 237 | Y=y_test, 238 | Yq_hat=y_hat_quantiles_np, 239 | q_to_pred=QUANTILES, 240 | hier_idxs=data['hier_idxs']) 241 | relmse_results['AutoARIMA'] = HierarchicalDataset._get_hierarchical_rel_mse( 242 | Y=y_test, 243 | Y_hat=y_hat_np, 244 | Y_train=y_train, 245 | hier_idxs=data['hier_idxs']) 246 | msse_results['AutoARIMA'] = HierarchicalDataset._get_hierarchical_msse( 247 | Y=y_test, 248 | Y_hat=y_hat_np, 249 | Y_train=y_train, 250 | hier_idxs=data['hier_idxs']) 251 | 252 | with CodeTimer('Evaluate Models CRPS ', verbose): 253 | for model in hrec.level_names.keys(): 254 | crps_results[model] = [] 255 | for level in crps_results['Level']: 256 | if level=='Overall': 257 | row_idxs = 
np.arange(len(S_df)) 258 | else: 259 | row_idxs = S_df.index.get_indexer(tags[level]) 260 | col_idxs = model_columns.get_indexer(hrec.level_names[model]) 261 | _y = y_test[row_idxs,:] 262 | _y_rec_seeds = y_rec[:,row_idxs,:,:][:,:,:,col_idxs] 263 | 264 | level_model_crps =[] 265 | for seed_idx in range(y_rec.shape[0]): 266 | _y_rec = _y_rec_seeds[seed_idx,:,:,:] 267 | level_model_crps.append(scaled_crps(y=_y, y_hat=_y_rec, 268 | quantiles=QUANTILES)) 269 | level_model_crps = f'{np.mean(level_model_crps):.4f}±{(1.96 * np.std(level_model_crps)):.4f}' 270 | crps_results[model].append(level_model_crps) 271 | 272 | crps_results = pd.DataFrame(crps_results) 273 | 274 | with CodeTimer('Evaluate Models relMSE', verbose): 275 | for model in hrec.level_names.keys(): 276 | relmse_results[model] = [] 277 | for level in relmse_results['Level']: 278 | if level=='Overall': 279 | row_idxs = np.arange(len(S_df)) 280 | else: 281 | row_idxs = S_df.index.get_indexer(tags[level]) 282 | col_idx = model_columns.get_loc(model) 283 | _y = y_test[row_idxs,:] 284 | _y_train = y_train[row_idxs,:] 285 | _y_hat_seeds = y_rec[:,row_idxs,:,:][:,:,:,col_idx] 286 | 287 | level_model_relmse = [] 288 | for seed_idx in range(y_rec.shape[0]): 289 | _y_hat = _y_hat_seeds[seed_idx,:,:] 290 | level_model_relmse.append(rel_mse(y=_y, y_hat=_y_hat, y_train=_y_train)) 291 | level_model_relmse = f'{np.mean(level_model_relmse):.4f}' 292 | relmse_results[model].append(level_model_relmse) 293 | 294 | relmse_results = pd.DataFrame(relmse_results) 295 | 296 | with CodeTimer('Evaluate Models MSSE ', verbose): 297 | for model in hrec.level_names.keys(): 298 | msse_results[model] = [] 299 | for level in msse_results['Level']: 300 | if level=='Overall': 301 | row_idxs = np.arange(len(S_df)) 302 | else: 303 | row_idxs = S_df.index.get_indexer(tags[level]) 304 | col_idx = model_columns.get_loc(model) 305 | _y = y_test[row_idxs,:] 306 | _y_train = y_train[row_idxs,:] 307 | _y_hat_seeds = y_rec[:,row_idxs,:,:][:,:,:,col_idx] 308 | 309 | level_model_msse = [] 310 | for seed_idx in range(y_rec.shape[0]): 311 | _y_hat = _y_hat_seeds[seed_idx,:,:] 312 | level_model_msse.append(msse(y=_y, y_hat=_y_hat, y_train=_y_train)) 313 | level_model_msse = f'{np.mean(level_model_msse):.4f}' 314 | msse_results[model].append(level_model_msse) 315 | 316 | msse_results = pd.DataFrame(msse_results) 317 | 318 | return crps_results, relmse_results, msse_results 319 | 320 | if __name__ == '__main__': 321 | 322 | # Parse execution parameters 323 | verbose = True 324 | parser = argparse.ArgumentParser() 325 | parser.add_argument("-intervals_method", "--intervals_method", type=str) 326 | parser.add_argument("-dataset", "--dataset", type=str) 327 | 328 | args = parser.parse_args() 329 | intervals_method = args.intervals_method 330 | dataset = args.dataset 331 | 332 | assert intervals_method in ['bootstrap', 'normality', 'permbu'], \ 333 | "Select `--intervals_method` from ['bootstrap', 'normality', 'permbu']" 334 | 335 | available_datasets = ['Labour', 'Traffic', 'OldTraffic', 336 | 'TourismSmall', 'TourismLarge', 'OldTourismLarge', 'Wikitwo'] 337 | assert dataset in available_datasets, \ 338 | "Select `--dataset` from ['Labour', 'Traffic', 'OldTraffic', \ 339 | 'TourismSmall', 'TourismLarge', 'OldTourismLarge', 'Wikitwo']" 340 | 341 | print(f'\n {intervals_method.upper()} {dataset} statistical baselines evaluation \n') 342 | 343 | LEVEL = np.arange(0, 100, 2) 344 | qs = [[50-lv/2, 50+lv/2] for lv in LEVEL] 345 | QUANTILES = np.sort(np.concatenate(qs)/100) 346 | 347 | # Run 
experiments
348 |     crps_results_list = []
349 |     msse_results_list = []
350 |     relmse_results_list = []
351 | 
352 |     try: # Hacky protection for non strictly hierarchical datasets
353 |         crps_results, relmse_results, msse_results = run_baselines(dataset=dataset,
354 |                                         intervals_method=intervals_method, verbose=verbose)
355 |         crps_results_list.append(crps_results)
356 |         msse_results_list.append(msse_results)
357 |         relmse_results_list.append(relmse_results)
358 |     except Exception as e:
359 |         print('failed ', dataset)
360 |         print(str(e))
361 |         print('\n\n')
362 | 
363 |     crps_results_df = pd.concat(crps_results_list)
364 |     msse_results_df = pd.concat(msse_results_list)
365 |     relmse_results_df = pd.concat(relmse_results_list)
366 | 
367 |     crps_results_df.to_csv(f'./data/{intervals_method}_crps.csv', index=False)
368 |     msse_results_df.to_csv(f'./data/{intervals_method}_msse.csv', index=False)
369 |     relmse_results_df.to_csv(f'./data/{intervals_method}_relmse.csv', index=False)
370 | 
371 |     print('='*(200+24))
372 |     print(f'{intervals_method} sCRPS:')
373 |     print(crps_results_df)
374 | 
375 |     print('\n\n'+'='*(200+24))
376 |     print(f'{intervals_method} relMSE:')
377 |     print(relmse_results_df)
378 | 
379 |     print('\n\n'+'='*(200+24))
380 |     print(f'{intervals_method} MSSE:')
381 |     print(msse_results_df)
382 | 
--------------------------------------------------------------------------------
/experiments/hierarchical_baselines/src/run_favorita_baselines.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/experiments/hierarchical_baselines/src/run_favorita_baselines.py
--------------------------------------------------------------------------------
/experiments/libs-comparison/README.md:
--------------------------------------------------------------------------------
1 | # Hierarchical Methods Comparison
2 | 
3 | This experiment aims to empirically validate the results presented in other great implementations of hierarchical reconciliation methods for Python and R. We use the ETS model on the following datasets, highly inspired by [Rob Hyndman and George Athanasopoulos's work](https://otexts.com/fpp3/hierarchical.html).
4 | 
5 | ## Main results
6 | 
7 | To perform the experiments, we used the TourismSmall, Labour, and Wiki2 datasets, widely used for hierarchical reconciliation research. For TourismSmall and Labour, we used the last eight observations as the test set, and the last 12 observations for Wiki2.
8 | 
9 | ### Performance (RMSSE)
10 | 
11 | ![image](./results.png)
12 | 
13 | Notes:
14 | - [fable](https://github.com/tidyverts/fable) also contains `TopDown` and `MiddleOut` methods, but they raise an error. A [Pull Request](https://github.com/tidyverts/fabletools/pull/362) was opened to fix the issue.
15 | - The `RMSSE` (root mean squared scaled error) was calculated against a naive model.
16 | 
17 | **Disclaimer:**
18 | 
19 | It was pointed out by sktime contributors that the differences between fable and sktime in our experiments, in terms of forecast accuracy, are due to [problems](https://github.com/sktime/sktime/issues/3162) with the base forecaster in sktime. Using simple AR models ensures replicability between fable and sktime, as seen [here](https://github.com/ciaran-g/hierarchical-fc-comparison).
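For reference, the RMSSE definition used by the Python pipelines is reproduced here with annotations (it is the `rmsse` helper defined in `src/sktime.py` and `src/statsforecast.py` further below): each series' forecast RMSE is scaled by the RMSE of a naive forecast that repeats the last in-sample value.

```python
import numpy as np

def rmsse(y, y_hat, y_insample):
    # y, y_hat: test values and forecasts, shape (n_series, horizon)
    # y_insample: training values, shape (n_series, n_train)
    errors = np.mean((y - y_hat) ** 2, axis=1)
    # scale: squared error of a naive forecast repeating the last in-sample value
    scale = np.mean((y - y_insample[:, [-1]]) ** 2, axis=1)
    return np.mean(np.sqrt(errors) / np.sqrt(scale))
```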
20 | 21 | ### Time (seconds) 22 | 23 | | Dataset | statsforecast | fable | sktime | 24 | |:-------------|----------------:|--------:|---------:| 25 | | Labour | 1.982 | 11.233 | 44.368 | 26 | | TourismSmall | 0.627 | 7.61 | 19.120 | 27 | | Wiki2 | 1.822 | 47.626 | 119.941 | 28 | 29 | Notes: 30 | - Fitting time for base forecasts. 31 | 32 | ## Reproducibility 33 | 34 | To reproduce the main results you have: 35 | 36 | 1. Execute `conda env create -f environment.yml`. 37 | 2. Activate the environment using `conda activate hts-comparison`. 38 | 3. Run the experiments using `python -m src.[lib] --group [group]` where `[lib]` can be `statsforecast` or `sktime`, and `[group]` can be `Labour`, `Wiki2`, and `TourismSmall`. 39 | 4. To run R experiments you have to prepare the data using `python -m src.data --group [group]` for each `[group]`. Once it is done, just run `Rscript src/fable.R [group]`. 40 | 5. To parse the results, use `nbs/parse-results.ipynb`. 41 | 42 | The results were obtained using a `c5d.24xlarge` AWS instance. 43 | 44 | ## ToDo 45 | 46 | - Run comparisons using the `auto_arima` model. 47 | - Use same base forecasts (obtained with the same library/implementation) with different implementations of the reconciled methods. 48 | - Include `Traffic` Dataset 49 | 50 | -------------------------------------------------------------------------------- /experiments/libs-comparison/environment.yml: -------------------------------------------------------------------------------- 1 | name: hts-comparison 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.7 6 | - pip==20.3.3 7 | - fire 8 | - jupyterlab 9 | - r-base==3.6.3 10 | - r-urca 11 | - r-tidyverse 12 | - r-future 13 | - r-future.apply 14 | - r-forecast 15 | - r-furrr 16 | - r-fable 17 | - r-tsibble 18 | - r-tsibbledata 19 | - pip: 20 | - pyarrow 21 | - statsforecast 22 | - hierarchicalforecast 23 | - datasetsforecast 24 | - sktime 25 | - tabulate 26 | -------------------------------------------------------------------------------- /experiments/libs-comparison/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/experiments/libs-comparison/results.png -------------------------------------------------------------------------------- /experiments/libs-comparison/src/data.py: -------------------------------------------------------------------------------- 1 | import fire 2 | from datasetsforecast.hierarchical import HierarchicalData 3 | 4 | 5 | def hierarchical_cols(group: str): 6 | if group == 'Wiki2': 7 | return ['Country', 'Access', 'Agent', 'Topic'], ['Country', 'Access', 'Agent', 'Topic'], '_' 8 | elif group == 'Labour': 9 | return ['Employment', 'Gender', 'Region'], ['Region', 'Employment', 'Gender'], ',' 10 | elif group == 'TourismSmall': 11 | return ['State', 'Purpose', 'CityNonCity'], ['Purpose', 'State', 'CityNonCity'], '-' 12 | raise Exception(f'Unknown group {group}') 13 | 14 | def parse_data(group: str): 15 | #Get bottom time series to use in R 16 | init_cols, hier_cols, sep = hierarchical_cols(group) 17 | Y_df, S, tags = HierarchicalData.load('data', group) 18 | Y_df = Y_df.query('unique_id in @S.columns') 19 | Y_df[init_cols] = Y_df['unique_id'].str.split(sep, expand=True) 20 | Y_df = Y_df[init_cols + ['ds', 'y']] 21 | Y_df = Y_df.groupby(init_cols + ['ds']).sum().reset_index() 22 | Y_df.to_csv(f'data/{group}.csv', index=False) 23 | 24 | 25 | if __name__=="__main__": 26 | 
fire.Fire(parse_data) 27 | 28 | -------------------------------------------------------------------------------- /experiments/libs-comparison/src/fable.R: -------------------------------------------------------------------------------- 1 | library(fable) 2 | library(tsibble) 3 | library(tsibbledata) 4 | library(lubridate) 5 | library(dplyr) 6 | library(readr) 7 | library(future) 8 | library(stringr) 9 | 10 | args <- commandArgs(trailingOnly=TRUE) 11 | meta <- list( 12 | TourismSmall=list(ds_fn=yearquarter, cutoff=yearquarter('2004-12-31'), 13 | key=c("Purpose", "State", "CityNonCity")), 14 | Labour=list(ds_fn=yearmonth, cutoff=yearmonth('2019-04-01'), 15 | key=c('Region', 'Employment', 'Gender')), 16 | Wiki2=list(ds_fn=ymd, cutoff=ymd('2016-12-17'), 17 | key=c('Country', 'Access', 'Agent', 'Topic')) 18 | ) 19 | group <- args[1] 20 | ds_fn <- meta[[group]][['ds_fn']] 21 | cutoff <- meta[[group]][['cutoff']] 22 | key <- meta[[group]][['key']] 23 | 24 | plan(multiprocess, gc=TRUE) 25 | 26 | 27 | Y_df <- read_csv(str_glue('./data/{group}.csv')) %>% 28 | mutate(ds = ds_fn(ds)) %>% 29 | as_tsibble( 30 | index = ds, 31 | key = key, 32 | ) 33 | if(group == 'TourismSmall'){ 34 | Y_df <- aggregate_key(Y_df, Purpose / State / CityNonCity, y = sum(y)) 35 | } else if (group == 'Labour') { 36 | Y_df <- aggregate_key(Y_df, Region / Employment / Gender, y = sum(y)) 37 | } else if (group == 'Wiki2') { 38 | Y_df <- aggregate_key(Y_df, Country / Access / Agent / Topic, y = sum(y)) 39 | } 40 | 41 | #split train/test sets 42 | Y_df_train <- Y_df %>% 43 | filter(ds <= cutoff) 44 | Y_df_test <- Y_df %>% 45 | filter(ds > cutoff) 46 | 47 | #forecaster 48 | start <- Sys.time() 49 | ets_fit <- Y_df_train %>% 50 | model(ets = ETS(y), naive = NAIVE(y)) 51 | end <- Sys.time() 52 | 53 | ets_fit <- ets_fit %>% 54 | reconcile( 55 | bu = bottom_up(ets), 56 | ols = min_trace(ets, method='ols'), 57 | wls_struct = min_trace(ets, method='wls_struct'), 58 | wls_var = min_trace(ets, method='wls_var'), 59 | mint_shrink = min_trace(ets, method='mint_shrink'), 60 | ) 61 | fc <- ets_fit %>% 62 | forecast(Y_df_test) 63 | 64 | fc <- fc %>% 65 | as_tibble() %>% 66 | select(-y) %>% 67 | left_join(Y_df_test, by=c(key, 'ds')) 68 | 69 | errors <- fc %>% 70 | mutate(error = (y - .mean) ** 2) %>% 71 | group_by_at(c(key, '.model')) %>% 72 | summarise(rmse = sqrt(mean(error))) %>% 73 | ungroup() 74 | 75 | naive_errors <- errors %>% 76 | filter(.model == 'naive') %>% 77 | select(-.model) %>% 78 | rename(naive_rmse = rmse) 79 | 80 | errors <- errors %>% 81 | filter(.model != 'naive') %>% 82 | left_join(naive_errors, by=key) %>% 83 | group_by(.model) %>% 84 | summarise(rmsse = mean(rmse / naive_rmse)) 85 | 86 | write_csv(errors, 87 | str_glue('./results/{group}/fable.csv')) 88 | tibble(group = group, 89 | time = difftime(end, start, units='secs')) %>% 90 | write_csv(str_glue('./results/{group}/fable-time.csv')) 91 | 92 | 93 | -------------------------------------------------------------------------------- /experiments/libs-comparison/src/sktime.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from time import time 3 | 4 | import fire 5 | import numpy as np 6 | import pandas as pd 7 | from datasetsforecast.hierarchical import HierarchicalData, HierarchicalInfo 8 | from sktime.forecasting.ets import AutoETS 9 | from sktime.forecasting.reconcile import ReconcilerForecaster 10 | from sktime.transformations.hierarchical.aggregate import Aggregator 11 | from 
sktime.transformations.hierarchical.reconcile import Reconciler 12 | 13 | 14 | def rmsse(y, y_hat, y_insample): 15 | errors = np.mean((y - y_hat) ** 2, axis=1) 16 | scale = np.mean((y - y_insample[:, [-1]]) ** 2, axis=1) 17 | return np.mean(np.sqrt(errors) / np.sqrt(scale)) 18 | 19 | def hierarchical_cols(group: str): 20 | if group == 'Wiki2': 21 | return ['Country', 'Access', 'Agent', 'Topic'], ['Country', 'Access', 'Agent', 'Topic'], '_' 22 | elif group == 'Labour': 23 | return ['Employment', 'Gender', 'Region'], ['Region', 'Employment', 'Gender'], ',' 24 | elif group == 'TourismSmall': 25 | return ['State', 'Purpose', 'CityNonCity'], ['Purpose', 'State', 'CityNonCity'], '-' 26 | raise Exception(f'Unknown group {group}') 27 | 28 | def pipeline(group: str): 29 | results_group_dir = Path(f'./results/{group}') 30 | results_group_dir.mkdir(exist_ok=True, parents=True) 31 | init_cols, hier_cols, sep = hierarchical_cols(group) 32 | Y_df, S, tags = HierarchicalData.load('data', group) 33 | n_series = Y_df['unique_id'].nunique() 34 | meta_info_group = HierarchicalInfo[group] 35 | h = meta_info_group.horizon 36 | freq = meta_info_group.freq 37 | sp = meta_info_group.seasonality 38 | #Get only bottom series 39 | #to contruct full dataset using sktime 40 | Y_df = Y_df.query('unique_id in @S.columns') 41 | Y_df[init_cols] = Y_df['unique_id'].str.split(sep, expand=True) 42 | if group == 'Labour': 43 | freq = 'M' 44 | Y_df['ds'] = pd.PeriodIndex(Y_df['ds'], freq=freq) 45 | Y_df = Y_df.set_index(hier_cols+['ds'])[['y']] 46 | #Aggregation 47 | agg = Aggregator(flatten_single_levels=False) 48 | Y_df = agg.fit_transform(Y_df) 49 | Y_df = Y_df.reset_index() 50 | n_agg_series = len(Y_df[hier_cols].drop_duplicates()) 51 | if n_agg_series != n_series: 52 | raise Exception('mismatch n_series original and sktime') 53 | #split train/test sets 54 | Y_df_test = Y_df.groupby(hier_cols).tail(h) 55 | Y_df_train = Y_df.drop(Y_df_test.index) 56 | Y_df_test = Y_df_test.set_index(hier_cols+['ds']) 57 | Y_df_train = Y_df_train.set_index(hier_cols+['ds']) 58 | #forecaster 59 | seasonal = 'Additive' if group == 'Wiki2' else None #prevent negative and zero values 60 | init_time = time() 61 | forecaster = AutoETS(auto=True, sp=sp, seasonal=seasonal, n_jobs=-1) 62 | forecaster.fit(Y_df_train) 63 | prds = forecaster.predict(fh=np.arange(1, h + 1)).rename(columns={'y': 'base'}) 64 | fcst_time = time() - init_time 65 | #reconciliation methods 66 | methods = ['bu', 'ols', 'wls_str', 'td_fcst'] 67 | for method in methods: 68 | reconciler = Reconciler(method=method) 69 | prds_recon = reconciler.fit_transform(prds[['base']]).rename(columns={'base': method}) 70 | prds = prds.merge(prds_recon, how='left', left_index=True, right_index=True) 71 | #methods based on residuals 72 | methods_res = ['wls_var', 'mint_shrink'] 73 | for method in methods_res: 74 | reconciler = ReconcilerForecaster(forecaster, method=method) 75 | reconciler.fit(Y_df_train) 76 | prds_recon = reconciler.predict(fh=np.arange(1, h + 1)).rename(columns={'y': method}) 77 | prds = prds.merge(prds_recon, how='left', left_index=True, right_index=True) 78 | #adding y_test for evaluation 79 | prds = prds.merge(Y_df_test, how='left', left_index=True, right_index=True) 80 | #evaluation 81 | y_test = prds['y'].values.reshape(-1, h) 82 | y_insample = Y_df_train['y'].values.reshape(n_series, -1) 83 | evals = {} 84 | for method in ['base'] + methods + methods_res: 85 | y_hat = prds[method].values.reshape(-1, h) 86 | evals[method] = rmsse(y_test, y_hat, y_insample) 87 | evals = 
pd.DataFrame(evals, index=[group]) 88 | fcst_time = pd.DataFrame({'group': group, 'time': fcst_time}, index=[0]) 89 | evals.to_csv(results_group_dir / 'sktime.csv', index=False) 90 | fcst_time.to_csv(results_group_dir / 'sktime-time.csv', index=False) 91 | 92 | 93 | if __name__=="__main__": 94 | fire.Fire(pipeline) 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /experiments/libs-comparison/src/statsforecast.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from time import time 4 | os.environ['NUMBA_RELEASE_GIL'] = 'True' 5 | os.environ['NUMBA_CACHE'] = 'True' 6 | 7 | import fire 8 | import numpy as np 9 | import pandas as pd 10 | from datasetsforecast.hierarchical import HierarchicalData, HierarchicalInfo 11 | from hierarchicalforecast.core import HierarchicalReconciliation 12 | from hierarchicalforecast.evaluation import HierarchicalEvaluation 13 | from hierarchicalforecast.methods import ( 14 | BottomUp, TopDown, MiddleOut, 15 | MinTrace, ERM 16 | ) 17 | from statsforecast.core import StatsForecast 18 | from statsforecast.models import ets 19 | from statsforecast.utils import AirPassengers as ap 20 | 21 | 22 | def rmsse(y, y_hat, y_insample): 23 | errors = np.mean((y - y_hat) ** 2, axis=1) 24 | scale = np.mean((y - y_insample[:, [-1]]) ** 2, axis=1) 25 | return np.mean(np.sqrt(errors) / np.sqrt(scale)) 26 | 27 | def get_str_model(group: str): 28 | if group == 'Wiki2': 29 | #avoid issues with seasonal models 30 | #due to negative and zero values 31 | return 'ZZA' 32 | return 'ZZZ' 33 | 34 | def get_ERM_lambda(group: str): 35 | if group == 'Wiki2': 36 | return 1e6 37 | elif group == 'TourismSmall': 38 | return 2e6 39 | elif group == 'Labour': 40 | return 100 41 | raise Exception(f'Unkwon group {group}') 42 | 43 | def pipeline(group: str): 44 | results_group_dir = Path(f'./results/{group}') 45 | results_group_dir.mkdir(exist_ok=True, parents=True) 46 | Y_df, S, tags = HierarchicalData.load('data', group) 47 | Y_df['ds'] = pd.to_datetime(Y_df['ds']) 48 | n_series = Y_df['unique_id'].nunique() 49 | meta_info_group = HierarchicalInfo[group] 50 | h = meta_info_group.horizon 51 | freq = meta_info_group.freq 52 | sp = meta_info_group.seasonality 53 | #split train/test sets 54 | Y_df_test = Y_df.groupby(['unique_id']).tail(h) 55 | Y_df_train = Y_df.drop(Y_df_test.index) 56 | Y_df_test = Y_df_test.set_index('unique_id') 57 | Y_df_train = Y_df_train.set_index('unique_id') 58 | #forecaster 59 | str_model = get_str_model(group) 60 | init_time = time() 61 | forecaster = StatsForecast( 62 | df=Y_df_train, 63 | models=[(ets, sp, str_model)], 64 | freq=freq, 65 | n_jobs=-1, 66 | ) 67 | Y_df_hat = forecaster.forecast(h, fitted=True) 68 | Y_df_fitted = forecaster.forecast_fitted_values() 69 | fcst_time = time() - init_time 70 | #reconciliation methods 71 | methods = [ 72 | BottomUp(), 73 | TopDown(method='forecast_proportions'), 74 | TopDown(method='average_proportions'), 75 | TopDown(method='proportion_averages'), 76 | *[ 77 | MiddleOut(level=name, top_down_method='forecast_proportions') \ 78 | for name in list(tags.keys())[1:-1] 79 | ], 80 | *[ 81 | MiddleOut(level=name, top_down_method='average_proportions') \ 82 | for name in list(tags.keys())[1:-1] 83 | ], 84 | *[ 85 | MiddleOut(level=name, top_down_method='proportion_averages') \ 86 | for name in list(tags.keys())[1:-1] 87 | ], 88 | MinTrace(method='ols'), 89 | MinTrace(method='wls_struct'), 90 | 
MinTrace(method='wls_var'), 91 | MinTrace(method='mint_shrink'), 92 | ERM(method='closed'), 93 | ERM(method='reg', lambda_reg=get_ERM_lambda(group)), 94 | ERM(method='reg_bu', lambda_reg=get_ERM_lambda(group)), 95 | ] 96 | hrec = HierarchicalReconciliation(reconcilers=methods) 97 | Y_df_hat_rec = hrec.reconcile( 98 | Y_df_hat, 99 | Y_df_fitted, 100 | S, 101 | tags 102 | ) 103 | eval_tags = {'All': np.concatenate(list(tags.values()))} 104 | evaluator = HierarchicalEvaluation(evaluators=[rmsse]) 105 | evals = evaluator.evaluate( 106 | Y_df_hat_rec, 107 | Y_df_test, 108 | eval_tags, 109 | Y_df=Y_df_train 110 | ) 111 | evals = evals.loc['All'].reset_index() 112 | evals = pd.melt( 113 | evals, 114 | value_vars=evals.columns.to_list(), 115 | var_name='model', 116 | value_name=group, 117 | ) 118 | evals[['model', 'rec_method']] = evals['model'].str.split('/', expand=True, n=1) 119 | evals['rec_method'] = evals['rec_method'].fillna('Base') 120 | evals = evals.drop(0) 121 | evals = evals[['rec_method', group]] 122 | fcst_time = pd.DataFrame({'group': group, 'time': fcst_time}, index=[0]) 123 | evals.to_csv(results_group_dir / 'statsforecast.csv', index=False) 124 | fcst_time.to_csv(results_group_dir / 'statsforecast-time.csv', index=False) 125 | 126 | 127 | if __name__=="__main__": 128 | ets(ap.astype(np.float32), 12, season_length=12) 129 | fire.Fire(pipeline) 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /hierarchicalforecast/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.2.1" 2 | -------------------------------------------------------------------------------- /hierarchicalforecast/_nbdev.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED BY NBDEV! DO NOT EDIT! 2 | 3 | __all__ = ["index", "modules", "custom_doc_links", "git_url"] 4 | 5 | index = {"HierarchicalReconciliation": "core.ipynb", 6 | "HierarchicalEvaluation": "evaluation.ipynb", 7 | "bottom_up": "methods.ipynb", 8 | "BottomUp": "methods.ipynb", 9 | "is_strictly_hierarchical": "methods.ipynb", 10 | "top_down": "methods.ipynb", 11 | "TopDown": "methods.ipynb", 12 | "middle_out": "methods.ipynb", 13 | "MiddleOut": "methods.ipynb", 14 | "crossprod": "methods.ipynb", 15 | "min_trace": "methods.ipynb", 16 | "MinTrace": "methods.ipynb", 17 | "optimal_combination": "methods.ipynb", 18 | "OptimalCombination": "methods.ipynb", 19 | "lasso": "methods.ipynb", 20 | "erm": "methods.ipynb", 21 | "ERM": "methods.ipynb", 22 | "aggregate": "utils.ipynb"} 23 | 24 | modules = ["core.py", 25 | "evaluation.py", 26 | "methods.py", 27 | "utils.py"] 28 | 29 | doc_url = "https://Nixtla.github.io/hierarchicalforecast/" 30 | 31 | git_url = "https://github.com/Nixtla/hierarchicalforecast/tree/main/" 32 | 33 | def custom_doc_links(name): return None 34 | -------------------------------------------------------------------------------- /hierarchicalforecast/probabilistic_methods.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/src/probabilistic_methods.ipynb. 
2 | 
3 | # %% auto 0
4 | __all__ = ['Normality']
5 | 
6 | # %% ../nbs/src/probabilistic_methods.ipynb 3
7 | import warnings
8 | from typing import Optional
9 | 
10 | import numpy as np
11 | from scipy.stats import norm
12 | from sklearn.preprocessing import OneHotEncoder
13 | 
14 | from .utils import is_strictly_hierarchical
15 | 
16 | # %% ../nbs/src/probabilistic_methods.ipynb 6
17 | class Normality:
18 |     """Normality Probabilistic Reconciliation Class.
19 | 
20 |     The Normality method leverages the linearity of the Gaussian distribution to
21 |     generate hierarchically coherent prediction distributions. This class is
22 |     meant to be used as the `sampler` input to other `HierarchicalForecast` [reconciliation classes](https://nixtla.github.io/hierarchicalforecast/methods.html).
23 | 
24 |     Given base forecasts under a normal distribution:
25 |     $$\hat{y}_{h} \sim \mathrm{N}(\hat{\\boldsymbol{\\mu}}, \hat{\mathbf{W}}_{h})$$
26 | 
27 |     The reconciled forecasts are also normally distributed:
28 | 
29 |     $$
30 |     \\tilde{y}_{h} \sim \mathrm{N}(\mathbf{S}\mathbf{P}\hat{\\boldsymbol{\\mu}},
31 |     \mathbf{S}\mathbf{P}\hat{\mathbf{W}}_{h} \mathbf{P}^{\intercal} \mathbf{S}^{\intercal})
32 |     $$
33 | 
34 |     **Parameters:**
35 | `S`: np.array, summing matrix of size (`base`, `bottom`).
36 | `P`: np.array, reconciliation matrix of size (`bottom`, `base`).
37 | `y_hat`: Point forecasts values of size (`base`, `horizon`).
38 | `W`: np.array, hierarchical covariance matrix of size (`base`, `base`).
39 | `sigmah`: np.array, forecast standard dev. of size (`base`, `horizon`).
40 |     `num_samples`: int, number of bootstrapped samples generated.
41 | `seed`: int, random seed for numpy generator's replicability.
42 | 43 | **References:**
44 | - [Panagiotelis A., Gamakumara P. Athanasopoulos G., and Hyndman R. J. (2022). 45 | "Probabilistic forecast reconciliation: Properties, evaluation and score optimisation". European Journal of Operational Research.](https://www.sciencedirect.com/science/article/pii/S0377221722006087) 46 | """ 47 | 48 | def __init__( 49 | self, 50 | S: np.ndarray, 51 | P: np.ndarray, 52 | y_hat: np.ndarray, 53 | sigmah: np.ndarray, 54 | W: np.ndarray, 55 | seed: int = 0, 56 | ): 57 | self.S = S 58 | self.P = P 59 | self.y_hat = y_hat 60 | self.SP = self.S @ self.P 61 | self.W = W 62 | self.sigmah = sigmah 63 | self.seed = seed 64 | 65 | # Base Normality Errors assume independence/diagonal covariance 66 | # TODO: replace bilinearity with elementwise row multiplication 67 | std_ = np.sqrt(self.W.diagonal()) 68 | R1 = self.W / np.outer(std_, std_) 69 | Wh = [np.diag(sigma) @ R1 @ np.diag(sigma).T for sigma in self.sigmah.T] 70 | 71 | # Reconciled covariances across forecast horizon 72 | self.cov_rec = [(self.SP @ W @ self.SP.T) for W in Wh] 73 | self.sigmah_rec = np.hstack( 74 | [np.sqrt(cov.diagonal())[:, None] for cov in self.cov_rec] 75 | ) 76 | 77 | def get_samples(self, num_samples: int): 78 | """Normality Coherent Samples. 79 | 80 | Obtains coherent samples under the Normality assumptions. 81 | 82 | **Parameters:**
83 | `num_samples`: int, number of samples generated from the coherent distribution.
84 | 85 | **Returns:**
86 | `samples`: Coherent samples of size (`base`, `horizon`, `num_samples`). 87 | """ 88 | rng = np.random.default_rng(self.seed) 89 | n_series, n_horizon = self.y_hat.shape 90 | samples = np.empty(shape=(num_samples, n_series, n_horizon)) 91 | for t in range(n_horizon): 92 | with warnings.catch_warnings(): 93 | # Suppress 'RuntimeWarning: covariance is not positive-semidefinite.'; by definition the multivariate distribution is not full-rank 94 | warnings.simplefilter("ignore", RuntimeWarning) 95 | partial_samples = rng.multivariate_normal( 96 | mean=self.SP @ self.y_hat[:, t], 97 | cov=self.cov_rec[t], 98 | size=num_samples, 99 | ) 100 | samples[:, :, t] = partial_samples 101 | 102 | # [samples, N, H] -> [N, H, samples] 103 | samples = samples.transpose((1, 2, 0)) 104 | return samples 105 | 106 | def get_prediction_levels(self, res, level): 107 | """Adds reconciled forecast levels to results dictionary""" 108 | res["sigmah"] = self.sigmah_rec 109 | level = np.asarray(level) 110 | z = norm.ppf(0.5 + level / 200) 111 | for zs, lv in zip(z, level): 112 | res[f"lo-{lv}"] = res["mean"] - zs * self.sigmah_rec 113 | res[f"hi-{lv}"] = res["mean"] + zs * self.sigmah_rec 114 | return res 115 | 116 | def get_prediction_quantiles(self, res, quantiles): 117 | """Adds reconciled forecast quantiles to results dictionary""" 118 | # [N,H,None] + [None,None,Q] * [N,H,None] -> [N,H,Q] 119 | z = norm.ppf(quantiles) 120 | res["sigmah"] = self.sigmah_rec 121 | res["quantiles"] = ( 122 | res["mean"][:, :, None] + z[None, None, :] * self.sigmah_rec[:, :, None] 123 | ) 124 | return res 125 | 126 | # %% ../nbs/src/probabilistic_methods.ipynb 10 127 | class Bootstrap: 128 | """Bootstrap Probabilistic Reconciliation Class. 129 | 130 | This method goes beyond the normality assumption for the base forecasts: 131 | the technique simulates future sample paths and uses them to generate 132 | base sample paths that are later reconciled. This simple yet clever idea 133 | makes it possible to generate coherent bootstrapped prediction intervals 134 | for any reconciliation strategy. This class is meant to be used as the `sampler` 135 | input to other `HierarchicalForecast` [reconciliation classes](https://nixtla.github.io/hierarchicalforecast/methods.html). 136 | 137 | Given a bootstrapped set of simulated sample paths: 138 | $$(\hat{\mathbf{y}}^{[1]}_{\\tau}, \dots ,\hat{\mathbf{y}}^{[B]}_{\\tau})$$ 139 | 140 | The reconciled sample paths allow for reconciled distributional forecasts: 141 | $$(\mathbf{S}\mathbf{P}\hat{\mathbf{y}}^{[1]}_{\\tau}, \dots ,\mathbf{S}\mathbf{P}\hat{\mathbf{y}}^{[B]}_{\\tau})$$ 142 | 143 | **Parameters:**
144 | `S`: np.array, summing matrix of size (`base`, `bottom`).
145 | `P`: np.array, reconciliation matrix of size (`bottom`, `base`).
146 | `y_hat`: np.array, point forecast values of size (`base`, `horizon`).
147 | `y_insample`: Insample values of size (`base`, `insample_size`).
148 | `y_hat_insample`: Insample point forecasts of size (`base`, `insample_size`).
149 | `num_samples`: int, number of bootstrapped samples generated.
150 | `seed`: int, random seed for the numpy generator's reproducibility.
151 | 152 | **References:**
153 | - [Puwasala Gamakumara Ph.D. dissertation. Monash University, Econometrics and Business Statistics (2020). 154 | "Probabilistic Forecast Reconciliation"](https://bridges.monash.edu/articles/thesis/Probabilistic_Forecast_Reconciliation_Theory_and_Applications/11869533) 155 | - [Panagiotelis A., Gamakumara P., Athanasopoulos G., and Hyndman R. J. (2022). 156 | "Probabilistic forecast reconciliation: Properties, evaluation and score optimisation". European Journal of Operational Research.](https://www.sciencedirect.com/science/article/pii/S0377221722006087) 157 | """ 158 | 159 | def __init__( 160 | self, 161 | S: np.ndarray, 162 | P: np.ndarray, 163 | y_hat: np.ndarray, 164 | y_insample: np.ndarray, 165 | y_hat_insample: np.ndarray, 166 | num_samples: int = 100, 167 | seed: int = 0, 168 | W: Optional[np.ndarray] = None, 169 | ): 170 | self.S = S 171 | self.P = P 172 | self.W = W 173 | self.y_hat = y_hat 174 | self.y_insample = y_insample 175 | self.y_hat_insample = y_hat_insample 176 | self.num_samples = num_samples 177 | self.seed = seed 178 | 179 | def get_samples(self, num_samples: int): 180 | """Bootstrap Sample Reconciliation Method. 181 | 182 | Applies the Bootstrap sample reconciliation method as defined by Gamakumara 2020, 183 | generating independent sample paths and reconciling them through the `S @ P` projection. 184 | 185 | **Parameters:**
186 | `num_samples`: int, number of samples generated from the coherent distribution.
187 | 188 | **Returns:**
189 | `samples`: Coherent samples of size (`base`, `horizon`, `num_samples`). 190 | """ 191 | residuals = self.y_insample - self.y_hat_insample 192 | h = self.y_hat.shape[1] 193 | 194 | # remove NaNs from residuals 195 | residuals = residuals[:, np.isnan(residuals).sum(axis=0) == 0] 196 | sample_idx = np.arange(residuals.shape[1] - h) 197 | rng = np.random.default_rng(self.seed) 198 | samples_idx = rng.choice(sample_idx, size=num_samples) 199 | samples = [self.y_hat + residuals[:, idx : (idx + h)] for idx in samples_idx] 200 | SP = self.S @ self.P 201 | samples = np.apply_along_axis( 202 | lambda path: np.matmul(SP, path), axis=1, arr=samples 203 | ) 204 | samples_np = np.stack(samples) 205 | 206 | # [samples, N, H] -> [N, H, samples] 207 | samples_np = samples_np.transpose((1, 2, 0)) 208 | return samples_np 209 | 210 | def get_prediction_levels(self, res, level): 211 | """Adds reconciled forecast levels to results dictionary""" 212 | samples = self.get_samples(num_samples=self.num_samples) 213 | for lv in level: 214 | min_q = (100 - lv) / 200 215 | max_q = min_q + lv / 100 216 | res[f"lo-{lv}"] = np.quantile(samples, min_q, axis=2) 217 | res[f"hi-{lv}"] = np.quantile(samples, max_q, axis=2) 218 | return res 219 | 220 | def get_prediction_quantiles(self, res, quantiles): 221 | """Adds reconciled forecast quantiles to results dictionary""" 222 | samples = self.get_samples(num_samples=self.num_samples) 223 | 224 | # [Q, N, H] -> [N, H, Q] 225 | sample_quantiles = np.quantile(samples, quantiles, axis=2) 226 | res["quantiles"] = sample_quantiles.transpose((1, 2, 0)) 227 | return res 228 | 229 | # %% ../nbs/src/probabilistic_methods.ipynb 14 230 | class PERMBU: 231 | """PERMBU Probabilistic Reconciliation Class. 232 | 233 | The PERMBU method leverages empirical bottom-level marginal distributions 234 | with empirical copula functions (describing bottom-level dependencies) to 235 | generate the distributions of the aggregate levels using BottomUp 236 | reconciliation. The sample reordering technique in the PERMBU method reinjects 237 | multivariate dependencies into independent bottom-level samples. 238 | 239 | Algorithm: 240 | 1. For all series, compute conditional marginal distributions. 241 | 2. Compute residuals $\hat{\epsilon}_{i,t}$ and obtain rank permutations. 242 | 3. Obtain a K-sample from the bottom-level series predictions. 243 | 4. Apply recursively through the hierarchical structure:
244 | 4.1. For a given aggregate series $i$ and its children series:
245 | 4.2. Obtain the children's empirical joint distribution using the sample-reordering copula.
246 | 4.3. From the children's joint distribution, obtain the aggregate series' samples. 247 | 248 | **Parameters:**
249 | `S`: np.array, summing matrix of size (`base`, `bottom`).
250 | `tags`: Each key is a level and each value is its `S` indices.
251 | `y_insample`: Insample values of size (`base`, `insample_size`).
252 | `y_hat_insample`: Insample point forecasts of size (`base`, `insample_size`).
253 | `sigmah`: np.array, forecast standard dev. of size (`base`, `horizon`).
254 | `num_samples`: int, number of normal prediction samples generated.
255 | `seed`: int, random seed for the numpy generator's reproducibility.
256 | 257 | **References:**
258 | - [Taieb, Souhaib Ben and Taylor, James W and Hyndman, Rob J. (2017). 259 | Coherent probabilistic forecasts for hierarchical time series. 260 | International conference on machine learning ICML.](https://proceedings.mlr.press/v70/taieb17a.html) 261 | """ 262 | 263 | def __init__( 264 | self, 265 | S: np.ndarray, 266 | tags: dict[str, np.ndarray], 267 | y_hat: np.ndarray, 268 | y_insample: np.ndarray, 269 | y_hat_insample: np.ndarray, 270 | sigmah: np.ndarray, 271 | num_samples: Optional[int] = None, 272 | seed: int = 0, 273 | P: Optional[np.ndarray] = None, 274 | ): 275 | # PERMBU only works for strictly hierarchical structures 276 | if not is_strictly_hierarchical(S, tags): 277 | raise ValueError( 278 | "PERMBU probabilistic reconciliation requires strictly hierarchical structures." 279 | ) 280 | self.S = S 281 | self.P = P 282 | self.y_hat = y_hat 283 | self.y_insample = y_insample 284 | self.y_hat_insample = y_hat_insample 285 | self.sigmah = sigmah 286 | self.num_samples = num_samples 287 | self.seed = seed 288 | 289 | def _obtain_ranks(self, array): 290 | """Vector ranks 291 | 292 | Efficiently obtain vector ranks. 293 | Example `array=[4,2,7,1]` -> `ranks=[2, 1, 3, 0]`. 294 | 295 | **Parameters**
296 | `array`: np.array, matrix of floats or integers whose 297 | ranks will be computed along the second dimension.
298 | 299 | **Returns**
300 | `ranks`: np.array, matrix with ranks along the second dimension.
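For a matrix input the ranks are computed row by row, e.g. `array=[[4,2,7,1],[10,9,8,11]]` -> `ranks=[[2,1,3,0],[2,1,0,3]]`.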
301 | """ 302 | temp = array.argsort(axis=1) 303 | ranks = np.empty_like(temp) 304 | a_range = np.arange(temp.shape[1]) 305 | for i_row in range(temp.shape[0]): 306 | ranks[i_row, temp[i_row, :]] = a_range 307 | return ranks 308 | 309 | def _permutate_samples(self, samples, permutations): 310 | """Permutate Samples 311 | 312 | Applies efficient vectorized permutation on the samples. 313 | 314 | **Parameters**
315 | `samples`: np.array [series,samples], independent base samples.
316 | `permutations`: np.array [series,samples], permutation ranks with 317 | which `samples` dependence will be restored; see `_obtain_ranks`.
318 | 319 | **Returns**
320 | `permutated_samples`: np.array.
321 | """ 322 | # Generate auxiliary and flat permutation indexes 323 | n_rows, n_cols = permutations.shape 324 | aux_row_idx = np.arange(n_rows)[:, None] * n_cols 325 | aux_row_idx = np.repeat(aux_row_idx, repeats=n_cols, axis=1) 326 | permutate_idxs = permutations.flatten() + aux_row_idx.flatten() 327 | 328 | # Apply flat permutation indexes and recover original shape 329 | permutated_samples = samples.flatten() 330 | permutated_samples = permutated_samples[permutate_idxs] 331 | permutated_samples = permutated_samples.reshape(n_rows, n_cols) 332 | return permutated_samples 333 | 334 | def _permutate_predictions(self, prediction_samples, permutations): 335 | """Permutate Prediction Samples 336 | 337 | Applies permutations to prediction_samples across the horizon. 338 | 339 | **Parameters**
340 | `prediction_samples`: np.array [series,horizon,samples], independent 341 | base prediction samples.
342 | `permutations`: np.array [series, samples], permutation ranks with which 343 | `samples` dependence will be restored; see `_obtain_ranks`. 344 | It can also apply a random permutation.
345 | 346 | **Returns**
347 | `permutated_prediction_samples`: np.array.
348 | """ 349 | # Apply permutation throughout forecast horizon 350 | permutated_prediction_samples = prediction_samples.copy() 351 | 352 | _, n_horizon, _ = prediction_samples.shape 353 | for t in range(n_horizon): 354 | permutated_prediction_samples[:, t, :] = self._permutate_samples( 355 | prediction_samples[:, t, :], permutations 356 | ) 357 | return permutated_prediction_samples 358 | 359 | def _nonzero_indexes_by_row(self, M): 360 | return [np.nonzero(M[row, :])[0] for row in range(len(M))] 361 | 362 | def get_samples(self, num_samples: Optional[int] = None): 363 | """PERMBU Sample Reconciliation Method. 364 | 365 | Applies PERMBU reconciliation method as defined by Taieb et. al 2017. 366 | Generating independent base prediction samples, restoring its multivariate 367 | dependence using estimated copula with reordering and applying the BottomUp 368 | aggregation to the new samples. 369 | 370 | **Parameters:**
371 | `num_samples`: int, number of samples generated from the coherent distribution.
372 | 373 | **Returns:**
374 | `samples`: Coherent samples of size (`base`, `horizon`, `num_samples`). 375 | """ 376 | # Compute residuals and rank permutations 377 | residuals = self.y_insample - self.y_hat_insample 378 | residuals = residuals[:, np.isnan(residuals).sum(axis=0) == 0] 379 | 380 | # Sample h step-ahead base marginal distributions 381 | if num_samples is None: 382 | num_samples = residuals.shape[1] 383 | 384 | # Expand residuals to match num_samples [(a,b),T] -> [(a,b),num_samples] 385 | rng = np.random.default_rng(self.seed) 386 | if num_samples > residuals.shape[1]: 387 | residuals_idxs = rng.choice(residuals.shape[1], size=num_samples) 388 | else: 389 | residuals_idxs = rng.choice( 390 | residuals.shape[1], size=num_samples, replace=False 391 | ) 392 | residuals = residuals[:, residuals_idxs] 393 | rank_permutations = self._obtain_ranks(residuals) 394 | 395 | n_series, n_horizon = self.y_hat.shape 396 | 397 | base_samples = np.array( 398 | [ 399 | rng.normal(loc=m, scale=s, size=num_samples) 400 | for m, s in zip(self.y_hat.flatten(), self.sigmah.flatten()) 401 | ] 402 | ) 403 | base_samples = base_samples.reshape(n_series, n_horizon, num_samples) 404 | 405 | # Initialize PERMBU utility 406 | rec_samples = base_samples.copy() 407 | try: 408 | encoder = OneHotEncoder(sparse_output=False, dtype=np.float64) 409 | except TypeError: 410 | encoder = OneHotEncoder(sparse=False, dtype=np.float64) 411 | hier_links = np.vstack(self._nonzero_indexes_by_row(self.S.T)) 412 | 413 | # BottomUp hierarchy traversing 414 | hier_levels = hier_links.shape[1] - 1 415 | for level_idx in reversed(range(hier_levels)): 416 | # Obtain aggregation matrix from parent/children links 417 | children_links = np.unique(hier_links[:, level_idx : level_idx + 2], axis=0) 418 | children_idxs = np.unique(children_links[:, 1]) 419 | parent_idxs = np.unique(children_links[:, 0]) 420 | Agg = encoder.fit_transform(children_links).T 421 | Agg = Agg[: len(parent_idxs), :] 422 | 423 | # Permute children_samples for each prediction step 424 | children_permutations = rank_permutations[children_idxs, :] 425 | children_samples = rec_samples[children_idxs, :, :] 426 | children_samples = self._permutate_predictions( 427 | prediction_samples=children_samples, permutations=children_permutations 428 | ) 429 | 430 | # Overwrite hier_samples with BottomUp aggregation 431 | # and randomly shuffle parent predictions after aggregation 432 | parent_samples = np.einsum("ab,bhs->ahs", Agg, children_samples) 433 | random_permutation = np.array( 434 | [ 435 | rng.permutation(np.arange(num_samples)) 436 | for serie in range(len(parent_samples)) 437 | ] 438 | ) 439 | parent_samples = self._permutate_predictions( 440 | prediction_samples=parent_samples, permutations=random_permutation 441 | ) 442 | 443 | rec_samples[parent_idxs, :, :] = parent_samples 444 | return rec_samples 445 | 446 | def get_prediction_levels(self, res, level): 447 | """Adds reconciled forecast levels to results dictionary""" 448 | samples = self.get_samples(num_samples=self.num_samples) 449 | for lv in level: 450 | min_q = (100 - lv) / 200 451 | max_q = min_q + lv / 100 452 | res[f"lo-{lv}"] = np.quantile(samples, min_q, axis=2) 453 | res[f"hi-{lv}"] = np.quantile(samples, max_q, axis=2) 454 | return res 455 | 456 | def get_prediction_quantiles(self, res, quantiles): 457 | """Adds reconciled forecast quantiles to results dictionary""" 458 | samples = self.get_samples(num_samples=self.num_samples) 459 | 460 | # [Q, N, H] -> [N, H, Q] 461 | sample_quantiles = np.quantile(samples, quantiles, 
axis=2) 462 | res["quantiles"] = sample_quantiles.transpose((1, 2, 0)) 463 | return res 464 | -------------------------------------------------------------------------------- /nbs/.gitattributes: -------------------------------------------------------------------------------- 1 | **/*.ipynb filter=clean-nbs 2 | **/*.ipynb diff=ipynb 3 | -------------------------------------------------------------------------------- /nbs/.gitignore: -------------------------------------------------------------------------------- 1 | /.quarto/ 2 | -------------------------------------------------------------------------------- /nbs/_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | 4 | format: 5 | html: 6 | theme: cosmo 7 | fontsize: 1em 8 | linestretch: 1.7 9 | css: styles.css 10 | toc: true 11 | 12 | website: 13 | twitter-card: true 14 | open-graph: true 15 | google-analytics: "G-NXJNCVR18L" 16 | repo-actions: [issue] 17 | favicon: favicon_png.png 18 | navbar: 19 | background: primary 20 | search: true 21 | collapse-below: lg 22 | left: 23 | - text: "Get Started" 24 | href: examples/TourismSmall.ipynb 25 | - text: "NixtlaVerse" 26 | menu: 27 | - text: "MLForecast 🤖" 28 | href: https://github.com/nixtla/mlforecast 29 | - text: "NeuralForecast 🧠" 30 | href: https://github.com/nixtla/neuralforecast 31 | - text: "StatsForecast ⚡️" 32 | href: https://github.com/nixtla/statsforecast 33 | 34 | - text: "Help" 35 | menu: 36 | - text: "Report an Issue" 37 | icon: bug 38 | href: https://github.com/nixtla/hierarchicalforecast/issues/new/choose 39 | - text: "Join our Slack" 40 | icon: chat-right-text 41 | href: https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1kd5m5db7-7OOfy0xVNf1PcvCiAPWvPw 42 | right: 43 | - icon: github 44 | href: "https://github.com/nixtla/hierarchicalforecast" 45 | - icon: twitter 46 | href: https://twitter.com/nixtlainc 47 | aria-label: Nixtla Twitter 48 | 49 | sidebar: 50 | style: floating 51 | body-footer: | 52 | If you find the code useful, please ⭐ us on [Github](https://github.com/nixtla/hierarchicalforecast) 53 | 54 | metadata-files: [nbdev.yml, sidebar.yml] 55 | -------------------------------------------------------------------------------- /nbs/custom.yml: -------------------------------------------------------------------------------- 1 | website: 2 | reader-mode: false 3 | navbar: 4 | collapse-below: lg 5 | left: 6 | - text: "Get Started" 7 | href: examples/AustralianDomesticTourism.ipynb 8 | - text: "Experiments" 9 | href: https://github.com/Nixtla/hierarchicalforecast/tree/main/experiments 10 | - text: "Help" 11 | menu: 12 | - text: "Report an Issue" 13 | icon: bug 14 | href: https://github.com/nixtla/hierarchicalforecast/issues 15 | - text: "Slack Nixtla" 16 | icon: chat-right-text 17 | href: https://join.slack.com/t/nixtlaworkspace/shared_invite/zt-135dssye9-fWTzMpv2WBthq8NK0Yvu6A 18 | right: 19 | - icon: twitter 20 | href: https://twitter.com/nixtlainc 21 | aria-label: Nixtla Twitter 22 | -------------------------------------------------------------------------------- /nbs/examples/.nodoc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/nbs/examples/.nodoc -------------------------------------------------------------------------------- /nbs/examples/.notest: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/nbs/examples/.notest -------------------------------------------------------------------------------- /nbs/examples/Installation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "14f5686c-449b-4376-8c58-fc8141f4b0f8", 7 | "metadata": {}, 8 | "source": [ 9 | "# Install\n", 10 | "\n", 11 | "> Install HierachicalForecast with pip or conda" 12 | ] 13 | }, 14 | { 15 | "attachments": {}, 16 | "cell_type": "markdown", 17 | "id": "0f1d1483-6da7-4372-8390-84c9c280109e", 18 | "metadata": {}, 19 | "source": [ 20 | "You can install the *released version* of `HierachicalForecast` from the [Python package index](https://pypi.org) with:\n", 21 | "\n", 22 | "```python\n", 23 | "pip install hierarchicalforecast\n", 24 | "```\n", 25 | "\n", 26 | "or \n", 27 | "\n", 28 | "```python\n", 29 | "conda install -c conda-forge hierarchicalforecast\n", 30 | "``` \n", 31 | "\n", 32 | ":::{.callout-tip}\n", 33 | "We recommend installing your libraries inside a python virtual or [conda environment](https://docs.conda.io/projects/conda/en/latest/user-guide/install/macos.html).\n", 34 | ":::\n", 35 | "\n", 36 | "#### User our env (optional)\n", 37 | "\n", 38 | "If you don't have a Conda environment and need tools like Numba, Pandas, NumPy, Jupyter, StatsModels, and Nbdev you can use ours by following these steps:\n", 39 | "\n", 40 | "1. Clone the HierachicalForecast repo: \n", 41 | "\n", 42 | "```bash \n", 43 | "$ git clone https://github.com/Nixtla/hierachicalforecast.git && cd hierachicalforecast\n", 44 | "```\n", 45 | "\n", 46 | "2. Create the environment using the `environment.yml` file: \n", 47 | "\n", 48 | "```bash \n", 49 | "$ conda env create -f environment.yml\n", 50 | "```\n", 51 | "\n", 52 | "3. Activate the environment:\n", 53 | "```bash\n", 54 | "$ conda activate statsforecast\n", 55 | "```" 56 | ] 57 | } 58 | ], 59 | "metadata": { 60 | "kernelspec": { 61 | "display_name": "python3", 62 | "language": "python", 63 | "name": "python3" 64 | } 65 | }, 66 | "nbformat": 4, 67 | "nbformat_minor": 5 68 | } 69 | -------------------------------------------------------------------------------- /nbs/examples/LocalGlobalAggregation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Local vs Global Temporal Aggregation\n", 9 | "\n", 10 | "> Temporal Hierarchical Aggregation on a local or global level." 11 | ] 12 | }, 13 | { 14 | "attachments": {}, 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "In this notebook we explain the difference between temporally aggregating timeseries locally and globally." 19 | ] 20 | }, 21 | { 22 | "attachments": {}, 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "You can run these experiments using CPU or GPU with Google Colab.\n", 27 | "\n", 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "%%capture\n", 38 | "!pip install hierarchicalforecast utilsforecast" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "## 1. 
Generate Data" 46 | ] 47 | }, 48 | { 49 | "attachments": {}, 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "In this example we will generate synthetic series to explain the difference between local- and global temporal aggregation. We will generate 2 series with a daily frequency." 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "from utilsforecast.data import generate_series" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "freq = \"D\"\n", 72 | "n_series = 2\n", 73 | "df = generate_series(n_series=n_series, \n", 74 | " freq=freq, \n", 75 | " min_length=2 * 365, \n", 76 | " max_length=4 * 365, \n", 77 | " equal_ends=True)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "Note that our two timeseries do not have the same number of timesteps:" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "unique_id\n", 96 | "0 1414\n", 97 | "1 1289\n", 98 | "Name: ds, dtype: int64" 99 | ] 100 | }, 101 | "execution_count": null, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "df.groupby('unique_id', observed=True)[\"ds\"].count()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "We then define a spec for our temporal aggregations." 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "spec = {\"year\": 365, \"quarter\": 91, \"month\": 30, \"week\": 7, \"day\": 1}" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "## 2. Local aggregation (default)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "In local aggregation, we treat the timestamps of each timeseries individually. It means that the temporal aggregation is performed by only looking at the timestamps of each series, disregarding the timestamps of other series. " 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "from hierarchicalforecast.utils import aggregate_temporal" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "Y_df_local, S_df_local, tags_local = aggregate_temporal(df, spec)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "We have created temporal aggregations _per timeseries_, as the temporal aggregation `month-1` doesn't correspond to the same (year, month) for both timeseries. This is because the series with `unique_id=1` is shorter and has its first datapoint in July 2000, in contrast to the series with `unique_id=0`, which is longer and has its first timestamp in March 2000." 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/html": [ 173 | "
\n", 174 | "\n", 187 | "\n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | "
temporal_idunique_iddsy
39month-102000-03-1693.574676
87month-112000-07-1991.506421
\n", 214 | "
" 215 | ], 216 | "text/plain": [ 217 | " temporal_id unique_id ds y\n", 218 | "39 month-1 0 2000-03-16 93.574676\n", 219 | "87 month-1 1 2000-07-19 91.506421" 220 | ] 221 | }, 222 | "execution_count": null, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "Y_df_local.query(\"temporal_id == 'month-1'\")" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "## 2. Global aggregation" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "In global aggregation, we examine all unique timestamps across all timeseries, and base our temporal aggregations on the unique list of timestamps across all timeseries. We can specify the aggregation type by setting the `aggregation_type` attritbue in `aggregate_temporal`." 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "Y_df_global, S_df_global, tags_globval = aggregate_temporal(df, spec, aggregation_type=\"global\")\n" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "We have created temporal aggregations _across all timeseries_, as the temporal aggregation `month-1` corresponds to the same (year, month)-combination for both timeseries. Since `month-1` isn't present in the second timeseries (as it is shorter), we have only one record for the aggregation." 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "data": { 268 | "text/html": [ 269 | "
\n", 270 | "\n", 283 | "\n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | "
temporal_idunique_iddsy
39month-102000-03-1693.574676
\n", 303 | "
" 304 | ], 305 | "text/plain": [ 306 | " temporal_id unique_id ds y\n", 307 | "39 month-1 0 2000-03-16 93.574676" 308 | ] 309 | }, 310 | "execution_count": null, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "Y_df_global.query(\"temporal_id == 'month-1'\")" 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "For `month-5` however, we have a record for both timeseries, as the second series has its first datapoint in that month." 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "data": { 333 | "text/html": [ 334 | "
\n", 335 | "\n", 348 | "\n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | "
temporal_idunique_iddsy
43month-502000-07-1495.169659
87month-512000-07-1474.502584
\n", 375 | "
" 376 | ], 377 | "text/plain": [ 378 | " temporal_id unique_id ds y\n", 379 | "43 month-5 0 2000-07-14 95.169659\n", 380 | "87 month-5 1 2000-07-14 74.502584" 381 | ] 382 | }, 383 | "execution_count": null, 384 | "metadata": {}, 385 | "output_type": "execute_result" 386 | } 387 | ], 388 | "source": [ 389 | "Y_df_global.query(\"temporal_id == 'month-5'\")" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": {}, 395 | "source": [ 396 | "Hence, the global aggregation ensures temporal alignment across all series." 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "## 3. What to choose?" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "metadata": {}, 409 | "source": [ 410 | "- If all timeseries have the same length and same timestamps, `global` and `local` yield the same results.\n", 411 | "- The default behavior is `local`. This means that temporal aggregations between timeseries can't be compared unless the series have the same length and timestamp. This behavior is generally safer, and advised to use when time series are not necessarily related, and you are building per-series models using e.g. `StatsForecast`.\n", 412 | "- The `global` behavior can be useful when dealing with timeseries where we expect relationships between the timeseries. For example, in case of forecasting daily product demand individual products may not always have sales for all timesteps, but one is interested in the overall temporal yearly aggregation across all products. The `global` setting has more room for error, so be careful and check the aggregation result carefully. This would typically be the setting used in combination with models from `MLForecast` or `NeuralForecast`. " 413 | ] 414 | } 415 | ], 416 | "metadata": { 417 | "kernelspec": { 418 | "display_name": "python3", 419 | "language": "python", 420 | "name": "python3" 421 | } 422 | }, 423 | "nbformat": 4, 424 | "nbformat_minor": 4 425 | } 426 | -------------------------------------------------------------------------------- /nbs/examples/TourismSmall.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "843cf8de-d678-4243-a8af-d78439058e6a", 7 | "metadata": {}, 8 | "source": [ 9 | "# Quick Start\n", 10 | "\n", 11 | "> Minimal Example of Hierarchical Reconciliation" 12 | ] 13 | }, 14 | { 15 | "attachments": {}, 16 | "cell_type": "markdown", 17 | "id": "0a836410-7063-4766-b03c-7d22b6abe457", 18 | "metadata": {}, 19 | "source": [ 20 | "Large collections of time series organized into structures at different aggregation levels often require their forecasts to follow their aggregation constraints, which poses the challenge of creating novel algorithms capable of coherent forecasts.\n", 21 | "\n", 22 | "The `HierarchicalForecast` package provides a wide collection of Python implementations of hierarchical forecasting algorithms that follow classic hierarchical reconciliation.\n", 23 | "\n", 24 | "In this notebook we will show how to use the `StatsForecast` library to produce base forecasts, and use `HierarchicalForecast` package to perform hierarchical reconciliation." 
25 | ] 26 | }, 27 | { 28 | "attachments": {}, 29 | "cell_type": "markdown", 30 | "id": "46e647a5", 31 | "metadata": {}, 32 | "source": [ 33 | "You can run these experiments using CPU or GPU with Google Colab.\n", 34 | "\n", 35 | "\"Open" 36 | ] 37 | }, 38 | { 39 | "attachments": {}, 40 | "cell_type": "markdown", 41 | "id": "c0dc4e6d", 42 | "metadata": {}, 43 | "source": [ 44 | "## 1. Libraries" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "id": "7f777bdd-dff4-4bc0-8529-b492874de6f0", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "%%capture\n", 55 | "!pip install hierarchicalforecast statsforecast datasetsforecast" 56 | ] 57 | }, 58 | { 59 | "attachments": {}, 60 | "cell_type": "markdown", 61 | "id": "6221152f", 62 | "metadata": {}, 63 | "source": [ 64 | "## 2. Load Data" 65 | ] 66 | }, 67 | { 68 | "attachments": {}, 69 | "cell_type": "markdown", 70 | "id": "9809d816", 71 | "metadata": {}, 72 | "source": [ 73 | "In this example we will use the `TourismSmall` dataset. The following cell gets the time series for the different levels in the hierarchy, the summing matrix `S` which recovers the full dataset from the bottom level hierarchy and the indices of each hierarchy denoted by `tags`." 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "id": "f7a5828a-2fb4-4811-9e07-0ee291331978", 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "import pandas as pd\n", 84 | "\n", 85 | "from datasetsforecast.hierarchical import HierarchicalData, HierarchicalInfo" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "id": "c18a4300-5b8f-45b5-92ce-e52f8c4dab20", 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "group_name = 'TourismSmall'\n", 96 | "group = HierarchicalInfo.get_group(group_name)\n", 97 | "Y_df, S_df, tags = HierarchicalData.load('./data', group_name)\n", 98 | "S_df = S_df.reset_index(names=\"unique_id\")\n", 99 | "Y_df['ds'] = pd.to_datetime(Y_df['ds'])" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "id": "b964394e-6a79-4c75-be74-3c3994e1bf58", 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/html": [ 111 | "
\n", 112 | "\n", 125 | "\n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | "
unique_idnsw-hol-citynsw-hol-noncityvic-hol-cityvic-hol-noncityqld-hol-city
0total1.01.01.01.01.0
1hol1.01.01.01.01.0
2vfr0.00.00.00.00.0
3bus0.00.00.00.00.0
4oth0.00.00.00.00.0
5nsw-hol1.01.00.00.00.0
\n", 194 | "
" 195 | ], 196 | "text/plain": [ 197 | " unique_id nsw-hol-city nsw-hol-noncity vic-hol-city vic-hol-noncity \\\n", 198 | "0 total 1.0 1.0 1.0 1.0 \n", 199 | "1 hol 1.0 1.0 1.0 1.0 \n", 200 | "2 vfr 0.0 0.0 0.0 0.0 \n", 201 | "3 bus 0.0 0.0 0.0 0.0 \n", 202 | "4 oth 0.0 0.0 0.0 0.0 \n", 203 | "5 nsw-hol 1.0 1.0 0.0 0.0 \n", 204 | "\n", 205 | " qld-hol-city \n", 206 | "0 1.0 \n", 207 | "1 1.0 \n", 208 | "2 0.0 \n", 209 | "3 0.0 \n", 210 | "4 0.0 \n", 211 | "5 0.0 " 212 | ] 213 | }, 214 | "execution_count": null, 215 | "metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "S_df.iloc[:6, :6]" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "id": "fd32f1a5-2b60-454e-afc9-6911f84f6698", 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "{'Country': array(['total'], dtype=object),\n", 233 | " 'Country/Purpose': array(['hol', 'vfr', 'bus', 'oth'], dtype=object),\n", 234 | " 'Country/Purpose/State': array(['nsw-hol', 'vic-hol', 'qld-hol', 'sa-hol', 'wa-hol', 'tas-hol',\n", 235 | " 'nt-hol', 'nsw-vfr', 'vic-vfr', 'qld-vfr', 'sa-vfr', 'wa-vfr',\n", 236 | " 'tas-vfr', 'nt-vfr', 'nsw-bus', 'vic-bus', 'qld-bus', 'sa-bus',\n", 237 | " 'wa-bus', 'tas-bus', 'nt-bus', 'nsw-oth', 'vic-oth', 'qld-oth',\n", 238 | " 'sa-oth', 'wa-oth', 'tas-oth', 'nt-oth'], dtype=object),\n", 239 | " 'Country/Purpose/State/CityNonCity': array(['nsw-hol-city', 'nsw-hol-noncity', 'vic-hol-city',\n", 240 | " 'vic-hol-noncity', 'qld-hol-city', 'qld-hol-noncity',\n", 241 | " 'sa-hol-city', 'sa-hol-noncity', 'wa-hol-city', 'wa-hol-noncity',\n", 242 | " 'tas-hol-city', 'tas-hol-noncity', 'nt-hol-city', 'nt-hol-noncity',\n", 243 | " 'nsw-vfr-city', 'nsw-vfr-noncity', 'vic-vfr-city',\n", 244 | " 'vic-vfr-noncity', 'qld-vfr-city', 'qld-vfr-noncity',\n", 245 | " 'sa-vfr-city', 'sa-vfr-noncity', 'wa-vfr-city', 'wa-vfr-noncity',\n", 246 | " 'tas-vfr-city', 'tas-vfr-noncity', 'nt-vfr-city', 'nt-vfr-noncity',\n", 247 | " 'nsw-bus-city', 'nsw-bus-noncity', 'vic-bus-city',\n", 248 | " 'vic-bus-noncity', 'qld-bus-city', 'qld-bus-noncity',\n", 249 | " 'sa-bus-city', 'sa-bus-noncity', 'wa-bus-city', 'wa-bus-noncity',\n", 250 | " 'tas-bus-city', 'tas-bus-noncity', 'nt-bus-city', 'nt-bus-noncity',\n", 251 | " 'nsw-oth-city', 'nsw-oth-noncity', 'vic-oth-city',\n", 252 | " 'vic-oth-noncity', 'qld-oth-city', 'qld-oth-noncity',\n", 253 | " 'sa-oth-city', 'sa-oth-noncity', 'wa-oth-city', 'wa-oth-noncity',\n", 254 | " 'tas-oth-city', 'tas-oth-noncity', 'nt-oth-city', 'nt-oth-noncity'],\n", 255 | " dtype=object)}" 256 | ] 257 | }, 258 | "execution_count": null, 259 | "metadata": {}, 260 | "output_type": "execute_result" 261 | } 262 | ], 263 | "source": [ 264 | "tags" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "id": "56a7aadb-6e2c-456a-a0b5-b29b30deadb5", 270 | "metadata": {}, 271 | "source": [ 272 | "We split the dataframe in train/test splits." 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "id": "462451d8-2fc0-445e-9458-908811011dd9", 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "Y_test_df = Y_df.groupby('unique_id').tail(group.horizon)\n", 283 | "Y_train_df = Y_df.drop(Y_test_df.index)" 284 | ] 285 | }, 286 | { 287 | "attachments": {}, 288 | "cell_type": "markdown", 289 | "id": "1958d4e6", 290 | "metadata": {}, 291 | "source": [ 292 | "## 3. 
Base forecasts" ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "id": "b7cfb43a-cd16-418c-a04b-e075c176cc9e", 297 | "metadata": {}, 298 | "source": [ 299 | "The following cell computes the *base forecast* for each time series using the `auto_arima` and `naive` models. Observe that `Y_hat_df` contains the forecasts but they are not coherent." 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "id": "ce5017ee", 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "from statsforecast.core import StatsForecast\n", 310 | "from statsforecast.models import AutoARIMA, Naive" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "id": "f99e7b7b-f4b8-4f2f-a1a7-c8be98a1e280", 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "fcst = StatsForecast(\n", 321 | "    models=[AutoARIMA(season_length=group.seasonality), Naive()], \n", 322 | "    freq=\"QE\", \n", 323 | "    n_jobs=-1\n", 324 | ")\n", 325 | "Y_hat_df = fcst.forecast(df=Y_train_df, h=group.horizon)" 326 | ] 327 | }, 328 | { 329 | "attachments": {}, 330 | "cell_type": "markdown", 331 | "id": "ef1c9163", 332 | "metadata": {}, 333 | "source": [ 334 | "## 4. Hierarchical reconciliation" 335 | ] 336 | }, 337 | { 338 | "attachments": {}, 339 | "cell_type": "markdown", 340 | "id": "cc296762-2009-4aef-8b31-f24aad9d0787", 341 | "metadata": {}, 342 | "source": [ 343 | "The following cell makes the previous forecasts coherent using the `HierarchicalReconciliation` class. The methods used to make the forecasts coherent are:\n", 344 | "\n", 345 | "- `BottomUp`: The bottom-level forecasts are simply added up to the upper levels.\n", 346 | "- `TopDown`: The second method constrains the base-level predictions to the top-most aggregate-level series and then distributes it to the disaggregate series through the use of proportions. \n", 347 | "- `MiddleOut`: Anchors the base predictions in a middle level." 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "id": "63ec7e26", 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "from hierarchicalforecast.core import HierarchicalReconciliation\n", 358 | "from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "id": "a43be9e7-99a9-4981-bfd7-8552efba9751", 365 | "metadata": {}, 366 | "outputs": [], 367 | "source": [ 368 | "reconcilers = [\n", 369 | "    BottomUp(),\n", 370 | "    TopDown(method='forecast_proportions'),\n", 371 | "    TopDown(method='proportion_averages'),\n", 372 | "    MiddleOut(middle_level=\"Country/Purpose/State\", top_down_method=\"proportion_averages\"),\n", 373 | "]\n", 374 | "hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n", 375 | "Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df, S=S_df, tags=tags)" 376 | ] 377 | }, 378 | { 379 | "attachments": {}, 380 | "cell_type": "markdown", 381 | "id": "6590a5e2", 382 | "metadata": {}, 383 | "source": [ 384 | "## 5. Evaluation" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "id": "03c4752c-53f8-4b1f-8169-32075b8e4050", 390 | "metadata": {}, 391 | "source": [ 392 | "The `HierarchicalForecast` package includes the `evaluate` function to evaluate the different hierarchies, and we can use utilsforecast to compute the mean squared error relative to a baseline model."
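To make the scaled metric concrete, here is a hypothetical toy sketch (the numbers are invented, and the semantics assume that `evaluate` with `benchmark=\"Naive\"` divides each model's metric by the benchmark's, as the `mse-scaled` rows in the output below suggest):

```python
import numpy as np

# Toy series: hypothetical test values and two sets of forecasts.
y_true = np.array([10.0, 12.0, 11.0, 13.0])
y_model = np.array([10.5, 11.5, 11.2, 12.6])   # a fitted model's forecasts
y_naive = np.full_like(y_true, 9.0)            # Naive: repeat the last training value

mse_model = np.mean((y_true - y_model) ** 2)
mse_naive = np.mean((y_true - y_naive) ** 2)
print(round(mse_model / mse_naive, 3))  # "mse-scaled": values < 1.0 beat the baseline
```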
394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "id": "0be293f3", 400 | "metadata": {}, 401 | "outputs": [], 402 | "source": [ 403 | "from hierarchicalforecast.evaluation import evaluate\n", 404 | "from utilsforecast.losses import mse" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "id": "8599f85a", 411 | "metadata": {}, 412 | "outputs": [ 413 | { 414 | "data": { 415 | "text/html": [ 416 | "
\n", 417 | "\n", 430 | "\n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | "
AutoARIMAAutoARIMA/BottomUpAutoARIMA/TopDown_method-forecast_proportionsAutoARIMA/TopDown_method-proportion_averagesAutoARIMA/MiddleOut_middle_level-Country/Purpose/State_top_down_method-proportion_averages
levelmetric
Countrymse-scaled0.3178970.3670780.3178970.3178970.305053
Country/Purposemse-scaled0.3189500.2336060.2622160.3202250.196062
Country/Purpose/Statemse-scaled0.2680570.2811890.3203490.5113560.268057
Country/Purpose/State/CityNonCitymse-scaled0.2921360.2921360.3232610.5097840.280599
Overallmse-scaled0.3089420.2956900.2970720.3647750.255038
\n", 499 | "
" 500 | ], 501 | "text/plain": [ 502 | " AutoARIMA AutoARIMA/BottomUp \\\n", 503 | "level metric \n", 504 | "Country mse-scaled 0.317897 0.367078 \n", 505 | "Country/Purpose mse-scaled 0.318950 0.233606 \n", 506 | "Country/Purpose/State mse-scaled 0.268057 0.281189 \n", 507 | "Country/Purpose/State/CityNonCity mse-scaled 0.292136 0.292136 \n", 508 | "Overall mse-scaled 0.308942 0.295690 \n", 509 | "\n", 510 | " AutoARIMA/TopDown_method-forecast_proportions \\\n", 511 | "level metric \n", 512 | "Country mse-scaled 0.317897 \n", 513 | "Country/Purpose mse-scaled 0.262216 \n", 514 | "Country/Purpose/State mse-scaled 0.320349 \n", 515 | "Country/Purpose/State/CityNonCity mse-scaled 0.323261 \n", 516 | "Overall mse-scaled 0.297072 \n", 517 | "\n", 518 | " AutoARIMA/TopDown_method-proportion_averages \\\n", 519 | "level metric \n", 520 | "Country mse-scaled 0.317897 \n", 521 | "Country/Purpose mse-scaled 0.320225 \n", 522 | "Country/Purpose/State mse-scaled 0.511356 \n", 523 | "Country/Purpose/State/CityNonCity mse-scaled 0.509784 \n", 524 | "Overall mse-scaled 0.364775 \n", 525 | "\n", 526 | " AutoARIMA/MiddleOut_middle_level-Country/Purpose/State_top_down_method-proportion_averages \n", 527 | "level metric \n", 528 | "Country mse-scaled 0.305053 \n", 529 | "Country/Purpose mse-scaled 0.196062 \n", 530 | "Country/Purpose/State mse-scaled 0.268057 \n", 531 | "Country/Purpose/State/CityNonCity mse-scaled 0.280599 \n", 532 | "Overall mse-scaled 0.255038 " 533 | ] 534 | }, 535 | "execution_count": null, 536 | "metadata": {}, 537 | "output_type": "execute_result" 538 | } 539 | ], 540 | "source": [ 541 | "df = Y_rec_df.merge(Y_test_df, on=['unique_id', 'ds'])\n", 542 | "evaluation = evaluate(df = df,\n", 543 | " tags = tags,\n", 544 | " train_df = Y_train_df,\n", 545 | " metrics = [mse],\n", 546 | " benchmark=\"Naive\")\n", 547 | "\n", 548 | "evaluation.set_index([\"level\", \"metric\"]).filter(like=\"ARIMA\", axis=1)" 549 | ] 550 | }, 551 | { 552 | "cell_type": "markdown", 553 | "id": "a51830f5", 554 | "metadata": {}, 555 | "source": [ 556 | "### References\n", 557 | "- [Orcutt, G.H., Watts, H.W., & Edwards, J.B.(1968). Data aggregation and information loss. The American \n", 558 | "Economic Review, 58 , 773(787)](http://www.jstor.org/stable/1815532).\n", 559 | "- [Disaggregation methods to expedite product line forecasting. Journal of Forecasting, 9 , 233–254. \n", 560 | "doi:10.1002/for.3980090304](https://onlinelibrary.wiley.com/doi/abs/10.1002/for.3980090304).
\n", 561 | "- [An investigation of aggregate variable time series forecast strategies with specific subaggregate \n", 562 | "time series statistical correlation. Computers and Operations Research, 26 , 1133–1149. \n", 563 | "doi:10.1016/S0305-0548(99)00017-9](https://doi.org/10.1016/S0305-0548(99)00017-9).\n", 564 | "- [Hyndman, R.J., & Athanasopoulos, G. (2021). \"Forecasting: principles and practice, 3rd edition: \n", 565 | "Chapter 11: Forecasting hierarchical and grouped series.\". OTexts: Melbourne, Australia. OTexts.com/fpp3 \n", 566 | "Accessed on July 2022.](https://otexts.com/fpp3/hierarchical.html)" 567 | ] 568 | } 569 | ], 570 | "metadata": { 571 | "kernelspec": { 572 | "display_name": "python3", 573 | "language": "python", 574 | "name": "python3" 575 | } 576 | }, 577 | "nbformat": 4, 578 | "nbformat_minor": 5 579 | } 580 | -------------------------------------------------------------------------------- /nbs/examples/TourismSmallPolars.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "843cf8de-d678-4243-a8af-d78439058e6a", 7 | "metadata": {}, 8 | "source": [ 9 | "# Quick Start (Polars)\n", 10 | "\n", 11 | "> Minimal Example of Hierarchical Reconciliation using Polars" 12 | ] 13 | }, 14 | { 15 | "attachments": {}, 16 | "cell_type": "markdown", 17 | "id": "0a836410-7063-4766-b03c-7d22b6abe457", 18 | "metadata": {}, 19 | "source": [ 20 | "Large collections of time series organized into structures at different aggregation levels often require their forecasts to follow their aggregation constraints, which poses the challenge of creating novel algorithms capable of coherent forecasts.\n", 21 | "\n", 22 | "The `HierarchicalForecast` package provides a wide collection of Python implementations of hierarchical forecasting algorithms that follow classic hierarchical reconciliation.\n", 23 | "\n", 24 | "In this notebook we will show how to use the `StatsForecast` library to produce base forecasts, and use `HierarchicalForecast` package to perform hierarchical reconciliation." 25 | ] 26 | }, 27 | { 28 | "attachments": {}, 29 | "cell_type": "markdown", 30 | "id": "46e647a5", 31 | "metadata": {}, 32 | "source": [ 33 | "You can run these experiments using CPU or GPU with Google Colab.\n", 34 | "\n", 35 | "\"Open" 36 | ] 37 | }, 38 | { 39 | "attachments": {}, 40 | "cell_type": "markdown", 41 | "id": "c0dc4e6d", 42 | "metadata": {}, 43 | "source": [ 44 | "## 1. Libraries" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "id": "7f777bdd-dff4-4bc0-8529-b492874de6f0", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "%%capture\n", 55 | "!pip install hierarchicalforecast statsforecast datasetsforecast" 56 | ] 57 | }, 58 | { 59 | "attachments": {}, 60 | "cell_type": "markdown", 61 | "id": "6221152f", 62 | "metadata": {}, 63 | "source": [ 64 | "## 2. Load Data" 65 | ] 66 | }, 67 | { 68 | "attachments": {}, 69 | "cell_type": "markdown", 70 | "id": "9809d816", 71 | "metadata": {}, 72 | "source": [ 73 | "In this example we will use the `TourismSmall` dataset. The following cell gets the time series for the different levels in the hierarchy, the summing matrix `S` which recovers the full dataset from the bottom level hierarchy and the indices of each hierarchy denoted by `tags`." 
74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "id": "f7a5828a-2fb4-4811-9e07-0ee291331978", 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "import numpy as np\n", 84 | "import polars as pl\n", 85 | "\n", 86 | "from datasetsforecast.hierarchical import HierarchicalData, HierarchicalInfo" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "id": "c18a4300-5b8f-45b5-92ce-e52f8c4dab20", 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "group_name = 'TourismSmall'\n", 97 | "group = HierarchicalInfo.get_group(group_name)\n", 98 | "Y_df, S_df, tags = HierarchicalData.load('./data', group_name)\n", 99 | "\n", 100 | "Y_df = pl.from_pandas(Y_df)\n", 101 | "S_df = pl.from_pandas(S_df.reset_index(names=\"unique_id\"))\n", 102 | "Y_df = Y_df.with_columns(pl.col('ds').cast(pl.Date))" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "id": "b964394e-6a79-4c75-be74-3c3994e1bf58", 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/html": [ 114 | "
\n", 121 | "shape: (6, 6)
unique_idnsw-hol-citynsw-hol-noncityvic-hol-cityvic-hol-noncityqld-hol-city
strf64f64f64f64f64
"total"1.01.01.01.01.0
"hol"1.01.01.01.01.0
"vfr"0.00.00.00.00.0
"bus"0.00.00.00.00.0
"oth"0.00.00.00.00.0
"nsw-hol"1.01.00.00.00.0
" 122 | ], 123 | "text/plain": [ 124 | "shape: (6, 6)\n", 125 | "┌───────────┬──────────────┬─────────────────┬──────────────┬─────────────────┬──────────────┐\n", 126 | "│ unique_id ┆ nsw-hol-city ┆ nsw-hol-noncity ┆ vic-hol-city ┆ vic-hol-noncity ┆ qld-hol-city │\n", 127 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", 128 | "│ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", 129 | "╞═══════════╪══════════════╪═════════════════╪══════════════╪═════════════════╪══════════════╡\n", 130 | "│ total ┆ 1.0 ┆ 1.0 ┆ 1.0 ┆ 1.0 ┆ 1.0 │\n", 131 | "│ hol ┆ 1.0 ┆ 1.0 ┆ 1.0 ┆ 1.0 ┆ 1.0 │\n", 132 | "│ vfr ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", 133 | "│ bus ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", 134 | "│ oth ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", 135 | "│ nsw-hol ┆ 1.0 ┆ 1.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", 136 | "└───────────┴──────────────┴─────────────────┴──────────────┴─────────────────┴──────────────┘" 137 | ] 138 | }, 139 | "execution_count": null, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | } 143 | ], 144 | "source": [ 145 | "S_df[:6, :6]" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "id": "fd32f1a5-2b60-454e-afc9-6911f84f6698", 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "{'Country': array(['total'], dtype=object),\n", 158 | " 'Country/Purpose': array(['hol', 'vfr', 'bus', 'oth'], dtype=object),\n", 159 | " 'Country/Purpose/State': array(['nsw-hol', 'vic-hol', 'qld-hol', 'sa-hol', 'wa-hol', 'tas-hol',\n", 160 | " 'nt-hol', 'nsw-vfr', 'vic-vfr', 'qld-vfr', 'sa-vfr', 'wa-vfr',\n", 161 | " 'tas-vfr', 'nt-vfr', 'nsw-bus', 'vic-bus', 'qld-bus', 'sa-bus',\n", 162 | " 'wa-bus', 'tas-bus', 'nt-bus', 'nsw-oth', 'vic-oth', 'qld-oth',\n", 163 | " 'sa-oth', 'wa-oth', 'tas-oth', 'nt-oth'], dtype=object),\n", 164 | " 'Country/Purpose/State/CityNonCity': array(['nsw-hol-city', 'nsw-hol-noncity', 'vic-hol-city',\n", 165 | " 'vic-hol-noncity', 'qld-hol-city', 'qld-hol-noncity',\n", 166 | " 'sa-hol-city', 'sa-hol-noncity', 'wa-hol-city', 'wa-hol-noncity',\n", 167 | " 'tas-hol-city', 'tas-hol-noncity', 'nt-hol-city', 'nt-hol-noncity',\n", 168 | " 'nsw-vfr-city', 'nsw-vfr-noncity', 'vic-vfr-city',\n", 169 | " 'vic-vfr-noncity', 'qld-vfr-city', 'qld-vfr-noncity',\n", 170 | " 'sa-vfr-city', 'sa-vfr-noncity', 'wa-vfr-city', 'wa-vfr-noncity',\n", 171 | " 'tas-vfr-city', 'tas-vfr-noncity', 'nt-vfr-city', 'nt-vfr-noncity',\n", 172 | " 'nsw-bus-city', 'nsw-bus-noncity', 'vic-bus-city',\n", 173 | " 'vic-bus-noncity', 'qld-bus-city', 'qld-bus-noncity',\n", 174 | " 'sa-bus-city', 'sa-bus-noncity', 'wa-bus-city', 'wa-bus-noncity',\n", 175 | " 'tas-bus-city', 'tas-bus-noncity', 'nt-bus-city', 'nt-bus-noncity',\n", 176 | " 'nsw-oth-city', 'nsw-oth-noncity', 'vic-oth-city',\n", 177 | " 'vic-oth-noncity', 'qld-oth-city', 'qld-oth-noncity',\n", 178 | " 'sa-oth-city', 'sa-oth-noncity', 'wa-oth-city', 'wa-oth-noncity',\n", 179 | " 'tas-oth-city', 'tas-oth-noncity', 'nt-oth-city', 'nt-oth-noncity'],\n", 180 | " dtype=object)}" 181 | ] 182 | }, 183 | "execution_count": null, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "tags" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "id": "56a7aadb-6e2c-456a-a0b5-b29b30deadb5", 195 | "metadata": {}, 196 | "source": [ 197 | "We split the dataframe in train/test splits." 
198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "id": "462451d8-2fc0-445e-9458-908811011dd9", 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "Y_test_df = Y_df.group_by('unique_id').tail(group.horizon)\n", 208 | "Y_train_df = Y_df.filter(pl.col('ds') < Y_test_df['ds'].min())" 209 | ] 210 | }, 211 | { 212 | "attachments": {}, 213 | "cell_type": "markdown", 214 | "id": "1958d4e6", 215 | "metadata": {}, 216 | "source": [ 217 | "## 3. Base forecasts" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "id": "b7cfb43a-cd16-418c-a04b-e075c176cc9e", 223 | "metadata": {}, 224 | "source": [ 225 | "The following cell computes the *base forecast* for each time series using the `auto_arima` and `naive` models. Observe that `Y_hat_df` contains the forecasts but they are not coherent." 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "id": "ce5017ee", 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "from statsforecast.core import StatsForecast\n", 236 | "from statsforecast.models import AutoARIMA, Naive" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "id": "f99e7b7b-f4b8-4f2f-a1a7-c8be98a1e280", 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "fcst = StatsForecast(\n", 247 | " models=[AutoARIMA(season_length=group.seasonality), Naive()], \n", 248 | " freq=\"1q\", \n", 249 | " n_jobs=-1\n", 250 | ")\n", 251 | "Y_hat_df = fcst.forecast(df=Y_train_df, h=group.horizon)" 252 | ] 253 | }, 254 | { 255 | "attachments": {}, 256 | "cell_type": "markdown", 257 | "id": "ef1c9163", 258 | "metadata": {}, 259 | "source": [ 260 | "## 4. Hierarchical reconciliation" 261 | ] 262 | }, 263 | { 264 | "attachments": {}, 265 | "cell_type": "markdown", 266 | "id": "cc296762-2009-4aef-8b31-f24aad9d0787", 267 | "metadata": {}, 268 | "source": [ 269 | "The following cell makes the previous forecasts coherent using the `HierarchicalReconciliation` class. The used methods to make the forecasts coherent are:\n", 270 | "\n", 271 | "- `BottomUp`: The reconciliation of the method is a simple addition to the upper levels.\n", 272 | "- `TopDown`: The second method constrains the base-level predictions to the top-most aggregate-level serie and then distributes it to the disaggregate series through the use of proportions. \n", 273 | "- `MiddleOut`: Anchors the base predictions in a middle level." 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "id": "63ec7e26", 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "from hierarchicalforecast.core import HierarchicalReconciliation\n", 284 | "from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "id": "a43be9e7-99a9-4981-bfd7-8552efba9751", 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "reconcilers = [\n", 295 | " BottomUp(),\n", 296 | " TopDown(method='forecast_proportions'),\n", 297 | " MiddleOut(middle_level='Country/Purpose/State', \n", 298 | " top_down_method='forecast_proportions')\n", 299 | "]\n", 300 | "hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n", 301 | "Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df, S=S_df, tags=tags)" 302 | ] 303 | }, 304 | { 305 | "attachments": {}, 306 | "cell_type": "markdown", 307 | "id": "6590a5e2", 308 | "metadata": {}, 309 | "source": [ 310 | "## 5. 
304 | { 305 | "attachments": {}, 306 | "cell_type": "markdown", 307 | "id": "6590a5e2", 308 | "metadata": {}, 309 | "source": [ 310 | "## 5. Evaluation" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "id": "03c4752c-53f8-4b1f-8169-32075b8e4050", 316 | "metadata": {}, 317 | "source": [ 318 | "The `HierarchicalForecast` package includes the `evaluate` function to evaluate the forecasts at each level of the hierarchy. Here we use the `mse` loss from `utilsforecast` to compute the mean squared error relative to the `Naive` baseline model, so scaled values below one improve on the baseline." 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "id": "a797f84d", 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [ 328 | "from hierarchicalforecast.evaluation import evaluate\n", 329 | "from utilsforecast.losses import mse" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "id": "a793cff0-e6bf-469d-86d8-cf6ce7a8d922", 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "data": { 340 | "text/html": [ 341 | "
\n", 348 | "shape: (5, 5)
levelmetricAutoARIMAAutoARIMA/BottomUpAutoARIMA/TopDown_method-forecast_proportions
strstrf64f64f64
"Country""mse-scaled"0.3178970.2269990.317897
"Country/Purpose""mse-scaled"0.3232070.1993590.251368
"Country/Purpose/State""mse-scaled"0.2661180.3057110.308241
"Country/Purpose/State/CityNonC…"mse-scaled"0.3051730.3051730.305913
"Overall""mse-scaled"0.3117070.2349340.289406
" 349 | ], 350 | "text/plain": [ 351 | "shape: (5, 5)\n", 352 | "┌──────────────────────────┬────────────┬───────────┬────────────────────┬─────────────────────────┐\n", 353 | "│ level ┆ metric ┆ AutoARIMA ┆ AutoARIMA/BottomUp ┆ AutoARIMA/TopDown_metho │\n", 354 | "│ --- ┆ --- ┆ --- ┆ --- ┆ d-forec… │\n", 355 | "│ str ┆ str ┆ f64 ┆ f64 ┆ --- │\n", 356 | "│ ┆ ┆ ┆ ┆ f64 │\n", 357 | "╞══════════════════════════╪════════════╪═══════════╪════════════════════╪═════════════════════════╡\n", 358 | "│ Country ┆ mse-scaled ┆ 0.317897 ┆ 0.226999 ┆ 0.317897 │\n", 359 | "│ Country/Purpose ┆ mse-scaled ┆ 0.323207 ┆ 0.199359 ┆ 0.251368 │\n", 360 | "│ Country/Purpose/State ┆ mse-scaled ┆ 0.266118 ┆ 0.305711 ┆ 0.308241 │\n", 361 | "│ Country/Purpose/State/Ci ┆ mse-scaled ┆ 0.305173 ┆ 0.305173 ┆ 0.305913 │\n", 362 | "│ tyNonC… ┆ ┆ ┆ ┆ │\n", 363 | "│ Overall ┆ mse-scaled ┆ 0.311707 ┆ 0.234934 ┆ 0.289406 │\n", 364 | "└──────────────────────────┴────────────┴───────────┴────────────────────┴─────────────────────────┘" 365 | ] 366 | }, 367 | "execution_count": null, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "df = Y_rec_df.join(Y_test_df, on=['unique_id', 'ds'])\n", 374 | "evaluation = evaluate(df = df,\n", 375 | " tags = tags,\n", 376 | " train_df = Y_train_df,\n", 377 | " metrics = [mse],\n", 378 | " benchmark=\"Naive\")\n", 379 | "\n", 380 | "evaluation[[\"level\", \"metric\", \"AutoARIMA\", \"AutoARIMA/BottomUp\", \"AutoARIMA/TopDown_method-forecast_proportions\"]]" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "id": "a51830f5", 386 | "metadata": {}, 387 | "source": [ 388 | "### References\n", 389 | "- [Orcutt, G.H., Watts, H.W., & Edwards, J.B.(1968). Data aggregation and information loss. The American \n", 390 | "Economic Review, 58 , 773(787)](http://www.jstor.org/stable/1815532).\n", 391 | "- [Disaggregation methods to expedite product line forecasting. Journal of Forecasting, 9 , 233–254. \n", 392 | "doi:10.1002/for.3980090304](https://onlinelibrary.wiley.com/doi/abs/10.1002/for.3980090304).
\n", 393 | "- [An investigation of aggregate variable time series forecast strategies with specific subaggregate \n", 394 | "time series statistical correlation. Computers and Operations Research, 26 , 1133–1149. \n", 395 | "doi:10.1016/S0305-0548(99)00017-9](https://doi.org/10.1016/S0305-0548(99)00017-9).\n", 396 | "- [Hyndman, R.J., & Athanasopoulos, G. (2021). \"Forecasting: principles and practice, 3rd edition: \n", 397 | "Chapter 11: Forecasting hierarchical and grouped series.\". OTexts: Melbourne, Australia. OTexts.com/fpp3 \n", 398 | "Accessed on July 2022.](https://otexts.com/fpp3/hierarchical.html)" 399 | ] 400 | } 401 | ], 402 | "metadata": { 403 | "kernelspec": { 404 | "display_name": "python3", 405 | "language": "python", 406 | "name": "python3" 407 | } 408 | }, 409 | "nbformat": 4, 410 | "nbformat_minor": 5 411 | } 412 | -------------------------------------------------------------------------------- /nbs/examples/imgs/AustralianDomesticTourism-results-fable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/nbs/examples/imgs/AustralianDomesticTourism-results-fable.png -------------------------------------------------------------------------------- /nbs/examples/imgs/AustralianPrisonPopulation-results-fable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/nbs/examples/imgs/AustralianPrisonPopulation-results-fable.png -------------------------------------------------------------------------------- /nbs/examples/imgs/hierarchical_motivation1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/nbs/examples/imgs/hierarchical_motivation1.png -------------------------------------------------------------------------------- /nbs/examples/imgs/hierarchical_motivation2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/nbs/examples/imgs/hierarchical_motivation2.png -------------------------------------------------------------------------------- /nbs/examples/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | order: 1 3 | title: Tutorials 4 | listing: 5 | fields: [title] 6 | type: table 7 | sort-ui: false 8 | filter-ui: false 9 | --- 10 | 11 | Click through to any of these tutorials to get started with `HierarchicalForecast`'s features. 
12 | -------------------------------------------------------------------------------- /nbs/favicon_png.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/nbs/favicon_png.png -------------------------------------------------------------------------------- /nbs/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "018f6145-b103-4f3e-b4cd-0aab4d8bbdb7", 7 | "metadata": {}, 8 | "source": [ 9 | "# Hierarchical Forecast 👑" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "id": "112249bc-95d7-42bd-9eb9-b6bb7dda5b20", 15 | "metadata": {}, 16 | "source": [ 17 | "Large collections of time series organized into structures at different aggregation levels often require their forecasts to follow their aggregation constraints, which poses the challenge of creating novel algorithms capable of producing coherent forecasts.\n", 18 | "\n", 19 | "**HierarchicalForecast** offers a collection of cross-sectional and temporal reconciliation methods, including `BottomUp`, `TopDown`, `MiddleOut`, `MinTrace` and `ERM`, as well as probabilistic coherent predictions including `Normality`, `Bootstrap`, and `PERMBU`." 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "id": "295fee6a-1ca0-47c5-81f3-5a7e52afc426", 25 | "metadata": {}, 26 | "source": [ 27 | "## 🎊 Features \n", 28 | "\n", 29 | "* Classic reconciliation methods:\n", 30 | " - `BottomUp`: Simple addition of the bottom-level forecasts up to the upper levels.\n", 31 | " - `TopDown`: Distributes the top-level forecasts through the hierarchy.\n", 32 | "* Alternative reconciliation methods:\n", 33 | " - `MiddleOut`: Anchors the base predictions at a middle level. The levels above the base predictions use a bottom-up approach, while the levels below use a top-down approach.\n", 34 | " - `MinTrace`: Minimizes the total forecast variance over the space of coherent forecasts, with the Minimum Trace reconciliation.\n", 35 | " - `ERM`: Optimizes the reconciliation matrix by minimizing an L1-regularized objective.\n", 36 | "* Probabilistic coherent methods:\n", 37 | " - `Normality`: Uses the MinTrace closed-form variance-covariance matrix under a normality assumption.\n", 38 | " - `Bootstrap`: Generates a distribution of hierarchically reconciled predictions using Gamakumara's bootstrap approach.\n", 39 | " - `PERMBU`: Reconciles independent sample predictions by reinjecting multivariate dependence with estimated rank permutation copulas, and performing a bottom-up aggregation.\n", 40 | "* Temporal reconciliation methods:\n", 41 | " - All reconciliation methods (except for the insample methods) are also available for temporal hierarchies.\n", 42 | "\n", 43 | "A minimal usage sketch for these reconcilers follows below. Missing something? Please open an issue here or write to us on [![Slack](https://img.shields.io/badge/Slack-4A154B?&logo=slack&logoColor=white)](https://join.slack.com/t/nixtlaworkspace/shared_invite/zt-135dssye9-fWTzMpv2WBthq8NK0Yvu6A)\n" 44 | ] 45 | },
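{ "cell_type": "markdown", "metadata": {}, "source": [ "All of the reconcilers above share a single interface (a minimal sketch, assuming in-sample fitted values are supplied for the methods that need them, such as `MinTrace` and `ERM`; see the complete quickstart below):\n", "\n", "```python\n", "from hierarchicalforecast.core import HierarchicalReconciliation\n", "from hierarchicalforecast.methods import BottomUp, MinTrace, ERM\n", "\n", "hrec = HierarchicalReconciliation(\n", "    reconcilers=[\n", "        BottomUp(),\n", "        MinTrace(method='mint_shrink'),  # minimum trace with shrunk covariance\n", "        ERM(method='reg'),               # L1-regularized reconciliation matrix\n", "    ]\n", ")\n", "# For probabilistic coherent predictions, pass `level` and an\n", "# `intervals_method` ('normality', 'bootstrap' or 'permbu') to `reconcile`.\n", "```" ] },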
46 | { 47 | "cell_type": "markdown", 48 | "id": "11131f77-4f6c-4232-9780-31afe1b9a034", 49 | "metadata": {}, 50 | "source": [ 51 | "## 📖 Why? \n", 52 | "\n", 53 | "**Short**: We want to contribute to the ML field by providing reliable baselines and benchmarks for hierarchical forecasting tasks in industry and academia. Here's the complete [paper](https://arxiv.org/abs/2207.03517).\n", 54 | "\n", 55 | "**Verbose**: `HierarchicalForecast` integrates publicly available processed datasets, evaluation metrics, and a curated set of statistical baselines. In this library we provide usage examples and references to extensive experiments where we showcase the baselines' use and evaluate the accuracy of their predictions. With this work, we hope to contribute to Machine Learning forecasting by bridging the gap to statistical and econometric modeling, as well as providing tools for the development of novel hierarchical forecasting algorithms rooted in a thorough comparison of these well-established models. We intend to continue maintaining and growing the repository, promoting collaboration across the forecasting community." 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "id": "2e3f7a11-d151-4662-a3b7-46f945141996", 61 | "metadata": {}, 62 | "source": [ 63 | "## 💻 Installation\n", 64 | "\n", 65 | "### PyPI\n", 66 | "\n", 67 | "You can install the *released version* of `HierarchicalForecast` from the [Python package index](https://pypi.org) with:\n", 68 | "\n", 69 | "```bash\n", 70 | "pip install hierarchicalforecast\n", 71 | "```\n", 72 | "\n", 73 | "(Installing inside a Python virtual environment or a conda environment is recommended.)\n", 74 | "\n", 75 | "### Conda\n", 76 | "\n", 77 | "You can also install the *released version* of `HierarchicalForecast` from [conda](https://anaconda.org) with:\n", 78 | "\n", 79 | "```bash\n", 80 | "conda install -c conda-forge hierarchicalforecast\n", 81 | "```\n", 82 | "\n", 83 | "(Installing inside a Python virtual environment or a conda environment is recommended.)\n", 84 | "\n", 85 | "### Dev Mode\n", 86 | "\n", 87 | "If you want to make some modifications to the code and see the effects in real time (without reinstalling), follow the steps below:\n", 88 | "\n", 89 | "```bash\n", 90 | "git clone https://github.com/Nixtla/hierarchicalforecast.git\n", 91 | "cd hierarchicalforecast\n", 92 | "pip install -e .\n", 93 | "```" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "id": "5dd31c3e-ffb6-4364-acf6-e4646fd693a9", 99 | "metadata": {}, 100 | "source": [ 101 | "## 🧬 How to use\n", 102 | "\n", 103 | "The following example needs `statsforecast` and `datasetsforecast` as additional packages. If they are not installed, install them via your preferred method, e.g. 
`pip install statsforecast datasetsforecast`.\n", 104 | "The `datasetsforecast` library allows us to download hierarchical datasets and we will use `statsforecast` to compute base forecasts to be reconciled.\n", 105 | "\n", 106 | "You can open this example in Colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nixtla/hierarchicalforecast/blob/main/nbs/examples/TourismSmall.ipynb)\n", 107 | "\n", 108 | "```python\n", 109 | "import pandas as pd\n", 110 | "\n", 111 | "# obtain the hierarchical dataset\n", 112 | "from datasetsforecast.hierarchical import HierarchicalData\n", 113 | "\n", 114 | "# compute base forecasts (not yet coherent)\n", 115 | "from statsforecast.core import StatsForecast\n", 116 | "from statsforecast.models import AutoARIMA, Naive\n", 117 | "\n", 118 | "# obtain hierarchical reconciliation methods and evaluation\n", 119 | "from hierarchicalforecast.core import HierarchicalReconciliation\n", 120 | "from hierarchicalforecast.evaluation import evaluate\n", 121 | "from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut\n", 122 | "from utilsforecast.losses import mse\n", 123 | "\n", 124 | "# Load TourismSmall dataset\n", 125 | "Y_df, S, tags = HierarchicalData.load('./data', 'TourismSmall')\n", 126 | "Y_df['ds'] = pd.to_datetime(Y_df['ds'])\n", 127 | "S = S.reset_index(names=\"unique_id\")\n", 128 | "\n", 129 | "# split train/test sets\n", 130 | "Y_test_df = Y_df.groupby('unique_id').tail(4)\n", 131 | "Y_train_df = Y_df.drop(Y_test_df.index)\n", 132 | "\n", 133 | "# Compute base auto-ARIMA predictions\n", 134 | "fcst = StatsForecast(models=[AutoARIMA(season_length=4), Naive()],\n", 135 | " freq='QE', n_jobs=-1)\n", 136 | "Y_hat_df = fcst.forecast(df=Y_train_df, h=4)\n", 137 | "\n", 138 | "# Reconcile the base predictions\n", 139 | "reconcilers = [\n", 140 | " BottomUp(),\n", 141 | " TopDown(method='forecast_proportions'),\n", 142 | " MiddleOut(middle_level='Country/Purpose/State',\n", 143 | " top_down_method='forecast_proportions')\n", 144 | "]\n", 145 | "hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n", 146 | "Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df,\n", 147 | " S=S, tags=tags)\n", 148 | "```" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "id": "ac517335-d4fa-4d56-9cfa-a19782280aa4", 154 | "metadata": {}, 155 | "source": [ 156 | "### Evaluation\n", 157 | "\n", 158 | "```python\n", 159 | "df = Y_rec_df.merge(Y_test_df, on=['unique_id', 'ds'], how='left')\n", 160 | "\n", 161 | "evaluate(df=df, metrics=[mse],\n", 162 | " tags=tags, benchmark='Naive')\n", 163 | "```" 164 | ] 165 | }, 166 | { 167 | "attachments": {}, 168 | "cell_type": "markdown", 169 | "id": "16dc2940-a4e0-486c-bac5-403f9084d6ac", 170 | "metadata": {}, 171 | "source": [ 172 | "## How to cite\n", 173 | "\n", 174 | "Here's the complete [paper](https://arxiv.org/abs/2207.03517).\n", 175 | "\n", 176 | "```bibtex\n", 177 | "@article{olivares2022hierarchicalforecast,\n", 178 | " author = {Kin G. Olivares and\n", 179 | " Federico Garza and \n", 180 | " David Luo and \n", 181 | " Cristian Challú and\n", 182 | " Max Mergenthaler and\n", 183 | " Souhaib Ben Taieb and\n", 184 | " Shanika L. 
Wickramasuriya and\n", 185 | " Artur Dubrawski},\n", 186 | " title = {{HierarchicalForecast}: A Reference Framework for Hierarchical Forecasting in Python},\n", 187 | " journal = {Work in progress paper, submitted to Journal of Machine Learning Research.},\n", 188 | " volume = {abs/2207.03517},\n", 189 | " year = {2022},\n", 190 | " url = {https://arxiv.org/abs/2207.03517},\n", 191 | " archivePrefix = {arXiv}\n", 192 | "}\n", 193 | "```" 194 | ] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "python3", 200 | "language": "python", 201 | "name": "python3" 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 5 206 | } 207 | -------------------------------------------------------------------------------- /nbs/mint.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://mintlify.com/schema.json", 3 | "name": "Nixtla", 4 | "logo": { 5 | "light": "/light.png", 6 | "dark": "/dark.png" 7 | }, 8 | "favicon": "/favicon.svg", 9 | "colors": { 10 | "primary": "#0E0E0E", 11 | "light": "#FAFAFA", 12 | "dark": "#0E0E0E", 13 | "anchors": { 14 | "from": "#2AD0CA", 15 | "to": "#0E00F8" 16 | } 17 | }, 18 | "topbarCtaButton": { 19 | "type": "github", 20 | "url": "https://github.com/Nixtla/hierarchicalforecast" 21 | }, 22 | "topAnchor": { 23 | "name": "HierarchicalForecast", 24 | "icon": "crown" 25 | }, 26 | "navigation": [ 27 | { 28 | "group": "", 29 | "pages": ["index.html"] 30 | }, 31 | { 32 | "group": "Getting Started", 33 | "pages": [ 34 | "examples/installation.html", 35 | "examples/tourismsmall.html", 36 | "examples/tourismsmallpolars.html", 37 | "examples/introduction.html" 38 | ] 39 | }, 40 | { 41 | "group": "Tutorials", 42 | "pages": [ 43 | { 44 | "group": "Point Reconciliation", 45 | "pages": [ 46 | "examples/australiandomestictourism.html", 47 | "examples/australianprisonpopulation.html", 48 | "examples/nonnegativereconciliation.html" 49 | ] 50 | }, 51 | { 52 | "group": "Probabilistic Reconciliation", 53 | "pages": [ 54 | "examples/australiandomestictourism-intervals.html", 55 | "examples/australiandomestictourism-bootstraped-intervals.html", 56 | "examples/australiandomestictourism-permbu-intervals.html", 57 | "examples/tourismlarge-evaluation.html" 58 | ] 59 | }, 60 | { 61 | "group": "Temporal Reconciliation", 62 | "pages": [ 63 | "examples/australiandomestictourismtemporal.html", 64 | "examples/australiandomestictourismcrosstemporal.html", 65 | "examples/m3withthief.html", 66 | "examples/localglobalaggregation.html" 67 | ] 68 | }, 69 | "examples/mlframeworksexample.html" 70 | ] 71 | }, 72 | { 73 | "group": "API Reference", 74 | "pages": [ 75 | "src/core.html", 76 | "src/methods.html", 77 | "src/probabilistic_methods.html", 78 | "src/evaluation.html", 79 | "src/utils.html" 80 | ] 81 | } 82 | ] 83 | } 84 | -------------------------------------------------------------------------------- /nbs/nbdev.yml: -------------------------------------------------------------------------------- 1 | project: 2 | output-dir: _docs 3 | 4 | website: 5 | title: "hierarchicalforecast" 6 | site-url: "https://Nixtla.github.io/hierarchicalforecast/" 7 | description: "Hierarchical Methods Time series forecasting" 8 | repo-branch: main 9 | repo-url: "https://github.com/Nixtla/hierarchicalforecast/" 10 | -------------------------------------------------------------------------------- /nbs/sidebar.yml: -------------------------------------------------------------------------------- 1 | website: 2 | sidebar: 3 | 
collapse-level: 1 4 | contents: 5 | - index.ipynb 6 | - text: "--" 7 | - section: "Getting Started" 8 | contents: 9 | - examples/Installation.ipynb 10 | - examples/TourismSmall.ipynb 11 | - examples/TourismSmallPolars.ipynb 12 | - examples/Introduction.ipynb 13 | - section: Tutorials 14 | contents: 15 | - section: Point Reconciliation 16 | contents: 17 | - examples/AustralianDomesticTourism.ipynb 18 | - examples/AustralianPrisonPopulation.ipynb 19 | - examples/NonNegativeReconciliation.ipynb 20 | - section: Probabilistic Reconciliation 21 | contents: 22 | - examples/AustralianDomesticTourism-Intervals.ipynb 23 | - examples/AustralianDomesticTourism-Bootstraped-Intervals.ipynb 24 | - examples/AustralianDomesticTourism-Permbu-Intervals.ipynb 25 | - examples/TourismLarge-Evaluation.ipynb 26 | - section: ML Forecast Reconciliation 27 | contents: 28 | - examples/MLFrameworksExample.ipynb 29 | - section: "API Reference" 30 | contents: src/* 31 | - section: Community 32 | contents: 33 | - Contributing -------------------------------------------------------------------------------- /nbs/styles.css: -------------------------------------------------------------------------------- 1 | .cell { 2 | margin-bottom: 1rem; 3 | } 4 | 5 | .cell > .sourceCode { 6 | margin-bottom: 0; 7 | } 8 | 9 | .cell-output > pre { 10 | margin-bottom: 0; 11 | } 12 | 13 | .cell-output > pre, .cell-output > .sourceCode > pre, .cell-output-stdout > pre { 14 | margin-left: 0.8rem; 15 | margin-top: 0; 16 | background: none; 17 | border-left: 2px solid lightsalmon; 18 | border-top-left-radius: 0; 19 | border-top-right-radius: 0; 20 | } 21 | 22 | .cell-output > .sourceCode { 23 | border: none; 24 | } 25 | 26 | .cell-output > .sourceCode { 27 | background: none; 28 | margin-top: 0; 29 | } 30 | 31 | div.description { 32 | padding-left: 2px; 33 | padding-top: 5px; 34 | font-style: italic; 35 | font-size: 135%; 36 | opacity: 70%; 37 | } 38 | 39 | /* show_doc signature */ 40 | blockquote > pre { 41 | font-size: 14px; 42 | } 43 | 44 | .table { 45 | font-size: 16px; 46 | /* disable striped tables */ 47 | --bs-table-striped-bg: var(--bs-table-bg); 48 | } 49 | 50 | .quarto-figure-center > figure > figcaption { 51 | text-align: center; 52 | } 53 | 54 | .figure-caption { 55 | font-size: 75%; 56 | font-style: italic; 57 | } -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff.lint] 2 | select = [ 3 | "F", # pyflakes 4 | ] 5 | -------------------------------------------------------------------------------- /settings.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | host = github 3 | lib_name = hierarchicalforecast 4 | user = Nixtla 5 | description = Hierarchical Methods Time series forecasting 6 | keywords = time-series forecasting datasets hierarchical 7 | author = Nixtla 8 | author_email = business@nixtla.io 9 | copyright = Nixtla Inc. 
10 | branch = main 11 | version = 1.2.1 12 | min_python = 3.9 13 | audience = Developers 14 | language = English 15 | custom_sidebar = True 16 | license = apache2 17 | status = 2 18 | requirements = numpy, numba, pandas>=2.1.0, scikit-learn>=1.2, quadprog, clarabel, matplotlib, narwhals>=1.27.0, utilsforecast>=0.2.12, intel-cmplr-lib-rt ; platform_system!="Darwin" and platform_machine=="x86_64" 19 | dev_requirements = datasetsforecast ipython<=8.32.0 nbdev statsforecast>=1.0.0 requests scipy pre-commit ruff black pytest pytest-benchmark 20 | polars_requirements = polars[numpy] 21 | nbs_path = nbs 22 | doc_path = _docs 23 | recursive = True 24 | doc_host = https://%(user)s.github.io 25 | doc_baseurl = /%(lib_name)s/ 26 | git_url = https://github.com/%(user)s/hierarchicalforecast/ 27 | lib_path = %(lib_name)s 28 | title = %(lib_name)s 29 | black_formatting = True 30 | jupyter_hooks = True 31 | clean_ids = True 32 | readme_nb = index.ipynb 33 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from pkg_resources import parse_version 2 | from configparser import ConfigParser 3 | import setuptools 4 | assert parse_version(setuptools.__version__)>=parse_version('36.2') 5 | 6 | # note: all settings are in settings.ini; edit there, not here 7 | config = ConfigParser(delimiters=['=']) 8 | config.read('settings.ini') 9 | cfg = config['DEFAULT'] 10 | 11 | cfg_keys = 'version description keywords author author_email'.split() 12 | expected = cfg_keys + "lib_name user branch license status min_python audience language".split() 13 | for o in expected: assert o in cfg, "missing expected setting: {}".format(o) 14 | setup_cfg = {o:cfg[o] for o in cfg_keys} 15 | 16 | licenses = { 17 | 'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'), 18 | 'mit': ('MIT License', 'OSI Approved :: MIT License'), 19 | 'gpl2': ('GNU General Public License v2', 'OSI Approved :: GNU General Public License v2 (GPLv2)'), 20 | 'gpl3': ('GNU General Public License v3', 'OSI Approved :: GNU General Public License v3 (GPLv3)'), 21 | 'bsd3': ('BSD License', 'OSI Approved :: BSD License'), 22 | } 23 | statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha', 24 | '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ] 25 | py_versions = '3.9 3.10 3.11 3.12'.split() 26 | 27 | requirements = cfg['requirements'].split(',') 28 | if cfg.get('pip_requirements'): requirements += cfg.get('pip_requirements','').split() 29 | min_python = cfg['min_python'] 30 | lic = licenses.get(cfg['license'].lower(), (cfg['license'], None)) 31 | dev_requirements = (cfg.get('dev_requirements') or '').split() 32 | polars_requirements = (cfg.get('polars_requirements') or '').split() 33 | dev_requirements.extend(polars_requirements) 34 | 35 | setuptools.setup( 36 | name = 'hierarchicalforecast', 37 | license = lic[0], 38 | classifiers = [ 39 | 'Development Status :: ' + statuses[int(cfg['status'])], 40 | 'Intended Audience :: ' + cfg['audience'].title(), 41 | 'Natural Language :: ' + cfg['language'].title(), 42 | ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]] + (['License :: ' + lic[1] ] if lic[1] else []), 43 | url = cfg['git_url'], 44 | packages = setuptools.find_packages(), 45 | include_package_data = True, 46 | install_requires = requirements, 47 | extras_require={'dev': dev_requirements, 48 | 'polars': polars_requirements, 49 | }, 50 | 
dependency_links = cfg.get('dep_links','').split(), 51 | python_requires = '>=' + cfg['min_python'], 52 | long_description = open('README.md', encoding='utf8').read(), 53 | long_description_content_type = 'text/markdown', 54 | zip_safe = False, 55 | entry_points = { 56 | 'console_scripts': cfg.get('console_scripts','').split(), 57 | 'nbdev': [f'{cfg.get("lib_path")}={cfg.get("lib_path")}._modidx:d'] 58 | }, 59 | **setup_cfg) 60 | 61 | 62 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_benchmark.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pytest 4 | from scipy import sparse 5 | from hierarchicalforecast.methods import MinTrace, ERM, BottomUp 6 | from hierarchicalforecast.utils import _ma_cov 7 | from statsforecast.core import StatsForecast 8 | from statsforecast.models import AutoETS 9 | 10 | from hierarchicalforecast.utils import aggregate 11 | from hierarchicalforecast.core import HierarchicalReconciliation 12 | 13 | import pytest_benchmark # noqa: F401 14 | 15 | #%% MinT benchmarks 16 | # run using: pytest tests\test_benchmark.py -v -s --benchmark-min-rounds=20 17 | def _create_reconciler_inputs(n_bottom_timeseries): 18 | # Create random hierarchy 19 | h = 100 20 | insample = 1000 21 | max_levels_random = 5 22 | max_categories_per_random_level = 10 23 | rng = np.random.default_rng(0) 24 | ones = np.ones(n_bottom_timeseries, dtype=np.float64) 25 | idx_range = np.arange(n_bottom_timeseries) 26 | n_levels_random = rng.integers(1, max_levels_random + 1) 27 | S_aggs_list = [] 28 | for _ in range(n_levels_random): 29 | n_categories_per_level = rng.integers(2, max_categories_per_random_level + 1) 30 | codes = rng.integers(0, n_categories_per_level, size=(n_bottom_timeseries, )) 31 | S_agg = sparse.csr_matrix((ones, (codes, idx_range))) 32 | S_aggs_list.append(S_agg) 33 | S_aggs = sparse.vstack(S_aggs_list) 34 | # Create top and bottom level 35 | S_top = sparse.csr_matrix(ones, dtype=np.float64) 36 | S_bottom = sparse.eye(n_bottom_timeseries, dtype=np.float64, format="csr") 37 | # Construct S: stack top, aggregations and bottom 38 | S_sp = sparse.vstack([S_top, S_aggs, S_bottom]) 39 | 40 | y_hat_bottom = np.vstack([i * np.ones(h, dtype=np.float64) for i in range(n_bottom_timeseries)]) 41 | y_hat_bottom_insample = np.vstack([i * np.ones(insample, dtype=np.float64) for i in range(n_bottom_timeseries)]) 42 | y_bottom = y_hat_bottom_insample + rng.normal(size=(n_bottom_timeseries, insample)) 43 | 44 | S = S_sp.toarray() 45 | y_insample = S @ y_bottom 46 | y_hat_insample = S @ y_hat_bottom_insample 47 | idx_bottom = np.arange(start=S.shape[0] - n_bottom_timeseries, stop=S.shape[0]) 48 | y_hat=S @ y_hat_bottom 49 | 50 | return S, y_hat, y_insample, y_hat_insample, idx_bottom 51 | 52 | @pytest.mark.parametrize("n_bottom_timeseries", [20]) 53 | @pytest.mark.parametrize("with_nans", (False, True)) 54 | def test_mint(benchmark, n_bottom_timeseries, with_nans): 55 | S, y_hat, y_insample, y_hat_insample, idx_bottom = _create_reconciler_inputs(n_bottom_timeseries) 56 | if with_nans: 57 | y_insample[-1, :-1] = np.nan 58 | y_hat_insample[-1, :-1] = np.nan 59 | 60 | 
cls_min_trace = MinTrace(method='mint_shrink') 61 | result_min_trace = benchmark(cls_min_trace, S=S, y_hat=y_hat, y_insample=y_insample, y_hat_insample=y_hat_insample, idx_bottom=idx_bottom) # noqa: F841 62 | 63 | @pytest.mark.parametrize("n_bottom_timeseries", [20]) 64 | @pytest.mark.parametrize("with_nans", (False, True)) 65 | def test_cov(benchmark, n_bottom_timeseries, with_nans): 66 | S, y_hat, y_insample, y_hat_insample, idx_bottom = _create_reconciler_inputs(n_bottom_timeseries) 67 | if with_nans: 68 | y_insample[-1, :-1] = np.nan 69 | y_hat_insample[-1, :-1] = np.nan 70 | 71 | residuals = (y_insample - y_hat_insample) 72 | nan_mask = np.isnan(residuals) 73 | 74 | result = benchmark(_ma_cov, residuals, ~nan_mask) # noqa: F841 75 | 76 | @pytest.mark.parametrize("n_bottom_timeseries", [10]) 77 | @pytest.mark.parametrize("erm_method", ['reg', 'reg_bu']) 78 | def test_erm_reg(benchmark, n_bottom_timeseries, erm_method): 79 | S, y_hat, y_insample, y_hat_insample, idx_bottom = _create_reconciler_inputs(n_bottom_timeseries) 80 | 81 | cls_erm = ERM(method=erm_method) 82 | result_erm = benchmark(cls_erm, S=S, y_hat=y_hat, y_insample=y_insample, y_hat_insample=y_hat_insample, idx_bottom=idx_bottom) # noqa: F841 83 | 84 | @pytest.fixture 85 | def load_tourism(): 86 | df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv') 87 | df = df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1) 88 | df.insert(0, 'Country', 'Australia') 89 | return df 90 | 91 | # run with: pytest tests\test_benchmark.py::test_reconciler -v -s --benchmark-min-rounds=20 --disable-warnings 92 | @pytest.mark.parametrize("reconciler", [MinTrace(method='mint_shrink'), BottomUp()]) 93 | def test_reconciler(benchmark, reconciler, load_tourism): 94 | 95 | # Load the Australian Tourism dataset 96 | df = load_tourism 97 | 98 | # Create hierarchical series based on geographic levels and purpose, 99 | # and convert the quarterly ds strings to pd.datetime format 100 | hierarchy_levels = [['Country'], 101 | ['Country', 'State'], 102 | ['Country', 'State', 'Region'], 103 | ['Country', 'State', 'Region', 'Purpose']] 104 | 105 | Y_df, S_df, tags = aggregate(df=df, spec=hierarchy_levels) 106 | qs = Y_df['ds'].str.replace(r'(\d+) (Q\d)', r'\1-\2', regex=True) 107 | Y_df['ds'] = pd.PeriodIndex(qs, freq='Q').to_timestamp() 108 | 109 | # Split train/test sets 110 | Y_test_df = Y_df.groupby('unique_id').tail(8) 111 | Y_train_df = Y_df.drop(Y_test_df.index) 112 | 113 | # Compute base auto-ETS predictions 114 | # Be careful to identify the correct data frequency; this data is quarterly ('QS') 115 | fcst = StatsForecast(models=[AutoETS(season_length=4, model='ZZA')], freq='QS', n_jobs=-1) 116 | Y_hat_df = fcst.forecast(df=Y_train_df, h=8, fitted=True).reset_index() 117 | Y_fitted_df = fcst.forecast_fitted_values().reset_index() 118 | 119 | reconcilers = [reconciler] 120 | hrec = HierarchicalReconciliation(reconcilers=reconcilers) 121 | 122 | result = benchmark(hrec.reconcile, Y_hat_df=Y_hat_df, Y_df=Y_fitted_df, S=S_df, tags=tags) # noqa: F841 --------------------------------------------------------------------------------