├── .circleci └── config.yml ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ ├── config.yml │ ├── documentation-issue.yml │ └── feature-request.yml ├── dependabot.yml ├── release-drafter.yml └── workflows │ ├── build-docs.yaml │ ├── ci.yml │ ├── lint.yml │ ├── no-response.yaml │ └── python-publish.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── action_files ├── imports_with_code.py └── test_models │ ├── requirements.txt │ └── src │ ├── data.py │ ├── evaluation.py │ ├── evaluation_temporal.py │ ├── models.py │ └── models_temporal.py ├── environment.yml ├── experiments ├── hierarchical_baselines │ ├── README.md │ ├── environment.yml │ ├── nbs │ │ ├── run_favorita_baselines.ipynb │ │ ├── run_favorita_hiere2e.ipynb │ │ └── run_hiere2e.ipynb │ └── src │ │ ├── run_baselines.py │ │ └── run_favorita_baselines.py └── libs-comparison │ ├── README.md │ ├── environment.yml │ ├── nbs │ └── parse-results.ipynb │ ├── results.png │ └── src │ ├── data.py │ ├── fable.R │ ├── sktime.py │ └── statsforecast.py ├── hierarchicalforecast ├── __init__.py ├── _modidx.py ├── _nbdev.py ├── core.py ├── evaluation.py ├── methods.py ├── probabilistic_methods.py └── utils.py ├── nbs ├── .gitattributes ├── .gitignore ├── _quarto.yml ├── custom.yml ├── examples │ ├── .nodoc │ ├── .notest │ ├── AustralianDomesticTourism-Bootstraped-Intervals.ipynb │ ├── AustralianDomesticTourism-Intervals.ipynb │ ├── AustralianDomesticTourism-Permbu-Intervals.ipynb │ ├── AustralianDomesticTourism.ipynb │ ├── AustralianDomesticTourismCrossTemporal.ipynb │ ├── AustralianDomesticTourismTemporal.ipynb │ ├── AustralianPrisonPopulation.ipynb │ ├── Installation.ipynb │ ├── Introduction.ipynb │ ├── LocalGlobalAggregation.ipynb │ ├── M3withThief.ipynb │ ├── MLFrameworksExample.ipynb │ ├── NonNegativeReconciliation.ipynb │ ├── TourismLarge-Evaluation.ipynb │ ├── TourismSmall.ipynb │ ├── TourismSmallPolars.ipynb │ ├── imgs │ │ ├── AustralianDomesticTourism-results-fable.png │ │ ├── AustralianPrisonPopulation-results-fable.png │ │ ├── hierarchical_motivation1.png │ │ └── hierarchical_motivation2.png │ └── index.qmd ├── favicon_png.png ├── index.ipynb ├── mint.json ├── nbdev.yml ├── sidebar.yml ├── src │ ├── core.ipynb │ ├── evaluation.ipynb │ ├── methods.ipynb │ ├── probabilistic_methods.ipynb │ └── utils.ipynb └── styles.css ├── pyproject.toml ├── settings.ini ├── setup.py └── tests ├── __init__.py └── test_benchmark.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | jobs: 3 | nbdev-tests: 4 | resource_class: xlarge 5 | docker: 6 | - image: python:3.10-slim 7 | steps: 8 | - checkout 9 | - run: 10 | name: Install dependencies 11 | command: | 12 | pip install uv 13 | uv venv --python 3.10 14 | - run: 15 | name: Run nbdev tests 16 | command: | 17 | source .venv/bin/activate 18 | uv pip install ".[dev]" 19 | nbdev_test --do_print --timing 20 | test-model-performance: 21 | resource_class: large 22 | docker: 23 | - image: python:3.10-slim 24 | steps: 25 | - checkout 26 | - run: 27 | name: Install dependencies 28 | command: | 29 | pip install uv 30 | uv venv --python 3.10 31 | - run: 32 | name: Run model performance tests 33 | command: | 34 | source .venv/bin/activate 35 | uv pip install ".[dev]" 36 | cd ./action_files/test_models/ 37 | uv pip install -r requirements.txt 38 | python -m src.models 39 | python -m src.evaluation 40 | cd ../../ 41 | - 
store_artifacts:
 42 |           path: ./action_files/test_models/data/evaluation.csv
 43 |           destination: evaluation.csv
 44 |   test-model-performance-temporal:
 45 |     resource_class: large
 46 |     docker:
 47 |       - image: python:3.10-slim
 48 |     steps:
 49 |       - checkout
 50 |       - run:
 51 |           name: Install dependencies
 52 |           command: |
 53 |             pip install uv
 54 |             uv venv --python 3.10
 55 |       - run:
 56 |           name: Run model performance tests
 57 |           command: |
 58 |             source .venv/bin/activate
 59 |             uv pip install ".[dev]"
 60 |             cd ./action_files/test_models/
 61 |             uv pip install -r requirements.txt
 62 |             python -m src.models_temporal
 63 |             python -m src.evaluation_temporal
 64 |             cd ../../
 65 |       - store_artifacts:
 66 |           path: ./action_files/test_models/data/evaluation.csv
 67 |           destination: evaluation.csv
 68 | workflows:
 69 |   sample:
 70 |     jobs:
 71 |       - nbdev-tests
 72 |       - test-model-performance
 73 |       - test-model-performance-temporal
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | *.ipynb merge=nbdev-merge
 2 | nbs/** linguist-documentation
 3 | experiments/** linguist-documentation
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report.yml:
--------------------------------------------------------------------------------
 1 | name: Bug report
 2 | title: "[] "
 3 | description: Problems and issues with the library's code
 4 | labels: [bug]
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: |
 9 |         Thank you for reporting the problem!
 10 |         Please make sure what you are reporting is a bug with reproducible steps. To ask questions
 11 |         or share ideas, please post on our [Slack community](https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1h77esh5y-iL1m8N0F7qV1HmH~0KYeAQ) instead.
 12 | 
 13 |   - type: textarea
 14 |     attributes:
 15 |       label: What happened + What you expected to happen
 16 |       description: Describe 1. the bug 2. expected behavior 3. useful information (e.g., logs)
 17 |       placeholder: >
 18 |         Please provide the context in which the problem occurred and explain what happened. Further,
 19 |         please also explain why you think the behaviour is erroneous. It is extremely helpful if you can
 20 |         copy and paste the fragment of logs showing the exact error messages or wrong behaviour here.
 21 | 
 22 |         **NOTE**: please copy and paste text instead of taking screenshots of it for easy future search.
 23 |     validations:
 24 |       required: true
 25 | 
 26 |   - type: textarea
 27 |     attributes:
 28 |       label: Versions / Dependencies
 29 |       description: Please specify the versions of the library, Python, OS, and other libraries that are used.
 30 |       placeholder: >
 31 |         Please specify the versions of dependencies.
 32 |     validations:
 33 |       required: true
 34 | 
 35 |   - type: textarea
 36 |     attributes:
 37 |       label: Reproduction script
 38 |       description: >
 39 |         Please provide a reproducible script. Providing a narrow reproduction (minimal / no external dependencies) will
 40 |         help us triage and address issues in a timely manner!
 41 |       placeholder: >
 42 |         Please provide a short code snippet (less than 50 lines if possible) that can be copy-pasted to
 43 |         reproduce the issue. The snippet should have **no external library dependencies**
 44 |         (i.e., use fake or mock data / environments).
 45 | 
 46 |         **NOTE**: If the code snippet cannot be run by itself, the issue will be marked as "needs-repro-script"
 47 |         until the repro instruction is updated.
 48 |     validations:
 49 |       required: true
 50 | 
 51 |   - type: dropdown
 52 |     attributes:
 53 |       label: Issue Severity
 54 |       description: |
 55 |         How does this issue affect your experience as a user?
 56 |       multiple: false
 57 |       options:
 58 |         - "Low: It annoys or frustrates me."
 59 |         - "Medium: It is a significant difficulty but I can work around it."
 60 |         - "High: It blocks me from completing my task."
 61 |     validations:
 62 |       required: false
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
 1 | blank_issues_enabled: true
 2 | contact_links:
 3 |   - name: Ask a question or get support
 4 |     url: https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1h77esh5y-iL1m8N0F7qV1HmH~0KYeAQ
 5 |     about: Ask a question or request support for using a library of the nixtlaverse
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentation-issue.yml:
--------------------------------------------------------------------------------
 1 | name: Documentation
 2 | title: "[] "
 3 | description: Report an issue with the library documentation
 4 | labels: [documentation]
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: Thank you for helping us improve the library documentation!
 9 | 
 10 |   - type: textarea
 11 |     attributes:
 12 |       label: Description
 13 |       description: |
 14 |         Tell us about the change you'd like to see. For example, "I'd like to
 15 |         see more examples of how to use `cross_validation`."
 16 |     validations:
 17 |       required: true
 18 | 
 19 |   - type: textarea
 20 |     attributes:
 21 |       label: Link
 22 |       description: |
 23 |         If the problem is related to an existing section, please add a link to
 24 |         the section.
 25 |     validations:
 26 |       required: false
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.yml:
--------------------------------------------------------------------------------
 1 | name: Library feature request
 2 | description: Suggest an idea for the project
 3 | title: "[] "
 4 | labels: [enhancement, feature]
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: |
 9 |         Thank you for finding the time to propose a new feature!
 10 |         We really appreciate the community's efforts to improve the nixtlaverse.
 11 | 
 12 |   - type: textarea
 13 |     attributes:
 14 |       label: Description
 15 |       description: A short description of your feature
 16 | 
 17 |   - type: textarea
 18 |     attributes:
 19 |       label: Use case
 20 |       description: >
 21 |         Describe the use case of your feature request. It will help us understand and
 22 |         prioritize the feature request.
 23 |       placeholder: >
 24 |         Rather than telling us how you might implement this feature, try to take a
 25 |         step back and describe what you are trying to achieve.
26 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: weekly 7 | groups: 8 | ci-dependencies: 9 | patterns: ["*"] 10 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$NEXT_PATCH_VERSION' 2 | tag-template: 'v$NEXT_PATCH_VERSION' 3 | categories: 4 | - title: 'New Features' 5 | label: 'feature' 6 | - title: 'Enhancement' 7 | label: 'enhancement' 8 | - title: 'Bug Fixes' 9 | label: 'fix' 10 | - title: 'Breaking Change' 11 | label: 'breaking change' 12 | - title: 'Documentation' 13 | label: 'documentation' 14 | - title: 'Dependencies' 15 | label: 'dependencies' 16 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 17 | template: | 18 | ## Changes 19 | $CHANGES 20 | -------------------------------------------------------------------------------- /.github/workflows/build-docs.yaml: -------------------------------------------------------------------------------- 1 | name: "build-docs" 2 | on: 3 | release: 4 | types: [released] 5 | pull_request: 6 | branches: ["main"] 7 | workflow_dispatch: 8 | 9 | defaults: 10 | run: 11 | shell: bash 12 | 13 | jobs: 14 | build-docs: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Clone repo 18 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 19 | - name: Clone docs repo 20 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 21 | with: 22 | repository: Nixtla/docs 23 | ref: scripts 24 | path: docs-scripts 25 | - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # 5.6.0 26 | with: 27 | python-version: '3.10' 28 | cache-dependency-path: settings.ini 29 | 30 | - name: Install dependencies 31 | run: pip install uv && uv pip install ".[dev]" --system 32 | 33 | - name: Build docs 34 | run: | 35 | mkdir nbs/_extensions 36 | cp -r docs-scripts/mintlify/ nbs/_extensions/ 37 | python docs-scripts/update-quarto.py 38 | nbdev_docs 39 | 40 | - name: Apply final formats 41 | run: bash ./docs-scripts/docs-final-formatting.bash 42 | 43 | - name: Copy over necessary assets 44 | run: | 45 | cp nbs/mint.json _docs/mint.json 46 | cp docs-scripts/imgs/* _docs/ 47 | 48 | - name: Deploy to Mintlify Docs 49 | if: | 50 | github.event_name == 'release' || 51 | github.event_name == 'workflow_dispatch' 52 | uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 53 | with: 54 | github_token: ${{ secrets.GITHUB_TOKEN }} 55 | publish_branch: docs 56 | publish_dir: ./_docs 57 | user_name: github-actions[bot] 58 | user_email: 41898282+github-actions[bot]@users.noreply.github.com 59 | 60 | - name: Trigger mintlify workflow 61 | if: | 62 | github.event_name == 'release' || 63 | github.event_name == 'workflow_dispatch' 64 | uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 65 | with: 66 | github-token: ${{ secrets.DOCS_WORKFLOW_TOKEN }} 67 | script: | 68 | await github.rest.actions.createWorkflowDispatch({ 69 | owner: 'nixtla', 70 | repo: 'docs', 71 | workflow_id: 'mintlify-action.yml', 72 | ref: 'main', 73 | }); 74 | 75 | - name: Configure redirects for gh-pages 76 | run: python docs-scripts/configure-redirects.py hierarchicalforecast 77 | 78 | - name: Deploy to 
GitHub Pages
 79 |       if: |
 80 |         github.event_name == 'release' ||
 81 |         github.event_name == 'workflow_dispatch'
 82 |       uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0
 83 |       with:
 84 |         github_token: ${{ secrets.GITHUB_TOKEN }}
 85 |         publish_branch: gh-pages
 86 |         publish_dir: ./gh-pages
 87 |         user_name: github-actions[bot]
 88 |         user_email: 41898282+github-actions[bot]@users.noreply.github.com
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | concurrency:
 10 |   group: ${{ github.workflow }}-${{ github.ref }}
 11 |   cancel-in-progress: true
 12 | 
 13 | jobs:
 14 |   run-tests:
 15 |     runs-on: ${{ matrix.os }}
 16 |     strategy:
 17 |       fail-fast: false
 18 |       matrix:
 19 |         os: [ubuntu-latest, macos-latest, windows-latest]
 20 |         python-version: ["3.9", "3.10", "3.11", "3.12"]
 21 |     steps:
 22 |       - name: Clone repo
 23 |         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 24 | 
 25 |       - name: Set up environment
 26 |         uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
 27 |         with:
 28 |           python-version: ${{ matrix.python-version }}
 29 | 
 30 |       - name: Install pip requirements
 31 |         run: pip install uv && uv pip install --system ".[dev]"
 32 | 
 33 |       - name: Tests
 34 |         run: nbdev_test --do_print --timing --n_workers 0
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
 1 | name: Lint
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | jobs:
 10 |   lint:
 11 |     runs-on: ubuntu-latest
 12 |     steps:
 13 |       - name: Clone repo
 14 |         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 15 | 
 16 |       - name: Set up python
 17 |         uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
 18 |         with:
 19 |           python-version: 3.9
 20 | 
 21 |       - name: Install dependencies
 22 |         run: pip install black nbdev pre-commit
 23 | 
 24 |       - name: Run pre-commit
 25 |         run: pre-commit run --files hierarchicalforecast/*
--------------------------------------------------------------------------------
/.github/workflows/no-response.yaml:
--------------------------------------------------------------------------------
 1 | name: No Response Bot
 2 | 
 3 | on:
 4 |   issue_comment:
 5 |     types: [created]
 6 |   schedule:
 7 |     - cron: '0 4 * * *'
 8 | 
 9 | jobs:
 10 |   noResponse:
 11 |     runs-on: ubuntu-latest
 12 |     steps:
 13 |       - uses: lee-dohm/no-response@9bb0a4b5e6a45046f00353d5de7d90fb8bd773bb # v0.5.0
 14 |         with:
 15 |           closeComment: >
 16 |             This issue has been automatically closed because it has been awaiting a response for too long.
 17 |             When you have time to work with the maintainers to resolve this issue, please post a new comment and it will be re-opened.
 18 |             If the issue has been locked for editing by the time you return to it, please open a new issue and reference this one.
19 | daysUntilClose: 30 20 | responseRequiredLabel: awaiting response 21 | token: ${{ github.token }} 22 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 25 | - name: Set up Python 26 | uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # 5.6.0 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | - name: Build package 34 | run: python -m build 35 | - name: Publish package 36 | uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.PYPI_API_TOKEN }} 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | .last_checked 31 | .gitattributes 32 | .gitconfig 33 | .DS_Store 34 | quarto-macos.pkg 35 | action_files/test_models/data/ 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | *.csv 139 | *.xls 140 | *.p 141 | *.zip 142 | */data/* 143 | 144 | Gemfile.lock 145 | _docs/ 146 | sidebar.yml 147 | _proc/ 148 | 149 | # VS Code project settings 150 | .vscode 151 | /tmp -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "docs/assets"] 2 | path = docs/assets 3 | url = https://github.com/Nixtla/styles.git 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: true 2 | 3 | repos: 4 | - repo: local 5 | hooks: 6 | - id: imports_with_code 7 | name: Cells with imports and code 8 | entry: python action_files/imports_with_code.py 9 | language: system 10 | - repo: https://github.com/fastai/nbdev 11 | rev: 2.2.10 12 | hooks: 13 | - id: nbdev_clean 14 | - id: nbdev_export 15 | - repo: https://github.com/astral-sh/ruff-pre-commit 16 | rev: v0.2.1 17 | hooks: 18 | - id: ruff 19 | - repo: https://github.com/pre-commit/mirrors-mypy 20 | rev: v1.8.0 21 | hooks: 22 | - id: mypy 23 | args: [--ignore-missing-imports] 24 | exclude: (experiments|setup.py) -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | ops@nixtla.io. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. 
Violating these terms may lead to a temporary or
 93 | permanent ban.
 94 | 
 95 | ### 3. Temporary Ban
 96 | 
 97 | **Community Impact**: A serious violation of community standards, including
 98 | sustained inappropriate behavior.
 99 | 
 100 | **Consequence**: A temporary ban from any sort of interaction or public
 101 | communication with the community for a specified period of time. No public or
 102 | private interaction with the people involved, including unsolicited interaction
 103 | with those enforcing the Code of Conduct, is allowed during this period.
 104 | Violating these terms may lead to a permanent ban.
 105 | 
 106 | ### 4. Permanent Ban
 107 | 
 108 | **Community Impact**: Demonstrating a pattern of violation of community
 109 | standards, including sustained inappropriate behavior, harassment of an
 110 | individual, or aggression toward or disparagement of classes of individuals.
 111 | 
 112 | **Consequence**: A permanent ban from any sort of public interaction within
 113 | the community.
 114 | 
 115 | ## Attribution
 116 | 
 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
 118 | version 2.0, available at
 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
 120 | 
 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
 122 | enforcement ladder](https://github.com/mozilla/diversity).
 123 | 
 124 | [homepage]: https://www.contributor-covenant.org
 125 | 
 126 | For answers to common questions about this code of conduct, see the FAQ at
 127 | https://www.contributor-covenant.org/faq. Translations are available at
 128 | https://www.contributor-covenant.org/translations.
 129 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # How to contribute
 2 | 
 3 | ## Did you find a bug?
 4 | 
 5 | * Ensure the bug was not already reported by searching on GitHub under Issues.
 6 | * If you're unable to find an open issue addressing the problem, open a new one. Be sure to include a title and clear description, as much relevant information as possible, and a code sample or an executable test case demonstrating the expected behavior that is not occurring.
 7 | * Be sure to add the complete error messages.
 8 | 
 9 | ## Do you have a feature request?
 10 | 
 11 | * Ensure that it hasn't already been implemented in the `main` branch of the repository and that there isn't an issue requesting it yet.
 12 | * Open a new issue and make sure to describe it clearly, mention how it improves the project and why it's useful.
 13 | 
 14 | ## Do you want to fix a bug or implement a feature?
 15 | 
 16 | Bug fixes and features are added through pull requests (PRs).
 17 | 
 18 | ## PR submission guidelines
 19 | 
 20 | * Keep each PR focused. While it's more convenient, do not combine several unrelated fixes together. Create as many branches as needed to keep each PR focused.
 21 | * Do not mix style changes/fixes with "functional" changes. It's very difficult to review such PRs, and they will most likely get rejected.
 22 | * Do not add/remove vertical whitespace. Preserve the original style of the file you edit as much as you can.
 23 | * Do not turn an already submitted PR into your development playground. If, after you submit a PR, you discover that more work is needed, close the PR, do the required work, and then submit a new PR. Otherwise each of your commits requires attention from the maintainers of the project.
 24 | * If, however, you submitted a PR and received a request for changes, you should proceed with commits inside that PR, so that the maintainer can see the incremental fixes and won't need to review the whole PR again. In the exceptional case where you realize it will take many commits to complete the requests, it's probably best to close the PR, do the work, and then submit it again. Use common sense where you'd choose one way over another.
 25 | 
 26 | ### Local setup for working on a PR
 27 | 
 28 | #### Clone the repository
 29 | * HTTPS: `git clone https://github.com/Nixtla/hierarchicalforecast.git`
 30 | * SSH: `git clone git@github.com:Nixtla/hierarchicalforecast.git`
 31 | * GitHub CLI: `gh repo clone Nixtla/hierarchicalforecast`
 32 | 
 33 | #### Set up a conda environment
 34 | The repo comes with an `environment.yml` file which contains the libraries needed to run all the tests. In order to set up the environment you must have `conda` installed; we recommend [miniconda](https://docs.conda.io/en/latest/miniconda.html).
 35 | 
 36 | Once you have `conda`, go to the top-level directory of the repository and run the following lines:
 37 | ```
 38 | conda create -n hierarchicalforecast python=3.10
 39 | conda activate hierarchicalforecast
 40 | ```
 41 | Then, run the following command:
 42 | ```
 43 | conda env update -f environment.yml
 44 | ```
 45 | 
 46 | #### Install the library
 47 | Once you have your environment set up, activate it using `conda activate hierarchicalforecast` and then install the library in editable mode using `pip install -e ".[dev]"`.
 48 | 
 49 | #### Install git hooks
 50 | Before making any changes to the code, please install the git hooks and checks that run automatic scripts during each commit and merge to strip the notebooks of superfluous metadata (and avoid merge conflicts).
 51 | ```
 52 | nbdev_install_hooks
 53 | pre-commit install
 54 | ```
 55 | 
 56 | ### Preview Changes
 57 | You can preview changes in your local browser before pushing by using `nbdev_preview`.
 58 | 
 59 | ### Build the library
 60 | The library is built using the notebooks contained in the `nbs` folder. If you want to make any changes to the library, you have to find the relevant notebook, make your changes, and then call
 61 | ```
 62 | nbdev_export
 63 | ```
 64 | 
 65 | ### Run tests
 66 | If you're working on the local interface, you can just use `nbdev_test --n_workers 1 --do_print --timing`.
 67 | 
 68 | ### Clean the notebooks' outputs
 69 | Since the notebooks' output cells can vary from run to run (even if they produce the same outputs), the notebooks are cleaned before committing them. Please make sure to run `nbdev_clean --clear_all` before committing your changes. If you clean the library's notebooks with this command, please revert the changes it makes to the example notebooks with `git checkout nbs/examples`, unless you intend to change the examples.
 70 | 
 71 | 
 72 | ## Do you want to contribute to the documentation?
 73 | 
 74 | * Docs are automatically created from the notebooks in the `nbs` folder.
 75 | * In order to modify the documentation:
 76 |   1. Find the relevant notebook.
 77 |   2. Make your changes.
 78 |   3. Run all cells.
 79 |   4. If you are modifying library notebooks (not in `nbs/examples`), clean all outputs using `Edit > Clear All Outputs`.
 80 |   5. Run `nbdev_preview`.
 81 |   6. Clean the notebook metadata using `nbdev_clean`.
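
For quick reference, a typical local iteration combines the commands described in the sections above. This is a sketch of one common workflow, not a prescribed sequence; exact flags may vary with your nbdev version:
```
nbdev_export                                  # sync notebook changes into the hierarchicalforecast/ modules
nbdev_test --n_workers 1 --do_print --timing  # run the test suite locally
nbdev_clean --clear_all                       # strip outputs/metadata before committing
git checkout nbs/examples                     # restore example notebooks if you did not intend to change them
```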
82 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2022 Nixtla 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include settings.ini 2 | include LICENSE 3 | include CONTRIBUTING.md 4 | include README.md 5 | recursive-exclude * __pycache__ 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Nixtla  [![Slack](https://img.shields.io/badge/Slack-4A154B?&logo=slack&logoColor=white)](https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1pmhan9j5-F54XR20edHk0UtYAPcW4KQ) 2 | 3 |
 4 | # Hierarchical Forecast 👑
 5 | ### Probabilistic hierarchical forecasting with statistical and econometric methods
 6 | 
 7 | 
 8 | [![CI](https://github.com/Nixtla/hierarchicalforecast/actions/workflows/ci.yml/badge.svg)](https://github.com/Nixtla/hierarchicalforecast/actions/workflows/ci.yml)
 9 | [![Python](https://img.shields.io/pypi/pyversions/hierarchicalforecast)](https://pypi.org/project/hierarchicalforecast/)
 10 | [![PyPi](https://img.shields.io/pypi/v/hierarchicalforecast?color=blue)](https://pypi.org/project/hierarchicalforecast/)
 11 | [![conda-nixtla](https://img.shields.io/conda/vn/conda-forge/hierarchicalforecast?color=seagreen&label=conda)](https://anaconda.org/conda-forge/hierarchicalforecast)
 12 | [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://github.com/Nixtla/hierarchicalforecast/blob/main/LICENSE)
 13 | 
 14 | **HierarchicalForecast** offers a collection of cross-sectional and temporal reconciliation methods, including `BottomUp`, `TopDown`, `MiddleOut`, `MinTrace` and `ERM`, as well as probabilistic coherent prediction methods such as `Normality`, `Bootstrap`, and `PERMBU`.
 15 | 
 16 | 
 17 | ## 📚 Intro
 18 | Many time series datasets are organized into structures with different levels or hierarchies of aggregation. Examples include cross-sectional aggregations such as categories, brands, or geographical groupings, and temporal aggregations such as weeks, months, or years. Coherent forecasts across levels are necessary for consistent decision-making and planning. HierarchicalForecast offers reconciliation methods that render coherent forecasts across cross-sectional and temporal hierarchies.
 19 | 
 20 | ## 🎊 Features
 21 | 
 22 | * Classic reconciliation methods:
 23 |     - `BottomUp`: Simple addition of the bottom-level forecasts up to the upper levels (a toy sketch of this idea follows the list).
 24 |     - `TopDown`: Distributes the top-level forecasts through the hierarchy.
 25 | * Alternative reconciliation methods:
 26 |     - `MiddleOut`: Anchors the base predictions at a middle level. Levels above the middle level use a bottom-up approach, while levels below use a top-down approach.
 27 |     - `MinTrace`: Minimizes the total forecast variance over the space of coherent forecasts, using Minimum Trace reconciliation.
 28 |     - `ERM`: Optimizes the reconciliation matrix by minimizing an L1-regularized objective.
 29 | * Probabilistic coherent methods:
 30 |     - `Normality`: Uses the MinTrace closed-form variance-covariance matrix under a normality assumption.
 31 |     - `Bootstrap`: Generates a distribution of hierarchically reconciled predictions using Gamakumara's bootstrap approach.
 32 |     - `PERMBU`: Reconciles independent sample predictions by reinjecting multivariate dependence with estimated rank-permutation copulas, then performing a bottom-up aggregation.
 33 | * Temporal reconciliation methods:
 34 |     - All reconciliation methods (except the insample methods) are also available for temporal hierarchies.
 35 | 
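To make "coherent" concrete: the point-forecast methods above can roughly be viewed as computing `reconciled = S @ P @ base`, where `S` is the summing matrix of the hierarchy and `P` maps base forecasts to bottom-level forecasts. Below is a minimal, self-contained sketch of this idea with made-up numbers and plain NumPy; it illustrates the math only and is not the library's API:

```python
import numpy as np

# Toy two-level hierarchy: Total = A + B.
# The summing matrix S maps the 2 bottom series to all 3 series.
S = np.array([[1, 1],   # Total
              [1, 0],   # A
              [0, 1]])  # B

# Base forecasts are incoherent: 40 + 55 != 105.
y_hat = np.array([105.0, 40.0, 55.0])

# BottomUp reconciliation: P simply selects the bottom-level
# forecasts, and S re-aggregates them up the hierarchy.
P = np.array([[0, 1, 0],
              [0, 0, 1]])
y_rec = S @ P @ y_hat
print(y_rec)  # [95. 40. 55.] -> coherent, since 95 = 40 + 55
```

The methods differ mainly in how `P` is constructed (e.g., `MinTrace` chooses it to minimize forecast error variance), while the probabilistic methods extend the same idea to predictive distributions.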
 36 | Missing something? Please open an issue or write to us on [![Slack](https://img.shields.io/badge/Slack-4A154B?&logo=slack&logoColor=white)](https://join.slack.com/t/nixtlaworkspace/shared_invite/zt-135dssye9-fWTzMpv2WBthq8NK0Yvu6A)
 37 | 
 38 | ## 📖 Why?
 39 | 
 40 | **Short**: We want to contribute to the ML field by providing reliable baselines and benchmarks for the hierarchical forecasting task in industry and academia. Here's the complete [paper](https://arxiv.org/abs/2207.03517).
 41 | 
 42 | **Verbose**: `HierarchicalForecast` integrates publicly available processed datasets, evaluation metrics, and a curated set of standard statistical baselines. In this library we provide usage examples and references to extensive experiments where we showcase the baselines' use and evaluate the accuracy of their predictions. With this work, we hope to contribute to Machine Learning forecasting by bridging the gap to statistical and econometric modeling, as well as providing tools for the development of novel hierarchical forecasting algorithms rooted in a thorough comparison of these well-established models. We intend to continue maintaining and growing the repository, promoting collaboration across the forecasting community.
 43 | 
 44 | ## 💻 Installation
 45 | 
 46 | You can install `HierarchicalForecast` from the Python package index with [pip](https://pypi.org):
 47 | 
 48 | ```bash
 49 | pip install hierarchicalforecast
 50 | ```
 51 | 
 52 | You can also install `HierarchicalForecast` from [conda](https://anaconda.org) with:
 53 | 
 54 | ```bash
 55 | conda install -c conda-forge hierarchicalforecast
 56 | ```
 57 | 
 58 | 
 59 | ## 🧬 How to use
 60 | 
 61 | The following example needs `statsforecast` and `datasetsforecast` as additional packages. If not installed, install them via your preferred method, e.g. `pip install statsforecast datasetsforecast`.
 62 | The `datasetsforecast` library allows us to download hierarchical datasets, and we will use `statsforecast` to compute the base forecasts to be reconciled.
 63 | 
 64 | You can open a complete example in Colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nixtla/hierarchicalforecast/blob/main/nbs/examples/TourismSmall.ipynb)
 65 | 
 66 | Minimal Example:
 67 | ```python
 68 | # !pip install -U numba statsforecast datasetsforecast
 69 | import numpy as np
 70 | import pandas as pd
 71 | 
 72 | # obtain hierarchical dataset
 73 | from datasetsforecast.hierarchical import HierarchicalData
 74 | 
 75 | # compute base forecasts (not yet coherent)
 76 | from statsforecast.core import StatsForecast
 77 | from statsforecast.models import AutoARIMA, Naive
 78 | 
 79 | # obtain hierarchical reconciliation methods and evaluation
 80 | from hierarchicalforecast.core import HierarchicalReconciliation
 81 | from hierarchicalforecast.evaluation import evaluate
 82 | from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut
 83 | from utilsforecast.losses import mse
 84 | 
 85 | # Load TourismSmall dataset
 86 | Y_df, S, tags = HierarchicalData.load('./data', 'TourismSmall')
 87 | Y_df['ds'] = pd.to_datetime(Y_df['ds'])
 88 | S = S.reset_index(names="unique_id")
 89 | 
 90 | # split train/test sets
 91 | Y_test_df = Y_df.groupby('unique_id').tail(4)
 92 | Y_train_df = Y_df.drop(Y_test_df.index)
 93 | 
 94 | # Compute base auto-ARIMA predictions
 95 | fcst = StatsForecast(models=[AutoARIMA(season_length=4), Naive()],
 96 |                      freq='QE', n_jobs=-1)
 97 | Y_hat_df = fcst.forecast(df=Y_train_df, h=4)
 98 | 
 99 | # Reconcile the base predictions
 100 | reconcilers = [
 101 |     BottomUp(),
 102 |     TopDown(method='forecast_proportions'),
 103 |     MiddleOut(middle_level='Country/Purpose/State',
 104 |               top_down_method='forecast_proportions')
 105 | ]
 106 | hrec = HierarchicalReconciliation(reconcilers=reconcilers)
 107 | Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df,
 108 |                           S=S, tags=tags)
 109 | ```
 110 | 
 111 | ### Evaluation
 112 | Assumes you have a test dataframe.
 113 | 
 114 | ```python
 115 | df = Y_rec_df.merge(Y_test_df, on=['unique_id', 'ds'])
 116 | evaluation = evaluate(df = df,
 117 |                       tags = tags,
 118 |                       metrics = [mse],
 119 |                       benchmark = "Naive")
 120 | ```
 121 | 
 122 | ## 📖 Documentation (WIP)
 123 | Here is a link to the [documentation](https://nixtlaverse.nixtla.io/hierarchicalforecast/index.html).
 124 | 
 125 | ## 📃 License
 126 | This project is licensed under the Apache 2.0 License - see the [LICENSE](https://github.com/Nixtla/hierarchicalforecast/blob/main/LICENSE) file for details.
 127 | 
 128 | ## 🏟 HTS projects
 129 | In the R ecosystem, we recommend checking out [fable](http://fable.tidyverts.org/) and the now-retired [hts](https://github.com/earowang/hts).
 130 | In Python, we want to acknowledge the following libraries: [hiere2e](https://github.com/rshyamsundar/gluonts-hierarchical-ICML-2021), [hierts](https://github.com/elephaint/hierts), [sktime](https://github.com/sktime/sktime-tutorial-pydata-berlin-2022), [darts](https://github.com/unit8co/darts), [pyhts](https://github.com/AngelPone/pyhts), [scikit-hts](https://github.com/carlomazzaferro/scikit-hts).
 131 | 
 132 | ## 📚 References and Acknowledgements
 133 | This work is highly influenced by the fantastic work of previous contributors and other scholars who previously proposed the reconciliation methods presented here.
We want to highlight the work of Rob Hyndman, George Athanasopoulos, Shanika L. Wickramasuriya, Souhaib Ben Taieb, and Bonsoo Koo. For a full reference link, please visit the Reference section of this [paper](https://arxiv.org/pdf/2207.03517.pdf). 134 | We encourage users to explore this [literature review](https://otexts.com/fpp3/hierarchical-reading.html). 135 | 136 | ## 🙏 How to cite 137 | If you enjoy or benefit from using these Python implementations, a citation to this [hierarchical forecasting reference paper](https://arxiv.org/abs/2207.03517) will be greatly appreciated. 138 | ```bibtex 139 | @article{olivares2024hierarchicalforecastreferenceframeworkhierarchical, 140 | title={HierarchicalForecast: A Reference Framework for Hierarchical Forecasting in Python}, 141 | author={Kin G. Olivares and Azul Garza and David Luo and Cristian Challú and Max Mergenthaler and Souhaib Ben Taieb and Shanika L. Wickramasuriya and Artur Dubrawski}, 142 | year={2024}, 143 | eprint={2207.03517}, 144 | archivePrefix={arXiv}, 145 | primaryClass={stat.ML}, 146 | url={https://arxiv.org/abs/2207.03517}, 147 | } 148 | ``` 149 | -------------------------------------------------------------------------------- /action_files/imports_with_code.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import warnings 3 | from pathlib import Path 4 | 5 | from nbdev.processors import NBProcessor, _do_eval 6 | 7 | 8 | def check_nb(nb_path: str) -> None: 9 | with warnings.catch_warnings(record=True) as issued_warnings: 10 | NBProcessor(nb_path, _do_eval, process=True) 11 | if any( 12 | "Found cells containing imports and other code" in str(w) 13 | for w in issued_warnings 14 | ): 15 | print(f"{nb_path} has cells containing imports and code.") 16 | sys.exit(1) 17 | 18 | 19 | if __name__ == "__main__": 20 | repo_root = Path(__file__).parents[1] 21 | for nb_path in (repo_root / "nbs").glob("*.ipynb"): 22 | check_nb(str(nb_path)) 23 | -------------------------------------------------------------------------------- /action_files/test_models/requirements.txt: -------------------------------------------------------------------------------- 1 | fire 2 | datasetsforecast 3 | -------------------------------------------------------------------------------- /action_files/test_models/src/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fire 3 | import pickle 4 | import pandas as pd 5 | 6 | from statsforecast.models import AutoETS 7 | from statsforecast.core import StatsForecast 8 | 9 | from hierarchicalforecast.utils import aggregate 10 | 11 | 12 | def get_data(): 13 | # If data exists read it 14 | if os.path.isfile('data/Y_test.csv'): 15 | Y_test_df = pd.read_csv('data/Y_test.csv') 16 | Y_train_df = pd.read_csv('data/Y_train.csv') 17 | Y_hat_df = pd.read_csv('data/Y_hat.csv') 18 | Y_fitted_df = pd.read_csv('data/Y_fitted.csv') 19 | S_df = pd.read_csv('data/S.csv') 20 | 21 | with open('data/tags.pickle', 'rb') as handle: 22 | tags = pickle.load(handle) 23 | 24 | return Y_train_df, Y_test_df, Y_hat_df, Y_fitted_df, S_df, tags 25 | 26 | # Read and Parse Data 27 | Y_df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv') 28 | Y_df = Y_df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1) 29 | Y_df.insert(0, 'Country', 'Australia') 30 | Y_df = Y_df[['Country', 'Region', 'State', 'Purpose', 'ds', 'y']] 31 | Y_df['ds'] = Y_df['ds'].str.replace(r'(\d+) (Q\d)', r'\1-\2', 
regex=True) 32 | Y_df['ds'] = pd.to_datetime(Y_df['ds']) 33 | 34 | # Hierarchical Aggregation 35 | spec = [ 36 | ['Country'], 37 | ['Country', 'State'], 38 | ['Country', 'State', 'Region'], 39 | ['Country', 'State', 'Region', 'Purpose'] 40 | ] 41 | 42 | Y_df, S_df, tags = aggregate(Y_df, spec) 43 | 44 | # Train/Test Splits 45 | Y_test_df = Y_df.groupby('unique_id').tail(8) 46 | Y_train_df = Y_df.drop(Y_test_df.index) 47 | 48 | sf = StatsForecast(models=[AutoETS(season_length=4, model='ZZA')], 49 | freq='QS', n_jobs=-1) 50 | Y_hat_df = sf.forecast(df=Y_train_df, h=8, fitted=True) 51 | Y_fitted_df = sf.forecast_fitted_values() 52 | 53 | # Save Data 54 | if not os.path.exists('./data'): 55 | os.makedirs('./data') 56 | 57 | Y_test_df.to_csv('./data/Y_test.csv', index=False) 58 | Y_train_df.to_csv('./data/Y_train.csv', index=False) 59 | 60 | Y_hat_df.to_csv('./data/Y_hat.csv', index=False) 61 | Y_fitted_df.to_csv('./data/Y_fitted.csv', index=False) 62 | S_df.to_csv('./data/S.csv', index=False) 63 | 64 | with open('./data/tags.pickle', 'wb') as handle: 65 | pickle.dump(tags, handle, protocol=pickle.HIGHEST_PROTOCOL) 66 | 67 | return Y_train_df, Y_test_df, Y_hat_df, Y_fitted_df, S_df, tags 68 | 69 | def save_data(): 70 | Y_train_df, Y_test_df, Y_hat_df, Y_fitted_df, S_df, tags = get_data() 71 | 72 | Y_test_df.to_csv('./data/Y_test.csv', index=False) 73 | Y_train_df.to_csv('./data/Y_train.csv', index=False) 74 | 75 | Y_hat_df.to_csv('./data/Y_hat.csv', index=False) 76 | Y_fitted_df.to_csv('./data/Y_fitted.csv', index=False) 77 | S_df.to_csv('./data/S.csv', index=False) 78 | 79 | with open('./data/tags.pickle', 'wb') as handle: 80 | pickle.dump(tags, handle, protocol=pickle.HIGHEST_PROTOCOL) 81 | 82 | 83 | if __name__=="__main__": 84 | fire.Fire(save_data) 85 | -------------------------------------------------------------------------------- /action_files/test_models/src/evaluation.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from hierarchicalforecast.evaluation import HierarchicalEvaluation 6 | 7 | def rmse(y, y_hat): 8 | return np.mean(np.sqrt(np.mean((y-y_hat)**2, axis=1))) 9 | 10 | def mase(y, y_hat, y_insample, seasonality=4): 11 | errors = np.mean(np.abs(y - y_hat), axis=1) 12 | scale = np.mean(np.abs(y_insample[:, seasonality:] - y_insample[:, :-seasonality]), axis=1) 13 | return np.mean(errors / scale) 14 | 15 | 16 | def evaluate(): 17 | execution_times = pd.read_csv('data/execution_times.csv') 18 | models = [f"{x[0]} ({x[1]:.2f} secs)" for x in execution_times.values] 19 | 20 | Y_rec_df = pd.read_csv('data/Y_rec.csv') 21 | Y_test_df = pd.read_csv('data/Y_test.csv') 22 | Y_train_df = pd.read_csv('data/Y_train.csv') 23 | 24 | with open('data/tags.pickle', 'rb') as handle: 25 | tags = pickle.load(handle) 26 | 27 | eval_tags = {} 28 | eval_tags['Total'] = tags['Country'] 29 | eval_tags['State'] = tags['Country/State'] 30 | eval_tags['Regions'] = tags['Country/State/Region'] 31 | eval_tags['Bottom'] = tags['Country/State/Region/Purpose'] 32 | eval_tags['All'] = np.concatenate(list(tags.values())) 33 | 34 | evaluator = HierarchicalEvaluation(evaluators=[mase]) 35 | evaluation = evaluator.evaluate( 36 | Y_hat_df=Y_rec_df, Y_test_df=Y_test_df, 37 | tags=eval_tags, Y_df=Y_train_df 38 | ) 39 | evaluation = evaluation.query("level != 'Overall'").set_index(['level', 'metric']) 40 | 41 | evaluation.columns = ['Base'] + models 42 | evaluation = evaluation.map('{:.2f}'.format) 43 | return 
evaluation 44 | 45 | 46 | if __name__ == '__main__': 47 | evaluation = evaluate() 48 | evaluation.to_csv('./data/evaluation.csv') 49 | print(evaluation.T) 50 | -------------------------------------------------------------------------------- /action_files/test_models/src/evaluation_temporal.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import pandas as pd 4 | 5 | import hierarchicalforecast.evaluation as hfe 6 | from utilsforecast.losses import mae, rmse 7 | 8 | def evaluate(): 9 | execution_times = pd.read_csv('data/execution_times.csv') 10 | models = [f"{x[0]} ({x[1]:.2f} secs)" for x in execution_times.values] 11 | 12 | Y_rec_df = pd.read_csv('data/Y_rec.csv') 13 | Y_test_df = pd.read_csv('data/Y_test.csv') 14 | 15 | with open('data/tags.pickle', 'rb') as handle: 16 | tags = pickle.load(handle) 17 | 18 | Y_hat_df=Y_rec_df.merge(Y_test_df, on=["ds", "unique_id", "temporal_id"], how="left") 19 | 20 | evaluation = hfe.evaluate( 21 | df=Y_hat_df.drop(columns="unique_id"), 22 | tags=tags, 23 | metrics=[mae, rmse], 24 | id_col='temporal_id' 25 | ) 26 | numeric_cols = evaluation.select_dtypes(include="number").columns 27 | evaluation[numeric_cols] = evaluation[numeric_cols].map('{:.3}'.format).astype(np.float64) 28 | evaluation.columns = ['level', 'metric', 'Base'] + models 29 | return evaluation 30 | 31 | if __name__ == '__main__': 32 | evaluation = evaluate() 33 | evaluation.to_csv('./data/evaluation.csv') 34 | print(evaluation.T) 35 | -------------------------------------------------------------------------------- /action_files/test_models/src/models.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fire 3 | import pandas as pd 4 | 5 | from hierarchicalforecast.core import HierarchicalReconciliation 6 | from hierarchicalforecast.methods import ( 7 | BottomUp, BottomUpSparse, TopDown, TopDownSparse, MiddleOut, MiddleOutSparse, 8 | MinTrace, 9 | MinTraceSparse, 10 | OptimalCombination, 11 | ERM, 12 | ) 13 | 14 | from src.data import get_data 15 | 16 | 17 | def main(): 18 | Y_train_df, Y_test_df, Y_hat_df, Y_fitted_df, S_df, tags = get_data() 19 | 20 | reconcilers = [BottomUp(), 21 | BottomUpSparse(), 22 | TopDown(method="forecast_proportions"), 23 | TopDownSparse(method="forecast_proportions"), 24 | TopDown(method="average_proportions"), 25 | TopDownSparse(method="average_proportions"), 26 | TopDown(method="proportion_averages"), 27 | TopDownSparse(method="proportion_averages"), 28 | MiddleOut(middle_level="Country/State", top_down_method="average_proportions"), 29 | MiddleOutSparse(middle_level="Country/State", top_down_method="average_proportions"), 30 | MinTrace(method='ols'), 31 | MinTrace(method='wls_struct'), 32 | MinTrace(method='wls_var'), 33 | MinTrace(method='mint_cov'), 34 | MinTrace(method='mint_shrink'), 35 | MinTraceSparse(method='ols'), 36 | MinTraceSparse(method='wls_struct'), 37 | MinTraceSparse(method='wls_var'), 38 | OptimalCombination(method='ols'), 39 | OptimalCombination(method='wls_struct'), 40 | ERM(method='closed'), 41 | ] 42 | hrec = HierarchicalReconciliation(reconcilers=reconcilers) 43 | Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, 44 | Y_df=Y_fitted_df, S=S_df, tags=tags) 45 | 46 | execution_times = pd.Series(hrec.execution_times).reset_index() 47 | 48 | if not os.path.exists('./data'): 49 | os.makedirs('./data') 50 | Y_rec_df.to_csv('./data/Y_rec.csv', index=False) 51 | execution_times.to_csv('./data/execution_times.csv', 
index=False) 52 | 53 | 54 | if __name__ == '__main__': 55 | fire.Fire(main) 56 | -------------------------------------------------------------------------------- /action_files/test_models/src/models_temporal.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fire 3 | import pandas as pd 4 | import pickle 5 | 6 | from hierarchicalforecast.utils import aggregate_temporal 7 | from hierarchicalforecast.core import HierarchicalReconciliation 8 | from hierarchicalforecast.methods import ( 9 | BottomUp, BottomUpSparse, TopDown, 10 | TopDownSparse, 11 | MiddleOut, 12 | MiddleOutSparse, 13 | MinTrace, 14 | MinTraceSparse, 15 | OptimalCombination, 16 | # ERM, 17 | ) 18 | 19 | from statsforecast.models import AutoETS 20 | from statsforecast.core import StatsForecast 21 | 22 | def main(): 23 | 24 | # Read data 25 | Y_df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv') 26 | Y_df = Y_df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1) 27 | Y_df.insert(0, 'Country', 'Australia') 28 | Y_df = Y_df[['Country', 'Region', 'State', 'Purpose', 'ds', 'y']] 29 | Y_df['ds'] = Y_df['ds'].str.replace(r'(\d+) (Q\d)', r'\1-\2', regex=True) 30 | Y_df['ds'] = pd.PeriodIndex(Y_df["ds"], freq='Q').to_timestamp() 31 | Y_df["unique_id"] = Y_df["Country"] + "/" + Y_df["State"] + "/" + Y_df["Region"] + "/" + Y_df["Purpose"] 32 | 33 | # split data into train and test 34 | horizon = 8 35 | Y_test_df = Y_df.groupby("unique_id", as_index=False).tail(horizon) 36 | Y_train_df = Y_df.drop(Y_test_df.index) 37 | 38 | # Temporal Hierarchical Aggregation 39 | spec_temporal = {"year": 4, "semiannual": 2, "quarter": 1} 40 | Y_train_df, S_train_df, tags_train = aggregate_temporal(df=Y_train_df, spec=spec_temporal) 41 | Y_test_df, S_test_df, tags_test = aggregate_temporal(df=Y_test_df, spec=spec_temporal) 42 | 43 | # Create forecasts 44 | Y_hat_dfs = [] 45 | id_cols = ["unique_id", "temporal_id", "ds"] 46 | # We will train a model for each temporal level 47 | for level, temporal_ids_train in tags_train.items(): 48 | # Filter the data for the level 49 | Y_level_train = Y_train_df.query("temporal_id in @temporal_ids_train") 50 | temporal_ids_test = tags_test[level] # noqa F841 51 | Y_level_test = Y_test_df.query("temporal_id in @temporal_ids_test") 52 | # For each temporal level we have a different frequency and forecast horizon 53 | freq_level = pd.infer_freq(Y_level_train["ds"].unique()) 54 | horizon_level = Y_level_test["ds"].nunique() 55 | # Train a model and create forecasts 56 | fcst = StatsForecast(models=[AutoETS(model='ZZZ')], freq=freq_level, n_jobs=-1) 57 | Y_hat_df_level = fcst.forecast(df=Y_level_train[["ds", "unique_id", "y"]], h=horizon_level) 58 | # Add the test set to the forecast 59 | Y_hat_df_level = Y_hat_df_level.merge(Y_level_test.drop(columns="y"), on=["ds", "unique_id"], how="left") 60 | # Put cols in the right order (for readability) 61 | Y_hat_cols = id_cols + [col for col in Y_hat_df_level.columns if col not in id_cols] 62 | Y_hat_df_level = Y_hat_df_level[Y_hat_cols] 63 | # Append the forecast to the list 64 | Y_hat_dfs.append(Y_hat_df_level) 65 | 66 | Y_hat_df = pd.concat(Y_hat_dfs, ignore_index=True) 67 | 68 | reconcilers = [ 69 | BottomUp(), 70 | BottomUpSparse(), 71 | TopDown(method="forecast_proportions"), 72 | TopDownSparse(method="forecast_proportions"), 73 | # TopDown(method="average_proportions"), 74 | # TopDownSparse(method="average_proportions"), 75 | # 
TopDown(method="proportion_averages"), 76 | # TopDownSparse(method="proportion_averages"), 77 | MiddleOut(middle_level="semiannual", top_down_method="forecast_proportions"), 78 | MiddleOutSparse(middle_level="semiannual", top_down_method="forecast_proportions"), 79 | MinTrace(method='ols'), 80 | MinTrace(method='wls_struct'), 81 | # MinTrace(method='wls_var'), 82 | # MinTrace(method='mint_cov'), 83 | # MinTrace(method='mint_shrink'), 84 | MinTraceSparse(method='ols'), 85 | MinTraceSparse(method='wls_struct'), 86 | # MinTraceSparse(method='wls_var'), 87 | OptimalCombination(method='ols'), 88 | OptimalCombination(method='wls_struct'), 89 | # ERM(method='closed'), 90 | ] 91 | hrec = HierarchicalReconciliation(reconcilers=reconcilers) 92 | 93 | Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, 94 | S=S_test_df, 95 | tags=tags_test, 96 | temporal=True, 97 | ) 98 | 99 | execution_times = pd.Series(hrec.execution_times).reset_index() 100 | 101 | if not os.path.exists('./data'): 102 | os.makedirs('./data') 103 | Y_rec_df.to_csv('./data/Y_rec.csv', index=False) 104 | Y_test_df.to_csv('./data/Y_test.csv', index=False) 105 | Y_train_df.to_csv('./data/Y_train.csv', index=False) 106 | with open('./data/tags.pickle', 'wb') as handle: 107 | pickle.dump(tags_test, handle, protocol=pickle.HIGHEST_PROTOCOL) 108 | 109 | execution_times.to_csv('./data/execution_times.csv', index=False) 110 | 111 | if __name__ == '__main__': 112 | fire.Fire(main) 113 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: hierarchicalforecast 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - numpy 6 | - numba 7 | - pandas 8 | - scikit-learn 9 | - quadprog 10 | - clarabel 11 | - matplotlib 12 | - pip 13 | - pip: 14 | - nbdev 15 | - ipython<=8.32.0 -------------------------------------------------------------------------------- /experiments/hierarchical_baselines/README.md: -------------------------------------------------------------------------------- 1 | # HierarchicalForecast Baselines 2 | 3 | This study showcases the HierarchicalForecast library's statistical baselines, following established experimental protocols from previous research Rangapuram et al. [2021], Olivares et al. [2023] and Kamarthi et al. [2022]. The benchmark datasets utilized include Australian Monthly Labour, SF Bay Area daily Traffic, Quarterly Australian Tourism Visits, Monthly Australian Tourism visits, and daily Wikipedia article views. 4 | 5 | In our experiment, we compare the predictions of several state-of-the-art probabilistic coherent methods. The statistical baselines encompass different variants, such as (1) BOOTSTRAP [Panagiotelis et al., 2023], (2) NORMALITY [Wickramasuriya et al., 2022], and (3) PERMBU probabilistic reconciliation [Ben Taieb et al., 2017], combined with mean reconcilers including BottomUp [Orcutt et al., 1968], TopDown [Gross and Sohl, 1990], and MinTrace [Wickramasuriya et al., 2019]. 6 | 7 | ## Probabilistic Coherent Forecast Accuracy 8 | 9 | To evaluate the accuracy of probabilistic coherent forecasts, we employ the Mean scaled Continuous Ranked Probability Scores (sCRPS) integral. This evaluation metric utilizes a Riemann approximation to the sCRPS, considering quantile intervals of 1 percent denoted as $dq$. We report here the best performing method across the BottomUp, TopDown and MinTrace reconciliations. 
10 | ```math 11 | \mathrm{sCRPS}(\mathbb{P}, \mathbf{y}_{[i],\tau}) = \frac{2}{|[i\,]|} \sum_{i} 12 | \frac{\int^{1}_{0} \mathrm{QL}(\mathbb{P}_{i,\tau}, y_{i,\tau})_{q} dq }{\sum_{i} | y_{i,\tau} |} 13 | ``` 14 | 15 | | | **BOOTSTRAP** ||| **NORMALITY** ||| **PERMBU** ||| 16 | |--------------|----------|---------|----------|----------|---------|----------|----------|---------|----------| 17 | | **Dataset** | BottomUp | TopDown | MinTrace | BottomUp | TopDown | MinTrace | BottomUp | TopDown | MinTrace | 18 | | Labour | 0.0078 | 0.0668 | 0.0073 | 0.0076 | 0.0656 | 0.0069 | 0.0077 | 0.0623 | 0.0069 | 19 | | Traffic | 0.0736 | 0.0741 | 0.0608 | 0.0845 | 0.0738 | 0.0630 | 0.0849 | 0.0708 | 0.0651 | 20 | | TourismS | 0.0682 | 0.1040 | 0.0703 | 0.0649 | 0.1000 | 0.6830 | 0.0649 | 0.0898 | 0.0680 | 21 | | TourismL | 0.1375 | - | 0.1313 | 0.1620 | - | 0.1338 | - | - | - | 22 | | Wiki2 | 0.2894 | 0.3231 | 0.2808 | 0.3914 | 0.3385 | 0.3385 | 0.3920 | 0.4269 | 0.3821 | 23 |
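For reference, below is a minimal NumPy sketch of this Riemann approximation. It is a simplified stand-in for the `scaled_crps` utility that `src/run_baselines.py` imports from `hierarchicalforecast.evaluation`; the array shapes and the exact normalization convention are illustrative assumptions, not the library's implementation.

```python
import numpy as np

def scaled_crps_sketch(y, y_hat, quantiles):
    # y: observed values, shape (n_series, horizon)
    # y_hat: quantile forecasts, shape (n_series, horizon, n_quantiles)
    # quantiles: uniform grid in (0, 1), e.g. np.arange(1, 100) / 100 for 1% steps
    q = quantiles[None, None, :]
    delta = y[:, :, None] - y_hat
    quantile_loss = np.maximum(q * delta, (q - 1) * delta)  # pinball loss QL(P, y)_q
    integral = quantile_loss.mean(axis=-1)  # mean over the grid approximates the dq integral
    # 2 / |[i]| factor and sum(|y|) scaling, following the formula above
    return 2 * integral.sum() / (y.shape[0] * np.abs(y).sum())
```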
24 | 
25 | 
26 | ## Hierarchically Coherent Forecast Accuracy
27 | 
28 | To assess the accuracy of hierarchically coherent mean forecasts, we employ the relative mean squared error, which compares the reconciled StatsForecast base AutoARIMA forecasts against a Naive benchmark forecast.
29 | ```math
30 | \mathrm{relMSE}(\mathbf{y}_{[i]}, \hat{\mathbf{y}}_{[i]}, \mathbf{\check{y}}_{[i]}) =
31 | \frac{\mathrm{MSE}(\mathbf{y}_{[i]}, \mathbf{\hat{y}}_{[i]})}{\mathrm{MSE}(\mathbf{y}_{[i]}, \mathbf{\check{y}}_{[i]})}
32 | ```
33 | 
34 | | Dataset  | BottomUp | TopDown | MinTrace |
35 | |----------|----------|---------|----------|
36 | | Labour   | 0.5382   | 16.8204 | 0.3547   |
37 | | Traffic  | 0.1394   | 0.0614  | 0.0744   |
38 | | TourismS | 0.1002   | 0.1919  | 0.1235   |
39 | | TourismL | 0.3070   | -       | 0.1375   |
40 | | Wiki2    | 1.0163   | 1.4482  | 1.0068   |
41 | 
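Below is a minimal sketch of this ratio, mirroring the `rel_mse(y, y_hat, y_train)` call signature used in `src/run_baselines.py` and assuming a last-value Naive benchmark; the library's internal benchmark construction may differ.

```python
import numpy as np

def rel_mse_sketch(y, y_hat, y_train):
    # y, y_hat: test values and reconciled forecasts, shape (n_series, horizon)
    # y_train: in-sample values, shape (n_series, n_insample)
    mse_model = np.mean((y - y_hat) ** 2)
    # Naive benchmark: carry each series' last in-sample value across the horizon
    y_naive = np.repeat(y_train[:, [-1]], y.shape[1], axis=1)
    mse_naive = np.mean((y - y_naive) ** 2)
    return mse_model / mse_naive
```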
42 | 
43 | ## Reproducibility
44 | 
45 | 1. Create a conda environment `hierarchical_baselines` using the `environment.yml` file.
46 | ```shell
47 | conda env create -f environment.yml
48 | ```
49 | 
50 | 2. Activate the conda environment using
51 | ```shell
52 | conda activate hierarchical_baselines
53 | ```
54 | 
55 | 3. Run the experiments for each dataset and each model with
56 |    - `--intervals_method` parameter in `['bootstrap', 'normality', 'permbu']`
57 |    - `--dataset` parameter in `['Labour', 'Traffic', 'OldTraffic', 'TourismSmall', 'TourismLarge', 'OldTourismLarge', 'Wikitwo']`
58 | ```shell
59 | python src/run_baselines.py --intervals_method 'bootstrap' --dataset 'OldTourismLarge'
60 | ```
61 | 
62 | You can access the final reconciled forecasts from the `./data/{dataset}/{intervals_method}_rec.csv` file. Example: `./data/TourismLarge/bootstrap_rec.csv`.
63 | The file gives you access to the mean and quantile forecasts for all series in the dataset, for example `AutoARIMA/BottomUp`, `AutoARIMA/BottomUp-lo-90`, and `AutoARIMA/BottomUp-hi-90`, which correspond to the mean, q5, and q95 forecasts; see the snippet below.
64 | 
65 | 
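For instance, a short pandas snippet to inspect the `BottomUp` mean and 90% interval forecasts; the `seed` column indexes the bootstrap replicates written by `bootstrap_reconcile` in `src/run_baselines.py`.

```python
import pandas as pd

# Load the reconciled forecasts and select one method's mean and 90% interval
rec = pd.read_csv('./data/TourismLarge/bootstrap_rec.csv')
cols = ['seed', 'unique_id', 'ds', 'AutoARIMA/BottomUp',
        'AutoARIMA/BottomUp-lo-90', 'AutoARIMA/BottomUp-hi-90']
print(rec[cols].head())
```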

66 | 
67 | ## References
68 | - [Kin G. Olivares, Federico Garza, David Luo, Cristian Challú, Max Mergenthaler, Souhaib Ben Taieb, Shanika Wickramasuriya, and Artur Dubrawski (2023). "HierarchicalForecast: A reference framework for hierarchical forecasting". Journal of Machine Learning Research, submitted. URL https://arxiv.org/abs/2207.03517](https://arxiv.org/abs/2207.03517)
69 | 
70 | ### Datasets
71 | - [Australian Bureau of Statistics. Labour force, Australia. Accessed Online, 2019. URL https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/6202.0Dec%202019?OpenDocument.](https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/6202.0Dec%202019?OpenDocument)
72 | - [Dheeru Dua and Casey Graff. Traffic dataset. UCI Machine Learning Repository, 2017. URL http://archive.ics.uci.edu/ml.](http://archive.ics.uci.edu/ml.)
73 | - [Tourism Australia, Canberra. Tourism Research Australia (2005), Travel by Australians. URL https://www.kaggle.com/luisblanche/quarterly-tourism-in-australia/](https://www.kaggle.com/luisblanche/quarterly-tourism-in-australia/)
74 | - [Tourism Australia, Canberra. Detailed tourism Australia (2005), Travel by Australians, Sep 2019. URL https://robjhyndman.com/publications/hierarchical-tourism/.](https://robjhyndman.com/publications/hierarchical-tourism/)
75 | - [Oren Anava, Vitaly Kuznetsov, and (Google Inc. Sponsorship). Web traffic time series forecasting, forecast future traffic to Wikipedia pages. Kaggle Competition, 2018. URL https://www.kaggle.com/c/web-traffic-time-series-forecasting/.](https://www.kaggle.com/c/web-traffic-time-series-forecasting/)
76 | 
77 | ### Baselines
78 | - [BOOTSTRAP: Anastasios Panagiotelis, Puwasala Gamakumara, George Athanasopoulos, and Rob J. Hyndman (2022). "Probabilistic forecast reconciliation: Properties, evaluation and score optimisation". European Journal of Operational Research, 306(2):693–706, 2023. ISSN 0377-2217. doi: https://doi.org/10.1016/j.ejor.2022.07.040. URL https://www.sciencedirect.com/science/article/pii/S0377221722006087.](https://www.sciencedirect.com/science/article/pii/S0377221722006087)
79 | - [PERMBU: Souhaib Ben Taieb, James W. Taylor, and Rob J. Hyndman. Coherent probabilistic forecasts for hierarchical time series. In Doina Precup and Yee Whye Teh, editors, Proceedings of the 34th International Conference on Machine Learning, volume 70 of Proceedings of Machine Learning Research, pages 3348–3357. PMLR, 06–11 Aug 2017. URL http://proceedings.mlr.press/v70/taieb17a.html.](http://proceedings.mlr.press/v70/taieb17a.html)
80 | - [NORMALITY: Shanika L. Wickramasuriya. Probabilistic forecast reconciliation under the Gaussian framework. Accepted at Journal of Business and Economic Statistics, 2023.]()
81 | - [Orcutt, G.H., Watts, H.W., & Edwards, J.B. (1968). "Data aggregation and information loss". The American Economic Review, 58, 773–787.](http://www.jstor.org/stable/1815532)
82 | - [C.W. Gross and J.E. Sohl (1990). "Disaggregation methods to expedite product line forecasting". Journal of Forecasting, 9, 233–254. doi:10.1002/for.3980090304](https://onlinelibrary.wiley.com/doi/abs/10.1002/for.3980090304)
83 | - [Wickramasuriya, S.L., Turlach, B.A. & Hyndman, R.J. (2020). "Optimal non-negative forecast reconciliation". Stat Comput 30, 1167–1182. https://doi.org/10.1007/s11222-020-09930-0](https://robjhyndman.com/publications/nnmint/)
84 | -------------------------------------------------------------------------------- /experiments/hierarchical_baselines/environment.yml: -------------------------------------------------------------------------------- 1 | name: hierarchical_baselines 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - numpy<1.24 6 | - quadprog 7 | - pip 8 | - pip: 9 | - statsforecast 10 | - "git+https://github.com/Nixtla/datasetsforecast.git" 11 | - hierarchicalforecast -------------------------------------------------------------------------------- /experiments/hierarchical_baselines/src/run_baselines.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import matplotlib.pyplot as plt 7 | 8 | from statsforecast.core import StatsForecast 9 | from statsforecast.models import AutoARIMA, Naive 10 | 11 | from hierarchicalforecast.core import HierarchicalReconciliation 12 | from hierarchicalforecast.evaluation import HierarchicalEvaluation 13 | from hierarchicalforecast.methods import BottomUp, TopDown, MinTrace, ERM 14 | 15 | from hierarchicalforecast.utils import is_strictly_hierarchical 16 | from hierarchicalforecast.utils import HierarchicalPlot, CodeTimer 17 | from hierarchicalforecast.evaluation import scaled_crps, rel_mse, msse 18 | 19 | from datasetsforecast.hierarchical import HierarchicalData, HierarchicalInfo 20 | 21 | import warnings 22 | # Avoid pandas fragmentation warning and positive definite warning 23 | warnings.filterwarnings("ignore") 24 | 25 | 26 | class HierarchicalDataset(object): 27 | # Class with loading, processing and 28 | # prediction evaluation methods for hierarchical data 29 | 30 | available_datasets = ['Labour','Traffic', 31 | 'TourismSmall','TourismLarge','Wiki2', 32 | 'OldTraffic', 'OldTourismLarge'] 33 | 34 | @staticmethod 35 | def _get_hierarchical_scrps(hier_idxs, Y, Yq_hat, q_to_pred): 36 | # We use the indexes obtained from the aggregation tags 37 | # to compute scaled CRPS across the hierarchy levels 38 | scrps_list = [] 39 | for idxs in hier_idxs: 40 | y = Y[idxs, :] 41 | yq_hat = Yq_hat[idxs, :, :] 42 | level_scrps = scaled_crps(y, yq_hat, q_to_pred) 43 | scrps_list.append(level_scrps) 44 | return scrps_list 45 | 46 | @staticmethod 47 | def _get_hierarchical_msse(hier_idxs, Y, Y_hat, Y_train): 48 | # We use the indexes obtained from the aggregation tags 49 | # to compute MS scaled Error across the hierarchy levels 50 | msse_list = [] 51 | for idxs in hier_idxs: 52 | y = Y[idxs, :] 53 | y_hat = Y_hat[idxs, :] 54 | y_train = Y_train[idxs, :] 55 | level_msse = msse(y, y_hat, y_train) 56 | msse_list.append(level_msse) 57 | return msse_list 58 | 59 | @staticmethod 60 | def _get_hierarchical_rel_mse(hier_idxs, Y, Y_hat, Y_train): 61 | # We use the indexes obtained from the aggregation tags 62 | # to compute relative MSE across the hierarchy levels 63 | rel_mse_list = [] 64 | for idxs in hier_idxs: 65 | y = Y[idxs, :] 66 | y_hat = Y_hat[idxs, :] 67 | y_train = Y_train[idxs, :] 68 | level_rel_mse = rel_mse(y, y_hat, y_train) 69 | rel_mse_list.append(level_rel_mse) 70 | return rel_mse_list 71 | 72 | @staticmethod 73 | def _sort_hier_df(Y_df, S_df): 74 | # NeuralForecast core, sorts unique_id lexicographically 75 | # deviating from S_df, this class matches S_df and Y_hat_df order. 
76 | Y_df.unique_id = Y_df.unique_id.astype('category') 77 | Y_df.unique_id = Y_df.unique_id.cat.set_categories(S_df.index) 78 | Y_df = Y_df.sort_values(by=['unique_id', 'ds']) 79 | return Y_df 80 | 81 | @staticmethod 82 | def _nonzero_indexes_by_row(M): 83 | return [np.nonzero(M[row,:])[0] for row in range(len(M))] 84 | 85 | @staticmethod 86 | def load_process_data(dataset, directory='./data'): 87 | # Load data 88 | data_info = HierarchicalInfo[dataset] 89 | Y_df, S_df, tags = HierarchicalData.load(directory=directory, 90 | group=dataset) 91 | 92 | # Parse and augment data 93 | Y_df['ds'] = pd.to_datetime(Y_df['ds']) 94 | Y_df = HierarchicalDataset._sort_hier_df(Y_df=Y_df, S_df=S_df) 95 | 96 | # Obtain indexes for plots and evaluation 97 | hier_levels = ['Overall'] + list(tags.keys()) 98 | hier_idxs = [np.arange(len(S_df))] +\ 99 | [S_df.index.get_indexer(tags[level]) for level in list(tags.keys())] 100 | hier_linked_idxs = HierarchicalDataset._nonzero_indexes_by_row(S_df.values.T) 101 | 102 | # Final output 103 | data = dict(Y_df=Y_df, S_df=S_df, tags=tags, 104 | # Hierarchical idxs 105 | hier_idxs=hier_idxs, 106 | hier_levels=hier_levels, 107 | hier_linked_idxs=hier_linked_idxs, 108 | # Dataset Properties 109 | horizon=data_info.papers_horizon, 110 | freq=data_info.freq, 111 | seasonality=data_info.seasonality) 112 | return data 113 | 114 | 115 | def run_baselines(dataset, intervals_method, verbose=False, seed=0): 116 | with CodeTimer('Read and Parse data ', verbose): 117 | data = HierarchicalDataset.load_process_data(dataset=dataset) 118 | Y_df = data['Y_df'][["unique_id", 'ds', 'y']] 119 | S_df, tags = data['S_df'], data['tags'] 120 | horizon = data['horizon'] 121 | seasonality = data['seasonality'] 122 | freq = data['freq'] 123 | 124 | # Train/Test Splits 125 | Y_test_df = Y_df.groupby('unique_id').tail(horizon) 126 | Y_train_df = Y_df.drop(Y_test_df.index) 127 | Y_test_df = Y_test_df.set_index('unique_id') 128 | Y_train_df = Y_train_df.set_index('unique_id') 129 | 130 | dataset_str = f'{dataset}, h={horizon} ' 131 | dataset_str += f'n_series={len(S_df)}, n_bottom={len(S_df.columns)} \n' 132 | dataset_str += f'test ds=[{min(Y_test_df.ds), max(Y_test_df.ds)}] ' 133 | print(dataset_str) 134 | 135 | with CodeTimer('Fit/Predict Model ', verbose): 136 | # Read to avoid unnecesary AutoARIMA computation 137 | yhat_file = f'./data/{dataset}/Y_hat.csv' 138 | yfitted_file = f'./data/{dataset}/Y_fitted.csv' 139 | yrec_file = f'./data/{dataset}/{intervals_method}_rec.csv' 140 | 141 | if os.path.exists(yhat_file): 142 | Y_hat_df = pd.read_csv(yhat_file) 143 | Y_fitted_df = pd.read_csv(yfitted_file) 144 | 145 | else: 146 | if not os.path.exists(f'./data/{dataset}'): 147 | os.makedirs(f'./data/{dataset}') 148 | fcst = StatsForecast( 149 | df=Y_train_df, 150 | models=[AutoARIMA(season_length=seasonality)], 151 | fallback_model=[Naive()], 152 | freq=freq, 153 | n_jobs=-1 154 | ) 155 | Y_hat_df = fcst.forecast(h=horizon, fitted=True, level=LEVEL) 156 | Y_fitted_df = fcst.forecast_fitted_values() 157 | 158 | Y_hat_df = Y_hat_df.reset_index() 159 | Y_fitted_df = Y_fitted_df.reset_index() 160 | Y_hat_df.to_csv(yhat_file, index=False) 161 | Y_fitted_df.to_csv(yfitted_file, index=False) 162 | 163 | Y_hat_df = Y_hat_df.set_index('unique_id') 164 | Y_fitted_df = Y_fitted_df.set_index('unique_id') 165 | 166 | with CodeTimer('Reconcile Predictions ', verbose): 167 | if is_strictly_hierarchical(S=S_df.values.astype(np.float32), 168 | tags={key: S_df.index.get_indexer(val) for key, val in tags.items()}): 169 | 
reconcilers = [ 170 | BottomUp(), 171 | TopDown(method='average_proportions'), 172 | TopDown(method='proportion_averages'), 173 | MinTrace(method='ols'), 174 | MinTrace(method='wls_var'), 175 | MinTrace(method='mint_shrink'), 176 | #ERM(method='reg_bu', lambda_reg=100) # Extremely inneficient 177 | ERM(method='closed') 178 | ] 179 | else: 180 | reconcilers = [ 181 | BottomUp(), 182 | MinTrace(method='ols'), 183 | MinTrace(method='wls_var'), 184 | MinTrace(method='mint_shrink'), 185 | #ERM(method='reg_bu', lambda_reg=100) # Extremely inneficient 186 | ERM(method='closed') 187 | ] 188 | 189 | hrec = HierarchicalReconciliation(reconcilers=reconcilers) 190 | Y_rec_df = hrec.bootstrap_reconcile(Y_hat_df=Y_hat_df, 191 | Y_df=Y_fitted_df, 192 | S_df=S_df, tags=tags, 193 | level=LEVEL, 194 | intervals_method=intervals_method, 195 | num_samples=10, num_seeds=10) 196 | 197 | # Matching Y_test/Y_rec/S index ordering 198 | Y_test_df = Y_test_df.reset_index() 199 | Y_test_df.unique_id = Y_test_df.unique_id.astype('category') 200 | Y_test_df.unique_id = Y_test_df.unique_id.cat.set_categories(S_df.index) 201 | Y_test_df = Y_test_df.sort_values(by=['unique_id', 'ds']) 202 | 203 | Y_rec_df = Y_rec_df.reset_index() 204 | Y_rec_df.unique_id = Y_rec_df.unique_id.astype('category') 205 | Y_rec_df.unique_id = Y_rec_df.unique_id.cat.set_categories(S_df.index) 206 | Y_rec_df = Y_rec_df.sort_values(by=['seed', 'unique_id', 'ds']) 207 | 208 | Y_rec_df.to_csv(yrec_file, index=False) 209 | 210 | # Parsing model level columns 211 | flat_cols = list(hrec.level_names.keys()) 212 | for model in hrec.level_names: 213 | flat_cols += hrec.level_names[model] 214 | for model in hrec.sample_names: 215 | flat_cols += hrec.sample_names[model] 216 | y_rec = Y_rec_df[flat_cols] 217 | model_columns = y_rec.columns 218 | 219 | n_series = len(S_df) 220 | n_seeds = len(Y_rec_df.seed.unique()) 221 | y_rec = y_rec.values.reshape(n_seeds, n_series, horizon, len(model_columns)) 222 | y_test = Y_test_df['y'].values.reshape(n_series, horizon) 223 | y_train = Y_train_df['y'].values.reshape(n_series, -1) 224 | 225 | with CodeTimer('Evaluate Base Forecasts ', verbose): 226 | crps_results = {'Dataset': [dataset] * len(['Overall'] + list(tags.keys())), 227 | 'Level': ['Overall'] + list(tags.keys()),} 228 | relmse_results = {'Dataset': [dataset] * len(['Overall'] + list(tags.keys())), 229 | 'Level': ['Overall'] + list(tags.keys()),} 230 | msse_results = {'Dataset': [dataset] * len(['Overall'] + list(tags.keys())), 231 | 'Level': ['Overall'] + list(tags.keys()),} 232 | Y_hat_quantiles = Y_hat_df.drop(columns=['ds', 'AutoARIMA']) 233 | y_hat_quantiles_np = Y_hat_quantiles.values.reshape(n_series, horizon, len(QUANTILES)) 234 | y_hat_np = Y_hat_df['AutoARIMA'].values.reshape(n_series, -1) 235 | 236 | crps_results['AutoARIMA'] = HierarchicalDataset._get_hierarchical_scrps( 237 | Y=y_test, 238 | Yq_hat=y_hat_quantiles_np, 239 | q_to_pred=QUANTILES, 240 | hier_idxs=data['hier_idxs']) 241 | relmse_results['AutoARIMA'] = HierarchicalDataset._get_hierarchical_rel_mse( 242 | Y=y_test, 243 | Y_hat=y_hat_np, 244 | Y_train=y_train, 245 | hier_idxs=data['hier_idxs']) 246 | msse_results['AutoARIMA'] = HierarchicalDataset._get_hierarchical_msse( 247 | Y=y_test, 248 | Y_hat=y_hat_np, 249 | Y_train=y_train, 250 | hier_idxs=data['hier_idxs']) 251 | 252 | with CodeTimer('Evaluate Models CRPS ', verbose): 253 | for model in hrec.level_names.keys(): 254 | crps_results[model] = [] 255 | for level in crps_results['Level']: 256 | if level=='Overall': 257 | row_idxs = 
np.arange(len(S_df)) 258 | else: 259 | row_idxs = S_df.index.get_indexer(tags[level]) 260 | col_idxs = model_columns.get_indexer(hrec.level_names[model]) 261 | _y = y_test[row_idxs,:] 262 | _y_rec_seeds = y_rec[:,row_idxs,:,:][:,:,:,col_idxs] 263 | 264 | level_model_crps =[] 265 | for seed_idx in range(y_rec.shape[0]): 266 | _y_rec = _y_rec_seeds[seed_idx,:,:,:] 267 | level_model_crps.append(scaled_crps(y=_y, y_hat=_y_rec, 268 | quantiles=QUANTILES)) 269 | level_model_crps = f'{np.mean(level_model_crps):.4f}±{(1.96 * np.std(level_model_crps)):.4f}' 270 | crps_results[model].append(level_model_crps) 271 | 272 | crps_results = pd.DataFrame(crps_results) 273 | 274 | with CodeTimer('Evaluate Models relMSE', verbose): 275 | for model in hrec.level_names.keys(): 276 | relmse_results[model] = [] 277 | for level in relmse_results['Level']: 278 | if level=='Overall': 279 | row_idxs = np.arange(len(S_df)) 280 | else: 281 | row_idxs = S_df.index.get_indexer(tags[level]) 282 | col_idx = model_columns.get_loc(model) 283 | _y = y_test[row_idxs,:] 284 | _y_train = y_train[row_idxs,:] 285 | _y_hat_seeds = y_rec[:,row_idxs,:,:][:,:,:,col_idx] 286 | 287 | level_model_relmse = [] 288 | for seed_idx in range(y_rec.shape[0]): 289 | _y_hat = _y_hat_seeds[seed_idx,:,:] 290 | level_model_relmse.append(rel_mse(y=_y, y_hat=_y_hat, y_train=_y_train)) 291 | level_model_relmse = f'{np.mean(level_model_relmse):.4f}' 292 | relmse_results[model].append(level_model_relmse) 293 | 294 | relmse_results = pd.DataFrame(relmse_results) 295 | 296 | with CodeTimer('Evaluate Models MSSE ', verbose): 297 | for model in hrec.level_names.keys(): 298 | msse_results[model] = [] 299 | for level in msse_results['Level']: 300 | if level=='Overall': 301 | row_idxs = np.arange(len(S_df)) 302 | else: 303 | row_idxs = S_df.index.get_indexer(tags[level]) 304 | col_idx = model_columns.get_loc(model) 305 | _y = y_test[row_idxs,:] 306 | _y_train = y_train[row_idxs,:] 307 | _y_hat_seeds = y_rec[:,row_idxs,:,:][:,:,:,col_idx] 308 | 309 | level_model_msse = [] 310 | for seed_idx in range(y_rec.shape[0]): 311 | _y_hat = _y_hat_seeds[seed_idx,:,:] 312 | level_model_msse.append(msse(y=_y, y_hat=_y_hat, y_train=_y_train)) 313 | level_model_msse = f'{np.mean(level_model_msse):.4f}' 314 | msse_results[model].append(level_model_msse) 315 | 316 | msse_results = pd.DataFrame(msse_results) 317 | 318 | return crps_results, relmse_results, msse_results 319 | 320 | if __name__ == '__main__': 321 | 322 | # Parse execution parameters 323 | verbose = True 324 | parser = argparse.ArgumentParser() 325 | parser.add_argument("-intervals_method", "--intervals_method", type=str) 326 | parser.add_argument("-dataset", "--dataset", type=str) 327 | 328 | args = parser.parse_args() 329 | intervals_method = args.intervals_method 330 | dataset = args.dataset 331 | 332 | assert intervals_method in ['bootstrap', 'normality', 'permbu'], \ 333 | "Select `--intervals_method` from ['bootstrap', 'normality', 'permbu']" 334 | 335 | available_datasets = ['Labour', 'Traffic', 'OldTraffic', 336 | 'TourismSmall', 'TourismLarge', 'OldTourismLarge', 'Wikitwo'] 337 | assert dataset in available_datasets, \ 338 | "Select `--dataset` from ['Labour', 'Traffic', 'OldTraffic', \ 339 | 'TourismSmall', 'TourismLarge', 'OldTourismLarge', 'Wikitwo']" 340 | 341 | print(f'\n {intervals_method.upper()} {dataset} statistical baselines evaluation \n') 342 | 343 | LEVEL = np.arange(0, 100, 2) 344 | qs = [[50-lv/2, 50+lv/2] for lv in LEVEL] 345 | QUANTILES = np.sort(np.concatenate(qs)/100) 346 | 347 | # Run 
experiments
348 |     crps_results_list = []
349 |     msse_results_list = []
350 |     relmse_results_list = []
351 | 
352 |     try: # Hacky protection for non strictly hierarchical datasets
353 |         crps_results, relmse_results, msse_results = run_baselines(dataset=dataset,
354 |                                         intervals_method=intervals_method, verbose=verbose)
355 |         crps_results_list.append(crps_results)
356 |         msse_results_list.append(msse_results)
357 |         relmse_results_list.append(relmse_results)
358 |     except Exception as e:
359 |         print('failed ', dataset)
360 |         print(str(e))
361 |         print('\n\n')
362 | 
363 |     crps_results_df = pd.concat(crps_results_list)
364 |     msse_results_df = pd.concat(msse_results_list)
365 |     relmse_results_df = pd.concat(relmse_results_list)
366 | 
367 |     crps_results_df.to_csv(f'./data/{intervals_method}_crps.csv', index=False)
368 |     msse_results_df.to_csv(f'./data/{intervals_method}_msse.csv', index=False)
369 |     relmse_results_df.to_csv(f'./data/{intervals_method}_relmse.csv', index=False)
370 | 
371 |     print('='*(200+24))
372 |     print(f'{intervals_method} sCRPS:')
373 |     print(crps_results_df)
374 | 
375 |     print('\n\n'+'='*(200+24))
376 |     print(f'{intervals_method} relMSE:')
377 |     print(relmse_results_df)
378 | 
379 |     print('\n\n'+'='*(200+24))
380 |     print(f'{intervals_method} MSSE:')
381 |     print(msse_results_df)
382 | 
--------------------------------------------------------------------------------
/experiments/hierarchical_baselines/src/run_favorita_baselines.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/experiments/hierarchical_baselines/src/run_favorita_baselines.py
--------------------------------------------------------------------------------
/experiments/libs-comparison/README.md:
--------------------------------------------------------------------------------
1 | # Hierarchical Methods Comparison
2 | 
3 | This experiment aims to empirically validate the results presented in other great implementations of hierarchical reconciliation methods for Python and R. We use the ETS model on the following datasets, highly inspired by [Rob Hyndman and George Athanasopoulos's work](https://otexts.com/fpp3/hierarchical.html).
4 | 
5 | ## Main results
6 | 
7 | To perform the experiments, we used the TourismSmall, Labour, and Wiki2 datasets, widely used for hierarchical reconciliation research. For TourismSmall and Labour, we used the last eight observations as the test set, and the last 12 observations for Wiki2.
8 | 
9 | ### Performance (RMSSE)
10 | 
11 | ![image](./results.png)
12 | 
13 | Notes:
14 | - [fable](https://github.com/tidyverts/fable) also contains `TopDown` and `MiddleOut` methods, but they raise an error. A [Pull Request](https://github.com/tidyverts/fabletools/pull/362) was opened to fix the issue.
15 | - The `RMSSE` (root mean squared scaled error) was calculated against a naive model.
16 | 
17 | **Disclaimer:**
18 | 
19 | It was pointed out by sktime contributors that the differences between fable and sktime in our experiments, in terms of forecast accuracy, are due to [problems](https://github.com/sktime/sktime/issues/3162) with the base forecaster in sktime. Using simple AR models ensures replicability between fable and sktime, as seen [here](https://github.com/ciaran-g/hierarchical-fc-comparison).
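For reference, the RMSSE definition used by the Python pipelines is reproduced here with annotations (it is the `rmsse` helper defined in `src/sktime.py` and `src/statsforecast.py` further below): each series' forecast RMSE is scaled by the RMSE of a naive forecast that repeats the last in-sample value.

```python
import numpy as np

def rmsse(y, y_hat, y_insample):
    # y, y_hat: test values and forecasts, shape (n_series, horizon)
    # y_insample: training values, shape (n_series, n_train)
    errors = np.mean((y - y_hat) ** 2, axis=1)
    # scale: squared error of a naive forecast repeating the last in-sample value
    scale = np.mean((y - y_insample[:, [-1]]) ** 2, axis=1)
    return np.mean(np.sqrt(errors) / np.sqrt(scale))
```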
20 | 21 | ### Time (seconds) 22 | 23 | | Dataset | statsforecast | fable | sktime | 24 | |:-------------|----------------:|--------:|---------:| 25 | | Labour | 1.982 | 11.233 | 44.368 | 26 | | TourismSmall | 0.627 | 7.61 | 19.120 | 27 | | Wiki2 | 1.822 | 47.626 | 119.941 | 28 | 29 | Notes: 30 | - Fitting time for base forecasts. 31 | 32 | ## Reproducibility 33 | 34 | To reproduce the main results you have: 35 | 36 | 1. Execute `conda env create -f environment.yml`. 37 | 2. Activate the environment using `conda activate hts-comparison`. 38 | 3. Run the experiments using `python -m src.[lib] --group [group]` where `[lib]` can be `statsforecast` or `sktime`, and `[group]` can be `Labour`, `Wiki2`, and `TourismSmall`. 39 | 4. To run R experiments you have to prepare the data using `python -m src.data --group [group]` for each `[group]`. Once it is done, just run `Rscript src/fable.R [group]`. 40 | 5. To parse the results, use `nbs/parse-results.ipynb`. 41 | 42 | The results were obtained using a `c5d.24xlarge` AWS instance. 43 | 44 | ## ToDo 45 | 46 | - Run comparisons using the `auto_arima` model. 47 | - Use same base forecasts (obtained with the same library/implementation) with different implementations of the reconciled methods. 48 | - Include `Traffic` Dataset 49 | 50 | -------------------------------------------------------------------------------- /experiments/libs-comparison/environment.yml: -------------------------------------------------------------------------------- 1 | name: hts-comparison 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.7 6 | - pip==20.3.3 7 | - fire 8 | - jupyterlab 9 | - r-base==3.6.3 10 | - r-urca 11 | - r-tidyverse 12 | - r-future 13 | - r-future.apply 14 | - r-forecast 15 | - r-furrr 16 | - r-fable 17 | - r-tsibble 18 | - r-tsibbledata 19 | - pip: 20 | - pyarrow 21 | - statsforecast 22 | - hierarchicalforecast 23 | - datasetsforecast 24 | - sktime 25 | - tabulate 26 | -------------------------------------------------------------------------------- /experiments/libs-comparison/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/experiments/libs-comparison/results.png -------------------------------------------------------------------------------- /experiments/libs-comparison/src/data.py: -------------------------------------------------------------------------------- 1 | import fire 2 | from datasetsforecast.hierarchical import HierarchicalData 3 | 4 | 5 | def hierarchical_cols(group: str): 6 | if group == 'Wiki2': 7 | return ['Country', 'Access', 'Agent', 'Topic'], ['Country', 'Access', 'Agent', 'Topic'], '_' 8 | elif group == 'Labour': 9 | return ['Employment', 'Gender', 'Region'], ['Region', 'Employment', 'Gender'], ',' 10 | elif group == 'TourismSmall': 11 | return ['State', 'Purpose', 'CityNonCity'], ['Purpose', 'State', 'CityNonCity'], '-' 12 | raise Exception(f'Unknown group {group}') 13 | 14 | def parse_data(group: str): 15 | #Get bottom time series to use in R 16 | init_cols, hier_cols, sep = hierarchical_cols(group) 17 | Y_df, S, tags = HierarchicalData.load('data', group) 18 | Y_df = Y_df.query('unique_id in @S.columns') 19 | Y_df[init_cols] = Y_df['unique_id'].str.split(sep, expand=True) 20 | Y_df = Y_df[init_cols + ['ds', 'y']] 21 | Y_df = Y_df.groupby(init_cols + ['ds']).sum().reset_index() 22 | Y_df.to_csv(f'data/{group}.csv', index=False) 23 | 24 | 25 | if __name__=="__main__": 26 | 
fire.Fire(parse_data) 27 | 28 | -------------------------------------------------------------------------------- /experiments/libs-comparison/src/fable.R: -------------------------------------------------------------------------------- 1 | library(fable) 2 | library(tsibble) 3 | library(tsibbledata) 4 | library(lubridate) 5 | library(dplyr) 6 | library(readr) 7 | library(future) 8 | library(stringr) 9 | 10 | args <- commandArgs(trailingOnly=TRUE) 11 | meta <- list( 12 | TourismSmall=list(ds_fn=yearquarter, cutoff=yearquarter('2004-12-31'), 13 | key=c("Purpose", "State", "CityNonCity")), 14 | Labour=list(ds_fn=yearmonth, cutoff=yearmonth('2019-04-01'), 15 | key=c('Region', 'Employment', 'Gender')), 16 | Wiki2=list(ds_fn=ymd, cutoff=ymd('2016-12-17'), 17 | key=c('Country', 'Access', 'Agent', 'Topic')) 18 | ) 19 | group <- args[1] 20 | ds_fn <- meta[[group]][['ds_fn']] 21 | cutoff <- meta[[group]][['cutoff']] 22 | key <- meta[[group]][['key']] 23 | 24 | plan(multiprocess, gc=TRUE) 25 | 26 | 27 | Y_df <- read_csv(str_glue('./data/{group}.csv')) %>% 28 | mutate(ds = ds_fn(ds)) %>% 29 | as_tsibble( 30 | index = ds, 31 | key = key, 32 | ) 33 | if(group == 'TourismSmall'){ 34 | Y_df <- aggregate_key(Y_df, Purpose / State / CityNonCity, y = sum(y)) 35 | } else if (group == 'Labour') { 36 | Y_df <- aggregate_key(Y_df, Region / Employment / Gender, y = sum(y)) 37 | } else if (group == 'Wiki2') { 38 | Y_df <- aggregate_key(Y_df, Country / Access / Agent / Topic, y = sum(y)) 39 | } 40 | 41 | #split train/test sets 42 | Y_df_train <- Y_df %>% 43 | filter(ds <= cutoff) 44 | Y_df_test <- Y_df %>% 45 | filter(ds > cutoff) 46 | 47 | #forecaster 48 | start <- Sys.time() 49 | ets_fit <- Y_df_train %>% 50 | model(ets = ETS(y), naive = NAIVE(y)) 51 | end <- Sys.time() 52 | 53 | ets_fit <- ets_fit %>% 54 | reconcile( 55 | bu = bottom_up(ets), 56 | ols = min_trace(ets, method='ols'), 57 | wls_struct = min_trace(ets, method='wls_struct'), 58 | wls_var = min_trace(ets, method='wls_var'), 59 | mint_shrink = min_trace(ets, method='mint_shrink'), 60 | ) 61 | fc <- ets_fit %>% 62 | forecast(Y_df_test) 63 | 64 | fc <- fc %>% 65 | as_tibble() %>% 66 | select(-y) %>% 67 | left_join(Y_df_test, by=c(key, 'ds')) 68 | 69 | errors <- fc %>% 70 | mutate(error = (y - .mean) ** 2) %>% 71 | group_by_at(c(key, '.model')) %>% 72 | summarise(rmse = sqrt(mean(error))) %>% 73 | ungroup() 74 | 75 | naive_errors <- errors %>% 76 | filter(.model == 'naive') %>% 77 | select(-.model) %>% 78 | rename(naive_rmse = rmse) 79 | 80 | errors <- errors %>% 81 | filter(.model != 'naive') %>% 82 | left_join(naive_errors, by=key) %>% 83 | group_by(.model) %>% 84 | summarise(rmsse = mean(rmse / naive_rmse)) 85 | 86 | write_csv(errors, 87 | str_glue('./results/{group}/fable.csv')) 88 | tibble(group = group, 89 | time = difftime(end, start, units='secs')) %>% 90 | write_csv(str_glue('./results/{group}/fable-time.csv')) 91 | 92 | 93 | -------------------------------------------------------------------------------- /experiments/libs-comparison/src/sktime.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from time import time 3 | 4 | import fire 5 | import numpy as np 6 | import pandas as pd 7 | from datasetsforecast.hierarchical import HierarchicalData, HierarchicalInfo 8 | from sktime.forecasting.ets import AutoETS 9 | from sktime.forecasting.reconcile import ReconcilerForecaster 10 | from sktime.transformations.hierarchical.aggregate import Aggregator 11 | from 
sktime.transformations.hierarchical.reconcile import Reconciler 12 | 13 | 14 | def rmsse(y, y_hat, y_insample): 15 | errors = np.mean((y - y_hat) ** 2, axis=1) 16 | scale = np.mean((y - y_insample[:, [-1]]) ** 2, axis=1) 17 | return np.mean(np.sqrt(errors) / np.sqrt(scale)) 18 | 19 | def hierarchical_cols(group: str): 20 | if group == 'Wiki2': 21 | return ['Country', 'Access', 'Agent', 'Topic'], ['Country', 'Access', 'Agent', 'Topic'], '_' 22 | elif group == 'Labour': 23 | return ['Employment', 'Gender', 'Region'], ['Region', 'Employment', 'Gender'], ',' 24 | elif group == 'TourismSmall': 25 | return ['State', 'Purpose', 'CityNonCity'], ['Purpose', 'State', 'CityNonCity'], '-' 26 | raise Exception(f'Unknown group {group}') 27 | 28 | def pipeline(group: str): 29 | results_group_dir = Path(f'./results/{group}') 30 | results_group_dir.mkdir(exist_ok=True, parents=True) 31 | init_cols, hier_cols, sep = hierarchical_cols(group) 32 | Y_df, S, tags = HierarchicalData.load('data', group) 33 | n_series = Y_df['unique_id'].nunique() 34 | meta_info_group = HierarchicalInfo[group] 35 | h = meta_info_group.horizon 36 | freq = meta_info_group.freq 37 | sp = meta_info_group.seasonality 38 | #Get only bottom series 39 | #to contruct full dataset using sktime 40 | Y_df = Y_df.query('unique_id in @S.columns') 41 | Y_df[init_cols] = Y_df['unique_id'].str.split(sep, expand=True) 42 | if group == 'Labour': 43 | freq = 'M' 44 | Y_df['ds'] = pd.PeriodIndex(Y_df['ds'], freq=freq) 45 | Y_df = Y_df.set_index(hier_cols+['ds'])[['y']] 46 | #Aggregation 47 | agg = Aggregator(flatten_single_levels=False) 48 | Y_df = agg.fit_transform(Y_df) 49 | Y_df = Y_df.reset_index() 50 | n_agg_series = len(Y_df[hier_cols].drop_duplicates()) 51 | if n_agg_series != n_series: 52 | raise Exception('mismatch n_series original and sktime') 53 | #split train/test sets 54 | Y_df_test = Y_df.groupby(hier_cols).tail(h) 55 | Y_df_train = Y_df.drop(Y_df_test.index) 56 | Y_df_test = Y_df_test.set_index(hier_cols+['ds']) 57 | Y_df_train = Y_df_train.set_index(hier_cols+['ds']) 58 | #forecaster 59 | seasonal = 'Additive' if group == 'Wiki2' else None #prevent negative and zero values 60 | init_time = time() 61 | forecaster = AutoETS(auto=True, sp=sp, seasonal=seasonal, n_jobs=-1) 62 | forecaster.fit(Y_df_train) 63 | prds = forecaster.predict(fh=np.arange(1, h + 1)).rename(columns={'y': 'base'}) 64 | fcst_time = time() - init_time 65 | #reconciliation methods 66 | methods = ['bu', 'ols', 'wls_str', 'td_fcst'] 67 | for method in methods: 68 | reconciler = Reconciler(method=method) 69 | prds_recon = reconciler.fit_transform(prds[['base']]).rename(columns={'base': method}) 70 | prds = prds.merge(prds_recon, how='left', left_index=True, right_index=True) 71 | #methods based on residuals 72 | methods_res = ['wls_var', 'mint_shrink'] 73 | for method in methods_res: 74 | reconciler = ReconcilerForecaster(forecaster, method=method) 75 | reconciler.fit(Y_df_train) 76 | prds_recon = reconciler.predict(fh=np.arange(1, h + 1)).rename(columns={'y': method}) 77 | prds = prds.merge(prds_recon, how='left', left_index=True, right_index=True) 78 | #adding y_test for evaluation 79 | prds = prds.merge(Y_df_test, how='left', left_index=True, right_index=True) 80 | #evaluation 81 | y_test = prds['y'].values.reshape(-1, h) 82 | y_insample = Y_df_train['y'].values.reshape(n_series, -1) 83 | evals = {} 84 | for method in ['base'] + methods + methods_res: 85 | y_hat = prds[method].values.reshape(-1, h) 86 | evals[method] = rmsse(y_test, y_hat, y_insample) 87 | evals = 
pd.DataFrame(evals, index=[group]) 88 | fcst_time = pd.DataFrame({'group': group, 'time': fcst_time}, index=[0]) 89 | evals.to_csv(results_group_dir / 'sktime.csv', index=False) 90 | fcst_time.to_csv(results_group_dir / 'sktime-time.csv', index=False) 91 | 92 | 93 | if __name__=="__main__": 94 | fire.Fire(pipeline) 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /experiments/libs-comparison/src/statsforecast.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from time import time 4 | os.environ['NUMBA_RELEASE_GIL'] = 'True' 5 | os.environ['NUMBA_CACHE'] = 'True' 6 | 7 | import fire 8 | import numpy as np 9 | import pandas as pd 10 | from datasetsforecast.hierarchical import HierarchicalData, HierarchicalInfo 11 | from hierarchicalforecast.core import HierarchicalReconciliation 12 | from hierarchicalforecast.evaluation import HierarchicalEvaluation 13 | from hierarchicalforecast.methods import ( 14 | BottomUp, TopDown, MiddleOut, 15 | MinTrace, ERM 16 | ) 17 | from statsforecast.core import StatsForecast 18 | from statsforecast.models import ets 19 | from statsforecast.utils import AirPassengers as ap 20 | 21 | 22 | def rmsse(y, y_hat, y_insample): 23 | errors = np.mean((y - y_hat) ** 2, axis=1) 24 | scale = np.mean((y - y_insample[:, [-1]]) ** 2, axis=1) 25 | return np.mean(np.sqrt(errors) / np.sqrt(scale)) 26 | 27 | def get_str_model(group: str): 28 | if group == 'Wiki2': 29 | #avoid issues with seasonal models 30 | #due to negative and zero values 31 | return 'ZZA' 32 | return 'ZZZ' 33 | 34 | def get_ERM_lambda(group: str): 35 | if group == 'Wiki2': 36 | return 1e6 37 | elif group == 'TourismSmall': 38 | return 2e6 39 | elif group == 'Labour': 40 | return 100 41 | raise Exception(f'Unkwon group {group}') 42 | 43 | def pipeline(group: str): 44 | results_group_dir = Path(f'./results/{group}') 45 | results_group_dir.mkdir(exist_ok=True, parents=True) 46 | Y_df, S, tags = HierarchicalData.load('data', group) 47 | Y_df['ds'] = pd.to_datetime(Y_df['ds']) 48 | n_series = Y_df['unique_id'].nunique() 49 | meta_info_group = HierarchicalInfo[group] 50 | h = meta_info_group.horizon 51 | freq = meta_info_group.freq 52 | sp = meta_info_group.seasonality 53 | #split train/test sets 54 | Y_df_test = Y_df.groupby(['unique_id']).tail(h) 55 | Y_df_train = Y_df.drop(Y_df_test.index) 56 | Y_df_test = Y_df_test.set_index('unique_id') 57 | Y_df_train = Y_df_train.set_index('unique_id') 58 | #forecaster 59 | str_model = get_str_model(group) 60 | init_time = time() 61 | forecaster = StatsForecast( 62 | df=Y_df_train, 63 | models=[(ets, sp, str_model)], 64 | freq=freq, 65 | n_jobs=-1, 66 | ) 67 | Y_df_hat = forecaster.forecast(h, fitted=True) 68 | Y_df_fitted = forecaster.forecast_fitted_values() 69 | fcst_time = time() - init_time 70 | #reconciliation methods 71 | methods = [ 72 | BottomUp(), 73 | TopDown(method='forecast_proportions'), 74 | TopDown(method='average_proportions'), 75 | TopDown(method='proportion_averages'), 76 | *[ 77 | MiddleOut(level=name, top_down_method='forecast_proportions') \ 78 | for name in list(tags.keys())[1:-1] 79 | ], 80 | *[ 81 | MiddleOut(level=name, top_down_method='average_proportions') \ 82 | for name in list(tags.keys())[1:-1] 83 | ], 84 | *[ 85 | MiddleOut(level=name, top_down_method='proportion_averages') \ 86 | for name in list(tags.keys())[1:-1] 87 | ], 88 | MinTrace(method='ols'), 89 | MinTrace(method='wls_struct'), 90 | 
MinTrace(method='wls_var'), 91 | MinTrace(method='mint_shrink'), 92 | ERM(method='closed'), 93 | ERM(method='reg', lambda_reg=get_ERM_lambda(group)), 94 | ERM(method='reg_bu', lambda_reg=get_ERM_lambda(group)), 95 | ] 96 | hrec = HierarchicalReconciliation(reconcilers=methods) 97 | Y_df_hat_rec = hrec.reconcile( 98 | Y_df_hat, 99 | Y_df_fitted, 100 | S, 101 | tags 102 | ) 103 | eval_tags = {'All': np.concatenate(list(tags.values()))} 104 | evaluator = HierarchicalEvaluation(evaluators=[rmsse]) 105 | evals = evaluator.evaluate( 106 | Y_df_hat_rec, 107 | Y_df_test, 108 | eval_tags, 109 | Y_df=Y_df_train 110 | ) 111 | evals = evals.loc['All'].reset_index() 112 | evals = pd.melt( 113 | evals, 114 | value_vars=evals.columns.to_list(), 115 | var_name='model', 116 | value_name=group, 117 | ) 118 | evals[['model', 'rec_method']] = evals['model'].str.split('/', expand=True, n=1) 119 | evals['rec_method'] = evals['rec_method'].fillna('Base') 120 | evals = evals.drop(0) 121 | evals = evals[['rec_method', group]] 122 | fcst_time = pd.DataFrame({'group': group, 'time': fcst_time}, index=[0]) 123 | evals.to_csv(results_group_dir / 'statsforecast.csv', index=False) 124 | fcst_time.to_csv(results_group_dir / 'statsforecast-time.csv', index=False) 125 | 126 | 127 | if __name__=="__main__": 128 | ets(ap.astype(np.float32), 12, season_length=12) 129 | fire.Fire(pipeline) 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /hierarchicalforecast/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.2.1" 2 | -------------------------------------------------------------------------------- /hierarchicalforecast/_nbdev.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED BY NBDEV! DO NOT EDIT! 2 | 3 | __all__ = ["index", "modules", "custom_doc_links", "git_url"] 4 | 5 | index = {"HierarchicalReconciliation": "core.ipynb", 6 | "HierarchicalEvaluation": "evaluation.ipynb", 7 | "bottom_up": "methods.ipynb", 8 | "BottomUp": "methods.ipynb", 9 | "is_strictly_hierarchical": "methods.ipynb", 10 | "top_down": "methods.ipynb", 11 | "TopDown": "methods.ipynb", 12 | "middle_out": "methods.ipynb", 13 | "MiddleOut": "methods.ipynb", 14 | "crossprod": "methods.ipynb", 15 | "min_trace": "methods.ipynb", 16 | "MinTrace": "methods.ipynb", 17 | "optimal_combination": "methods.ipynb", 18 | "OptimalCombination": "methods.ipynb", 19 | "lasso": "methods.ipynb", 20 | "erm": "methods.ipynb", 21 | "ERM": "methods.ipynb", 22 | "aggregate": "utils.ipynb"} 23 | 24 | modules = ["core.py", 25 | "evaluation.py", 26 | "methods.py", 27 | "utils.py"] 28 | 29 | doc_url = "https://Nixtla.github.io/hierarchicalforecast/" 30 | 31 | git_url = "https://github.com/Nixtla/hierarchicalforecast/tree/main/" 32 | 33 | def custom_doc_links(name): return None 34 | -------------------------------------------------------------------------------- /hierarchicalforecast/probabilistic_methods.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/src/probabilistic_methods.ipynb. 
2 | 
3 | # %% auto 0
4 | __all__ = ['Normality']
5 | 
6 | # %% ../nbs/src/probabilistic_methods.ipynb 3
7 | import warnings
8 | from typing import Optional
9 | 
10 | import numpy as np
11 | from scipy.stats import norm
12 | from sklearn.preprocessing import OneHotEncoder
13 | 
14 | from .utils import is_strictly_hierarchical
15 | 
16 | # %% ../nbs/src/probabilistic_methods.ipynb 6
17 | class Normality:
18 |     """Normality Probabilistic Reconciliation Class.
19 | 
20 |     The Normality method leverages the linearity of the Gaussian distribution to
21 |     generate hierarchically coherent prediction distributions. This class is
22 |     meant to be used as the `sampler` input to other `HierarchicalForecast` [reconciliation classes](https://nixtla.github.io/hierarchicalforecast/methods.html).
23 | 
24 |     Given base forecasts under a normal distribution:
25 |     $$\hat{y}_{h} \sim \mathrm{N}(\hat{\\boldsymbol{\\mu}}, \hat{\mathbf{W}}_{h})$$
26 | 
27 |     The reconciled forecasts are also normally distributed:
28 | 
29 |     $$
30 |     \\tilde{y}_{h} \sim \mathrm{N}(\mathbf{S}\mathbf{P}\hat{\\boldsymbol{\\mu}},
31 |     \mathbf{S}\mathbf{P}\hat{\mathbf{W}}_{h} \mathbf{P}^{\intercal} \mathbf{S}^{\intercal})
32 |     $$
33 | 
34 |     **Parameters:**
35 | `S`: np.array, summing matrix of size (`base`, `bottom`).
36 | `P`: np.array, reconciliation matrix of size (`bottom`, `base`).
37 | `y_hat`: Point forecasts values of size (`base`, `horizon`).
38 | `W`: np.array, hierarchical covariance matrix of size (`base`, `base`).
39 | `sigmah`: np.array, forecast standard dev. of size (`base`, `horizon`).
40 |     `num_samples`: int, number of bootstrapped samples generated.
41 | `seed`: int, random seed for numpy generator's replicability.
42 | 43 | **References:**
44 | - [Panagiotelis A., Gamakumara P. Athanasopoulos G., and Hyndman R. J. (2022). 45 | "Probabilistic forecast reconciliation: Properties, evaluation and score optimisation". European Journal of Operational Research.](https://www.sciencedirect.com/science/article/pii/S0377221722006087) 46 | """ 47 | 48 | def __init__( 49 | self, 50 | S: np.ndarray, 51 | P: np.ndarray, 52 | y_hat: np.ndarray, 53 | sigmah: np.ndarray, 54 | W: np.ndarray, 55 | seed: int = 0, 56 | ): 57 | self.S = S 58 | self.P = P 59 | self.y_hat = y_hat 60 | self.SP = self.S @ self.P 61 | self.W = W 62 | self.sigmah = sigmah 63 | self.seed = seed 64 | 65 | # Base Normality Errors assume independence/diagonal covariance 66 | # TODO: replace bilinearity with elementwise row multiplication 67 | std_ = np.sqrt(self.W.diagonal()) 68 | R1 = self.W / np.outer(std_, std_) 69 | Wh = [np.diag(sigma) @ R1 @ np.diag(sigma).T for sigma in self.sigmah.T] 70 | 71 | # Reconciled covariances across forecast horizon 72 | self.cov_rec = [(self.SP @ W @ self.SP.T) for W in Wh] 73 | self.sigmah_rec = np.hstack( 74 | [np.sqrt(cov.diagonal())[:, None] for cov in self.cov_rec] 75 | ) 76 | 77 | def get_samples(self, num_samples: int): 78 | """Normality Coherent Samples. 79 | 80 | Obtains coherent samples under the Normality assumptions. 81 | 82 | **Parameters:**
83 | `num_samples`: int, number of samples generated from the coherent distribution.
84 | 85 | **Returns:**
86 | `samples`: Coherent samples of size (`base`, `horizon`, `num_samples`). 87 | """ 88 | rng = np.random.default_rng(self.seed) 89 | n_series, n_horizon = self.y_hat.shape 90 | samples = np.empty(shape=(num_samples, n_series, n_horizon)) 91 | for t in range(n_horizon): 92 | with warnings.catch_warnings(): 93 | # Suppress 'RuntimeWarning: covariance is not positive-semidefinite.'; by definition the multivariate distribution is not full-rank 94 | warnings.simplefilter("ignore", RuntimeWarning) 95 | partial_samples = rng.multivariate_normal( 96 | mean=self.SP @ self.y_hat[:, t], 97 | cov=self.cov_rec[t], 98 | size=num_samples, 99 | ) 100 | samples[:, :, t] = partial_samples 101 | 102 | # [samples, N, H] -> [N, H, samples] 103 | samples = samples.transpose((1, 2, 0)) 104 | return samples 105 | 106 | def get_prediction_levels(self, res, level): 107 | """Adds reconciled forecast levels to results dictionary""" 108 | res["sigmah"] = self.sigmah_rec 109 | level = np.asarray(level) 110 | z = norm.ppf(0.5 + level / 200) 111 | for zs, lv in zip(z, level): 112 | res[f"lo-{lv}"] = res["mean"] - zs * self.sigmah_rec 113 | res[f"hi-{lv}"] = res["mean"] + zs * self.sigmah_rec 114 | return res 115 | 116 | def get_prediction_quantiles(self, res, quantiles): 117 | """Adds reconciled forecast quantiles to results dictionary""" 118 | # [N,H,None] + [None,None,Q] * [N,H,None] -> [N,H,Q] 119 | z = norm.ppf(quantiles) 120 | res["sigmah"] = self.sigmah_rec 121 | res["quantiles"] = ( 122 | res["mean"][:, :, None] + z[None, None, :] * self.sigmah_rec[:, :, None] 123 | ) 124 | return res 125 | 126 | # %% ../nbs/src/probabilistic_methods.ipynb 10 127 | class Bootstrap: 128 | """Bootstrap Probabilistic Reconciliation Class. 129 | 130 | This method goes beyond the normality assumption for the base forecasts: 131 | the technique simulates future sample paths and uses them to generate 132 | base sample paths that are later reconciled. This simple yet clever idea 133 | makes it possible to generate coherent bootstrapped prediction intervals 134 | for any reconciliation strategy. This class is meant to be used as the `sampler` 135 | input to other `HierarchicalForecast` [reconciliation classes](https://nixtla.github.io/hierarchicalforecast/methods.html). 136 | 137 | Given a bootstrapped set of simulated sample paths: 138 | $$(\hat{\mathbf{y}}^{[1]}_{\\tau}, \dots ,\hat{\mathbf{y}}^{[B]}_{\\tau})$$ 139 | 140 | The reconciled sample paths allow for reconciled distributional forecasts: 141 | $$(\mathbf{S}\mathbf{P}\hat{\mathbf{y}}^{[1]}_{\\tau}, \dots ,\mathbf{S}\mathbf{P}\hat{\mathbf{y}}^{[B]}_{\\tau})$$ 142 | 143 | **Parameters:**
144 | `S`: np.array, summing matrix of size (`base`, `bottom`).
145 | `P`: np.array, reconciliation matrix of size (`bottom`, `base`).
146 | `y_hat`: np.array, point forecast values of size (`base`, `horizon`).
147 | `y_insample`: Insample values of size (`base`, `insample_size`).
148 | `y_hat_insample`: Insample point forecasts of size (`base`, `insample_size`).
149 | `num_samples`: int, number of bootstrapped samples generated.
150 | `seed`: int, random seed for the numpy generator's reproducibility.
151 | 152 | **References:**
153 | - [Puwasala Gamakumara Ph.D. dissertation. Monash University, Econometrics and Business Statistics (2020). 154 | "Probabilistic Forecast Reconciliation"](https://bridges.monash.edu/articles/thesis/Probabilistic_Forecast_Reconciliation_Theory_and_Applications/11869533) 155 | - [Panagiotelis A., Gamakumara P., Athanasopoulos G., and Hyndman R. J. (2022). 156 | "Probabilistic forecast reconciliation: Properties, evaluation and score optimisation". European Journal of Operational Research.](https://www.sciencedirect.com/science/article/pii/S0377221722006087) 157 | """ 158 | 159 | def __init__( 160 | self, 161 | S: np.ndarray, 162 | P: np.ndarray, 163 | y_hat: np.ndarray, 164 | y_insample: np.ndarray, 165 | y_hat_insample: np.ndarray, 166 | num_samples: int = 100, 167 | seed: int = 0, 168 | W: Optional[np.ndarray] = None, 169 | ): 170 | self.S = S 171 | self.P = P 172 | self.W = W 173 | self.y_hat = y_hat 174 | self.y_insample = y_insample 175 | self.y_hat_insample = y_hat_insample 176 | self.num_samples = num_samples 177 | self.seed = seed 178 | 179 | def get_samples(self, num_samples: int): 180 | """Bootstrap Sample Reconciliation Method. 181 | 182 | Applies the Bootstrap sample reconciliation method as defined by Gamakumara 2020, 183 | generating independent sample paths and reconciling them through the `S @ P` projection. 184 | 185 | **Parameters:**
186 | `num_samples`: int, number of samples generated from the coherent distribution.
187 | 188 | **Returns:**
189 | `samples`: Coherent samples of size (`base`, `horizon`, `num_samples`). 190 | """ 191 | residuals = self.y_insample - self.y_hat_insample 192 | h = self.y_hat.shape[1] 193 | 194 | # remove NaNs from residuals 195 | residuals = residuals[:, np.isnan(residuals).sum(axis=0) == 0] 196 | sample_idx = np.arange(residuals.shape[1] - h) 197 | rng = np.random.default_rng(self.seed) 198 | samples_idx = rng.choice(sample_idx, size=num_samples) 199 | samples = [self.y_hat + residuals[:, idx : (idx + h)] for idx in samples_idx] 200 | SP = self.S @ self.P 201 | samples = np.apply_along_axis( 202 | lambda path: np.matmul(SP, path), axis=1, arr=samples 203 | ) 204 | samples_np = np.stack(samples) 205 | 206 | # [samples, N, H] -> [N, H, samples] 207 | samples_np = samples_np.transpose((1, 2, 0)) 208 | return samples_np 209 | 210 | def get_prediction_levels(self, res, level): 211 | """Adds reconciled forecast levels to results dictionary""" 212 | samples = self.get_samples(num_samples=self.num_samples) 213 | for lv in level: 214 | min_q = (100 - lv) / 200 215 | max_q = min_q + lv / 100 216 | res[f"lo-{lv}"] = np.quantile(samples, min_q, axis=2) 217 | res[f"hi-{lv}"] = np.quantile(samples, max_q, axis=2) 218 | return res 219 | 220 | def get_prediction_quantiles(self, res, quantiles): 221 | """Adds reconciled forecast quantiles to results dictionary""" 222 | samples = self.get_samples(num_samples=self.num_samples) 223 | 224 | # [Q, N, H] -> [N, H, Q] 225 | sample_quantiles = np.quantile(samples, quantiles, axis=2) 226 | res["quantiles"] = sample_quantiles.transpose((1, 2, 0)) 227 | return res 228 | 229 | # %% ../nbs/src/probabilistic_methods.ipynb 14 230 | class PERMBU: 231 | """PERMBU Probabilistic Reconciliation Class. 232 | 233 | The PERMBU method leverages empirical bottom-level marginal distributions 234 | with empirical copula functions (describing bottom-level dependencies) to 235 | generate the distributions of the aggregate levels using BottomUp 236 | reconciliation. The sample reordering technique in the PERMBU method reinjects 237 | multivariate dependencies into independent bottom-level samples. 238 | 239 | Algorithm: 240 | 1. For all series, compute conditional marginal distributions. 241 | 2. Compute residuals $\hat{\epsilon}_{i,t}$ and obtain rank permutations. 242 | 3. Obtain a K-sample from the bottom-level series predictions. 243 | 4. Apply recursively through the hierarchical structure:
244 | 4.1. For a given aggregate series $i$ and its children series:
245 | 4.2. Obtain the children's empirical joint distribution using the sample-reordering copula.
246 | 4.3. From the children's joint distribution, obtain the aggregate series' samples. 247 | 248 | **Parameters:**
249 | `S`: np.array, summing matrix of size (`base`, `bottom`).
250 | `tags`: Each key is a level and each value is its `S` indices.
251 | `y_insample`: Insample values of size (`base`, `insample_size`).
252 | `y_hat_insample`: Insample point forecasts of size (`base`, `insample_size`).
253 | `sigmah`: np.array, forecast standard dev. of size (`base`, `horizon`).
254 | `num_samples`: int, number of normal prediction samples generated.
255 | `seed`: int, random seed for the numpy generator's reproducibility.
256 | 257 | **References:**
258 | - [Taieb, Souhaib Ben and Taylor, James W and Hyndman, Rob J. (2017). 259 | Coherent probabilistic forecasts for hierarchical time series. 260 | International conference on machine learning ICML.](https://proceedings.mlr.press/v70/taieb17a.html) 261 | """ 262 | 263 | def __init__( 264 | self, 265 | S: np.ndarray, 266 | tags: dict[str, np.ndarray], 267 | y_hat: np.ndarray, 268 | y_insample: np.ndarray, 269 | y_hat_insample: np.ndarray, 270 | sigmah: np.ndarray, 271 | num_samples: Optional[int] = None, 272 | seed: int = 0, 273 | P: Optional[np.ndarray] = None, 274 | ): 275 | # PERMBU only works for strictly hierarchical structures 276 | if not is_strictly_hierarchical(S, tags): 277 | raise ValueError( 278 | "PERMBU probabilistic reconciliation requires strictly hierarchical structures." 279 | ) 280 | self.S = S 281 | self.P = P 282 | self.y_hat = y_hat 283 | self.y_insample = y_insample 284 | self.y_hat_insample = y_hat_insample 285 | self.sigmah = sigmah 286 | self.num_samples = num_samples 287 | self.seed = seed 288 | 289 | def _obtain_ranks(self, array): 290 | """Vector ranks 291 | 292 | Efficiently obtain vector ranks. 293 | Example `array=[4,2,7,1]` -> `ranks=[2, 1, 3, 0]`. 294 | 295 | **Parameters**
296 | `array`: np.array, matrix of floats or integers whose 297 | ranks will be computed along the second dimension.
298 | 299 | **Returns**
300 | `ranks`: np.array, matrix with ranks along the second dimension.
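For a matrix input the ranks are computed row by row, e.g. `array=[[4,2,7,1],[10,9,8,11]]` -> `ranks=[[2,1,3,0],[2,1,0,3]]`.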
301 | """ 302 | temp = array.argsort(axis=1) 303 | ranks = np.empty_like(temp) 304 | a_range = np.arange(temp.shape[1]) 305 | for i_row in range(temp.shape[0]): 306 | ranks[i_row, temp[i_row, :]] = a_range 307 | return ranks 308 | 309 | def _permutate_samples(self, samples, permutations): 310 | """Permutate Samples 311 | 312 | Applies efficient vectorized permutation on the samples. 313 | 314 | **Parameters**
315 | `samples`: np.array [series,samples], independent base samples.
316 | `permutations`: np.array [series,samples], permutation ranks with 317 | which `samples` dependence will be restored; see `_obtain_ranks`.
318 | 319 | **Returns**
320 | `permutated_samples`: np.array.
321 | """ 322 | # Generate auxiliary and flat permutation indexes 323 | n_rows, n_cols = permutations.shape 324 | aux_row_idx = np.arange(n_rows)[:, None] * n_cols 325 | aux_row_idx = np.repeat(aux_row_idx, repeats=n_cols, axis=1) 326 | permutate_idxs = permutations.flatten() + aux_row_idx.flatten() 327 | 328 | # Apply flat permutation indexes and recover original shape 329 | permutated_samples = samples.flatten() 330 | permutated_samples = permutated_samples[permutate_idxs] 331 | permutated_samples = permutated_samples.reshape(n_rows, n_cols) 332 | return permutated_samples 333 | 334 | def _permutate_predictions(self, prediction_samples, permutations): 335 | """Permutate Prediction Samples 336 | 337 | Applies permutations to prediction_samples across the horizon. 338 | 339 | **Parameters**
340 | `prediction_samples`: np.array [series,horizon,samples], independent 341 | base prediction samples.
342 | `permutations`: np.array [series, samples], permutation ranks with which 343 | `samples` dependence will be restored; see `_obtain_ranks`. 344 | It can also apply a random permutation.
345 | 346 | **Returns**
347 | `permutated_prediction_samples`: np.array.
348 | """ 349 | # Apply permutation throughout forecast horizon 350 | permutated_prediction_samples = prediction_samples.copy() 351 | 352 | _, n_horizon, _ = prediction_samples.shape 353 | for t in range(n_horizon): 354 | permutated_prediction_samples[:, t, :] = self._permutate_samples( 355 | prediction_samples[:, t, :], permutations 356 | ) 357 | return permutated_prediction_samples 358 | 359 | def _nonzero_indexes_by_row(self, M): 360 | return [np.nonzero(M[row, :])[0] for row in range(len(M))] 361 | 362 | def get_samples(self, num_samples: Optional[int] = None): 363 | """PERMBU Sample Reconciliation Method. 364 | 365 | Applies PERMBU reconciliation method as defined by Taieb et. al 2017. 366 | Generating independent base prediction samples, restoring its multivariate 367 | dependence using estimated copula with reordering and applying the BottomUp 368 | aggregation to the new samples. 369 | 370 | **Parameters:**
371 | `num_samples`: int, number of samples generated from the coherent distribution.
372 | 373 | **Returns:**
374 | `samples`: Coherent samples of size (`base`, `horizon`, `num_samples`). 375 | """ 376 | # Compute residuals and rank permutations 377 | residuals = self.y_insample - self.y_hat_insample 378 | residuals = residuals[:, np.isnan(residuals).sum(axis=0) == 0] 379 | 380 | # Sample h step-ahead base marginal distributions 381 | if num_samples is None: 382 | num_samples = residuals.shape[1] 383 | 384 | # Expand residuals to match num_samples [(a,b),T] -> [(a,b),num_samples] 385 | rng = np.random.default_rng(self.seed) 386 | if num_samples > residuals.shape[1]: 387 | residuals_idxs = rng.choice(residuals.shape[1], size=num_samples) 388 | else: 389 | residuals_idxs = rng.choice( 390 | residuals.shape[1], size=num_samples, replace=False 391 | ) 392 | residuals = residuals[:, residuals_idxs] 393 | rank_permutations = self._obtain_ranks(residuals) 394 | 395 | n_series, n_horizon = self.y_hat.shape 396 | 397 | base_samples = np.array( 398 | [ 399 | rng.normal(loc=m, scale=s, size=num_samples) 400 | for m, s in zip(self.y_hat.flatten(), self.sigmah.flatten()) 401 | ] 402 | ) 403 | base_samples = base_samples.reshape(n_series, n_horizon, num_samples) 404 | 405 | # Initialize PERMBU utility 406 | rec_samples = base_samples.copy() 407 | try: 408 | encoder = OneHotEncoder(sparse_output=False, dtype=np.float64) 409 | except TypeError: 410 | encoder = OneHotEncoder(sparse=False, dtype=np.float64) 411 | hier_links = np.vstack(self._nonzero_indexes_by_row(self.S.T)) 412 | 413 | # BottomUp hierarchy traversing 414 | hier_levels = hier_links.shape[1] - 1 415 | for level_idx in reversed(range(hier_levels)): 416 | # Obtain aggregation matrix from parent/children links 417 | children_links = np.unique(hier_links[:, level_idx : level_idx + 2], axis=0) 418 | children_idxs = np.unique(children_links[:, 1]) 419 | parent_idxs = np.unique(children_links[:, 0]) 420 | Agg = encoder.fit_transform(children_links).T 421 | Agg = Agg[: len(parent_idxs), :] 422 | 423 | # Permute children_samples for each prediction step 424 | children_permutations = rank_permutations[children_idxs, :] 425 | children_samples = rec_samples[children_idxs, :, :] 426 | children_samples = self._permutate_predictions( 427 | prediction_samples=children_samples, permutations=children_permutations 428 | ) 429 | 430 | # Overwrite hier_samples with BottomUp aggregation 431 | # and randomly shuffle parent predictions after aggregation 432 | parent_samples = np.einsum("ab,bhs->ahs", Agg, children_samples) 433 | random_permutation = np.array( 434 | [ 435 | rng.permutation(np.arange(num_samples)) 436 | for serie in range(len(parent_samples)) 437 | ] 438 | ) 439 | parent_samples = self._permutate_predictions( 440 | prediction_samples=parent_samples, permutations=random_permutation 441 | ) 442 | 443 | rec_samples[parent_idxs, :, :] = parent_samples 444 | return rec_samples 445 | 446 | def get_prediction_levels(self, res, level): 447 | """Adds reconciled forecast levels to results dictionary""" 448 | samples = self.get_samples(num_samples=self.num_samples) 449 | for lv in level: 450 | min_q = (100 - lv) / 200 451 | max_q = min_q + lv / 100 452 | res[f"lo-{lv}"] = np.quantile(samples, min_q, axis=2) 453 | res[f"hi-{lv}"] = np.quantile(samples, max_q, axis=2) 454 | return res 455 | 456 | def get_prediction_quantiles(self, res, quantiles): 457 | """Adds reconciled forecast quantiles to results dictionary""" 458 | samples = self.get_samples(num_samples=self.num_samples) 459 | 460 | # [Q, N, H] -> [N, H, Q] 461 | sample_quantiles = np.quantile(samples, quantiles, 
axis=2) 462 | res["quantiles"] = sample_quantiles.transpose((1, 2, 0)) 463 | return res 464 | -------------------------------------------------------------------------------- /nbs/.gitattributes: -------------------------------------------------------------------------------- 1 | **/*.ipynb filter=clean-nbs 2 | **/*.ipynb diff=ipynb 3 | -------------------------------------------------------------------------------- /nbs/.gitignore: -------------------------------------------------------------------------------- 1 | /.quarto/ 2 | -------------------------------------------------------------------------------- /nbs/_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | 4 | format: 5 | html: 6 | theme: cosmo 7 | fontsize: 1em 8 | linestretch: 1.7 9 | css: styles.css 10 | toc: true 11 | 12 | website: 13 | twitter-card: true 14 | open-graph: true 15 | google-analytics: "G-NXJNCVR18L" 16 | repo-actions: [issue] 17 | favicon: favicon_png.png 18 | navbar: 19 | background: primary 20 | search: true 21 | collapse-below: lg 22 | left: 23 | - text: "Get Started" 24 | href: examples/TourismSmall.ipynb 25 | - text: "NixtlaVerse" 26 | menu: 27 | - text: "MLForecast 🤖" 28 | href: https://github.com/nixtla/mlforecast 29 | - text: "NeuralForecast 🧠" 30 | href: https://github.com/nixtla/neuralforecast 31 | - text: "StatsForecast ⚡️" 32 | href: https://github.com/nixtla/statsforecast 33 | 34 | - text: "Help" 35 | menu: 36 | - text: "Report an Issue" 37 | icon: bug 38 | href: https://github.com/nixtla/hierarchicalforecast/issues/new/choose 39 | - text: "Join our Slack" 40 | icon: chat-right-text 41 | href: https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1kd5m5db7-7OOfy0xVNf1PcvCiAPWvPw 42 | right: 43 | - icon: github 44 | href: "https://github.com/nixtla/hierarchicalforecast" 45 | - icon: twitter 46 | href: https://twitter.com/nixtlainc 47 | aria-label: Nixtla Twitter 48 | 49 | sidebar: 50 | style: floating 51 | body-footer: | 52 | If you find the code useful, please ⭐ us on [Github](https://github.com/nixtla/hierarchicalforecast) 53 | 54 | metadata-files: [nbdev.yml, sidebar.yml] 55 | -------------------------------------------------------------------------------- /nbs/custom.yml: -------------------------------------------------------------------------------- 1 | website: 2 | reader-mode: false 3 | navbar: 4 | collapse-below: lg 5 | left: 6 | - text: "Get Started" 7 | href: examples/AustralianDomesticTourism.ipynb 8 | - text: "Experiments" 9 | href: https://github.com/Nixtla/hierarchicalforecast/tree/main/experiments 10 | - text: "Help" 11 | menu: 12 | - text: "Report an Issue" 13 | icon: bug 14 | href: https://github.com/nixtla/hierarchicalforecast/issues 15 | - text: "Slack Nixtla" 16 | icon: chat-right-text 17 | href: https://join.slack.com/t/nixtlaworkspace/shared_invite/zt-135dssye9-fWTzMpv2WBthq8NK0Yvu6A 18 | right: 19 | - icon: twitter 20 | href: https://twitter.com/nixtlainc 21 | aria-label: Nixtla Twitter 22 | -------------------------------------------------------------------------------- /nbs/examples/.nodoc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/nbs/examples/.nodoc -------------------------------------------------------------------------------- /nbs/examples/.notest: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/nbs/examples/.notest -------------------------------------------------------------------------------- /nbs/examples/Installation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "14f5686c-449b-4376-8c58-fc8141f4b0f8", 7 | "metadata": {}, 8 | "source": [ 9 | "# Install\n", 10 | "\n", 11 | "> Install HierachicalForecast with pip or conda" 12 | ] 13 | }, 14 | { 15 | "attachments": {}, 16 | "cell_type": "markdown", 17 | "id": "0f1d1483-6da7-4372-8390-84c9c280109e", 18 | "metadata": {}, 19 | "source": [ 20 | "You can install the *released version* of `HierachicalForecast` from the [Python package index](https://pypi.org) with:\n", 21 | "\n", 22 | "```python\n", 23 | "pip install hierarchicalforecast\n", 24 | "```\n", 25 | "\n", 26 | "or \n", 27 | "\n", 28 | "```python\n", 29 | "conda install -c conda-forge hierarchicalforecast\n", 30 | "``` \n", 31 | "\n", 32 | ":::{.callout-tip}\n", 33 | "We recommend installing your libraries inside a python virtual or [conda environment](https://docs.conda.io/projects/conda/en/latest/user-guide/install/macos.html).\n", 34 | ":::\n", 35 | "\n", 36 | "#### User our env (optional)\n", 37 | "\n", 38 | "If you don't have a Conda environment and need tools like Numba, Pandas, NumPy, Jupyter, StatsModels, and Nbdev you can use ours by following these steps:\n", 39 | "\n", 40 | "1. Clone the HierachicalForecast repo: \n", 41 | "\n", 42 | "```bash \n", 43 | "$ git clone https://github.com/Nixtla/hierachicalforecast.git && cd hierachicalforecast\n", 44 | "```\n", 45 | "\n", 46 | "2. Create the environment using the `environment.yml` file: \n", 47 | "\n", 48 | "```bash \n", 49 | "$ conda env create -f environment.yml\n", 50 | "```\n", 51 | "\n", 52 | "3. Activate the environment:\n", 53 | "```bash\n", 54 | "$ conda activate statsforecast\n", 55 | "```" 56 | ] 57 | } 58 | ], 59 | "metadata": { 60 | "kernelspec": { 61 | "display_name": "python3", 62 | "language": "python", 63 | "name": "python3" 64 | } 65 | }, 66 | "nbformat": 4, 67 | "nbformat_minor": 5 68 | } 69 | -------------------------------------------------------------------------------- /nbs/examples/LocalGlobalAggregation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Local vs Global Temporal Aggregation\n", 9 | "\n", 10 | "> Temporal Hierarchical Aggregation on a local or global level." 11 | ] 12 | }, 13 | { 14 | "attachments": {}, 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "In this notebook we explain the difference between temporally aggregating timeseries locally and globally." 19 | ] 20 | }, 21 | { 22 | "attachments": {}, 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "You can run these experiments using CPU or GPU with Google Colab.\n", 27 | "\n", 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "%%capture\n", 38 | "!pip install hierarchicalforecast utilsforecast" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "## 1. 
Generate Data" 46 | ] 47 | }, 48 | { 49 | "attachments": {}, 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "In this example we will generate synthetic series to explain the difference between local- and global temporal aggregation. We will generate 2 series with a daily frequency." 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "from utilsforecast.data import generate_series" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "freq = \"D\"\n", 72 | "n_series = 2\n", 73 | "df = generate_series(n_series=n_series, \n", 74 | " freq=freq, \n", 75 | " min_length=2 * 365, \n", 76 | " max_length=4 * 365, \n", 77 | " equal_ends=True)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "Note that our two timeseries do not have the same number of timesteps:" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "unique_id\n", 96 | "0 1414\n", 97 | "1 1289\n", 98 | "Name: ds, dtype: int64" 99 | ] 100 | }, 101 | "execution_count": null, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "df.groupby('unique_id', observed=True)[\"ds\"].count()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "We then define a spec for our temporal aggregations." 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "spec = {\"year\": 365, \"quarter\": 91, \"month\": 30, \"week\": 7, \"day\": 1}" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "## 2. Local aggregation (default)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "In local aggregation, we treat the timestamps of each timeseries individually. It means that the temporal aggregation is performed by only looking at the timestamps of each series, disregarding the timestamps of other series. " 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "from hierarchicalforecast.utils import aggregate_temporal" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "Y_df_local, S_df_local, tags_local = aggregate_temporal(df, spec)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "We have created temporal aggregations _per timeseries_, as the temporal aggregation `month-1` doesn't correspond to the same (year, month) for both timeseries. This is because the series with `unique_id=1` is shorter and has its first datapoint in July 2000, in contrast to the series with `unique_id=0`, which is longer and has its first timestamp in March 2000." 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/html": [ 173 | "
\n", 174 | "\n", 187 | "\n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | "
temporal_idunique_iddsy
39month-102000-03-1693.574676
87month-112000-07-1991.506421
\n", 214 | "
" 215 | ], 216 | "text/plain": [ 217 | " temporal_id unique_id ds y\n", 218 | "39 month-1 0 2000-03-16 93.574676\n", 219 | "87 month-1 1 2000-07-19 91.506421" 220 | ] 221 | }, 222 | "execution_count": null, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "Y_df_local.query(\"temporal_id == 'month-1'\")" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "## 2. Global aggregation" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "In global aggregation, we examine all unique timestamps across all timeseries, and base our temporal aggregations on the unique list of timestamps across all timeseries. We can specify the aggregation type by setting the `aggregation_type` attritbue in `aggregate_temporal`." 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "Y_df_global, S_df_global, tags_globval = aggregate_temporal(df, spec, aggregation_type=\"global\")\n" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "We have created temporal aggregations _across all timeseries_, as the temporal aggregation `month-1` corresponds to the same (year, month)-combination for both timeseries. Since `month-1` isn't present in the second timeseries (as it is shorter), we have only one record for the aggregation." 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "data": { 268 | "text/html": [ 269 | "
\n", 270 | "\n", 283 | "\n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | "
temporal_idunique_iddsy
39month-102000-03-1693.574676
\n", 303 | "
" 304 | ], 305 | "text/plain": [ 306 | " temporal_id unique_id ds y\n", 307 | "39 month-1 0 2000-03-16 93.574676" 308 | ] 309 | }, 310 | "execution_count": null, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "Y_df_global.query(\"temporal_id == 'month-1'\")" 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "For `month-5` however, we have a record for both timeseries, as the second series has its first datapoint in that month." 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "data": { 333 | "text/html": [ 334 | "
\n", 335 | "\n", 348 | "\n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | "
temporal_idunique_iddsy
43month-502000-07-1495.169659
87month-512000-07-1474.502584
\n", 375 | "
" 376 | ], 377 | "text/plain": [ 378 | " temporal_id unique_id ds y\n", 379 | "43 month-5 0 2000-07-14 95.169659\n", 380 | "87 month-5 1 2000-07-14 74.502584" 381 | ] 382 | }, 383 | "execution_count": null, 384 | "metadata": {}, 385 | "output_type": "execute_result" 386 | } 387 | ], 388 | "source": [ 389 | "Y_df_global.query(\"temporal_id == 'month-5'\")" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": {}, 395 | "source": [ 396 | "Hence, the global aggregation ensures temporal alignment across all series." 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "## 3. What to choose?" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "metadata": {}, 409 | "source": [ 410 | "- If all timeseries have the same length and same timestamps, `global` and `local` yield the same results.\n", 411 | "- The default behavior is `local`. This means that temporal aggregations between timeseries can't be compared unless the series have the same length and timestamp. This behavior is generally safer, and advised to use when time series are not necessarily related, and you are building per-series models using e.g. `StatsForecast`.\n", 412 | "- The `global` behavior can be useful when dealing with timeseries where we expect relationships between the timeseries. For example, in case of forecasting daily product demand individual products may not always have sales for all timesteps, but one is interested in the overall temporal yearly aggregation across all products. The `global` setting has more room for error, so be careful and check the aggregation result carefully. This would typically be the setting used in combination with models from `MLForecast` or `NeuralForecast`. " 413 | ] 414 | } 415 | ], 416 | "metadata": { 417 | "kernelspec": { 418 | "display_name": "python3", 419 | "language": "python", 420 | "name": "python3" 421 | } 422 | }, 423 | "nbformat": 4, 424 | "nbformat_minor": 4 425 | } 426 | -------------------------------------------------------------------------------- /nbs/examples/TourismSmall.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "843cf8de-d678-4243-a8af-d78439058e6a", 7 | "metadata": {}, 8 | "source": [ 9 | "# Quick Start\n", 10 | "\n", 11 | "> Minimal Example of Hierarchical Reconciliation" 12 | ] 13 | }, 14 | { 15 | "attachments": {}, 16 | "cell_type": "markdown", 17 | "id": "0a836410-7063-4766-b03c-7d22b6abe457", 18 | "metadata": {}, 19 | "source": [ 20 | "Large collections of time series organized into structures at different aggregation levels often require their forecasts to follow their aggregation constraints, which poses the challenge of creating novel algorithms capable of coherent forecasts.\n", 21 | "\n", 22 | "The `HierarchicalForecast` package provides a wide collection of Python implementations of hierarchical forecasting algorithms that follow classic hierarchical reconciliation.\n", 23 | "\n", 24 | "In this notebook we will show how to use the `StatsForecast` library to produce base forecasts, and use `HierarchicalForecast` package to perform hierarchical reconciliation." 
25 | ] 26 | }, 27 | { 28 | "attachments": {}, 29 | "cell_type": "markdown", 30 | "id": "46e647a5", 31 | "metadata": {}, 32 | "source": [ 33 | "You can run these experiments using CPU or GPU with Google Colab.\n", 34 | "\n", 35 | "\"Open" 36 | ] 37 | }, 38 | { 39 | "attachments": {}, 40 | "cell_type": "markdown", 41 | "id": "c0dc4e6d", 42 | "metadata": {}, 43 | "source": [ 44 | "## 1. Libraries" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "id": "7f777bdd-dff4-4bc0-8529-b492874de6f0", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "%%capture\n", 55 | "!pip install hierarchicalforecast statsforecast datasetsforecast" 56 | ] 57 | }, 58 | { 59 | "attachments": {}, 60 | "cell_type": "markdown", 61 | "id": "6221152f", 62 | "metadata": {}, 63 | "source": [ 64 | "## 2. Load Data" 65 | ] 66 | }, 67 | { 68 | "attachments": {}, 69 | "cell_type": "markdown", 70 | "id": "9809d816", 71 | "metadata": {}, 72 | "source": [ 73 | "In this example we will use the `TourismSmall` dataset. The following cell gets the time series for the different levels in the hierarchy, the summing matrix `S` which recovers the full dataset from the bottom level hierarchy and the indices of each hierarchy denoted by `tags`." 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "id": "f7a5828a-2fb4-4811-9e07-0ee291331978", 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "import pandas as pd\n", 84 | "\n", 85 | "from datasetsforecast.hierarchical import HierarchicalData, HierarchicalInfo" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "id": "c18a4300-5b8f-45b5-92ce-e52f8c4dab20", 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "group_name = 'TourismSmall'\n", 96 | "group = HierarchicalInfo.get_group(group_name)\n", 97 | "Y_df, S_df, tags = HierarchicalData.load('./data', group_name)\n", 98 | "S_df = S_df.reset_index(names=\"unique_id\")\n", 99 | "Y_df['ds'] = pd.to_datetime(Y_df['ds'])" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "id": "b964394e-6a79-4c75-be74-3c3994e1bf58", 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/html": [ 111 | "
\n", 112 | "\n", 125 | "\n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | "
unique_idnsw-hol-citynsw-hol-noncityvic-hol-cityvic-hol-noncityqld-hol-city
0total1.01.01.01.01.0
1hol1.01.01.01.01.0
2vfr0.00.00.00.00.0
3bus0.00.00.00.00.0
4oth0.00.00.00.00.0
5nsw-hol1.01.00.00.00.0
\n", 194 | "
" 195 | ], 196 | "text/plain": [ 197 | " unique_id nsw-hol-city nsw-hol-noncity vic-hol-city vic-hol-noncity \\\n", 198 | "0 total 1.0 1.0 1.0 1.0 \n", 199 | "1 hol 1.0 1.0 1.0 1.0 \n", 200 | "2 vfr 0.0 0.0 0.0 0.0 \n", 201 | "3 bus 0.0 0.0 0.0 0.0 \n", 202 | "4 oth 0.0 0.0 0.0 0.0 \n", 203 | "5 nsw-hol 1.0 1.0 0.0 0.0 \n", 204 | "\n", 205 | " qld-hol-city \n", 206 | "0 1.0 \n", 207 | "1 1.0 \n", 208 | "2 0.0 \n", 209 | "3 0.0 \n", 210 | "4 0.0 \n", 211 | "5 0.0 " 212 | ] 213 | }, 214 | "execution_count": null, 215 | "metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "S_df.iloc[:6, :6]" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "id": "fd32f1a5-2b60-454e-afc9-6911f84f6698", 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "{'Country': array(['total'], dtype=object),\n", 233 | " 'Country/Purpose': array(['hol', 'vfr', 'bus', 'oth'], dtype=object),\n", 234 | " 'Country/Purpose/State': array(['nsw-hol', 'vic-hol', 'qld-hol', 'sa-hol', 'wa-hol', 'tas-hol',\n", 235 | " 'nt-hol', 'nsw-vfr', 'vic-vfr', 'qld-vfr', 'sa-vfr', 'wa-vfr',\n", 236 | " 'tas-vfr', 'nt-vfr', 'nsw-bus', 'vic-bus', 'qld-bus', 'sa-bus',\n", 237 | " 'wa-bus', 'tas-bus', 'nt-bus', 'nsw-oth', 'vic-oth', 'qld-oth',\n", 238 | " 'sa-oth', 'wa-oth', 'tas-oth', 'nt-oth'], dtype=object),\n", 239 | " 'Country/Purpose/State/CityNonCity': array(['nsw-hol-city', 'nsw-hol-noncity', 'vic-hol-city',\n", 240 | " 'vic-hol-noncity', 'qld-hol-city', 'qld-hol-noncity',\n", 241 | " 'sa-hol-city', 'sa-hol-noncity', 'wa-hol-city', 'wa-hol-noncity',\n", 242 | " 'tas-hol-city', 'tas-hol-noncity', 'nt-hol-city', 'nt-hol-noncity',\n", 243 | " 'nsw-vfr-city', 'nsw-vfr-noncity', 'vic-vfr-city',\n", 244 | " 'vic-vfr-noncity', 'qld-vfr-city', 'qld-vfr-noncity',\n", 245 | " 'sa-vfr-city', 'sa-vfr-noncity', 'wa-vfr-city', 'wa-vfr-noncity',\n", 246 | " 'tas-vfr-city', 'tas-vfr-noncity', 'nt-vfr-city', 'nt-vfr-noncity',\n", 247 | " 'nsw-bus-city', 'nsw-bus-noncity', 'vic-bus-city',\n", 248 | " 'vic-bus-noncity', 'qld-bus-city', 'qld-bus-noncity',\n", 249 | " 'sa-bus-city', 'sa-bus-noncity', 'wa-bus-city', 'wa-bus-noncity',\n", 250 | " 'tas-bus-city', 'tas-bus-noncity', 'nt-bus-city', 'nt-bus-noncity',\n", 251 | " 'nsw-oth-city', 'nsw-oth-noncity', 'vic-oth-city',\n", 252 | " 'vic-oth-noncity', 'qld-oth-city', 'qld-oth-noncity',\n", 253 | " 'sa-oth-city', 'sa-oth-noncity', 'wa-oth-city', 'wa-oth-noncity',\n", 254 | " 'tas-oth-city', 'tas-oth-noncity', 'nt-oth-city', 'nt-oth-noncity'],\n", 255 | " dtype=object)}" 256 | ] 257 | }, 258 | "execution_count": null, 259 | "metadata": {}, 260 | "output_type": "execute_result" 261 | } 262 | ], 263 | "source": [ 264 | "tags" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "id": "56a7aadb-6e2c-456a-a0b5-b29b30deadb5", 270 | "metadata": {}, 271 | "source": [ 272 | "We split the dataframe in train/test splits." 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "id": "462451d8-2fc0-445e-9458-908811011dd9", 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "Y_test_df = Y_df.groupby('unique_id').tail(group.horizon)\n", 283 | "Y_train_df = Y_df.drop(Y_test_df.index)" 284 | ] 285 | }, 286 | { 287 | "attachments": {}, 288 | "cell_type": "markdown", 289 | "id": "1958d4e6", 290 | "metadata": {}, 291 | "source": [ 292 | "## 3. 
Base forecasts" ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "id": "b7cfb43a-cd16-418c-a04b-e075c176cc9e", 297 | "metadata": {}, 298 | "source": [ 299 | "The following cell computes the *base forecast* for each time series using the `auto_arima` and `naive` models. Observe that `Y_hat_df` contains the forecasts but they are not coherent." 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "id": "ce5017ee", 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "from statsforecast.core import StatsForecast\n", 310 | "from statsforecast.models import AutoARIMA, Naive" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "id": "f99e7b7b-f4b8-4f2f-a1a7-c8be98a1e280", 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "fcst = StatsForecast(\n", 321 | "    models=[AutoARIMA(season_length=group.seasonality), Naive()], \n", 322 | "    freq=\"QE\", \n", 323 | "    n_jobs=-1\n", 324 | ")\n", 325 | "Y_hat_df = fcst.forecast(df=Y_train_df, h=group.horizon)" 326 | ] 327 | }, 328 | { 329 | "attachments": {}, 330 | "cell_type": "markdown", 331 | "id": "ef1c9163", 332 | "metadata": {}, 333 | "source": [ 334 | "## 4. Hierarchical reconciliation" 335 | ] 336 | }, 337 | { 338 | "attachments": {}, 339 | "cell_type": "markdown", 340 | "id": "cc296762-2009-4aef-8b31-f24aad9d0787", 341 | "metadata": {}, 342 | "source": [ 343 | "The following cell makes the previous forecasts coherent using the `HierarchicalReconciliation` class. The methods used to make the forecasts coherent are:\n", 344 | "\n", 345 | "- `BottomUp`: The bottom-level forecasts are simply added up to the upper levels.\n", 346 | "- `TopDown`: The second method constrains the base-level predictions to the top-most aggregate-level series and then distributes it to the disaggregate series through the use of proportions. \n", 347 | "- `MiddleOut`: Anchors the base predictions in a middle level." 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "id": "63ec7e26", 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "from hierarchicalforecast.core import HierarchicalReconciliation\n", 358 | "from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "id": "a43be9e7-99a9-4981-bfd7-8552efba9751", 365 | "metadata": {}, 366 | "outputs": [], 367 | "source": [ 368 | "reconcilers = [\n", 369 | "    BottomUp(),\n", 370 | "    TopDown(method='forecast_proportions'),\n", 371 | "    TopDown(method='proportion_averages'),\n", 372 | "    MiddleOut(middle_level=\"Country/Purpose/State\", top_down_method=\"proportion_averages\"),\n", 373 | "]\n", 374 | "hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n", 375 | "Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df, S=S_df, tags=tags)" 376 | ] 377 | }, 378 | { 379 | "attachments": {}, 380 | "cell_type": "markdown", 381 | "id": "6590a5e2", 382 | "metadata": {}, 383 | "source": [ 384 | "## 5. Evaluation" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "id": "03c4752c-53f8-4b1f-8169-32075b8e4050", 390 | "metadata": {}, 391 | "source": [ 392 | "The `HierarchicalForecast` package includes the `evaluate` function to evaluate the different hierarchies, and we can use utilsforecast to compute the mean squared error relative to a baseline model."
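To make the scaled metric concrete, here is a hypothetical toy sketch (the numbers are invented, and the semantics assume that `evaluate` with `benchmark=\"Naive\"` divides each model's metric by the benchmark's, as the `mse-scaled` rows in the output below suggest):

```python
import numpy as np

# Toy series: hypothetical test values and two sets of forecasts.
y_true = np.array([10.0, 12.0, 11.0, 13.0])
y_model = np.array([10.5, 11.5, 11.2, 12.6])   # a fitted model's forecasts
y_naive = np.full_like(y_true, 9.0)            # Naive: repeat the last training value

mse_model = np.mean((y_true - y_model) ** 2)
mse_naive = np.mean((y_true - y_naive) ** 2)
print(round(mse_model / mse_naive, 3))  # "mse-scaled": values < 1.0 beat the baseline
```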
394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "id": "0be293f3", 400 | "metadata": {}, 401 | "outputs": [], 402 | "source": [ 403 | "from hierarchicalforecast.evaluation import evaluate\n", 404 | "from utilsforecast.losses import mse" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "id": "8599f85a", 411 | "metadata": {}, 412 | "outputs": [ 413 | { 414 | "data": { 415 | "text/html": [ 416 | "
\n", 417 | "\n", 430 | "\n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | "
AutoARIMAAutoARIMA/BottomUpAutoARIMA/TopDown_method-forecast_proportionsAutoARIMA/TopDown_method-proportion_averagesAutoARIMA/MiddleOut_middle_level-Country/Purpose/State_top_down_method-proportion_averages
levelmetric
Countrymse-scaled0.3178970.3670780.3178970.3178970.305053
Country/Purposemse-scaled0.3189500.2336060.2622160.3202250.196062
Country/Purpose/Statemse-scaled0.2680570.2811890.3203490.5113560.268057
Country/Purpose/State/CityNonCitymse-scaled0.2921360.2921360.3232610.5097840.280599
Overallmse-scaled0.3089420.2956900.2970720.3647750.255038
\n", 499 | "
" 500 | ], 501 | "text/plain": [ 502 | " AutoARIMA AutoARIMA/BottomUp \\\n", 503 | "level metric \n", 504 | "Country mse-scaled 0.317897 0.367078 \n", 505 | "Country/Purpose mse-scaled 0.318950 0.233606 \n", 506 | "Country/Purpose/State mse-scaled 0.268057 0.281189 \n", 507 | "Country/Purpose/State/CityNonCity mse-scaled 0.292136 0.292136 \n", 508 | "Overall mse-scaled 0.308942 0.295690 \n", 509 | "\n", 510 | " AutoARIMA/TopDown_method-forecast_proportions \\\n", 511 | "level metric \n", 512 | "Country mse-scaled 0.317897 \n", 513 | "Country/Purpose mse-scaled 0.262216 \n", 514 | "Country/Purpose/State mse-scaled 0.320349 \n", 515 | "Country/Purpose/State/CityNonCity mse-scaled 0.323261 \n", 516 | "Overall mse-scaled 0.297072 \n", 517 | "\n", 518 | " AutoARIMA/TopDown_method-proportion_averages \\\n", 519 | "level metric \n", 520 | "Country mse-scaled 0.317897 \n", 521 | "Country/Purpose mse-scaled 0.320225 \n", 522 | "Country/Purpose/State mse-scaled 0.511356 \n", 523 | "Country/Purpose/State/CityNonCity mse-scaled 0.509784 \n", 524 | "Overall mse-scaled 0.364775 \n", 525 | "\n", 526 | " AutoARIMA/MiddleOut_middle_level-Country/Purpose/State_top_down_method-proportion_averages \n", 527 | "level metric \n", 528 | "Country mse-scaled 0.305053 \n", 529 | "Country/Purpose mse-scaled 0.196062 \n", 530 | "Country/Purpose/State mse-scaled 0.268057 \n", 531 | "Country/Purpose/State/CityNonCity mse-scaled 0.280599 \n", 532 | "Overall mse-scaled 0.255038 " 533 | ] 534 | }, 535 | "execution_count": null, 536 | "metadata": {}, 537 | "output_type": "execute_result" 538 | } 539 | ], 540 | "source": [ 541 | "df = Y_rec_df.merge(Y_test_df, on=['unique_id', 'ds'])\n", 542 | "evaluation = evaluate(df = df,\n", 543 | " tags = tags,\n", 544 | " train_df = Y_train_df,\n", 545 | " metrics = [mse],\n", 546 | " benchmark=\"Naive\")\n", 547 | "\n", 548 | "evaluation.set_index([\"level\", \"metric\"]).filter(like=\"ARIMA\", axis=1)" 549 | ] 550 | }, 551 | { 552 | "cell_type": "markdown", 553 | "id": "a51830f5", 554 | "metadata": {}, 555 | "source": [ 556 | "### References\n", 557 | "- [Orcutt, G.H., Watts, H.W., & Edwards, J.B.(1968). Data aggregation and information loss. The American \n", 558 | "Economic Review, 58 , 773(787)](http://www.jstor.org/stable/1815532).\n", 559 | "- [Disaggregation methods to expedite product line forecasting. Journal of Forecasting, 9 , 233–254. \n", 560 | "doi:10.1002/for.3980090304](https://onlinelibrary.wiley.com/doi/abs/10.1002/for.3980090304).
\n", 561 | "- [An investigation of aggregate variable time series forecast strategies with specific subaggregate \n", 562 | "time series statistical correlation. Computers and Operations Research, 26 , 1133–1149. \n", 563 | "doi:10.1016/S0305-0548(99)00017-9](https://doi.org/10.1016/S0305-0548(99)00017-9).\n", 564 | "- [Hyndman, R.J., & Athanasopoulos, G. (2021). \"Forecasting: principles and practice, 3rd edition: \n", 565 | "Chapter 11: Forecasting hierarchical and grouped series.\". OTexts: Melbourne, Australia. OTexts.com/fpp3 \n", 566 | "Accessed on July 2022.](https://otexts.com/fpp3/hierarchical.html)" 567 | ] 568 | } 569 | ], 570 | "metadata": { 571 | "kernelspec": { 572 | "display_name": "python3", 573 | "language": "python", 574 | "name": "python3" 575 | } 576 | }, 577 | "nbformat": 4, 578 | "nbformat_minor": 5 579 | } 580 | -------------------------------------------------------------------------------- /nbs/examples/TourismSmallPolars.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "843cf8de-d678-4243-a8af-d78439058e6a", 7 | "metadata": {}, 8 | "source": [ 9 | "# Quick Start (Polars)\n", 10 | "\n", 11 | "> Minimal Example of Hierarchical Reconciliation using Polars" 12 | ] 13 | }, 14 | { 15 | "attachments": {}, 16 | "cell_type": "markdown", 17 | "id": "0a836410-7063-4766-b03c-7d22b6abe457", 18 | "metadata": {}, 19 | "source": [ 20 | "Large collections of time series organized into structures at different aggregation levels often require their forecasts to follow their aggregation constraints, which poses the challenge of creating novel algorithms capable of coherent forecasts.\n", 21 | "\n", 22 | "The `HierarchicalForecast` package provides a wide collection of Python implementations of hierarchical forecasting algorithms that follow classic hierarchical reconciliation.\n", 23 | "\n", 24 | "In this notebook we will show how to use the `StatsForecast` library to produce base forecasts, and use `HierarchicalForecast` package to perform hierarchical reconciliation." 25 | ] 26 | }, 27 | { 28 | "attachments": {}, 29 | "cell_type": "markdown", 30 | "id": "46e647a5", 31 | "metadata": {}, 32 | "source": [ 33 | "You can run these experiments using CPU or GPU with Google Colab.\n", 34 | "\n", 35 | "\"Open" 36 | ] 37 | }, 38 | { 39 | "attachments": {}, 40 | "cell_type": "markdown", 41 | "id": "c0dc4e6d", 42 | "metadata": {}, 43 | "source": [ 44 | "## 1. Libraries" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "id": "7f777bdd-dff4-4bc0-8529-b492874de6f0", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "%%capture\n", 55 | "!pip install hierarchicalforecast statsforecast datasetsforecast" 56 | ] 57 | }, 58 | { 59 | "attachments": {}, 60 | "cell_type": "markdown", 61 | "id": "6221152f", 62 | "metadata": {}, 63 | "source": [ 64 | "## 2. Load Data" 65 | ] 66 | }, 67 | { 68 | "attachments": {}, 69 | "cell_type": "markdown", 70 | "id": "9809d816", 71 | "metadata": {}, 72 | "source": [ 73 | "In this example we will use the `TourismSmall` dataset. The following cell gets the time series for the different levels in the hierarchy, the summing matrix `S` which recovers the full dataset from the bottom level hierarchy and the indices of each hierarchy denoted by `tags`." 
74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "id": "f7a5828a-2fb4-4811-9e07-0ee291331978", 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "import numpy as np\n", 84 | "import polars as pl\n", 85 | "\n", 86 | "from datasetsforecast.hierarchical import HierarchicalData, HierarchicalInfo" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "id": "c18a4300-5b8f-45b5-92ce-e52f8c4dab20", 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "group_name = 'TourismSmall'\n", 97 | "group = HierarchicalInfo.get_group(group_name)\n", 98 | "Y_df, S_df, tags = HierarchicalData.load('./data', group_name)\n", 99 | "\n", 100 | "Y_df = pl.from_pandas(Y_df)\n", 101 | "S_df = pl.from_pandas(S_df.reset_index(names=\"unique_id\"))\n", 102 | "Y_df = Y_df.with_columns(pl.col('ds').cast(pl.Date))" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "id": "b964394e-6a79-4c75-be74-3c3994e1bf58", 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/html": [ 114 | "
\n", 121 | "shape: (6, 6)
unique_idnsw-hol-citynsw-hol-noncityvic-hol-cityvic-hol-noncityqld-hol-city
strf64f64f64f64f64
"total"1.01.01.01.01.0
"hol"1.01.01.01.01.0
"vfr"0.00.00.00.00.0
"bus"0.00.00.00.00.0
"oth"0.00.00.00.00.0
"nsw-hol"1.01.00.00.00.0
" 122 | ], 123 | "text/plain": [ 124 | "shape: (6, 6)\n", 125 | "┌───────────┬──────────────┬─────────────────┬──────────────┬─────────────────┬──────────────┐\n", 126 | "│ unique_id ┆ nsw-hol-city ┆ nsw-hol-noncity ┆ vic-hol-city ┆ vic-hol-noncity ┆ qld-hol-city │\n", 127 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", 128 | "│ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", 129 | "╞═══════════╪══════════════╪═════════════════╪══════════════╪═════════════════╪══════════════╡\n", 130 | "│ total ┆ 1.0 ┆ 1.0 ┆ 1.0 ┆ 1.0 ┆ 1.0 │\n", 131 | "│ hol ┆ 1.0 ┆ 1.0 ┆ 1.0 ┆ 1.0 ┆ 1.0 │\n", 132 | "│ vfr ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", 133 | "│ bus ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", 134 | "│ oth ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", 135 | "│ nsw-hol ┆ 1.0 ┆ 1.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", 136 | "└───────────┴──────────────┴─────────────────┴──────────────┴─────────────────┴──────────────┘" 137 | ] 138 | }, 139 | "execution_count": null, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | } 143 | ], 144 | "source": [ 145 | "S_df[:6, :6]" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "id": "fd32f1a5-2b60-454e-afc9-6911f84f6698", 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "{'Country': array(['total'], dtype=object),\n", 158 | " 'Country/Purpose': array(['hol', 'vfr', 'bus', 'oth'], dtype=object),\n", 159 | " 'Country/Purpose/State': array(['nsw-hol', 'vic-hol', 'qld-hol', 'sa-hol', 'wa-hol', 'tas-hol',\n", 160 | " 'nt-hol', 'nsw-vfr', 'vic-vfr', 'qld-vfr', 'sa-vfr', 'wa-vfr',\n", 161 | " 'tas-vfr', 'nt-vfr', 'nsw-bus', 'vic-bus', 'qld-bus', 'sa-bus',\n", 162 | " 'wa-bus', 'tas-bus', 'nt-bus', 'nsw-oth', 'vic-oth', 'qld-oth',\n", 163 | " 'sa-oth', 'wa-oth', 'tas-oth', 'nt-oth'], dtype=object),\n", 164 | " 'Country/Purpose/State/CityNonCity': array(['nsw-hol-city', 'nsw-hol-noncity', 'vic-hol-city',\n", 165 | " 'vic-hol-noncity', 'qld-hol-city', 'qld-hol-noncity',\n", 166 | " 'sa-hol-city', 'sa-hol-noncity', 'wa-hol-city', 'wa-hol-noncity',\n", 167 | " 'tas-hol-city', 'tas-hol-noncity', 'nt-hol-city', 'nt-hol-noncity',\n", 168 | " 'nsw-vfr-city', 'nsw-vfr-noncity', 'vic-vfr-city',\n", 169 | " 'vic-vfr-noncity', 'qld-vfr-city', 'qld-vfr-noncity',\n", 170 | " 'sa-vfr-city', 'sa-vfr-noncity', 'wa-vfr-city', 'wa-vfr-noncity',\n", 171 | " 'tas-vfr-city', 'tas-vfr-noncity', 'nt-vfr-city', 'nt-vfr-noncity',\n", 172 | " 'nsw-bus-city', 'nsw-bus-noncity', 'vic-bus-city',\n", 173 | " 'vic-bus-noncity', 'qld-bus-city', 'qld-bus-noncity',\n", 174 | " 'sa-bus-city', 'sa-bus-noncity', 'wa-bus-city', 'wa-bus-noncity',\n", 175 | " 'tas-bus-city', 'tas-bus-noncity', 'nt-bus-city', 'nt-bus-noncity',\n", 176 | " 'nsw-oth-city', 'nsw-oth-noncity', 'vic-oth-city',\n", 177 | " 'vic-oth-noncity', 'qld-oth-city', 'qld-oth-noncity',\n", 178 | " 'sa-oth-city', 'sa-oth-noncity', 'wa-oth-city', 'wa-oth-noncity',\n", 179 | " 'tas-oth-city', 'tas-oth-noncity', 'nt-oth-city', 'nt-oth-noncity'],\n", 180 | " dtype=object)}" 181 | ] 182 | }, 183 | "execution_count": null, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "tags" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "id": "56a7aadb-6e2c-456a-a0b5-b29b30deadb5", 195 | "metadata": {}, 196 | "source": [ 197 | "We split the dataframe in train/test splits." 
198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "id": "462451d8-2fc0-445e-9458-908811011dd9", 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "Y_test_df = Y_df.group_by('unique_id').tail(group.horizon)\n", 208 | "Y_train_df = Y_df.filter(pl.col('ds') < Y_test_df['ds'].min())" 209 | ] 210 | }, 211 | { 212 | "attachments": {}, 213 | "cell_type": "markdown", 214 | "id": "1958d4e6", 215 | "metadata": {}, 216 | "source": [ 217 | "## 3. Base forecasts" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "id": "b7cfb43a-cd16-418c-a04b-e075c176cc9e", 223 | "metadata": {}, 224 | "source": [ 225 | "The following cell computes the *base forecast* for each time series using the `auto_arima` and `naive` models. Observe that `Y_hat_df` contains the forecasts but they are not coherent." 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "id": "ce5017ee", 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "from statsforecast.core import StatsForecast\n", 236 | "from statsforecast.models import AutoARIMA, Naive" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "id": "f99e7b7b-f4b8-4f2f-a1a7-c8be98a1e280", 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "fcst = StatsForecast(\n", 247 | " models=[AutoARIMA(season_length=group.seasonality), Naive()], \n", 248 | " freq=\"1q\", \n", 249 | " n_jobs=-1\n", 250 | ")\n", 251 | "Y_hat_df = fcst.forecast(df=Y_train_df, h=group.horizon)" 252 | ] 253 | }, 254 | { 255 | "attachments": {}, 256 | "cell_type": "markdown", 257 | "id": "ef1c9163", 258 | "metadata": {}, 259 | "source": [ 260 | "## 4. Hierarchical reconciliation" 261 | ] 262 | }, 263 | { 264 | "attachments": {}, 265 | "cell_type": "markdown", 266 | "id": "cc296762-2009-4aef-8b31-f24aad9d0787", 267 | "metadata": {}, 268 | "source": [ 269 | "The following cell makes the previous forecasts coherent using the `HierarchicalReconciliation` class. The used methods to make the forecasts coherent are:\n", 270 | "\n", 271 | "- `BottomUp`: The reconciliation of the method is a simple addition to the upper levels.\n", 272 | "- `TopDown`: The second method constrains the base-level predictions to the top-most aggregate-level serie and then distributes it to the disaggregate series through the use of proportions. \n", 273 | "- `MiddleOut`: Anchors the base predictions in a middle level." 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "id": "63ec7e26", 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "from hierarchicalforecast.core import HierarchicalReconciliation\n", 284 | "from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "id": "a43be9e7-99a9-4981-bfd7-8552efba9751", 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "reconcilers = [\n", 295 | " BottomUp(),\n", 296 | " TopDown(method='forecast_proportions'),\n", 297 | " MiddleOut(middle_level='Country/Purpose/State', \n", 298 | " top_down_method='forecast_proportions')\n", 299 | "]\n", 300 | "hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n", 301 | "Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df, S=S_df, tags=tags)" 302 | ] 303 | }, 304 | { 305 | "attachments": {}, 306 | "cell_type": "markdown", 307 | "id": "6590a5e2", 308 | "metadata": {}, 309 | "source": [ 310 | "## 5. 
304 | { 305 | "attachments": {}, 306 | "cell_type": "markdown", 307 | "id": "6590a5e2", 308 | "metadata": {}, 309 | "source": [ 310 | "## 5. Evaluation" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "id": "03c4752c-53f8-4b1f-8169-32075b8e4050", 316 | "metadata": {}, 317 | "source": [ 318 | "The `HierarchicalForecast` package includes the `evaluate` function to evaluate the forecasts at each level of the hierarchy. Here we use the `mse` loss from `utilsforecast` to compute the mean squared error relative to the `Naive` baseline model, so scaled values below one improve on the baseline." 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "id": "a797f84d", 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [ 328 | "from hierarchicalforecast.evaluation import evaluate\n", 329 | "from utilsforecast.losses import mse" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "id": "a793cff0-e6bf-469d-86d8-cf6ce7a8d922", 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "data": { 340 | "text/html": [ 341 | "
\n", 348 | "shape: (5, 5)
levelmetricAutoARIMAAutoARIMA/BottomUpAutoARIMA/TopDown_method-forecast_proportions
strstrf64f64f64
"Country""mse-scaled"0.3178970.2269990.317897
"Country/Purpose""mse-scaled"0.3232070.1993590.251368
"Country/Purpose/State""mse-scaled"0.2661180.3057110.308241
"Country/Purpose/State/CityNonC…"mse-scaled"0.3051730.3051730.305913
"Overall""mse-scaled"0.3117070.2349340.289406
" 349 | ], 350 | "text/plain": [ 351 | "shape: (5, 5)\n", 352 | "┌──────────────────────────┬────────────┬───────────┬────────────────────┬─────────────────────────┐\n", 353 | "│ level ┆ metric ┆ AutoARIMA ┆ AutoARIMA/BottomUp ┆ AutoARIMA/TopDown_metho │\n", 354 | "│ --- ┆ --- ┆ --- ┆ --- ┆ d-forec… │\n", 355 | "│ str ┆ str ┆ f64 ┆ f64 ┆ --- │\n", 356 | "│ ┆ ┆ ┆ ┆ f64 │\n", 357 | "╞══════════════════════════╪════════════╪═══════════╪════════════════════╪═════════════════════════╡\n", 358 | "│ Country ┆ mse-scaled ┆ 0.317897 ┆ 0.226999 ┆ 0.317897 │\n", 359 | "│ Country/Purpose ┆ mse-scaled ┆ 0.323207 ┆ 0.199359 ┆ 0.251368 │\n", 360 | "│ Country/Purpose/State ┆ mse-scaled ┆ 0.266118 ┆ 0.305711 ┆ 0.308241 │\n", 361 | "│ Country/Purpose/State/Ci ┆ mse-scaled ┆ 0.305173 ┆ 0.305173 ┆ 0.305913 │\n", 362 | "│ tyNonC… ┆ ┆ ┆ ┆ │\n", 363 | "│ Overall ┆ mse-scaled ┆ 0.311707 ┆ 0.234934 ┆ 0.289406 │\n", 364 | "└──────────────────────────┴────────────┴───────────┴────────────────────┴─────────────────────────┘" 365 | ] 366 | }, 367 | "execution_count": null, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "df = Y_rec_df.join(Y_test_df, on=['unique_id', 'ds'])\n", 374 | "evaluation = evaluate(df = df,\n", 375 | " tags = tags,\n", 376 | " train_df = Y_train_df,\n", 377 | " metrics = [mse],\n", 378 | " benchmark=\"Naive\")\n", 379 | "\n", 380 | "evaluation[[\"level\", \"metric\", \"AutoARIMA\", \"AutoARIMA/BottomUp\", \"AutoARIMA/TopDown_method-forecast_proportions\"]]" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "id": "a51830f5", 386 | "metadata": {}, 387 | "source": [ 388 | "### References\n", 389 | "- [Orcutt, G.H., Watts, H.W., & Edwards, J.B.(1968). Data aggregation and information loss. The American \n", 390 | "Economic Review, 58 , 773(787)](http://www.jstor.org/stable/1815532).\n", 391 | "- [Disaggregation methods to expedite product line forecasting. Journal of Forecasting, 9 , 233–254. \n", 392 | "doi:10.1002/for.3980090304](https://onlinelibrary.wiley.com/doi/abs/10.1002/for.3980090304).
\n", 393 | "- [An investigation of aggregate variable time series forecast strategies with specific subaggregate \n", 394 | "time series statistical correlation. Computers and Operations Research, 26 , 1133–1149. \n", 395 | "doi:10.1016/S0305-0548(99)00017-9](https://doi.org/10.1016/S0305-0548(99)00017-9).\n", 396 | "- [Hyndman, R.J., & Athanasopoulos, G. (2021). \"Forecasting: principles and practice, 3rd edition: \n", 397 | "Chapter 11: Forecasting hierarchical and grouped series.\". OTexts: Melbourne, Australia. OTexts.com/fpp3 \n", 398 | "Accessed on July 2022.](https://otexts.com/fpp3/hierarchical.html)" 399 | ] 400 | } 401 | ], 402 | "metadata": { 403 | "kernelspec": { 404 | "display_name": "python3", 405 | "language": "python", 406 | "name": "python3" 407 | } 408 | }, 409 | "nbformat": 4, 410 | "nbformat_minor": 5 411 | } 412 | -------------------------------------------------------------------------------- /nbs/examples/imgs/AustralianDomesticTourism-results-fable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/nbs/examples/imgs/AustralianDomesticTourism-results-fable.png -------------------------------------------------------------------------------- /nbs/examples/imgs/AustralianPrisonPopulation-results-fable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/nbs/examples/imgs/AustralianPrisonPopulation-results-fable.png -------------------------------------------------------------------------------- /nbs/examples/imgs/hierarchical_motivation1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/nbs/examples/imgs/hierarchical_motivation1.png -------------------------------------------------------------------------------- /nbs/examples/imgs/hierarchical_motivation2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/nbs/examples/imgs/hierarchical_motivation2.png -------------------------------------------------------------------------------- /nbs/examples/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | order: 1 3 | title: Tutorials 4 | listing: 5 | fields: [title] 6 | type: table 7 | sort-ui: false 8 | filter-ui: false 9 | --- 10 | 11 | Click through to any of these tutorials to get started with `HierarchicalForecast`'s features. 
12 | -------------------------------------------------------------------------------- /nbs/favicon_png.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/nbs/favicon_png.png -------------------------------------------------------------------------------- /nbs/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "018f6145-b103-4f3e-b4cd-0aab4d8bbdb7", 7 | "metadata": {}, 8 | "source": [ 9 | "# Hierarchical Forecast 👑" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "id": "112249bc-95d7-42bd-9eb9-b6bb7dda5b20", 15 | "metadata": {}, 16 | "source": [ 17 | "Large collections of time series organized into structures at different aggregation levels often require their forecasts to follow their aggregation constraints, which poses the challenge of creating novel algorithms capable of producing coherent forecasts.\n", 18 | "\n", 19 | "**HierarchicalForecast** offers a collection of cross-sectional and temporal reconciliation methods, including `BottomUp`, `TopDown`, `MiddleOut`, `MinTrace` and `ERM`, as well as probabilistic coherent predictions including `Normality`, `Bootstrap`, and `PERMBU`." 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "id": "295fee6a-1ca0-47c5-81f3-5a7e52afc426", 25 | "metadata": {}, 26 | "source": [ 27 | "## 🎊 Features \n", 28 | "\n", 29 | "* Classic reconciliation methods:\n", 30 | " - `BottomUp`: Simple addition of the bottom-level forecasts up to the upper levels.\n", 31 | " - `TopDown`: Distributes the top-level forecasts through the hierarchy.\n", 32 | "* Alternative reconciliation methods:\n", 33 | " - `MiddleOut`: Anchors the base predictions at a middle level. The levels above the base predictions use a bottom-up approach, while the levels below use a top-down approach.\n", 34 | " - `MinTrace`: Minimizes the total forecast variance over the space of coherent forecasts, with the Minimum Trace reconciliation.\n", 35 | " - `ERM`: Optimizes the reconciliation matrix by minimizing an L1-regularized objective.\n", 36 | "* Probabilistic coherent methods:\n", 37 | " - `Normality`: Uses the MinTrace closed-form variance-covariance matrix under a normality assumption.\n", 38 | " - `Bootstrap`: Generates a distribution of hierarchically reconciled predictions using Gamakumara's bootstrap approach.\n", 39 | " - `PERMBU`: Reconciles independent sample predictions by reinjecting multivariate dependence with estimated rank permutation copulas, and performing a bottom-up aggregation.\n", 40 | "* Temporal reconciliation methods:\n", 41 | " - All reconciliation methods (except for the insample methods) are also available for temporal hierarchies.\n", 42 | "\n", 43 | "A minimal usage sketch for these reconcilers follows below. Missing something? Please open an issue here or write to us on [![Slack](https://img.shields.io/badge/Slack-4A154B?&logo=slack&logoColor=white)](https://join.slack.com/t/nixtlaworkspace/shared_invite/zt-135dssye9-fWTzMpv2WBthq8NK0Yvu6A)\n" 44 | ] 45 | },
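{ "cell_type": "markdown", "metadata": {}, "source": [ "All of the reconcilers above share a single interface (a minimal sketch, assuming in-sample fitted values are supplied for the methods that need them, such as `MinTrace` and `ERM`; see the complete quickstart below):\n", "\n", "```python\n", "from hierarchicalforecast.core import HierarchicalReconciliation\n", "from hierarchicalforecast.methods import BottomUp, MinTrace, ERM\n", "\n", "hrec = HierarchicalReconciliation(\n", "    reconcilers=[\n", "        BottomUp(),\n", "        MinTrace(method='mint_shrink'),  # minimum trace with shrunk covariance\n", "        ERM(method='reg'),               # L1-regularized reconciliation matrix\n", "    ]\n", ")\n", "# For probabilistic coherent predictions, pass `level` and an\n", "# `intervals_method` ('normality', 'bootstrap' or 'permbu') to `reconcile`.\n", "```" ] },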
46 | { 47 | "cell_type": "markdown", 48 | "id": "11131f77-4f6c-4232-9780-31afe1b9a034", 49 | "metadata": {}, 50 | "source": [ 51 | "## 📖 Why? \n", 52 | "\n", 53 | "**Short**: We want to contribute to the ML field by providing reliable baselines and benchmarks for hierarchical forecasting tasks in industry and academia. Here's the complete [paper](https://arxiv.org/abs/2207.03517).\n", 54 | "\n", 55 | "**Verbose**: `HierarchicalForecast` integrates publicly available processed datasets, evaluation metrics, and a curated set of statistical baselines. In this library we provide usage examples and references to extensive experiments where we showcase the baselines' use and evaluate the accuracy of their predictions. With this work, we hope to contribute to Machine Learning forecasting by bridging the gap to statistical and econometric modeling, as well as providing tools for the development of novel hierarchical forecasting algorithms rooted in a thorough comparison of these well-established models. We intend to continue maintaining and growing the repository, promoting collaboration across the forecasting community." 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "id": "2e3f7a11-d151-4662-a3b7-46f945141996", 61 | "metadata": {}, 62 | "source": [ 63 | "## 💻 Installation\n", 64 | "\n", 65 | "### PyPI\n", 66 | "\n", 67 | "You can install the *released version* of `HierarchicalForecast` from the [Python package index](https://pypi.org) with:\n", 68 | "\n", 69 | "```bash\n", 70 | "pip install hierarchicalforecast\n", 71 | "```\n", 72 | "\n", 73 | "(Installing inside a Python virtual environment or a conda environment is recommended.)\n", 74 | "\n", 75 | "### Conda\n", 76 | "\n", 77 | "You can also install the *released version* of `HierarchicalForecast` from [conda](https://anaconda.org) with:\n", 78 | "\n", 79 | "```bash\n", 80 | "conda install -c conda-forge hierarchicalforecast\n", 81 | "```\n", 82 | "\n", 83 | "(Installing inside a Python virtual environment or a conda environment is recommended.)\n", 84 | "\n", 85 | "### Dev Mode\n", 86 | "\n", 87 | "If you want to make some modifications to the code and see the effects in real time (without reinstalling), follow the steps below:\n", 88 | "\n", 89 | "```bash\n", 90 | "git clone https://github.com/Nixtla/hierarchicalforecast.git\n", 91 | "cd hierarchicalforecast\n", 92 | "pip install -e .\n", 93 | "```" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "id": "5dd31c3e-ffb6-4364-acf6-e4646fd693a9", 99 | "metadata": {}, 100 | "source": [ 101 | "## 🧬 How to use\n", 102 | "\n", 103 | "The following example needs `statsforecast` and `datasetsforecast` as additional packages. If they are not installed, install them via your preferred method, e.g. 
`pip install statsforecast datasetsforecast`.\n", 104 | "The `datasetsforecast` library allows us to download hierarchical datasets and we will use `statsforecast` to compute base forecasts to be reconciled.\n", 105 | "\n", 106 | "You can open this example in Colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nixtla/hierarchicalforecast/blob/main/nbs/examples/TourismSmall.ipynb)\n", 107 | "\n", 108 | "```python\n", 109 | "import pandas as pd\n", 110 | "\n", 111 | "# obtain the hierarchical dataset\n", 112 | "from datasetsforecast.hierarchical import HierarchicalData\n", 113 | "\n", 114 | "# compute base forecasts (not yet coherent)\n", 115 | "from statsforecast.core import StatsForecast\n", 116 | "from statsforecast.models import AutoARIMA, Naive\n", 117 | "\n", 118 | "# obtain hierarchical reconciliation methods and evaluation\n", 119 | "from hierarchicalforecast.core import HierarchicalReconciliation\n", 120 | "from hierarchicalforecast.evaluation import evaluate\n", 121 | "from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut\n", 122 | "from utilsforecast.losses import mse\n", 123 | "\n", 124 | "# Load TourismSmall dataset\n", 125 | "Y_df, S, tags = HierarchicalData.load('./data', 'TourismSmall')\n", 126 | "Y_df['ds'] = pd.to_datetime(Y_df['ds'])\n", 127 | "S = S.reset_index(names=\"unique_id\")\n", 128 | "\n", 129 | "# split train/test sets\n", 130 | "Y_test_df = Y_df.groupby('unique_id').tail(4)\n", 131 | "Y_train_df = Y_df.drop(Y_test_df.index)\n", 132 | "\n", 133 | "# Compute base auto-ARIMA predictions\n", 134 | "fcst = StatsForecast(models=[AutoARIMA(season_length=4), Naive()],\n", 135 | " freq='QE', n_jobs=-1)\n", 136 | "Y_hat_df = fcst.forecast(df=Y_train_df, h=4)\n", 137 | "\n", 138 | "# Reconcile the base predictions\n", 139 | "reconcilers = [\n", 140 | " BottomUp(),\n", 141 | " TopDown(method='forecast_proportions'),\n", 142 | " MiddleOut(middle_level='Country/Purpose/State',\n", 143 | " top_down_method='forecast_proportions')\n", 144 | "]\n", 145 | "hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n", 146 | "Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df,\n", 147 | " S=S, tags=tags)\n", 148 | "```" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "id": "ac517335-d4fa-4d56-9cfa-a19782280aa4", 154 | "metadata": {}, 155 | "source": [ 156 | "### Evaluation\n", 157 | "\n", 158 | "```python\n", 159 | "df = Y_rec_df.merge(Y_test_df, on=['unique_id', 'ds'], how='left')\n", 160 | "\n", 161 | "evaluate(df=df, metrics=[mse],\n", 162 | " tags=tags, benchmark='Naive')\n", 163 | "```" 164 | ] 165 | }, 166 | { 167 | "attachments": {}, 168 | "cell_type": "markdown", 169 | "id": "16dc2940-a4e0-486c-bac5-403f9084d6ac", 170 | "metadata": {}, 171 | "source": [ 172 | "## How to cite\n", 173 | "\n", 174 | "Here's the complete [paper](https://arxiv.org/abs/2207.03517).\n", 175 | "\n", 176 | "```bibtex\n", 177 | "@article{olivares2022hierarchicalforecast,\n", 178 | " author = {Kin G. Olivares and\n", 179 | " Federico Garza and \n", 180 | " David Luo and \n", 181 | " Cristian Challú and\n", 182 | " Max Mergenthaler and\n", 183 | " Souhaib Ben Taieb and\n", 184 | " Shanika L. 
Wickramasuriya and\n", 185 | " Artur Dubrawski},\n", 186 | " title = {{HierarchicalForecast}: A Reference Framework for Hierarchical Forecasting in Python},\n", 187 | " journal = {Work in progress paper, submitted to Journal of Machine Learning Research.},\n", 188 | " volume = {abs/2207.03517},\n", 189 | " year = {2022},\n", 190 | " url = {https://arxiv.org/abs/2207.03517},\n", 191 | " archivePrefix = {arXiv}\n", 192 | "}\n", 193 | "```" 194 | ] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "python3", 200 | "language": "python", 201 | "name": "python3" 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 5 206 | } 207 | -------------------------------------------------------------------------------- /nbs/mint.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://mintlify.com/schema.json", 3 | "name": "Nixtla", 4 | "logo": { 5 | "light": "/light.png", 6 | "dark": "/dark.png" 7 | }, 8 | "favicon": "/favicon.svg", 9 | "colors": { 10 | "primary": "#0E0E0E", 11 | "light": "#FAFAFA", 12 | "dark": "#0E0E0E", 13 | "anchors": { 14 | "from": "#2AD0CA", 15 | "to": "#0E00F8" 16 | } 17 | }, 18 | "topbarCtaButton": { 19 | "type": "github", 20 | "url": "https://github.com/Nixtla/hierarchicalforecast" 21 | }, 22 | "topAnchor": { 23 | "name": "HierarchicalForecast", 24 | "icon": "crown" 25 | }, 26 | "navigation": [ 27 | { 28 | "group": "", 29 | "pages": ["index.html"] 30 | }, 31 | { 32 | "group": "Getting Started", 33 | "pages": [ 34 | "examples/installation.html", 35 | "examples/tourismsmall.html", 36 | "examples/tourismsmallpolars.html", 37 | "examples/introduction.html" 38 | ] 39 | }, 40 | { 41 | "group": "Tutorials", 42 | "pages": [ 43 | { 44 | "group": "Point Reconciliation", 45 | "pages": [ 46 | "examples/australiandomestictourism.html", 47 | "examples/australianprisonpopulation.html", 48 | "examples/nonnegativereconciliation.html" 49 | ] 50 | }, 51 | { 52 | "group": "Probabilistic Reconciliation", 53 | "pages": [ 54 | "examples/australiandomestictourism-intervals.html", 55 | "examples/australiandomestictourism-bootstraped-intervals.html", 56 | "examples/australiandomestictourism-permbu-intervals.html", 57 | "examples/tourismlarge-evaluation.html" 58 | ] 59 | }, 60 | { 61 | "group": "Temporal Reconciliation", 62 | "pages": [ 63 | "examples/australiandomestictourismtemporal.html", 64 | "examples/australiandomestictourismcrosstemporal.html", 65 | "examples/m3withthief.html", 66 | "examples/localglobalaggregation.html" 67 | ] 68 | }, 69 | "examples/mlframeworksexample.html" 70 | ] 71 | }, 72 | { 73 | "group": "API Reference", 74 | "pages": [ 75 | "src/core.html", 76 | "src/methods.html", 77 | "src/probabilistic_methods.html", 78 | "src/evaluation.html", 79 | "src/utils.html" 80 | ] 81 | } 82 | ] 83 | } 84 | -------------------------------------------------------------------------------- /nbs/nbdev.yml: -------------------------------------------------------------------------------- 1 | project: 2 | output-dir: _docs 3 | 4 | website: 5 | title: "hierarchicalforecast" 6 | site-url: "https://Nixtla.github.io/hierarchicalforecast/" 7 | description: "Hierarchical Methods Time series forecasting" 8 | repo-branch: main 9 | repo-url: "https://github.com/Nixtla/hierarchicalforecast/" 10 | -------------------------------------------------------------------------------- /nbs/sidebar.yml: -------------------------------------------------------------------------------- 1 | website: 2 | sidebar: 3 | 
collapse-level: 1 4 | contents: 5 | - index.ipynb 6 | - text: "--" 7 | - section: "Getting Started" 8 | contents: 9 | - examples/Installation.ipynb 10 | - examples/TourismSmall.ipynb 11 | - examples/TourismSmallPolars.ipynb 12 | - examples/Introduction.ipynb 13 | - section: Tutorials 14 | contents: 15 | - section: Point Reconciliation 16 | contents: 17 | - examples/AustralianDomesticTourism.ipynb 18 | - examples/AustralianPrisonPopulation.ipynb 19 | - examples/NonNegativeReconciliation.ipynb 20 | - section: Probabilistic Reconciliation 21 | contents: 22 | - examples/AustralianDomesticTourism-Intervals.ipynb 23 | - examples/AustralianDomesticTourism-Bootstraped-Intervals.ipynb 24 | - examples/AustralianDomesticTourism-Permbu-Intervals.ipynb 25 | - examples/TourismLarge-Evaluation.ipynb 26 | - section: ML Forecast Reconciliation 27 | contents: 28 | - examples/MLFrameworksExample.ipynb 29 | - section: "API Reference" 30 | contents: src/* 31 | - section: Community 32 | contents: 33 | - Contributing -------------------------------------------------------------------------------- /nbs/styles.css: -------------------------------------------------------------------------------- 1 | .cell { 2 | margin-bottom: 1rem; 3 | } 4 | 5 | .cell > .sourceCode { 6 | margin-bottom: 0; 7 | } 8 | 9 | .cell-output > pre { 10 | margin-bottom: 0; 11 | } 12 | 13 | .cell-output > pre, .cell-output > .sourceCode > pre, .cell-output-stdout > pre { 14 | margin-left: 0.8rem; 15 | margin-top: 0; 16 | background: none; 17 | border-left: 2px solid lightsalmon; 18 | border-top-left-radius: 0; 19 | border-top-right-radius: 0; 20 | } 21 | 22 | .cell-output > .sourceCode { 23 | border: none; 24 | } 25 | 26 | .cell-output > .sourceCode { 27 | background: none; 28 | margin-top: 0; 29 | } 30 | 31 | div.description { 32 | padding-left: 2px; 33 | padding-top: 5px; 34 | font-style: italic; 35 | font-size: 135%; 36 | opacity: 70%; 37 | } 38 | 39 | /* show_doc signature */ 40 | blockquote > pre { 41 | font-size: 14px; 42 | } 43 | 44 | .table { 45 | font-size: 16px; 46 | /* disable striped tables */ 47 | --bs-table-striped-bg: var(--bs-table-bg); 48 | } 49 | 50 | .quarto-figure-center > figure > figcaption { 51 | text-align: center; 52 | } 53 | 54 | .figure-caption { 55 | font-size: 75%; 56 | font-style: italic; 57 | } -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff.lint] 2 | select = [ 3 | "F", # pyflakes 4 | ] 5 | -------------------------------------------------------------------------------- /settings.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | host = github 3 | lib_name = hierarchicalforecast 4 | user = Nixtla 5 | description = Hierarchical Methods Time series forecasting 6 | keywords = time-series forecasting datasets hierarchical 7 | author = Nixtla 8 | author_email = business@nixtla.io 9 | copyright = Nixtla Inc. 
10 | branch = main 11 | version = 1.2.1 12 | min_python = 3.9 13 | audience = Developers 14 | language = English 15 | custom_sidebar = True 16 | license = apache2 17 | status = 2 18 | requirements = numpy, numba, pandas>=2.1.0, scikit-learn>=1.2, quadprog, clarabel, matplotlib, narwhals>=1.27.0, utilsforecast>=0.2.12, intel-cmplr-lib-rt ; platform_system!="Darwin" and platform_machine=="x86_64" 19 | dev_requirements = datasetsforecast ipython<=8.32.0 nbdev statsforecast>=1.0.0 requests scipy pre-commit ruff black pytest pytest-benchmark 20 | polars_requirements = polars[numpy] 21 | nbs_path = nbs 22 | doc_path = _docs 23 | recursive = True 24 | doc_host = https://%(user)s.github.io 25 | doc_baseurl = /%(lib_name)s/ 26 | git_url = https://github.com/%(user)s/hierarchicalforecast/ 27 | lib_path = %(lib_name)s 28 | title = %(lib_name)s 29 | black_formatting = True 30 | jupyter_hooks = True 31 | clean_ids = True 32 | readme_nb = index.ipynb 33 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from pkg_resources import parse_version 2 | from configparser import ConfigParser 3 | import setuptools 4 | assert parse_version(setuptools.__version__)>=parse_version('36.2') 5 | 6 | # note: all settings are in settings.ini; edit there, not here 7 | config = ConfigParser(delimiters=['=']) 8 | config.read('settings.ini') 9 | cfg = config['DEFAULT'] 10 | 11 | cfg_keys = 'version description keywords author author_email'.split() 12 | expected = cfg_keys + "lib_name user branch license status min_python audience language".split() 13 | for o in expected: assert o in cfg, "missing expected setting: {}".format(o) 14 | setup_cfg = {o:cfg[o] for o in cfg_keys} 15 | 16 | licenses = { 17 | 'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'), 18 | 'mit': ('MIT License', 'OSI Approved :: MIT License'), 19 | 'gpl2': ('GNU General Public License v2', 'OSI Approved :: GNU General Public License v2 (GPLv2)'), 20 | 'gpl3': ('GNU General Public License v3', 'OSI Approved :: GNU General Public License v3 (GPLv3)'), 21 | 'bsd3': ('BSD License', 'OSI Approved :: BSD License'), 22 | } 23 | statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha', 24 | '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ] 25 | py_versions = '3.9 3.10 3.11 3.12'.split() 26 | 27 | requirements = cfg['requirements'].split(',') 28 | if cfg.get('pip_requirements'): requirements += cfg.get('pip_requirements','').split() 29 | min_python = cfg['min_python'] 30 | lic = licenses.get(cfg['license'].lower(), (cfg['license'], None)) 31 | dev_requirements = (cfg.get('dev_requirements') or '').split() 32 | polars_requirements = (cfg.get('polars_requirements') or '').split() 33 | dev_requirements.extend(polars_requirements) 34 | 35 | setuptools.setup( 36 | name = 'hierarchicalforecast', 37 | license = lic[0], 38 | classifiers = [ 39 | 'Development Status :: ' + statuses[int(cfg['status'])], 40 | 'Intended Audience :: ' + cfg['audience'].title(), 41 | 'Natural Language :: ' + cfg['language'].title(), 42 | ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]] + (['License :: ' + lic[1] ] if lic[1] else []), 43 | url = cfg['git_url'], 44 | packages = setuptools.find_packages(), 45 | include_package_data = True, 46 | install_requires = requirements, 47 | extras_require={'dev': dev_requirements, 48 | 'polars': polars_requirements, 49 | }, 50 | 
dependency_links = cfg.get('dep_links','').split(), 51 | python_requires = '>=' + cfg['min_python'], 52 | long_description = open('README.md', encoding='utf8').read(), 53 | long_description_content_type = 'text/markdown', 54 | zip_safe = False, 55 | entry_points = { 56 | 'console_scripts': cfg.get('console_scripts','').split(), 57 | 'nbdev': [f'{cfg.get("lib_path")}={cfg.get("lib_path")}._modidx:d'] 58 | }, 59 | **setup_cfg) 60 | 61 | 62 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/hierarchicalforecast/162e42f4143201d44fdd0480306dd4cc19776038/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_benchmark.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pytest 4 | from scipy import sparse 5 | from hierarchicalforecast.methods import MinTrace, ERM, BottomUp 6 | from hierarchicalforecast.utils import _ma_cov 7 | from statsforecast.core import StatsForecast 8 | from statsforecast.models import AutoETS 9 | 10 | from hierarchicalforecast.utils import aggregate 11 | from hierarchicalforecast.core import HierarchicalReconciliation 12 | 13 | import pytest_benchmark # noqa: F401 14 | 15 | #%% MinT benchmarks 16 | # run using: pytest tests\test_benchmark.py -v -s --benchmark-min-rounds=20 17 | def _create_reconciler_inputs(n_bottom_timeseries): 18 | # Create random hierarchy 19 | h = 100 20 | insample = 1000 21 | max_levels_random = 5 22 | max_categories_per_random_level = 10 23 | rng = np.random.default_rng(0) 24 | ones = np.ones(n_bottom_timeseries, dtype=np.float64) 25 | idx_range = np.arange(n_bottom_timeseries) 26 | n_levels_random = rng.integers(1, max_levels_random + 1) 27 | S_aggs_list = [] 28 | for _ in range(n_levels_random): 29 | n_categories_per_level = rng.integers(2, max_categories_per_random_level + 1) 30 | codes = rng.integers(0, n_categories_per_level, size=(n_bottom_timeseries, )) 31 | S_agg = sparse.csr_matrix((ones, (codes, idx_range))) 32 | S_aggs_list.append(S_agg) 33 | S_aggs = sparse.vstack(S_aggs_list) 34 | # Create top and bottom level 35 | S_top = sparse.csr_matrix(ones, dtype=np.float64) 36 | S_bottom = sparse.eye(n_bottom_timeseries, dtype=np.float64, format="csr") 37 | # Construct S: stack top, aggregations and bottom 38 | S_sp = sparse.vstack([S_top, S_aggs, S_bottom]) 39 | 40 | y_hat_bottom = np.vstack([i * np.ones(h, dtype=np.float64) for i in range(n_bottom_timeseries)]) 41 | y_hat_bottom_insample = np.vstack([i * np.ones(insample, dtype=np.float64) for i in range(n_bottom_timeseries)]) 42 | y_bottom = y_hat_bottom_insample + rng.normal(size=(n_bottom_timeseries, insample)) 43 | 44 | S = S_sp.toarray() 45 | y_insample = S @ y_bottom 46 | y_hat_insample = S @ y_hat_bottom_insample 47 | idx_bottom = np.arange(start=S.shape[0] - n_bottom_timeseries, stop=S.shape[0]) 48 | y_hat=S @ y_hat_bottom 49 | 50 | return S, y_hat, y_insample, y_hat_insample, idx_bottom 51 | 52 | @pytest.mark.parametrize("n_bottom_timeseries", [20]) 53 | @pytest.mark.parametrize("with_nans", (False, True)) 54 | def test_mint(benchmark, n_bottom_timeseries, with_nans): 55 | S, y_hat, y_insample, y_hat_insample, idx_bottom = _create_reconciler_inputs(n_bottom_timeseries) 56 | if with_nans: 57 | y_insample[-1, :-1] = np.nan 58 | y_hat_insample[-1, :-1] = np.nan 59 | 60 | 
cls_min_trace = MinTrace(method='mint_shrink') 61 | result_min_trace = benchmark(cls_min_trace, S=S, y_hat=y_hat, y_insample=y_insample, y_hat_insample=y_hat_insample, idx_bottom=idx_bottom) # noqa: F841 62 | 63 | @pytest.mark.parametrize("n_bottom_timeseries", [20]) 64 | @pytest.mark.parametrize("with_nans", (False, True)) 65 | def test_cov(benchmark, n_bottom_timeseries, with_nans): 66 | S, y_hat, y_insample, y_hat_insample, idx_bottom = _create_reconciler_inputs(n_bottom_timeseries) 67 | if with_nans: 68 | y_insample[-1, :-1] = np.nan 69 | y_hat_insample[-1, :-1] = np.nan 70 | 71 | residuals = (y_insample - y_hat_insample) 72 | nan_mask = np.isnan(residuals) 73 | 74 | result = benchmark(_ma_cov, residuals, ~nan_mask) # noqa: F841 75 | 76 | @pytest.mark.parametrize("n_bottom_timeseries", [10]) 77 | @pytest.mark.parametrize("erm_method", ['reg', 'reg_bu']) 78 | def test_erm_reg(benchmark, n_bottom_timeseries, erm_method): 79 | S, y_hat, y_insample, y_hat_insample, idx_bottom = _create_reconciler_inputs(n_bottom_timeseries) 80 | 81 | cls_erm = ERM(method=erm_method) 82 | result_erm = benchmark(cls_erm, S=S, y_hat=y_hat, y_insample=y_insample, y_hat_insample=y_hat_insample, idx_bottom=idx_bottom) # noqa: F841 83 | 84 | @pytest.fixture 85 | def load_tourism(): 86 | df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv') 87 | df = df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1) 88 | df.insert(0, 'Country', 'Australia') 89 | return df 90 | 91 | # run with: pytest tests\test_benchmark.py::test_reconciler -v -s --benchmark-min-rounds=20 --disable-warnings 92 | @pytest.mark.parametrize("reconciler", [MinTrace(method='mint_shrink'), BottomUp()]) 93 | def test_reconciler(benchmark, reconciler, load_tourism): 94 | 95 | # Load the Australian Tourism dataset 96 | df = load_tourism 97 | 98 | # Create hierarchical series based on geographic levels and purpose, 99 | # and convert the quarterly ds strings to pd.datetime format 100 | hierarchy_levels = [['Country'], 101 | ['Country', 'State'], 102 | ['Country', 'State', 'Region'], 103 | ['Country', 'State', 'Region', 'Purpose']] 104 | 105 | Y_df, S_df, tags = aggregate(df=df, spec=hierarchy_levels) 106 | qs = Y_df['ds'].str.replace(r'(\d+) (Q\d)', r'\1-\2', regex=True) 107 | Y_df['ds'] = pd.PeriodIndex(qs, freq='Q').to_timestamp() 108 | 109 | # Split train/test sets 110 | Y_test_df = Y_df.groupby('unique_id').tail(8) 111 | Y_train_df = Y_df.drop(Y_test_df.index) 112 | 113 | # Compute base auto-ETS predictions 114 | # Be careful to identify the correct data frequency; this data is quarterly ('QS') 115 | fcst = StatsForecast(models=[AutoETS(season_length=4, model='ZZA')], freq='QS', n_jobs=-1) 116 | Y_hat_df = fcst.forecast(df=Y_train_df, h=8, fitted=True).reset_index() 117 | Y_fitted_df = fcst.forecast_fitted_values().reset_index() 118 | 119 | reconcilers = [reconciler] 120 | hrec = HierarchicalReconciliation(reconcilers=reconcilers) 121 | 122 | result = benchmark(hrec.reconcile, Y_hat_df=Y_hat_df, Y_df=Y_fitted_df, S=S_df, tags=tags) # noqa: F841 --------------------------------------------------------------------------------