├── .devcontainer └── devcontainer.json ├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ ├── config.yml │ ├── documentation-issue.yml │ └── feature-request.yml ├── dependabot.yml ├── release-drafter.yml └── workflows │ ├── build-docs.yaml │ ├── ci.yaml │ ├── deploy-readme.yaml │ ├── lint.yaml │ ├── models-performance.yaml │ ├── no-response.yaml │ ├── python-publish.yml │ ├── release-drafter.yml │ └── test-publish.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── QuickStart.gif ├── QuickstartTGPT.mp4 ├── README.md ├── action_files ├── __init__.py ├── comment_file.py ├── models_performance │ ├── experiments.yaml │ └── main.py └── readme_com │ ├── create_readme_docs.sh │ ├── create_sdk_reference.py │ └── modify_markdown.py ├── experiments ├── amazon-chronos │ ├── README.md │ ├── environment.yml │ └── src │ │ ├── amazon_chronos │ │ ├── forecaster.py │ │ └── pipeline.py │ │ ├── main.py │ │ ├── statsforecast_pipeline.py │ │ └── utils.py ├── azure-automl-forecasting │ ├── .env.example │ ├── Makefile │ ├── README.md │ ├── requirements.txt │ └── src │ │ ├── azure_automl │ │ ├── __init__.py │ │ ├── automl_handler.py │ │ ├── download_forecasts.py │ │ └── forecasting.py │ │ ├── evaluation.py │ │ ├── nixtla_timegpt.py │ │ ├── statsforecast_sn.py │ │ └── utils │ │ ├── data_handler.py │ │ ├── download_data.py │ │ └── filter_data.py ├── efficiency │ ├── README.md │ ├── main.py │ └── requirements.txt ├── foundation-time-series-arena │ ├── .env.example │ ├── Makefile │ ├── README.md │ ├── requirements.txt │ ├── tests │ │ ├── __init__.py │ │ ├── test_arena.py │ │ ├── test_eval.py │ │ ├── test_models.py │ │ └── utils.py │ └── xiuhmolpilli │ │ ├── __init__.py │ │ ├── arena.py │ │ ├── models │ │ ├── __init__.py │ │ ├── benchmarks │ │ │ ├── __init__.py │ │ │ ├── ml.py │ │ │ ├── neural.py │ │ │ ├── prophet.py │ │ │ └── stats.py │ │ ├── foundational │ │ │ ├── __init__.py │ │ │ ├── chronos.py │ │ │ ├── lagllama.py │ │ │ ├── moirai.py │ │ │ ├── timegpt.py │ │ │ └── timesfm.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── forecaster.py │ │ │ ├── gluonts_forecaster.py │ │ │ └── parallel_forecaster.py │ │ └── utils │ │ ├── download_data.py │ │ ├── experiment_handler.py │ │ ├── filter_data.py │ │ └── logger_config.py ├── lag-llama │ ├── Makefile │ ├── README.md │ ├── environment.yml │ └── src │ │ ├── lag_llama_pipeline.py │ │ ├── main.py │ │ ├── statsforecast_pipeline.py │ │ └── utils.py ├── one-billion │ ├── README.md │ ├── main.py │ └── requirements.txt ├── prophet │ ├── .env.example │ ├── Makefile │ ├── README.md │ ├── environment.yml │ └── src │ │ ├── prophet_exp.py │ │ ├── results_summary.py │ │ ├── statsforecast_exp.py │ │ ├── timegpt_exp.py │ │ ├── tools.py │ │ └── utils.py ├── salesforce-moirai │ ├── README.md │ ├── environment.yml │ └── src │ │ ├── main.py │ │ ├── moirai_pipeline.py │ │ ├── statsforecast_pipeline.py │ │ └── utils.py └── vn1-competition │ ├── Makefile │ ├── README.md │ ├── src │ ├── __init__.py │ ├── functions.R │ ├── main.R │ └── main.py │ └── tests │ ├── __init__.py │ └── test_scores.py ├── nbs ├── _quarto.yml ├── assets │ ├── Inter-VariableFont.ttf │ ├── M5_categorical_variables_example.parquet │ ├── M5_what_if_pricing_example.parquet │ ├── arima_rst.csv │ ├── forecast_synthetic_data.mp4 │ ├── lgbm_rst.csv │ ├── logo.png │ ├── long_horizon_example_Y_df.parquet │ └── nhits_rst.csv ├── docs │ ├── capabilities │ │ ├── 000_capabilities.ipynb │ │ ├── forecast │ │ │ ├── 00_forecast.ipynb │ │ │ 
├── 01_quickstart.ipynb │ │ │ ├── 02_exogenous_variables.ipynb │ │ │ ├── 03_holidays_special_dates.ipynb │ │ │ ├── 04_categorical_variables.ipynb │ │ │ ├── 05_longhorizon.ipynb │ │ │ ├── 06_multiple_series.ipynb │ │ │ ├── 07_finetuning.ipynb │ │ │ ├── 08_custom_loss_function.ipynb │ │ │ ├── 09_cross_validation.ipynb │ │ │ ├── 10_prediction_intervals.ipynb │ │ │ └── 11_irregular_timestamps.ipynb │ │ ├── historical-anomaly-detection │ │ │ ├── 00_historical_anomaly_detection.ipynb │ │ │ ├── 01_quickstart.ipynb │ │ │ ├── 02_anomaly_exogenous.ipynb │ │ │ ├── 03_anomaly_detection_date_features.ipynb │ │ │ └── 04_confidence_levels.ipynb │ │ └── online-anomaly-detection │ │ │ ├── 00_online_anomaly_detection.ipynb │ │ │ ├── 01_quickstart.ipynb │ │ │ ├── 02_adjusting_detection_process.ipynb │ │ │ └── 03_univariate_vs_multivariate_anomaly_detection.ipynb │ ├── deployment │ │ └── 2_azure_ai.ipynb │ ├── getting-started │ │ ├── 1_introduction.ipynb │ │ ├── 21_polars_quickstart.ipynb │ │ ├── 22_azure_quickstart.ipynb │ │ ├── 2_quickstart.ipynb │ │ ├── 3_setting_up_your_api_key.ipynb │ │ ├── 41_pricing.ipynb │ │ ├── 4_data_requirements.ipynb │ │ ├── 5_faq.ipynb │ │ ├── 6_glossary.ipynb │ │ └── 7_why_timegpt.ipynb │ ├── reference │ │ ├── 01_nixtla_client.ipynb │ │ ├── 02_date_features.ipynb │ │ ├── 03_excel_addin.ipynb │ │ └── 04_nixtlar.ipynb │ ├── tutorials │ │ ├── 01_exogenous_variables.ipynb │ │ ├── 02_holidays.ipynb │ │ ├── 03_categorical_variables.ipynb │ │ ├── 04_longhorizon.ipynb │ │ ├── 050_training.ipynb │ │ ├── 05_multiple_series.ipynb │ │ ├── 061_reusing_finetuned_models.ipynb │ │ ├── 06_finetuning.ipynb │ │ ├── 07_loss_function_finetuning.ipynb │ │ ├── 080_validation.ipynb │ │ ├── 08_cross_validation.ipynb │ │ ├── 09_historical_forecast.ipynb │ │ ├── 100_uncertainty_quantification.ipynb │ │ ├── 10_uncertainty_quantification_with_quantile_forecasts.ipynb │ │ ├── 11_uncertainty_quantification_with_prediction_intervals.ipynb │ │ ├── 120_special_topics.ipynb │ │ ├── 13_bounded_forecasts.ipynb │ │ ├── 14_hierarchical_forecasting.ipynb │ │ ├── 15_missing_values.ipynb │ │ ├── 16_computing_at_scale.ipynb │ │ ├── 17_computing_at_scale_spark_distributed.ipynb │ │ ├── 18_computing_at_scale_dask_distributed.ipynb │ │ ├── 19_computing_at_scale_ray_distributed.ipynb │ │ ├── 20_anomaly_detection.ipynb │ │ ├── 21_shap_values.ipynb │ │ ├── 22_how_to_improve_forecast_accuracy.ipynb │ │ ├── 23_finetune_depth_finetuning.ipynb │ │ └── 23_temporalhierarchical.ipynb │ └── use-cases │ │ ├── 1_forecasting_web_traffic.ipynb │ │ ├── 2_bitcoin_price_prediction.ipynb │ │ ├── 3_electricity_demand.ipynb │ │ ├── 4_intermittent_demand.ipynb │ │ └── 5_what_if_pricing_scenarios_in_retail.ipynb ├── favicon_png.png ├── img │ ├── ApiRefScreen.png │ ├── anomaly.png │ ├── api_key_process.png │ ├── australia_hierarchy.png │ ├── australia_tourism.png │ ├── azure-deploy.png │ ├── azure-endpoint.png │ ├── azure-model-catalog.png │ ├── dashboard.png │ ├── forecast.png │ ├── forecast_readme.png │ ├── logo_nixtlar.png │ ├── results.jpg │ ├── timegpt-arch.png │ ├── timegpt_archi.png │ └── timeseries_model_arena.png ├── mint.json ├── nbdev.yml ├── sidebar.yml ├── src │ ├── date_features.ipynb │ ├── nixtla_client.ipynb │ └── utils.ipynb └── styles.css ├── nixtla ├── __init__.py ├── _modidx.py ├── date_features.py ├── nixtla_client.py ├── py.typed └── utils.py ├── pyproject.toml ├── settings.ini └── setup.py /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": 
"Nixtla Development Environment", 3 | "image": "mcr.microsoft.com/vscode/devcontainers/python:3.11", 4 | "hostRequirements": { 5 | "cpus": 4, 6 | "memory": "16gb", 7 | "storage": "32gb" 8 | }, 9 | "customizations": { 10 | "vscode": { 11 | "settings": { 12 | "terminal.integrated.shell.linux": "/bin/bash", 13 | "python.terminal.activateEnvInCurrentTerminal": true, 14 | "python.defaultInterpreterPath": ".venv/bin/python", 15 | "python.pythonPath": ".venv/bin/python", 16 | "[python]": { 17 | "editor.defaultFormatter": "charliermarsh.ruff", 18 | "editor.formatOnSave": true, 19 | "editor.codeActionsOnSave": { 20 | "source.fixAll": "explicit", 21 | "source.organizeImports": "explicit" 22 | } 23 | }, 24 | "notebook.python.defaultInterpreterPath": ".venv/bin/python", 25 | "notebook.codeActionsOnSave": { 26 | "source.fixAll": "explicit", 27 | "source.organizeImports": "explicit" 28 | }, 29 | "ruff.nativeServer": "on", 30 | "python.languageServer": "Default", 31 | "debug.internalConsoleOptions": "neverOpen", 32 | "extensions.ignoreRecommendations": true, 33 | "files.insertFinalNewline": true 34 | }, 35 | "extensions": [ 36 | "ms-python.python", 37 | "ms-python.mypy", 38 | "ms-python.vscode-pylance", 39 | "ms-toolsai.jupyter@2025.2.0", 40 | "charliermarsh.ruff@2025.22.0", 41 | "GitHub.copilot", 42 | "tamasfe.even-better-toml" 43 | ] 44 | } 45 | }, 46 | "forwardPorts": [ 47 | 8888 48 | ], 49 | "onCreateCommand": "make devenv", 50 | "postCreateCommand": "uv pip install -Ue .[dev,distributed]", 51 | "waitFor": "postCreateCommand", 52 | "features": { 53 | "ghcr.io/devcontainers/features/docker-in-docker:2.12.2": {}, 54 | "ghcr.io/va-h/devcontainers-features/uv:1": {} 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: Bug report 2 | title: "[] " 3 | description: Problems and issues with code of the library 4 | labels: [bug] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thank you for reporting the problem.. 10 | Please make sure what you are reporting is a bug with reproducible steps. To ask questions 11 | or share ideas, please post on our [Slack community](https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1h77esh5y-iL1m8N0F7qV1HmH~0KYeAQ) instead. 12 | 13 | - type: textarea 14 | attributes: 15 | label: What happened + What you expected to happen 16 | description: Describe 1. the bug 2. expected behavior 3. useful information (e.g., logs) 17 | placeholder: > 18 | Please provide the context in which the problem occurred and explain what happened. Further, 19 | please also explain why you think the behaviour is erroneous. It is extremely helpful if you can 20 | copy and paste the fragment of logs showing the exact error messages or wrong behaviour here. 21 | 22 | **NOTE**: please copy and paste texts instead of taking screenshots of them for easy future search. 23 | validations: 24 | required: true 25 | 26 | - type: textarea 27 | attributes: 28 | label: Versions / Dependencies 29 | description: Please specify the versions of the library, Python, OS, and other libraries that are used. 30 | value: | 31 |
<details><summary>Click to expand</summary> 32 | Dependencies: 33 | </details>
35 | validations: 36 | required: true 37 | 38 | - type: textarea 39 | attributes: 40 | label: Reproducible example 41 | description: > 42 | Please provide a reproducible script. Providing a simple way to reproduce the issue 43 | (minimal / no external dependencies) will help us triage and address issues in the timely manner! 44 | value: | 45 | ```python 46 | # paste your code here 47 | ``` 48 | validations: 49 | required: true 50 | 51 | - type: dropdown 52 | attributes: 53 | label: Issue Severity 54 | description: | 55 | How does this issue affect your experience as user? 56 | multiple: false 57 | options: 58 | - "Low: It annoys or frustrates me." 59 | - "Medium: It is a significant difficulty but I can work around it." 60 | - "High: It blocks me from completing my task." 61 | validations: 62 | required: false 63 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Ask a question or get support 4 | url: https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1h77esh5y-iL1m8N0F7qV1HmH~0KYeAQ 5 | about: Ask a question or request support for using a library of the nixtlaverse 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation-issue.yml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | title: "[] " 3 | description: Report an issue with the library documentation 4 | labels: [documentation] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: Thank you for helping us improve the library documentation! 9 | 10 | - type: textarea 11 | attributes: 12 | label: Description 13 | description: | 14 | Tell us about the change you'd like to see. For example, "I'd like to 15 | see more examples of how to use `cross_validation`." 16 | validations: 17 | required: true 18 | 19 | - type: textarea 20 | attributes: 21 | label: Link 22 | description: | 23 | If the problem is related to an existing section, please add a link to 24 | the section. 25 | validations: 26 | required: false 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: Library feature request 2 | description: Suggest an idea for a project 3 | title: "[] " 4 | labels: [enhancement, feature] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thank you for finding the time to propose a new feature! 10 | We really appreciate the community efforts to improve the nixtlaverse. 11 | 12 | - type: textarea 13 | attributes: 14 | label: Description 15 | description: A short description of your feature 16 | 17 | - type: textarea 18 | attributes: 19 | label: Use case 20 | description: > 21 | Describe the use case of your feature request. It will help us understand and 22 | prioritize the feature request. 23 | placeholder: > 24 | Rather than telling us how you might implement this feature, try to take a 25 | step back and describe what you are trying to achieve. 
26 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: weekly 7 | groups: 8 | ci-dependencies: 9 | patterns: ["*"] 10 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$NEXT_PATCH_VERSION' 2 | tag-template: 'v$NEXT_PATCH_VERSION' 3 | categories: 4 | - title: 'New Features' 5 | label: 'feature' 6 | - title: 'Breaking Change' 7 | label: 'breaking change' 8 | - title: 'Bug Fixes' 9 | label: 'fix' 10 | - title: 'Documentation' 11 | label: 'documentation' 12 | - title: 'Dependencies' 13 | label: 'dependencies' 14 | - title: 'Enhancement' 15 | label: 'enhancement' 16 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 17 | template: | 18 | $CHANGES 19 | -------------------------------------------------------------------------------- /.github/workflows/build-docs.yaml: -------------------------------------------------------------------------------- 1 | name: "build-docs" 2 | on: 3 | release: 4 | types: [released] 5 | pull_request: 6 | branches: ["main"] 7 | workflow_dispatch: 8 | 9 | defaults: 10 | run: 11 | shell: bash 12 | 13 | jobs: 14 | build-docs: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Clone repo 18 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 19 | - name: Clone docs repo 20 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 21 | with: 22 | repository: Nixtla/docs 23 | ref: scripts 24 | path: docs-scripts 25 | - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 26 | with: 27 | cache: "pip" 28 | python-version: "3.10" 29 | cache-dependency-path: settings.ini 30 | - name: Build docs 31 | run: | 32 | set -ux 33 | python -m pip install --upgrade pip 34 | pip install -Uq nbdev nbdev_plotly 35 | pip install -e ".[dev,distributed]" 36 | mkdir nbs/_extensions 37 | cp -r docs-scripts/mintlify/ nbs/_extensions/ 38 | python docs-scripts/update-quarto.py 39 | echo "procs = nbdev_plotly.plotly:PlotlyProc" >> settings.ini 40 | nbdev_docs 41 | - name: Apply final formats 42 | run: bash ./docs-scripts/docs-final-formatting.bash 43 | - name: Copy over necessary assets 44 | run: | 45 | cp nbs/mint.json _docs/mint.json 46 | cp docs-scripts/imgs/* _docs/ 47 | - name: Deploy to Mintlify Docs 48 | if: | 49 | github.event_name == 'release' || 50 | github.event_name == 'workflow_dispatch' 51 | uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 52 | with: 53 | github_token: ${{ secrets.GITHUB_TOKEN }} 54 | publish_branch: docs 55 | publish_dir: ./_docs 56 | user_name: github-actions[bot] 57 | user_email: 41898282+github-actions[bot]@users.noreply.github.com 58 | - name: Trigger mintlify workflow 59 | if: | 60 | github.event_name == 'release' || 61 | github.event_name == 'workflow_dispatch' 62 | uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 63 | with: 64 | github-token: ${{ secrets.DOCS_WORKFLOW_TOKEN }} 65 | script: | 66 | await github.rest.actions.createWorkflowDispatch({ 67 | owner: 'nixtla', 68 | repo: 'docs', 69 | workflow_id: 'mintlify-action.yml', 70 | ref: 'main', 71 | }); 72 | - name: Configure redirects for gh-pages 73 | run: python 
docs-scripts/configure-redirects.py nixtla 74 | - name: Deploy to Github Pages 75 | if: | 76 | github.event_name == 'release' || 77 | github.event_name == 'workflow_dispatch' 78 | uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 79 | with: 80 | github_token: ${{ secrets.GITHUB_TOKEN }} 81 | publish_branch: gh-pages 82 | publish_dir: ./gh-pages 83 | user_name: github-actions[bot] 84 | user_email: 41898282+github-actions[bot]@users.noreply.github.com 85 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.ref }} 11 | cancel-in-progress: true 12 | 13 | env: 14 | NIXTLA_API_KEY: ${{ secrets.NIXTLA_DEV_API_KEY }} 15 | NIXTLA_BASE_URL: ${{ secrets.NIXTLA_DEV_BASE_URL }} 16 | NIXTLA_API_KEY_CUSTOM: ${{ secrets.NIXTLA_API_KEY_CUSTOM }} 17 | NIXTLA_BASE_URL_CUSTOM: ${{ secrets.NIXTLA_BASE_URL_CUSTOM }} 18 | API_KEY_FRED: ${{ secrets.API_KEY_FRED }} 19 | 20 | jobs: 21 | check-import: 22 | runs-on: ubuntu-latest 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | python-version: ["3.9", "3.10"] 27 | steps: 28 | - name: Clone repo 29 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 30 | 31 | - name: Set up python 32 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 33 | with: 34 | python-version: ${{ matrix.python-version }} 35 | 36 | - name: Install nixtla 37 | run: pip install uv && uv pip install --system . 38 | 39 | - name: Check import 40 | run: python -c "from nixtla import NixtlaClient" 41 | 42 | run-all-tests: 43 | runs-on: nixtla-linux-large-public 44 | timeout-minutes: 60 45 | strategy: 46 | fail-fast: false 47 | matrix: 48 | python-version: ["3.9", "3.10"] 49 | steps: 50 | - name: Clone repo 51 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 52 | 53 | - name: Set up python 54 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 55 | with: 56 | python-version: ${{ matrix.python-version }} 57 | 58 | - name: Install pip requirements 59 | run: pip install uv && uv pip install --system ".[dev,distributed]" 60 | 61 | - name: Run tests 62 | run: nbdev_test --timing --do_print --n_workers 0 --flags 'distributed' 63 | 64 | run-local-tests: 65 | runs-on: ${{ matrix.os }} 66 | timeout-minutes: 60 67 | strategy: 68 | fail-fast: false 69 | matrix: 70 | os: [macos-13, windows-latest] 71 | python-version: ["3.9", "3.10"] 72 | steps: 73 | - name: Clone repo 74 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 75 | 76 | - name: Set up python 77 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 78 | with: 79 | python-version: ${{ matrix.python-version }} 80 | 81 | - name: Install pip requirements 82 | run: pip install uv && uv pip install --system ".[dev]" 83 | 84 | - name: Run tests 85 | run: nbdev_test --timing --do_print --n_workers 0 --skip_file_re "computing_at_scale|distributed" 86 | 87 | run-minimal-tests: 88 | runs-on: ${{ matrix.os }} 89 | timeout-minutes: 60 90 | strategy: 91 | fail-fast: false 92 | matrix: 93 | os: [macos-13, macos-14, ubuntu-latest, windows-latest] 94 | python-version: ["3.9", "3.13"] 95 | steps: 96 | - name: Clone repo 97 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 
v4.2.2 98 | 99 | - name: Set up python 100 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 101 | with: 102 | python-version: ${{ matrix.python-version }} 103 | 104 | - name: Install pip requirements 105 | run: pip install uv && uv pip install --system . "ipython<=8.32.0" matplotlib nbdev python-dotenv 106 | 107 | - name: Run tests 108 | run: nbdev_test --n_workers 0 --path nbs/docs/getting-started/2_quickstart.ipynb 109 | -------------------------------------------------------------------------------- /.github/workflows/deploy-readme.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy to readme dot com 2 | 3 | on: 4 | release: 5 | types: [published] 6 | workflow_dispatch: 7 | 8 | defaults: 9 | run: 10 | shell: bash -l {0} 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | deploy-readme: 18 | runs-on: ubuntu-latest 19 | env: 20 | readme_version: "0.0.2" 21 | steps: 22 | - name: Clone repo 23 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 24 | with: 25 | persist-credentials: false 26 | 27 | - name: Set up python 28 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 29 | with: 30 | python-version: "3.10" 31 | 32 | - name: Install pip requirements 33 | run: pip install ".[dev]" 34 | 35 | - name: Install Quarto 36 | run: nbdev_install_quarto 37 | 38 | - name: Create readme docs 39 | env: 40 | README_HOST_URL: ${{ secrets.README_HOST_URL }} 41 | README_CATEGORY: ${{ secrets.README_CATEGORY }} 42 | README_API_KEY: ${{ secrets.README_API_KEY }} 43 | README_VERSION: ${{ env.readme_version }} 44 | run: ./action_files/readme_com/create_readme_docs.sh 45 | 46 | - name: Push PNGs to readme_docs branch 47 | run: | 48 | git config --global user.name 'FedericoGarza' 49 | git config --global user.email 'fede.garza.ramirez@gmail.com' 50 | git push https://${{ secrets.TOKEN_GITHUB }}@github.com/${{ github.repository }} --delete readme_docs || true 51 | git checkout -b readme_docs 52 | git add -f "*.png" 53 | git commit -m "[cd] update png images" || echo "No changes to commit" 54 | git push https://${{ secrets.TOKEN_GITHUB }}@github.com/${{ github.repository }} HEAD:readme_docs 55 | 56 | - name: Deploy to readme com 57 | uses: readmeio/rdme@51a80867c45de15e2b41af0c4bd5bbc61b932804 # 8.6.6 58 | with: 59 | rdme: docs ./nbs/_docs/docs/ --key=${{ secrets.README_API_KEY }} --version=${{ env.readme_version }} 60 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Clone repo 14 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 15 | 16 | - name: Set up python 17 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 18 | with: 19 | python-version: '3.10' 20 | 21 | - name: Install dependencies 22 | run: pip install black nbdev pre-commit 23 | 24 | - name: Run pre-commit 25 | run: pre-commit run --show-diff-on-failure --files nixtla/* 26 | -------------------------------------------------------------------------------- /.github/workflows/models-performance.yaml: -------------------------------------------------------------------------------- 1 | name: 
Models Performance 2 | 3 | permissions: write-all 4 | 5 | on: 6 | pull_request: 7 | types: [opened, synchronize, reopened] 8 | 9 | defaults: 10 | run: 11 | shell: bash -l {0} 12 | 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | run-models-performance: 19 | runs-on: ubuntu-latest 20 | env: 21 | NIXTLA_API_KEY: ${{ secrets.NIXTLA_DEV_API_KEY }} 22 | NIXTLA_BASE_URL: ${{ secrets.NIXTLA_DEV_BASE_URL }} 23 | PLOTS_REPO_URL: https://github.com/Nixtla/nixtla/blob/docs-figs-model-performance 24 | steps: 25 | - name: Clone repo 26 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 27 | 28 | - name: Set up python 29 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 30 | with: 31 | python-version: "3.10" 32 | 33 | - name: Install pip requirements 34 | run: pip install uv && uv pip install --system '.[dev]' 35 | 36 | - name: Run evaluation 37 | run: python -m action_files.models_performance.main 38 | 39 | - name: Upload results to the PR 40 | if: github.event_name == 'pull_request' 41 | env: 42 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 43 | PR_NUMBER: ${{ github.event.pull_request.number }} 44 | run: > 45 | python action_files/comment_file.py 46 | --search_term "Experiment Results" 47 | --file "action_files/models_performance/summary.md" 48 | 49 | - name: Upload images to new branch main 50 | run: | 51 | git config --global user.email azul@nixtla.io 52 | git config --global user.name AzulGarza 53 | git push https://$GITHUB_TOKEN@github.com/nixtla/nixtla.git --delete docs-figs-model-performance || true 54 | git checkout -b docs-figs-model-performance 55 | git add -f "*.png" 56 | git commit -m "[cd] update png images" || echo "No changes to commit" 57 | git push https://$GITHUB_TOKEN@github.com/nixtla/nixtla.git HEAD:docs-figs-model-performance 58 | -------------------------------------------------------------------------------- /.github/workflows/no-response.yaml: -------------------------------------------------------------------------------- 1 | name: No Response Bot 2 | 3 | on: 4 | issue_comment: 5 | types: [created] 6 | schedule: 7 | - cron: '0 4 * * *' 8 | 9 | jobs: 10 | noResponse: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: lee-dohm/no-response@9bb0a4b5e6a45046f00353d5de7d90fb8bd773bb # v0.5.0 14 | with: 15 | closeComment: > 16 | This issue has been automatically closed because it has been awaiting a response for too long. 17 | When you have time to to work with the maintainers to resolve this issue, please post a new comment and it will be re-opened. 18 | If the issue has been locked for editing by the time you return to it, please open a new issue and reference this one. 
19 | daysUntilClose: 30 20 | responseRequiredLabel: awaiting response 21 | token: ${{ github.token }} 22 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Packages 2 | 3 | on: 4 | push: 5 | tags: ['v*'] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | id-token: write 12 | steps: 13 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 17 | with: 18 | python-version: '3.10' 19 | 20 | - name: Install dependencies 21 | run: python -m pip install --upgrade pip && pip install build 22 | 23 | - name: Build nixtla package 24 | run: python -m build 25 | 26 | - name: Publish nixtla package 27 | uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 28 | 29 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | update_release_draft: 13 | permissions: 14 | contents: write 15 | pull-requests: read 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: release-drafter/release-drafter@b1476f6e6eb133afa41ed8589daba6dc69b4d3f5 # v6.1.0 19 | env: 20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | -------------------------------------------------------------------------------- /.github/workflows/test-publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Packages to TestPyPI 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | deploy: 8 | runs-on: ubuntu-latest 9 | permissions: 10 | id-token: write 11 | steps: 12 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 13 | 14 | - name: Set up Python 15 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 16 | with: 17 | python-version: '3.10' 18 | 19 | - name: Install dependencies 20 | run: python -m pip install --upgrade pip && pip install build 21 | 22 | - name: Build nixtla package 23 | run: python -m build 24 | 25 | - name: Publish nixtla package 26 | uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 27 | with: 28 | repository-url: https://test.pypi.org/legacy/ 29 | 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | __pycache__ 3 | *.egg-info 4 | Gemfile* 5 | Gemfile.lock 6 | docs/_site 7 | build 8 | dist 9 | .vscode 10 | .idea 11 | *.gif 12 | *.csv 13 | */data/* 14 | *.parquet 15 | *.tar.gz 16 | tmp 17 | _docs/ 18 | _proc/ 19 | .DS_Store 20 | .gitattributes 21 | .gitconfig 22 | nbs/.last_checked 23 | .venv 24 | .idea 25 | .env 26 | */summary.md 27 | */*.png 28 | longhorizon 29 | data 30 | *.rda 31 | nbs/_extensions 32 | !nbs/assets/* 33 | 34 | # VSCode 35 | *.code-workspace 36 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: 
nbdev_clean 5 | name: Clean notebooks 6 | entry: sh -c 'nbdev_clean && nbdev_clean --fname nbs/src --clear_all' 7 | language: system 8 | 9 | - repo: local 10 | hooks: 11 | - id: nbdev_export 12 | name: nbdev_export 13 | entry: sh -c 'nbdev_export' 14 | language: system 15 | 16 | - repo: https://github.com/astral-sh/ruff-pre-commit 17 | rev: v0.2.1 18 | hooks: 19 | - id: ruff 20 | files: 'nixtla' 21 | 22 | - repo: https://github.com/pre-commit/mirrors-mypy 23 | rev: v1.10.1 24 | hooks: 25 | - id: mypy 26 | args: [--ignore-missing-imports] 27 | files: 'nixtla' 28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include settings.ini 2 | include LICENSE 3 | include CONTRIBUTING.md 4 | include README.md 5 | recursive-exclude * __pycache__ 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | devenv: 2 | uv venv 3 | . .venv/bin/activate; uv pip install -Ue .[dev,distributed] 4 | . .venv/bin/activate; pre-commit install 5 | . .venv/bin/activate; nbdev_install_hooks 6 | 7 | 8 | jupyter: 9 | mkdir -p tmp 10 | jupyter lab --port=8888 --ip=0.0.0.0 --no-browser --allow-root --NotebookApp.token='' --NotebookApp.password='' --NotebookApp.allow_origin='*' 11 | -------------------------------------------------------------------------------- /QuickStart.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/QuickStart.gif -------------------------------------------------------------------------------- /QuickstartTGPT.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/QuickstartTGPT.mp4 -------------------------------------------------------------------------------- /action_files/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/action_files/__init__.py -------------------------------------------------------------------------------- /action_files/comment_file.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import fire 4 | import requests 5 | 6 | token = os.environ["GITHUB_TOKEN"] 7 | pr_number = os.environ["PR_NUMBER"] 8 | headers = { 9 | "Authorization": f"token {token}", 10 | "Accept": "application/vnd.github.v3+json", 11 | } 12 | base_url = "https://api.github.com/repos/Nixtla/nixtla/issues" 13 | 14 | 15 | def get_comments(): 16 | resp = requests.get(f"{base_url}/{pr_number}/comments", headers=headers) 17 | if resp.status_code != 200: 18 | raise RuntimeError(resp.text) 19 | return resp.json() 20 | 21 | 22 | def upsert_comment(body: str, comment_id: str | None): 23 | data = {"body": body} 24 | if comment_id is None: 25 | resp = requests.post( 26 | f"{base_url}/{pr_number}/comments", json=data, headers=headers 27 | ) 28 | else: 29 | resp = requests.patch( 30 | f"{base_url}/comments/{comment_id}", json=data, headers=headers 31 | ) 32 | return resp 33 | 34 | 35 | def main(search_term: str, file: str): 36 | comments = get_comments() 37 | existing_comment = [ 38 | c for c in comments if search_term in c["body"] and 
c["user"]["type"] == "Bot" 39 | ] 40 | if existing_comment: 41 | comment_id = existing_comment[0]["id"] 42 | else: 43 | comment_id = None 44 | with open(file, "rt") as f: 45 | summary = f.read() 46 | resp = upsert_comment(summary, comment_id) 47 | if resp.status_code not in (200, 201, 202): 48 | raise RuntimeError(f"{resp.status_code}: {resp.text}") 49 | 50 | 51 | if __name__ == "__main__": 52 | fire.Fire(main) 53 | -------------------------------------------------------------------------------- /action_files/models_performance/experiments.yaml: -------------------------------------------------------------------------------- 1 | experiments: 2 | 3 | - air-passengers: 4 | - dataset_url: https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/air_passengers.csv 5 | - time_col: timestamp 6 | - target_col: value 7 | - season_length: 12 # for benchmarks 8 | - freq: 9 | - MS 10 | - h: 11 | - 12 12 | - 24 13 | 14 | - electricity-multiple-series: 15 | - dataset_url: https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/ercot_multiple_ts.csv 16 | - season_length: 24 # for benchmarks 17 | - time_col: timestamp 18 | - target_col: value 19 | - freq: 20 | - H 21 | - h: 22 | - 24 23 | - 168 24 | - 336 25 | -------------------------------------------------------------------------------- /action_files/readme_com/create_readme_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BASE_DIR="nbs/docs/" 4 | SUB_DIRS=("getting-started" "capabilities" "deployment" "tutorials" "use-cases" "reference") 5 | 6 | counter=0 7 | for sub_dir in "${SUB_DIRS[@]}"; do 8 | DIR="$BASE_DIR$sub_dir/" 9 | if [[ -d "$DIR" ]]; then 10 | while read -r ipynb_file; do 11 | echo $counter 12 | md_file="${ipynb_file%.ipynb}.md" 13 | md_file="${md_file/docs/_docs/docs}" 14 | quarto render "$ipynb_file" --to md --wrap=none 15 | python -m action_files.readme_com.modify_markdown --file_path "$md_file" --slug_number "$counter" 16 | ((counter++)) 17 | done < <(find "$DIR" -type f -name "*.ipynb" -not -path "*/.ipynb_checkpoints/*" | sort) 18 | else 19 | echo "Directory $DIR does not exist." 
20 | fi 21 | done 22 | 23 | # process changelog 24 | echo $counter 25 | file_changelog="./nbs/_docs/docs/CHANGELOG.md" 26 | cp ./CHANGELOG.md ${file_changelog} 27 | python -m action_files.readme_com.modify_markdown --file_path "$file_changelog" --slug_number "$counter" 28 | -------------------------------------------------------------------------------- /action_files/readme_com/create_sdk_reference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | import fire 5 | from dotenv import load_dotenv 6 | 7 | load_dotenv() 8 | 9 | 10 | def create_sdk_reference( 11 | save_dir, 12 | slug_number, 13 | host_url=os.environ["README_HOST_URL"], 14 | category=os.environ["README_CATEGORY"], 15 | ): 16 | file_path = f"{save_dir}/{slug_number}_sdk_reference.md" 17 | header = f"""--- 18 | title: "SDK Reference" 19 | slug: "sdk_reference" 20 | order: {slug_number} 21 | type: "link" 22 | link_url: "https://nixtla.mintlify.app/nixtla/timegpt.html" 23 | link_external: true 24 | category: {category} 25 | --- 26 | 27 | """ 28 | 29 | with open(file_path, "w", encoding="utf-8") as file: 30 | file.write(header) 31 | 32 | 33 | if __name__ == "__main__": 34 | fire.Fire(create_sdk_reference) 35 | -------------------------------------------------------------------------------- /action_files/readme_com/modify_markdown.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from pathlib import Path 4 | import requests 5 | 6 | import fire 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | 12 | def to_snake_case(s): 13 | s = s.lower() 14 | s = re.sub(r"(? 18 | 19 | 20 | ## Reproducibility 21 | 22 | To ensure the reproducibility of our findings, the Statistical Ensemble experiments were conducted on an AWS c5a.24xlarge instance, equipped with 96 vCPUs and 192 GiB of RAM. In contrast, the experiments for Amazon Chronos were carried out on an AWS g5.4xlarge GPU instance, which includes 16 vCPUs, 64 GiB of RAM, and an NVIDIA A10G Tensor Core GPU with 24 GiB. All necessary code and detailed instructions for reproducing the experiments are available in this directory. 23 | 24 | ### Instructions 25 | 26 | 1. Set up a Python environment: 27 | 28 | ```bash 29 | mamba env create -f environment.yml 30 | conda activate amazon-chronos 31 | ``` 32 | 33 | 2. Run the experiments as reported in the table: 34 | 35 | ```bash 36 | python -m src.main --mode fcst_statsforecast 37 | python -m src.main --mode fcst_chronos 38 | ``` 39 | 40 | 3. 
Evaluate the results using: 41 | 42 | ```bash 43 | python -m src.main --mode evaluation 44 | ``` 45 | 46 | ### References 47 | - **Statistical Ensemble Paper**: [A Simple Combination of Univariate Models](https://www.sciencedirect.com/science/article/abs/pii/S0169207019300585?via%3Dihub) 48 | - **Amazon Chronos Paper**: [Chronos: Learning the Language of Time Series](https://arxiv.org/abs/2403.07815) 49 | -------------------------------------------------------------------------------- /experiments/amazon-chronos/environment.yml: -------------------------------------------------------------------------------- 1 | name: amazon-chronos 2 | channels: 3 | - conda-forge 4 | - defaults 5 | - anaconda 6 | dependencies: 7 | - jupyterlab 8 | - pip 9 | - python=3.10 10 | - pip: 11 | - datasetsforecast 12 | - fire 13 | - gluonts 14 | - huggingface_hub[cli] 15 | - neuralforecast 16 | - orjson 17 | - statsforecast 18 | - utilsforecast 19 | - git+https://github.com/amazon-science/chronos-forecasting.git 20 | 21 | -------------------------------------------------------------------------------- /experiments/amazon-chronos/src/amazon_chronos/forecaster.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Iterable, List 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | from chronos import ChronosPipeline 8 | from utilsforecast.processing import make_future_dataframe 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | main_logger = logging.getLogger(__name__) 12 | 13 | 14 | class TimeSeriesDataset: 15 | def __init__( 16 | self, 17 | data: torch.Tensor, 18 | uids: Iterable, 19 | last_times: Iterable, 20 | batch_size: int, 21 | ): 22 | self.data = data 23 | self.uids = uids 24 | self.last_times = last_times 25 | self.batch_size = batch_size 26 | self.n_batches = len(data) // self.batch_size + ( 27 | 0 if len(data) % self.batch_size == 0 else 1 28 | ) 29 | self.current_batch = 0 30 | 31 | @classmethod 32 | def from_df(cls, df: pd.DataFrame, batch_size: int): 33 | num_unique_ids = df["unique_id"].nunique() 34 | max_series_length = df["unique_id"].value_counts().max() 35 | padded_tensor = torch.full( 36 | size=(num_unique_ids, max_series_length), 37 | fill_value=torch.nan, 38 | dtype=torch.bfloat16, 39 | ) # type: ignore 40 | df_sorted = df.sort_values(by=["unique_id", "ds"]) 41 | for idx, (_, group) in enumerate(df_sorted.groupby("unique_id")): 42 | series_length = len(group) 43 | padded_tensor[idx, -series_length:] = torch.tensor( 44 | group["y"].values, 45 | dtype=torch.bfloat16, 46 | ) 47 | uids = df_sorted["unique_id"].unique() 48 | last_times = df_sorted.groupby("unique_id")["ds"].tail(1) 49 | return cls(padded_tensor, uids, last_times, batch_size) 50 | 51 | def __len__(self): 52 | return len(self.data) 53 | 54 | def make_future_dataframe(self, h: int, freq: str) -> pd.DataFrame: 55 | return make_future_dataframe( 56 | uids=self.uids, 57 | last_times=pd.to_datetime(self.last_times), 58 | h=h, 59 | freq=freq, 60 | ) # type: ignore 61 | 62 | def __iter__(self): 63 | self.current_batch = 0 # Reset for new iteration 64 | return self 65 | 66 | def __next__(self): 67 | if self.current_batch < self.n_batches: 68 | start_idx = self.current_batch * self.batch_size 69 | end_idx = start_idx + self.batch_size 70 | self.current_batch += 1 71 | return self.data[start_idx:end_idx] 72 | else: 73 | raise StopIteration 74 | 75 | 76 | class AmazonChronos: 77 | def __init__(self, model_name: str): 78 | self.model_name = model_name 
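# Load the pretrained Chronos pipeline once at construction; device_map="auto"
# places the weights on GPU when available, and bfloat16 halves memory use.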
79 | self.model = ChronosPipeline.from_pretrained( 80 | model_name, 81 | device_map="auto", 82 | torch_dtype=torch.bfloat16, 83 | ) 84 | 85 | def forecast( 86 | self, 87 | df: pd.DataFrame, 88 | h: int, 89 | freq: str, 90 | batch_size: int = 32, 91 | quantiles: List[float] | None = None, 92 | **predict_kwargs, 93 | ) -> pd.DataFrame: 94 | main_logger.info("transforming dataframe to tensor") 95 | dataset = TimeSeriesDataset.from_df(df, batch_size=batch_size) 96 | main_logger.info("forecasting") 97 | fcsts = [self.model.predict(batch, prediction_length=h, **predict_kwargs) for batch in dataset] 98 | fcst = torch.cat(fcsts) 99 | main_logger.info("transforming forecast to dataframe") 100 | fcst = fcst.numpy() 101 | fcst_df = dataset.make_future_dataframe(h=h, freq=freq) 102 | fcst_df[self.model_name] = np.median(fcst, axis=1).reshape(-1, 1) 103 | if quantiles is not None: 104 | for q in quantiles: 105 | q_col = f"{self.model_name}-q-{q}" 106 | fcst_df[q_col] = np.quantile(fcst, q, axis=1).reshape(-1, 1) 107 | return fcst_df 108 | 109 | 110 | if __name__ == "__main__": 111 | import pandas as pd 112 | 113 | df = pd.read_csv( 114 | "https://raw.githubusercontent.com/AileenNielsen/TimeSeriesAnalysisWithPython/master/data/AirPassengers.csv" 115 | ) 116 | df = df.rename(columns={"#Passengers": "y", "Month": "ds"}) 117 | df["ds"] = pd.to_datetime(df["ds"]) 118 | df.insert(0, "unique_id", "AirPassengers") 119 | df = pd.concat([df, df.assign(unique_id="AirPassengers2")]) 120 | model = AmazonChronos("amazon/chronos-t5-small") 121 | fcst_df = model.forecast(df, h=12, freq="MS") 122 | print(fcst_df) 123 | -------------------------------------------------------------------------------- /experiments/amazon-chronos/src/amazon_chronos/pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | from time import time 3 | from typing import List, Tuple 4 | 5 | import fire 6 | import pandas as pd 7 | 8 | 9 | from ..utils import ExperimentHandler 10 | from .forecaster import AmazonChronos 11 | 12 | 13 | def run_amazon_chronos( 14 | train_df: pd.DataFrame, 15 | model_name: str, 16 | horizon: int, 17 | freq: str, 18 | quantiles: List[float], 19 | ) -> Tuple[pd.DataFrame, float, str]: 20 | ac = AmazonChronos(model_name) 21 | init_time = time() 22 | fcsts_df = ac.forecast( 23 | df=train_df, 24 | h=horizon, 25 | freq=freq, 26 | batch_size=8, 27 | quantiles=quantiles, 28 | # parameters as in https://github.com/amazon-science/chronos-forecasting/blob/73be25042f5f587823d46106d372ba133152fb00/README.md?plain=1#L62-L65 29 | num_samples=20, 30 | temperature=1.0, 31 | top_k=50, 32 | top_p=1.0, 33 | ) 34 | total_time = time() - init_time 35 | return fcsts_df, total_time, model_name 36 | 37 | 38 | def main(dataset: str, model_name: str): 39 | exp = ExperimentHandler(dataset) 40 | fcst_df, total_time, model_name = run_amazon_chronos( 41 | train_df=exp.train_df, 42 | model_name=model_name, 43 | horizon=exp.horizon, 44 | freq=exp.freq, 45 | quantiles=exp.quantiles, 46 | ) 47 | exp.save_results(fcst_df, total_time, model_name) 48 | 49 | 50 | if __name__ == "__main__": 51 | fire.Fire(main) 52 | -------------------------------------------------------------------------------- /experiments/amazon-chronos/src/main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import subprocess 3 | 4 | import fire 5 | import pandas as pd 6 | 7 | from src.utils import ExperimentHandler 8 | 9 | logger = logging.getLogger(__name__) 10 | 
logger.setLevel(logging.INFO) 11 | 12 | datasets = [ 13 | "m1_yearly", 14 | "m1_quarterly", 15 | "m1_monthly", 16 | "m3_yearly", 17 | "m3_quarterly", 18 | "m3_monthly", 19 | "m3_other", 20 | "tourism_yearly", 21 | "tourism_quarterly", 22 | "tourism_monthly", 23 | "m4_yearly", 24 | "m4_quarterly", 25 | ] 26 | 27 | amazon_chronos_models = [ 28 | "amazon/chronos-t5-large", 29 | "amazon/chronos-t5-tiny", 30 | "amazon/chronos-t5-mini", 31 | "amazon/chronos-t5-small", 32 | "amazon/chronos-t5-base", 33 | ] 34 | 35 | 36 | def main(mode: str): 37 | prefix_process = ["python", "-m"] 38 | 39 | eval_df = None 40 | for dataset in datasets: 41 | logger.info(f"Evaluating {dataset}...") 42 | if mode in ["fcst_statsforecast", "fcst_chronos"]: 43 | suffix_process = ["--dataset", dataset] 44 | 45 | def process(middle_process): 46 | return prefix_process + middle_process + suffix_process 47 | 48 | if mode == "fcst_statsforecast": 49 | logger.info("Running StatisticalEnsemble") 50 | subprocess.run(process(["src.statsforecast_pipeline"])) 51 | elif mode == "fcst_chronos": 52 | for model in amazon_chronos_models: 53 | logger.info(f"Running Amazon Chronos {model}") 54 | chronos_process = process(["src.amazon_chronos.pipeline"]) 55 | chronos_process.extend(["--model_name", model]) 56 | subprocess.run(chronos_process) 57 | elif mode == "evaluation": 58 | if eval_df is None: 59 | eval_df = [] 60 | logger.info("Running dataset evaluation") 61 | exp = ExperimentHandler(dataset) 62 | try: 63 | eval_dataset_df = exp.evaluate_models( 64 | amazon_chronos_models + ["StatisticalEnsemble", "SeasonalNaive"] 65 | ) 66 | print(eval_dataset_df) 67 | eval_df.append(eval_dataset_df) 68 | except Exception as e: 69 | logger.error(e) 70 | if eval_df is not None: 71 | eval_df = pd.concat(eval_df).reset_index(drop=True) 72 | exp.save_dataframe(eval_df, "complete-results.csv") 73 | 74 | 75 | if __name__ == "__main__": 76 | fire.Fire(main) 77 | -------------------------------------------------------------------------------- /experiments/amazon-chronos/src/statsforecast_pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | from time import time 3 | from typing import List, Tuple 4 | 5 | os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1" 6 | os.environ["NIXTLA_NUMBA_CACHE"] = "1" 7 | 8 | import fire 9 | import numpy as np 10 | import pandas as pd 11 | from scipy.stats import norm 12 | from statsforecast import StatsForecast 13 | from statsforecast.models import ( 14 | AutoARIMA, 15 | AutoETS, 16 | AutoCES, 17 | DynamicOptimizedTheta, 18 | SeasonalNaive, 19 | ) 20 | 21 | from src.utils import ExperimentHandler 22 | 23 | 24 | def run_seasonal_naive( 25 | train_df: pd.DataFrame, 26 | horizon: int, 27 | freq: str, 28 | seasonality: int, 29 | level: List[int], 30 | ) -> Tuple[pd.DataFrame, float, str]: 31 | os.environ["NIXTLA_ID_AS_COL"] = "true" 32 | sf = StatsForecast( 33 | models=[SeasonalNaive(season_length=seasonality)], 34 | freq=freq, 35 | n_jobs=-1, 36 | ) 37 | model = sf 38 | init_time = time() 39 | fcsts_df = model.forecast(df=train_df, h=horizon, level=level) 40 | total_time = time() - init_time 41 | return fcsts_df, total_time, "SeasonalNaive" 42 | 43 | 44 | def ensemble_forecasts( 45 | fcsts_df: pd.DataFrame, 46 | quantiles: List[float], 47 | name_models: List[str], 48 | model_name: str, 49 | ) -> pd.DataFrame: 50 | fcsts_df[model_name] = fcsts_df[name_models].mean(axis=1).values # type: ignore 51 | # compute quantiles based on the mean of the forecasts 52 | sigma_models = [] 53 | 
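# Back out each model's forecast std from its 68.27% interval (~one sigma), then
# combine assuming independent errors: ensemble std = sqrt(sum(sigma_i^2)) / n,
# and the requested quantiles follow from the normal ppf below.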
for model in name_models: 54 | fcsts_df[f"sigma_{model}"] = fcsts_df[f"{model}-hi-68.27"] - fcsts_df[model] 55 | sigma_models.append(f"sigma_{model}") 56 | fcsts_df[f"std_{model_name}"] = ( 57 | fcsts_df[sigma_models].pow(2).sum(axis=1).div(len(sigma_models) ** 2).pow(0.5) 58 | ) 59 | z = norm.ppf(quantiles) 60 | q_cols = [] 61 | for q, zq in zip(quantiles, z): 62 | q_col = f"{model_name}-q-{q}" 63 | fcsts_df[q_col] = fcsts_df[model_name] + zq * fcsts_df[f"std_{model_name}"] 64 | q_cols.append(q_col) 65 | fcsts_df = fcsts_df[["unique_id", "ds"] + [model_name] + q_cols] 66 | return fcsts_df 67 | 68 | 69 | def run_statistical_ensemble( 70 | train_df: pd.DataFrame, 71 | horizon: int, 72 | freq: str, 73 | seasonality: int, 74 | quantiles: List[float], 75 | ) -> Tuple[pd.DataFrame, float, str]: 76 | os.environ["NIXTLA_ID_AS_COL"] = "true" 77 | models = [ 78 | AutoARIMA(season_length=seasonality), 79 | AutoETS(season_length=seasonality), 80 | AutoCES(season_length=seasonality), 81 | DynamicOptimizedTheta(season_length=seasonality), 82 | ] 83 | init_time = time() 84 | series_per_core = 15 85 | n_series = train_df["unique_id"].nunique() 86 | n_jobs = min(n_series // series_per_core, os.cpu_count()) 87 | sf = StatsForecast( 88 | models=models, 89 | freq=freq, 90 | n_jobs=n_jobs, 91 | ) 92 | fcsts_df = sf.forecast(df=train_df, h=horizon, level=[68.27]) 93 | name_models = [repr(model) for model in models] 94 | model_name = "StatisticalEnsemble" 95 | fcsts_df = ensemble_forecasts( 96 | fcsts_df, 97 | quantiles, 98 | name_models, 99 | model_name, 100 | ) 101 | total_time = time() - init_time 102 | return fcsts_df, total_time, model_name 103 | 104 | 105 | def main(dataset: str): 106 | exp = ExperimentHandler(dataset) 107 | # seasonal naive benchmark 108 | fcst_df, total_time, model_name = run_seasonal_naive( 109 | train_df=exp.train_df, 110 | horizon=exp.horizon, 111 | freq=exp.freq, 112 | seasonality=exp.seasonality, 113 | level=exp.level, 114 | ) 115 | fcst_df = exp.fcst_from_level_to_quantiles(fcst_df, model_name) 116 | exp.save_results(fcst_df, total_time, model_name) 117 | # statistical ensemble 118 | fcst_df, total_time, model_name = run_statistical_ensemble( 119 | train_df=exp.train_df, 120 | horizon=exp.horizon, 121 | freq=exp.freq, 122 | seasonality=exp.seasonality, 123 | quantiles=exp.quantiles, 124 | ) 125 | exp.save_results(fcst_df, total_time, model_name) 126 | 127 | 128 | if __name__ == "__main__": 129 | from statsforecast.utils import AirPassengers as ap 130 | 131 | AutoARIMA(season_length=12).forecast(ap.astype(np.float32), h=12) 132 | fire.Fire(main) 133 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/.env.example: -------------------------------------------------------------------------------- 1 | AZURE_SUBSCRIPTION_ID= 2 | AZURE_RESOURCE_GROUP= 3 | AZURE_WORKSPACE_NAME= 4 | TIMEGPT_TOKEN= 5 | 6 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/Makefile: -------------------------------------------------------------------------------- 1 | TS_FILES := Hourly_H.parquet Daily_D.parquet Weekly_W-MON.parquet Monthly_MS.parquet 2 | FILTERED_TS_FILES := $(patsubst %,./data/filtered_datasets/%,$(TS_FILES)) 3 | 4 | filter_data: 5 | @for file in $(TS_FILES); do \ 6 | python -m src.utils.filter_data --dataset_path ./data/$$file; \ 7 | done 8 | 9 | run_timegpt: .require-dataset_path 10 | @echo Running TimeGPT with dataset_path=$(dataset_path) 11 | @python 
-m src.nixtla_timegpt --dataset_path $(dataset_path) 12 | 13 | run_sn: .require-dataset_path 14 | @echo Running SN with dataset_path=$(dataset_path) 15 | @python -m src.statsforecast_sn --dataset_path $(dataset_path) 16 | 17 | run_automl: .require-dataset_path 18 | @echo Running AutoML with dataset_path=$(dataset_path) 19 | @python -m src.azure_automl.forecasting --dataset_path $(dataset_path) 20 | 21 | run_methods: 22 | @for file in $(TS_FILES); do \ 23 | echo "Running methods for $$file"; \ 24 | $(MAKE) run_timegpt dataset_path=./data/filtered_datasets/$$file; \ 25 | $(MAKE) run_sn dataset_path=./data/filtered_datasets/$$file; \ 26 | $(MAKE) run_automl dataset_path=./data/filtered_datasets/$$file; \ 27 | done 28 | 29 | download_automl_forecasts: 30 | @python -m src.azure_automl.download_forecasts 31 | 32 | evaluate_experiments: 33 | @python -m src.evaluation --datasets_paths "$(shell echo $(FILTERED_TS_FILES) | tr ' ' ',')" 34 | 35 | .require-dataset_path: 36 | ifndef dataset_path 37 | $(error dataset_path is required) 38 | endif 39 | 40 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/README.md: -------------------------------------------------------------------------------- 1 | # Nixtla TimeGPT vs. Azure AutoML: A Comprehensive Performance Analysis 2 | 3 | This experiment evaluates the performance of **Nixtla TimeGPT's zero-shot inference** against **Microsoft's Azure AutoML** in the domain of time series forecasting. Our analysis shows that TimeGPT **surpasses Azure AutoML by 12%, 12%, and 10% in MAE, RMSE, and MASE metrics** and has **300x improvement in computational efficiency**. This evaluation spanned over 3,000 distinct time series across various data frequencies, with considerations for Azure AutoML's cost constraints. 4 | 5 | # Introduction 6 | 7 | [Azure AutoML](https://learn.microsoft.com/en-us/azure/machine-learning/concept-automl-forecasting-methods?view=azureml-api-2), a product of Microsoft, offers a robust automated machine-learning solution that caters to a wide array of predictive tasks, including time series forecasting. TimeGPT is a foundational model for time series forecasting that can be accessed [through an API](https://docs.nixtla.io/). While Azure AutoML is known for its adaptability and ease of use, our findings reveal that TimeGPT offers superior accuracy and efficiency, especially in the context of time series data. 8 | 9 | ## Empirical Evaluation 10 | 11 | Our study involved a detailed comparison of both models across various datasets, including Hourly, Daily, Weekly, and Monthly data frequencies. The datasets were chosen from the test set of the [TimeGPT-1 paper](https://arxiv.org/abs/2310.03589), ensuring a diverse set of time series for evaluation. The selection process was designed to manage computational complexity and adhere to Azure AutoML's dataset size requirements, with a cap of 3,000 observations to maintain cost-effectiveness. 12 | 13 | ## Results 14 | 15 | The following table shows the main findings of our analysis, presenting a comparison of performance metrics (MASE, MAE, RMSE) and computational time (in seconds) across different datasets. The best results are highlighted in **bold** for clarity. 16 | 17 | image 18 | 19 | 20 | ## Reproducibility 21 | 22 | All experiments were conducted in controlled environments to uphold the integrity and reproducibility of our results. 
TimeGPT evaluations were performed using a 2020 MacBook Air with an M1 chip, ensuring accessibility and practicality. In contrast, Azure AutoML experiments were carried out on a cluster of 11 STANDARD_DS5_V2 virtual machines equipped with substantial computational resources to showcase its scalability and power. 23 | 24 | ### Instructions 25 | 26 | 1. Configure Azure AutoML according to the official Microsoft documentation. 27 | 2. Set the environment variables in a `.env` file using `.env.example` as example. 28 | 3. Set up a conda environment using: 29 | 30 | ```bash 31 | mamba create -n azure-automl-fcst python=3.10 32 | conda activate azure-automl-fcst 33 | pip install uv 34 | uv pip install -r requirements.txt 35 | ``` 36 | 37 | 4. Download the data using 38 | 39 | ```python 40 | python -m src.utils.download_data 41 | ``` 42 | 43 | If you're interested in replicating the results, write us at `support@nixtla.io` to give you access to the data. 44 | 45 | 5. Filter the datasets to prevent AzureML from crashing 46 | 47 | ``` 48 | make filter_data 49 | ``` 50 | 51 | 6. Run the forecasting tasks for TimeGPT, SeasonalNaive, and AzureAutoML using the following: 52 | 53 | ``` 54 | make run_methods 55 | ``` 56 | 57 | Notice that AzureAutoML will send the job to the predefined cluster. 58 | 59 | 7. Retrieve AzureAutoML forecasts once they are ready: 60 | 61 | ``` 62 | make download_automl_forecasts 63 | ``` 64 | 65 | 8. Run evaluation 66 | 67 | ``` 68 | make evaluate_experiments 69 | ``` 70 | 71 | 72 | ### References 73 | - [TimeGPT 1](https://arxiv.org/abs/2310.03589) 74 | - [StatsForecast](https://github.com/Nixtla/statsforecast/) 75 | - [Distributed AzureAutoML for forecasting](https://github.com/Azure/azureml-examples/blob/main/sdk/python/jobs/pipelines/1k_demand_forecasting_with_pipeline_components/automl-forecasting-demand-many-models-in-pipeline/automl-forecasting-demand-many-models-in-pipeline.ipynb) 76 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-ai-ml 2 | azure-identity 3 | azureml-core 4 | fire 5 | mltable 6 | nixtla 7 | pandas 8 | python-dotenv 9 | rich 10 | statsforecast 11 | utilsforecast 12 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/src/azure_automl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/experiments/azure-automl-forecasting/src/azure_automl/__init__.py -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/src/azure_automl/download_forecasts.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | 4 | import fire 5 | 6 | from .automl_handler import AzureAutoML 7 | from .forecasting import AzureAutoMLJobs 8 | from src.utils.data_handler import ForecastDataset 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | main_logger = logging.getLogger(__name__) 12 | 13 | 14 | def download_forecasts(dir: str = "./results"): 15 | azure_automl = AzureAutoML.from_environment() 16 | azure_automl_experiments = AzureAutoMLJobs() 17 | results_path = Path(dir) / "azure_automl" 18 | 19 | jobs_df = azure_automl_experiments.get_jobs_df() 20 | jobs_df = 
jobs_df.sort_values("created_at", ascending=False).drop_duplicates( 21 | "experiment_name" 22 | ) 23 | 24 | for _, row in jobs_df.iterrows(): 25 | experiment_name = row.experiment_name 26 | job_name = row.job_name 27 | main_logger.info( 28 | f"Downloading forecasts for experiment {experiment_name} and job {job_name}" 29 | ) 30 | try: 31 | forecast_df = azure_automl.get_forecast_df(job_name) 32 | total_time = azure_automl.get_job_total_time(job_name) 33 | except Exception: 34 | main_logger.info( 35 | f"Failed to download forecasts for experiment {experiment_name} and job {job_name}" 36 | ) 37 | continue 38 | if forecast_df is None: 39 | main_logger.info( 40 | f"Failed to download forecasts for experiment {experiment_name} and job {job_name}" 41 | "probably because the job is not finished yet or failed" 42 | ) 43 | continue 44 | fcst_dataset = ForecastDataset(forecast_df=forecast_df, total_time=total_time) 45 | experiment_name = row.experiment_name 46 | fcst_dataset.save_to_dir(results_path / experiment_name) 47 | main_logger.info( 48 | f"Saved forecasts for experiment {experiment_name} and job {job_name}" 49 | ) 50 | 51 | 52 | if __name__ == "__main__": 53 | fire.Fire(download_forecasts) 54 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/src/azure_automl/forecasting.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import fire 4 | import pandas as pd 5 | from azure.ai.ml.entities import AmlCompute 6 | 7 | from .automl_handler import AzureAutoML 8 | from src.utils.data_handler import ExperimentDataset 9 | 10 | 11 | class AzureAutoMLJobs: 12 | """ 13 | This class stores and updates the Azure AutoML Experiments, 14 | to keep track of the pipeline jobs. 15 | We need this to later downlaod the forecasts. 
16 | """ 17 | 18 | file_name = "forecasting_jobs.csv" 19 | 20 | def __init__(self, dir: str = "./azure_automl_results"): 21 | self.dir = dir 22 | self.jobs_path = Path(self.dir) / self.file_name 23 | self.setup() 24 | 25 | def setup(self): 26 | self.jobs_path.parent.mkdir(parents=True, exist_ok=True) 27 | if not self.jobs_path.exists(): 28 | pd.DataFrame(columns=["created_at", "experiment_name", "job_name"]).to_csv( 29 | self.jobs_path, 30 | index=False, 31 | ) 32 | 33 | def get_jobs_df(self) -> pd.DataFrame: 34 | return pd.read_csv(self.jobs_path) 35 | 36 | def save_job(self, job_name: str, experiment_name: str): 37 | jobs_df = self.get_jobs_df() 38 | new_row = pd.DataFrame( 39 | { 40 | "created_at": [pd.Timestamp.now()], 41 | "experiment_name": [experiment_name], 42 | "job_name": [job_name], 43 | } 44 | ) 45 | jobs_df = pd.concat([jobs_df, new_row]) 46 | jobs_df.to_csv(self.jobs_path, index=False) 47 | 48 | 49 | def start_forecasting_job( 50 | dataset_path: str, 51 | begin_create_or_update_aml_compute: bool = False, 52 | ): 53 | experiment_name = dataset_path.split("/")[-1].split(".")[0] 54 | dataset = ExperimentDataset.from_parquet(parquet_path=dataset_path) 55 | azure_automl = AzureAutoML.from_environment() 56 | azure_automl_jobs = AzureAutoMLJobs() 57 | 58 | aml_compute = AmlCompute( 59 | name="azure-automl-fcst-cluster-nixtla", 60 | min_instances=11, 61 | max_instances=11, 62 | size="STANDARD_DS5_V2", 63 | ) 64 | 65 | job_name = azure_automl.forecast( 66 | df=dataset.Y_df_train, 67 | df_test=dataset.Y_df_test, 68 | aml_compute=aml_compute, 69 | h=dataset.horizon, 70 | freq=dataset.pandas_frequency, 71 | n_cross_validations=2, 72 | experiment_name=experiment_name, 73 | begin_create_or_update_aml_compute=begin_create_or_update_aml_compute, 74 | max_nodes=11, 75 | max_concurrency_per_node=8, 76 | ) 77 | 78 | azure_automl_jobs.save_job(job_name, experiment_name) 79 | 80 | 81 | if __name__ == "__main__": 82 | fire.Fire(start_forecasting_job) 83 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/src/nixtla_timegpt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | from time import time 4 | 5 | import fire 6 | from dotenv import load_dotenv 7 | from nixtla import NixtlaClient 8 | 9 | from src.utils.data_handler import ExperimentDataset, ForecastDataset 10 | 11 | load_dotenv() 12 | 13 | 14 | def timegpt_forecast(dataset_path: str, results_dir: str = "./results"): 15 | dataset = ExperimentDataset.from_parquet(parquet_path=dataset_path) 16 | size_df = sys.getsizeof(dataset.Y_df_train) / (1024 * 1024) 17 | max_partition_size_mb = 20 18 | num_partitions = int(size_df / max_partition_size_mb) + 1 19 | timegpt = NixtlaClient(max_retries=1) 20 | start = time() 21 | forecast_df = timegpt.forecast( 22 | df=dataset.Y_df_train, 23 | h=dataset.horizon, 24 | freq=dataset.pandas_frequency, 25 | model="timegpt-1-long-horizon", 26 | num_partitions=num_partitions, 27 | ) 28 | end = time() 29 | total_time = end - start 30 | forecast_dataset = ForecastDataset( 31 | forecast_df=forecast_df, 32 | total_time=total_time, 33 | ) 34 | experiment_name = dataset_path.split("/")[-1].split(".")[0] 35 | results_path = Path(results_dir) / "nixtla_timegpt" / experiment_name 36 | forecast_dataset.save_to_dir(results_path) 37 | 38 | 39 | if __name__ == "__main__": 40 | fire.Fire(timegpt_forecast) 41 | 
-------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/src/statsforecast_sn.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from time import time 4 | 5 | import fire 6 | from statsforecast import StatsForecast 7 | from statsforecast.models import SeasonalNaive 8 | 9 | from src.utils.data_handler import ExperimentDataset, ForecastDataset 10 | 11 | 12 | def sn_forecast(dataset_path: str, results_dir: str = "./results"): 13 | os.environ["NIXTLA_ID_AS_COL"] = "true" 14 | dataset = ExperimentDataset.from_parquet(parquet_path=dataset_path) 15 | sf = StatsForecast( 16 | models=[SeasonalNaive(season_length=dataset.seasonality)], 17 | freq=dataset.pandas_frequency, 18 | ) 19 | start = time() 20 | forecast_df = sf.forecast( 21 | df=dataset.Y_df_train, 22 | h=dataset.horizon, 23 | ) 24 | end = time() 25 | total_time = end - start 26 | forecast_dataset = ForecastDataset(forecast_df=forecast_df, total_time=total_time) 27 | experiment_name = dataset_path.split("/")[-1].split(".")[0] 28 | results_path = Path(results_dir) / "statsforecast_sn" / experiment_name 29 | forecast_dataset.save_to_dir(results_path) 30 | 31 | 32 | if __name__ == "__main__": 33 | fire.Fire(sn_forecast) 34 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/src/utils/download_data.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from concurrent.futures import ProcessPoolExecutor 3 | 4 | import pandas as pd 5 | 6 | logging.basicConfig(level=logging.INFO) 7 | main_logger = logging.getLogger(__name__) 8 | 9 | 10 | def read_parquet_and_assign(uid, url): 11 | df = pd.read_parquet(url) 12 | df["unique_id"] = uid 13 | df["ds"] = df["ds"].astype(str) 14 | return df[["unique_id", "ds", "y"]] 15 | 16 | 17 | def download_data(): 18 | catalogue_splits = pd.read_parquet("./data/catalogue_splits.parquet") 19 | catalogue_datasets = pd.read_parquet("./data/catalogue_datasets.parquet") 20 | catalogue_df = catalogue_splits.merge( 21 | catalogue_datasets, 22 | on=["dataset", "subdataset", "frequency"], 23 | ) 24 | del catalogue_splits 25 | del catalogue_datasets 26 | catalogue_df = catalogue_df.query("split == 'test'")[ 27 | [ 28 | "unique_id", 29 | "frequency", 30 | "url", 31 | "pandas_frequency", 32 | "seasonality", 33 | "horizon", 34 | ] 35 | ] 36 | grouped_df = catalogue_df.groupby(["frequency", "pandas_frequency"]) 37 | for (frequency, pandas_frequency), df in grouped_df: 38 | uids, urls = df["unique_id"].values, df["url"].values 39 | main_logger.info( 40 | f"frequency: {frequency}, pandas_frequency: {pandas_frequency}" 41 | ) 42 | n_uids = len(uids) 43 | main_logger.info(f"number of uids: {n_uids}") 44 | max_workers = min(10, n_uids) 45 | with ProcessPoolExecutor(max_workers=max_workers) as executor: 46 | futures = [ 47 | executor.submit(read_parquet_and_assign, uid, url) 48 | for uid, url in zip(uids, urls) 49 | ] 50 | results = [future.result() for future in futures] 51 | main_logger.info("dataset read") 52 | Y_df = pd.concat(results) 53 | Y_df = Y_df.merge( 54 | df.drop(columns="url"), 55 | on="unique_id", 56 | how="left", 57 | ) 58 | Y_df.to_parquet(f"./data/{frequency}_{pandas_frequency}.parquet") 59 | del Y_df 60 | main_logger.info("dataset saved") 61 | 62 | 63 | if __name__ == "__main__": 64 | download_data() 65 | 
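For orientation, each file written above is a plain long-format parquet. A toy file with the same columns can be sketched as follows (illustrative values only; the real series, horizons, and seasonalities come from Nixtla's benchmark catalogue, and the file name is hypothetical):

```python
import pandas as pd

# Columns match what download_data() writes for each frequency group.
toy_df = pd.DataFrame(
    {
        "unique_id": ["series_1"] * 48,
        "ds": pd.date_range("2024-01-01", periods=48, freq="H").astype(str),
        "y": range(48),
        "frequency": "Hourly",
        "pandas_frequency": "H",
        "seasonality": 24,
        "horizon": 24,
    }
)
toy_df.to_parquet("./data/Hourly_H.parquet")
```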
-------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/src/utils/filter_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | this module takes Nixtla's benchmarking data 3 | and filters it to prevent azureml from crashing 4 | in the following cases: 5 | - too short series, see https://learn.microsoft.com/en-us/azure/machine-learning/concept-automl-forecasting-methods?view=azureml-api-2#data-length-requirements 6 | """ 7 | import logging 8 | from dataclasses import dataclass 9 | from pathlib import Path 10 | from typing import Any, Callable 11 | 12 | import fire 13 | import numpy as np 14 | import pandas as pd 15 | 16 | logging.basicConfig(level=logging.INFO) 17 | main_logger = logging.getLogger(__name__) 18 | 19 | 20 | @dataclass 21 | class DatasetParams: 22 | frequency: str 23 | pandas_frequency: str 24 | horizon: int 25 | seasonality: int 26 | 27 | @staticmethod 28 | def _get_value_from_df_col( 29 | df: pd.DataFrame, 30 | col: str, 31 | dtype: Callable | None = None, 32 | ) -> Any: 33 | col_values = df[col].unique() 34 | if len(col_values) > 1: 35 | raise ValueError(f"{col} is not unique: {col_values}") 36 | value = col_values[0] 37 | if dtype is not None: 38 | value = dtype(value) 39 | return value 40 | 41 | @classmethod 42 | def from_df(cls, df: pd.DataFrame) -> "DatasetParams": 43 | dataset_params = {} 44 | dataset_params_cols = [ 45 | "frequency", 46 | "pandas_frequency", 47 | "horizon", 48 | "seasonality", 49 | ] 50 | dataset_params_cols_dtypes = [str, str, int, int] 51 | for col, dtype in zip(dataset_params_cols, dataset_params_cols_dtypes): 52 | dataset_params[col] = cls._get_value_from_df_col(df, col, dtype=dtype) 53 | return cls(**dataset_params) 54 | 55 | 56 | def filter_and_clean_dataset( 57 | dataset_path: str, 58 | max_series: int = 1_000, 59 | n_train_cv: int = 2, 60 | n_seasonalities: int = 5, 61 | max_insample_length: int = 3_000, 62 | random_seed: int = 420, 63 | ): 64 | main_logger.info(f"Processing dataset {dataset_path}") 65 | df = pd.read_parquet(dataset_path) 66 | df = df.drop_duplicates(["unique_id", "ds"]) # type: ignore 67 | df = df.sort_values(["unique_id", "ds"]) 68 | ds_params = DatasetParams.from_df(df) 69 | min_train_size_per_series = ( 70 | ds_params.horizon 71 | + 2 * ds_params.horizon 72 | + (n_train_cv - 1) * ds_params.horizon 73 | + 1 74 | ) 75 | if ds_params.seasonality < 100: 76 | # if series has low seasonality 77 | # we add n_seasonalities to min_train_size_per_series 78 | # to keep the series long enough 79 | min_train_size_per_series += n_seasonalities * ds_params.seasonality 80 | uids = df["unique_id"].unique() # type: ignore 81 | df = ( 82 | df.groupby("unique_id") 83 | .filter(lambda x: len(x) >= min_train_size_per_series) 84 | .groupby("unique_id") # type: ignore 85 | .tail(max_insample_length + ds_params.horizon) 86 | .reset_index(drop=True) 87 | ) 88 | main_logger.info( 89 | f"Filtering out {len(uids) - len(df['unique_id'].unique())} series" 90 | ) 91 | uids = df["unique_id"].unique() # type: ignore 92 | if len(uids) > max_series: 93 | np.random.seed(random_seed) 94 | uids = np.random.choice(uids, max_series, replace=False) # type: ignore 95 | df = df.query("unique_id in @uids") # type: ignore 96 | main_logger.info(f"Filtering out {len(uids) - max_series} series") 97 | # finally we clean some strange dates 98 | mask = df["ds"].str.endswith(":01") # type: ignore 99 | df.loc[mask, "ds"] = df.loc[mask, "ds"].str[:-3] + ":00" 100 | # 
save the dataset 101 | dataset_path = Path(dataset_path) # type: ignore 102 | filtered_dataset_path = dataset_path.parent / "filtered_datasets" / dataset_path.name # type: ignore 103 | filtered_dataset_path.parent.mkdir(exist_ok=True, parents=True) 104 | df.to_parquet(filtered_dataset_path) 105 | main_logger.info(f"Filtered dataset saved to {filtered_dataset_path}") 106 | 107 | 108 | if __name__ == "__main__": 109 | fire.Fire(filter_and_clean_dataset) 110 | -------------------------------------------------------------------------------- /experiments/efficiency/README.md: -------------------------------------------------------------------------------- 1 | # 🚀 TimeGPT API v2: Faster, Smarter, and More Powerful Time Series Forecasting! 🚀 2 | 3 | We’re excited to introduce **v2 of the TimeGPT API**, featuring a significant boost in performance, enhanced flexibility, and new capabilities that make time series forecasting faster and more insightful than ever before. 4 | 5 | In this release, you will find: 6 | - **Dramatic speed improvements** across all major endpoints 🏎️ 7 | - **Scalable forecasting** that handles 1 billion time series in just 6 hours 📊 8 | - **Advanced handling of exogenous variables**, both historical and future 🌐 9 | - **Enhanced explainability** through SHAP values 🧠 10 | - **New integration with Polars**, a high-performance DataFrame library ⚡ 11 | 12 | ## Key Performance Highlights 🔥 13 | 14 | We've optimized the core functionalities—forecasting, anomaly detection, and cross-validation—with v2 showing significant speedups compared to v1. Below are the benchmark results: 15 | 16 | | Endpoint | Features | Level | v1 | v2 | Speedup | 17 | |:------------------|:-----------|:--------|:-----|:-----|:----------| 18 | | anomaly_detection | exog | [80] | 24s | 3s | 9x | 19 | | anomaly_detection | none | [80] | 13s | 2s | 8x | 20 | | cross_validation | exog | None | 22s | 4s | 6x | 21 | | cross_validation | exog | [80] | 31s | 6s | 5x | 22 | | cross_validation | none | None | 5s | 1s | 9x | 23 | | cross_validation | none | [80] | 9s | 2s | 4x | 24 | | forecast | exog | None | 18s | 1s | 13x | 25 | | forecast | exog | [80] | 20s | 2s | 10x | 26 | | forecast | none | None | 1s | 0s | 6x | 27 | | forecast | none | [80] | 3s | 1s | 6x | 28 | 29 | These results represent the huge leap in efficiency v2 provides, allowing you to analyze vast datasets and derive insights faster than ever before. 🚀 30 | 31 | ## How to Reproduce Results 32 | 33 | ### Installation 🛠️ 34 | 35 | 1. Install the required Python packages: 36 | ```bash 37 | pip install -r requirements.txt 38 | ``` 39 | 40 | ### Running the Code 🏃‍♀️ 41 | 42 | This script benchmarks **forecasting**, **anomaly detection**, and **cross-validation** across both v1 and v2 of the TimeGPT API. You can run the script and compare performance results by executing: 43 | 44 | ```bash 45 | python main.py 46 | ``` 47 | 48 | ## Performance Breakdown 🏎️ 49 | 50 | With v2, you get **up to 13x speed improvements** on key operations like forecasting with exogenous variables. This makes the API ideal for production environments where performance and scalability are paramount. 51 | 52 | ### New Features in v2 53 | 54 | - **Advanced Exogenous Variable Handling**: Leverage both historical and future exogenous data for more accurate forecasts. 55 | - **SHAP Values**: Improve model interpretability with SHAP value integration. 56 | - **Polars Integration**: Benefit from lightning-fast data processing with Polars, especially useful for big datasets. 
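To make the exogenous-variable and interval support concrete, the calls exercised by the benchmark script (`main.py`, shown below) boil down to something like this condensed sketch; it assumes `NIXTLA_API_KEY` is set in the environment:

```python
from nixtla import NixtlaClient
from utilsforecast.data import generate_series
from utilsforecast.feature_engineering import fourier

client = NixtlaClient()  # reads NIXTLA_API_KEY from the environment

# Historical target plus historical/future exogenous features (Fourier terms).
series = generate_series(10, freq="D", min_length=200)
train, future = fourier(series, freq="D", season_length=7, k=4, h=14)

# Forecast with future exogenous variables and an 80% prediction interval.
fcst_df = client.forecast(df=train, X_df=future, h=14, level=[80])
```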
57 | 58 | ## Conclusion 🚀 59 | 60 | With TimeGPT API v2, you’re not just getting a faster API—you’re gaining the tools to scale up your time series analysis effortlessly, with greater precision and deeper insights. Whether it’s detecting anomalies, validating models, or producing reliable forecasts, v2 ensures you get results **faster and smarter** than ever before. 61 | 62 | Happy forecasting! 63 | -------------------------------------------------------------------------------- /experiments/efficiency/main.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import logging 3 | import time 4 | 5 | import pandas as pd 6 | from nixtla import NixtlaClient as V2Client 7 | from nixtlats import NixtlaClient as V1Client 8 | from utilsforecast.data import generate_series 9 | from utilsforecast.feature_engineering import fourier 10 | 11 | logging.getLogger("nixtla").setLevel(logging.ERROR) 12 | logging.getLogger("nixtlats").setLevel(logging.ERROR) 13 | 14 | 15 | def forecast(client, df, X_df, h, level): 16 | return client.forecast(df=df, X_df=X_df, h=h, level=level) 17 | 18 | 19 | def cross_validation(client, df, X_df, h, level): 20 | return client.cross_validation(df=df, h=h, n_windows=4, level=level) 21 | 22 | 23 | def anomaly_detection(client, df, X_df, h, level): 24 | if isinstance(level, list): 25 | level = level[0] 26 | return client.detect_anomalies(df=df, level=level) 27 | 28 | 29 | v1_client = V1Client() 30 | v2_client = V2Client() 31 | n_series = 1_000 32 | freq = "D" 33 | h = 14 34 | series = generate_series(n_series, freq=freq, min_length=200) 35 | train, future = fourier(series, freq=freq, season_length=7, k=4, h=h) 36 | features = ["none", "exog"] 37 | level = [None, [80]] 38 | clients = {"v1": v1_client, "v2": v2_client} 39 | methods = { 40 | "forecast": forecast, 41 | "cross_validation": cross_validation, 42 | "anomaly_detection": anomaly_detection, 43 | } 44 | times = {version: {} for version in ("v1", "v2")} 45 | for feats, lvl in itertools.product(features, level): 46 | if feats == "none": 47 | df = series 48 | X_df = None 49 | else: 50 | df = train 51 | X_df = future 52 | for name, method in methods.items(): 53 | if name == "anomaly_detection" and lvl is None: 54 | continue 55 | for version, client in clients.items(): 56 | start = time.perf_counter() 57 | combination = f"{version} {name}. Features: {feats}. 
Level: {lvl}" 58 | print(f"Running {combination}") 59 | res = method(client, df=df, X_df=X_df, h=h, level=lvl) 60 | time_taken = time.perf_counter() - start 61 | times[version][f"{name}-{feats}-{lvl}"] = time_taken 62 | print(f"{combination} took {time_taken:.1f} seconds.") 63 | 64 | df = pd.DataFrame(times) 65 | df.index = df.index.str.split("-", expand=True) 66 | df.index.names = ["endpoint", "features", "level"] 67 | df = df.sort_index() 68 | df["speedup"] = df["v1"] / df["v2"] 69 | df["speedup"] = df["speedup"].map("{:.0f}x".format) 70 | for col in ("v1", "v2"): 71 | df[col] = df[col].map("{:.0f}s".format) 72 | with open("endpoint_times.md", "wt") as f: 73 | f.write(df.reset_index().to_markdown(index=False)) 74 | -------------------------------------------------------------------------------- /experiments/efficiency/requirements.txt: -------------------------------------------------------------------------------- 1 | nixtla>=0.6 2 | nixtlats==0.5.2 3 | pandas 4 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/.env.example: -------------------------------------------------------------------------------- 1 | NIXTLA_API_KEY= 2 | NIXTLA_BASE_URL= 3 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/Makefile: -------------------------------------------------------------------------------- 1 | download_lag_llama_code: 2 | @git clone https://github.com/time-series-foundation-models/lag-llama tempdir 3 | @cp -R tempdir/data/ . 4 | @cp -R tempdir/gluon_utils/ . 5 | @cp -R tempdir/lag_llama/ . 6 | @rm -rf tempdir 7 | 8 | download_data: 9 | @aws s3 sync s3://nixtla-foundational-time-series/data nixtla-foundational-time-series/data --no-sign-request 10 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/requirements.txt: -------------------------------------------------------------------------------- 1 | gluonts[torch] 2 | numpy 3 | torch>=2.0.0 4 | wandb 5 | scipy 6 | pandas 7 | huggingface_hub[cli] 8 | einshape 9 | fire 10 | nixtla 11 | python-dotenv 12 | rich 13 | statsforecast 14 | neuralforecast 15 | utilsforecast 16 | mlforecast 17 | lightgbm 18 | chronos @ git+https://github.com/amazon-science/chronos-forecasting.git 19 | salesforce-uni2ts @ git+https://github.com/SalesforceAIResearch/uni2ts.git 20 | timesfm @ git+https://github.com/AzulGarza/timesfm.git@fix-structure 21 | jax[cuda12] 22 | pytest 23 | prophet 24 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/experiments/foundation-time-series-arena/tests/__init__.py -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/tests/test_arena.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from tempfile import TemporaryDirectory 3 | 4 | import pandas as pd 5 | 6 | from xiuhmolpilli.arena import FoundationalTimeSeriesArena 7 | from .utils import models 8 | from .test_eval import generate_exp_dataset 9 | 10 | 11 | def generate_data(freq: str, tmpdir: str) -> str: 12 | df = generate_exp_dataset(n_series=5, freq=freq, return_df=True) 
13 | df_parquet_path = Path(tmpdir) / f"dataset_{freq}.parquet" 14 | df.to_parquet(df_parquet_path) 15 | return str(df_parquet_path) 16 | 17 | 18 | def test_foundational_time_series_arena(): 19 | cwd = Path.cwd() 20 | with TemporaryDirectory(dir=cwd) as tmpdir: 21 | parquet_data_paths = [generate_data(freq, tmpdir) for freq in ["H", "MS"]] 22 | arena = FoundationalTimeSeriesArena( 23 | models=models, 24 | parquet_data_paths=parquet_data_paths, 25 | results_dir=tmpdir, 26 | ) 27 | arena.compete() 28 | eval_df = pd.read_csv(arena.evaluation_path) 29 | arena.compete() 30 | eval_df_2 = pd.read_csv(arena.evaluation_path) 31 | print(eval_df) 32 | print(eval_df_2) 33 | assert eval_df.equals(eval_df_2) 34 | print(eval_df) 35 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/tests/test_models.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from utilsforecast.data import generate_series 4 | 5 | from .utils import models 6 | 7 | 8 | @pytest.mark.parametrize("model", models) 9 | @pytest.mark.parametrize("freq", ["H", "D", "W-MON", "MS"]) 10 | @pytest.mark.parametrize("h", [1, 12]) 11 | def test_correct_forecast_dates(model, freq, h): 12 | n_series = 5 13 | df = generate_series( 14 | n_series, 15 | freq=freq, 16 | ) 17 | df["unique_id"] = df["unique_id"].astype(str) 18 | df_test = df.groupby("unique_id").tail(h) 19 | df_train = df.drop(df_test.index) 20 | fcst_df = model.forecast( 21 | df_train, 22 | h=h, 23 | freq=freq, 24 | ) 25 | exp_n_cols = 3 26 | assert fcst_df.shape == (n_series * h, exp_n_cols) 27 | exp_cols = ["unique_id", "ds"] 28 | pd.testing.assert_frame_equal( 29 | fcst_df[exp_cols].sort_values(["unique_id", "ds"]).reset_index(drop=True), 30 | df_test[exp_cols].sort_values(["unique_id", "ds"]).reset_index(drop=True), 31 | ) 32 | 33 | 34 | @pytest.mark.parametrize("model", models) 35 | @pytest.mark.parametrize("freq", ["H", "D", "W-MON", "MS"]) 36 | @pytest.mark.parametrize("n_windows", [1, 4]) 37 | def test_cross_validation(model, freq, n_windows): 38 | h = 12 39 | n_series = 5 40 | df = generate_series(n_series, freq=freq, equal_ends=True) 41 | df["unique_id"] = df["unique_id"].astype(str) 42 | cv_df = model.cross_validation( 43 | df, 44 | h=h, 45 | freq=freq, 46 | n_windows=n_windows, 47 | ) 48 | exp_n_cols = 5 # unique_id, cutoff, ds, y, model 49 | assert cv_df.shape == (n_series * h * n_windows, exp_n_cols) 50 | cutoffs = cv_df["cutoff"].unique() 51 | assert len(cutoffs) == n_windows 52 | df_test = df.groupby("unique_id").tail(h * n_windows) 53 | exp_cols = ["unique_id", "ds", "y"] 54 | pd.testing.assert_frame_equal( 55 | cv_df.sort_values(["unique_id", "ds"]).reset_index(drop=True)[exp_cols], 56 | df_test.sort_values(["unique_id", "ds"]).reset_index(drop=True)[exp_cols], 57 | ) 58 | if n_windows == 1: 59 | # test same results using predict with less data 60 | df_test = df.groupby("unique_id").tail(h) 61 | df_train = df.drop(df_test.index) 62 | fcst_df = model.forecast( 63 | df_train, 64 | h=h, 65 | freq=freq, 66 | ) 67 | exp_cols = ["unique_id", "ds"] 68 | pd.testing.assert_frame_equal( 69 | cv_df.sort_values(["unique_id", "ds"]).reset_index(drop=True)[exp_cols], 70 | fcst_df.sort_values(["unique_id", "ds"]).reset_index(drop=True)[exp_cols], 71 | ) 72 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/tests/utils.py: 
-------------------------------------------------------------------------------- 1 | from xiuhmolpilli.models.benchmarks import ( 2 | AutoARIMA, 3 | NixtlaProphet, 4 | SeasonalNaive, 5 | AutoNHITS, 6 | AutoTFT, 7 | AutoLGBM, 8 | ) 9 | from xiuhmolpilli.models.foundational import Chronos, LagLlama, Moirai, TimeGPT, TimesFM 10 | 11 | models = [ 12 | # benchmarks 13 | AutoARIMA(), 14 | NixtlaProphet(), 15 | SeasonalNaive(), 16 | # neural benchmarks 17 | AutoNHITS(), 18 | AutoTFT(), 19 | # ml 20 | AutoLGBM(), 21 | # foundational models 22 | Chronos("amazon/chronos-t5-tiny"), 23 | LagLlama(), 24 | Moirai("Salesforce/moirai-1.0-R-small"), 25 | TimeGPT(), 26 | TimesFM(), 27 | ] 28 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/experiments/foundation-time-series-arena/xiuhmolpilli/__init__.py -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/experiments/foundation-time-series-arena/xiuhmolpilli/models/__init__.py -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | from .ml import AutoLGBM 2 | from .neural import ( 3 | AutoNHITS, 4 | AutoTFT, 5 | ) 6 | from .prophet import NixtlaProphet 7 | from .stats import ( 8 | ADIDA, 9 | AutoARIMA, 10 | AutoCES, 11 | AutoETS, 12 | CrostonClassic, 13 | DOTheta, 14 | HistoricAverage, 15 | IMAPA, 16 | SeasonalNaive, 17 | Theta, 18 | ZeroModel, 19 | ) 20 | 21 | __all__ = [ 22 | "AutoLGBM", 23 | "NixtlaProphet", 24 | "AutoNHITS", 25 | "AutoTFT", 26 | "ADIDA", 27 | "AutoARIMA", 28 | "AutoCES", 29 | "AutoETS", 30 | "CrostonClassic", 31 | "DOTheta", 32 | "HistoricAverage", 33 | "IMAPA", 34 | "SeasonalNaive", 35 | "Theta", 36 | "ZeroModel", 37 | ] 38 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/benchmarks/ml.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | from mlforecast.auto import AutoMLForecast, AutoLightGBM 5 | 6 | from ..utils.forecaster import Forecaster, get_seasonality 7 | 8 | os.environ["NIXTLA_ID_AS_COL"] = "true" 9 | 10 | 11 | class AutoLGBM(Forecaster): 12 | def __init__( 13 | self, 14 | alias: str = "AutoLGBM", 15 | num_samples: int = 10, 16 | cv_n_windows: int = 5, 17 | ): 18 | self.alias = alias 19 | self.num_samples = num_samples 20 | self.cv_n_windows = cv_n_windows 21 | 22 | def forecast( 23 | self, 24 | df: pd.DataFrame, 25 | h: int, 26 | freq: str, 27 | ) -> pd.DataFrame: 28 | mf = AutoMLForecast( 29 | models=[AutoLightGBM()], 30 | freq=freq, 31 | season_length=get_seasonality(freq), 32 | num_threads=-1, 33 | ) 34 | mf.fit( 35 | df=df, 36 | n_windows=self.cv_n_windows, 37 | h=h, 38 | num_samples=self.num_samples, 39 | ) 40 | fcst_df = mf.predict(h=h) 41 | fcst_df = fcst_df.rename(columns={"AutoLightGBM": self.alias}) 42 | return 
fcst_df 43 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/benchmarks/neural.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | from neuralforecast import NeuralForecast 5 | from neuralforecast.auto import ( 6 | AutoNHITS as _AutoNHITS, 7 | AutoTFT as _AutoTFT, 8 | ) 9 | from neuralforecast.common._base_model import BaseModel as NeuralForecastModel 10 | from ray import tune 11 | 12 | from ..utils.forecaster import Forecaster 13 | 14 | os.environ["NIXTLA_ID_AS_COL"] = "true" 15 | 16 | 17 | def run_neuralforecast_model( 18 | model: NeuralForecastModel, 19 | df: pd.DataFrame, 20 | freq: str, 21 | ) -> pd.DataFrame: 22 | nf = NeuralForecast( 23 | models=[model], 24 | freq=freq, 25 | ) 26 | nf.fit(df=df) 27 | fcst_df = nf.predict() 28 | return fcst_df 29 | 30 | 31 | class AutoNHITS(Forecaster): 32 | def __init__( 33 | self, 34 | alias: str = "AutoNHITS", 35 | num_samples: int = 10, 36 | backend: str = "optuna", 37 | ): 38 | self.alias = alias 39 | self.num_samples = num_samples 40 | self.backend = backend 41 | 42 | def forecast( 43 | self, 44 | df: pd.DataFrame, 45 | h: int, 46 | freq: str, 47 | ) -> pd.DataFrame: 48 | config = _AutoNHITS.get_default_config(h=h, backend="ray") 49 | config["scaler_type"] = tune.choice(["robust"]) 50 | 51 | if self.backend == "optuna": 52 | config = _AutoNHITS._ray_config_to_optuna(config) 53 | fcst_df = run_neuralforecast_model( 54 | model=_AutoNHITS( 55 | h=h, 56 | alias=self.alias, 57 | num_samples=self.num_samples, 58 | backend=self.backend, 59 | config=config, 60 | ), 61 | df=df, 62 | freq=freq, 63 | ) 64 | return fcst_df 65 | 66 | 67 | class AutoTFT(Forecaster): 68 | def __init__( 69 | self, 70 | alias: str = "AutoTFT", 71 | num_samples: int = 10, 72 | backend: str = "optuna", 73 | ): 74 | self.alias = alias 75 | self.num_samples = num_samples 76 | self.backend = backend 77 | 78 | def forecast( 79 | self, 80 | df: pd.DataFrame, 81 | h: int, 82 | freq: str, 83 | ) -> pd.DataFrame: 84 | config = _AutoTFT.get_default_config(h=h, backend="ray") 85 | config["scaler_type"] = tune.choice(["robust"]) 86 | if self.backend == "optuna": 87 | config = _AutoTFT._ray_config_to_optuna(config) 88 | fcst_df = run_neuralforecast_model( 89 | model=_AutoTFT( 90 | h=h, 91 | alias=self.alias, 92 | num_samples=self.num_samples, 93 | backend=self.backend, 94 | config=config, 95 | ), 96 | df=df, 97 | freq=freq, 98 | ) 99 | return fcst_df 100 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/benchmarks/prophet.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from typing import List 3 | from threadpoolctl import threadpool_limits 4 | 5 | import pandas as pd 6 | from prophet import Prophet 7 | 8 | from ..utils.parallel_forecaster import ParallelForecaster 9 | from ..utils.forecaster import Forecaster 10 | 11 | 12 | class NixtlaProphet(Prophet, ParallelForecaster, Forecaster): 13 | def __init__( 14 | self, 15 | alias: str = "Prophet", 16 | *args, 17 | **kwargs, 18 | ): 19 | super().__init__(*args, **kwargs) 20 | self.alias = alias 21 | 22 | def __local_forecast( 23 | self, 24 | df: pd.DataFrame, 25 | h: int, 26 | freq: str, 27 | quantiles: List[float] | None = None, 28 | ) -> pd.DataFrame: 29 | if quantiles is not None: 30 | raise NotImplementedError 
31 | model = deepcopy(self) 32 | model.fit(df=df) 33 | future_df = model.make_future_dataframe( 34 | periods=h, 35 | include_history=False, 36 | freq=freq, 37 | ) 38 | fcst_df = model.predict(future_df) 39 | fcst_df = fcst_df.rename({"yhat": self.alias}, axis=1) 40 | fcst_df = fcst_df[["ds", self.alias]] 41 | return fcst_df 42 | 43 | def _local_forecast( 44 | self, 45 | df: pd.DataFrame, 46 | h: int, 47 | freq: str, 48 | quantiles: List[float] | None = None, 49 | ) -> pd.DataFrame: 50 | with threadpool_limits(limits=1): 51 | return self.__local_forecast( 52 | df=df, 53 | h=h, 54 | freq=freq, 55 | quantiles=quantiles, 56 | ) 57 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/__init__.py: -------------------------------------------------------------------------------- 1 | from .chronos import Chronos 2 | from .lagllama import LagLlama 3 | from .moirai import Moirai 4 | from .timegpt import TimeGPT 5 | from .timesfm import TimesFM 6 | 7 | __all__ = [ 8 | "Chronos", 9 | "LagLlama", 10 | "Moirai", 11 | "TimeGPT", 12 | "TimesFM", 13 | ] 14 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/chronos.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable, List 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import torch 6 | from chronos import ChronosPipeline 7 | from tqdm import tqdm 8 | from utilsforecast.processing import make_future_dataframe 9 | 10 | from ..utils.forecaster import Forecaster 11 | 12 | 13 | class TimeSeriesDataset: 14 | def __init__( 15 | self, 16 | data: torch.Tensor, 17 | uids: Iterable, 18 | last_times: Iterable, 19 | batch_size: int, 20 | ): 21 | self.data = data 22 | self.uids = uids 23 | self.last_times = last_times 24 | self.batch_size = batch_size 25 | self.n_batches = len(data) // self.batch_size + ( 26 | 0 if len(data) % self.batch_size == 0 else 1 27 | ) 28 | self.current_batch = 0 29 | 30 | @classmethod 31 | def from_df(cls, df: pd.DataFrame, batch_size: int): 32 | num_unique_ids = df["unique_id"].nunique() 33 | max_series_length = df["unique_id"].value_counts().max() 34 | padded_tensor = torch.full( 35 | size=(num_unique_ids, max_series_length), 36 | fill_value=torch.nan, 37 | dtype=torch.bfloat16, 38 | ) # type: ignore 39 | df_sorted = df.sort_values(by=["unique_id", "ds"]) 40 | for idx, (_, group) in enumerate(df_sorted.groupby("unique_id")): 41 | series_length = len(group) 42 | padded_tensor[idx, -series_length:] = torch.tensor( 43 | group["y"].values, 44 | dtype=torch.bfloat16, 45 | ) 46 | uids = df_sorted["unique_id"].unique() 47 | last_times = df_sorted.groupby("unique_id")["ds"].tail(1) 48 | return cls(padded_tensor, uids, last_times, batch_size) 49 | 50 | def __len__(self): 51 | return self.n_batches 52 | 53 | def make_future_dataframe(self, h: int, freq: str) -> pd.DataFrame: 54 | return make_future_dataframe( 55 | uids=self.uids, 56 | last_times=pd.to_datetime(self.last_times), 57 | h=h, 58 | freq=freq, 59 | ) # type: ignore 60 | 61 | def __iter__(self): 62 | self.current_batch = 0 # Reset for new iteration 63 | return self 64 | 65 | def __next__(self): 66 | if self.current_batch < self.n_batches: 67 | start_idx = self.current_batch * self.batch_size 68 | end_idx = start_idx + self.batch_size 69 | self.current_batch += 1 70 | return self.data[start_idx:end_idx] 71 | else: 72 
| raise StopIteration 73 | 74 | 75 | class Chronos(Forecaster): 76 | def __init__( 77 | self, 78 | repo_id: str = "amazon/chronos-t5-large", 79 | batch_size: int = 16, 80 | alias: str = "Chronos", 81 | ): 82 | self.repo_id = repo_id 83 | self.batch_size = batch_size 84 | self.alias = alias 85 | self.model = ChronosPipeline.from_pretrained( 86 | repo_id, 87 | device_map="auto", 88 | torch_dtype=torch.bfloat16, 89 | ) 90 | 91 | def forecast( 92 | self, 93 | df: pd.DataFrame, 94 | h: int, 95 | freq: str, 96 | ) -> pd.DataFrame: 97 | dataset = TimeSeriesDataset.from_df(df, batch_size=self.batch_size) 98 | fcsts = [ 99 | self.model.predict(batch, prediction_length=h) for batch in tqdm(dataset) 100 | ] 101 | fcst = torch.cat(fcsts) 102 | fcst = fcst.numpy() 103 | fcst_df = dataset.make_future_dataframe(h=h, freq=freq) 104 | fcst_df[self.alias] = np.mean(fcst, axis=1).reshape(-1, 1) 105 | return fcst_df 106 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/lagllama.py: -------------------------------------------------------------------------------- 1 | from gluonts.torch.model.predictor import PyTorchPredictor 2 | from lag_llama.gluon.estimator import LagLlamaEstimator 3 | 4 | from ..utils.gluonts_forecaster import GluonTSForecaster 5 | 6 | 7 | class LagLlama(GluonTSForecaster): 8 | def __init__( 9 | self, 10 | repo_id: str = "time-series-foundation-models/Lag-Llama", 11 | filename: str = "lag-llama.ckpt", 12 | alias: str = "LagLlama", 13 | ): 14 | super().__init__( 15 | repo_id=repo_id, 16 | filename=filename, 17 | alias=alias, 18 | ) 19 | 20 | def get_predictor(self, prediction_length: int) -> PyTorchPredictor: 21 | ckpt = self.load() 22 | estimator_args = ckpt["hyper_parameters"]["model_kwargs"] 23 | # this context length is reported in the paper 24 | context_length = 32 25 | estimator = LagLlamaEstimator( 26 | ckpt_path=self.checkpoint_path, 27 | prediction_length=prediction_length, 28 | context_length=context_length, 29 | # estimator args 30 | input_size=estimator_args["input_size"], 31 | n_layer=estimator_args["n_layer"], 32 | n_embd_per_head=estimator_args["n_embd_per_head"], 33 | n_head=estimator_args["n_head"], 34 | scaling=estimator_args["scaling"], 35 | time_feat=estimator_args["time_feat"], 36 | ) 37 | lightning_module = estimator.create_lightning_module() 38 | transformation = estimator.create_transformation() 39 | predictor = estimator.create_predictor(transformation, lightning_module) 40 | return predictor 41 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/moirai.py: -------------------------------------------------------------------------------- 1 | from gluonts.torch.model.predictor import PyTorchPredictor 2 | from uni2ts.model.moirai import MoiraiForecast, MoiraiModule 3 | 4 | from ..utils.gluonts_forecaster import GluonTSForecaster 5 | 6 | 7 | class Moirai(GluonTSForecaster): 8 | def __init__( 9 | self, 10 | repo_id: str = "Salesforce/moirai-1.0-R-large", 11 | filename: str = "model.ckpt", 12 | alias: str = "Moirai", 13 | ): 14 | super().__init__( 15 | repo_id=repo_id, 16 | filename=filename, 17 | alias=alias, 18 | ) 19 | 20 | def get_predictor(self, prediction_length: int) -> PyTorchPredictor: 21 | model = MoiraiForecast( 22 | module=MoiraiModule.from_pretrained(self.repo_id), 23 | prediction_length=prediction_length, 24 | context_length=200, 25 | 
patch_size="auto", 26 | num_samples=100, 27 | target_dim=1, 28 | feat_dynamic_real_dim=0, 29 | past_feat_dynamic_real_dim=0, 30 | ) 31 | predictor = model.create_predictor(batch_size=32) 32 | return predictor 33 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/timegpt.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | from dotenv import load_dotenv 5 | from nixtla import NixtlaClient 6 | from typing import Optional 7 | from ..utils.forecaster import Forecaster 8 | 9 | load_dotenv() 10 | 11 | 12 | class TimeGPT(Forecaster): 13 | def __init__( 14 | self, 15 | api_key: str | None = None, 16 | base_url: Optional[str] = None, 17 | max_retries: int = 1, 18 | model: str = "timegpt-1", 19 | alias: str = "TimeGPT", 20 | ): 21 | self.api_key = api_key 22 | self.base_url = base_url 23 | self.max_retries = max_retries 24 | self.model = model 25 | self.alias = alias 26 | 27 | def _get_client(self) -> NixtlaClient: 28 | if self.api_key is None: 29 | api_key = os.environ["NIXTLA_API_KEY"] 30 | else: 31 | api_key = self.api_key 32 | return NixtlaClient( 33 | api_key=api_key, 34 | base_url=self.base_url, 35 | max_retries=self.max_retries, 36 | ) 37 | 38 | def forecast( 39 | self, 40 | df: pd.DataFrame, 41 | h: int, 42 | freq: str, 43 | ) -> pd.DataFrame: 44 | client = self._get_client() 45 | fcst_df = client.forecast( 46 | df=df, 47 | h=h, 48 | freq=freq, 49 | model=self.model, 50 | ) 51 | fcst_df["ds"] = pd.to_datetime(fcst_df["ds"]) 52 | fcst_df = fcst_df.rename(columns={"TimeGPT": self.alias}) 53 | return fcst_df 54 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/timesfm.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import timesfm 3 | import torch 4 | from paxml import checkpoints 5 | 6 | from ..utils.forecaster import Forecaster 7 | 8 | 9 | class TimesFM(Forecaster): 10 | def __init__( 11 | self, 12 | repo_id: str = "google/timesfm-1.0-200m", 13 | context_length: int = 512, 14 | batch_size: int = 64, 15 | alias: str = "TimesFM", 16 | ): 17 | self.repo_id = repo_id 18 | self.context_length = context_length 19 | self.batch_size = batch_size 20 | self.alias = alias 21 | 22 | def get_predictor( 23 | self, 24 | prediction_length: int, 25 | ) -> timesfm.TimesFm: 26 | backend = "gpu" if torch.cuda.is_available() else "cpu" 27 | tfm = timesfm.TimesFm( 28 | context_len=self.context_length, 29 | horizon_len=prediction_length, 30 | input_patch_len=32, 31 | output_patch_len=128, 32 | num_layers=20, 33 | model_dims=1280, 34 | backend=backend, 35 | per_core_batch_size=self.batch_size, 36 | ) 37 | tfm.load_from_checkpoint(repo_id=self.repo_id) 38 | return tfm 39 | 40 | def forecast( 41 | self, 42 | df: pd.DataFrame, 43 | h: int, 44 | freq: str, 45 | ) -> pd.DataFrame: 46 | predictor = self.get_predictor(prediction_length=h) 47 | fcst_df = predictor.forecast_on_df( 48 | inputs=df, 49 | freq=freq, 50 | value_name="y", 51 | model_name=self.alias, 52 | num_jobs=1, 53 | ) 54 | fcst_df = fcst_df[["unique_id", "ds", self.alias]] 55 | return fcst_df 56 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/__init__.py -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/forecaster.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import pandas as pd 4 | from gluonts.time_feature.seasonality import get_seasonality as _get_seasonality 5 | from tqdm import tqdm 6 | from utilsforecast.processing import ( 7 | backtest_splits, 8 | drop_index_if_pandas, 9 | join, 10 | maybe_compute_sort_indices, 11 | take_rows, 12 | vertical_concat, 13 | ) 14 | 15 | 16 | def get_seasonality(freq: str) -> int: 17 | return _get_seasonality(freq, seasonalities={"D": 7}) 18 | 19 | 20 | def maybe_convert_col_to_datetime(df: pd.DataFrame, col_name: str) -> pd.DataFrame: 21 | if not pd.api.types.is_datetime64_any_dtype(df[col_name]): 22 | df = df.copy() 23 | df[col_name] = pd.to_datetime(df[col_name]) 24 | return df 25 | 26 | 27 | class Forecaster: 28 | def forecast( 29 | self, 30 | df: pd.DataFrame, 31 | h: int, 32 | freq: str, 33 | ) -> pd.DataFrame: 34 | raise NotImplementedError 35 | 36 | def cross_validation( 37 | self, 38 | df: pd.DataFrame, 39 | h: int, 40 | freq: str, 41 | n_windows: int = 1, 42 | step_size: int | None = None, 43 | ) -> pd.DataFrame: 44 | df = maybe_convert_col_to_datetime(df, "ds") 45 | # mlforecast cv code 46 | results = [] 47 | sort_idxs = maybe_compute_sort_indices(df, "unique_id", "ds") 48 | if sort_idxs is not None: 49 | df = take_rows(df, sort_idxs) 50 | splits = backtest_splits( 51 | df, 52 | n_windows=n_windows, 53 | h=h, 54 | id_col="unique_id", 55 | time_col="ds", 56 | freq=pd.tseries.frequencies.to_offset(freq), 57 | step_size=h if step_size is None else step_size, 58 | ) 59 | for _, (cutoffs, train, valid) in tqdm(enumerate(splits)): 60 | if len(valid.columns) > 3: 61 | raise NotImplementedError( 62 | "Cross validation with exogenous variables is not yet supported." 63 | ) 64 | y_pred = self.forecast( 65 | df=train, 66 | h=h, 67 | freq=freq, 68 | ) 69 | y_pred = join(y_pred, cutoffs, on="unique_id", how="left") 70 | result = join( 71 | valid[["unique_id", "ds", "y"]], 72 | y_pred, 73 | on=["unique_id", "ds"], 74 | ) 75 | if result.shape[0] < valid.shape[0]: 76 | raise ValueError( 77 | "Cross validation result produced less results than expected. " 78 | "Please verify that the frequency parameter (freq) matches your series' " 79 | "and that there aren't any missing periods." 
80 | ) 81 | results.append(result) 82 | out = vertical_concat(results) 83 | out = drop_index_if_pandas(out) 84 | first_out_cols = ["unique_id", "ds", "cutoff", "y"] 85 | remaining_cols = [c for c in out.columns if c not in first_out_cols] 86 | fcst_cv_df = out[first_out_cols + remaining_cols] 87 | return fcst_cv_df 88 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/gluonts_forecaster.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable, List, Any 2 | 3 | import pandas as pd 4 | import torch 5 | from gluonts.dataset.pandas import PandasDataset 6 | from gluonts.model.forecast import Forecast 7 | from gluonts.torch.model.predictor import PyTorchPredictor 8 | from huggingface_hub import hf_hub_download 9 | from tqdm import tqdm 10 | 11 | from .forecaster import Forecaster 12 | 13 | 14 | def fix_freq(freq: str) -> str: 15 | # see https://github.com/awslabs/gluonts/pull/2462/files 16 | if len(freq) > 1 and freq.endswith("S"): 17 | return freq[:-1] 18 | return freq 19 | 20 | 21 | def maybe_convert_col_to_float32(df: pd.DataFrame, col_name: str) -> pd.DataFrame: 22 | if df[col_name].dtype != "float32": 23 | df = df.copy() 24 | df[col_name] = df[col_name].astype("float32") 25 | return df 26 | 27 | 28 | class GluonTSForecaster(Forecaster): 29 | def __init__(self, repo_id: str, filename: str, alias: str): 30 | self.repo_id = repo_id 31 | self.filename = filename 32 | self.alias = alias 33 | 34 | @property 35 | def checkpoint_path(self) -> str: 36 | return hf_hub_download( 37 | repo_id=self.repo_id, 38 | filename=self.filename, 39 | ) 40 | 41 | @property 42 | def map_location(self) -> str: 43 | map_location = "cuda:0" if torch.cuda.is_available() else "cpu" 44 | return map_location 45 | 46 | def load(self) -> Any: 47 | return torch.load( 48 | self.checkpoint_path, 49 | map_location=self.map_location, 50 | ) 51 | 52 | def get_predictor(self, prediction_length: int) -> PyTorchPredictor: 53 | raise NotImplementedError 54 | 55 | def gluonts_instance_fcst_to_df( 56 | self, 57 | fcst: Forecast, 58 | freq: str, 59 | model_name: str, 60 | ) -> pd.DataFrame: 61 | point_forecast = fcst.mean 62 | h = len(point_forecast) 63 | dates = pd.date_range( 64 | fcst.start_date.to_timestamp(), 65 | freq=freq, 66 | periods=h, 67 | ) 68 | fcst_df = pd.DataFrame( 69 | { 70 | "ds": dates, 71 | "unique_id": fcst.item_id, 72 | model_name: point_forecast, 73 | } 74 | ) 75 | return fcst_df 76 | 77 | def gluonts_fcsts_to_df( 78 | self, 79 | fcsts: Iterable[Forecast], 80 | freq: str, 81 | model_name: str, 82 | ) -> pd.DataFrame: 83 | df = [] 84 | for fcst in tqdm(fcsts): 85 | fcst_df = self.gluonts_instance_fcst_to_df( 86 | fcst=fcst, 87 | freq=freq, 88 | model_name=model_name, 89 | ) 90 | df.append(fcst_df) 91 | return pd.concat(df).reset_index(drop=True) 92 | 93 | def forecast( 94 | self, 95 | df: pd.DataFrame, 96 | h: int, 97 | freq: str, 98 | ) -> pd.DataFrame: 99 | df = maybe_convert_col_to_float32(df, "y") 100 | gluonts_dataset = PandasDataset.from_long_dataframe( 101 | df, 102 | target="y", 103 | item_id="unique_id", 104 | timestamp="ds", 105 | freq=fix_freq(freq), 106 | ) 107 | predictor = self.get_predictor(prediction_length=h) 108 | fcsts = predictor.predict(gluonts_dataset, num_samples=100) 109 | fcst_df = self.gluonts_fcsts_to_df( 110 | fcsts, 111 | freq=freq, 112 | model_name=self.alias, 113 | ) 114 | return fcst_df 115 | 
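As a usage sketch, any GluonTS-backed wrapper built on this class is called like the other arena models; the example below mirrors `tests/test_models.py` and uses the small Moirai checkpoint to keep the download light (the hourly horizon of 24 is illustrative):

```python
from utilsforecast.data import generate_series

from xiuhmolpilli.models.foundational import Moirai

# Long-format input with string ids, as the arena tests use.
df = generate_series(5, freq="H")
df["unique_id"] = df["unique_id"].astype(str)

model = Moirai("Salesforce/moirai-1.0-R-small")  # downloads the checkpoint from the Hub
fcst_df = model.forecast(df, h=24, freq="H")  # long-format forecasts with a "Moirai" column
```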
-------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/parallel_forecaster.py: -------------------------------------------------------------------------------- 1 | import os 2 | from multiprocessing import Pool 3 | from typing import Callable, List 4 | 5 | import pandas as pd 6 | 7 | 8 | class ParallelForecaster: 9 | def _process_group( 10 | self, 11 | df: pd.DataFrame, 12 | func: Callable, 13 | **kwargs, 14 | ) -> pd.DataFrame: 15 | uid = df["unique_id"].iloc[0] 16 | _df = df.drop("unique_id", axis=1) 17 | res_df = func(_df, **kwargs) 18 | res_df.insert(0, "unique_id", uid) 19 | return res_df 20 | 21 | def _apply_parallel( 22 | self, 23 | df_grouped: pd.DataFrame, 24 | func: Callable, 25 | **kwargs, 26 | ) -> pd.DataFrame: 27 | with Pool(os.cpu_count() - 1) as executor: 28 | futures = [ 29 | executor.apply_async( 30 | self._process_group, 31 | args=(df, func), 32 | kwds=kwargs, 33 | ) 34 | for _, df in df_grouped 35 | ] 36 | results = [future.get() for future in futures] 37 | return pd.concat(results) 38 | 39 | def _local_forecast( 40 | self, 41 | df: pd.DataFrame, 42 | h: int, 43 | freq: str, 44 | quantiles: List[float] | None = None, 45 | ) -> pd.DataFrame: 46 | raise NotImplementedError 47 | 48 | def forecast( 49 | self, 50 | df: pd.DataFrame, 51 | h: int, 52 | freq: str, 53 | quantiles: List[float] | None = None, 54 | ) -> pd.DataFrame: 55 | fcst_df = self._apply_parallel( 56 | df.groupby("unique_id"), 57 | self._local_forecast, 58 | h=h, 59 | freq=freq, 60 | quantiles=quantiles, 61 | ) 62 | return fcst_df 63 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/utils/download_data.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from concurrent.futures import ProcessPoolExecutor 3 | 4 | import pandas as pd 5 | 6 | logging.basicConfig(level=logging.INFO) 7 | main_logger = logging.getLogger(__name__) 8 | 9 | 10 | def read_parquet_and_assign(uid, url): 11 | df = pd.read_parquet(url) 12 | df["unique_id"] = uid 13 | df["ds"] = df["ds"].astype(str) 14 | return df[["unique_id", "ds", "y"]] 15 | 16 | 17 | def download_data(): 18 | catalogue_splits = pd.read_csv("./data/series_catalogue_hourly.csv") 19 | catalogue_df = catalogue_splits.query("dataset == 'moirai'") 20 | catalogue_df["pandas_frequency"] = "H" 21 | catalogue_df["seasonality"] = 24 22 | catalogue_df["horizon"] = 24 23 | catalogue_df = catalogue_df.query("split == 'test'")[ 24 | [ 25 | "unique_id", 26 | "frequency", 27 | "url", 28 | "pandas_frequency", 29 | "seasonality", 30 | "horizon", 31 | ] 32 | ] 33 | grouped_df = catalogue_df.groupby(["frequency", "pandas_frequency"]) 34 | for (frequency, pandas_frequency), df in grouped_df: 35 | uids, urls = df["unique_id"].values, df["url"].values 36 | main_logger.info( 37 | f"frequency: {frequency}, pandas_frequency: {pandas_frequency}" 38 | ) 39 | n_uids = len(uids) 40 | main_logger.info(f"number of uids: {n_uids}") 41 | max_workers = min(10, n_uids) 42 | with ProcessPoolExecutor(max_workers=max_workers) as executor: 43 | futures = [ 44 | executor.submit(read_parquet_and_assign, uid, url) 45 | for uid, url in zip(uids, urls) 46 | ] 47 | results = [future.result() for future in futures] 48 | main_logger.info("dataset read") 49 | Y_df = pd.concat(results) 50 | Y_df = Y_df.merge( 51 | df.drop(columns="url"), 52 | on="unique_id", 53 | how="left", 54 | ) 55 | 
# Y_df.to_parquet(f"./data/{frequency}_{pandas_frequency}.parquet") 56 | Y_df.to_parquet(f"./data/filtered_datasets/moirai-data.parquet") 57 | del Y_df 58 | main_logger.info("dataset saved") 59 | 60 | 61 | if __name__ == "__main__": 62 | download_data() 63 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/utils/filter_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | this module takes Nixtla's benchmarking data 3 | and filters it to prevent azureml from crashing 4 | in the following cases: 5 | - too short series, see https://learn.microsoft.com/en-us/azure/machine-learning/concept-automl-forecasting-methods?view=azureml-api-2#data-length-requirements 6 | """ 7 | import logging 8 | from pathlib import Path 9 | 10 | import fire 11 | import numpy as np 12 | import pandas as pd 13 | 14 | main_logger = logging.getLogger(__name__) 15 | main_logger.setLevel(logging.INFO) 16 | 17 | 18 | def get_min_size_per_series(dataset_path: str) -> int: 19 | if "Daily" in dataset_path or "Hourly" in dataset_path: 20 | return 1_000 21 | elif "Monthly" in dataset_path: 22 | return 10 * 12 23 | else: 24 | return 1_000 // 7 25 | 26 | 27 | def filter_and_clean_dataset( 28 | dataset_path: str, 29 | max_series: int = 1_000, 30 | random_seed: int = 420, 31 | ): 32 | main_logger.info(f"Processing dataset {dataset_path}") 33 | df = pd.read_parquet(dataset_path) 34 | df = df.drop_duplicates(["unique_id", "ds"]) # type: ignore 35 | df = df.sort_values(["unique_id", "ds"]) 36 | min_size_per_series = get_min_size_per_series(dataset_path) 37 | df = ( 38 | df.groupby("unique_id") 39 | .filter(lambda x: len(x) >= min_size_per_series) 40 | .reset_index(drop=True) 41 | ) 42 | uids = df["unique_id"].unique() # type: ignore 43 | if len(uids) > max_series: 44 | np.random.seed(random_seed) 45 | uids = np.random.choice(uids, max_series, replace=False) # type: ignore 46 | df = df.query("unique_id in @uids") # type: ignore 47 | main_logger.info(f"Filtering out {len(uids) - max_series} series") 48 | n_series = len(df["unique_id"].unique()) # type: ignore 49 | main_logger.info(f"Number of series: {n_series}") 50 | if n_series == 0: 51 | raise ValueError("No series left after filtering") 52 | # finally we clean some strange dates 53 | mask = df["ds"].str.endswith(":01") # type: ignore 54 | df.loc[mask, "ds"] = df.loc[mask, "ds"].str[:-3] + ":00" 55 | # save the dataset 56 | dataset_path = Path(dataset_path) # type: ignore 57 | filtered_dataset_path = dataset_path.parent / "filtered_datasets" / dataset_path.name # type: ignore 58 | filtered_dataset_path.parent.mkdir(exist_ok=True, parents=True) 59 | df.to_parquet(filtered_dataset_path) 60 | main_logger.info(f"Filtered dataset saved to {filtered_dataset_path}") 61 | 62 | 63 | if __name__ == "__main__": 64 | fire.Fire(filter_and_clean_dataset) 65 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/utils/logger_config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def setup_logger(logger_name, log_file=None): 5 | logger = logging.getLogger(logger_name) 6 | logger.setLevel(logging.INFO) 7 | formatter = logging.Formatter( 8 | "%(asctime)s,%(levelname)s,%(module)s,%(message)s", 9 | datefmt="%Y-%m-%d %H:%M:%S", 10 | ) 11 | console_handler = logging.StreamHandler() 12 | console_handler.setLevel(logging.INFO) 
13 | console_handler.setFormatter(formatter) 14 | logger.addHandler(console_handler) 15 | return logger 16 | -------------------------------------------------------------------------------- /experiments/lag-llama/Makefile: -------------------------------------------------------------------------------- 1 | download_lag_llama_code: 2 | @git clone https://github.com/time-series-foundation-models/lag-llama tempdir 3 | @cp -R tempdir/data/ . 4 | @cp -R tempdir/gluon_utils/ . 5 | @cp -R tempdir/lag_llama/ . 6 | @cp -R tempdir/requirements.txt lag-llama-requirements.txt 7 | @rm -rf tempdir 8 | 9 | download_lag_llama_model: 10 | @huggingface-cli download time-series-foundation-models/Lag-Llama lag-llama.ckpt --local-dir ./models/ 11 | -------------------------------------------------------------------------------- /experiments/lag-llama/README.md: -------------------------------------------------------------------------------- 1 | # LagLlama is 40% less accurate than a simple SeasonalNaive and 1000x slower. 2 | 3 | We present a fully reproducible experiment showing that SeasonalNaive significantly outperforms LagLlama, a recently introduced open-source foundational model for time series forecasting (a deep learning architecture pre-trained on time series datasets). Specifically, **SeasonalNaive achieves 42%, 24%, and 16% better performance** in terms of MASE, MAPE, and CRPS respectively, and boasts **a 1,000x speed advantage**. These findings are based on an extensive analysis covering 105,289 unique time series from the M1, M3, M4, and Tourism datasets, which were omitted in the original LagLlama paper. 4 | 5 | # Introduction 6 | 7 | In the field of time series forecasting, recent developments have introduced foundational models such as LagLlama, which utilizes deep learning and extensive data for pretraining, adding model complexity with the aim of enhancing predictive performance. LagLlama deserves praise as one of the first open-source foundational models. However, contrary to expectations, our analysis indicates that the traditional SeasonalNaive model, known for its straightforward approach of extending past seasonal trends into future predictions, outperforms LagLlama in terms of both accuracy and computational efficiency. 8 | 9 | ## Empirical Evaluation 10 | 11 | The original paper uses 3,113 time series to assess model performance and reports only CRPS, omitting point forecast error metrics widely used in academia and industry, e.g. MASE and MAPE. 12 | 13 | Our evaluation encompasses 105,289 unique time series from different datasets, including M1, M3, M4, and Tourism, covering yearly, quarterly, monthly, weekly, daily, and hourly frequencies. This diverse dataset selection allows for a robust assessment of the models across various time series characteristics and forecasting horizons. We also reproduce results for Pedestrian Counts and Weather, originally included in the paper/code, to show that we are running LagLlama correctly. 14 | 15 | ## Results 16 | 17 | The results are summarized in the following table, highlighting the performance metrics of MASE, MAPE, CRPS, and TIME (measured in seconds). The best results are indicated in **bold** for easy reference. 18 | 19 | *(results table image)* 20 | 21 | 22 | ## Reproducibility 23 | 24 | To ensure the reproducibility of our findings, the experiments were conducted on an AWS g5.4xlarge GPU instance equipped with 16 vCPUs, 64 GiB of RAM, and an NVIDIA A10G Tensor Core GPU (24 GiB). The complete code can be found in this repo.
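As a point of reference before the full pipeline, the SeasonalNaive baseline can be reproduced in a few lines with StatsForecast (a minimal sketch on synthetic daily data, not the exact experiment code in `src/statsforecast_pipeline.py`):

```python
from statsforecast import StatsForecast
from statsforecast.models import SeasonalNaive
from utilsforecast.data import generate_series

# Synthetic daily series in long format (unique_id, ds, y).
df = generate_series(n_series=10, freq="D", min_length=200)

sf = StatsForecast(models=[SeasonalNaive(season_length=7)], freq="D")
fcst_df = sf.forecast(df=df, h=14, level=[80])  # point forecasts plus 80% intervals
```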
25 | 26 | ### Instructions 27 | 28 | 1. Create a Python environment using: 29 | ``` 30 | mamba env create -f environment.yml 31 | conda activate lag-llama 32 | ``` 33 | 34 | 2. Add the lag-llama code to your environment 35 | 36 | ``` 37 | make download_lag_llama_code 38 | ``` 39 | 40 | 3. Download the lag-llama model 41 | 42 | ``` 43 | make download_lag_llama_model 44 | ``` 45 | 46 | 4. Install the lag-llama requirements 47 | 48 | ``` 49 | pip install -r lag-llama-requirements.txt 50 | ``` 51 | 52 | 5. Run the complete experiments reported in the table 53 | 54 | ``` 55 | python -m src.main 56 | ``` 57 | 58 | ### References 59 | - **Lag-Llama Paper**: [Towards Foundation Models for Probabilistic Time Series Forecasting](https://arxiv.org/abs/2310.08278) 60 | - **SeasonalNaive Implementation**: [GitHub Repository](https://github.com/nixtla/statsforecast/) 61 | - **CRPS Replication Note**: The CRPS performance for `LagLlama` is replicated from the model's publicly available [Colab notebook](https://colab.research.google.com/drive/13HHKYL_HflHBKxDWycXgIUAHSeHRR5eo?usp=sharing), ensuring a fair comparison. 62 | -------------------------------------------------------------------------------- /experiments/lag-llama/environment.yml: -------------------------------------------------------------------------------- 1 | name: lag-llama 2 | channels: 3 | - conda-forge 4 | - defaults 5 | - anaconda 6 | dependencies: 7 | - jupyterlab 8 | - pip 9 | - python=3.10 10 | - pip: 11 | - datasetsforecast 12 | - fire 13 | - huggingface_hub[cli] 14 | - neuralforecast 15 | - orjson 16 | - statsforecast 17 | - utilsforecast 18 | 19 | -------------------------------------------------------------------------------- /experiments/lag-llama/src/lag_llama_pipeline.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | from typing import Iterable, List, Tuple 3 | 4 | import fire 5 | import pandas as pd 6 | import torch 7 | from gluonts.dataset import Dataset 8 | from gluonts.model.forecast import Forecast 9 | from gluonts.torch.model.predictor import PyTorchPredictor 10 | from tqdm import tqdm 11 | 12 | from lag_llama.gluon.estimator import LagLlamaEstimator 13 | from src.utils import ExperimentHandler 14 | 15 | 16 | def get_lag_llama_predictor( 17 | prediction_length: int, models_dir: str 18 | ) -> PyTorchPredictor: 19 | model_path = f"{models_dir}/lag-llama.ckpt" 20 | map_location = torch.device("cuda:0") if torch.cuda.is_available() else "cpu" 21 | if map_location == "cpu": 22 | raise ValueError("cpu is not supported in lagllama (there is a bug)") 23 | ckpt = torch.load(model_path, map_location=map_location) 24 | estimator_args = ckpt["hyper_parameters"]["model_kwargs"] 25 | # this context length is reported in the paper 26 | context_length = 32 27 | estimator = LagLlamaEstimator( 28 | ckpt_path=model_path, 29 | prediction_length=prediction_length, 30 | context_length=context_length, 31 | # estimator args 32 | input_size=estimator_args["input_size"], 33 | n_layer=estimator_args["n_layer"], 34 | n_embd_per_head=estimator_args["n_embd_per_head"], 35 | n_head=estimator_args["n_head"], 36 | scaling=estimator_args["scaling"], 37 | time_feat=estimator_args["time_feat"], 38 | ) 39 | lightning_module = estimator.create_lightning_module() 40 | transformation = estimator.create_transformation() 41 | predictor = estimator.create_predictor(transformation, lightning_module) 42 | return predictor 43 | 44 | 45 | def gluonts_instance_fcst_to_df( 46 | fcst: Forecast, 47 | quantiles: 
List[float], 48 | model_name: str, 49 | ) -> pd.DataFrame: 50 | point_forecast = fcst.mean 51 | h = len(point_forecast) 52 | dates = pd.date_range( 53 | fcst.start_date.to_timestamp(), 54 | freq=fcst.freq, 55 | periods=h, 56 | ) 57 | fcst_df = pd.DataFrame( 58 | { 59 | "ds": dates, 60 | "unique_id": fcst.item_id, 61 | model_name: point_forecast, 62 | } 63 | ) 64 | for q in quantiles: 65 | fcst_df[f"{model_name}-q-{q}"] = fcst.quantile(q) 66 | return fcst_df 67 | 68 | 69 | def gluonts_fcsts_to_df( 70 | fcsts: Iterable[Forecast], 71 | quantiles: List[float], 72 | model_name: str, 73 | ) -> pd.DataFrame: 74 | df = [] 75 | for fcst in tqdm(fcsts): 76 | fcst_df = gluonts_instance_fcst_to_df(fcst, quantiles, model_name) 77 | df.append(fcst_df) 78 | return pd.concat(df).reset_index(drop=True) 79 | 80 | 81 | def run_lag_llama( 82 | gluonts_dataset: Dataset, 83 | horizon: int, 84 | quantiles: List[float], 85 | models_dir: str, 86 | ) -> Tuple[pd.DataFrame, float, str]: 87 | init_time = time() 88 | predictor = get_lag_llama_predictor(horizon, models_dir) 89 | fcsts = predictor.predict(gluonts_dataset, num_samples=100) 90 | model_name = "LagLlama" 91 | fcsts_df = gluonts_fcsts_to_df( 92 | fcsts, 93 | quantiles=quantiles, 94 | model_name=model_name, 95 | ) 96 | total_time = time() - init_time 97 | return fcsts_df, total_time, model_name 98 | 99 | 100 | def main(dataset: str): 101 | exp = ExperimentHandler(dataset) 102 | fcst_df, total_time, model_name = run_lag_llama( 103 | gluonts_dataset=exp.gluonts_train_dataset, 104 | horizon=exp.horizon, 105 | quantiles=exp.quantiles, 106 | models_dir=exp.models_dir, 107 | ) 108 | exp._save_results(fcst_df, total_time, model_name) 109 | 110 | 111 | if __name__ == "__main__": 112 | fire.Fire(main) 113 | -------------------------------------------------------------------------------- /experiments/lag-llama/src/main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import subprocess 3 | 4 | import pandas as pd 5 | 6 | from src.utils import ExperimentHandler 7 | 8 | logger = logging.getLogger(__name__) 9 | logger.setLevel(logging.INFO) 10 | 11 | not_included_datasets = [ 12 | "m1_yearly", 13 | "m1_quarterly", 14 | "m1_monthly", 15 | "m3_yearly", 16 | "m3_quarterly", 17 | "m3_monthly", 18 | "m3_other", 19 | "m4_yearly", 20 | "m4_quarterly", 21 | "m4_monthly", 22 | "m4_weekly", 23 | "m4_daily", 24 | "m4_hourly", 25 | "tourism_yearly", 26 | "tourism_quarterly", 27 | "tourism_monthly", 28 | ] 29 | 30 | test_paper_datasets = [ 31 | "pedestrian_counts", 32 | "weather", 33 | ] 34 | 35 | datasets = { 36 | "not_included": not_included_datasets, 37 | "test_set": test_paper_datasets, 38 | } 39 | 40 | 41 | def evaluate(): 42 | eval_df = [] 43 | prefix_process = ["python", "-m"] 44 | 45 | for name_group, groups in datasets.items(): 46 | for dataset in groups: 47 | logger.info(f"Evaluating {dataset}...") 48 | suffix_process = ["--dataset", dataset] 49 | process = ( 50 | lambda middle_process: prefix_process + middle_process + suffix_process 51 | ) 52 | # running statsforecast and lagllama in separated 53 | # processes because gluonts sets multiprocessing context 54 | # see: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/torch/__init__.py 55 | logger.info("Running SeasonalNaive") 56 | subprocess.run(process(["src.statsforecast_pipeline"])) 57 | logger.info("Running LagLLama") 58 | subprocess.run(process(["src.lag_llama_pipeline"])) 59 | logger.info("Running dataset evaluation") 60 | exp = ExperimentHandler(dataset) 
61 | eval_dataset_df = exp.evaluate_models(["LagLlama", "SeasonalNaive"]) 62 | eval_dataset_df.insert(0, "paper", name_group) 63 | eval_df.append(eval_dataset_df) 64 | eval_df = pd.concat(eval_df).reset_index(drop=True) 65 | exp.save_dataframe(eval_df, "complete-results.csv") 66 | 67 | 68 | if __name__ == "__main__": 69 | evaluate() 70 | -------------------------------------------------------------------------------- /experiments/lag-llama/src/statsforecast_pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | from time import time 3 | from typing import List, Tuple 4 | 5 | import fire 6 | import pandas as pd 7 | from statsforecast import StatsForecast 8 | from statsforecast.models import SeasonalNaive 9 | 10 | from src.utils import ExperimentHandler 11 | 12 | 13 | def run_statsforecast( 14 | train_df: pd.DataFrame, 15 | horizon: int, 16 | freq: str, 17 | seasonality: int, 18 | level: List[int], 19 | ) -> Tuple[pd.DataFrame, float, str]: 20 | os.environ["NIXTLA_ID_AS_COL"] = "true" 21 | models = [SeasonalNaive(season_length=seasonality)] 22 | init_time = time() 23 | sf = StatsForecast( 24 | models=models, 25 | freq=freq, 26 | n_jobs=-1, 27 | ) 28 | fcsts_df = sf.forecast(df=train_df, h=horizon, level=level) 29 | total_time = time() - init_time 30 | model_name = repr(models[0]) 31 | return fcsts_df, total_time, model_name 32 | 33 | 34 | def main(dataset: str): 35 | exp = ExperimentHandler(dataset) 36 | fcst_df, total_time, model_name = run_statsforecast( 37 | train_df=exp.train_df, 38 | horizon=exp.horizon, 39 | freq=exp.freq, 40 | seasonality=exp.seasonality, 41 | level=exp.level, 42 | ) 43 | fcst_df = exp._fcst_from_level_to_quantiles(fcst_df, model_name) 44 | exp._save_results(fcst_df, total_time, model_name) 45 | 46 | 47 | if __name__ == "__main__": 48 | fire.Fire(main) 49 | -------------------------------------------------------------------------------- /experiments/one-billion/README.md: -------------------------------------------------------------------------------- 1 | # Forecasting at Scale: One Billion (1e9) Time Series with TimeGPT ⚡📈 2 | 3 | Imagine you're tasked with forecasting for **one billion unique time series**—ranging from retail sales across thousands of stores to sensor data from millions of IoT devices. It's a monumental challenge, requiring not just statistical modeling but also cutting-edge tools to handle the scale and complexity of the data. 4 | 5 | This project is a blueprint for scaling such a task, utilizing **Nixtla's foundation models for time series forecasting** and orchestrating the process efficiently using Python and AWS S3. Here's how you can tackle this kind of project. 6 | 7 | ## The Challenge 🎯 8 | 9 | The goal is simple: forecast the future for **one billion different time series**, but the constraints are anything but simple. How do you handle the storage of this data? 🗄️ How do you parallelize the computation efficiently? 💻 And finally, how do you produce results quickly enough to be useful in decision-making? ⏳ 10 | 11 | ### Enter Foundation Models for Time Series 🚀 12 | 13 | **Nixtla** offers **TimeGPT** through an API that leverages foundation models capable of handling large-scale forecasting problems. These models are designed for flexibility and speed 🏎️, making them ideal for scenarios where you're dealing with an enormous volume of data and need results at a high cadence. 
⚡ 14 | 15 | ## Results 📊 16 | 17 | | 📈 **Number of Series** | **Number of Processes** | ⏳ **CPU Time (hours)** | 18 | |:-----------------------:|:-------------------:|:------------------:| 19 | | 1e9 | 1 | 5.5 | 20 | | 1e9 | 5 | 1.1 | 21 | 22 | ## Running the Project 🛠️ 23 | 24 | ### Installation 🧩 25 | 26 | 1. Install the required Python packages: 27 | ```bash 28 | pip install -r requirements.txt 29 | ``` 30 | 31 | 2. Configure AWS credentials so the script can interact with S3: 32 | ```bash 33 | aws configure 34 | ``` 35 | 36 | ### Usage 🏃‍♂️ 37 | 38 | To generate forecasts, simply run the following command. Adjust the parameters as needed: 39 | 40 | ```bash 41 | python main.py --bucket <your-bucket> --prefix <your-prefix> --n_partitions 1000 --series_per_partition 1000000 --n_jobs 5 42 | ``` 43 | 44 | - **`bucket`**: The S3 bucket where the data is stored. 45 | - **`prefix`**: The path inside the S3 bucket where the input and output data is stored. 46 | - **`n_partitions`**: The number of partitions to break the task into. 47 | - **`series_per_partition`**: The number of time series in each partition. 48 | - **`n_jobs`**: The number of processes to run in parallel. 49 | 50 | ### What Happens Behind the Scenes 🔍 51 | 52 | The code will: 53 | 54 | 1. Check if the forecast for each partition has already been generated. ✅ 55 | 2. Generate new time series data for each partition. 🧬 56 | 3. Use Nixtla’s API to compute forecasts for each partition. 🔮 57 | 4. Save the results and the time taken to S3. 💾 58 | 59 | ## Scaling to Billions 🚀 60 | 61 | This approach is designed to **scale**, whether you’re forecasting for **one million** or **one billion** series. By partitioning the data, processing it in parallel 🧠, and leveraging foundation models like those provided by Nixtla, you can handle even the most massive forecasting tasks efficiently. ⚙️ 62 | 63 | ### Final Thoughts 💡 64 | 65 | Forecasting at scale is no easy feat, but with the right tools, it’s entirely achievable. This project demonstrates how modern time series forecasting techniques can be applied to massive datasets in an efficient, scalable way. By leveraging AWS infrastructure, foundation models, and clever parallel processing, you can forecast the future for billions of unique data series, **unlocking insights** that can power decision-making at an unprecedented scale. 🌍✨ 66 | -------------------------------------------------------------------------------- /experiments/one-billion/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | fire 3 | nixtla>=0.6.0 4 | nixtlats==0.5.2 5 | pandas 6 | pyarrow 7 | s3fs 8 | tqdm 9 | -------------------------------------------------------------------------------- /experiments/prophet/.env.example: -------------------------------------------------------------------------------- 1 | TIMEGPT_TOKEN= 2 | -------------------------------------------------------------------------------- /experiments/prophet/Makefile: -------------------------------------------------------------------------------- 1 | SRC_DIR := data 2 | EXCLUDE_STRINGS := catalogue 3 | TS_FILES := $(filter-out $(wildcard $(SRC_DIR)/*$(foreach str,$(EXCLUDE_STRINGS),*$(str)*)), $(wildcard $(SRC_DIR)/*.parquet)) 4 | 5 | evaluate: .require-method 6 | @echo "Evaluation for $${method}..." 7 | @for file in $(TS_FILES); do \ 8 | echo $$file; \ 9 | python -m src.$${method}_exp --file $$file; \ 10 | done 11 | @echo "Evaluation for $${method} complete." 12 | 13 | summarize_results: 14 | @echo "Summarize results..." 
15 | @python -m src.results_summary --dir ./data/results/ 16 | @echo "Summarize results complete." 17 | 18 | .require-method: 19 | ifndef method 20 | $(error method is required) 21 | endif 22 | -------------------------------------------------------------------------------- /experiments/prophet/environment.yml: -------------------------------------------------------------------------------- 1 | name: timegpt-benchmark 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - jupyterlab 6 | - prophet 7 | - pyspark>=3.3 8 | - python=3.10 9 | - pip: 10 | - fire 11 | - nixtla 12 | - python-dotenv 13 | - statsforecast 14 | - utilsforecast 15 | - tabulate 16 | 17 | -------------------------------------------------------------------------------- /experiments/prophet/src/results_summary.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import fire 4 | from numpy import column_stack 5 | import pandas as pd 6 | 7 | 8 | def read_kind_results(kind: str, dir: str): 9 | files = list(Path(dir).rglob(f"*{kind}.parquet")) 10 | df = pd.concat( 11 | [pd.read_parquet(file).assign(file=str(file).split("/")[-2]) for file in files], 12 | ignore_index=True, 13 | ) 14 | return df 15 | 16 | 17 | def summarize_results_per_file(metrics_df: pd.DataFrame): 18 | metrics_df_per_freq = metrics_df.groupby(["file", "metric", "model"]).mean( 19 | numeric_only=True 20 | ) 21 | metrics_df_per_freq = metrics_df_per_freq.reset_index() 22 | metrics_df_per_freq = metrics_df_per_freq.query( 23 | "model in ['Prophet', 'SeasonalNaive', 'TimeGPT']" 24 | ) 25 | models = metrics_df_per_freq["model"].unique() 26 | metrics_df_per_freq = pd.pivot( 27 | metrics_df_per_freq, 28 | index=["file", "metric"], 29 | columns="model", 30 | values="value", 31 | ).reset_index() 32 | for model in models: 33 | if model == "SeasonalNaive": 34 | continue 35 | metrics_df_per_freq[model] /= metrics_df_per_freq["SeasonalNaive"] 36 | metrics_df_per_freq["SeasonalNaive"] /= metrics_df_per_freq["SeasonalNaive"] 37 | return metrics_df_per_freq 38 | 39 | 40 | def prepare_results(df: pd.DataFrame): 41 | def bold_best(row): 42 | row = row.round(3) 43 | models = row.drop(columns=["file", "metric"]).columns 44 | best_model = row[models].idxmin(axis=1).item() 45 | row[best_model] = "**" + str(row[best_model].item()) + "**" 46 | return row 47 | 48 | df_bolded = df.groupby(["file", "metric"]).apply(bold_best) 49 | df_bolded = df_bolded.reset_index(drop=True) 50 | return df_bolded 51 | 52 | 53 | def write_to_readme(content: str): 54 | with open("README.md", "r") as file: 55 | readme_content = file.readlines() 56 | start_index = -1 57 | end_index = -1 58 | for i, line in enumerate(readme_content): 59 | if line.strip().lower() == "## results": 60 | start_index = i + 1 61 | if start_index != -1 and line.strip() == "": 62 | end_index = i 63 | break 64 | 65 | if start_index != -1 and end_index != -1: 66 | readme_content = ( 67 | readme_content[: start_index + 1] 68 | + [content + "\n"] 69 | + readme_content[end_index:] 70 | ) 71 | else: 72 | print("Results section not found or improperly formatted") 73 | 74 | # Write the changes back to the README 75 | with open("README.md", "w") as file: 76 | file.writelines(readme_content) 77 | 78 | 79 | def summarize_results(dir: str): 80 | metrics_df = read_kind_results("metrics", dir) 81 | summary_df = read_kind_results("summary", dir) 82 | summary_df = ( 83 | summary_df.set_index(["file", "frequency"]) 84 | .reset_index() 85 | .round(3) 86 | .sort_values("frequency") 
87 | ) 88 | no_int_cols = ["file", "frequency", "mean", "std"] 89 | for col in summary_df.columns: 90 | if col not in no_int_cols: 91 | summary_df[col] = summary_df[col].astype(int) 92 | summary_df = summary_df.to_markdown(index=False, intfmt=",", floatfmt=",.3f") 93 | time_df = read_kind_results("time", dir) 94 | time_df = time_df.assign(metric="time").rename(columns={"time": "value"}) 95 | metrics_df_per_file = summarize_results_per_file(metrics_df) 96 | time_df = summarize_results_per_file(time_df) 97 | eval_df = pd.concat([metrics_df_per_file, time_df], ignore_index=True) 98 | eval_df = prepare_results(eval_df)[ 99 | ["file", "metric", "TimeGPT", "Prophet", "SeasonalNaive"] 100 | ] 101 | n_files = eval_df["file"].nunique() 102 | eval_df = eval_df.to_markdown( 103 | index=False, 104 | colalign=2 * ["left"] + (eval_df.shape[1] - 2) * ["right"], 105 | ) 106 | markdown_lines = eval_df.split("\n") 107 | custom_separator = markdown_lines[1].replace(":", "-") 108 | for i in range(4, len(markdown_lines) + n_files - 1, 4): 109 | markdown_lines.insert(i + 1, custom_separator) 110 | markdown_lines.insert( 111 | 0, 112 | ("\n### Data Description\n\n" f"{summary_df}\n\n" "### Performance\n\n"), 113 | ) 114 | eval_df = "\n".join(markdown_lines) 115 | write_to_readme(eval_df) 116 | 117 | 118 | if __name__ == "__main__": 119 | fire.Fire(summarize_results) 120 | -------------------------------------------------------------------------------- /experiments/prophet/src/statsforecast_exp.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | 3 | import fire 4 | import pandas as pd 5 | from statsforecast import StatsForecast 6 | from statsforecast.models import SeasonalNaive, ZeroModel 7 | 8 | from src.tools import ExperimentHandler 9 | 10 | 11 | def evaluate_experiment(file: str): 12 | exp_handler = ExperimentHandler(file=file, method="statsforecast") 13 | Y_df, freq, pandas_freq, h, seasonality = exp_handler.read_data() 14 | models = [ 15 | SeasonalNaive(season_length=seasonality), 16 | ZeroModel(), 17 | ] 18 | # even though statsforecast can handle multiple models, we only use one 19 | # at a time to calculate time for each 20 | eval_df = [] 21 | total_time_df = [] 22 | for model in models: 23 | model_name = repr(model) 24 | print(model_name) 25 | sf = StatsForecast( 26 | models=[model], 27 | freq=pandas_freq, 28 | n_jobs=-1, 29 | ) 30 | start = time() 31 | Y_hat_df_model = sf.cross_validation( 32 | df=Y_df, 33 | h=h, 34 | n_windows=1, 35 | ).reset_index() 36 | total_time = time() - start 37 | print(total_time) 38 | # evaluation 39 | eval_df_model, total_time_df_model = exp_handler.evaluate_model( 40 | Y_hat_df=Y_hat_df_model, 41 | model_name=model_name, 42 | total_time=total_time, 43 | ) 44 | eval_df.append(eval_df_model.set_index(["metric", "unique_id"])) 45 | total_time_df.append(total_time_df_model) 46 | eval_df = pd.concat(eval_df, axis=1).reset_index() 47 | total_time_df = pd.concat(total_time_df) 48 | exp_handler.save_results( 49 | freq=freq, 50 | eval_df=eval_df, 51 | total_time_df=total_time_df, 52 | df=Y_df, 53 | ) 54 | 55 | 56 | if __name__ == "__main__": 57 | fire.Fire(evaluate_experiment) 58 | -------------------------------------------------------------------------------- /experiments/prophet/src/timegpt_exp.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from time import time 3 | 4 | import fire 5 | from dotenv import load_dotenv 6 | from nixtla import NixtlaClient 7 | 8 | 
from src.tools import ExperimentHandler 9 | 10 | load_dotenv() 11 | 12 | 13 | def evaluate_experiment(file: str): 14 | exp_handler = ExperimentHandler(file=file, method="timegpt") 15 | model_name = "TimeGPT" 16 | print(model_name) 17 | # timegpt does not need the full history to 18 | # make zero shot predictions 19 | Y_df, freq, pandas_freq, h, seasonality = exp_handler.read_data( 20 | max_insample_length=300 21 | ) 22 | size_df = sys.getsizeof(Y_df) / (1024 * 1024) 23 | max_partition_size_mb = 20 24 | num_partitions = int(size_df / max_partition_size_mb) + 1 25 | timegpt = NixtlaClient( 26 | base_url="https://timegpt-endpoint.eastus.inference.ml.azure.com/", 27 | max_retries=1, 28 | ) 29 | start = time() 30 | Y_hat_df = timegpt.cross_validation( 31 | df=Y_df, 32 | h=h, 33 | n_windows=1, 34 | freq=pandas_freq, 35 | num_partitions=num_partitions, 36 | ) 37 | total_time = time() - start 38 | print(total_time) 39 | # evaluation 40 | eval_df, total_time_df = exp_handler.evaluate_model( 41 | Y_hat_df=Y_hat_df, 42 | model_name=model_name, 43 | total_time=total_time, 44 | ) 45 | exp_handler.save_results( 46 | freq=freq, 47 | eval_df=eval_df, 48 | total_time_df=total_time_df, 49 | ) 50 | 51 | 52 | if __name__ == "__main__": 53 | fire.Fire(evaluate_experiment) 54 | -------------------------------------------------------------------------------- /experiments/prophet/src/tools.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional, Tuple 3 | 4 | import pandas as pd 5 | from utilsforecast.evaluation import evaluate 6 | from utilsforecast.losses import mae, rmse 7 | 8 | 9 | class ExperimentHandler: 10 | def __init__(self, file: str, method: str): 11 | self.file = file 12 | self.method = method 13 | 14 | @staticmethod 15 | def get_parameter(parameter: str, df: pd.DataFrame): 16 | parameter = df[parameter].unique() 17 | if len(parameter) > 1: 18 | raise ValueError(f"{parameter} is not unique: {parameter}") 19 | return parameter[0] 20 | 21 | def read_data( 22 | self, 23 | max_insample_length: int = 3_000, 24 | ) -> Tuple[pd.DataFrame, str, str, int, int]: 25 | df = pd.read_parquet(self.file) 26 | Y_df = df[["unique_id", "ds", "y"]].drop_duplicates(["unique_id", "ds"]) 27 | Y_df = Y_df.sort_values(["unique_id", "ds"]) 28 | Y_df = Y_df.groupby("unique_id").tail( 29 | max_insample_length 30 | ) # take only last 3_000 rows 31 | Y_df["ds"] = Y_df["ds"].str.replace(":01$", ":00", regex=True) 32 | freq = self.get_parameter("frequency", df) 33 | pandas_freq = self.get_parameter("pandas_frequency", df) 34 | h = self.get_parameter("horizon", df) 35 | seasonality = self.get_parameter("seasonality", df) 36 | return Y_df, freq, pandas_freq, int(h), int(seasonality) 37 | 38 | def evaluate_model( 39 | self, 40 | Y_hat_df: pd.DataFrame, 41 | model_name: str, 42 | total_time: float, 43 | ): 44 | if "cutoff" in Y_hat_df.columns: 45 | Y_hat_df = Y_hat_df.drop(columns="cutoff") 46 | eval_df = evaluate( 47 | df=Y_hat_df, 48 | metrics=[rmse, mae], 49 | ) 50 | total_time_df = pd.DataFrame({"model": [model_name], "time": [total_time]}) 51 | return eval_df, total_time_df 52 | 53 | @staticmethod 54 | def summarize_df(df: pd.DataFrame): 55 | n_unique_ids = df["unique_id"].nunique() 56 | mean_y = df["y"].mean() 57 | std_y = df["y"].std() 58 | lengths = df.groupby("unique_id").size() 59 | min_length = lengths.min() 60 | max_length = lengths.max() 61 | n_obs = len(df) 62 | summary = { 63 | "n_series": n_unique_ids, 64 | "mean": mean_y, 65 | "std": std_y, 66 | 
"min_length": min_length, 67 | "max_length": max_length, 68 | "n_obs": n_obs, 69 | } 70 | summary_df = pd.DataFrame.from_dict(summary, orient="index") 71 | summary_df = summary_df.transpose() 72 | return summary_df 73 | 74 | def save_results( 75 | self, 76 | freq: str, 77 | eval_df: pd.DataFrame, 78 | total_time_df: pd.DataFrame, 79 | df: Optional[pd.DataFrame] = None, 80 | ): 81 | eval_df["frequency"] = freq 82 | eval_df = eval_df.melt( 83 | id_vars=["frequency", "metric", "unique_id"], 84 | var_name="model", 85 | value_name="value", 86 | ) 87 | total_time_df["frequency"] = freq 88 | dir = self.file.split("/")[-1].replace(".parquet", "") 89 | dir = f"./data/results/{dir}" 90 | os.makedirs(dir, exist_ok=True) 91 | eval_df.to_parquet( 92 | f"{dir}/{self.method}_metrics.parquet", 93 | index=False, 94 | ) 95 | total_time_df.to_parquet( 96 | f"{dir}/{self.method}_time.parquet", 97 | index=False, 98 | ) 99 | if df is not None: 100 | summary_df = self.summarize_df(df) 101 | summary_df["frequency"] = freq 102 | print(summary_df) 103 | summary_df.to_parquet( 104 | f"{dir}/series_summary.parquet", 105 | index=False, 106 | ) 107 | -------------------------------------------------------------------------------- /experiments/prophet/src/utils.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ThreadPoolExecutor 2 | import pandas as pd 3 | 4 | 5 | def read_parquet_and_assign(uid, url): 6 | df = pd.read_parquet(url) 7 | df["unique_id"] = uid 8 | df["ds"] = df["ds"].astype(str) 9 | return df[["unique_id", "ds", "y"]] 10 | 11 | 12 | def download_data(): 13 | catalogue_splits = pd.read_parquet("./data/catalogue_splits.parquet") 14 | catalogue_datasets = pd.read_parquet("./data/catalogue_datasets.parquet") 15 | catalogue_df = catalogue_splits.merge( 16 | catalogue_datasets, 17 | on=["dataset", "subdataset", "frequency"], 18 | ) 19 | del catalogue_splits 20 | del catalogue_datasets 21 | catalogue_df = catalogue_df.query("split == 'test'")[ 22 | [ 23 | "unique_id", 24 | "frequency", 25 | "url", 26 | "pandas_frequency", 27 | "seasonality", 28 | "horizon", 29 | ] 30 | ] 31 | grouped_df = catalogue_df.groupby(["frequency", "pandas_frequency"]) 32 | for (frequency, pandas_frequency), df in grouped_df: 33 | uids, urls = df["unique_id"].values, df["url"].values 34 | print(f"frequency: {frequency}, pandas_frequency: {pandas_frequency}") 35 | print(f"number of uids: {len(uids)}") 36 | with ThreadPoolExecutor() as executor: 37 | futures = [ 38 | executor.submit(read_parquet_and_assign, uid, url) 39 | for uid, url in zip(uids, urls) 40 | ] 41 | results = [future.result() for future in futures] 42 | print("dataset read") 43 | Y_df = pd.concat(results) 44 | Y_df = Y_df.merge( 45 | df.drop(columns="url"), 46 | on="unique_id", 47 | how="left", 48 | ) 49 | print(Y_df) 50 | Y_df.to_parquet(f"./data/{frequency}_{pandas_frequency}.parquet") 51 | del Y_df 52 | 53 | 54 | if __name__ == "__main__": 55 | download_data() 56 | -------------------------------------------------------------------------------- /experiments/salesforce-moirai/environment.yml: -------------------------------------------------------------------------------- 1 | name: moirai 2 | channels: 3 | - conda-forge 4 | - defaults 5 | - anaconda 6 | dependencies: 7 | - jupyterlab 8 | - pip 9 | - python=3.10 10 | - pip: 11 | - datasetsforecast 12 | - fire 13 | - huggingface_hub[cli] 14 | - neuralforecast 15 | - orjson 16 | - statsforecast 17 | - utilsforecast 18 | 19 | 
-------------------------------------------------------------------------------- /experiments/salesforce-moirai/src/main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import subprocess 3 | from typing import Literal 4 | 5 | import fire 6 | import pandas as pd 7 | 8 | 9 | logger = logging.getLogger(__name__) 10 | logger.setLevel(logging.INFO) 11 | 12 | datasets = [ 13 | "m1_yearly", 14 | "m1_quarterly", 15 | "m1_monthly", 16 | "m3_yearly", 17 | "m3_quarterly", 18 | "m3_monthly", 19 | "m3_other", 20 | "m4_yearly", 21 | "m4_quarterly", 22 | "m4_monthly", 23 | "m4_weekly", 24 | "m4_daily", 25 | "m4_hourly", 26 | "tourism_yearly", 27 | "tourism_quarterly", 28 | "tourism_monthly", 29 | ] 30 | 31 | 32 | def main(mode: Literal["fcst_statsforecast", "fcst_moirai`", "evaluation"]): 33 | prefix_process = ["python", "-m"] 34 | 35 | if mode in ["fcst_statsforecast", "fcst_moirai"]: 36 | for dataset in datasets: 37 | logger.info(f"Forecasting {dataset}...") 38 | suffix_process = ["--dataset", dataset] 39 | 40 | def process(middle_process): 41 | return prefix_process + middle_process + suffix_process 42 | 43 | if mode == "fcst_statsforecast": 44 | logger.info("Running StatisticalEnsemble") 45 | subprocess.run(process(["src.statsforecast_pipeline"])) 46 | elif mode == "fcst_moirai": 47 | logger.info("Running SalesforceMoirai") 48 | subprocess.run(process(["src.moirai_pipeline"])) 49 | elif mode == "evaluation": 50 | from src.utils import ExperimentHandler 51 | 52 | eval_df = [] 53 | for dataset in datasets: 54 | logger.info(f"Evaluating {dataset}...") 55 | exp = ExperimentHandler(dataset) 56 | try: 57 | eval_dataset_df = exp.evaluate_models( 58 | [ 59 | "SalesforceMoirai", 60 | "StatisticalEnsemble", 61 | "SeasonalNaive", 62 | ] 63 | ) 64 | print(eval_dataset_df) 65 | eval_df.append(eval_dataset_df) 66 | except Exception as e: 67 | logger.error(e) 68 | eval_df = pd.concat(eval_df).reset_index(drop=True) 69 | exp.save_dataframe(eval_df, "complete-results.csv") 70 | else: 71 | raise ValueError(f"mode {mode} not found") 72 | 73 | 74 | if __name__ == "__main__": 75 | fire.Fire(main) 76 | -------------------------------------------------------------------------------- /experiments/salesforce-moirai/src/moirai_pipeline.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | from typing import Iterable, List, Tuple 3 | 4 | import fire 5 | import pandas as pd 6 | import torch 7 | from gluonts.dataset import Dataset 8 | from gluonts.model.forecast import Forecast 9 | from gluonts.torch.model.predictor import PyTorchPredictor 10 | from huggingface_hub import hf_hub_download 11 | from tqdm import tqdm 12 | from uni2ts.model.moirai import MoiraiForecast 13 | 14 | from src.utils import ExperimentHandler 15 | 16 | 17 | def get_morai_predictor( 18 | model_size: str, 19 | prediction_length: int, 20 | target_dim: int, 21 | batch_size: int, 22 | ) -> PyTorchPredictor: 23 | model = MoiraiForecast.load_from_checkpoint( 24 | checkpoint_path=hf_hub_download( 25 | repo_id=f"Salesforce/moirai-1.0-R-{model_size}", 26 | filename="model.ckpt", 27 | ), 28 | prediction_length=prediction_length, 29 | context_length=200, 30 | patch_size="auto", 31 | num_samples=100, 32 | target_dim=target_dim, 33 | feat_dynamic_real_dim=0, 34 | past_feat_dynamic_real_dim=0, 35 | map_location="cuda:0" if torch.cuda.is_available() else "cpu", 36 | ) 37 | 38 | predictor = model.create_predictor(batch_size) 39 | 40 | return predictor 41 
| 42 | 43 | def gluonts_instance_fcst_to_df( 44 | fcst: Forecast, 45 | quantiles: List[float], 46 | model_name: str, 47 | ) -> pd.DataFrame: 48 | point_forecast = fcst.mean 49 | h = len(point_forecast) 50 | dates = pd.date_range( 51 | fcst.start_date.to_timestamp(), 52 | freq=fcst.freq, 53 | periods=h, 54 | ) 55 | fcst_df = pd.DataFrame( 56 | { 57 | "ds": dates, 58 | "unique_id": fcst.item_id, 59 | model_name: point_forecast, 60 | } 61 | ) 62 | for q in quantiles: 63 | fcst_df[f"{model_name}-q-{q}"] = fcst.quantile(q) 64 | return fcst_df 65 | 66 | 67 | def gluonts_fcsts_to_df( 68 | fcsts: Iterable[Forecast], 69 | quantiles: List[float], 70 | model_name: str, 71 | ) -> pd.DataFrame: 72 | df = [] 73 | for fcst in tqdm(fcsts): 74 | fcst_df = gluonts_instance_fcst_to_df(fcst, quantiles, model_name) 75 | df.append(fcst_df) 76 | return pd.concat(df).reset_index(drop=True) 77 | 78 | 79 | def run_moirai( 80 | gluonts_dataset: Dataset, 81 | model_size: str, 82 | horizon: int, 83 | target_dim: int, 84 | batch_size: int, 85 | quantiles: List[float], 86 | ) -> Tuple[pd.DataFrame, float, str]: 87 | init_time = time() 88 | predictor = get_morai_predictor(model_size, horizon, target_dim, batch_size) 89 | fcsts = predictor.predict(gluonts_dataset) 90 | model_name = "SalesforceMoirai" 91 | fcsts_df = gluonts_fcsts_to_df( 92 | fcsts, 93 | quantiles=quantiles, 94 | model_name=model_name, 95 | ) 96 | total_time = time() - init_time 97 | return fcsts_df, total_time, model_name 98 | 99 | 100 | def main(dataset: str): 101 | exp = ExperimentHandler(dataset) 102 | fcst_df, total_time, model_name = run_moirai( 103 | gluonts_dataset=exp.gluonts_train_dataset, 104 | model_size="large", 105 | horizon=exp.horizon, 106 | target_dim=1, 107 | batch_size=32, 108 | quantiles=exp.quantiles, 109 | ) 110 | exp.save_results(fcst_df, total_time, model_name) 111 | 112 | 113 | if __name__ == "__main__": 114 | fire.Fire(main) 115 | -------------------------------------------------------------------------------- /experiments/salesforce-moirai/src/statsforecast_pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | from time import time 3 | from typing import List, Tuple 4 | 5 | os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1" 6 | os.environ["NIXTLA_NUMBA_CACHE"] = "1" 7 | 8 | import fire 9 | import numpy as np 10 | import pandas as pd 11 | from scipy.stats import norm 12 | from statsforecast import StatsForecast 13 | from statsforecast.models import ( 14 | AutoARIMA, 15 | AutoETS, 16 | AutoCES, 17 | DynamicOptimizedTheta, 18 | SeasonalNaive, 19 | ) 20 | 21 | from src.utils import ExperimentHandler 22 | 23 | 24 | def run_seasonal_naive( 25 | train_df: pd.DataFrame, 26 | horizon: int, 27 | freq: str, 28 | seasonality: int, 29 | level: List[int], 30 | ) -> Tuple[pd.DataFrame, float, str]: 31 | os.environ["NIXTLA_ID_AS_COL"] = "true" 32 | sf = StatsForecast( 33 | models=[SeasonalNaive(season_length=seasonality)], 34 | freq=freq, 35 | n_jobs=-1, 36 | ) 37 | model = sf 38 | init_time = time() 39 | fcsts_df = model.forecast(df=train_df, h=horizon, level=level) 40 | total_time = time() - init_time 41 | return fcsts_df, total_time, "SeasonalNaive" 42 | 43 | 44 | def ensemble_forecasts( 45 | fcsts_df: pd.DataFrame, 46 | quantiles: List[float], 47 | name_models: List[str], 48 | model_name: str, 49 | ) -> pd.DataFrame: 50 | fcsts_df[model_name] = fcsts_df[name_models].mean(axis=1).values # type: ignore 51 | # compute quantiles based on the mean of the forecasts 52 | sigma_models = [] 53 | for 
model in name_models: 54 | fcsts_df[f"sigma_{model}"] = fcsts_df[f"{model}-hi-68.27"] - fcsts_df[model] 55 | sigma_models.append(f"sigma_{model}") 56 | fcsts_df[f"std_{model_name}"] = ( 57 | fcsts_df[sigma_models].pow(2).sum(axis=1).div(len(sigma_models) ** 2).pow(0.5) 58 | ) 59 | z = norm.ppf(quantiles) 60 | q_cols = [] 61 | for q, zq in zip(quantiles, z): 62 | q_col = f"{model_name}-q-{q}" 63 | fcsts_df[q_col] = fcsts_df[model_name] + zq * fcsts_df[f"std_{model_name}"] 64 | q_cols.append(q_col) 65 | fcsts_df = fcsts_df[["unique_id", "ds"] + [model_name] + q_cols] 66 | return fcsts_df 67 | 68 | 69 | def run_statistical_ensemble( 70 | train_df: pd.DataFrame, 71 | horizon: int, 72 | freq: str, 73 | seasonality: int, 74 | quantiles: List[float], 75 | ) -> Tuple[pd.DataFrame, float, str]: 76 | os.environ["NIXTLA_ID_AS_COL"] = "true" 77 | models = [ 78 | AutoARIMA(season_length=seasonality), 79 | AutoETS(season_length=seasonality), 80 | AutoCES(season_length=seasonality), 81 | DynamicOptimizedTheta(season_length=seasonality), 82 | ] 83 | init_time = time() 84 | series_per_core = 15 85 | n_series = train_df["unique_id"].nunique() 86 | n_jobs = min(n_series // series_per_core, os.cpu_count()) 87 | sf = StatsForecast( 88 | models=models, 89 | freq=freq, 90 | n_jobs=n_jobs, 91 | ) 92 | fcsts_df = sf.forecast(df=train_df, h=horizon, level=[68.27]) 93 | name_models = [repr(model) for model in models] 94 | model_name = "StatisticalEnsemble" 95 | fcsts_df = ensemble_forecasts( 96 | fcsts_df, 97 | quantiles, 98 | name_models, 99 | model_name, 100 | ) 101 | total_time = time() - init_time 102 | return fcsts_df, total_time, model_name 103 | 104 | 105 | def main(dataset: str): 106 | exp = ExperimentHandler(dataset) 107 | # seasonal naive benchmark 108 | fcst_df, total_time, model_name = run_seasonal_naive( 109 | train_df=exp.train_df, 110 | horizon=exp.horizon, 111 | freq=exp.freq, 112 | seasonality=exp.seasonality, 113 | level=exp.level, 114 | ) 115 | fcst_df = exp.fcst_from_level_to_quantiles(fcst_df, model_name) 116 | exp.save_results(fcst_df, total_time, model_name) 117 | # statistical ensemble 118 | fcst_df, total_time, model_name = run_statistical_ensemble( 119 | train_df=exp.train_df, 120 | horizon=exp.horizon, 121 | freq=exp.freq, 122 | seasonality=exp.seasonality, 123 | quantiles=exp.quantiles, 124 | ) 125 | exp.save_results(fcst_df, total_time, model_name) 126 | 127 | 128 | if __name__ == "__main__": 129 | from statsforecast.utils import AirPassengers as ap 130 | 131 | AutoARIMA(season_length=12).forecast(ap.astype(np.float32), h=12) 132 | fire.Fire(main) 133 | -------------------------------------------------------------------------------- /experiments/vn1-competition/Makefile: -------------------------------------------------------------------------------- 1 | download_data: 2 | mkdir -p data 3 | curl https://www.datasource.ai/attachments/eyJpZCI6Ijk4NDYxNjE2NmZmZjM0MGRmNmE4MTczOGMyMzI2ZWI2LmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiUGhhc2UgMCAtIFNhbGVzLmNzdiIsInNpemUiOjEwODA0NjU0LCJtaW1lX3R5cGUiOiJ0ZXh0L2NzdiJ9fQ -o data/phase_0_sales.csv 4 | curl https://www.datasource.ai/attachments/eyJpZCI6ImM2OGQxNGNmNTJkZDQ1MTUyZTg0M2FkMDAyMjVlN2NlLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiUGhhc2UgMSAtIFNhbGVzLmNzdiIsInNpemUiOjEwMTgzOTYsIm1pbWVfdHlwZSI6InRleHQvY3N2In19 -o data/phase_1_sales.csv 5 | curl 
https://www.datasource.ai/attachments/eyJpZCI6IjhlNmJmNmU3ZTlhNWQ4NTcyNGVhNTI4YjAwNTk3OWE1LmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiUGhhc2UgMiAtIFNhbGVzLmNzdiIsInNpemUiOjEwMTI0MzcsIm1pbWVfdHlwZSI6InRleHQvY3N2In19 -o data/phase_2_sales.csv 6 | curl https://www.datasource.ai/attachments/eyJpZCI6IjI1NDQxYmMyMTQ3MTA0MjJhMDcyYjllODcwZjEyNmY4LmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoicGhhc2UgMiBzdWJtaXNzaW9uIGV4YW1pbmUgc21vb3RoZWQgMjAyNDEwMTcgRklOQUwuY3N2Iiwic2l6ZSI6MTk5MzAzNCwibWltZV90eXBlIjoidGV4dC9jc3YifX0 -o data/solution_1st_place.csv 7 | curl https://www.datasource.ai/attachments/eyJpZCI6IjU3ODhjZTUwYTU3MTg3NjFlYzMzOWU0ZTg3MWUzNjQxLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoidm4xX3N1Ym1pc3Npb25fanVzdGluX2Z1cmxvdHRlLmNzdiIsInNpemUiOjM5MDkzNzksIm1pbWVfdHlwZSI6InRleHQvY3N2In19 -o data/solution_2nd_place.csv 8 | curl https://www.datasource.ai/attachments/eyJpZCI6ImE5NzcwNTZhMzhhMTc2ZWJjODFkMDMwMTM2Y2U2MTdlLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiYXJzYW5pa3phZF9zdWIuY3N2Iiwic2l6ZSI6Mzg4OTcyNCwibWltZV90eXBlIjoidGV4dC9jc3YifX0 -o data/solution_3rd_place.csv 9 | curl https://www.datasource.ai/attachments/eyJpZCI6ImVlZmUxYWY2NDFjOWMwM2IxMzRhZTc2MzI1Nzg3NzIxLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiVEZUX3R1bmVkX1YyX3NlZWRfNDIuY3N2Iiwic2l6ZSI6NjA3NDgzLCJtaW1lX3R5cGUiOiJ0ZXh0L2NzdiJ9fQ -o data/solution_4th_place.csv 10 | curl https://www.datasource.ai/attachments/eyJpZCI6IjMwMDEwMmY3NTNhMzlhN2YxNTk3ODYxZTI1N2Q2NzRmLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiZGl2aW5lb3B0aW1pemVkd2VpZ2h0c2Vuc2VtYmxlLmNzdiIsInNpemUiOjE3OTU0NzgsIm1pbWVfdHlwZSI6InRleHQvY3N2In19 -o data/solution_5th_place.csv 11 | -------------------------------------------------------------------------------- /experiments/vn1-competition/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/experiments/vn1-competition/src/__init__.py -------------------------------------------------------------------------------- /experiments/vn1-competition/src/functions.R: -------------------------------------------------------------------------------- 1 | 2 | # Functions for VN1 Forecasting Competition ---- 3 | 4 | read_and_prepare_data <- function(dataset){ 5 | # Reads data in wide format and returns it in long format with columns `unique_id`, `ds`, and `y` 6 | url <- get_dataset_url(dataset) 7 | df_wide <- fread(url) 8 | df_wide <- df_wide |> 9 | mutate(unique_id = paste0(Client, "/", Warehouse, "/", Product)) |> 10 | select(c(unique_id, everything())) |> 11 | select(-c(Client, Warehouse, Product)) 12 | 13 | df <- pivot_longer( 14 | data = df_wide, 15 | cols = -unique_id, 16 | names_to = "ds", 17 | values_to = "y" 18 | ) 19 | 20 | if(startsWith(dataset, "winners")){ 21 | names(df)[which(names(df) == "y")] <- dataset 22 | } 23 | 24 | return(df) 25 | } 26 | 27 | get_train_data <- function(df0, df1){ 28 | # Merges training data from phase 0 and phase 1 and removes leading zeros 29 | df <- rbind(df0, df1) |> 30 | arrange(unique_id, ds) 31 | 32 | df_clean <- df |> 33 | group_by(unique_id) |> 34 | mutate(cumsum = cumsum(y)) |> 35 | filter(cumsum > 0) |> 36 | select(-cumsum) |> 37 | ungroup() 38 | 39 | return(df_clean) 40 | } 41 | 42 | vn1_competition_evaluation <- function(test, forecast, model){ 43 | # Computes competition evaluation 44 | if(!is.character(forecast$ds)){ 45 | forecast$ds <- 
as.character(forecast$ds) # nixtlar returns timestamps for plotting 46 | } 47 | 48 | res <- merge(forecast, test, by=c("unique_id", "ds")) 49 | 50 | res <- res |> 51 | mutate(abs_err = abs(res[[model]]-res$y)) |> 52 | mutate(err = res[[model]]-res$y) 53 | 54 | abs_err = sum(res$abs_err, na.rm = TRUE) 55 | err = sum(res$err, na.rm = TRUE) 56 | score = abs_err+abs(err) 57 | score = score/sum(res$y) 58 | score = round(score, 4) 59 | 60 | return(score) 61 | } 62 | 63 | get_dataset_url <- function(dataset){ 64 | # Returns the url of the given competition dataset 65 | urls <- list( 66 | phase0_sales = "https://www.datasource.ai/attachments/eyJpZCI6Ijk4NDYxNjE2NmZmZjM0MGRmNmE4MTczOGMyMzI2ZWI2LmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiUGhhc2UgMCAtIFNhbGVzLmNzdiIsInNpemUiOjEwODA0NjU0LCJtaW1lX3R5cGUiOiJ0ZXh0L2NzdiJ9fQ", 67 | phase1_sales = "https://www.datasource.ai/attachments/eyJpZCI6ImM2OGQxNGNmNTJkZDQ1MTUyZTg0M2FkMDAyMjVlN2NlLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiUGhhc2UgMSAtIFNhbGVzLmNzdiIsInNpemUiOjEwMTgzOTYsIm1pbWVfdHlwZSI6InRleHQvY3N2In19", 68 | phase2_sales = "https://www.datasource.ai/attachments/eyJpZCI6IjhlNmJmNmU3ZTlhNWQ4NTcyNGVhNTI4YjAwNTk3OWE1LmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiUGhhc2UgMiAtIFNhbGVzLmNzdiIsInNpemUiOjEwMTI0MzcsIm1pbWVfdHlwZSI6InRleHQvY3N2In19", 69 | winners1 = "https://www.datasource.ai/attachments/eyJpZCI6IjI1NDQxYmMyMTQ3MTA0MjJhMDcyYjllODcwZjEyNmY4LmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoicGhhc2UgMiBzdWJtaXNzaW9uIGV4YW1pbmUgc21vb3RoZWQgMjAyNDEwMTcgRklOQUwuY3N2Iiwic2l6ZSI6MTk5MzAzNCwibWltZV90eXBlIjoidGV4dC9jc3YifX0", 70 | winners2 = "https://www.datasource.ai/attachments/eyJpZCI6IjU3ODhjZTUwYTU3MTg3NjFlYzMzOWU0ZTg3MWUzNjQxLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoidm4xX3N1Ym1pc3Npb25fanVzdGluX2Z1cmxvdHRlLmNzdiIsInNpemUiOjM5MDkzNzksIm1pbWVfdHlwZSI6InRleHQvY3N2In19", 71 | winners3 = "https://www.datasource.ai/attachments/eyJpZCI6ImE5NzcwNTZhMzhhMTc2ZWJjODFkMDMwMTM2Y2U2MTdlLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiYXJzYW5pa3phZF9zdWIuY3N2Iiwic2l6ZSI6Mzg4OTcyNCwibWltZV90eXBlIjoidGV4dC9jc3YifX0", 72 | winners4 = "https://www.datasource.ai/attachments/eyJpZCI6ImVlZmUxYWY2NDFjOWMwM2IxMzRhZTc2MzI1Nzg3NzIxLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiVEZUX3R1bmVkX1YyX3NlZWRfNDIuY3N2Iiwic2l6ZSI6NjA3NDgzLCJtaW1lX3R5cGUiOiJ0ZXh0L2NzdiJ9fQ", 73 | winners5 = "https://www.datasource.ai/attachments/eyJpZCI6IjMwMDEwMmY3NTNhMzlhN2YxNTk3ODYxZTI1N2Q2NzRmLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiZGl2aW5lb3B0aW1pemVkd2VpZ2h0c2Vuc2VtYmxlLmNzdiIsInNpemUiOjE3OTU0NzgsIm1pbWVfdHlwZSI6InRleHQvY3N2In19" 74 | ) 75 | 76 | return(urls[[dataset]]) 77 | } 78 | 79 | -------------------------------------------------------------------------------- /experiments/vn1-competition/src/main.R: -------------------------------------------------------------------------------- 1 | 2 | # VN1 Forecasting Competition Solution with nixtlar ---- 3 | 4 | install.packages(c("nixtlar", "tidyverse", "data.table")) 5 | 6 | library(nixtlar) 7 | library(tidyverse) 8 | library(data.table) 9 | 10 | source("functions.R") # same directory as main.R 11 | 12 | ## Load Data ---- 13 | sales0 <- read_and_prepare_data("phase0_sales") 14 | sales1 <- read_and_prepare_data("phase1_sales") 15 | test_df <- read_and_prepare_data("phase2_sales") 16 | 17 | ## Prepare Training Dataset ---- 18 | train_df <- get_train_data(sales0, sales1) 19 | 20 | ## Generate TimeGPT 
Forecast ---- 21 | 22 | # nixtla_client_setup(api_key = "Your API key here") 23 | # Learn how to set up your API key here: https://nixtla.github.io/nixtlar/articles/setting-up-your-api-key.html 24 | 25 | fc <- nixtla_client_forecast(train_df, h=13, model="timegpt-1-long-horizon") 26 | 27 | ## Visualize TimeGPT Forecast ---- 28 | nixtla_client_plot(train_df, fc) 29 | 30 | ## Evaluate TimeGPT & Top 5 Competition Solutions ---- 31 | timegpt_score <- vn1_competition_evaluation(test_df, fc, "TimeGPT") 32 | 33 | scores <- lapply(1:5, function(i){ # Top 5 34 | winner_df <- read_and_prepare_data(paste0("winners", i)) 35 | vn1_competition_evaluation(test_df, winner_df, model = paste0("winners", i)) 36 | }) 37 | 38 | scores_df <- data.frame( 39 | "Result" = c(paste0("Place #", 1:5), "TimeGPT"), 40 | "Score" = c(as.numeric(scores), timegpt_score) 41 | ) 42 | 43 | scores_df <- scores_df |> arrange(Score) 44 | print(scores_df) # TimeGPT places 2nd! 45 | -------------------------------------------------------------------------------- /experiments/vn1-competition/src/main.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from dotenv import load_dotenv 7 | from nixtla import NixtlaClient 8 | 9 | load_dotenv() 10 | 11 | 12 | def read_and_prepare_data(file_path: str, value_name: str = "y") -> pd.DataFrame: 13 | """Reads data in wide format, and returns it in long format with columns `unique_id`, `ds`, `y`""" 14 | df = pd.read_csv(file_path) 15 | uid_cols = ["Client", "Warehouse", "Product"] 16 | df["unique_id"] = df[uid_cols].astype(str).agg("-".join, axis=1) 17 | df = df.drop(uid_cols, axis=1) 18 | df = df.melt(id_vars=["unique_id"], var_name="ds", value_name=value_name) 19 | df["ds"] = pd.to_datetime(df["ds"]) 20 | df = df.sort_values(by=["unique_id", "ds"]) 21 | return df 22 | 23 | 24 | def get_train_data() -> pd.DataFrame: 25 | """Reads all train data and returns it in long format with columns `unique_id`, `ds`, `y`""" 26 | train_list = [read_and_prepare_data(f"./data/phase_{i}_sales.csv") for i in [0, 1]] 27 | train_df = pd.concat(train_list).reset_index(drop=True) 28 | train_df = train_df.sort_values(by=["unique_id", "ds"]) 29 | 30 | def remove_leading_zeros(group): 31 | first_non_zero_index = group["y"].ne(0).idxmax() 32 | return group.loc[first_non_zero_index:] 33 | 34 | train_df = ( 35 | train_df.groupby("unique_id").apply(remove_leading_zeros).reset_index(drop=True) 36 | ) 37 | return train_df 38 | 39 | 40 | def get_competition_forecasts() -> pd.DataFrame: 41 | """Reads all competition forecasts and returns it in long format with columns `unique_id`, `ds`, `y`""" 42 | fcst_df: pd.DataFrame | None = None 43 | for place in ["1st", "2nd", "3rd", "4th", "5th"]: 44 | fcst_df_place = read_and_prepare_data( 45 | f"./data/solution_{place}_place.csv", place 46 | ) 47 | if fcst_df is None: 48 | fcst_df = fcst_df_place 49 | else: 50 | fcst_df = fcst_df.merge( 51 | fcst_df_place, 52 | on=["unique_id", "ds"], 53 | how="left", 54 | ) 55 | return fcst_df 56 | 57 | 58 | def vn1_competition_evaluation(forecasts: pd.DataFrame) -> pd.DataFrame: 59 | """Computes competition evaluation scores""" 60 | actual = read_and_prepare_data("./data/phase_2_sales.csv") 61 | res = actual[["unique_id", "ds", "y"]].merge( 62 | forecasts, on=["unique_id", "ds"], how="left" 63 | ) 64 | ids_forecasts = forecasts["unique_id"].unique() 65 | ids_res = res["unique_id"].unique() 66 | assert set(ids_forecasts) == 
set(ids_res), "Some unique_ids are missing" 67 | scores = {} 68 | for model in [col for col in forecasts.columns if col not in ["unique_id", "ds"]]: 69 | abs_err = np.nansum(np.abs(res[model] - res["y"])) 70 | err = np.nansum(res[model] - res["y"]) 71 | score = abs_err + abs(err) 72 | score = score / res["y"].sum() 73 | scores[model] = round(score, 4) 74 | score_df = pd.DataFrame(list(scores.items()), columns=["model", "score"]) 75 | score_df = score_df.sort_values(by="score") 76 | return score_df 77 | 78 | 79 | def main(): 80 | """Complete pipeline""" 81 | train_df = get_train_data() 82 | client = NixtlaClient() 83 | init = time() 84 | fcst_df = client.forecast(train_df, h=13, model="timegpt-1-long-horizon") 85 | print(f"TimeGPT time: {time() - init}") 86 | fcst_df_comp = get_competition_forecasts() 87 | fcst_df = fcst_df.merge(fcst_df_comp, on=["unique_id", "ds"], how="left") 88 | eval_df = vn1_competition_evaluation(fcst_df) 89 | print(eval_df) 90 | 91 | 92 | if __name__ == "__main__": 93 | main() 94 | -------------------------------------------------------------------------------- /experiments/vn1-competition/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/experiments/vn1-competition/tests/__init__.py -------------------------------------------------------------------------------- /experiments/vn1-competition/tests/test_scores.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from src.main import vn1_competition_evaluation, get_competition_forecasts 4 | 5 | 6 | def test_vn1_competition_evaluation(): 7 | forecasts = get_competition_forecasts() 8 | eval_df = vn1_competition_evaluation(forecasts) 9 | assert len(eval_df) == 5 10 | pd.testing.assert_series_equal( 11 | eval_df["score"], 12 | pd.Series([0.4637, 0.4657, 0.4758, 0.4774, 0.4808]), 13 | check_names=False, 14 | ) 15 | -------------------------------------------------------------------------------- /nbs/_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | 4 | format: 5 | html: 6 | theme: cosmo 7 | fontsize: 1em 8 | linestretch: 1.7 9 | css: styles.css 10 | toc: true 11 | 12 | website: 13 | twitter-card: 14 | image: "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg" 15 | site: "@Nixtlainc" 16 | open-graph: 17 | image: "https://github.com/Nixtla/styles/blob/2abf51612584169874c90cd7c4d347e3917eaf73/images/Banner%20Github.png" 18 | google-analytics: "G-NXJNCVR18L" 19 | repo-actions: [issue] 20 | favicon: favicon_png.png 21 | navbar: 22 | background: primary 23 | search: true 24 | collapse-below: lg 25 | left: 26 | - text: "Get Started" 27 | href: timegpt.ipynb 28 | - text: "NixtlaVerse" 29 | menu: 30 | - text: "StatsForecast ⚡️" 31 | href: https://github.com/nixtla/statsforecast 32 | - text: "MLForecast 🤖" 33 | href: https://github.com/nixtla/mlforecast 34 | - text: "NeuralForecast 🧠" 35 | href: https://github.com/nixtla/neuralforecast 36 | - text: "HierarchicalForecast 👑" 37 | href: https://github.com/nixtla/hierarchicalforecast 38 | - text: "Help" 39 | menu: 40 | - text: "Report an Issue" 41 | icon: bug 42 | href: https://github.com/nixtla/statsforecast/issues/new/choose 43 | - text: "Join our Slack" 44 | icon: chat-right-text 45 | href: https://join.slack.com/t/nixtlaworkspace/shared_invite/zt-135dssye9-fWTzMpv2WBthq8NK0Yvu6A 46 | right: 
47 | - icon: github 48 | href: "https://github.com/nixtla/nixtla" 49 | - icon: twitter 50 | href: https://twitter.com/nixtlainc 51 | aria-label: Nixtla Twitter 52 | 53 | sidebar: 54 | style: floating 55 | body-footer: | 56 | Give us a ⭐ on [Github](https://github.com/nixtla/nixtla) 57 | 58 | metadata-files: [nbdev.yml, sidebar.yml] 59 | -------------------------------------------------------------------------------- /nbs/assets/Inter-VariableFont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/assets/Inter-VariableFont.ttf -------------------------------------------------------------------------------- /nbs/assets/M5_categorical_variables_example.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/assets/M5_categorical_variables_example.parquet -------------------------------------------------------------------------------- /nbs/assets/M5_what_if_pricing_example.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/assets/M5_what_if_pricing_example.parquet -------------------------------------------------------------------------------- /nbs/assets/forecast_synthetic_data.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/assets/forecast_synthetic_data.mp4 -------------------------------------------------------------------------------- /nbs/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/assets/logo.png -------------------------------------------------------------------------------- /nbs/assets/long_horizon_example_Y_df.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/assets/long_horizon_example_Y_df.parquet -------------------------------------------------------------------------------- /nbs/docs/capabilities/000_capabilities.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6de758ee-a0d2-4b3f-acff-eed419dd17c5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Capabilities" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "5d267032-535b-4b7b-b7d3-d2db8f673af6", 14 | "metadata": {}, 15 | "source": [ 16 | "This section offers an overview of capabilities of TimeGPT" 17 | ] 18 | } 19 | ], 20 | "metadata": { 21 | "kernelspec": { 22 | "display_name": "python3", 23 | "language": "python", 24 | "name": "python3" 25 | } 26 | }, 27 | "nbformat": 4, 28 | "nbformat_minor": 5 29 | } 30 | -------------------------------------------------------------------------------- /nbs/docs/capabilities/forecast/00_forecast.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6de758ee-a0d2-4b3f-acff-eed419dd17c5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Forecast" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | 
"id": "5d267032-535b-4b7b-b7d3-d2db8f673af6", 14 | "metadata": {}, 15 | "source": [ 16 | "This section shows the capabilities TimeGPT offers for forecasting.\n", 17 | "\n", 18 | "TimeGPT is capable of zero-shot forecasting a wide variety of time series from different domains, thanks to its pretraining on a vast amount of time series data.\n", 19 | "\n", 20 | "Here, you will find recipes for the following tasks:\n", 21 | "\n", 22 | "* [Zero-shot forecasting](https://docs.nixtla.io/docs/capabilities-forecast-quickstart)\n", 23 | "\n", 24 | "* [Forecasting with exogenous variables](https://docs.nixtla.io/docs/capabilities-forecast-add_exogenous_variables)\n", 25 | "\n", 26 | "* [Forecasting with holidays and special dates](https://docs.nixtla.io/docs/capabilities-forecast-add_holidays_and_special_dates)\n", 27 | "\n", 28 | "* [Forecasting with categorical variables](https://docs.nixtla.io/docs/capabilities-forecast-add_categorical_variables)\n", 29 | "\n", 30 | "* [Long-horizon forecasting](https://docs.nixtla.io/docs/capabilities-forecast-long_horizon_forecasting)\n", 31 | "\n", 32 | "* [Forecasting multiple series](https://docs.nixtla.io/docs/capabilities-forecast-multiple_series_forecasting)\n", 33 | "\n", 34 | "* [Fine-tuning TimeGPT](https://docs.nixtla.io/docs/capabilities-forecast-fine_tuning)\n", 35 | "\n", 36 | "* [Fine-tuning with a specific loss function](https://docs.nixtla.io/docs/capabilities-forecast-finetuning_with_a_custom_loss_function)\n", 37 | "\n", 38 | "* [Cross-validation](https://docs.nixtla.io/docs/capabilities-forecast-cross_validation)\n", 39 | "\n", 40 | "* [Adding prediction intervals](https://docs.nixtla.io/docs/capabilities-forecast-predictions_intervals)\n", 41 | "\n", 42 | "* [Dealing with irregular timestamps](https://docs.nixtla.io/docs/capabilities-forecast-irregular_timestamps)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "ec7b0357", 48 | "metadata": {}, 49 | "source": [] 50 | } 51 | ], 52 | "metadata": { 53 | "kernelspec": { 54 | "display_name": "python3", 55 | "language": "python", 56 | "name": "python3" 57 | } 58 | }, 59 | "nbformat": 4, 60 | "nbformat_minor": 5 61 | } 62 | -------------------------------------------------------------------------------- /nbs/docs/capabilities/forecast/03_holidays_special_dates.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#| hide\n", 10 | "!pip install -Uqq nixtla" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "#| hide \n", 20 | "from nixtla.utils import in_colab" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "#| hide \n", 30 | "IN_COLAB = in_colab()" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "#| hide\n", 40 | "if not IN_COLAB:\n", 41 | " from nixtla.utils import colab_badge\n", 42 | " from dotenv import load_dotenv" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# Add holidays and special dates\n", 50 | "\n", 51 | "You can create DataFrames specifying holidays for particular countries and specify your own special dates to include them as features for forecasting." 
52 |     ] 53 |    }, 54 |    { 55 |     "cell_type": "code", 56 |     "execution_count": null, 57 |     "metadata": {}, 58 |     "outputs": [ 59 |      { 60 |       "data": { 61 |        "text/markdown": [ 62 |         "[![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Nixtla/nixtla/blob/main/nbs/docs/capabilities/forecast/03_holidays_special_dates.ipynb)" 63 |        ], 64 |        "text/plain": [ 65 |         "" 66 |        ] 67 |       }, 68 |       "metadata": {}, 69 |       "output_type": "display_data" 70 |      } 71 |     ], 72 |     "source": [ 73 |      "#| echo: false\n", 74 |      "if not IN_COLAB:\n", 75 |      "    load_dotenv()\n", 76 |      "    colab_badge('docs/capabilities/forecast/03_holidays_special_dates')" 77 |     ] 78 |    }, 79 |    { 80 |     "cell_type": "code", 81 |     "execution_count": null, 82 |     "metadata": {}, 83 |     "outputs": [], 84 |     "source": [ 85 |      "import pandas as pd\n", 86 |      "from nixtla import NixtlaClient\n", 87 |      "from nixtla.date_features import CountryHolidays\n", 88 |      "from nixtla.date_features import SpecialDates" 89 |     ] 90 |    }, 91 |    { 92 |     "cell_type": "code", 93 |     "execution_count": null, 94 |     "metadata": {}, 95 |     "outputs": [], 96 |     "source": [ 97 |      "nixtla_client = NixtlaClient(\n", 98 |      "    # defaults to os.environ.get(\"NIXTLA_API_KEY\")\n", 99 |      "    api_key = 'my_api_key_provided_by_nixtla'\n", 100 |      ")" 101 |     ] 102 |    }, 103 |    { 104 |     "cell_type": "markdown", 105 |     "metadata": {}, 106 |     "source": [ 107 |      "> 👍 Use an Azure AI endpoint\n", 108 |      "> \n", 109 |      "> To use an Azure AI endpoint, remember to also set the `base_url` argument:\n", 110 |      "> \n", 111 |      "> `nixtla_client = NixtlaClient(base_url=\"your azure ai endpoint\", api_key=\"your api_key\")`" 112 |     ] 113 |    }, 114 |    { 115 |     "cell_type": "code", 116 |     "execution_count": null, 117 |     "metadata": {}, 118 |     "outputs": [], 119 |     "source": [ 120 |      "#| hide\n", 121 |      "if not IN_COLAB:\n", 122 |      "    nixtla_client = NixtlaClient()" 123 |     ] 124 |    }, 125 |    { 126 |     "cell_type": "code", 127 |     "execution_count": null, 128 |     "metadata": {}, 129 |     "outputs": [], 130 |     "source": [ 131 |      "# Get country holidays for the US\n", 132 |      "c_holidays = CountryHolidays(countries=['US'])\n", 133 |      "periods = 365 * 1\n", 134 |      "dates = pd.date_range(end='2023-09-01', periods=periods)\n", 135 |      "holidays_df = c_holidays(dates)\n", 136 |      "\n", 137 |      "# Specify your own special dates\n", 138 |      "special_dates = SpecialDates(\n", 139 |      "    special_dates={\n", 140 |      "        'Important Dates': ['2021-02-26', '2020-02-26'],\n", 141 |      "        'Very Important Dates': ['2021-01-26', '2020-01-26', '2019-01-26']\n", 142 |      "    }\n", 143 |      ")\n", 144 |      "periods = 365 * 1\n", 145 |      "dates = pd.date_range(end='2023-09-01', periods=periods)\n", 146 |      "special_dates_df = special_dates(dates)" 147 |     ] 148 |    }, 149 |    { 150 |     "cell_type": "markdown", 151 |     "metadata": {}, 152 |     "source": [ 153 |      "For a detailed guide on using special dates and holidays, read our tutorial on [Holidays and special dates](https://docs.nixtla.io/docs/tutorials-holidays_and_special_dates)."
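, "\n", "As a minimal, hedged sketch (not part of the original recipe) of how the date features built above could be passed to TimeGPT: it assumes `df` is a long-format pandas DataFrame with `unique_id`, `ds` and `y` columns, that the data is daily, and that your `nixtla` version exposes the `date_features` argument of `forecast`.\n", "\n", "```python\n", "# Hypothetical usage: pass the callables created above as date features\n", "fcst_df = nixtla_client.forecast(\n", "    df=df,        # assumed long-format DataFrame (unique_id, ds, y)\n", "    h=14,\n", "    freq='D',     # assumed daily frequency\n", "    date_features=[c_holidays, special_dates],\n", ")\n", "```"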
154 | ] 155 | } 156 | ], 157 | "metadata": { 158 | "kernelspec": { 159 | "display_name": "python3", 160 | "language": "python", 161 | "name": "python3" 162 | } 163 | }, 164 | "nbformat": 4, 165 | "nbformat_minor": 2 166 | } 167 | -------------------------------------------------------------------------------- /nbs/docs/capabilities/historical-anomaly-detection/00_historical_anomaly_detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6de758ee-a0d2-4b3f-acff-eed419dd17c5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Historical anomaly detection" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "5d267032-535b-4b7b-b7d3-d2db8f673af6", 14 | "metadata": {}, 15 | "source": [ 16 | "This section provides various recipes for performing historical anomaly detection using TimeGPT.\n", 17 | "\n", 18 | "Historical anomaly detection identifies data points that deviate from the expected behavior over a given historical time series, helping to spot fraudulent activity, security breaches, or significant outliers.\n", 19 | "\n", 20 | "The process involves generating predictions and constructing a 99% confidence interval. Data points falling outside this interval are considered anomalies.\n", 21 | "\n", 22 | "This section covers:\n", 23 | "\n", 24 | "* [Historical anomaly detection](https://docs.nixtla.io/docs/capabilities-historical-anomaly-detection-quickstart)\n", 25 | "\n", 26 | "* [Historical anomaly detection with exogenous features](https://docs.nixtla.io/docs/capabilities-historical-anomaly-detection-add_exogenous_variables)\n", 27 | "\n", 28 | "* [Historical anomaly detection with date features](https://docs.nixtla.io/docs/capabilities-historical-anomaly-detection-add_date_features)\n", 29 | "\n", 30 | "* [Modifying the confidence intervals](https://docs.nixtla.io/docs/capabilities-historical-anomaly-detection-add_confidence_levels)" 31 | ] 32 | } 33 | ], 34 | "metadata": { 35 | "kernelspec": { 36 | "display_name": "python3", 37 | "language": "python", 38 | "name": "python3" 39 | } 40 | }, 41 | "nbformat": 4, 42 | "nbformat_minor": 5 43 | } 44 | -------------------------------------------------------------------------------- /nbs/docs/capabilities/online-anomaly-detection/00_online_anomaly_detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Online (Real-Time) Anomaly Detection" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Online anomaly detection dynamically identifies anomalies as data streams in, allowing users to specify the number of timestamps to monitor. This method is well-suited for immediate applications, such as fraud detection, live sensor monitoring, or tracking real-time demand changes. 
By focusing on recent data and continuously generating forecasts, it enables timely responses to anomalies in critical scenarios.\n", 15 | "\n", 16 | "This section provides various recipes for performing real-time anomaly detection using TimeGPT, offering users the ability to detect outliers and unusual patterns as they emerge, ensuring prompt intervention in time-sensitive situations.\n", 17 | "\n", 18 | "This section covers:\n", 19 | "\n", 20 | "* [Online anomaly detection](https://docs.nixtla.io/docs/capabilities-online-anomaly-detection-quickstart)\n", 21 | "\n", 22 | "* [How to adjust the detection process](https://docs.nixtla.io/docs/capabilities-online-anomaly-detection-adjusting_detection_process.ipynb)\n", 23 | "\n", 24 | "* [Univariate vs. multiseries anomaly detection](https://docs.nixtla.io/docs/capabilities-online-anomaly-detection-univariate_vs_multivariate_anomaly_detection)\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [] 31 | } 32 | ], 33 | "metadata": {}, 34 | "nbformat": 4, 35 | "nbformat_minor": 2 36 | } 37 | -------------------------------------------------------------------------------- /nbs/docs/deployment/2_azure_ai.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# AzureAI \n", 8 | "\n", 9 | "> The foundational models for time series by Nixtla can be deployed on your Azure subscription. This page explains how to easily get started with TimeGEN-1 deployed as an Azure AI endpoint. If you use the `nixtla` library, it should be a drop-in replacement where you only need to change the client parameters (endpoint URL, API key, model name)." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Deploying TimeGEN-1\n", 17 | "\n" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Using the model\n", 25 | "\n", 26 | "Once your model is deployed and provided that you have the relevant permissions, consuming it will basically be the same process as for a Nixtla endpoint.\n", 27 | "\n", 28 | "To run the examples below, you will need to define the following environment variables:\n", 29 | "\n", 30 | "- `AZURE_AI_NIXTLA_BASE_URL` is your api URL, should be of the form `https://your-endpoint.inference.ai.azure.com/`.\n", 31 | "- `AZURE_AI_NIXTLA_API_KEY` is your authentication key." 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## How to use" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "Just import the library, set your credentials, and start forecasting in two lines of code!" 
46 |    ] 47 |   }, 48 |   { 49 |    "cell_type": "markdown", 50 |    "metadata": {}, 51 |    "source": [ 52 |     "```bash\n", 53 |     "pip install nixtla\n", 54 |     "```" 55 |    ] 56 |   }, 57 |   { 58 |    "cell_type": "markdown", 59 |    "metadata": {}, 60 |    "source": [ 61 |     "```python\n", 62 |     "import os\n", 63 |     "from nixtla import NixtlaClient\n", 64 |     "\n", 65 |     "base_url = os.environ[\"AZURE_AI_NIXTLA_BASE_URL\"]\n", 66 |     "api_key = os.environ[\"AZURE_AI_NIXTLA_API_KEY\"]\n", 67 |     "model = \"azureai\"\n", 68 |     "\n", 69 |     "nixtla_client = NixtlaClient(api_key=api_key, base_url=base_url)\n", 70 |     "nixtla_client.forecast(\n", 71 |     "    ...,\n", 72 |     "    model=model,\n", 73 |     ")\n", 74 |     "```" 75 |    ] 76 |   } 77 |  ], 78 |  "metadata": { 79 |   "kernelspec": { 80 |    "display_name": "python3", 81 |    "language": "python", 82 |    "name": "python3" 83 |   } 84 |  }, 85 |  "nbformat": 4, 86 |  "nbformat_minor": 4 87 | } 88 | -------------------------------------------------------------------------------- /nbs/docs/getting-started/41_pricing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 |  "cells": [ 3 |   { 4 |    "cell_type": "markdown", 5 |    "metadata": {}, 6 |    "source": [ 7 |     "# TimeGPT Subscription Plans\n", 8 |     "\n", 9 |     "We offer various Enterprise plans tailored to your forecasting needs. The number of API calls, number of users, and support levels can be customized based on your needs. We also offer an option for a self-hosted version and a version hosted on Azure.\n", 10 |     "\n", 11 |     "Please get in touch with us at support@nixtla.io for more information regarding pricing options and to discuss your specific requirements. For organizations interested in exploring our solution further, you can schedule a demo [here]( https://meetings.hubspot.com/cristian-challu/enterprise-contact-us?uuid=dc037f5a-d93b-4[…]90b-a611dd9460af&utm_source=github&utm_medium=pricing_page) " 12 |    ] 13 |   }, 14 |   { 15 |    "cell_type": "markdown", 16 |    "metadata": {}, 17 |    "source": [ 18 |     "**Free trial available**\n", 19 |     "\n", 20 |     "When you [create your account](https://dashboard.nixtla.io), you’ll receive a 30-day free trial, no credit card required. After 30 days, access will expire unless you upgrade to a paid plan. Contact us to continue leveraging TimeGPT for accurate and easy-to-use forecasting!" 21 |    ] 22 |   }, 23 |   { 24 |    "cell_type": "markdown", 25 |    "metadata": {}, 26 |    "source": [ 27 |     "**More information on pricing and billing**\n", 28 |     "\n", 29 |     "For additional information on pricing and billing, please see our FAQ." 30 |    ] 31 |   } 32 |  ], 33 |  "metadata": { 34 |   "kernelspec": { 35 |    "display_name": "python3", 36 |    "language": "python", 37 |    "name": "python3" 38 |   } 39 |  }, 40 |  "nbformat": 4, 41 |  "nbformat_minor": 4 42 | } 43 | -------------------------------------------------------------------------------- /nbs/docs/reference/03_excel_addin.ipynb: -------------------------------------------------------------------------------- 1 | { 2 |  "cells": [ 3 |   { 4 |    "cell_type": "markdown", 5 |    "metadata": {}, 6 |    "source": [ 7 |     "# TimeGPT Excel Add-in (Beta)\n", 8 |     "\n", 9 |     "## Installation\n", 10 |     "\n", 11 |     "Head to the [TimeGPT Excel add-in page in Microsoft AppSource](https://appsource.microsoft.com/en-us/product/office/WA200006429?tab=Overview) and click on \"Get it now\".\n", 12 |     "\n", 13 |     "## Usage\n", 14 |     "> 📘 Access token required\n", 15 |     "> \n", 16 |     "> The TimeGPT Excel Add-in requires an access token.
Get your API Key on the [Nixtla Dashboard](http://dashboard.nixtla.io).\n", 17 |     "\n", 18 |     "## Support\n", 19 |     "\n", 20 |     "If you have questions or need support, please email `support@nixtla.io`.\n", 21 |     "\n", 22 |     "## How-to\n", 23 |     "\n", 24 |     "### Settings\n", 25 |     "\n", 26 |     "If this is your first time using Excel add-ins, find information on how to add Excel add-ins for your version of Excel. In the Office Add-ins Store, you'll search for \"TimeGPT\". \n", 27 |     "\n", 28 |     "Once you have installed the TimeGPT add-in, the add-in comes up in a sidebar task pane. \n", 29 |     "* Read through the Welcome screen.\n", 30 |     "* Click on the **'Get Started'** button.\n", 31 |     "* The API URL is already set to: https://api.nixtla.io.\n", 32 |     "* Copy your API key from [Nixtla Dashboard](http://dashboard.nixtla.io). Paste it into the box that says **API Key, Bearer**.\n", 33 |     "* Click the gray arrow next to that box on the right. \n", 34 |     "* You'll get to a screen with options for 'Forecast' and 'Anomaly Detection'.\n", 35 |     "\n", 36 |     "To access the settings later, click the gear icon in the top left.\n", 37 |     "\n", 38 |     "### Data Requirements\n", 39 |     "\n", 40 |     "* Put your dates in one column and your values in another.\n", 41 |     "* Ensure your date format is recognized as a valid date by Excel.\n", 42 |     "* Ensure your values are recognized as valid numbers by Excel.\n", 43 |     "* All data inputs must exist in the same worksheet. The add-in does not support forecasting using multiple worksheets.\n", 44 |     "* Do not include headers.\n", 45 |     "\n", 46 |     "Example:\n", 47 |     "\n", 48 |     "| dates | values | \n", 49 |     "| :------------- | :----- | \n", 50 |     "| 12/1/16 0:00 | 72 | \n", 51 |     "| 12/1/16 1:00 | 65.8 | \n", 52 |     "| 12/1/16 2:00 | 59.99 | \n", 53 |     "| 12/1/16 3:00 | 50.69 | \n", 54 |     "| 12/1/16 4:00 | 52.58 | \n", 55 |     "| 12/1/16 5:00 | 65.05 | \n", 56 |     "| 12/1/16 6:00 | 80.4 | \n", 57 |     "| 12/1/16 7:00 | 200 | \n", 58 |     "| 12/1/16 8:00 | 200.63 | \n", 59 |     "| 12/1/16 9:00 | 155.47 | \n", 60 |     "| 12/1/16 10:00 | 150.91 | \n", 61 |     "\n", 62 |     "#### Forecasting\n", 63 |     "\n", 64 |     "Once you've configured your token and formatted your input data, you're all ready to forecast!\n", 65 |     "\n", 66 |     "With the add-in open, configure the forecasting settings by selecting the column for each input.\n", 67 |     "\n", 68 |     "* **Frequency** - The frequency of the data (hourly / daily / weekly / monthly)\n", 69 |     "\n", 70 |     "* **Horizon** - The forecasting horizon. This represents the number of time steps into the future that the forecast should predict.\n", 71 |     "\n", 72 |     "* **Dates Range** - The column and range of the timeseries timestamps. Must not include header data, and should be formatted as a range, e.g. A2:A145. \n", 73 |     "\n", 74 |     "* **Values Range** - The column and range of the timeseries values for each point in time. Must not include header data, and should be formatted as a range, e.g. B2:B145. \n", 75 |     "\n", 76 |     "\n", 77 |     "\n", 78 |     "\n", 79 |     "\n", 80 |     "When you're ready, click **Make Prediction** to generate the predicted values. The add-in will generate a plot and append the forecasted data to the end of the column of your existing data and highlight them in green. So, scroll to the end of your data to see the predicted values. \n", 81 |     "\n", 82 |     "\n", 83 |     "\n", 84 |     "#### Anomaly Detection\n", 85 |     "\n", 86 |     "The requirements are the same as for the forecasting functionality, so if you have already tried it, you are ready to run the anomaly detection one.
Go to the main page in the add-in and select \"Anomaly Detection\", then choose your dates and values cell ranges and click on submit. We'll run the model and mark the anomaly cells in yellow while adding a third column for expected values with a green background.\n", 87 |     "\n", 88 |     "\n", 89 |     "\n", 90 |     "\n" 91 |    ] 92 |   } 93 |  ], 94 |  "metadata": { 95 |   "kernelspec": { 96 |    "display_name": "python3", 97 |    "language": "python", 98 |    "name": "python3" 99 |   } 100 |  }, 101 |  "nbformat": 4, 102 |  "nbformat_minor": 2 103 | } 104 | -------------------------------------------------------------------------------- /nbs/docs/reference/04_nixtlar.ipynb: -------------------------------------------------------------------------------- 1 | { 2 |  "cells": [ 3 |   { 4 |    "cell_type": "markdown", 5 |    "metadata": {}, 6 |    "source": [ 7 |     "# TimeGPT in R\n", 8 |     "\n", 9 |     "TimeGPT is also available in R through the `nixtlar` package, which is available on CRAN. This package can be used in a way almost identical to its Python counterpart. It offers nearly the same functionalities, with missing features and documentation currently under development. Originally developed in Python, TimeGPT is now accessible to the R community through `nixtlar`, providing access to the first foundation model for time series forecasting and embracing our core philosophy that _the future is for everybody_." 10 |    ] 11 |   }, 12 |   { 13 |    "cell_type": "markdown", 14 |    "metadata": {}, 15 |    "source": [ 16 |     "![Logo nixtlar](https://github.com/Nixtla/nixtla/blob/main/nbs/img/logo_nixtlar.png?raw=true)" 17 |    ] 18 |   }, 19 |   { 20 |    "cell_type": "markdown", 21 |    "metadata": {}, 22 |    "source": [ 23 |     "## How to use \n", 24 |     "\n", 25 |     "To learn how to use `nixtlar`, please refer to the [documentation](https://nixtla.github.io/nixtlar/). \n", 26 |     "\n", 27 |     "To view directly on CRAN, please use this [link](https://cloud.r-project.org/web/packages/nixtlar/index.html). " 28 |    ] 29 |   }, 30 |   { 31 |    "cell_type": "markdown", 32 |    "metadata": {}, 33 |    "source": [ 34 |     "> 📘 API key required\n", 35 |     "> \n", 36 |     "> The `nixtlar` package requires an API key. Get yours on the [Nixtla Dashboard](http://dashboard.nixtla.io).\n", 37 |     "\n", 38 |     "## Support\n", 39 |     "\n", 40 |     "If you have questions or need support, please email `support@nixtla.io`." 41 |    ] 42 |   } 43 |  ], 44 |  "metadata": {}, 45 |  "nbformat": 4, 46 |  "nbformat_minor": 2 47 | } 48 | -------------------------------------------------------------------------------- /nbs/docs/tutorials/050_training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 |  "cells": [ 3 |   { 4 |    "cell_type": "markdown", 5 |    "id": "6de758ee-a0d2-4b3f-acff-eed419dd17c5", 6 |    "metadata": {}, 7 |    "source": [ 8 |     "# Training\n", 9 |     "\n", 10 |     "This section offers tutorials related to training `TimeGPT` under specific conditions.\n", 11 |     "\n", 12 |     "### What You Will Learn\n", 13 |     "\n", 14 |     "1. **[Long Horizon Forecasting](https://docs.nixtla.io/docs/tutorials-long_horizon_forecasting)**\n", 15 |     "\n", 16 |     "   - Discover how to make predictions beyond two seasonal periods or even further into the future, using `TimeGPT`'s specialized model for long horizon forecasting (see the sketch after this list).\n", 17 |     "\n", 18 |     "2. **[Multiple Series Forecasting](https://docs.nixtla.io/docs/tutorials-multiple_series_forecasting)**\n", 19 |     "\n", 20 |     "   - Learn how to use `TimeGPT` to forecast multiple time series simultaneously."
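, "\n", "As a brief, hedged sketch that ties these two tutorials together (it assumes `df` is a long-format pandas DataFrame holding several monthly series identified by `unique_id`, with `ds` and `y` columns, and that your `nixtla` version accepts the long-horizon model name shown):\n", "\n", "```python\n", "from nixtla import NixtlaClient\n", "\n", "nixtla_client = NixtlaClient(api_key='my_api_key_provided_by_nixtla')\n", "\n", "# Forecast every series in df at once with the long-horizon variant of TimeGPT\n", "fcst_df = nixtla_client.forecast(\n", "    df=df,                           # assumed: columns unique_id, ds, y\n", "    h=36,                            # horizon beyond two seasonal periods\n", "    freq='MS',                       # assumed monthly frequency\n", "    model='timegpt-1-long-horizon',\n", ")\n", "```"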
21 | ] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "python3", 27 | "language": "python", 28 | "name": "python3" 29 | } 30 | }, 31 | "nbformat": 4, 32 | "nbformat_minor": 5 33 | } 34 | -------------------------------------------------------------------------------- /nbs/docs/tutorials/080_validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6de758ee-a0d2-4b3f-acff-eed419dd17c5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Validation" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "5d267032-535b-4b7b-b7d3-d2db8f673af6", 14 | "metadata": {}, 15 | "source": [ 16 | "One of the primary challenges in time series forecasting is the inherent uncertainty and variability over time, making it crucial to validate the accuracy and reliability of the models employed. `TimeGPT` offers the possibility for cross-validation and historical forecasts to help you validate your predictions.\n", 17 | "\n", 18 | "### What You Will Learn\n", 19 | "\n", 20 | "1. **[Cross-Validation](https://docs.nixtla.io/docs/tutorials-cross_validation)**\n", 21 | "\n", 22 | " - Learn how to perform time series cross-validation across different continuous windows of your data. \n", 23 | "\n", 24 | "2. **[Historical Forecasts](https://docs.nixtla.io/docs/tutorials-historical_forecast)**\n", 25 | "\n", 26 | " - Generate in-sample forecasts to validate how `TimeGPT` would have performed in the past, providing insights into the model's accuracy. \n" 27 | ] 28 | } 29 | ], 30 | "metadata": { 31 | "kernelspec": { 32 | "display_name": "python3", 33 | "language": "python", 34 | "name": "python3" 35 | } 36 | }, 37 | "nbformat": 4, 38 | "nbformat_minor": 5 39 | } 40 | -------------------------------------------------------------------------------- /nbs/docs/tutorials/100_uncertainty_quantification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6de758ee-a0d2-4b3f-acff-eed419dd17c5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Uncertainty quantification\n", 9 | "\n", 10 | "In forecasting, it is essential to consider the full distribution of predictions rather than only a point prediction. This approach allows for a better understanding of the uncertainty surrounding the forecast. `TimeGPT` supports uncertainty quantification through quantile forecasts and prediction intervals.\n", 11 | "\n", 12 | "### What You Will Learn\n", 13 | "\n", 14 | "1. **[Quantile Forecasts](https://docs.nixtla.io/docs/tutorials-quantile_forecasts)**\n", 15 | "\n", 16 | " - Learn how to compute specific quantiles of the forecast distribution using `TimeGPT`. \n", 17 | "\n", 18 | "2. **[Prediction Intervals](https://docs.nixtla.io/docs/tutorials-prediction_intervals)**\n", 19 | "\n", 20 | " - Learn how to generate prediction intervals with `TimeGPT`, which give you a range of values that the forecast can take with a given probability. 
\n" 21 | ] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "python3", 27 | "language": "python", 28 | "name": "python3" 29 | } 30 | }, 31 | "nbformat": 4, 32 | "nbformat_minor": 5 33 | } 34 | -------------------------------------------------------------------------------- /nbs/docs/tutorials/120_special_topics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6de758ee-a0d2-4b3f-acff-eed419dd17c5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Special topics" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "5d267032-535b-4b7b-b7d3-d2db8f673af6", 14 | "metadata": {}, 15 | "source": [ 16 | "`TimeGPT` is a robust foundation model for time series forecasting, with advanced capabilities such as hierarchical and bounded forecasts. To fully leverage the power of `TimeGPT`, there are specific situations that require special consideration, such as dealing with irregular timestamps or handling datasets with missing values.\n", 17 | "\n", 18 | "In this section, we will cover these special topics.\n", 19 | "\n", 20 | "### What You Will Learn\n", 21 | "\n", 22 | "1. **[Irregular Timestamps](https://docs.nixtla.io/docs/capabilities-forecast-irregular_timestamps)**\n", 23 | "\n", 24 | " - Learn how to deal with irregular timestamps for correct usage of `TimeGPT`.\n", 25 | "\n", 26 | "2. **[Bounded Forecasts](https://docs.nixtla.io/docs/tutorials-bounded_forecasts)**\n", 27 | "\n", 28 | " - Explore `TimeGPT`'s capability to make forecasts within a specified range, ideal for applications where outcomes are bounded.\n", 29 | "\n", 30 | "3. **[Hierarchical Forecasts](https://docs.nixtla.io/docs/tutorials-hierarchical_forecasting)**\n", 31 | "\n", 32 | " - Understand how to use `TimeGPT` to make coherent predictions at various levels of aggregation.\n", 33 | "\n", 34 | "4. **[Missing Values](https://docs.nixtla.io/docs/tutorials-missing_values)**\n", 35 | "\n", 36 | " - Learn how to address missing values within your time series data effectively using `TimeGPT`.\n", 37 | "\n", 38 | "5. **[Improve Forecast Accuracy](https://docs.nixtla.io/docs/tutorials-improve_forecast_accuracy_with_timegpt)**\n", 39 | "\n", 40 | " - Discover multiple techniques to boost forecast accuracy when working with `TimeGPT`." 
41 | ] 42 | } 43 | ], 44 | "metadata": { 45 | "kernelspec": { 46 | "display_name": "python3", 47 | "language": "python", 48 | "name": "python3" 49 | } 50 | }, 51 | "nbformat": 4, 52 | "nbformat_minor": 5 53 | } 54 | -------------------------------------------------------------------------------- /nbs/favicon_png.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/favicon_png.png -------------------------------------------------------------------------------- /nbs/img/ApiRefScreen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/ApiRefScreen.png -------------------------------------------------------------------------------- /nbs/img/anomaly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/anomaly.png -------------------------------------------------------------------------------- /nbs/img/api_key_process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/api_key_process.png -------------------------------------------------------------------------------- /nbs/img/australia_hierarchy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/australia_hierarchy.png -------------------------------------------------------------------------------- /nbs/img/australia_tourism.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/australia_tourism.png -------------------------------------------------------------------------------- /nbs/img/azure-deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/azure-deploy.png -------------------------------------------------------------------------------- /nbs/img/azure-endpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/azure-endpoint.png -------------------------------------------------------------------------------- /nbs/img/azure-model-catalog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/azure-model-catalog.png -------------------------------------------------------------------------------- /nbs/img/dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/dashboard.png -------------------------------------------------------------------------------- /nbs/img/forecast.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/forecast.png -------------------------------------------------------------------------------- /nbs/img/forecast_readme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/forecast_readme.png -------------------------------------------------------------------------------- /nbs/img/logo_nixtlar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/logo_nixtlar.png -------------------------------------------------------------------------------- /nbs/img/results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/results.jpg -------------------------------------------------------------------------------- /nbs/img/timegpt-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/timegpt-arch.png -------------------------------------------------------------------------------- /nbs/img/timegpt_archi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/timegpt_archi.png -------------------------------------------------------------------------------- /nbs/img/timeseries_model_arena.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/timeseries_model_arena.png -------------------------------------------------------------------------------- /nbs/nbdev.yml: -------------------------------------------------------------------------------- 1 | project: 2 | output-dir: _docs 3 | 4 | website: 5 | title: "nixtla" 6 | site-url: "https://Nixtla.github.io/nixtla/" 7 | description: "Python SDK for Nixtla API (TimeGPT)" 8 | repo-branch: main 9 | repo-url: "https://github.com/Nixtla/nixtla/" 10 | -------------------------------------------------------------------------------- /nbs/sidebar.yml: -------------------------------------------------------------------------------- 1 | website: 2 | reader-mode: false 3 | sidebar: 4 | collapse-level: 1 5 | contents: 6 | - text: "--" 7 | - section: "Getting Started" 8 | contents: docs/getting-started/* 9 | - section: "Capabilities" 10 | contents: docs/capabilities/* 11 | - section: "Deployment" 12 | contents: docs/deployment/* 13 | - section: "Tutorials" 14 | contents: docs/tutorials/* 15 | - section: "Use cases" 16 | contents: docs/use-cases/* 17 | - section: "API Reference" 18 | contents: 19 | - nixtla_client.ipynb 20 | - date_features.ipynb -------------------------------------------------------------------------------- /nbs/src/utils.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Utils" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "#| default_exp utils" 17 | ] 18 | }, 19 | { 20 
| "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "#| hide \n", 26 | "%load_ext autoreload\n", 27 | "%autoreload 2" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "#| export\n", 37 | "import sys" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "#| export\n", 47 | "def colab_badge(path: str):\n", 48 | " from IPython.display import Markdown, display\n", 49 | " base_url = \"https://colab.research.google.com/github\"\n", 50 | " badge_svg = \"https://colab.research.google.com/assets/colab-badge.svg\"\n", 51 | " nb_url = f'{base_url}/Nixtla/nixtla/blob/main/nbs/{path}.ipynb'\n", 52 | " badge_md = f\"[![]({badge_svg})]({nb_url})\"\n", 53 | " display(Markdown(badge_md))" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "#| export\n", 63 | "def in_colab():\n", 64 | " return 'google.colab' in sys.modules" 65 | ] 66 | } 67 | ], 68 | "metadata": { 69 | "kernelspec": { 70 | "display_name": "python3", 71 | "language": "python", 72 | "name": "python3" 73 | } 74 | }, 75 | "nbformat": 4, 76 | "nbformat_minor": 4 77 | } 78 | -------------------------------------------------------------------------------- /nixtla/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.6.7.dev3" 2 | __all__ = ["NixtlaClient"] 3 | from .nixtla_client import NixtlaClient 4 | -------------------------------------------------------------------------------- /nixtla/date_features.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/src/date_features.ipynb. 
2 | 3 | # %% auto 0 4 | __all__ = ['CountryHolidays', 'SpecialDates'] 5 | 6 | # %% ../nbs/src/date_features.ipynb 4 7 | import pandas as pd 8 | 9 | # %% ../nbs/src/date_features.ipynb 6 10 | def _transform_dict_holidays(dict_holidays_dates): 11 | dict_holidays = {} 12 | for key, value in dict_holidays_dates.items(): 13 | if value not in dict_holidays: 14 | dict_holidays[value] = [] 15 | dict_holidays[value].append(key) 16 | return dict_holidays 17 | 18 | # %% ../nbs/src/date_features.ipynb 7 19 | def _get_holidays_df(dates, categories, holiday_extractor, supported_categories): 20 | years = dates.year.unique().tolist() 21 | total_holidays = dict() 22 | for cat in categories: 23 | if cat not in supported_categories: 24 | raise Exception(f"Holidays for {cat} not available, please remove it.") 25 | dict_holidays = _transform_dict_holidays(holiday_extractor(cat, years=years)) 26 | for key, val in dict_holidays.items(): 27 | total_holidays[f"{cat}_{key}"] = [int(ds.date() in val) for ds in dates] 28 | return pd.DataFrame(total_holidays, index=dates) 29 | 30 | # %% ../nbs/src/date_features.ipynb 8 31 | class CountryHolidays: 32 | """Given a list of countries, returns a dataframe with holidays for each country.""" 33 | 34 | def __init__(self, countries: list[str]): 35 | self.countries = countries 36 | 37 | def __call__(self, dates: pd.DatetimeIndex): 38 | try: 39 | from holidays.utils import country_holidays 40 | from holidays.utils import list_supported_countries 41 | except ModuleNotFoundError: 42 | raise Exception( 43 | "You have to install additional libraries to use holidays, " 44 | 'please install them using `pip install "nixtla[date_extras]"`' 45 | ) 46 | return _get_holidays_df( 47 | dates, self.countries, country_holidays, list_supported_countries() 48 | ) 49 | 50 | def __name__(self): 51 | return "CountryHolidays" 52 | 53 | # %% ../nbs/src/date_features.ipynb 12 54 | class SpecialDates: 55 | """Given a dictionary of categories and dates, returns a dataframe with the special dates.""" 56 | 57 | def __init__(self, special_dates: dict[str, list[str]]): 58 | self.special_dates = special_dates 59 | 60 | def __call__(self, dates: pd.DatetimeIndex): 61 | total_special_dates = dict() 62 | for key, val in self.special_dates.items(): 63 | date_vals = [ds.date() for ds in pd.to_datetime(val)] 64 | total_special_dates[key] = [int(ds.date() in date_vals) for ds in dates] 65 | return pd.DataFrame(total_special_dates, index=dates) 66 | 67 | def __name__(self): 68 | return "SpecialDates" 69 | -------------------------------------------------------------------------------- /nixtla/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nixtla/py.typed -------------------------------------------------------------------------------- /nixtla/utils.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/src/utils.ipynb. 
2 | 3 | # %% auto 0 4 | __all__ = ['colab_badge', 'in_colab'] 5 | 6 | # %% ../nbs/src/utils.ipynb 3 7 | import sys 8 | 9 | # %% ../nbs/src/utils.ipynb 4 10 | def colab_badge(path: str): 11 | from IPython.display import Markdown, display 12 | 13 | base_url = "https://colab.research.google.com/github" 14 | badge_svg = "https://colab.research.google.com/assets/colab-badge.svg" 15 | nb_url = f"{base_url}/Nixtla/nixtla/blob/main/nbs/{path}.ipynb" 16 | badge_md = f"[![]({badge_svg})]({nb_url})" 17 | display(Markdown(badge_md)) 18 | 19 | # %% ../nbs/src/utils.ipynb 5 20 | def in_colab(): 21 | return "google.colab" in sys.modules 22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff.lint] 2 | select = [ 3 | "F", # pyflakes 4 | ] 5 | -------------------------------------------------------------------------------- /settings.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | host = github 3 | lib_name = nixtla 4 | user = Nixtla 5 | description = Python SDK for Nixtla API (TimeGPT) 6 | keywords = time-series forecasting gpt 7 | author = Nixtla 8 | author_email = business@nixtla.io 9 | copyright = Nixtla Inc. 10 | branch = main 11 | version = 0.6.7.dev3 12 | min_python = 3.9 13 | audience = Developers 14 | language = English 15 | custom_sidebar = True 16 | license = apache2 17 | status = 4 18 | requirements = annotated-types httpx[zstd] orjson pandas pydantic>=1.10 tenacity tqdm utilsforecast>=0.2.8 19 | dev_requirements = black datasetsforecast fire hierarchicalforecast ipython<=8.32.0 ipywidgets jupyterlab nbdev neuralforecast numpy<2 plotly polars pre-commit pyreadr<0.5.3 python-dotenv pyyaml setuptools<70 statsforecast tabulate 20 | distributed_requirements = fugue[dask,ray,spark]>=0.8.7 dask<=2024.12.1 pandas<2.2 ray<2.6.3 21 | plotting_requirements = utilsforecast[plotting] 22 | date_extra_requirements = holidays 23 | nbs_path = nbs 24 | doc_path = _docs 25 | recursive = True 26 | doc_host = https://Nixtla.github.io 27 | doc_baseurl = /nixtla/ 28 | git_url = https://github.com/Nixtla/nixtla/ 29 | lib_path = nixtla 30 | title = nixtla 31 | black_formatting = True 32 | jupyter_hooks = True 33 | clean_ids = True 34 | readme_nb = nbs/index.ipynb 35 | tst_flags = distributed 36 | allowed_metadata_keys = 37 | allowed_cell_metadata_keys = 38 | clear_all = False 39 | put_version_in_init = True 40 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from pkg_resources import parse_version 2 | from configparser import ConfigParser 3 | import setuptools 4 | assert parse_version(setuptools.__version__)>=parse_version('36.2') 5 | 6 | # note: all settings are in settings.ini; edit there, not here 7 | config = ConfigParser(delimiters=['=']) 8 | config.read('settings.ini') 9 | cfg = config['DEFAULT'] 10 | 11 | cfg_keys = 'version description keywords author author_email'.split() 12 | expected = cfg_keys + "lib_name user branch license status min_python audience language".split() 13 | for o in expected: assert o in cfg, "missing expected setting: {}".format(o) 14 | setup_cfg = {o:cfg[o] for o in cfg_keys} 15 | 16 | licenses = { 17 | 'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'), 18 | 'mit': ('MIT License', 'OSI Approved :: MIT License'), 19 | 'gpl2': ('GNU 
General Public License v2', 'OSI Approved :: GNU General Public License v2 (GPLv2)'), 20 | 'gpl3': ('GNU General Public License v3', 'OSI Approved :: GNU General Public License v3 (GPLv3)'), 21 | 'bsd3': ('BSD License', 'OSI Approved :: BSD License'), 22 | } 23 | statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha', 24 | '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ] 25 | py_versions = '3.9 3.10 3.11 3.12 3.13'.split() 26 | 27 | requirements = cfg['requirements'].split() 28 | distributed_requirements = cfg['distributed_requirements'].split() 29 | plotting_requirements = cfg['plotting_requirements'].split() 30 | date_extra_requirements = cfg['date_extra_requirements'].split() 31 | dev_requirements = cfg['dev_requirements'].split() 32 | dev_requirements.extend(plotting_requirements) 33 | dev_requirements.extend(date_extra_requirements) 34 | 35 | min_python = cfg['min_python'] 36 | lic = licenses.get(cfg['license'].lower(), (cfg['license'], None)) 37 | 38 | setuptools.setup( 39 | name = cfg['lib_name'], 40 | license = lic[0], 41 | classifiers = [ 42 | 'Development Status :: ' + statuses[int(cfg['status'])], 43 | 'Intended Audience :: ' + cfg['audience'].title(), 44 | 'Natural Language :: ' + cfg['language'].title(), 45 | ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]] + (['License :: ' + lic[1] ] if lic[1] else []), 46 | url = cfg['git_url'], 47 | packages = setuptools.find_packages(exclude=['action_files']), 48 | include_package_data = True, 49 | install_requires = requirements, 50 | extras_require = { 51 | "dev": dev_requirements, 52 | "distributed": distributed_requirements, 53 | "plotting": plotting_requirements, 54 | "date_extras": date_extra_requirements, 55 | }, 56 | dependency_links = cfg.get('dep_links','').split(), 57 | python_requires = '>=' + cfg['min_python'], 58 | long_description = open('README.md', encoding='utf-8').read(), 59 | long_description_content_type = 'text/markdown', 60 | zip_safe = False, 61 | entry_points = { 62 | 'console_scripts': cfg.get('console_scripts','').split(), 63 | 'nbdev': [f'{cfg.get("lib_path")}={cfg.get("lib_path")}._modidx:d'] 64 | }, 65 | **setup_cfg) 66 | --------------------------------------------------------------------------------