├── .devcontainer └── devcontainer.json ├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ ├── config.yml │ ├── documentation-issue.yml │ └── feature-request.yml ├── dependabot.yml ├── release-drafter.yml └── workflows │ ├── build-docs.yaml │ ├── ci.yaml │ ├── deploy-readme.yaml │ ├── lint.yaml │ ├── models-performance.yaml │ ├── no-response.yaml │ ├── python-publish.yml │ ├── release-drafter.yml │ └── test-publish.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── QuickStart.gif ├── QuickstartTGPT.mp4 ├── README.md ├── action_files ├── __init__.py ├── comment_file.py ├── models_performance │ ├── experiments.yaml │ └── main.py └── readme_com │ ├── create_readme_docs.sh │ ├── create_sdk_reference.py │ └── modify_markdown.py ├── experiments ├── amazon-chronos │ ├── README.md │ ├── environment.yml │ └── src │ │ ├── amazon_chronos │ │ ├── forecaster.py │ │ └── pipeline.py │ │ ├── main.py │ │ ├── statsforecast_pipeline.py │ │ └── utils.py ├── azure-automl-forecasting │ ├── .env.example │ ├── Makefile │ ├── README.md │ ├── requirements.txt │ └── src │ │ ├── azure_automl │ │ ├── __init__.py │ │ ├── automl_handler.py │ │ ├── download_forecasts.py │ │ └── forecasting.py │ │ ├── evaluation.py │ │ ├── nixtla_timegpt.py │ │ ├── statsforecast_sn.py │ │ └── utils │ │ ├── data_handler.py │ │ ├── download_data.py │ │ └── filter_data.py ├── efficiency │ ├── README.md │ ├── main.py │ └── requirements.txt ├── foundation-time-series-arena │ ├── .env.example │ ├── Makefile │ ├── README.md │ ├── requirements.txt │ ├── tests │ │ ├── __init__.py │ │ ├── test_arena.py │ │ ├── test_eval.py │ │ ├── test_models.py │ │ └── utils.py │ └── xiuhmolpilli │ │ ├── __init__.py │ │ ├── arena.py │ │ ├── models │ │ ├── __init__.py │ │ ├── benchmarks │ │ │ ├── __init__.py │ │ │ ├── ml.py │ │ │ ├── neural.py │ │ │ ├── prophet.py │ │ │ └── stats.py │ │ ├── foundational │ │ │ ├── __init__.py │ │ │ ├── chronos.py │ │ │ ├── lagllama.py │ │ │ ├── moirai.py │ │ │ ├── timegpt.py │ │ │ └── timesfm.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── forecaster.py │ │ │ ├── gluonts_forecaster.py │ │ │ └── parallel_forecaster.py │ │ └── utils │ │ ├── download_data.py │ │ ├── experiment_handler.py │ │ ├── filter_data.py │ │ └── logger_config.py ├── lag-llama │ ├── Makefile │ ├── README.md │ ├── environment.yml │ └── src │ │ ├── lag_llama_pipeline.py │ │ ├── main.py │ │ ├── statsforecast_pipeline.py │ │ └── utils.py ├── one-billion │ ├── README.md │ ├── main.py │ └── requirements.txt ├── prophet │ ├── .env.example │ ├── Makefile │ ├── README.md │ ├── environment.yml │ └── src │ │ ├── prophet_exp.py │ │ ├── results_summary.py │ │ ├── statsforecast_exp.py │ │ ├── timegpt_exp.py │ │ ├── tools.py │ │ └── utils.py ├── salesforce-moirai │ ├── README.md │ ├── environment.yml │ └── src │ │ ├── main.py │ │ ├── moirai_pipeline.py │ │ ├── statsforecast_pipeline.py │ │ └── utils.py └── vn1-competition │ ├── Makefile │ ├── README.md │ ├── src │ ├── __init__.py │ ├── functions.R │ ├── main.R │ └── main.py │ └── tests │ ├── __init__.py │ └── test_scores.py ├── nbs ├── _quarto.yml ├── assets │ ├── Inter-VariableFont.ttf │ ├── M5_categorical_variables_example.parquet │ ├── M5_what_if_pricing_example.parquet │ ├── arima_rst.csv │ ├── forecast_synthetic_data.mp4 │ ├── lgbm_rst.csv │ ├── logo.png │ ├── long_horizon_example_Y_df.parquet │ └── nhits_rst.csv ├── docs │ ├── capabilities │ │ ├── 000_capabilities.ipynb │ │ ├── forecast │ │ │ ├── 00_forecast.ipynb │ │ │ 
├── 01_quickstart.ipynb │ │ │ ├── 02_exogenous_variables.ipynb │ │ │ ├── 03_holidays_special_dates.ipynb │ │ │ ├── 04_categorical_variables.ipynb │ │ │ ├── 05_longhorizon.ipynb │ │ │ ├── 06_multiple_series.ipynb │ │ │ ├── 07_finetuning.ipynb │ │ │ ├── 08_custom_loss_function.ipynb │ │ │ ├── 09_cross_validation.ipynb │ │ │ ├── 10_prediction_intervals.ipynb │ │ │ └── 11_irregular_timestamps.ipynb │ │ ├── historical-anomaly-detection │ │ │ ├── 00_historical_anomaly_detection.ipynb │ │ │ ├── 01_quickstart.ipynb │ │ │ ├── 02_anomaly_exogenous.ipynb │ │ │ ├── 03_anomaly_detection_date_features.ipynb │ │ │ └── 04_confidence_levels.ipynb │ │ └── online-anomaly-detection │ │ │ ├── 00_online_anomaly_detection.ipynb │ │ │ ├── 01_quickstart.ipynb │ │ │ ├── 02_adjusting_detection_process.ipynb │ │ │ └── 03_univariate_vs_multivariate_anomaly_detection.ipynb │ ├── deployment │ │ └── 2_azure_ai.ipynb │ ├── getting-started │ │ ├── 1_introduction.ipynb │ │ ├── 21_polars_quickstart.ipynb │ │ ├── 22_azure_quickstart.ipynb │ │ ├── 2_quickstart.ipynb │ │ ├── 3_setting_up_your_api_key.ipynb │ │ ├── 41_pricing.ipynb │ │ ├── 4_data_requirements.ipynb │ │ ├── 5_faq.ipynb │ │ ├── 6_glossary.ipynb │ │ └── 7_why_timegpt.ipynb │ ├── reference │ │ ├── 01_nixtla_client.ipynb │ │ ├── 02_date_features.ipynb │ │ ├── 03_excel_addin.ipynb │ │ └── 04_nixtlar.ipynb │ ├── tutorials │ │ ├── 01_exogenous_variables.ipynb │ │ ├── 02_holidays.ipynb │ │ ├── 03_categorical_variables.ipynb │ │ ├── 04_longhorizon.ipynb │ │ ├── 050_training.ipynb │ │ ├── 05_multiple_series.ipynb │ │ ├── 061_reusing_finetuned_models.ipynb │ │ ├── 06_finetuning.ipynb │ │ ├── 07_loss_function_finetuning.ipynb │ │ ├── 080_validation.ipynb │ │ ├── 08_cross_validation.ipynb │ │ ├── 09_historical_forecast.ipynb │ │ ├── 100_uncertainty_quantification.ipynb │ │ ├── 10_uncertainty_quantification_with_quantile_forecasts.ipynb │ │ ├── 11_uncertainty_quantification_with_prediction_intervals.ipynb │ │ ├── 120_special_topics.ipynb │ │ ├── 13_bounded_forecasts.ipynb │ │ ├── 14_hierarchical_forecasting.ipynb │ │ ├── 15_missing_values.ipynb │ │ ├── 16_computing_at_scale.ipynb │ │ ├── 17_computing_at_scale_spark_distributed.ipynb │ │ ├── 18_computing_at_scale_dask_distributed.ipynb │ │ ├── 19_computing_at_scale_ray_distributed.ipynb │ │ ├── 20_anomaly_detection.ipynb │ │ ├── 21_shap_values.ipynb │ │ ├── 22_how_to_improve_forecast_accuracy.ipynb │ │ ├── 23_finetune_depth_finetuning.ipynb │ │ └── 23_temporalhierarchical.ipynb │ └── use-cases │ │ ├── 1_forecasting_web_traffic.ipynb │ │ ├── 2_bitcoin_price_prediction.ipynb │ │ ├── 3_electricity_demand.ipynb │ │ ├── 4_intermittent_demand.ipynb │ │ └── 5_what_if_pricing_scenarios_in_retail.ipynb ├── favicon_png.png ├── img │ ├── ApiRefScreen.png │ ├── anomaly.png │ ├── api_key_process.png │ ├── australia_hierarchy.png │ ├── australia_tourism.png │ ├── azure-deploy.png │ ├── azure-endpoint.png │ ├── azure-model-catalog.png │ ├── dashboard.png │ ├── forecast.png │ ├── forecast_readme.png │ ├── logo_nixtlar.png │ ├── results.jpg │ ├── timegpt-arch.png │ ├── timegpt_archi.png │ └── timeseries_model_arena.png ├── mint.json ├── nbdev.yml ├── sidebar.yml ├── src │ ├── date_features.ipynb │ ├── nixtla_client.ipynb │ └── utils.ipynb └── styles.css ├── nixtla ├── __init__.py ├── _modidx.py ├── date_features.py ├── nixtla_client.py ├── py.typed └── utils.py ├── pyproject.toml ├── settings.ini └── setup.py /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": 
"Nixtla Development Environment", 3 | "image": "mcr.microsoft.com/vscode/devcontainers/python:3.11", 4 | "hostRequirements": { 5 | "cpus": 4, 6 | "memory": "16gb", 7 | "storage": "32gb" 8 | }, 9 | "customizations": { 10 | "vscode": { 11 | "settings": { 12 | "terminal.integrated.shell.linux": "/bin/bash", 13 | "python.terminal.activateEnvInCurrentTerminal": true, 14 | "python.defaultInterpreterPath": ".venv/bin/python", 15 | "python.pythonPath": ".venv/bin/python", 16 | "[python]": { 17 | "editor.defaultFormatter": "charliermarsh.ruff", 18 | "editor.formatOnSave": true, 19 | "editor.codeActionsOnSave": { 20 | "source.fixAll": "explicit", 21 | "source.organizeImports": "explicit" 22 | } 23 | }, 24 | "notebook.python.defaultInterpreterPath": ".venv/bin/python", 25 | "notebook.codeActionsOnSave": { 26 | "source.fixAll": "explicit", 27 | "source.organizeImports": "explicit" 28 | }, 29 | "ruff.nativeServer": "on", 30 | "python.languageServer": "Default", 31 | "debug.internalConsoleOptions": "neverOpen", 32 | "extensions.ignoreRecommendations": true, 33 | "files.insertFinalNewline": true 34 | }, 35 | "extensions": [ 36 | "ms-python.python", 37 | "ms-python.mypy", 38 | "ms-python.vscode-pylance", 39 | "ms-toolsai.jupyter@2025.2.0", 40 | "charliermarsh.ruff@2025.22.0", 41 | "GitHub.copilot", 42 | "tamasfe.even-better-toml" 43 | ] 44 | } 45 | }, 46 | "forwardPorts": [ 47 | 8888 48 | ], 49 | "onCreateCommand": "make devenv", 50 | "postCreateCommand": "uv pip install -Ue .[dev,distributed]", 51 | "waitFor": "postCreateCommand", 52 | "features": { 53 | "ghcr.io/devcontainers/features/docker-in-docker:2.12.2": {}, 54 | "ghcr.io/va-h/devcontainers-features/uv:1": {} 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: Bug report 2 | title: "[] " 3 | description: Problems and issues with code of the library 4 | labels: [bug] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thank you for reporting the problem.. 10 | Please make sure what you are reporting is a bug with reproducible steps. To ask questions 11 | or share ideas, please post on our [Slack community](https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1h77esh5y-iL1m8N0F7qV1HmH~0KYeAQ) instead. 12 | 13 | - type: textarea 14 | attributes: 15 | label: What happened + What you expected to happen 16 | description: Describe 1. the bug 2. expected behavior 3. useful information (e.g., logs) 17 | placeholder: > 18 | Please provide the context in which the problem occurred and explain what happened. Further, 19 | please also explain why you think the behaviour is erroneous. It is extremely helpful if you can 20 | copy and paste the fragment of logs showing the exact error messages or wrong behaviour here. 21 | 22 | **NOTE**: please copy and paste texts instead of taking screenshots of them for easy future search. 23 | validations: 24 | required: true 25 | 26 | - type: textarea 27 | attributes: 28 | label: Versions / Dependencies 29 | description: Please specify the versions of the library, Python, OS, and other libraries that are used. 30 | value: | 31 |
<details><summary>Click to expand</summary> 32 | Dependencies: 33 | </details>
35 | validations: 36 | required: true 37 | 38 | - type: textarea 39 | attributes: 40 | label: Reproducible example 41 | description: > 42 | Please provide a reproducible script. Providing a simple way to reproduce the issue 43 | (minimal / no external dependencies) will help us triage and address issues in the timely manner! 44 | value: | 45 | ```python 46 | # paste your code here 47 | ``` 48 | validations: 49 | required: true 50 | 51 | - type: dropdown 52 | attributes: 53 | label: Issue Severity 54 | description: | 55 | How does this issue affect your experience as user? 56 | multiple: false 57 | options: 58 | - "Low: It annoys or frustrates me." 59 | - "Medium: It is a significant difficulty but I can work around it." 60 | - "High: It blocks me from completing my task." 61 | validations: 62 | required: false 63 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Ask a question or get support 4 | url: https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1h77esh5y-iL1m8N0F7qV1HmH~0KYeAQ 5 | about: Ask a question or request support for using a library of the nixtlaverse 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation-issue.yml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | title: "[] " 3 | description: Report an issue with the library documentation 4 | labels: [documentation] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: Thank you for helping us improve the library documentation! 9 | 10 | - type: textarea 11 | attributes: 12 | label: Description 13 | description: | 14 | Tell us about the change you'd like to see. For example, "I'd like to 15 | see more examples of how to use `cross_validation`." 16 | validations: 17 | required: true 18 | 19 | - type: textarea 20 | attributes: 21 | label: Link 22 | description: | 23 | If the problem is related to an existing section, please add a link to 24 | the section. 25 | validations: 26 | required: false 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: Library feature request 2 | description: Suggest an idea for a project 3 | title: "[] " 4 | labels: [enhancement, feature] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thank you for finding the time to propose a new feature! 10 | We really appreciate the community efforts to improve the nixtlaverse. 11 | 12 | - type: textarea 13 | attributes: 14 | label: Description 15 | description: A short description of your feature 16 | 17 | - type: textarea 18 | attributes: 19 | label: Use case 20 | description: > 21 | Describe the use case of your feature request. It will help us understand and 22 | prioritize the feature request. 23 | placeholder: > 24 | Rather than telling us how you might implement this feature, try to take a 25 | step back and describe what you are trying to achieve. 
26 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: weekly 7 | groups: 8 | ci-dependencies: 9 | patterns: ["*"] 10 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$NEXT_PATCH_VERSION' 2 | tag-template: 'v$NEXT_PATCH_VERSION' 3 | categories: 4 | - title: 'New Features' 5 | label: 'feature' 6 | - title: 'Breaking Change' 7 | label: 'breaking change' 8 | - title: 'Bug Fixes' 9 | label: 'fix' 10 | - title: 'Documentation' 11 | label: 'documentation' 12 | - title: 'Dependencies' 13 | label: 'dependencies' 14 | - title: 'Enhancement' 15 | label: 'enhancement' 16 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 17 | template: | 18 | $CHANGES 19 | -------------------------------------------------------------------------------- /.github/workflows/build-docs.yaml: -------------------------------------------------------------------------------- 1 | name: "build-docs" 2 | on: 3 | release: 4 | types: [released] 5 | pull_request: 6 | branches: ["main"] 7 | workflow_dispatch: 8 | 9 | defaults: 10 | run: 11 | shell: bash 12 | 13 | jobs: 14 | build-docs: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Clone repo 18 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 19 | - name: Clone docs repo 20 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 21 | with: 22 | repository: Nixtla/docs 23 | ref: scripts 24 | path: docs-scripts 25 | - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 26 | with: 27 | cache: "pip" 28 | python-version: "3.10" 29 | cache-dependency-path: settings.ini 30 | - name: Build docs 31 | run: | 32 | set -ux 33 | python -m pip install --upgrade pip 34 | pip install -Uq nbdev nbdev_plotly 35 | pip install -e ".[dev,distributed]" 36 | mkdir nbs/_extensions 37 | cp -r docs-scripts/mintlify/ nbs/_extensions/ 38 | python docs-scripts/update-quarto.py 39 | echo "procs = nbdev_plotly.plotly:PlotlyProc" >> settings.ini 40 | nbdev_docs 41 | - name: Apply final formats 42 | run: bash ./docs-scripts/docs-final-formatting.bash 43 | - name: Copy over necessary assets 44 | run: | 45 | cp nbs/mint.json _docs/mint.json 46 | cp docs-scripts/imgs/* _docs/ 47 | - name: Deploy to Mintlify Docs 48 | if: | 49 | github.event_name == 'release' || 50 | github.event_name == 'workflow_dispatch' 51 | uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 52 | with: 53 | github_token: ${{ secrets.GITHUB_TOKEN }} 54 | publish_branch: docs 55 | publish_dir: ./_docs 56 | user_name: github-actions[bot] 57 | user_email: 41898282+github-actions[bot]@users.noreply.github.com 58 | - name: Trigger mintlify workflow 59 | if: | 60 | github.event_name == 'release' || 61 | github.event_name == 'workflow_dispatch' 62 | uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 63 | with: 64 | github-token: ${{ secrets.DOCS_WORKFLOW_TOKEN }} 65 | script: | 66 | await github.rest.actions.createWorkflowDispatch({ 67 | owner: 'nixtla', 68 | repo: 'docs', 69 | workflow_id: 'mintlify-action.yml', 70 | ref: 'main', 71 | }); 72 | - name: Configure redirects for gh-pages 73 | run: python 
docs-scripts/configure-redirects.py nixtla 74 | - name: Deploy to Github Pages 75 | if: | 76 | github.event_name == 'release' || 77 | github.event_name == 'workflow_dispatch' 78 | uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 79 | with: 80 | github_token: ${{ secrets.GITHUB_TOKEN }} 81 | publish_branch: gh-pages 82 | publish_dir: ./gh-pages 83 | user_name: github-actions[bot] 84 | user_email: 41898282+github-actions[bot]@users.noreply.github.com 85 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.ref }} 11 | cancel-in-progress: true 12 | 13 | env: 14 | NIXTLA_API_KEY: ${{ secrets.NIXTLA_DEV_API_KEY }} 15 | NIXTLA_BASE_URL: ${{ secrets.NIXTLA_DEV_BASE_URL }} 16 | NIXTLA_API_KEY_CUSTOM: ${{ secrets.NIXTLA_API_KEY_CUSTOM }} 17 | NIXTLA_BASE_URL_CUSTOM: ${{ secrets.NIXTLA_BASE_URL_CUSTOM }} 18 | API_KEY_FRED: ${{ secrets.API_KEY_FRED }} 19 | 20 | jobs: 21 | check-import: 22 | runs-on: ubuntu-latest 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | python-version: ["3.9", "3.10"] 27 | steps: 28 | - name: Clone repo 29 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 30 | 31 | - name: Set up python 32 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 33 | with: 34 | python-version: ${{ matrix.python-version }} 35 | 36 | - name: Install nixtla 37 | run: pip install uv && uv pip install --system . 38 | 39 | - name: Check import 40 | run: python -c "from nixtla import NixtlaClient" 41 | 42 | run-all-tests: 43 | runs-on: nixtla-linux-large-public 44 | timeout-minutes: 60 45 | strategy: 46 | fail-fast: false 47 | matrix: 48 | python-version: ["3.9", "3.10"] 49 | steps: 50 | - name: Clone repo 51 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 52 | 53 | - name: Set up python 54 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 55 | with: 56 | python-version: ${{ matrix.python-version }} 57 | 58 | - name: Install pip requirements 59 | run: pip install uv && uv pip install --system ".[dev,distributed]" 60 | 61 | - name: Run tests 62 | run: nbdev_test --timing --do_print --n_workers 0 --flags 'distributed' 63 | 64 | run-local-tests: 65 | runs-on: ${{ matrix.os }} 66 | timeout-minutes: 60 67 | strategy: 68 | fail-fast: false 69 | matrix: 70 | os: [macos-13, windows-latest] 71 | python-version: ["3.9", "3.10"] 72 | steps: 73 | - name: Clone repo 74 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 75 | 76 | - name: Set up python 77 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 78 | with: 79 | python-version: ${{ matrix.python-version }} 80 | 81 | - name: Install pip requirements 82 | run: pip install uv && uv pip install --system ".[dev]" 83 | 84 | - name: Run tests 85 | run: nbdev_test --timing --do_print --n_workers 0 --skip_file_re "computing_at_scale|distributed" 86 | 87 | run-minimal-tests: 88 | runs-on: ${{ matrix.os }} 89 | timeout-minutes: 60 90 | strategy: 91 | fail-fast: false 92 | matrix: 93 | os: [macos-13, macos-14, ubuntu-latest, windows-latest] 94 | python-version: ["3.9", "3.13"] 95 | steps: 96 | - name: Clone repo 97 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 
v4.2.2 98 | 99 | - name: Set up python 100 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 101 | with: 102 | python-version: ${{ matrix.python-version }} 103 | 104 | - name: Install pip requirements 105 | run: pip install uv && uv pip install --system . "ipython<=8.32.0" matplotlib nbdev python-dotenv 106 | 107 | - name: Run tests 108 | run: nbdev_test --n_workers 0 --path nbs/docs/getting-started/2_quickstart.ipynb 109 | -------------------------------------------------------------------------------- /.github/workflows/deploy-readme.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy to readme dot com 2 | 3 | on: 4 | release: 5 | types: [published] 6 | workflow_dispatch: 7 | 8 | defaults: 9 | run: 10 | shell: bash -l {0} 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | deploy-readme: 18 | runs-on: ubuntu-latest 19 | env: 20 | readme_version: "0.0.2" 21 | steps: 22 | - name: Clone repo 23 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 24 | with: 25 | persist-credentials: false 26 | 27 | - name: Set up python 28 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 29 | with: 30 | python-version: "3.10" 31 | 32 | - name: Install pip requirements 33 | run: pip install ".[dev]" 34 | 35 | - name: Install Quarto 36 | run: nbdev_install_quarto 37 | 38 | - name: Create readme docs 39 | env: 40 | README_HOST_URL: ${{ secrets.README_HOST_URL }} 41 | README_CATEGORY: ${{ secrets.README_CATEGORY }} 42 | README_API_KEY: ${{ secrets.README_API_KEY }} 43 | README_VERSION: ${{ env.readme_version }} 44 | run: ./action_files/readme_com/create_readme_docs.sh 45 | 46 | - name: Push PNGs to readme_docs branch 47 | run: | 48 | git config --global user.name 'FedericoGarza' 49 | git config --global user.email 'fede.garza.ramirez@gmail.com' 50 | git push https://${{ secrets.TOKEN_GITHUB }}@github.com/${{ github.repository }} --delete readme_docs || true 51 | git checkout -b readme_docs 52 | git add -f "*.png" 53 | git commit -m "[cd] update png images" || echo "No changes to commit" 54 | git push https://${{ secrets.TOKEN_GITHUB }}@github.com/${{ github.repository }} HEAD:readme_docs 55 | 56 | - name: Deploy to readme com 57 | uses: readmeio/rdme@51a80867c45de15e2b41af0c4bd5bbc61b932804 # 8.6.6 58 | with: 59 | rdme: docs ./nbs/_docs/docs/ --key=${{ secrets.README_API_KEY }} --version=${{ env.readme_version }} 60 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Clone repo 14 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 15 | 16 | - name: Set up python 17 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 18 | with: 19 | python-version: '3.10' 20 | 21 | - name: Install dependencies 22 | run: pip install black nbdev pre-commit 23 | 24 | - name: Run pre-commit 25 | run: pre-commit run --show-diff-on-failure --files nixtla/* 26 | -------------------------------------------------------------------------------- /.github/workflows/models-performance.yaml: -------------------------------------------------------------------------------- 1 | name: 
Models Performance 2 | 3 | permissions: write-all 4 | 5 | on: 6 | pull_request: 7 | types: [opened, synchronize, reopened] 8 | 9 | defaults: 10 | run: 11 | shell: bash -l {0} 12 | 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | run-models-performance: 19 | runs-on: ubuntu-latest 20 | env: 21 | NIXTLA_API_KEY: ${{ secrets.NIXTLA_DEV_API_KEY }} 22 | NIXTLA_BASE_URL: ${{ secrets.NIXTLA_DEV_BASE_URL }} 23 | PLOTS_REPO_URL: https://github.com/Nixtla/nixtla/blob/docs-figs-model-performance 24 | steps: 25 | - name: Clone repo 26 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 27 | 28 | - name: Set up python 29 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 30 | with: 31 | python-version: "3.10" 32 | 33 | - name: Install pip requirements 34 | run: pip install uv && uv pip install --system '.[dev]' 35 | 36 | - name: Run evaluation 37 | run: python -m action_files.models_performance.main 38 | 39 | - name: Upload results to the PR 40 | if: github.event_name == 'pull_request' 41 | env: 42 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 43 | PR_NUMBER: ${{ github.event.pull_request.number }} 44 | run: > 45 | python action_files/comment_file.py 46 | --search_term "Experiment Results" 47 | --file "action_files/models_performance/summary.md" 48 | 49 | - name: Upload images to new branch main 50 | run: | 51 | git config --global user.email azul@nixtla.io 52 | git config --global user.name AzulGarza 53 | git push https://$GITHUB_TOKEN@github.com/nixtla/nixtla.git --delete docs-figs-model-performance || true 54 | git checkout -b docs-figs-model-performance 55 | git add -f "*.png" 56 | git commit -m "[cd] update png images" || echo "No changes to commit" 57 | git push https://$GITHUB_TOKEN@github.com/nixtla/nixtla.git HEAD:docs-figs-model-performance 58 | -------------------------------------------------------------------------------- /.github/workflows/no-response.yaml: -------------------------------------------------------------------------------- 1 | name: No Response Bot 2 | 3 | on: 4 | issue_comment: 5 | types: [created] 6 | schedule: 7 | - cron: '0 4 * * *' 8 | 9 | jobs: 10 | noResponse: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: lee-dohm/no-response@9bb0a4b5e6a45046f00353d5de7d90fb8bd773bb # v0.5.0 14 | with: 15 | closeComment: > 16 | This issue has been automatically closed because it has been awaiting a response for too long. 17 | When you have time to to work with the maintainers to resolve this issue, please post a new comment and it will be re-opened. 18 | If the issue has been locked for editing by the time you return to it, please open a new issue and reference this one. 
19 | daysUntilClose: 30 20 | responseRequiredLabel: awaiting response 21 | token: ${{ github.token }} 22 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Packages 2 | 3 | on: 4 | push: 5 | tags: ['v*'] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | id-token: write 12 | steps: 13 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 17 | with: 18 | python-version: '3.10' 19 | 20 | - name: Install dependencies 21 | run: python -m pip install --upgrade pip && pip install build 22 | 23 | - name: Build nixtla package 24 | run: python -m build 25 | 26 | - name: Publish nixtla package 27 | uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 28 | 29 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | update_release_draft: 13 | permissions: 14 | contents: write 15 | pull-requests: read 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: release-drafter/release-drafter@b1476f6e6eb133afa41ed8589daba6dc69b4d3f5 # v6.1.0 19 | env: 20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | -------------------------------------------------------------------------------- /.github/workflows/test-publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Packages to TestPyPI 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | deploy: 8 | runs-on: ubuntu-latest 9 | permissions: 10 | id-token: write 11 | steps: 12 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 13 | 14 | - name: Set up Python 15 | uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 5.4.0 16 | with: 17 | python-version: '3.10' 18 | 19 | - name: Install dependencies 20 | run: python -m pip install --upgrade pip && pip install build 21 | 22 | - name: Build nixtla package 23 | run: python -m build 24 | 25 | - name: Publish nixtla package 26 | uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 27 | with: 28 | repository-url: https://test.pypi.org/legacy/ 29 | 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | __pycache__ 3 | *.egg-info 4 | Gemfile* 5 | Gemfile.lock 6 | docs/_site 7 | build 8 | dist 9 | .vscode 10 | .idea 11 | *.gif 12 | *.csv 13 | */data/* 14 | *.parquet 15 | *.tar.gz 16 | tmp 17 | _docs/ 18 | _proc/ 19 | .DS_Store 20 | .gitattributes 21 | .gitconfig 22 | nbs/.last_checked 23 | .venv 24 | .idea 25 | .env 26 | */summary.md 27 | */*.png 28 | longhorizon 29 | data 30 | *.rda 31 | nbs/_extensions 32 | !nbs/assets/* 33 | 34 | # VSCode 35 | *.code-workspace 36 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: 
nbdev_clean 5 | name: Clean notebooks 6 | entry: sh -c 'nbdev_clean && nbdev_clean --fname nbs/src --clear_all' 7 | language: system 8 | 9 | - repo: local 10 | hooks: 11 | - id: nbdev_export 12 | name: nbdev_export 13 | entry: sh -c 'nbdev_export' 14 | language: system 15 | 16 | - repo: https://github.com/astral-sh/ruff-pre-commit 17 | rev: v0.2.1 18 | hooks: 19 | - id: ruff 20 | files: 'nixtla' 21 | 22 | - repo: https://github.com/pre-commit/mirrors-mypy 23 | rev: v1.10.1 24 | hooks: 25 | - id: mypy 26 | args: [--ignore-missing-imports] 27 | files: 'nixtla' 28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include settings.ini 2 | include LICENSE 3 | include CONTRIBUTING.md 4 | include README.md 5 | recursive-exclude * __pycache__ 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | devenv: 2 | uv venv 3 | . .venv/bin/activate; uv pip install -Ue .[dev,distributed] 4 | . .venv/bin/activate; pre-commit install 5 | . .venv/bin/activate; nbdev_install_hooks 6 | 7 | 8 | jupyter: 9 | mkdir -p tmp 10 | jupyter lab --port=8888 --ip=0.0.0.0 --no-browser --allow-root --NotebookApp.token='' --NotebookApp.password='' --NotebookApp.allow_origin='*' 11 | -------------------------------------------------------------------------------- /QuickStart.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/QuickStart.gif -------------------------------------------------------------------------------- /QuickstartTGPT.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/QuickstartTGPT.mp4 -------------------------------------------------------------------------------- /action_files/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/action_files/__init__.py -------------------------------------------------------------------------------- /action_files/comment_file.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import fire 4 | import requests 5 | 6 | token = os.environ["GITHUB_TOKEN"] 7 | pr_number = os.environ["PR_NUMBER"] 8 | headers = { 9 | "Authorization": f"token {token}", 10 | "Accept": "application/vnd.github.v3+json", 11 | } 12 | base_url = "https://api.github.com/repos/Nixtla/nixtla/issues" 13 | 14 | 15 | def get_comments(): 16 | resp = requests.get(f"{base_url}/{pr_number}/comments", headers=headers) 17 | if resp.status_code != 200: 18 | raise RuntimeError(resp.text) 19 | return resp.json() 20 | 21 | 22 | def upsert_comment(body: str, comment_id: str | None): 23 | data = {"body": body} 24 | if comment_id is None: 25 | resp = requests.post( 26 | f"{base_url}/{pr_number}/comments", json=data, headers=headers 27 | ) 28 | else: 29 | resp = requests.patch( 30 | f"{base_url}/comments/{comment_id}", json=data, headers=headers 31 | ) 32 | return resp 33 | 34 | 35 | def main(search_term: str, file: str): 36 | comments = get_comments() 37 | existing_comment = [ 38 | c for c in comments if search_term in c["body"] and 
c["user"]["type"] == "Bot" 39 | ] 40 | if existing_comment: 41 | comment_id = existing_comment[0]["id"] 42 | else: 43 | comment_id = None 44 | with open(file, "rt") as f: 45 | summary = f.read() 46 | resp = upsert_comment(summary, comment_id) 47 | if resp.status_code not in (200, 201, 202): 48 | raise RuntimeError(f"{resp.status_code}: {resp.text}") 49 | 50 | 51 | if __name__ == "__main__": 52 | fire.Fire(main) 53 | -------------------------------------------------------------------------------- /action_files/models_performance/experiments.yaml: -------------------------------------------------------------------------------- 1 | experiments: 2 | 3 | - air-passengers: 4 | - dataset_url: https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/air_passengers.csv 5 | - time_col: timestamp 6 | - target_col: value 7 | - season_length: 12 # for benchmarks 8 | - freq: 9 | - MS 10 | - h: 11 | - 12 12 | - 24 13 | 14 | - electricity-multiple-series: 15 | - dataset_url: https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/ercot_multiple_ts.csv 16 | - season_length: 24 # for benchmarks 17 | - time_col: timestamp 18 | - target_col: value 19 | - freq: 20 | - H 21 | - h: 22 | - 24 23 | - 168 24 | - 336 25 | -------------------------------------------------------------------------------- /action_files/readme_com/create_readme_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BASE_DIR="nbs/docs/" 4 | SUB_DIRS=("getting-started" "capabilities" "deployment" "tutorials" "use-cases" "reference") 5 | 6 | counter=0 7 | for sub_dir in "${SUB_DIRS[@]}"; do 8 | DIR="$BASE_DIR$sub_dir/" 9 | if [[ -d "$DIR" ]]; then 10 | while read -r ipynb_file; do 11 | echo $counter 12 | md_file="${ipynb_file%.ipynb}.md" 13 | md_file="${md_file/docs/_docs/docs}" 14 | quarto render "$ipynb_file" --to md --wrap=none 15 | python -m action_files.readme_com.modify_markdown --file_path "$md_file" --slug_number "$counter" 16 | ((counter++)) 17 | done < <(find "$DIR" -type f -name "*.ipynb" -not -path "*/.ipynb_checkpoints/*" | sort) 18 | else 19 | echo "Directory $DIR does not exist." 
20 | fi 21 | done 22 | 23 | # process changelog 24 | echo $counter 25 | file_changelog="./nbs/_docs/docs/CHANGELOG.md" 26 | cp ./CHANGELOG.md ${file_changelog} 27 | python -m action_files.readme_com.modify_markdown --file_path "$file_changelog" --slug_number "$counter" 28 | -------------------------------------------------------------------------------- /action_files/readme_com/create_sdk_reference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | import fire 5 | from dotenv import load_dotenv 6 | 7 | load_dotenv() 8 | 9 | 10 | def create_sdk_reference( 11 | save_dir, 12 | slug_number, 13 | host_url=os.environ["README_HOST_URL"], 14 | category=os.environ["README_CATEGORY"], 15 | ): 16 | file_path = f"{save_dir}/{slug_number}_sdk_reference.md" 17 | header = f"""--- 18 | title: "SDK Reference" 19 | slug: "sdk_reference" 20 | order: {slug_number} 21 | type: "link" 22 | link_url: "https://nixtla.mintlify.app/nixtla/timegpt.html" 23 | link_external: true 24 | category: {category} 25 | --- 26 | 27 | """ 28 | 29 | with open(file_path, "w", encoding="utf-8") as file: 30 | file.write(header) 31 | 32 | 33 | if __name__ == "__main__": 34 | fire.Fire(create_sdk_reference) 35 | -------------------------------------------------------------------------------- /action_files/readme_com/modify_markdown.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from pathlib import Path 4 | import requests 5 | 6 | import fire 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | 12 | def to_snake_case(s): 13 | s = s.lower() 14 | s = re.sub(r"(? 18 | 19 | 20 | ## Reproducibility 21 | 22 | To ensure the reproducibility of our findings, the Statistical Ensemble experiments were conducted on an AWS c5a.24xlarge instance, equipped with 96 vCPUs and 192 GiB of RAM. In contrast, the experiments for Amazon Chronos were carried out on an AWS g5.4xlarge GPU instance, which includes 16 vCPUs, 64 GiB of RAM, and an NVIDIA A10G Tensor Core GPU with 24 GiB. All necessary code and detailed instructions for reproducing the experiments are available in this directory. 23 | 24 | ### Instructions 25 | 26 | 1. Set up a Python environment: 27 | 28 | ```bash 29 | mamba env create -f environment.yml 30 | conda activate amazon-chronos 31 | ``` 32 | 33 | 2. Run the experiments as reported in the table: 34 | 35 | ```bash 36 | python -m src.main --mode fcst_statsforecast 37 | python -m src.main --mode fcst_chronos 38 | ``` 39 | 40 | 3. 
Evaluate the results using: 41 | 42 | ```bash 43 | python -m src.main --mode evaluation 44 | ``` 45 | 46 | ### References 47 | - **Statistical Ensemble Paper**: [A Simple Combination of Univariate Models](https://www.sciencedirect.com/science/article/abs/pii/S0169207019300585?via%3Dihub) 48 | - **Amazon Chronos Paper**: [Chronos: Learning the Language of Time Series](https://arxiv.org/abs/2403.07815) 49 | -------------------------------------------------------------------------------- /experiments/amazon-chronos/environment.yml: -------------------------------------------------------------------------------- 1 | name: amazon-chronos 2 | channels: 3 | - conda-forge 4 | - defaults 5 | - anaconda 6 | dependencies: 7 | - jupyterlab 8 | - pip 9 | - python=3.10 10 | - pip: 11 | - datasetsforecast 12 | - fire 13 | - gluonts 14 | - huggingface_hub[cli] 15 | - neuralforecast 16 | - orjson 17 | - statsforecast 18 | - utilsforecast 19 | - git+https://github.com/amazon-science/chronos-forecasting.git 20 | 21 | -------------------------------------------------------------------------------- /experiments/amazon-chronos/src/amazon_chronos/forecaster.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Iterable, List 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | from chronos import ChronosPipeline 8 | from utilsforecast.processing import make_future_dataframe 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | main_logger = logging.getLogger(__name__) 12 | 13 | 14 | class TimeSeriesDataset: 15 | def __init__( 16 | self, 17 | data: torch.Tensor, 18 | uids: Iterable, 19 | last_times: Iterable, 20 | batch_size: int, 21 | ): 22 | self.data = data 23 | self.uids = uids 24 | self.last_times = last_times 25 | self.batch_size = batch_size 26 | self.n_batches = len(data) // self.batch_size + ( 27 | 0 if len(data) % self.batch_size == 0 else 1 28 | ) 29 | self.current_batch = 0 30 | 31 | @classmethod 32 | def from_df(cls, df: pd.DataFrame, batch_size: int): 33 | num_unique_ids = df["unique_id"].nunique() 34 | max_series_length = df["unique_id"].value_counts().max() 35 | padded_tensor = torch.full( 36 | size=(num_unique_ids, max_series_length), 37 | fill_value=torch.nan, 38 | dtype=torch.bfloat16, 39 | ) # type: ignore 40 | df_sorted = df.sort_values(by=["unique_id", "ds"]) 41 | for idx, (_, group) in enumerate(df_sorted.groupby("unique_id")): 42 | series_length = len(group) 43 | padded_tensor[idx, -series_length:] = torch.tensor( 44 | group["y"].values, 45 | dtype=torch.bfloat16, 46 | ) 47 | uids = df_sorted["unique_id"].unique() 48 | last_times = df_sorted.groupby("unique_id")["ds"].tail(1) 49 | return cls(padded_tensor, uids, last_times, batch_size) 50 | 51 | def __len__(self): 52 | return len(self.data) 53 | 54 | def make_future_dataframe(self, h: int, freq: str) -> pd.DataFrame: 55 | return make_future_dataframe( 56 | uids=self.uids, 57 | last_times=pd.to_datetime(self.last_times), 58 | h=h, 59 | freq=freq, 60 | ) # type: ignore 61 | 62 | def __iter__(self): 63 | self.current_batch = 0 # Reset for new iteration 64 | return self 65 | 66 | def __next__(self): 67 | if self.current_batch < self.n_batches: 68 | start_idx = self.current_batch * self.batch_size 69 | end_idx = start_idx + self.batch_size 70 | self.current_batch += 1 71 | return self.data[start_idx:end_idx] 72 | else: 73 | raise StopIteration 74 | 75 | 76 | class AmazonChronos: 77 | def __init__(self, model_name: str): 78 | self.model_name = model_name 
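# Load the pretrained Chronos pipeline once at construction; device_map="auto"
# places the weights on GPU when available, and bfloat16 halves memory use.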
79 | self.model = ChronosPipeline.from_pretrained( 80 | model_name, 81 | device_map="auto", 82 | torch_dtype=torch.bfloat16, 83 | ) 84 | 85 | def forecast( 86 | self, 87 | df: pd.DataFrame, 88 | h: int, 89 | freq: str, 90 | batch_size: int = 32, 91 | quantiles: List[float] | None = None, 92 | **predict_kwargs, 93 | ) -> pd.DataFrame: 94 | main_logger.info("transforming dataframe to tensor") 95 | dataset = TimeSeriesDataset.from_df(df, batch_size=batch_size) 96 | main_logger.info("forecasting") 97 | fcsts = [self.model.predict(batch, prediction_length=h, **predict_kwargs) for batch in dataset] 98 | fcst = torch.cat(fcsts) 99 | main_logger.info("transforming forecast to dataframe") 100 | fcst = fcst.numpy() 101 | fcst_df = dataset.make_future_dataframe(h=h, freq=freq) 102 | fcst_df[self.model_name] = np.median(fcst, axis=1).reshape(-1, 1) 103 | if quantiles is not None: 104 | for q in quantiles: 105 | q_col = f"{self.model_name}-q-{q}" 106 | fcst_df[q_col] = np.quantile(fcst, q, axis=1).reshape(-1, 1) 107 | return fcst_df 108 | 109 | 110 | if __name__ == "__main__": 111 | import pandas as pd 112 | 113 | df = pd.read_csv( 114 | "https://raw.githubusercontent.com/AileenNielsen/TimeSeriesAnalysisWithPython/master/data/AirPassengers.csv" 115 | ) 116 | df = df.rename(columns={"#Passengers": "y", "Month": "ds"}) 117 | df["ds"] = pd.to_datetime(df["ds"]) 118 | df.insert(0, "unique_id", "AirPassengers") 119 | df = pd.concat([df, df.assign(unique_id="AirPassengers2")]) 120 | model = AmazonChronos("amazon/chronos-t5-small") 121 | fcst_df = model.forecast(df, h=12, freq="MS") 122 | print(fcst_df) 123 | -------------------------------------------------------------------------------- /experiments/amazon-chronos/src/amazon_chronos/pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | from time import time 3 | from typing import List, Tuple 4 | 5 | import fire 6 | import pandas as pd 7 | 8 | 9 | from ..utils import ExperimentHandler 10 | from .forecaster import AmazonChronos 11 | 12 | 13 | def run_amazon_chronos( 14 | train_df: pd.DataFrame, 15 | model_name: str, 16 | horizon: int, 17 | freq: str, 18 | quantiles: List[float], 19 | ) -> Tuple[pd.DataFrame, float, str]: 20 | ac = AmazonChronos(model_name) 21 | init_time = time() 22 | fcsts_df = ac.forecast( 23 | df=train_df, 24 | h=horizon, 25 | freq=freq, 26 | batch_size=8, 27 | quantiles=quantiles, 28 | # parameters as in https://github.com/amazon-science/chronos-forecasting/blob/73be25042f5f587823d46106d372ba133152fb00/README.md?plain=1#L62-L65 29 | num_samples=20, 30 | temperature=1.0, 31 | top_k=50, 32 | top_p=1.0, 33 | ) 34 | total_time = time() - init_time 35 | return fcsts_df, total_time, model_name 36 | 37 | 38 | def main(dataset: str, model_name: str): 39 | exp = ExperimentHandler(dataset) 40 | fcst_df, total_time, model_name = run_amazon_chronos( 41 | train_df=exp.train_df, 42 | model_name=model_name, 43 | horizon=exp.horizon, 44 | freq=exp.freq, 45 | quantiles=exp.quantiles, 46 | ) 47 | exp.save_results(fcst_df, total_time, model_name) 48 | 49 | 50 | if __name__ == "__main__": 51 | fire.Fire(main) 52 | -------------------------------------------------------------------------------- /experiments/amazon-chronos/src/main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import subprocess 3 | 4 | import fire 5 | import pandas as pd 6 | 7 | from src.utils import ExperimentHandler 8 | 9 | logger = logging.getLogger(__name__) 10 | 
logger.setLevel(logging.INFO) 11 | 12 | datasets = [ 13 | "m1_yearly", 14 | "m1_quarterly", 15 | "m1_monthly", 16 | "m3_yearly", 17 | "m3_quarterly", 18 | "m3_monthly", 19 | "m3_other", 20 | "tourism_yearly", 21 | "tourism_quarterly", 22 | "tourism_monthly", 23 | "m4_yearly", 24 | "m4_quarterly", 25 | ] 26 | 27 | amazon_chronos_models = [ 28 | "amazon/chronos-t5-large", 29 | "amazon/chronos-t5-tiny", 30 | "amazon/chronos-t5-mini", 31 | "amazon/chronos-t5-small", 32 | "amazon/chronos-t5-base", 33 | ] 34 | 35 | 36 | def main(mode: str): 37 | prefix_process = ["python", "-m"] 38 | 39 | eval_df = None 40 | for dataset in datasets: 41 | logger.info(f"Evaluating {dataset}...") 42 | if mode in ["fcst_statsforecast", "fcst_chronos"]: 43 | suffix_process = ["--dataset", dataset] 44 | 45 | def process(middle_process): 46 | return prefix_process + middle_process + suffix_process 47 | 48 | if mode == "fcst_statsforecast": 49 | logger.info("Running StatisticalEnsemble") 50 | subprocess.run(process(["src.statsforecast_pipeline"])) 51 | elif mode == "fcst_chronos": 52 | for model in amazon_chronos_models: 53 | logger.info(f"Running Amazon Chronos {model}") 54 | chronos_process = process(["src.amazon_chronos.pipeline"]) 55 | chronos_process.extend(["--model_name", model]) 56 | subprocess.run(chronos_process) 57 | elif mode == "evaluation": 58 | if eval_df is None: 59 | eval_df = [] 60 | logger.info("Running dataset evaluation") 61 | exp = ExperimentHandler(dataset) 62 | try: 63 | eval_dataset_df = exp.evaluate_models( 64 | amazon_chronos_models + ["StatisticalEnsemble", "SeasonalNaive"] 65 | ) 66 | print(eval_dataset_df) 67 | eval_df.append(eval_dataset_df) 68 | except Exception as e: 69 | logger.error(e) 70 | if eval_df is not None: 71 | eval_df = pd.concat(eval_df).reset_index(drop=True) 72 | exp.save_dataframe(eval_df, "complete-results.csv") 73 | 74 | 75 | if __name__ == "__main__": 76 | fire.Fire(main) 77 | -------------------------------------------------------------------------------- /experiments/amazon-chronos/src/statsforecast_pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | from time import time 3 | from typing import List, Tuple 4 | 5 | os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1" 6 | os.environ["NIXTLA_NUMBA_CACHE"] = "1" 7 | 8 | import fire 9 | import numpy as np 10 | import pandas as pd 11 | from scipy.stats import norm 12 | from statsforecast import StatsForecast 13 | from statsforecast.models import ( 14 | AutoARIMA, 15 | AutoETS, 16 | AutoCES, 17 | DynamicOptimizedTheta, 18 | SeasonalNaive, 19 | ) 20 | 21 | from src.utils import ExperimentHandler 22 | 23 | 24 | def run_seasonal_naive( 25 | train_df: pd.DataFrame, 26 | horizon: int, 27 | freq: str, 28 | seasonality: int, 29 | level: List[int], 30 | ) -> Tuple[pd.DataFrame, float, str]: 31 | os.environ["NIXTLA_ID_AS_COL"] = "true" 32 | sf = StatsForecast( 33 | models=[SeasonalNaive(season_length=seasonality)], 34 | freq=freq, 35 | n_jobs=-1, 36 | ) 37 | model = sf 38 | init_time = time() 39 | fcsts_df = model.forecast(df=train_df, h=horizon, level=level) 40 | total_time = time() - init_time 41 | return fcsts_df, total_time, "SeasonalNaive" 42 | 43 | 44 | def ensemble_forecasts( 45 | fcsts_df: pd.DataFrame, 46 | quantiles: List[float], 47 | name_models: List[str], 48 | model_name: str, 49 | ) -> pd.DataFrame: 50 | fcsts_df[model_name] = fcsts_df[name_models].mean(axis=1).values # type: ignore 51 | # compute quantiles based on the mean of the forecasts 52 | sigma_models = [] 53 | 
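# Back out each model's forecast std from its 68.27% interval (~one sigma), then
# combine assuming independent errors: ensemble std = sqrt(sum(sigma_i^2)) / n,
# and the requested quantiles follow from the normal ppf below.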
for model in name_models: 54 | fcsts_df[f"sigma_{model}"] = fcsts_df[f"{model}-hi-68.27"] - fcsts_df[model] 55 | sigma_models.append(f"sigma_{model}") 56 | fcsts_df[f"std_{model_name}"] = ( 57 | fcsts_df[sigma_models].pow(2).sum(axis=1).div(len(sigma_models) ** 2).pow(0.5) 58 | ) 59 | z = norm.ppf(quantiles) 60 | q_cols = [] 61 | for q, zq in zip(quantiles, z): 62 | q_col = f"{model_name}-q-{q}" 63 | fcsts_df[q_col] = fcsts_df[model_name] + zq * fcsts_df[f"std_{model_name}"] 64 | q_cols.append(q_col) 65 | fcsts_df = fcsts_df[["unique_id", "ds"] + [model_name] + q_cols] 66 | return fcsts_df 67 | 68 | 69 | def run_statistical_ensemble( 70 | train_df: pd.DataFrame, 71 | horizon: int, 72 | freq: str, 73 | seasonality: int, 74 | quantiles: List[float], 75 | ) -> Tuple[pd.DataFrame, float, str]: 76 | os.environ["NIXTLA_ID_AS_COL"] = "true" 77 | models = [ 78 | AutoARIMA(season_length=seasonality), 79 | AutoETS(season_length=seasonality), 80 | AutoCES(season_length=seasonality), 81 | DynamicOptimizedTheta(season_length=seasonality), 82 | ] 83 | init_time = time() 84 | series_per_core = 15 85 | n_series = train_df["unique_id"].nunique() 86 | n_jobs = min(n_series // series_per_core, os.cpu_count()) 87 | sf = StatsForecast( 88 | models=models, 89 | freq=freq, 90 | n_jobs=n_jobs, 91 | ) 92 | fcsts_df = sf.forecast(df=train_df, h=horizon, level=[68.27]) 93 | name_models = [repr(model) for model in models] 94 | model_name = "StatisticalEnsemble" 95 | fcsts_df = ensemble_forecasts( 96 | fcsts_df, 97 | quantiles, 98 | name_models, 99 | model_name, 100 | ) 101 | total_time = time() - init_time 102 | return fcsts_df, total_time, model_name 103 | 104 | 105 | def main(dataset: str): 106 | exp = ExperimentHandler(dataset) 107 | # seasonal naive benchmark 108 | fcst_df, total_time, model_name = run_seasonal_naive( 109 | train_df=exp.train_df, 110 | horizon=exp.horizon, 111 | freq=exp.freq, 112 | seasonality=exp.seasonality, 113 | level=exp.level, 114 | ) 115 | fcst_df = exp.fcst_from_level_to_quantiles(fcst_df, model_name) 116 | exp.save_results(fcst_df, total_time, model_name) 117 | # statistical ensemble 118 | fcst_df, total_time, model_name = run_statistical_ensemble( 119 | train_df=exp.train_df, 120 | horizon=exp.horizon, 121 | freq=exp.freq, 122 | seasonality=exp.seasonality, 123 | quantiles=exp.quantiles, 124 | ) 125 | exp.save_results(fcst_df, total_time, model_name) 126 | 127 | 128 | if __name__ == "__main__": 129 | from statsforecast.utils import AirPassengers as ap 130 | 131 | AutoARIMA(season_length=12).forecast(ap.astype(np.float32), h=12) 132 | fire.Fire(main) 133 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/.env.example: -------------------------------------------------------------------------------- 1 | AZURE_SUBSCRIPTION_ID= 2 | AZURE_RESOURCE_GROUP= 3 | AZURE_WORKSPACE_NAME= 4 | TIMEGPT_TOKEN= 5 | 6 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/Makefile: -------------------------------------------------------------------------------- 1 | TS_FILES := Hourly_H.parquet Daily_D.parquet Weekly_W-MON.parquet Monthly_MS.parquet 2 | FILTERED_TS_FILES := $(patsubst %,./data/filtered_datasets/%,$(TS_FILES)) 3 | 4 | filter_data: 5 | @for file in $(TS_FILES); do \ 6 | python -m src.utils.filter_data --dataset_path ./data/$$file; \ 7 | done 8 | 9 | run_timegpt: .require-dataset_path 10 | @echo Running TimeGPT with dataset_path=$(dataset_path) 11 | @python 
-m src.nixtla_timegpt --dataset_path $(dataset_path) 12 | 13 | run_sn: .require-dataset_path 14 | @echo Running SN with dataset_path=$(dataset_path) 15 | @python -m src.statsforecast_sn --dataset_path $(dataset_path) 16 | 17 | run_automl: .require-dataset_path 18 | @echo Running AutoML with dataset_path=$(dataset_path) 19 | @python -m src.azure_automl.forecasting --dataset_path $(dataset_path) 20 | 21 | run_methods: 22 | @for file in $(TS_FILES); do \ 23 | echo "Running methods for $$file"; \ 24 | $(MAKE) run_timegpt dataset_path=./data/filtered_datasets/$$file; \ 25 | $(MAKE) run_sn dataset_path=./data/filtered_datasets/$$file; \ 26 | $(MAKE) run_automl dataset_path=./data/filtered_datasets/$$file; \ 27 | done 28 | 29 | download_automl_forecasts: 30 | @python -m src.azure_automl.download_forecasts 31 | 32 | evaluate_experiments: 33 | @python -m src.evaluation --datasets_paths "$(shell echo $(FILTERED_TS_FILES) | tr ' ' ',')" 34 | 35 | .require-dataset_path: 36 | ifndef dataset_path 37 | $(error dataset_path is required) 38 | endif 39 | 40 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/README.md: -------------------------------------------------------------------------------- 1 | # Nixtla TimeGPT vs. Azure AutoML: A Comprehensive Performance Analysis 2 | 3 | This experiment evaluates the performance of **Nixtla TimeGPT's zero-shot inference** against **Microsoft's Azure AutoML** in the domain of time series forecasting. Our analysis shows that TimeGPT **surpasses Azure AutoML by 12%, 12%, and 10% in MAE, RMSE, and MASE metrics** and has **300x improvement in computational efficiency**. This evaluation spanned over 3,000 distinct time series across various data frequencies, with considerations for Azure AutoML's cost constraints. 4 | 5 | # Introduction 6 | 7 | [Azure AutoML](https://learn.microsoft.com/en-us/azure/machine-learning/concept-automl-forecasting-methods?view=azureml-api-2), a product of Microsoft, offers a robust automated machine-learning solution that caters to a wide array of predictive tasks, including time series forecasting. TimeGPT is a foundational model for time series forecasting that can be accessed [through an API](https://docs.nixtla.io/). While Azure AutoML is known for its adaptability and ease of use, our findings reveal that TimeGPT offers superior accuracy and efficiency, especially in the context of time series data. 8 | 9 | ## Empirical Evaluation 10 | 11 | Our study involved a detailed comparison of both models across various datasets, including Hourly, Daily, Weekly, and Monthly data frequencies. The datasets were chosen from the test set of the [TimeGPT-1 paper](https://arxiv.org/abs/2310.03589), ensuring a diverse set of time series for evaluation. The selection process was designed to manage computational complexity and adhere to Azure AutoML's dataset size requirements, with a cap of 3,000 observations to maintain cost-effectiveness. 12 | 13 | ## Results 14 | 15 | The following table shows the main findings of our analysis, presenting a comparison of performance metrics (MASE, MAE, RMSE) and computational time (in seconds) across different datasets. The best results are highlighted in **bold** for clarity. 16 | 17 | image 18 | 19 | 20 | ## Reproducibility 21 | 22 | All experiments were conducted in controlled environments to uphold the integrity and reproducibility of our results. 
TimeGPT evaluations were performed using a 2020 MacBook Air with an M1 chip, ensuring accessibility and practicality. In contrast, Azure AutoML experiments were carried out on a cluster of 11 STANDARD_DS5_V2 virtual machines equipped with substantial computational resources to showcase its scalability and power. 23 | 24 | ### Instructions 25 | 26 | 1. Configure Azure AutoML according to the official Microsoft documentation. 27 | 2. Set the environment variables in a `.env` file using `.env.example` as example. 28 | 3. Set up a conda environment using: 29 | 30 | ```bash 31 | mamba create -n azure-automl-fcst python=3.10 32 | conda activate azure-automl-fcst 33 | pip install uv 34 | uv pip install -r requirements.txt 35 | ``` 36 | 37 | 4. Download the data using 38 | 39 | ```python 40 | python -m src.utils.download_data 41 | ``` 42 | 43 | If you're interested in replicating the results, write us at `support@nixtla.io` to give you access to the data. 44 | 45 | 5. Filter the datasets to prevent AzureML from crashing 46 | 47 | ``` 48 | make filter_data 49 | ``` 50 | 51 | 6. Run the forecasting tasks for TimeGPT, SeasonalNaive, and AzureAutoML using the following: 52 | 53 | ``` 54 | make run_methods 55 | ``` 56 | 57 | Notice that AzureAutoML will send the job to the predefined cluster. 58 | 59 | 7. Retrieve AzureAutoML forecasts once they are ready: 60 | 61 | ``` 62 | make download_automl_forecasts 63 | ``` 64 | 65 | 8. Run evaluation 66 | 67 | ``` 68 | make evaluate_experiments 69 | ``` 70 | 71 | 72 | ### References 73 | - [TimeGPT 1](https://arxiv.org/abs/2310.03589) 74 | - [StatsForecast](https://github.com/Nixtla/statsforecast/) 75 | - [Distributed AzureAutoML for forecasting](https://github.com/Azure/azureml-examples/blob/main/sdk/python/jobs/pipelines/1k_demand_forecasting_with_pipeline_components/automl-forecasting-demand-many-models-in-pipeline/automl-forecasting-demand-many-models-in-pipeline.ipynb) 76 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-ai-ml 2 | azure-identity 3 | azureml-core 4 | fire 5 | mltable 6 | nixtla 7 | pandas 8 | python-dotenv 9 | rich 10 | statsforecast 11 | utilsforecast 12 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/src/azure_automl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/experiments/azure-automl-forecasting/src/azure_automl/__init__.py -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/src/azure_automl/download_forecasts.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | 4 | import fire 5 | 6 | from .automl_handler import AzureAutoML 7 | from .forecasting import AzureAutoMLJobs 8 | from src.utils.data_handler import ForecastDataset 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | main_logger = logging.getLogger(__name__) 12 | 13 | 14 | def download_forecasts(dir: str = "./results"): 15 | azure_automl = AzureAutoML.from_environment() 16 | azure_automl_experiments = AzureAutoMLJobs() 17 | results_path = Path(dir) / "azure_automl" 18 | 19 | jobs_df = azure_automl_experiments.get_jobs_df() 20 | jobs_df = 
jobs_df.sort_values("created_at", ascending=False).drop_duplicates( 21 | "experiment_name" 22 | ) 23 | 24 | for _, row in jobs_df.iterrows(): 25 | experiment_name = row.experiment_name 26 | job_name = row.job_name 27 | main_logger.info( 28 | f"Downloading forecasts for experiment {experiment_name} and job {job_name}" 29 | ) 30 | try: 31 | forecast_df = azure_automl.get_forecast_df(job_name) 32 | total_time = azure_automl.get_job_total_time(job_name) 33 | except Exception: 34 | main_logger.info( 35 | f"Failed to download forecasts for experiment {experiment_name} and job {job_name}" 36 | ) 37 | continue 38 | if forecast_df is None: 39 | main_logger.info( 40 | f"Failed to download forecasts for experiment {experiment_name} and job {job_name}" 41 | "probably because the job is not finished yet or failed" 42 | ) 43 | continue 44 | fcst_dataset = ForecastDataset(forecast_df=forecast_df, total_time=total_time) 45 | experiment_name = row.experiment_name 46 | fcst_dataset.save_to_dir(results_path / experiment_name) 47 | main_logger.info( 48 | f"Saved forecasts for experiment {experiment_name} and job {job_name}" 49 | ) 50 | 51 | 52 | if __name__ == "__main__": 53 | fire.Fire(download_forecasts) 54 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/src/azure_automl/forecasting.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import fire 4 | import pandas as pd 5 | from azure.ai.ml.entities import AmlCompute 6 | 7 | from .automl_handler import AzureAutoML 8 | from src.utils.data_handler import ExperimentDataset 9 | 10 | 11 | class AzureAutoMLJobs: 12 | """ 13 | This class stores and updates the Azure AutoML Experiments, 14 | to keep track of the pipeline jobs. 15 | We need this to later downlaod the forecasts. 
16 | """ 17 | 18 | file_name = "forecasting_jobs.csv" 19 | 20 | def __init__(self, dir: str = "./azure_automl_results"): 21 | self.dir = dir 22 | self.jobs_path = Path(self.dir) / self.file_name 23 | self.setup() 24 | 25 | def setup(self): 26 | self.jobs_path.parent.mkdir(parents=True, exist_ok=True) 27 | if not self.jobs_path.exists(): 28 | pd.DataFrame(columns=["created_at", "experiment_name", "job_name"]).to_csv( 29 | self.jobs_path, 30 | index=False, 31 | ) 32 | 33 | def get_jobs_df(self) -> pd.DataFrame: 34 | return pd.read_csv(self.jobs_path) 35 | 36 | def save_job(self, job_name: str, experiment_name: str): 37 | jobs_df = self.get_jobs_df() 38 | new_row = pd.DataFrame( 39 | { 40 | "created_at": [pd.Timestamp.now()], 41 | "experiment_name": [experiment_name], 42 | "job_name": [job_name], 43 | } 44 | ) 45 | jobs_df = pd.concat([jobs_df, new_row]) 46 | jobs_df.to_csv(self.jobs_path, index=False) 47 | 48 | 49 | def start_forecasting_job( 50 | dataset_path: str, 51 | begin_create_or_update_aml_compute: bool = False, 52 | ): 53 | experiment_name = dataset_path.split("/")[-1].split(".")[0] 54 | dataset = ExperimentDataset.from_parquet(parquet_path=dataset_path) 55 | azure_automl = AzureAutoML.from_environment() 56 | azure_automl_jobs = AzureAutoMLJobs() 57 | 58 | aml_compute = AmlCompute( 59 | name="azure-automl-fcst-cluster-nixtla", 60 | min_instances=11, 61 | max_instances=11, 62 | size="STANDARD_DS5_V2", 63 | ) 64 | 65 | job_name = azure_automl.forecast( 66 | df=dataset.Y_df_train, 67 | df_test=dataset.Y_df_test, 68 | aml_compute=aml_compute, 69 | h=dataset.horizon, 70 | freq=dataset.pandas_frequency, 71 | n_cross_validations=2, 72 | experiment_name=experiment_name, 73 | begin_create_or_update_aml_compute=begin_create_or_update_aml_compute, 74 | max_nodes=11, 75 | max_concurrency_per_node=8, 76 | ) 77 | 78 | azure_automl_jobs.save_job(job_name, experiment_name) 79 | 80 | 81 | if __name__ == "__main__": 82 | fire.Fire(start_forecasting_job) 83 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/src/nixtla_timegpt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | from time import time 4 | 5 | import fire 6 | from dotenv import load_dotenv 7 | from nixtla import NixtlaClient 8 | 9 | from src.utils.data_handler import ExperimentDataset, ForecastDataset 10 | 11 | load_dotenv() 12 | 13 | 14 | def timegpt_forecast(dataset_path: str, results_dir: str = "./results"): 15 | dataset = ExperimentDataset.from_parquet(parquet_path=dataset_path) 16 | size_df = sys.getsizeof(dataset.Y_df_train) / (1024 * 1024) 17 | max_partition_size_mb = 20 18 | num_partitions = int(size_df / max_partition_size_mb) + 1 19 | timegpt = NixtlaClient(max_retries=1) 20 | start = time() 21 | forecast_df = timegpt.forecast( 22 | df=dataset.Y_df_train, 23 | h=dataset.horizon, 24 | freq=dataset.pandas_frequency, 25 | model="timegpt-1-long-horizon", 26 | num_partitions=num_partitions, 27 | ) 28 | end = time() 29 | total_time = end - start 30 | forecast_dataset = ForecastDataset( 31 | forecast_df=forecast_df, 32 | total_time=total_time, 33 | ) 34 | experiment_name = dataset_path.split("/")[-1].split(".")[0] 35 | results_path = Path(results_dir) / "nixtla_timegpt" / experiment_name 36 | forecast_dataset.save_to_dir(results_path) 37 | 38 | 39 | if __name__ == "__main__": 40 | fire.Fire(timegpt_forecast) 41 | 
-------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/src/statsforecast_sn.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from time import time 4 | 5 | import fire 6 | from statsforecast import StatsForecast 7 | from statsforecast.models import SeasonalNaive 8 | 9 | from src.utils.data_handler import ExperimentDataset, ForecastDataset 10 | 11 | 12 | def sn_forecast(dataset_path: str, results_dir: str = "./results"): 13 | os.environ["NIXTLA_ID_AS_COL"] = "true" 14 | dataset = ExperimentDataset.from_parquet(parquet_path=dataset_path) 15 | sf = StatsForecast( 16 | models=[SeasonalNaive(season_length=dataset.seasonality)], 17 | freq=dataset.pandas_frequency, 18 | ) 19 | start = time() 20 | forecast_df = sf.forecast( 21 | df=dataset.Y_df_train, 22 | h=dataset.horizon, 23 | ) 24 | end = time() 25 | total_time = end - start 26 | forecast_dataset = ForecastDataset(forecast_df=forecast_df, total_time=total_time) 27 | experiment_name = dataset_path.split("/")[-1].split(".")[0] 28 | results_path = Path(results_dir) / "statsforecast_sn" / experiment_name 29 | forecast_dataset.save_to_dir(results_path) 30 | 31 | 32 | if __name__ == "__main__": 33 | fire.Fire(sn_forecast) 34 | -------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/src/utils/download_data.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from concurrent.futures import ProcessPoolExecutor 3 | 4 | import pandas as pd 5 | 6 | logging.basicConfig(level=logging.INFO) 7 | main_logger = logging.getLogger(__name__) 8 | 9 | 10 | def read_parquet_and_assign(uid, url): 11 | df = pd.read_parquet(url) 12 | df["unique_id"] = uid 13 | df["ds"] = df["ds"].astype(str) 14 | return df[["unique_id", "ds", "y"]] 15 | 16 | 17 | def download_data(): 18 | catalogue_splits = pd.read_parquet("./data/catalogue_splits.parquet") 19 | catalogue_datasets = pd.read_parquet("./data/catalogue_datasets.parquet") 20 | catalogue_df = catalogue_splits.merge( 21 | catalogue_datasets, 22 | on=["dataset", "subdataset", "frequency"], 23 | ) 24 | del catalogue_splits 25 | del catalogue_datasets 26 | catalogue_df = catalogue_df.query("split == 'test'")[ 27 | [ 28 | "unique_id", 29 | "frequency", 30 | "url", 31 | "pandas_frequency", 32 | "seasonality", 33 | "horizon", 34 | ] 35 | ] 36 | grouped_df = catalogue_df.groupby(["frequency", "pandas_frequency"]) 37 | for (frequency, pandas_frequency), df in grouped_df: 38 | uids, urls = df["unique_id"].values, df["url"].values 39 | main_logger.info( 40 | f"frequency: {frequency}, pandas_frequency: {pandas_frequency}" 41 | ) 42 | n_uids = len(uids) 43 | main_logger.info(f"number of uids: {n_uids}") 44 | max_workers = min(10, n_uids) 45 | with ProcessPoolExecutor(max_workers=max_workers) as executor: 46 | futures = [ 47 | executor.submit(read_parquet_and_assign, uid, url) 48 | for uid, url in zip(uids, urls) 49 | ] 50 | results = [future.result() for future in futures] 51 | main_logger.info("dataset read") 52 | Y_df = pd.concat(results) 53 | Y_df = Y_df.merge( 54 | df.drop(columns="url"), 55 | on="unique_id", 56 | how="left", 57 | ) 58 | Y_df.to_parquet(f"./data/{frequency}_{pandas_frequency}.parquet") 59 | del Y_df 60 | main_logger.info("dataset saved") 61 | 62 | 63 | if __name__ == "__main__": 64 | download_data() 65 | 
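For orientation, each file written above is a plain long-format parquet. A toy file with the same columns can be sketched as follows (illustrative values only; the real series, horizons, and seasonalities come from Nixtla's benchmark catalogue, and the file name is hypothetical):

```python
import pandas as pd

# Columns match what download_data() writes for each frequency group.
toy_df = pd.DataFrame(
    {
        "unique_id": ["series_1"] * 48,
        "ds": pd.date_range("2024-01-01", periods=48, freq="H").astype(str),
        "y": range(48),
        "frequency": "Hourly",
        "pandas_frequency": "H",
        "seasonality": 24,
        "horizon": 24,
    }
)
toy_df.to_parquet("./data/Hourly_H.parquet")
```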
-------------------------------------------------------------------------------- /experiments/azure-automl-forecasting/src/utils/filter_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | this module takes Nixtla's benchmarking data 3 | and filters it to prevent azureml from crashing 4 | in the following cases: 5 | - too short series, see https://learn.microsoft.com/en-us/azure/machine-learning/concept-automl-forecasting-methods?view=azureml-api-2#data-length-requirements 6 | """ 7 | import logging 8 | from dataclasses import dataclass 9 | from pathlib import Path 10 | from typing import Any, Callable 11 | 12 | import fire 13 | import numpy as np 14 | import pandas as pd 15 | 16 | logging.basicConfig(level=logging.INFO) 17 | main_logger = logging.getLogger(__name__) 18 | 19 | 20 | @dataclass 21 | class DatasetParams: 22 | frequency: str 23 | pandas_frequency: str 24 | horizon: int 25 | seasonality: int 26 | 27 | @staticmethod 28 | def _get_value_from_df_col( 29 | df: pd.DataFrame, 30 | col: str, 31 | dtype: Callable | None = None, 32 | ) -> Any: 33 | col_values = df[col].unique() 34 | if len(col_values) > 1: 35 | raise ValueError(f"{col} is not unique: {col_values}") 36 | value = col_values[0] 37 | if dtype is not None: 38 | value = dtype(value) 39 | return value 40 | 41 | @classmethod 42 | def from_df(cls, df: pd.DataFrame) -> "DatasetParams": 43 | dataset_params = {} 44 | dataset_params_cols = [ 45 | "frequency", 46 | "pandas_frequency", 47 | "horizon", 48 | "seasonality", 49 | ] 50 | dataset_params_cols_dtypes = [str, str, int, int] 51 | for col, dtype in zip(dataset_params_cols, dataset_params_cols_dtypes): 52 | dataset_params[col] = cls._get_value_from_df_col(df, col, dtype=dtype) 53 | return cls(**dataset_params) 54 | 55 | 56 | def filter_and_clean_dataset( 57 | dataset_path: str, 58 | max_series: int = 1_000, 59 | n_train_cv: int = 2, 60 | n_seasonalities: int = 5, 61 | max_insample_length: int = 3_000, 62 | random_seed: int = 420, 63 | ): 64 | main_logger.info(f"Processing dataset {dataset_path}") 65 | df = pd.read_parquet(dataset_path) 66 | df = df.drop_duplicates(["unique_id", "ds"]) # type: ignore 67 | df = df.sort_values(["unique_id", "ds"]) 68 | ds_params = DatasetParams.from_df(df) 69 | min_train_size_per_series = ( 70 | ds_params.horizon 71 | + 2 * ds_params.horizon 72 | + (n_train_cv - 1) * ds_params.horizon 73 | + 1 74 | ) 75 | if ds_params.seasonality < 100: 76 | # if series has low seasonality 77 | # we add n_seasonalities to min_train_size_per_series 78 | # to keep the series long enough 79 | min_train_size_per_series += n_seasonalities * ds_params.seasonality 80 | uids = df["unique_id"].unique() # type: ignore 81 | df = ( 82 | df.groupby("unique_id") 83 | .filter(lambda x: len(x) >= min_train_size_per_series) 84 | .groupby("unique_id") # type: ignore 85 | .tail(max_insample_length + ds_params.horizon) 86 | .reset_index(drop=True) 87 | ) 88 | main_logger.info( 89 | f"Filtering out {len(uids) - len(df['unique_id'].unique())} series" 90 | ) 91 | uids = df["unique_id"].unique() # type: ignore 92 | if len(uids) > max_series: 93 | np.random.seed(random_seed) 94 | uids = np.random.choice(uids, max_series, replace=False) # type: ignore 95 | df = df.query("unique_id in @uids") # type: ignore 96 | main_logger.info(f"Filtering out {len(uids) - max_series} series") 97 | # finally we clean some strange dates 98 | mask = df["ds"].str.endswith(":01") # type: ignore 99 | df.loc[mask, "ds"] = df.loc[mask, "ds"].str[:-3] + ":00" 100 | # 
save the dataset 101 | dataset_path = Path(dataset_path) # type: ignore 102 | filtered_dataset_path = dataset_path.parent / "filtered_datasets" / dataset_path.name # type: ignore 103 | filtered_dataset_path.parent.mkdir(exist_ok=True, parents=True) 104 | df.to_parquet(filtered_dataset_path) 105 | main_logger.info(f"Filtered dataset saved to {filtered_dataset_path}") 106 | 107 | 108 | if __name__ == "__main__": 109 | fire.Fire(filter_and_clean_dataset) 110 | -------------------------------------------------------------------------------- /experiments/efficiency/README.md: -------------------------------------------------------------------------------- 1 | # 🚀 TimeGPT API v2: Faster, Smarter, and More Powerful Time Series Forecasting! 🚀 2 | 3 | We’re excited to introduce **v2 of the TimeGPT API**, featuring a significant boost in performance, enhanced flexibility, and new capabilities that make time series forecasting faster and more insightful than ever before. 4 | 5 | In this release, you will find: 6 | - **Dramatic speed improvements** across all major endpoints 🏎️ 7 | - **Scalable forecasting** that handles 1 billion time series in just 6 hours 📊 8 | - **Advanced handling of exogenous variables**, both historical and future 🌐 9 | - **Enhanced explainability** through SHAP values 🧠 10 | - **New integration with Polars**, a high-performance DataFrame library ⚡ 11 | 12 | ## Key Performance Highlights 🔥 13 | 14 | We've optimized the core functionalities—forecasting, anomaly detection, and cross-validation—with v2 showing significant speedups compared to v1. Below are the benchmark results: 15 | 16 | | Endpoint | Features | Level | v1 | v2 | Speedup | 17 | |:------------------|:-----------|:--------|:-----|:-----|:----------| 18 | | anomaly_detection | exog | [80] | 24s | 3s | 9x | 19 | | anomaly_detection | none | [80] | 13s | 2s | 8x | 20 | | cross_validation | exog | None | 22s | 4s | 6x | 21 | | cross_validation | exog | [80] | 31s | 6s | 5x | 22 | | cross_validation | none | None | 5s | 1s | 9x | 23 | | cross_validation | none | [80] | 9s | 2s | 4x | 24 | | forecast | exog | None | 18s | 1s | 13x | 25 | | forecast | exog | [80] | 20s | 2s | 10x | 26 | | forecast | none | None | 1s | 0s | 6x | 27 | | forecast | none | [80] | 3s | 1s | 6x | 28 | 29 | These results represent the huge leap in efficiency v2 provides, allowing you to analyze vast datasets and derive insights faster than ever before. 🚀 30 | 31 | ## How to Reproduce Results 32 | 33 | ### Installation 🛠️ 34 | 35 | 1. Install the required Python packages: 36 | ```bash 37 | pip install -r requirements.txt 38 | ``` 39 | 40 | ### Running the Code 🏃‍♀️ 41 | 42 | This script benchmarks **forecasting**, **anomaly detection**, and **cross-validation** across both v1 and v2 of the TimeGPT API. You can run the script and compare performance results by executing: 43 | 44 | ```bash 45 | python main.py 46 | ``` 47 | 48 | ## Performance Breakdown 🏎️ 49 | 50 | With v2, you get **up to 13x speed improvements** on key operations like forecasting with exogenous variables. This makes the API ideal for production environments where performance and scalability are paramount. 51 | 52 | ### New Features in v2 53 | 54 | - **Advanced Exogenous Variable Handling**: Leverage both historical and future exogenous data for more accurate forecasts. 55 | - **SHAP Values**: Improve model interpretability with SHAP value integration. 56 | - **Polars Integration**: Benefit from lightning-fast data processing with Polars, especially useful for big datasets. 
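To make the exogenous-variable and interval support concrete, the calls exercised by the benchmark script (`main.py`, shown below) boil down to something like this condensed sketch; it assumes `NIXTLA_API_KEY` is set in the environment:

```python
from nixtla import NixtlaClient
from utilsforecast.data import generate_series
from utilsforecast.feature_engineering import fourier

client = NixtlaClient()  # reads NIXTLA_API_KEY from the environment

# Historical target plus historical/future exogenous features (Fourier terms).
series = generate_series(10, freq="D", min_length=200)
train, future = fourier(series, freq="D", season_length=7, k=4, h=14)

# Forecast with future exogenous variables and an 80% prediction interval.
fcst_df = client.forecast(df=train, X_df=future, h=14, level=[80])
```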
57 | 58 | ## Conclusion 🚀 59 | 60 | With TimeGPT API v2, you’re not just getting a faster API—you’re gaining the tools to scale up your time series analysis effortlessly, with greater precision and deeper insights. Whether it’s detecting anomalies, validating models, or producing reliable forecasts, v2 ensures you get results **faster and smarter** than ever before. 61 | 62 | Happy forecasting! 63 | -------------------------------------------------------------------------------- /experiments/efficiency/main.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import logging 3 | import time 4 | 5 | import pandas as pd 6 | from nixtla import NixtlaClient as V2Client 7 | from nixtlats import NixtlaClient as V1Client 8 | from utilsforecast.data import generate_series 9 | from utilsforecast.feature_engineering import fourier 10 | 11 | logging.getLogger("nixtla").setLevel(logging.ERROR) 12 | logging.getLogger("nixtlats").setLevel(logging.ERROR) 13 | 14 | 15 | def forecast(client, df, X_df, h, level): 16 | return client.forecast(df=df, X_df=X_df, h=h, level=level) 17 | 18 | 19 | def cross_validation(client, df, X_df, h, level): 20 | return client.cross_validation(df=df, h=h, n_windows=4, level=level) 21 | 22 | 23 | def anomaly_detection(client, df, X_df, h, level): 24 | if isinstance(level, list): 25 | level = level[0] 26 | return client.detect_anomalies(df=df, level=level) 27 | 28 | 29 | v1_client = V1Client() 30 | v2_client = V2Client() 31 | n_series = 1_000 32 | freq = "D" 33 | h = 14 34 | series = generate_series(n_series, freq=freq, min_length=200) 35 | train, future = fourier(series, freq=freq, season_length=7, k=4, h=h) 36 | features = ["none", "exog"] 37 | level = [None, [80]] 38 | clients = {"v1": v1_client, "v2": v2_client} 39 | methods = { 40 | "forecast": forecast, 41 | "cross_validation": cross_validation, 42 | "anomaly_detection": anomaly_detection, 43 | } 44 | times = {version: {} for version in ("v1", "v2")} 45 | for feats, lvl in itertools.product(features, level): 46 | if feats == "none": 47 | df = series 48 | X_df = None 49 | else: 50 | df = train 51 | X_df = future 52 | for name, method in methods.items(): 53 | if name == "anomaly_detection" and lvl is None: 54 | continue 55 | for version, client in clients.items(): 56 | start = time.perf_counter() 57 | combination = f"{version} {name}. Features: {feats}. 
Level: {lvl}" 58 | print(f"Running {combination}") 59 | res = method(client, df=df, X_df=X_df, h=h, level=lvl) 60 | time_taken = time.perf_counter() - start 61 | times[version][f"{name}-{feats}-{lvl}"] = time_taken 62 | print(f"{combination} took {time_taken:.1f} seconds.") 63 | 64 | df = pd.DataFrame(times) 65 | df.index = df.index.str.split("-", expand=True) 66 | df.index.names = ["endpoint", "features", "level"] 67 | df = df.sort_index() 68 | df["speedup"] = df["v1"] / df["v2"] 69 | df["speedup"] = df["speedup"].map("{:.0f}x".format) 70 | for col in ("v1", "v2"): 71 | df[col] = df[col].map("{:.0f}s".format) 72 | with open("endpoint_times.md", "wt") as f: 73 | f.write(df.reset_index().to_markdown(index=False)) 74 | -------------------------------------------------------------------------------- /experiments/efficiency/requirements.txt: -------------------------------------------------------------------------------- 1 | nixtla>=0.6 2 | nixtlats==0.5.2 3 | pandas 4 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/.env.example: -------------------------------------------------------------------------------- 1 | NIXTLA_API_KEY= 2 | NIXTLA_BASE_URL= 3 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/Makefile: -------------------------------------------------------------------------------- 1 | download_lag_llama_code: 2 | @git clone https://github.com/time-series-foundation-models/lag-llama tempdir 3 | @cp -R tempdir/data/ . 4 | @cp -R tempdir/gluon_utils/ . 5 | @cp -R tempdir/lag_llama/ . 6 | @rm -rf tempdir 7 | 8 | download_data: 9 | @aws s3 sync s3://nixtla-foundational-time-series/data nixtla-foundational-time-series/data --no-sign-request 10 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/requirements.txt: -------------------------------------------------------------------------------- 1 | gluonts[torch] 2 | numpy 3 | torch>=2.0.0 4 | wandb 5 | scipy 6 | pandas 7 | huggingface_hub[cli] 8 | einshape 9 | fire 10 | nixtla 11 | python-dotenv 12 | rich 13 | statsforecast 14 | neuralforecast 15 | utilsforecast 16 | mlforecast 17 | lightgbm 18 | chronos @ git+https://github.com/amazon-science/chronos-forecasting.git 19 | salesforce-uni2ts @ git+https://github.com/SalesforceAIResearch/uni2ts.git 20 | timesfm @ git+https://github.com/AzulGarza/timesfm.git@fix-structure 21 | jax[cuda12] 22 | pytest 23 | prophet 24 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/experiments/foundation-time-series-arena/tests/__init__.py -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/tests/test_arena.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from tempfile import TemporaryDirectory 3 | 4 | import pandas as pd 5 | 6 | from xiuhmolpilli.arena import FoundationalTimeSeriesArena 7 | from .utils import models 8 | from .test_eval import generate_exp_dataset 9 | 10 | 11 | def generate_data(freq: str, tmpdir: str) -> str: 12 | df = generate_exp_dataset(n_series=5, freq=freq, return_df=True) 
13 | df_parquet_path = Path(tmpdir) / f"dataset_{freq}.parquet" 14 | df.to_parquet(df_parquet_path) 15 | return str(df_parquet_path) 16 | 17 | 18 | def test_foundational_time_series_arena(): 19 | cwd = Path.cwd() 20 | with TemporaryDirectory(dir=cwd) as tmpdir: 21 | parquet_data_paths = [generate_data(freq, tmpdir) for freq in ["H", "MS"]] 22 | arena = FoundationalTimeSeriesArena( 23 | models=models, 24 | parquet_data_paths=parquet_data_paths, 25 | results_dir=tmpdir, 26 | ) 27 | arena.compete() 28 | eval_df = pd.read_csv(arena.evaluation_path) 29 | arena.compete() 30 | eval_df_2 = pd.read_csv(arena.evaluation_path) 31 | print(eval_df) 32 | print(eval_df_2) 33 | assert eval_df.equals(eval_df_2) 34 | print(eval_df) 35 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/tests/test_models.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from utilsforecast.data import generate_series 4 | 5 | from .utils import models 6 | 7 | 8 | @pytest.mark.parametrize("model", models) 9 | @pytest.mark.parametrize("freq", ["H", "D", "W-MON", "MS"]) 10 | @pytest.mark.parametrize("h", [1, 12]) 11 | def test_correct_forecast_dates(model, freq, h): 12 | n_series = 5 13 | df = generate_series( 14 | n_series, 15 | freq=freq, 16 | ) 17 | df["unique_id"] = df["unique_id"].astype(str) 18 | df_test = df.groupby("unique_id").tail(h) 19 | df_train = df.drop(df_test.index) 20 | fcst_df = model.forecast( 21 | df_train, 22 | h=h, 23 | freq=freq, 24 | ) 25 | exp_n_cols = 3 26 | assert fcst_df.shape == (n_series * h, exp_n_cols) 27 | exp_cols = ["unique_id", "ds"] 28 | pd.testing.assert_frame_equal( 29 | fcst_df[exp_cols].sort_values(["unique_id", "ds"]).reset_index(drop=True), 30 | df_test[exp_cols].sort_values(["unique_id", "ds"]).reset_index(drop=True), 31 | ) 32 | 33 | 34 | @pytest.mark.parametrize("model", models) 35 | @pytest.mark.parametrize("freq", ["H", "D", "W-MON", "MS"]) 36 | @pytest.mark.parametrize("n_windows", [1, 4]) 37 | def test_cross_validation(model, freq, n_windows): 38 | h = 12 39 | n_series = 5 40 | df = generate_series(n_series, freq=freq, equal_ends=True) 41 | df["unique_id"] = df["unique_id"].astype(str) 42 | cv_df = model.cross_validation( 43 | df, 44 | h=h, 45 | freq=freq, 46 | n_windows=n_windows, 47 | ) 48 | exp_n_cols = 5 # unique_id, cutoff, ds, y, model 49 | assert cv_df.shape == (n_series * h * n_windows, exp_n_cols) 50 | cutoffs = cv_df["cutoff"].unique() 51 | assert len(cutoffs) == n_windows 52 | df_test = df.groupby("unique_id").tail(h * n_windows) 53 | exp_cols = ["unique_id", "ds", "y"] 54 | pd.testing.assert_frame_equal( 55 | cv_df.sort_values(["unique_id", "ds"]).reset_index(drop=True)[exp_cols], 56 | df_test.sort_values(["unique_id", "ds"]).reset_index(drop=True)[exp_cols], 57 | ) 58 | if n_windows == 1: 59 | # test same results using predict with less data 60 | df_test = df.groupby("unique_id").tail(h) 61 | df_train = df.drop(df_test.index) 62 | fcst_df = model.forecast( 63 | df_train, 64 | h=h, 65 | freq=freq, 66 | ) 67 | exp_cols = ["unique_id", "ds"] 68 | pd.testing.assert_frame_equal( 69 | cv_df.sort_values(["unique_id", "ds"]).reset_index(drop=True)[exp_cols], 70 | fcst_df.sort_values(["unique_id", "ds"]).reset_index(drop=True)[exp_cols], 71 | ) 72 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/tests/utils.py: 
-------------------------------------------------------------------------------- 1 | from xiuhmolpilli.models.benchmarks import ( 2 | AutoARIMA, 3 | NixtlaProphet, 4 | SeasonalNaive, 5 | AutoNHITS, 6 | AutoTFT, 7 | AutoLGBM, 8 | ) 9 | from xiuhmolpilli.models.foundational import Chronos, LagLlama, Moirai, TimeGPT, TimesFM 10 | 11 | models = [ 12 | # benchmarks 13 | AutoARIMA(), 14 | NixtlaProphet(), 15 | SeasonalNaive(), 16 | # neural benchmarks 17 | AutoNHITS(), 18 | AutoTFT(), 19 | # ml 20 | AutoLGBM(), 21 | # foundational models 22 | Chronos("amazon/chronos-t5-tiny"), 23 | LagLlama(), 24 | Moirai("Salesforce/moirai-1.0-R-small"), 25 | TimeGPT(), 26 | TimesFM(), 27 | ] 28 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/experiments/foundation-time-series-arena/xiuhmolpilli/__init__.py -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/experiments/foundation-time-series-arena/xiuhmolpilli/models/__init__.py -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | from .ml import AutoLGBM 2 | from .neural import ( 3 | AutoNHITS, 4 | AutoTFT, 5 | ) 6 | from .prophet import NixtlaProphet 7 | from .stats import ( 8 | ADIDA, 9 | AutoARIMA, 10 | AutoCES, 11 | AutoETS, 12 | CrostonClassic, 13 | DOTheta, 14 | HistoricAverage, 15 | IMAPA, 16 | SeasonalNaive, 17 | Theta, 18 | ZeroModel, 19 | ) 20 | 21 | __all__ = [ 22 | "AutoLGBM", 23 | "NixtlaProphet", 24 | "AutoNHITS", 25 | "AutoTFT", 26 | "ADIDA", 27 | "AutoARIMA", 28 | "AutoCES", 29 | "AutoETS", 30 | "CrostonClassic", 31 | "DOTheta", 32 | "HistoricAverage", 33 | "IMAPA", 34 | "SeasonalNaive", 35 | "Theta", 36 | "ZeroModel", 37 | ] 38 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/benchmarks/ml.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | from mlforecast.auto import AutoMLForecast, AutoLightGBM 5 | 6 | from ..utils.forecaster import Forecaster, get_seasonality 7 | 8 | os.environ["NIXTLA_ID_AS_COL"] = "true" 9 | 10 | 11 | class AutoLGBM(Forecaster): 12 | def __init__( 13 | self, 14 | alias: str = "AutoLGBM", 15 | num_samples: int = 10, 16 | cv_n_windows: int = 5, 17 | ): 18 | self.alias = alias 19 | self.num_samples = num_samples 20 | self.cv_n_windows = cv_n_windows 21 | 22 | def forecast( 23 | self, 24 | df: pd.DataFrame, 25 | h: int, 26 | freq: str, 27 | ) -> pd.DataFrame: 28 | mf = AutoMLForecast( 29 | models=[AutoLightGBM()], 30 | freq=freq, 31 | season_length=get_seasonality(freq), 32 | num_threads=-1, 33 | ) 34 | mf.fit( 35 | df=df, 36 | n_windows=self.cv_n_windows, 37 | h=h, 38 | num_samples=self.num_samples, 39 | ) 40 | fcst_df = mf.predict(h=h) 41 | fcst_df = fcst_df.rename(columns={"AutoLightGBM": self.alias}) 42 | return 
fcst_df 43 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/benchmarks/neural.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | from neuralforecast import NeuralForecast 5 | from neuralforecast.auto import ( 6 | AutoNHITS as _AutoNHITS, 7 | AutoTFT as _AutoTFT, 8 | ) 9 | from neuralforecast.common._base_model import BaseModel as NeuralForecastModel 10 | from ray import tune 11 | 12 | from ..utils.forecaster import Forecaster 13 | 14 | os.environ["NIXTLA_ID_AS_COL"] = "true" 15 | 16 | 17 | def run_neuralforecast_model( 18 | model: NeuralForecastModel, 19 | df: pd.DataFrame, 20 | freq: str, 21 | ) -> pd.DataFrame: 22 | nf = NeuralForecast( 23 | models=[model], 24 | freq=freq, 25 | ) 26 | nf.fit(df=df) 27 | fcst_df = nf.predict() 28 | return fcst_df 29 | 30 | 31 | class AutoNHITS(Forecaster): 32 | def __init__( 33 | self, 34 | alias: str = "AutoNHITS", 35 | num_samples: int = 10, 36 | backend: str = "optuna", 37 | ): 38 | self.alias = alias 39 | self.num_samples = num_samples 40 | self.backend = backend 41 | 42 | def forecast( 43 | self, 44 | df: pd.DataFrame, 45 | h: int, 46 | freq: str, 47 | ) -> pd.DataFrame: 48 | config = _AutoNHITS.get_default_config(h=h, backend="ray") 49 | config["scaler_type"] = tune.choice(["robust"]) 50 | 51 | if self.backend == "optuna": 52 | config = _AutoNHITS._ray_config_to_optuna(config) 53 | fcst_df = run_neuralforecast_model( 54 | model=_AutoNHITS( 55 | h=h, 56 | alias=self.alias, 57 | num_samples=self.num_samples, 58 | backend=self.backend, 59 | config=config, 60 | ), 61 | df=df, 62 | freq=freq, 63 | ) 64 | return fcst_df 65 | 66 | 67 | class AutoTFT(Forecaster): 68 | def __init__( 69 | self, 70 | alias: str = "AutoTFT", 71 | num_samples: int = 10, 72 | backend: str = "optuna", 73 | ): 74 | self.alias = alias 75 | self.num_samples = num_samples 76 | self.backend = backend 77 | 78 | def forecast( 79 | self, 80 | df: pd.DataFrame, 81 | h: int, 82 | freq: str, 83 | ) -> pd.DataFrame: 84 | config = _AutoTFT.get_default_config(h=h, backend="ray") 85 | config["scaler_type"] = tune.choice(["robust"]) 86 | if self.backend == "optuna": 87 | config = _AutoTFT._ray_config_to_optuna(config) 88 | fcst_df = run_neuralforecast_model( 89 | model=_AutoTFT( 90 | h=h, 91 | alias=self.alias, 92 | num_samples=self.num_samples, 93 | backend=self.backend, 94 | config=config, 95 | ), 96 | df=df, 97 | freq=freq, 98 | ) 99 | return fcst_df 100 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/benchmarks/prophet.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from typing import List 3 | from threadpoolctl import threadpool_limits 4 | 5 | import pandas as pd 6 | from prophet import Prophet 7 | 8 | from ..utils.parallel_forecaster import ParallelForecaster 9 | from ..utils.forecaster import Forecaster 10 | 11 | 12 | class NixtlaProphet(Prophet, ParallelForecaster, Forecaster): 13 | def __init__( 14 | self, 15 | alias: str = "Prophet", 16 | *args, 17 | **kwargs, 18 | ): 19 | super().__init__(*args, **kwargs) 20 | self.alias = alias 21 | 22 | def __local_forecast( 23 | self, 24 | df: pd.DataFrame, 25 | h: int, 26 | freq: str, 27 | quantiles: List[float] | None = None, 28 | ) -> pd.DataFrame: 29 | if quantiles is not None: 30 | raise NotImplementedError 
31 | model = deepcopy(self) 32 | model.fit(df=df) 33 | future_df = model.make_future_dataframe( 34 | periods=h, 35 | include_history=False, 36 | freq=freq, 37 | ) 38 | fcst_df = model.predict(future_df) 39 | fcst_df = fcst_df.rename({"yhat": self.alias}, axis=1) 40 | fcst_df = fcst_df[["ds", self.alias]] 41 | return fcst_df 42 | 43 | def _local_forecast( 44 | self, 45 | df: pd.DataFrame, 46 | h: int, 47 | freq: str, 48 | quantiles: List[float] | None = None, 49 | ) -> pd.DataFrame: 50 | with threadpool_limits(limits=1): 51 | return self.__local_forecast( 52 | df=df, 53 | h=h, 54 | freq=freq, 55 | quantiles=quantiles, 56 | ) 57 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/__init__.py: -------------------------------------------------------------------------------- 1 | from .chronos import Chronos 2 | from .lagllama import LagLlama 3 | from .moirai import Moirai 4 | from .timegpt import TimeGPT 5 | from .timesfm import TimesFM 6 | 7 | __all__ = [ 8 | "Chronos", 9 | "LagLlama", 10 | "Moirai", 11 | "TimeGPT", 12 | "TimesFM", 13 | ] 14 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/chronos.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable, List 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import torch 6 | from chronos import ChronosPipeline 7 | from tqdm import tqdm 8 | from utilsforecast.processing import make_future_dataframe 9 | 10 | from ..utils.forecaster import Forecaster 11 | 12 | 13 | class TimeSeriesDataset: 14 | def __init__( 15 | self, 16 | data: torch.Tensor, 17 | uids: Iterable, 18 | last_times: Iterable, 19 | batch_size: int, 20 | ): 21 | self.data = data 22 | self.uids = uids 23 | self.last_times = last_times 24 | self.batch_size = batch_size 25 | self.n_batches = len(data) // self.batch_size + ( 26 | 0 if len(data) % self.batch_size == 0 else 1 27 | ) 28 | self.current_batch = 0 29 | 30 | @classmethod 31 | def from_df(cls, df: pd.DataFrame, batch_size: int): 32 | num_unique_ids = df["unique_id"].nunique() 33 | max_series_length = df["unique_id"].value_counts().max() 34 | padded_tensor = torch.full( 35 | size=(num_unique_ids, max_series_length), 36 | fill_value=torch.nan, 37 | dtype=torch.bfloat16, 38 | ) # type: ignore 39 | df_sorted = df.sort_values(by=["unique_id", "ds"]) 40 | for idx, (_, group) in enumerate(df_sorted.groupby("unique_id")): 41 | series_length = len(group) 42 | padded_tensor[idx, -series_length:] = torch.tensor( 43 | group["y"].values, 44 | dtype=torch.bfloat16, 45 | ) 46 | uids = df_sorted["unique_id"].unique() 47 | last_times = df_sorted.groupby("unique_id")["ds"].tail(1) 48 | return cls(padded_tensor, uids, last_times, batch_size) 49 | 50 | def __len__(self): 51 | return self.n_batches 52 | 53 | def make_future_dataframe(self, h: int, freq: str) -> pd.DataFrame: 54 | return make_future_dataframe( 55 | uids=self.uids, 56 | last_times=pd.to_datetime(self.last_times), 57 | h=h, 58 | freq=freq, 59 | ) # type: ignore 60 | 61 | def __iter__(self): 62 | self.current_batch = 0 # Reset for new iteration 63 | return self 64 | 65 | def __next__(self): 66 | if self.current_batch < self.n_batches: 67 | start_idx = self.current_batch * self.batch_size 68 | end_idx = start_idx + self.batch_size 69 | self.current_batch += 1 70 | return self.data[start_idx:end_idx] 71 | else: 72 
| raise StopIteration 73 | 74 | 75 | class Chronos(Forecaster): 76 | def __init__( 77 | self, 78 | repo_id: str = "amazon/chronos-t5-large", 79 | batch_size: int = 16, 80 | alias: str = "Chronos", 81 | ): 82 | self.repo_id = repo_id 83 | self.batch_size = batch_size 84 | self.alias = alias 85 | self.model = ChronosPipeline.from_pretrained( 86 | repo_id, 87 | device_map="auto", 88 | torch_dtype=torch.bfloat16, 89 | ) 90 | 91 | def forecast( 92 | self, 93 | df: pd.DataFrame, 94 | h: int, 95 | freq: str, 96 | ) -> pd.DataFrame: 97 | dataset = TimeSeriesDataset.from_df(df, batch_size=self.batch_size) 98 | fcsts = [ 99 | self.model.predict(batch, prediction_length=h) for batch in tqdm(dataset) 100 | ] 101 | fcst = torch.cat(fcsts) 102 | fcst = fcst.numpy() 103 | fcst_df = dataset.make_future_dataframe(h=h, freq=freq) 104 | fcst_df[self.alias] = np.mean(fcst, axis=1).reshape(-1, 1) 105 | return fcst_df 106 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/lagllama.py: -------------------------------------------------------------------------------- 1 | from gluonts.torch.model.predictor import PyTorchPredictor 2 | from lag_llama.gluon.estimator import LagLlamaEstimator 3 | 4 | from ..utils.gluonts_forecaster import GluonTSForecaster 5 | 6 | 7 | class LagLlama(GluonTSForecaster): 8 | def __init__( 9 | self, 10 | repo_id: str = "time-series-foundation-models/Lag-Llama", 11 | filename: str = "lag-llama.ckpt", 12 | alias: str = "LagLlama", 13 | ): 14 | super().__init__( 15 | repo_id=repo_id, 16 | filename=filename, 17 | alias=alias, 18 | ) 19 | 20 | def get_predictor(self, prediction_length: int) -> PyTorchPredictor: 21 | ckpt = self.load() 22 | estimator_args = ckpt["hyper_parameters"]["model_kwargs"] 23 | # this context length is reported in the paper 24 | context_length = 32 25 | estimator = LagLlamaEstimator( 26 | ckpt_path=self.checkpoint_path, 27 | prediction_length=prediction_length, 28 | context_length=context_length, 29 | # estimator args 30 | input_size=estimator_args["input_size"], 31 | n_layer=estimator_args["n_layer"], 32 | n_embd_per_head=estimator_args["n_embd_per_head"], 33 | n_head=estimator_args["n_head"], 34 | scaling=estimator_args["scaling"], 35 | time_feat=estimator_args["time_feat"], 36 | ) 37 | lightning_module = estimator.create_lightning_module() 38 | transformation = estimator.create_transformation() 39 | predictor = estimator.create_predictor(transformation, lightning_module) 40 | return predictor 41 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/moirai.py: -------------------------------------------------------------------------------- 1 | from gluonts.torch.model.predictor import PyTorchPredictor 2 | from uni2ts.model.moirai import MoiraiForecast, MoiraiModule 3 | 4 | from ..utils.gluonts_forecaster import GluonTSForecaster 5 | 6 | 7 | class Moirai(GluonTSForecaster): 8 | def __init__( 9 | self, 10 | repo_id: str = "Salesforce/moirai-1.0-R-large", 11 | filename: str = "model.ckpt", 12 | alias: str = "Moirai", 13 | ): 14 | super().__init__( 15 | repo_id=repo_id, 16 | filename=filename, 17 | alias=alias, 18 | ) 19 | 20 | def get_predictor(self, prediction_length: int) -> PyTorchPredictor: 21 | model = MoiraiForecast( 22 | module=MoiraiModule.from_pretrained(self.repo_id), 23 | prediction_length=prediction_length, 24 | context_length=200, 25 | 
patch_size="auto", 26 | num_samples=100, 27 | target_dim=1, 28 | feat_dynamic_real_dim=0, 29 | past_feat_dynamic_real_dim=0, 30 | ) 31 | predictor = model.create_predictor(batch_size=32) 32 | return predictor 33 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/timegpt.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | from dotenv import load_dotenv 5 | from nixtla import NixtlaClient 6 | from typing import Optional 7 | from ..utils.forecaster import Forecaster 8 | 9 | load_dotenv() 10 | 11 | 12 | class TimeGPT(Forecaster): 13 | def __init__( 14 | self, 15 | api_key: str | None = None, 16 | base_url: Optional[str] = None, 17 | max_retries: int = 1, 18 | model: str = "timegpt-1", 19 | alias: str = "TimeGPT", 20 | ): 21 | self.api_key = api_key 22 | self.base_url = base_url 23 | self.max_retries = max_retries 24 | self.model = model 25 | self.alias = alias 26 | 27 | def _get_client(self) -> NixtlaClient: 28 | if self.api_key is None: 29 | api_key = os.environ["NIXTLA_API_KEY"] 30 | else: 31 | api_key = self.api_key 32 | return NixtlaClient( 33 | api_key=api_key, 34 | base_url=self.base_url, 35 | max_retries=self.max_retries, 36 | ) 37 | 38 | def forecast( 39 | self, 40 | df: pd.DataFrame, 41 | h: int, 42 | freq: str, 43 | ) -> pd.DataFrame: 44 | client = self._get_client() 45 | fcst_df = client.forecast( 46 | df=df, 47 | h=h, 48 | freq=freq, 49 | model=self.model, 50 | ) 51 | fcst_df["ds"] = pd.to_datetime(fcst_df["ds"]) 52 | fcst_df = fcst_df.rename(columns={"TimeGPT": self.alias}) 53 | return fcst_df 54 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/timesfm.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import timesfm 3 | import torch 4 | from paxml import checkpoints 5 | 6 | from ..utils.forecaster import Forecaster 7 | 8 | 9 | class TimesFM(Forecaster): 10 | def __init__( 11 | self, 12 | repo_id: str = "google/timesfm-1.0-200m", 13 | context_length: int = 512, 14 | batch_size: int = 64, 15 | alias: str = "TimesFM", 16 | ): 17 | self.repo_id = repo_id 18 | self.context_length = context_length 19 | self.batch_size = batch_size 20 | self.alias = alias 21 | 22 | def get_predictor( 23 | self, 24 | prediction_length: int, 25 | ) -> timesfm.TimesFm: 26 | backend = "gpu" if torch.cuda.is_available() else "cpu" 27 | tfm = timesfm.TimesFm( 28 | context_len=self.context_length, 29 | horizon_len=prediction_length, 30 | input_patch_len=32, 31 | output_patch_len=128, 32 | num_layers=20, 33 | model_dims=1280, 34 | backend=backend, 35 | per_core_batch_size=self.batch_size, 36 | ) 37 | tfm.load_from_checkpoint(repo_id=self.repo_id) 38 | return tfm 39 | 40 | def forecast( 41 | self, 42 | df: pd.DataFrame, 43 | h: int, 44 | freq: str, 45 | ) -> pd.DataFrame: 46 | predictor = self.get_predictor(prediction_length=h) 47 | fcst_df = predictor.forecast_on_df( 48 | inputs=df, 49 | freq=freq, 50 | value_name="y", 51 | model_name=self.alias, 52 | num_jobs=1, 53 | ) 54 | fcst_df = fcst_df[["unique_id", "ds", self.alias]] 55 | return fcst_df 56 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/__init__.py -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/forecaster.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import pandas as pd 4 | from gluonts.time_feature.seasonality import get_seasonality as _get_seasonality 5 | from tqdm import tqdm 6 | from utilsforecast.processing import ( 7 | backtest_splits, 8 | drop_index_if_pandas, 9 | join, 10 | maybe_compute_sort_indices, 11 | take_rows, 12 | vertical_concat, 13 | ) 14 | 15 | 16 | def get_seasonality(freq: str) -> int: 17 | return _get_seasonality(freq, seasonalities={"D": 7}) 18 | 19 | 20 | def maybe_convert_col_to_datetime(df: pd.DataFrame, col_name: str) -> pd.DataFrame: 21 | if not pd.api.types.is_datetime64_any_dtype(df[col_name]): 22 | df = df.copy() 23 | df[col_name] = pd.to_datetime(df[col_name]) 24 | return df 25 | 26 | 27 | class Forecaster: 28 | def forecast( 29 | self, 30 | df: pd.DataFrame, 31 | h: int, 32 | freq: str, 33 | ) -> pd.DataFrame: 34 | raise NotImplementedError 35 | 36 | def cross_validation( 37 | self, 38 | df: pd.DataFrame, 39 | h: int, 40 | freq: str, 41 | n_windows: int = 1, 42 | step_size: int | None = None, 43 | ) -> pd.DataFrame: 44 | df = maybe_convert_col_to_datetime(df, "ds") 45 | # mlforecast cv code 46 | results = [] 47 | sort_idxs = maybe_compute_sort_indices(df, "unique_id", "ds") 48 | if sort_idxs is not None: 49 | df = take_rows(df, sort_idxs) 50 | splits = backtest_splits( 51 | df, 52 | n_windows=n_windows, 53 | h=h, 54 | id_col="unique_id", 55 | time_col="ds", 56 | freq=pd.tseries.frequencies.to_offset(freq), 57 | step_size=h if step_size is None else step_size, 58 | ) 59 | for _, (cutoffs, train, valid) in tqdm(enumerate(splits)): 60 | if len(valid.columns) > 3: 61 | raise NotImplementedError( 62 | "Cross validation with exogenous variables is not yet supported." 63 | ) 64 | y_pred = self.forecast( 65 | df=train, 66 | h=h, 67 | freq=freq, 68 | ) 69 | y_pred = join(y_pred, cutoffs, on="unique_id", how="left") 70 | result = join( 71 | valid[["unique_id", "ds", "y"]], 72 | y_pred, 73 | on=["unique_id", "ds"], 74 | ) 75 | if result.shape[0] < valid.shape[0]: 76 | raise ValueError( 77 | "Cross validation result produced less results than expected. " 78 | "Please verify that the frequency parameter (freq) matches your series' " 79 | "and that there aren't any missing periods." 
80 | ) 81 | results.append(result) 82 | out = vertical_concat(results) 83 | out = drop_index_if_pandas(out) 84 | first_out_cols = ["unique_id", "ds", "cutoff", "y"] 85 | remaining_cols = [c for c in out.columns if c not in first_out_cols] 86 | fcst_cv_df = out[first_out_cols + remaining_cols] 87 | return fcst_cv_df 88 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/gluonts_forecaster.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable, List, Any 2 | 3 | import pandas as pd 4 | import torch 5 | from gluonts.dataset.pandas import PandasDataset 6 | from gluonts.model.forecast import Forecast 7 | from gluonts.torch.model.predictor import PyTorchPredictor 8 | from huggingface_hub import hf_hub_download 9 | from tqdm import tqdm 10 | 11 | from .forecaster import Forecaster 12 | 13 | 14 | def fix_freq(freq: str) -> str: 15 | # see https://github.com/awslabs/gluonts/pull/2462/files 16 | if len(freq) > 1 and freq.endswith("S"): 17 | return freq[:-1] 18 | return freq 19 | 20 | 21 | def maybe_convert_col_to_float32(df: pd.DataFrame, col_name: str) -> pd.DataFrame: 22 | if df[col_name].dtype != "float32": 23 | df = df.copy() 24 | df[col_name] = df[col_name].astype("float32") 25 | return df 26 | 27 | 28 | class GluonTSForecaster(Forecaster): 29 | def __init__(self, repo_id: str, filename: str, alias: str): 30 | self.repo_id = repo_id 31 | self.filename = filename 32 | self.alias = alias 33 | 34 | @property 35 | def checkpoint_path(self) -> str: 36 | return hf_hub_download( 37 | repo_id=self.repo_id, 38 | filename=self.filename, 39 | ) 40 | 41 | @property 42 | def map_location(self) -> str: 43 | map_location = "cuda:0" if torch.cuda.is_available() else "cpu" 44 | return map_location 45 | 46 | def load(self) -> Any: 47 | return torch.load( 48 | self.checkpoint_path, 49 | map_location=self.map_location, 50 | ) 51 | 52 | def get_predictor(self, prediction_length: int) -> PyTorchPredictor: 53 | raise NotImplementedError 54 | 55 | def gluonts_instance_fcst_to_df( 56 | self, 57 | fcst: Forecast, 58 | freq: str, 59 | model_name: str, 60 | ) -> pd.DataFrame: 61 | point_forecast = fcst.mean 62 | h = len(point_forecast) 63 | dates = pd.date_range( 64 | fcst.start_date.to_timestamp(), 65 | freq=freq, 66 | periods=h, 67 | ) 68 | fcst_df = pd.DataFrame( 69 | { 70 | "ds": dates, 71 | "unique_id": fcst.item_id, 72 | model_name: point_forecast, 73 | } 74 | ) 75 | return fcst_df 76 | 77 | def gluonts_fcsts_to_df( 78 | self, 79 | fcsts: Iterable[Forecast], 80 | freq: str, 81 | model_name: str, 82 | ) -> pd.DataFrame: 83 | df = [] 84 | for fcst in tqdm(fcsts): 85 | fcst_df = self.gluonts_instance_fcst_to_df( 86 | fcst=fcst, 87 | freq=freq, 88 | model_name=model_name, 89 | ) 90 | df.append(fcst_df) 91 | return pd.concat(df).reset_index(drop=True) 92 | 93 | def forecast( 94 | self, 95 | df: pd.DataFrame, 96 | h: int, 97 | freq: str, 98 | ) -> pd.DataFrame: 99 | df = maybe_convert_col_to_float32(df, "y") 100 | gluonts_dataset = PandasDataset.from_long_dataframe( 101 | df, 102 | target="y", 103 | item_id="unique_id", 104 | timestamp="ds", 105 | freq=fix_freq(freq), 106 | ) 107 | predictor = self.get_predictor(prediction_length=h) 108 | fcsts = predictor.predict(gluonts_dataset, num_samples=100) 109 | fcst_df = self.gluonts_fcsts_to_df( 110 | fcsts, 111 | freq=freq, 112 | model_name=self.alias, 113 | ) 114 | return fcst_df 115 | 
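As a usage sketch, any GluonTS-backed wrapper built on this class is called like the other arena models; the example below mirrors `tests/test_models.py` and uses the small Moirai checkpoint to keep the download light (the hourly horizon of 24 is illustrative):

```python
from utilsforecast.data import generate_series

from xiuhmolpilli.models.foundational import Moirai

# Long-format input with string ids, as the arena tests use.
df = generate_series(5, freq="H")
df["unique_id"] = df["unique_id"].astype(str)

model = Moirai("Salesforce/moirai-1.0-R-small")  # downloads the checkpoint from the Hub
fcst_df = model.forecast(df, h=24, freq="H")  # long-format forecasts with a "Moirai" column
```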
-------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/parallel_forecaster.py: -------------------------------------------------------------------------------- 1 | import os 2 | from multiprocessing import Pool 3 | from typing import Callable, List 4 | 5 | import pandas as pd 6 | 7 | 8 | class ParallelForecaster: 9 | def _process_group( 10 | self, 11 | df: pd.DataFrame, 12 | func: Callable, 13 | **kwargs, 14 | ) -> pd.DataFrame: 15 | uid = df["unique_id"].iloc[0] 16 | _df = df.drop("unique_id", axis=1) 17 | res_df = func(_df, **kwargs) 18 | res_df.insert(0, "unique_id", uid) 19 | return res_df 20 | 21 | def _apply_parallel( 22 | self, 23 | df_grouped: pd.DataFrame, 24 | func: Callable, 25 | **kwargs, 26 | ) -> pd.DataFrame: 27 | with Pool(os.cpu_count() - 1) as executor: 28 | futures = [ 29 | executor.apply_async( 30 | self._process_group, 31 | args=(df, func), 32 | kwds=kwargs, 33 | ) 34 | for _, df in df_grouped 35 | ] 36 | results = [future.get() for future in futures] 37 | return pd.concat(results) 38 | 39 | def _local_forecast( 40 | self, 41 | df: pd.DataFrame, 42 | h: int, 43 | freq: str, 44 | quantiles: List[float] | None = None, 45 | ) -> pd.DataFrame: 46 | raise NotImplementedError 47 | 48 | def forecast( 49 | self, 50 | df: pd.DataFrame, 51 | h: int, 52 | freq: str, 53 | quantiles: List[float] | None = None, 54 | ) -> pd.DataFrame: 55 | fcst_df = self._apply_parallel( 56 | df.groupby("unique_id"), 57 | self._local_forecast, 58 | h=h, 59 | freq=freq, 60 | quantiles=quantiles, 61 | ) 62 | return fcst_df 63 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/utils/download_data.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from concurrent.futures import ProcessPoolExecutor 3 | 4 | import pandas as pd 5 | 6 | logging.basicConfig(level=logging.INFO) 7 | main_logger = logging.getLogger(__name__) 8 | 9 | 10 | def read_parquet_and_assign(uid, url): 11 | df = pd.read_parquet(url) 12 | df["unique_id"] = uid 13 | df["ds"] = df["ds"].astype(str) 14 | return df[["unique_id", "ds", "y"]] 15 | 16 | 17 | def download_data(): 18 | catalogue_splits = pd.read_csv("./data/series_catalogue_hourly.csv") 19 | catalogue_df = catalogue_splits.query("dataset == 'moirai'") 20 | catalogue_df["pandas_frequency"] = "H" 21 | catalogue_df["seasonality"] = 24 22 | catalogue_df["horizon"] = 24 23 | catalogue_df = catalogue_df.query("split == 'test'")[ 24 | [ 25 | "unique_id", 26 | "frequency", 27 | "url", 28 | "pandas_frequency", 29 | "seasonality", 30 | "horizon", 31 | ] 32 | ] 33 | grouped_df = catalogue_df.groupby(["frequency", "pandas_frequency"]) 34 | for (frequency, pandas_frequency), df in grouped_df: 35 | uids, urls = df["unique_id"].values, df["url"].values 36 | main_logger.info( 37 | f"frequency: {frequency}, pandas_frequency: {pandas_frequency}" 38 | ) 39 | n_uids = len(uids) 40 | main_logger.info(f"number of uids: {n_uids}") 41 | max_workers = min(10, n_uids) 42 | with ProcessPoolExecutor(max_workers=max_workers) as executor: 43 | futures = [ 44 | executor.submit(read_parquet_and_assign, uid, url) 45 | for uid, url in zip(uids, urls) 46 | ] 47 | results = [future.result() for future in futures] 48 | main_logger.info("dataset read") 49 | Y_df = pd.concat(results) 50 | Y_df = Y_df.merge( 51 | df.drop(columns="url"), 52 | on="unique_id", 53 | how="left", 54 | ) 55 | 
# Y_df.to_parquet(f"./data/{frequency}_{pandas_frequency}.parquet") 56 | Y_df.to_parquet(f"./data/filtered_datasets/moirai-data.parquet") 57 | del Y_df 58 | main_logger.info("dataset saved") 59 | 60 | 61 | if __name__ == "__main__": 62 | download_data() 63 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/utils/filter_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | this module takes Nixtla's benchmarking data 3 | and filters it to prevent azureml from crashing 4 | in the following cases: 5 | - too short series, see https://learn.microsoft.com/en-us/azure/machine-learning/concept-automl-forecasting-methods?view=azureml-api-2#data-length-requirements 6 | """ 7 | import logging 8 | from pathlib import Path 9 | 10 | import fire 11 | import numpy as np 12 | import pandas as pd 13 | 14 | main_logger = logging.getLogger(__name__) 15 | main_logger.setLevel(logging.INFO) 16 | 17 | 18 | def get_min_size_per_series(dataset_path: str) -> int: 19 | if "Daily" in dataset_path or "Hourly" in dataset_path: 20 | return 1_000 21 | elif "Monthly" in dataset_path: 22 | return 10 * 12 23 | else: 24 | return 1_000 // 7 25 | 26 | 27 | def filter_and_clean_dataset( 28 | dataset_path: str, 29 | max_series: int = 1_000, 30 | random_seed: int = 420, 31 | ): 32 | main_logger.info(f"Processing dataset {dataset_path}") 33 | df = pd.read_parquet(dataset_path) 34 | df = df.drop_duplicates(["unique_id", "ds"]) # type: ignore 35 | df = df.sort_values(["unique_id", "ds"]) 36 | min_size_per_series = get_min_size_per_series(dataset_path) 37 | df = ( 38 | df.groupby("unique_id") 39 | .filter(lambda x: len(x) >= min_size_per_series) 40 | .reset_index(drop=True) 41 | ) 42 | uids = df["unique_id"].unique() # type: ignore 43 | if len(uids) > max_series: 44 | np.random.seed(random_seed) 45 | uids = np.random.choice(uids, max_series, replace=False) # type: ignore 46 | df = df.query("unique_id in @uids") # type: ignore 47 | main_logger.info(f"Filtering out {len(uids) - max_series} series") 48 | n_series = len(df["unique_id"].unique()) # type: ignore 49 | main_logger.info(f"Number of series: {n_series}") 50 | if n_series == 0: 51 | raise ValueError("No series left after filtering") 52 | # finally we clean some strange dates 53 | mask = df["ds"].str.endswith(":01") # type: ignore 54 | df.loc[mask, "ds"] = df.loc[mask, "ds"].str[:-3] + ":00" 55 | # save the dataset 56 | dataset_path = Path(dataset_path) # type: ignore 57 | filtered_dataset_path = dataset_path.parent / "filtered_datasets" / dataset_path.name # type: ignore 58 | filtered_dataset_path.parent.mkdir(exist_ok=True, parents=True) 59 | df.to_parquet(filtered_dataset_path) 60 | main_logger.info(f"Filtered dataset saved to {filtered_dataset_path}") 61 | 62 | 63 | if __name__ == "__main__": 64 | fire.Fire(filter_and_clean_dataset) 65 | -------------------------------------------------------------------------------- /experiments/foundation-time-series-arena/xiuhmolpilli/utils/logger_config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def setup_logger(logger_name, log_file=None): 5 | logger = logging.getLogger(logger_name) 6 | logger.setLevel(logging.INFO) 7 | formatter = logging.Formatter( 8 | "%(asctime)s,%(levelname)s,%(module)s,%(message)s", 9 | datefmt="%Y-%m-%d %H:%M:%S", 10 | ) 11 | console_handler = logging.StreamHandler() 12 | console_handler.setLevel(logging.INFO) 
13 | console_handler.setFormatter(formatter) 14 | logger.addHandler(console_handler) 15 | return logger 16 | -------------------------------------------------------------------------------- /experiments/lag-llama/Makefile: -------------------------------------------------------------------------------- 1 | download_lag_llama_code: 2 | @git clone https://github.com/time-series-foundation-models/lag-llama tempdir 3 | @cp -R tempdir/data/ . 4 | @cp -R tempdir/gluon_utils/ . 5 | @cp -R tempdir/lag_llama/ . 6 | @cp -R tempdir/requirements.txt lag-llama-requirements.txt 7 | @rm -rf tempdir 8 | 9 | download_lag_llama_model: 10 | @huggingface-cli download time-series-foundation-models/Lag-Llama lag-llama.ckpt --local-dir ./models/ 11 | -------------------------------------------------------------------------------- /experiments/lag-llama/README.md: -------------------------------------------------------------------------------- 1 | # LagLlama is 40% less accurate than a simple SeasonalNaive and 1000x slower. 2 | 3 | We present a fully reproducible experiment showing that SeasonalNaive significantly outperforms LagLlama, a recently introduced open-source foundational model for time series forecasting (a deep learning architecture pre-trained on time series datasets). Specifically, **SeasonalNaive achieves 42%, 24%, and 16% better performance** in terms of MASE, MAPE, and CRPS respectively, and boasts **a 1,000x speed advantage**. These findings are based on an extensive analysis covering 105,289 unique time series from the M1, M3, M4, and Tourism datasets, which were omitted in the original LagLlama paper. 4 | 5 | # Introduction 6 | 7 | In the field of time series forecasting, recent developments have introduced foundational models such as LagLlama, which utilizes deep learning and extensive data for pretraining, adding model complexity with the aim of enhancing predictive performance. LagLlama deserves praise as one of the first open-source foundational models. However, contrary to expectations, our analysis indicates that the traditional SeasonalNaive model, known for its straightforward approach of extending past seasonal trends into future predictions, outperforms LagLlama in terms of both accuracy and computational efficiency. 8 | 9 | ## Empirical Evaluation 10 | 11 | The original paper uses 3,113 time series to assess model performance and reports only CRPS, omitting point forecast error metrics widely used in academia and industry, e.g. MASE and MAPE. 12 | 13 | Our evaluation encompasses 105,289 unique time series from different datasets, including M1, M3, M4, and Tourism, covering yearly, quarterly, monthly, weekly, daily, and hourly frequencies. This diverse dataset selection allows for a robust assessment of the models across various time series characteristics and forecasting horizons. We also reproduce results for Pedestrian Counts and Weather, originally included in the paper/code, to show that we are running LagLlama correctly. 14 | 15 | ## Results 16 | 17 | The results are summarized in the following table, highlighting the performance metrics of MASE, MAPE, CRPS, and TIME (measured in seconds). The best results are indicated in **bold** for easy reference. 18 | 19 | *(results table image)* 20 | 21 | 22 | ## Reproducibility 23 | 24 | To ensure the reproducibility of our findings, the experiments were conducted on an AWS g5.4xlarge GPU instance equipped with 16 vCPUs, 64 GiB of RAM, and an NVIDIA A10G Tensor Core GPU (24 GiB). The complete code can be found in this repo.
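As a point of reference before the full pipeline, the SeasonalNaive baseline can be reproduced in a few lines with StatsForecast (a minimal sketch on synthetic daily data, not the exact experiment code in `src/statsforecast_pipeline.py`):

```python
from statsforecast import StatsForecast
from statsforecast.models import SeasonalNaive
from utilsforecast.data import generate_series

# Synthetic daily series in long format (unique_id, ds, y).
df = generate_series(n_series=10, freq="D", min_length=200)

sf = StatsForecast(models=[SeasonalNaive(season_length=7)], freq="D")
fcst_df = sf.forecast(df=df, h=14, level=[80])  # point forecasts plus 80% intervals
```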
25 | 26 | ### Instructions 27 | 28 | 1. Create a Python environment using: 29 | ``` 30 | mamba env create -f environment.yml 31 | conda activate lag-llama 32 | ``` 33 | 34 | 2. Add the lag-llama code to your environment 35 | 36 | ``` 37 | make download_lag_llama_code 38 | ``` 39 | 40 | 3. Download the lag-llama model 41 | 42 | ``` 43 | make download_lag_llama_model 44 | ``` 45 | 46 | 4. Install the lag-llama requirements 47 | 48 | ``` 49 | pip install -r lag-llama-requirements.txt 50 | ``` 51 | 52 | 5. Run the complete experiments reported in the table 53 | 54 | ``` 55 | python -m src.main 56 | ``` 57 | 58 | ### References 59 | - **Lag-Llama Paper**: [Towards Foundation Models for Probabilistic Time Series Forecasting](https://arxiv.org/abs/2310.08278) 60 | - **SeasonalNaive Implementation**: [GitHub Repository](https://github.com/nixtla/statsforecast/) 61 | - **CRPS Replication Note**: The CRPS performance for `LagLlama` is replicated from the model's publicly available [Colab notebook](https://colab.research.google.com/drive/13HHKYL_HflHBKxDWycXgIUAHSeHRR5eo?usp=sharing), ensuring a fair comparison. 62 | -------------------------------------------------------------------------------- /experiments/lag-llama/environment.yml: -------------------------------------------------------------------------------- 1 | name: lag-llama 2 | channels: 3 | - conda-forge 4 | - defaults 5 | - anaconda 6 | dependencies: 7 | - jupyterlab 8 | - pip 9 | - python=3.10 10 | - pip: 11 | - datasetsforecast 12 | - fire 13 | - huggingface_hub[cli] 14 | - neuralforecast 15 | - orjson 16 | - statsforecast 17 | - utilsforecast 18 | 19 | -------------------------------------------------------------------------------- /experiments/lag-llama/src/lag_llama_pipeline.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | from typing import Iterable, List, Tuple 3 | 4 | import fire 5 | import pandas as pd 6 | import torch 7 | from gluonts.dataset import Dataset 8 | from gluonts.model.forecast import Forecast 9 | from gluonts.torch.model.predictor import PyTorchPredictor 10 | from tqdm import tqdm 11 | 12 | from lag_llama.gluon.estimator import LagLlamaEstimator 13 | from src.utils import ExperimentHandler 14 | 15 | 16 | def get_lag_llama_predictor( 17 | prediction_length: int, models_dir: str 18 | ) -> PyTorchPredictor: 19 | model_path = f"{models_dir}/lag-llama.ckpt" 20 | map_location = torch.device("cuda:0") if torch.cuda.is_available() else "cpu" 21 | if map_location == "cpu": 22 | raise ValueError("cpu is not supported in lagllama (there is a bug)") 23 | ckpt = torch.load(model_path, map_location=map_location) 24 | estimator_args = ckpt["hyper_parameters"]["model_kwargs"] 25 | # this context length is reported in the paper 26 | context_length = 32 27 | estimator = LagLlamaEstimator( 28 | ckpt_path=model_path, 29 | prediction_length=prediction_length, 30 | context_length=context_length, 31 | # estimator args 32 | input_size=estimator_args["input_size"], 33 | n_layer=estimator_args["n_layer"], 34 | n_embd_per_head=estimator_args["n_embd_per_head"], 35 | n_head=estimator_args["n_head"], 36 | scaling=estimator_args["scaling"], 37 | time_feat=estimator_args["time_feat"], 38 | ) 39 | lightning_module = estimator.create_lightning_module() 40 | transformation = estimator.create_transformation() 41 | predictor = estimator.create_predictor(transformation, lightning_module) 42 | return predictor 43 | 44 | 45 | def gluonts_instance_fcst_to_df( 46 | fcst: Forecast, 47 | quantiles: 
List[float], 48 | model_name: str, 49 | ) -> pd.DataFrame: 50 | point_forecast = fcst.mean 51 | h = len(point_forecast) 52 | dates = pd.date_range( 53 | fcst.start_date.to_timestamp(), 54 | freq=fcst.freq, 55 | periods=h, 56 | ) 57 | fcst_df = pd.DataFrame( 58 | { 59 | "ds": dates, 60 | "unique_id": fcst.item_id, 61 | model_name: point_forecast, 62 | } 63 | ) 64 | for q in quantiles: 65 | fcst_df[f"{model_name}-q-{q}"] = fcst.quantile(q) 66 | return fcst_df 67 | 68 | 69 | def gluonts_fcsts_to_df( 70 | fcsts: Iterable[Forecast], 71 | quantiles: List[float], 72 | model_name: str, 73 | ) -> pd.DataFrame: 74 | df = [] 75 | for fcst in tqdm(fcsts): 76 | fcst_df = gluonts_instance_fcst_to_df(fcst, quantiles, model_name) 77 | df.append(fcst_df) 78 | return pd.concat(df).reset_index(drop=True) 79 | 80 | 81 | def run_lag_llama( 82 | gluonts_dataset: Dataset, 83 | horizon: int, 84 | quantiles: List[float], 85 | models_dir: str, 86 | ) -> Tuple[pd.DataFrame, float, str]: 87 | init_time = time() 88 | predictor = get_lag_llama_predictor(horizon, models_dir) 89 | fcsts = predictor.predict(gluonts_dataset, num_samples=100) 90 | model_name = "LagLlama" 91 | fcsts_df = gluonts_fcsts_to_df( 92 | fcsts, 93 | quantiles=quantiles, 94 | model_name=model_name, 95 | ) 96 | total_time = time() - init_time 97 | return fcsts_df, total_time, model_name 98 | 99 | 100 | def main(dataset: str): 101 | exp = ExperimentHandler(dataset) 102 | fcst_df, total_time, model_name = run_lag_llama( 103 | gluonts_dataset=exp.gluonts_train_dataset, 104 | horizon=exp.horizon, 105 | quantiles=exp.quantiles, 106 | models_dir=exp.models_dir, 107 | ) 108 | exp._save_results(fcst_df, total_time, model_name) 109 | 110 | 111 | if __name__ == "__main__": 112 | fire.Fire(main) 113 | -------------------------------------------------------------------------------- /experiments/lag-llama/src/main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import subprocess 3 | 4 | import pandas as pd 5 | 6 | from src.utils import ExperimentHandler 7 | 8 | logger = logging.getLogger(__name__) 9 | logger.setLevel(logging.INFO) 10 | 11 | not_included_datasets = [ 12 | "m1_yearly", 13 | "m1_quarterly", 14 | "m1_monthly", 15 | "m3_yearly", 16 | "m3_quarterly", 17 | "m3_monthly", 18 | "m3_other", 19 | "m4_yearly", 20 | "m4_quarterly", 21 | "m4_monthly", 22 | "m4_weekly", 23 | "m4_daily", 24 | "m4_hourly", 25 | "tourism_yearly", 26 | "tourism_quarterly", 27 | "tourism_monthly", 28 | ] 29 | 30 | test_paper_datasets = [ 31 | "pedestrian_counts", 32 | "weather", 33 | ] 34 | 35 | datasets = { 36 | "not_included": not_included_datasets, 37 | "test_set": test_paper_datasets, 38 | } 39 | 40 | 41 | def evaluate(): 42 | eval_df = [] 43 | prefix_process = ["python", "-m"] 44 | 45 | for name_group, groups in datasets.items(): 46 | for dataset in groups: 47 | logger.info(f"Evaluating {dataset}...") 48 | suffix_process = ["--dataset", dataset] 49 | process = ( 50 | lambda middle_process: prefix_process + middle_process + suffix_process 51 | ) 52 | # running statsforecast and lagllama in separated 53 | # processes because gluonts sets multiprocessing context 54 | # see: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/torch/__init__.py 55 | logger.info("Running SeasonalNaive") 56 | subprocess.run(process(["src.statsforecast_pipeline"])) 57 | logger.info("Running LagLLama") 58 | subprocess.run(process(["src.lag_llama_pipeline"])) 59 | logger.info("Running dataset evaluation") 60 | exp = ExperimentHandler(dataset) 
61 | eval_dataset_df = exp.evaluate_models(["LagLlama", "SeasonalNaive"]) 62 | eval_dataset_df.insert(0, "paper", name_group) 63 | eval_df.append(eval_dataset_df) 64 | eval_df = pd.concat(eval_df).reset_index(drop=True) 65 | exp.save_dataframe(eval_df, "complete-results.csv") 66 | 67 | 68 | if __name__ == "__main__": 69 | evaluate() 70 | -------------------------------------------------------------------------------- /experiments/lag-llama/src/statsforecast_pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | from time import time 3 | from typing import List, Tuple 4 | 5 | import fire 6 | import pandas as pd 7 | from statsforecast import StatsForecast 8 | from statsforecast.models import SeasonalNaive 9 | 10 | from src.utils import ExperimentHandler 11 | 12 | 13 | def run_statsforecast( 14 | train_df: pd.DataFrame, 15 | horizon: int, 16 | freq: str, 17 | seasonality: int, 18 | level: List[int], 19 | ) -> Tuple[pd.DataFrame, float, str]: 20 | os.environ["NIXTLA_ID_AS_COL"] = "true" 21 | models = [SeasonalNaive(season_length=seasonality)] 22 | init_time = time() 23 | sf = StatsForecast( 24 | models=models, 25 | freq=freq, 26 | n_jobs=-1, 27 | ) 28 | fcsts_df = sf.forecast(df=train_df, h=horizon, level=level) 29 | total_time = time() - init_time 30 | model_name = repr(models[0]) 31 | return fcsts_df, total_time, model_name 32 | 33 | 34 | def main(dataset: str): 35 | exp = ExperimentHandler(dataset) 36 | fcst_df, total_time, model_name = run_statsforecast( 37 | train_df=exp.train_df, 38 | horizon=exp.horizon, 39 | freq=exp.freq, 40 | seasonality=exp.seasonality, 41 | level=exp.level, 42 | ) 43 | fcst_df = exp._fcst_from_level_to_quantiles(fcst_df, model_name) 44 | exp._save_results(fcst_df, total_time, model_name) 45 | 46 | 47 | if __name__ == "__main__": 48 | fire.Fire(main) 49 | -------------------------------------------------------------------------------- /experiments/one-billion/README.md: -------------------------------------------------------------------------------- 1 | # Forecasting at Scale: One Billion (1e9) Time Series with TimeGPT ⚡📈 2 | 3 | Imagine you're tasked with forecasting for **one billion unique time series**—ranging from retail sales across thousands of stores to sensor data from millions of IoT devices. It's a monumental challenge, requiring not just statistical modeling but also cutting-edge tools to handle the scale and complexity of the data. 4 | 5 | This project is a blueprint for scaling such a task, utilizing **Nixtla's foundation models for time series forecasting** and orchestrating the process efficiently using Python and AWS S3. Here's how you can tackle this kind of project. 6 | 7 | ## The Challenge 🎯 8 | 9 | The goal is simple: forecast the future for **one billion different time series**, but the constraints are anything but simple. How do you handle the storage of this data? 🗄️ How do you parallelize the computation efficiently? 💻 And finally, how do you produce results quickly enough to be useful in decision-making? ⏳ 10 | 11 | ### Enter Foundation Models for Time Series 🚀 12 | 13 | **Nixtla** offers **TimeGPT** through an API that leverages foundation models capable of handling large-scale forecasting problems. These models are designed for flexibility and speed 🏎️, making them ideal for scenarios where you're dealing with an enormous volume of data and need results at a high cadence. 
⚡ 14 | 15 | ## Results 📊 16 | 17 | | 📈 **Number of Series** | **Number of Processes** | ⏳ **CPU Time (hours)** | 18 | |:-----------------------:|:-------------------:|:------------------:| 19 | | 1e9 | 1 | 5.5 | 20 | | 1e9 | 5 | 1.1 | 21 | 22 | ## Running the Project 🛠️ 23 | 24 | ### Installation 🧩 25 | 26 | 1. Install the required Python packages: 27 | ```bash 28 | pip install -r requirements.txt 29 | ``` 30 | 31 | 2. Configure AWS credentials so the script can interact with S3: 32 | ```bash 33 | aws configure 34 | ``` 35 | 36 | ### Usage 🏃‍♂️ 37 | 38 | To generate forecasts, simply run the following command. Adjust the parameters as needed: 39 | 40 | ```bash 41 | python main.py --bucket <your-bucket> --prefix <your-prefix> --n_partitions 1000 --series_per_partition 1000000 --n_jobs 5 42 | ``` 43 | 44 | - **`bucket`**: The S3 bucket where the data is stored. 45 | - **`prefix`**: The path inside the S3 bucket where the input and output data is stored. 46 | - **`n_partitions`**: The number of partitions to break the task into. 47 | - **`series_per_partition`**: The number of time series in each partition. 48 | - **`n_jobs`**: The number of processes to run in parallel. 49 | 50 | ### What Happens Behind the Scenes 🔍 51 | 52 | The code will: 53 | 54 | 1. Check if the forecast for each partition has already been generated. ✅ 55 | 2. Generate new time series data for each partition. 🧬 56 | 3. Use Nixtla’s API to compute forecasts for each partition. 🔮 57 | 4. Save the results and the time taken to S3. 💾 58 | 59 | ## Scaling to Billions 🚀 60 | 61 | This approach is designed to **scale**, whether you’re forecasting for **one million** or **one billion** series. By partitioning the data, processing it in parallel 🧠, and leveraging foundation models like those provided by Nixtla, you can handle even the most massive forecasting tasks efficiently. ⚙️ 62 | 63 | ### Final Thoughts 💡 64 | 65 | Forecasting at scale is no easy feat, but with the right tools, it’s entirely achievable. This project demonstrates how modern time series forecasting techniques can be applied to massive datasets in an efficient, scalable way. By leveraging AWS infrastructure, foundation models, and clever parallel processing, you can forecast the future for billions of unique data series, **unlocking insights** that can power decision-making at an unprecedented scale. 🌍✨ 66 | -------------------------------------------------------------------------------- /experiments/one-billion/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | fire 3 | nixtla>=0.6.0 4 | nixtlats==0.5.2 5 | pandas 6 | pyarrow 7 | s3fs 8 | tqdm 9 | -------------------------------------------------------------------------------- /experiments/prophet/.env.example: -------------------------------------------------------------------------------- 1 | TIMEGPT_TOKEN= 2 | -------------------------------------------------------------------------------- /experiments/prophet/Makefile: -------------------------------------------------------------------------------- 1 | SRC_DIR := data 2 | EXCLUDE_STRINGS := catalogue 3 | TS_FILES := $(filter-out $(wildcard $(SRC_DIR)/*$(foreach str,$(EXCLUDE_STRINGS),*$(str)*)), $(wildcard $(SRC_DIR)/*.parquet)) 4 | 5 | evaluate: .require-method 6 | @echo "Evaluation for $${method}..." 7 | @for file in $(TS_FILES); do \ 8 | echo $$file; \ 9 | python -m src.$${method}_exp --file $$file; \ 10 | done 11 | @echo "Evaluation for $${method} complete." 12 | 13 | summarize_results: 14 | @echo "Summarize results..." 
15 | @python -m src.results_summary --dir ./data/results/ 16 | @echo "Summarize results complete." 17 | 18 | .require-method: 19 | ifndef method 20 | $(error method is required) 21 | endif 22 | -------------------------------------------------------------------------------- /experiments/prophet/environment.yml: -------------------------------------------------------------------------------- 1 | name: timegpt-benchmark 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - jupyterlab 6 | - prophet 7 | - pyspark>=3.3 8 | - python=3.10 9 | - pip: 10 | - fire 11 | - nixtla 12 | - python-dotenv 13 | - statsforecast 14 | - utilsforecast 15 | - tabulate 16 | 17 | -------------------------------------------------------------------------------- /experiments/prophet/src/results_summary.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import fire 4 | from numpy import column_stack 5 | import pandas as pd 6 | 7 | 8 | def read_kind_results(kind: str, dir: str): 9 | files = list(Path(dir).rglob(f"*{kind}.parquet")) 10 | df = pd.concat( 11 | [pd.read_parquet(file).assign(file=str(file).split("/")[-2]) for file in files], 12 | ignore_index=True, 13 | ) 14 | return df 15 | 16 | 17 | def summarize_results_per_file(metrics_df: pd.DataFrame): 18 | metrics_df_per_freq = metrics_df.groupby(["file", "metric", "model"]).mean( 19 | numeric_only=True 20 | ) 21 | metrics_df_per_freq = metrics_df_per_freq.reset_index() 22 | metrics_df_per_freq = metrics_df_per_freq.query( 23 | "model in ['Prophet', 'SeasonalNaive', 'TimeGPT']" 24 | ) 25 | models = metrics_df_per_freq["model"].unique() 26 | metrics_df_per_freq = pd.pivot( 27 | metrics_df_per_freq, 28 | index=["file", "metric"], 29 | columns="model", 30 | values="value", 31 | ).reset_index() 32 | for model in models: 33 | if model == "SeasonalNaive": 34 | continue 35 | metrics_df_per_freq[model] /= metrics_df_per_freq["SeasonalNaive"] 36 | metrics_df_per_freq["SeasonalNaive"] /= metrics_df_per_freq["SeasonalNaive"] 37 | return metrics_df_per_freq 38 | 39 | 40 | def prepare_results(df: pd.DataFrame): 41 | def bold_best(row): 42 | row = row.round(3) 43 | models = row.drop(columns=["file", "metric"]).columns 44 | best_model = row[models].idxmin(axis=1).item() 45 | row[best_model] = "**" + str(row[best_model].item()) + "**" 46 | return row 47 | 48 | df_bolded = df.groupby(["file", "metric"]).apply(bold_best) 49 | df_bolded = df_bolded.reset_index(drop=True) 50 | return df_bolded 51 | 52 | 53 | def write_to_readme(content: str): 54 | with open("README.md", "r") as file: 55 | readme_content = file.readlines() 56 | start_index = -1 57 | end_index = -1 58 | for i, line in enumerate(readme_content): 59 | if line.strip().lower() == "## results": 60 | start_index = i + 1 61 | if start_index != -1 and line.strip() == "": 62 | end_index = i 63 | break 64 | 65 | if start_index != -1 and end_index != -1: 66 | readme_content = ( 67 | readme_content[: start_index + 1] 68 | + [content + "\n"] 69 | + readme_content[end_index:] 70 | ) 71 | else: 72 | print("Results section not found or improperly formatted") 73 | 74 | # Write the changes back to the README 75 | with open("README.md", "w") as file: 76 | file.writelines(readme_content) 77 | 78 | 79 | def summarize_results(dir: str): 80 | metrics_df = read_kind_results("metrics", dir) 81 | summary_df = read_kind_results("summary", dir) 82 | summary_df = ( 83 | summary_df.set_index(["file", "frequency"]) 84 | .reset_index() 85 | .round(3) 86 | .sort_values("frequency") 
87 | ) 88 | no_int_cols = ["file", "frequency", "mean", "std"] 89 | for col in summary_df.columns: 90 | if col not in no_int_cols: 91 | summary_df[col] = summary_df[col].astype(int) 92 | summary_df = summary_df.to_markdown(index=False, intfmt=",", floatfmt=",.3f") 93 | time_df = read_kind_results("time", dir) 94 | time_df = time_df.assign(metric="time").rename(columns={"time": "value"}) 95 | metrics_df_per_file = summarize_results_per_file(metrics_df) 96 | time_df = summarize_results_per_file(time_df) 97 | eval_df = pd.concat([metrics_df_per_file, time_df], ignore_index=True) 98 | eval_df = prepare_results(eval_df)[ 99 | ["file", "metric", "TimeGPT", "Prophet", "SeasonalNaive"] 100 | ] 101 | n_files = eval_df["file"].nunique() 102 | eval_df = eval_df.to_markdown( 103 | index=False, 104 | colalign=2 * ["left"] + (eval_df.shape[1] - 2) * ["right"], 105 | ) 106 | markdown_lines = eval_df.split("\n") 107 | custom_separator = markdown_lines[1].replace(":", "-") 108 | for i in range(4, len(markdown_lines) + n_files - 1, 4): 109 | markdown_lines.insert(i + 1, custom_separator) 110 | markdown_lines.insert( 111 | 0, 112 | ("\n### Data Description\n\n" f"{summary_df}\n\n" "### Performance\n\n"), 113 | ) 114 | eval_df = "\n".join(markdown_lines) 115 | write_to_readme(eval_df) 116 | 117 | 118 | if __name__ == "__main__": 119 | fire.Fire(summarize_results) 120 | -------------------------------------------------------------------------------- /experiments/prophet/src/statsforecast_exp.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | 3 | import fire 4 | import pandas as pd 5 | from statsforecast import StatsForecast 6 | from statsforecast.models import SeasonalNaive, ZeroModel 7 | 8 | from src.tools import ExperimentHandler 9 | 10 | 11 | def evaluate_experiment(file: str): 12 | exp_handler = ExperimentHandler(file=file, method="statsforecast") 13 | Y_df, freq, pandas_freq, h, seasonality = exp_handler.read_data() 14 | models = [ 15 | SeasonalNaive(season_length=seasonality), 16 | ZeroModel(), 17 | ] 18 | # even though statsforecast can handle multiple models, we only use one 19 | # at a time to calculate time for each 20 | eval_df = [] 21 | total_time_df = [] 22 | for model in models: 23 | model_name = repr(model) 24 | print(model_name) 25 | sf = StatsForecast( 26 | models=[model], 27 | freq=pandas_freq, 28 | n_jobs=-1, 29 | ) 30 | start = time() 31 | Y_hat_df_model = sf.cross_validation( 32 | df=Y_df, 33 | h=h, 34 | n_windows=1, 35 | ).reset_index() 36 | total_time = time() - start 37 | print(total_time) 38 | # evaluation 39 | eval_df_model, total_time_df_model = exp_handler.evaluate_model( 40 | Y_hat_df=Y_hat_df_model, 41 | model_name=model_name, 42 | total_time=total_time, 43 | ) 44 | eval_df.append(eval_df_model.set_index(["metric", "unique_id"])) 45 | total_time_df.append(total_time_df_model) 46 | eval_df = pd.concat(eval_df, axis=1).reset_index() 47 | total_time_df = pd.concat(total_time_df) 48 | exp_handler.save_results( 49 | freq=freq, 50 | eval_df=eval_df, 51 | total_time_df=total_time_df, 52 | df=Y_df, 53 | ) 54 | 55 | 56 | if __name__ == "__main__": 57 | fire.Fire(evaluate_experiment) 58 | -------------------------------------------------------------------------------- /experiments/prophet/src/timegpt_exp.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from time import time 3 | 4 | import fire 5 | from dotenv import load_dotenv 6 | from nixtla import NixtlaClient 7 | 8 | 
from src.tools import ExperimentHandler 9 | 10 | load_dotenv() 11 | 12 | 13 | def evaluate_experiment(file: str): 14 | exp_handler = ExperimentHandler(file=file, method="timegpt") 15 | model_name = "TimeGPT" 16 | print(model_name) 17 | # timegpt does not need the full history to 18 | # make zero shot predictions 19 | Y_df, freq, pandas_freq, h, seasonality = exp_handler.read_data( 20 | max_insample_length=300 21 | ) 22 | size_df = sys.getsizeof(Y_df) / (1024 * 1024) 23 | max_partition_size_mb = 20 24 | num_partitions = int(size_df / max_partition_size_mb) + 1 25 | timegpt = NixtlaClient( 26 | base_url="https://timegpt-endpoint.eastus.inference.ml.azure.com/", 27 | max_retries=1, 28 | ) 29 | start = time() 30 | Y_hat_df = timegpt.cross_validation( 31 | df=Y_df, 32 | h=h, 33 | n_windows=1, 34 | freq=pandas_freq, 35 | num_partitions=num_partitions, 36 | ) 37 | total_time = time() - start 38 | print(total_time) 39 | # evaluation 40 | eval_df, total_time_df = exp_handler.evaluate_model( 41 | Y_hat_df=Y_hat_df, 42 | model_name=model_name, 43 | total_time=total_time, 44 | ) 45 | exp_handler.save_results( 46 | freq=freq, 47 | eval_df=eval_df, 48 | total_time_df=total_time_df, 49 | ) 50 | 51 | 52 | if __name__ == "__main__": 53 | fire.Fire(evaluate_experiment) 54 | -------------------------------------------------------------------------------- /experiments/prophet/src/tools.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional, Tuple 3 | 4 | import pandas as pd 5 | from utilsforecast.evaluation import evaluate 6 | from utilsforecast.losses import mae, rmse 7 | 8 | 9 | class ExperimentHandler: 10 | def __init__(self, file: str, method: str): 11 | self.file = file 12 | self.method = method 13 | 14 | @staticmethod 15 | def get_parameter(parameter: str, df: pd.DataFrame): 16 | parameter = df[parameter].unique() 17 | if len(parameter) > 1: 18 | raise ValueError(f"{parameter} is not unique: {parameter}") 19 | return parameter[0] 20 | 21 | def read_data( 22 | self, 23 | max_insample_length: int = 3_000, 24 | ) -> Tuple[pd.DataFrame, str, str, int, int]: 25 | df = pd.read_parquet(self.file) 26 | Y_df = df[["unique_id", "ds", "y"]].drop_duplicates(["unique_id", "ds"]) 27 | Y_df = Y_df.sort_values(["unique_id", "ds"]) 28 | Y_df = Y_df.groupby("unique_id").tail( 29 | max_insample_length 30 | ) # take only last 3_000 rows 31 | Y_df["ds"] = Y_df["ds"].str.replace(":01$", ":00", regex=True) 32 | freq = self.get_parameter("frequency", df) 33 | pandas_freq = self.get_parameter("pandas_frequency", df) 34 | h = self.get_parameter("horizon", df) 35 | seasonality = self.get_parameter("seasonality", df) 36 | return Y_df, freq, pandas_freq, int(h), int(seasonality) 37 | 38 | def evaluate_model( 39 | self, 40 | Y_hat_df: pd.DataFrame, 41 | model_name: str, 42 | total_time: float, 43 | ): 44 | if "cutoff" in Y_hat_df.columns: 45 | Y_hat_df = Y_hat_df.drop(columns="cutoff") 46 | eval_df = evaluate( 47 | df=Y_hat_df, 48 | metrics=[rmse, mae], 49 | ) 50 | total_time_df = pd.DataFrame({"model": [model_name], "time": [total_time]}) 51 | return eval_df, total_time_df 52 | 53 | @staticmethod 54 | def summarize_df(df: pd.DataFrame): 55 | n_unique_ids = df["unique_id"].nunique() 56 | mean_y = df["y"].mean() 57 | std_y = df["y"].std() 58 | lengths = df.groupby("unique_id").size() 59 | min_length = lengths.min() 60 | max_length = lengths.max() 61 | n_obs = len(df) 62 | summary = { 63 | "n_series": n_unique_ids, 64 | "mean": mean_y, 65 | "std": std_y, 66 | 
"min_length": min_length, 67 | "max_length": max_length, 68 | "n_obs": n_obs, 69 | } 70 | summary_df = pd.DataFrame.from_dict(summary, orient="index") 71 | summary_df = summary_df.transpose() 72 | return summary_df 73 | 74 | def save_results( 75 | self, 76 | freq: str, 77 | eval_df: pd.DataFrame, 78 | total_time_df: pd.DataFrame, 79 | df: Optional[pd.DataFrame] = None, 80 | ): 81 | eval_df["frequency"] = freq 82 | eval_df = eval_df.melt( 83 | id_vars=["frequency", "metric", "unique_id"], 84 | var_name="model", 85 | value_name="value", 86 | ) 87 | total_time_df["frequency"] = freq 88 | dir = self.file.split("/")[-1].replace(".parquet", "") 89 | dir = f"./data/results/{dir}" 90 | os.makedirs(dir, exist_ok=True) 91 | eval_df.to_parquet( 92 | f"{dir}/{self.method}_metrics.parquet", 93 | index=False, 94 | ) 95 | total_time_df.to_parquet( 96 | f"{dir}/{self.method}_time.parquet", 97 | index=False, 98 | ) 99 | if df is not None: 100 | summary_df = self.summarize_df(df) 101 | summary_df["frequency"] = freq 102 | print(summary_df) 103 | summary_df.to_parquet( 104 | f"{dir}/series_summary.parquet", 105 | index=False, 106 | ) 107 | -------------------------------------------------------------------------------- /experiments/prophet/src/utils.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ThreadPoolExecutor 2 | import pandas as pd 3 | 4 | 5 | def read_parquet_and_assign(uid, url): 6 | df = pd.read_parquet(url) 7 | df["unique_id"] = uid 8 | df["ds"] = df["ds"].astype(str) 9 | return df[["unique_id", "ds", "y"]] 10 | 11 | 12 | def download_data(): 13 | catalogue_splits = pd.read_parquet("./data/catalogue_splits.parquet") 14 | catalogue_datasets = pd.read_parquet("./data/catalogue_datasets.parquet") 15 | catalogue_df = catalogue_splits.merge( 16 | catalogue_datasets, 17 | on=["dataset", "subdataset", "frequency"], 18 | ) 19 | del catalogue_splits 20 | del catalogue_datasets 21 | catalogue_df = catalogue_df.query("split == 'test'")[ 22 | [ 23 | "unique_id", 24 | "frequency", 25 | "url", 26 | "pandas_frequency", 27 | "seasonality", 28 | "horizon", 29 | ] 30 | ] 31 | grouped_df = catalogue_df.groupby(["frequency", "pandas_frequency"]) 32 | for (frequency, pandas_frequency), df in grouped_df: 33 | uids, urls = df["unique_id"].values, df["url"].values 34 | print(f"frequency: {frequency}, pandas_frequency: {pandas_frequency}") 35 | print(f"number of uids: {len(uids)}") 36 | with ThreadPoolExecutor() as executor: 37 | futures = [ 38 | executor.submit(read_parquet_and_assign, uid, url) 39 | for uid, url in zip(uids, urls) 40 | ] 41 | results = [future.result() for future in futures] 42 | print("dataset read") 43 | Y_df = pd.concat(results) 44 | Y_df = Y_df.merge( 45 | df.drop(columns="url"), 46 | on="unique_id", 47 | how="left", 48 | ) 49 | print(Y_df) 50 | Y_df.to_parquet(f"./data/{frequency}_{pandas_frequency}.parquet") 51 | del Y_df 52 | 53 | 54 | if __name__ == "__main__": 55 | download_data() 56 | -------------------------------------------------------------------------------- /experiments/salesforce-moirai/environment.yml: -------------------------------------------------------------------------------- 1 | name: moirai 2 | channels: 3 | - conda-forge 4 | - defaults 5 | - anaconda 6 | dependencies: 7 | - jupyterlab 8 | - pip 9 | - python=3.10 10 | - pip: 11 | - datasetsforecast 12 | - fire 13 | - huggingface_hub[cli] 14 | - neuralforecast 15 | - orjson 16 | - statsforecast 17 | - utilsforecast 18 | 19 | 
-------------------------------------------------------------------------------- /experiments/salesforce-moirai/src/main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import subprocess 3 | from typing import Literal 4 | 5 | import fire 6 | import pandas as pd 7 | 8 | 9 | logger = logging.getLogger(__name__) 10 | logger.setLevel(logging.INFO) 11 | 12 | datasets = [ 13 | "m1_yearly", 14 | "m1_quarterly", 15 | "m1_monthly", 16 | "m3_yearly", 17 | "m3_quarterly", 18 | "m3_monthly", 19 | "m3_other", 20 | "m4_yearly", 21 | "m4_quarterly", 22 | "m4_monthly", 23 | "m4_weekly", 24 | "m4_daily", 25 | "m4_hourly", 26 | "tourism_yearly", 27 | "tourism_quarterly", 28 | "tourism_monthly", 29 | ] 30 | 31 | 32 | def main(mode: Literal["fcst_statsforecast", "fcst_moirai`", "evaluation"]): 33 | prefix_process = ["python", "-m"] 34 | 35 | if mode in ["fcst_statsforecast", "fcst_moirai"]: 36 | for dataset in datasets: 37 | logger.info(f"Forecasting {dataset}...") 38 | suffix_process = ["--dataset", dataset] 39 | 40 | def process(middle_process): 41 | return prefix_process + middle_process + suffix_process 42 | 43 | if mode == "fcst_statsforecast": 44 | logger.info("Running StatisticalEnsemble") 45 | subprocess.run(process(["src.statsforecast_pipeline"])) 46 | elif mode == "fcst_moirai": 47 | logger.info("Running SalesforceMoirai") 48 | subprocess.run(process(["src.moirai_pipeline"])) 49 | elif mode == "evaluation": 50 | from src.utils import ExperimentHandler 51 | 52 | eval_df = [] 53 | for dataset in datasets: 54 | logger.info(f"Evaluating {dataset}...") 55 | exp = ExperimentHandler(dataset) 56 | try: 57 | eval_dataset_df = exp.evaluate_models( 58 | [ 59 | "SalesforceMoirai", 60 | "StatisticalEnsemble", 61 | "SeasonalNaive", 62 | ] 63 | ) 64 | print(eval_dataset_df) 65 | eval_df.append(eval_dataset_df) 66 | except Exception as e: 67 | logger.error(e) 68 | eval_df = pd.concat(eval_df).reset_index(drop=True) 69 | exp.save_dataframe(eval_df, "complete-results.csv") 70 | else: 71 | raise ValueError(f"mode {mode} not found") 72 | 73 | 74 | if __name__ == "__main__": 75 | fire.Fire(main) 76 | -------------------------------------------------------------------------------- /experiments/salesforce-moirai/src/moirai_pipeline.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | from typing import Iterable, List, Tuple 3 | 4 | import fire 5 | import pandas as pd 6 | import torch 7 | from gluonts.dataset import Dataset 8 | from gluonts.model.forecast import Forecast 9 | from gluonts.torch.model.predictor import PyTorchPredictor 10 | from huggingface_hub import hf_hub_download 11 | from tqdm import tqdm 12 | from uni2ts.model.moirai import MoiraiForecast 13 | 14 | from src.utils import ExperimentHandler 15 | 16 | 17 | def get_morai_predictor( 18 | model_size: str, 19 | prediction_length: int, 20 | target_dim: int, 21 | batch_size: int, 22 | ) -> PyTorchPredictor: 23 | model = MoiraiForecast.load_from_checkpoint( 24 | checkpoint_path=hf_hub_download( 25 | repo_id=f"Salesforce/moirai-1.0-R-{model_size}", 26 | filename="model.ckpt", 27 | ), 28 | prediction_length=prediction_length, 29 | context_length=200, 30 | patch_size="auto", 31 | num_samples=100, 32 | target_dim=target_dim, 33 | feat_dynamic_real_dim=0, 34 | past_feat_dynamic_real_dim=0, 35 | map_location="cuda:0" if torch.cuda.is_available() else "cpu", 36 | ) 37 | 38 | predictor = model.create_predictor(batch_size) 39 | 40 | return predictor 41 
| 42 | 43 | def gluonts_instance_fcst_to_df( 44 | fcst: Forecast, 45 | quantiles: List[float], 46 | model_name: str, 47 | ) -> pd.DataFrame: 48 | point_forecast = fcst.mean 49 | h = len(point_forecast) 50 | dates = pd.date_range( 51 | fcst.start_date.to_timestamp(), 52 | freq=fcst.freq, 53 | periods=h, 54 | ) 55 | fcst_df = pd.DataFrame( 56 | { 57 | "ds": dates, 58 | "unique_id": fcst.item_id, 59 | model_name: point_forecast, 60 | } 61 | ) 62 | for q in quantiles: 63 | fcst_df[f"{model_name}-q-{q}"] = fcst.quantile(q) 64 | return fcst_df 65 | 66 | 67 | def gluonts_fcsts_to_df( 68 | fcsts: Iterable[Forecast], 69 | quantiles: List[float], 70 | model_name: str, 71 | ) -> pd.DataFrame: 72 | df = [] 73 | for fcst in tqdm(fcsts): 74 | fcst_df = gluonts_instance_fcst_to_df(fcst, quantiles, model_name) 75 | df.append(fcst_df) 76 | return pd.concat(df).reset_index(drop=True) 77 | 78 | 79 | def run_moirai( 80 | gluonts_dataset: Dataset, 81 | model_size: str, 82 | horizon: int, 83 | target_dim: int, 84 | batch_size: int, 85 | quantiles: List[float], 86 | ) -> Tuple[pd.DataFrame, float, str]: 87 | init_time = time() 88 | predictor = get_morai_predictor(model_size, horizon, target_dim, batch_size) 89 | fcsts = predictor.predict(gluonts_dataset) 90 | model_name = "SalesforceMoirai" 91 | fcsts_df = gluonts_fcsts_to_df( 92 | fcsts, 93 | quantiles=quantiles, 94 | model_name=model_name, 95 | ) 96 | total_time = time() - init_time 97 | return fcsts_df, total_time, model_name 98 | 99 | 100 | def main(dataset: str): 101 | exp = ExperimentHandler(dataset) 102 | fcst_df, total_time, model_name = run_moirai( 103 | gluonts_dataset=exp.gluonts_train_dataset, 104 | model_size="large", 105 | horizon=exp.horizon, 106 | target_dim=1, 107 | batch_size=32, 108 | quantiles=exp.quantiles, 109 | ) 110 | exp.save_results(fcst_df, total_time, model_name) 111 | 112 | 113 | if __name__ == "__main__": 114 | fire.Fire(main) 115 | -------------------------------------------------------------------------------- /experiments/salesforce-moirai/src/statsforecast_pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | from time import time 3 | from typing import List, Tuple 4 | 5 | os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1" 6 | os.environ["NIXTLA_NUMBA_CACHE"] = "1" 7 | 8 | import fire 9 | import numpy as np 10 | import pandas as pd 11 | from scipy.stats import norm 12 | from statsforecast import StatsForecast 13 | from statsforecast.models import ( 14 | AutoARIMA, 15 | AutoETS, 16 | AutoCES, 17 | DynamicOptimizedTheta, 18 | SeasonalNaive, 19 | ) 20 | 21 | from src.utils import ExperimentHandler 22 | 23 | 24 | def run_seasonal_naive( 25 | train_df: pd.DataFrame, 26 | horizon: int, 27 | freq: str, 28 | seasonality: int, 29 | level: List[int], 30 | ) -> Tuple[pd.DataFrame, float, str]: 31 | os.environ["NIXTLA_ID_AS_COL"] = "true" 32 | sf = StatsForecast( 33 | models=[SeasonalNaive(season_length=seasonality)], 34 | freq=freq, 35 | n_jobs=-1, 36 | ) 37 | model = sf 38 | init_time = time() 39 | fcsts_df = model.forecast(df=train_df, h=horizon, level=level) 40 | total_time = time() - init_time 41 | return fcsts_df, total_time, "SeasonalNaive" 42 | 43 | 44 | def ensemble_forecasts( 45 | fcsts_df: pd.DataFrame, 46 | quantiles: List[float], 47 | name_models: List[str], 48 | model_name: str, 49 | ) -> pd.DataFrame: 50 | fcsts_df[model_name] = fcsts_df[name_models].mean(axis=1).values # type: ignore 51 | # compute quantiles based on the mean of the forecasts 52 | sigma_models = [] 53 | for 
model in name_models: 54 | fcsts_df[f"sigma_{model}"] = fcsts_df[f"{model}-hi-68.27"] - fcsts_df[model] 55 | sigma_models.append(f"sigma_{model}") 56 | fcsts_df[f"std_{model_name}"] = ( 57 | fcsts_df[sigma_models].pow(2).sum(axis=1).div(len(sigma_models) ** 2).pow(0.5) 58 | ) 59 | z = norm.ppf(quantiles) 60 | q_cols = [] 61 | for q, zq in zip(quantiles, z): 62 | q_col = f"{model_name}-q-{q}" 63 | fcsts_df[q_col] = fcsts_df[model_name] + zq * fcsts_df[f"std_{model_name}"] 64 | q_cols.append(q_col) 65 | fcsts_df = fcsts_df[["unique_id", "ds"] + [model_name] + q_cols] 66 | return fcsts_df 67 | 68 | 69 | def run_statistical_ensemble( 70 | train_df: pd.DataFrame, 71 | horizon: int, 72 | freq: str, 73 | seasonality: int, 74 | quantiles: List[float], 75 | ) -> Tuple[pd.DataFrame, float, str]: 76 | os.environ["NIXTLA_ID_AS_COL"] = "true" 77 | models = [ 78 | AutoARIMA(season_length=seasonality), 79 | AutoETS(season_length=seasonality), 80 | AutoCES(season_length=seasonality), 81 | DynamicOptimizedTheta(season_length=seasonality), 82 | ] 83 | init_time = time() 84 | series_per_core = 15 85 | n_series = train_df["unique_id"].nunique() 86 | n_jobs = min(n_series // series_per_core, os.cpu_count()) 87 | sf = StatsForecast( 88 | models=models, 89 | freq=freq, 90 | n_jobs=n_jobs, 91 | ) 92 | fcsts_df = sf.forecast(df=train_df, h=horizon, level=[68.27]) 93 | name_models = [repr(model) for model in models] 94 | model_name = "StatisticalEnsemble" 95 | fcsts_df = ensemble_forecasts( 96 | fcsts_df, 97 | quantiles, 98 | name_models, 99 | model_name, 100 | ) 101 | total_time = time() - init_time 102 | return fcsts_df, total_time, model_name 103 | 104 | 105 | def main(dataset: str): 106 | exp = ExperimentHandler(dataset) 107 | # seasonal naive benchmark 108 | fcst_df, total_time, model_name = run_seasonal_naive( 109 | train_df=exp.train_df, 110 | horizon=exp.horizon, 111 | freq=exp.freq, 112 | seasonality=exp.seasonality, 113 | level=exp.level, 114 | ) 115 | fcst_df = exp.fcst_from_level_to_quantiles(fcst_df, model_name) 116 | exp.save_results(fcst_df, total_time, model_name) 117 | # statistical ensemble 118 | fcst_df, total_time, model_name = run_statistical_ensemble( 119 | train_df=exp.train_df, 120 | horizon=exp.horizon, 121 | freq=exp.freq, 122 | seasonality=exp.seasonality, 123 | quantiles=exp.quantiles, 124 | ) 125 | exp.save_results(fcst_df, total_time, model_name) 126 | 127 | 128 | if __name__ == "__main__": 129 | from statsforecast.utils import AirPassengers as ap 130 | 131 | AutoARIMA(season_length=12).forecast(ap.astype(np.float32), h=12) 132 | fire.Fire(main) 133 | -------------------------------------------------------------------------------- /experiments/vn1-competition/Makefile: -------------------------------------------------------------------------------- 1 | download_data: 2 | mkdir -p data 3 | curl https://www.datasource.ai/attachments/eyJpZCI6Ijk4NDYxNjE2NmZmZjM0MGRmNmE4MTczOGMyMzI2ZWI2LmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiUGhhc2UgMCAtIFNhbGVzLmNzdiIsInNpemUiOjEwODA0NjU0LCJtaW1lX3R5cGUiOiJ0ZXh0L2NzdiJ9fQ -o data/phase_0_sales.csv 4 | curl https://www.datasource.ai/attachments/eyJpZCI6ImM2OGQxNGNmNTJkZDQ1MTUyZTg0M2FkMDAyMjVlN2NlLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiUGhhc2UgMSAtIFNhbGVzLmNzdiIsInNpemUiOjEwMTgzOTYsIm1pbWVfdHlwZSI6InRleHQvY3N2In19 -o data/phase_1_sales.csv 5 | curl 
https://www.datasource.ai/attachments/eyJpZCI6IjhlNmJmNmU3ZTlhNWQ4NTcyNGVhNTI4YjAwNTk3OWE1LmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiUGhhc2UgMiAtIFNhbGVzLmNzdiIsInNpemUiOjEwMTI0MzcsIm1pbWVfdHlwZSI6InRleHQvY3N2In19 -o data/phase_2_sales.csv 6 | curl https://www.datasource.ai/attachments/eyJpZCI6IjI1NDQxYmMyMTQ3MTA0MjJhMDcyYjllODcwZjEyNmY4LmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoicGhhc2UgMiBzdWJtaXNzaW9uIGV4YW1pbmUgc21vb3RoZWQgMjAyNDEwMTcgRklOQUwuY3N2Iiwic2l6ZSI6MTk5MzAzNCwibWltZV90eXBlIjoidGV4dC9jc3YifX0 -o data/solution_1st_place.csv 7 | curl https://www.datasource.ai/attachments/eyJpZCI6IjU3ODhjZTUwYTU3MTg3NjFlYzMzOWU0ZTg3MWUzNjQxLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoidm4xX3N1Ym1pc3Npb25fanVzdGluX2Z1cmxvdHRlLmNzdiIsInNpemUiOjM5MDkzNzksIm1pbWVfdHlwZSI6InRleHQvY3N2In19 -o data/solution_2nd_place.csv 8 | curl https://www.datasource.ai/attachments/eyJpZCI6ImE5NzcwNTZhMzhhMTc2ZWJjODFkMDMwMTM2Y2U2MTdlLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiYXJzYW5pa3phZF9zdWIuY3N2Iiwic2l6ZSI6Mzg4OTcyNCwibWltZV90eXBlIjoidGV4dC9jc3YifX0 -o data/solution_3rd_place.csv 9 | curl https://www.datasource.ai/attachments/eyJpZCI6ImVlZmUxYWY2NDFjOWMwM2IxMzRhZTc2MzI1Nzg3NzIxLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiVEZUX3R1bmVkX1YyX3NlZWRfNDIuY3N2Iiwic2l6ZSI6NjA3NDgzLCJtaW1lX3R5cGUiOiJ0ZXh0L2NzdiJ9fQ -o data/solution_4th_place.csv 10 | curl https://www.datasource.ai/attachments/eyJpZCI6IjMwMDEwMmY3NTNhMzlhN2YxNTk3ODYxZTI1N2Q2NzRmLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiZGl2aW5lb3B0aW1pemVkd2VpZ2h0c2Vuc2VtYmxlLmNzdiIsInNpemUiOjE3OTU0NzgsIm1pbWVfdHlwZSI6InRleHQvY3N2In19 -o data/solution_5th_place.csv 11 | -------------------------------------------------------------------------------- /experiments/vn1-competition/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/experiments/vn1-competition/src/__init__.py -------------------------------------------------------------------------------- /experiments/vn1-competition/src/functions.R: -------------------------------------------------------------------------------- 1 | 2 | # Functions for VN1 Forecasting Competition ---- 3 | 4 | read_and_prepare_data <- function(dataset){ 5 | # Reads data in wide format and returns it in long format with columns `unique_id`, `ds`, and `y` 6 | url <- get_dataset_url(dataset) 7 | df_wide <- fread(url) 8 | df_wide <- df_wide |> 9 | mutate(unique_id = paste0(Client, "/", Warehouse, "/", Product)) |> 10 | select(c(unique_id, everything())) |> 11 | select(-c(Client, Warehouse, Product)) 12 | 13 | df <- pivot_longer( 14 | data = df_wide, 15 | cols = -unique_id, 16 | names_to = "ds", 17 | values_to = "y" 18 | ) 19 | 20 | if(startsWith(dataset, "winners")){ 21 | names(df)[which(names(df) == "y")] <- dataset 22 | } 23 | 24 | return(df) 25 | } 26 | 27 | get_train_data <- function(df0, df1){ 28 | # Merges training data from phase 0 and phase 1 and removes leading zeros 29 | df <- rbind(df0, df1) |> 30 | arrange(unique_id, ds) 31 | 32 | df_clean <- df |> 33 | group_by(unique_id) |> 34 | mutate(cumsum = cumsum(y)) |> 35 | filter(cumsum > 0) |> 36 | select(-cumsum) |> 37 | ungroup() 38 | 39 | return(df_clean) 40 | } 41 | 42 | vn1_competition_evaluation <- function(test, forecast, model){ 43 | # Computes competition evaluation 44 | if(!is.character(forecast$ds)){ 45 | forecast$ds <- 
as.character(forecast$ds) # nixtlar returns timestamps for plotting 46 | } 47 | 48 | res <- merge(forecast, test, by=c("unique_id", "ds")) 49 | 50 | res <- res |> 51 | mutate(abs_err = abs(res[[model]]-res$y)) |> 52 | mutate(err = res[[model]]-res$y) 53 | 54 | abs_err = sum(res$abs_err, na.rm = TRUE) 55 | err = sum(res$err, na.rm = TRUE) 56 | score = abs_err+abs(err) 57 | score = score/sum(res$y) 58 | score = round(score, 4) 59 | 60 | return(score) 61 | } 62 | 63 | get_dataset_url <- function(dataset){ 64 | # Returns the url of the given competition dataset 65 | urls <- list( 66 | phase0_sales = "https://www.datasource.ai/attachments/eyJpZCI6Ijk4NDYxNjE2NmZmZjM0MGRmNmE4MTczOGMyMzI2ZWI2LmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiUGhhc2UgMCAtIFNhbGVzLmNzdiIsInNpemUiOjEwODA0NjU0LCJtaW1lX3R5cGUiOiJ0ZXh0L2NzdiJ9fQ", 67 | phase1_sales = "https://www.datasource.ai/attachments/eyJpZCI6ImM2OGQxNGNmNTJkZDQ1MTUyZTg0M2FkMDAyMjVlN2NlLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiUGhhc2UgMSAtIFNhbGVzLmNzdiIsInNpemUiOjEwMTgzOTYsIm1pbWVfdHlwZSI6InRleHQvY3N2In19", 68 | phase2_sales = "https://www.datasource.ai/attachments/eyJpZCI6IjhlNmJmNmU3ZTlhNWQ4NTcyNGVhNTI4YjAwNTk3OWE1LmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiUGhhc2UgMiAtIFNhbGVzLmNzdiIsInNpemUiOjEwMTI0MzcsIm1pbWVfdHlwZSI6InRleHQvY3N2In19", 69 | winners1 = "https://www.datasource.ai/attachments/eyJpZCI6IjI1NDQxYmMyMTQ3MTA0MjJhMDcyYjllODcwZjEyNmY4LmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoicGhhc2UgMiBzdWJtaXNzaW9uIGV4YW1pbmUgc21vb3RoZWQgMjAyNDEwMTcgRklOQUwuY3N2Iiwic2l6ZSI6MTk5MzAzNCwibWltZV90eXBlIjoidGV4dC9jc3YifX0", 70 | winners2 = "https://www.datasource.ai/attachments/eyJpZCI6IjU3ODhjZTUwYTU3MTg3NjFlYzMzOWU0ZTg3MWUzNjQxLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoidm4xX3N1Ym1pc3Npb25fanVzdGluX2Z1cmxvdHRlLmNzdiIsInNpemUiOjM5MDkzNzksIm1pbWVfdHlwZSI6InRleHQvY3N2In19", 71 | winners3 = "https://www.datasource.ai/attachments/eyJpZCI6ImE5NzcwNTZhMzhhMTc2ZWJjODFkMDMwMTM2Y2U2MTdlLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiYXJzYW5pa3phZF9zdWIuY3N2Iiwic2l6ZSI6Mzg4OTcyNCwibWltZV90eXBlIjoidGV4dC9jc3YifX0", 72 | winners4 = "https://www.datasource.ai/attachments/eyJpZCI6ImVlZmUxYWY2NDFjOWMwM2IxMzRhZTc2MzI1Nzg3NzIxLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiVEZUX3R1bmVkX1YyX3NlZWRfNDIuY3N2Iiwic2l6ZSI6NjA3NDgzLCJtaW1lX3R5cGUiOiJ0ZXh0L2NzdiJ9fQ", 73 | winners5 = "https://www.datasource.ai/attachments/eyJpZCI6IjMwMDEwMmY3NTNhMzlhN2YxNTk3ODYxZTI1N2Q2NzRmLmNzdiIsInN0b3JhZ2UiOiJzdG9yZSIsIm1ldGFkYXRhIjp7ImZpbGVuYW1lIjoiZGl2aW5lb3B0aW1pemVkd2VpZ2h0c2Vuc2VtYmxlLmNzdiIsInNpemUiOjE3OTU0NzgsIm1pbWVfdHlwZSI6InRleHQvY3N2In19" 74 | ) 75 | 76 | return(urls[[dataset]]) 77 | } 78 | 79 | -------------------------------------------------------------------------------- /experiments/vn1-competition/src/main.R: -------------------------------------------------------------------------------- 1 | 2 | # VN1 Forecasting Competition Solution with nixtlar ---- 3 | 4 | install.packages(c("nixtlar", "tidyverse", "data.table")) 5 | 6 | library(nixtlar) 7 | library(tidyverse) 8 | library(data.table) 9 | 10 | source("functions.R") # same directory as main.R 11 | 12 | ## Load Data ---- 13 | sales0 <- read_and_prepare_data("phase0_sales") 14 | sales1 <- read_and_prepare_data("phase1_sales") 15 | test_df <- read_and_prepare_data("phase2_sales") 16 | 17 | ## Prepare Training Dataset ---- 18 | train_df <- get_train_data(sales0, sales1) 19 | 20 | ## Generate TimeGPT 
Forecast ---- 21 | 22 | # nixtla_client_setup(api_key = "Your API key here") 23 | # Learn how to set up your API key here: https://nixtla.github.io/nixtlar/articles/setting-up-your-api-key.html 24 | 25 | fc <- nixtla_client_forecast(train_df, h=13, model="timegpt-1-long-horizon") 26 | 27 | ## Visualize TimeGPT Forecast ---- 28 | nixtla_client_plot(train_df, fc) 29 | 30 | ## Evaluate TimeGPT & Top 5 Competition Solutions ---- 31 | timegpt_score <- vn1_competition_evaluation(test_df, fc, "TimeGPT") 32 | 33 | scores <- lapply(1:5, function(i){ # Top 5 34 | winner_df <- read_and_prepare_data(paste0("winners", i)) 35 | vn1_competition_evaluation(test_df, winner_df, model = paste0("winners", i)) 36 | }) 37 | 38 | scores_df <- data.frame( 39 | "Result" = c(paste0("Place #", 1:5), "TimeGPT"), 40 | "Score" = c(as.numeric(scores), timegpt_score) 41 | ) 42 | 43 | scores_df <- scores_df |> arrange(Score) 44 | print(scores_df) # TimeGPT places 2nd! 45 | -------------------------------------------------------------------------------- /experiments/vn1-competition/src/main.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from dotenv import load_dotenv 7 | from nixtla import NixtlaClient 8 | 9 | load_dotenv() 10 | 11 | 12 | def read_and_prepare_data(file_path: str, value_name: str = "y") -> pd.DataFrame: 13 | """Reads data in wide format, and returns it in long format with columns `unique_id`, `ds`, `y`""" 14 | df = pd.read_csv(file_path) 15 | uid_cols = ["Client", "Warehouse", "Product"] 16 | df["unique_id"] = df[uid_cols].astype(str).agg("-".join, axis=1) 17 | df = df.drop(uid_cols, axis=1) 18 | df = df.melt(id_vars=["unique_id"], var_name="ds", value_name=value_name) 19 | df["ds"] = pd.to_datetime(df["ds"]) 20 | df = df.sort_values(by=["unique_id", "ds"]) 21 | return df 22 | 23 | 24 | def get_train_data() -> pd.DataFrame: 25 | """Reads all train data and returns it in long format with columns `unique_id`, `ds`, `y`""" 26 | train_list = [read_and_prepare_data(f"./data/phase_{i}_sales.csv") for i in [0, 1]] 27 | train_df = pd.concat(train_list).reset_index(drop=True) 28 | train_df = train_df.sort_values(by=["unique_id", "ds"]) 29 | 30 | def remove_leading_zeros(group): 31 | first_non_zero_index = group["y"].ne(0).idxmax() 32 | return group.loc[first_non_zero_index:] 33 | 34 | train_df = ( 35 | train_df.groupby("unique_id").apply(remove_leading_zeros).reset_index(drop=True) 36 | ) 37 | return train_df 38 | 39 | 40 | def get_competition_forecasts() -> pd.DataFrame: 41 | """Reads all competition forecasts and returns it in long format with columns `unique_id`, `ds`, `y`""" 42 | fcst_df: pd.DataFrame | None = None 43 | for place in ["1st", "2nd", "3rd", "4th", "5th"]: 44 | fcst_df_place = read_and_prepare_data( 45 | f"./data/solution_{place}_place.csv", place 46 | ) 47 | if fcst_df is None: 48 | fcst_df = fcst_df_place 49 | else: 50 | fcst_df = fcst_df.merge( 51 | fcst_df_place, 52 | on=["unique_id", "ds"], 53 | how="left", 54 | ) 55 | return fcst_df 56 | 57 | 58 | def vn1_competition_evaluation(forecasts: pd.DataFrame) -> pd.DataFrame: 59 | """Computes competition evaluation scores""" 60 | actual = read_and_prepare_data("./data/phase_2_sales.csv") 61 | res = actual[["unique_id", "ds", "y"]].merge( 62 | forecasts, on=["unique_id", "ds"], how="left" 63 | ) 64 | ids_forecasts = forecasts["unique_id"].unique() 65 | ids_res = res["unique_id"].unique() 66 | assert set(ids_forecasts) == 
set(ids_res), "Some unique_ids are missing" 67 | scores = {} 68 | for model in [col for col in forecasts.columns if col not in ["unique_id", "ds"]]: 69 | abs_err = np.nansum(np.abs(res[model] - res["y"])) 70 | err = np.nansum(res[model] - res["y"]) 71 | score = abs_err + abs(err) 72 | score = score / res["y"].sum() 73 | scores[model] = round(score, 4) 74 | score_df = pd.DataFrame(list(scores.items()), columns=["model", "score"]) 75 | score_df = score_df.sort_values(by="score") 76 | return score_df 77 | 78 | 79 | def main(): 80 | """Complete pipeline""" 81 | train_df = get_train_data() 82 | client = NixtlaClient() 83 | init = time() 84 | fcst_df = client.forecast(train_df, h=13, model="timegpt-1-long-horizon") 85 | print(f"TimeGPT time: {time() - init}") 86 | fcst_df_comp = get_competition_forecasts() 87 | fcst_df = fcst_df.merge(fcst_df_comp, on=["unique_id", "ds"], how="left") 88 | eval_df = vn1_competition_evaluation(fcst_df) 89 | print(eval_df) 90 | 91 | 92 | if __name__ == "__main__": 93 | main() 94 | -------------------------------------------------------------------------------- /experiments/vn1-competition/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/experiments/vn1-competition/tests/__init__.py -------------------------------------------------------------------------------- /experiments/vn1-competition/tests/test_scores.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from src.main import vn1_competition_evaluation, get_competition_forecasts 4 | 5 | 6 | def test_vn1_competition_evaluation(): 7 | forecasts = get_competition_forecasts() 8 | eval_df = vn1_competition_evaluation(forecasts) 9 | assert len(eval_df) == 5 10 | pd.testing.assert_series_equal( 11 | eval_df["score"], 12 | pd.Series([0.4637, 0.4657, 0.4758, 0.4774, 0.4808]), 13 | check_names=False, 14 | ) 15 | -------------------------------------------------------------------------------- /nbs/_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | 4 | format: 5 | html: 6 | theme: cosmo 7 | fontsize: 1em 8 | linestretch: 1.7 9 | css: styles.css 10 | toc: true 11 | 12 | website: 13 | twitter-card: 14 | image: "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg" 15 | site: "@Nixtlainc" 16 | open-graph: 17 | image: "https://github.com/Nixtla/styles/blob/2abf51612584169874c90cd7c4d347e3917eaf73/images/Banner%20Github.png" 18 | google-analytics: "G-NXJNCVR18L" 19 | repo-actions: [issue] 20 | favicon: favicon_png.png 21 | navbar: 22 | background: primary 23 | search: true 24 | collapse-below: lg 25 | left: 26 | - text: "Get Started" 27 | href: timegpt.ipynb 28 | - text: "NixtlaVerse" 29 | menu: 30 | - text: "StatsForecast ⚡️" 31 | href: https://github.com/nixtla/statsforecast 32 | - text: "MLForecast 🤖" 33 | href: https://github.com/nixtla/mlforecast 34 | - text: "NeuralForecast 🧠" 35 | href: https://github.com/nixtla/neuralforecast 36 | - text: "HierarchicalForecast 👑" 37 | href: https://github.com/nixtla/hierarchicalforecast 38 | - text: "Help" 39 | menu: 40 | - text: "Report an Issue" 41 | icon: bug 42 | href: https://github.com/nixtla/statsforecast/issues/new/choose 43 | - text: "Join our Slack" 44 | icon: chat-right-text 45 | href: https://join.slack.com/t/nixtlaworkspace/shared_invite/zt-135dssye9-fWTzMpv2WBthq8NK0Yvu6A 46 | right: 
47 | - icon: github 48 | href: "https://github.com/nixtla/nixtla" 49 | - icon: twitter 50 | href: https://twitter.com/nixtlainc 51 | aria-label: Nixtla Twitter 52 | 53 | sidebar: 54 | style: floating 55 | body-footer: | 56 | Give us a ⭐ on [Github](https://github.com/nixtla/nixtla) 57 | 58 | metadata-files: [nbdev.yml, sidebar.yml] 59 | -------------------------------------------------------------------------------- /nbs/assets/Inter-VariableFont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/assets/Inter-VariableFont.ttf -------------------------------------------------------------------------------- /nbs/assets/M5_categorical_variables_example.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/assets/M5_categorical_variables_example.parquet -------------------------------------------------------------------------------- /nbs/assets/M5_what_if_pricing_example.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/assets/M5_what_if_pricing_example.parquet -------------------------------------------------------------------------------- /nbs/assets/forecast_synthetic_data.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/assets/forecast_synthetic_data.mp4 -------------------------------------------------------------------------------- /nbs/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/assets/logo.png -------------------------------------------------------------------------------- /nbs/assets/long_horizon_example_Y_df.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/assets/long_horizon_example_Y_df.parquet -------------------------------------------------------------------------------- /nbs/docs/capabilities/000_capabilities.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6de758ee-a0d2-4b3f-acff-eed419dd17c5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Capabilities" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "5d267032-535b-4b7b-b7d3-d2db8f673af6", 14 | "metadata": {}, 15 | "source": [ 16 | "This section offers an overview of capabilities of TimeGPT" 17 | ] 18 | } 19 | ], 20 | "metadata": { 21 | "kernelspec": { 22 | "display_name": "python3", 23 | "language": "python", 24 | "name": "python3" 25 | } 26 | }, 27 | "nbformat": 4, 28 | "nbformat_minor": 5 29 | } 30 | -------------------------------------------------------------------------------- /nbs/docs/capabilities/forecast/00_forecast.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6de758ee-a0d2-4b3f-acff-eed419dd17c5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Forecast" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | 
"id": "5d267032-535b-4b7b-b7d3-d2db8f673af6", 14 | "metadata": {}, 15 | "source": [ 16 | "This section shows the capabilities TimeGPT offers for forecasting.\n", 17 | "\n", 18 | "TimeGPT is capable of zero-shot forecasting a wide variety of time series from different domains, thanks to its pretraining on a vast amount of time series data.\n", 19 | "\n", 20 | "Here, you will find recipes for the following tasks:\n", 21 | "\n", 22 | "* [Zero-shot forecasting](https://docs.nixtla.io/docs/capabilities-forecast-quickstart)\n", 23 | "\n", 24 | "* [Forecasting with exogenous variables](https://docs.nixtla.io/docs/capabilities-forecast-add_exogenous_variables)\n", 25 | "\n", 26 | "* [Forecasting with holidays and special dates](https://docs.nixtla.io/docs/capabilities-forecast-add_holidays_and_special_dates)\n", 27 | "\n", 28 | "* [Forecasting with categorical variables](https://docs.nixtla.io/docs/capabilities-forecast-add_categorical_variables)\n", 29 | "\n", 30 | "* [Long-horizon forecasting](https://docs.nixtla.io/docs/capabilities-forecast-long_horizon_forecasting)\n", 31 | "\n", 32 | "* [Forecasting multiple series](https://docs.nixtla.io/docs/capabilities-forecast-multiple_series_forecasting)\n", 33 | "\n", 34 | "* [Fine-tuning TimeGPT](https://docs.nixtla.io/docs/capabilities-forecast-fine_tuning)\n", 35 | "\n", 36 | "* [Fine-tuning with a specific loss function](https://docs.nixtla.io/docs/capabilities-forecast-finetuning_with_a_custom_loss_function)\n", 37 | "\n", 38 | "* [Cross-validation](https://docs.nixtla.io/docs/capabilities-forecast-cross_validation)\n", 39 | "\n", 40 | "* [Adding prediction intervals](https://docs.nixtla.io/docs/capabilities-forecast-predictions_intervals)\n", 41 | "\n", 42 | "* [Dealing with irregular timestamps](https://docs.nixtla.io/docs/capabilities-forecast-irregular_timestamps)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "ec7b0357", 48 | "metadata": {}, 49 | "source": [] 50 | } 51 | ], 52 | "metadata": { 53 | "kernelspec": { 54 | "display_name": "python3", 55 | "language": "python", 56 | "name": "python3" 57 | } 58 | }, 59 | "nbformat": 4, 60 | "nbformat_minor": 5 61 | } 62 | -------------------------------------------------------------------------------- /nbs/docs/capabilities/forecast/03_holidays_special_dates.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#| hide\n", 10 | "!pip install -Uqq nixtla" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "#| hide \n", 20 | "from nixtla.utils import in_colab" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "#| hide \n", 30 | "IN_COLAB = in_colab()" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "#| hide\n", 40 | "if not IN_COLAB:\n", 41 | " from nixtla.utils import colab_badge\n", 42 | " from dotenv import load_dotenv" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# Add holidays and special dates\n", 50 | "\n", 51 | "You can create DataFrames specifying holidays for particular countries and specify your own special dates to include them as features for forecasting." 
52 |     ] 53 |    }, 54 |    { 55 |     "cell_type": "code", 56 |     "execution_count": null, 57 |     "metadata": {}, 58 |     "outputs": [ 59 |      { 60 |       "data": { 61 |        "text/markdown": [ 62 |         "[![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Nixtla/nixtla/blob/main/nbs/docs/capabilities/forecast/03_holidays_special_dates.ipynb)" 63 |        ], 64 |        "text/plain": [ 65 |         "" 66 |        ] 67 |       }, 68 |       "metadata": {}, 69 |       "output_type": "display_data" 70 |      } 71 |     ], 72 |     "source": [ 73 |      "#| echo: false\n", 74 |      "if not IN_COLAB:\n", 75 |      "    load_dotenv()\n", 76 |      "    colab_badge('docs/capabilities/forecast/03_holidays_special_dates')" 77 |     ] 78 |    }, 79 |    { 80 |     "cell_type": "code", 81 |     "execution_count": null, 82 |     "metadata": {}, 83 |     "outputs": [], 84 |     "source": [ 85 |      "import pandas as pd\n", 86 |      "from nixtla import NixtlaClient\n", 87 |      "from nixtla.date_features import CountryHolidays\n", 88 |      "from nixtla.date_features import SpecialDates" 89 |     ] 90 |    }, 91 |    { 92 |     "cell_type": "code", 93 |     "execution_count": null, 94 |     "metadata": {}, 95 |     "outputs": [], 96 |     "source": [ 97 |      "nixtla_client = NixtlaClient(\n", 98 |      "    # defaults to os.environ.get(\"NIXTLA_API_KEY\")\n", 99 |      "    api_key = 'my_api_key_provided_by_nixtla'\n", 100 |      ")" 101 |     ] 102 |    }, 103 |    { 104 |     "cell_type": "markdown", 105 |     "metadata": {}, 106 |     "source": [ 107 |      "> 👍 Use an Azure AI endpoint\n", 108 |      "> \n", 109 |      "> To use an Azure AI endpoint, remember to also set the `base_url` argument:\n", 110 |      "> \n", 111 |      "> `nixtla_client = NixtlaClient(base_url=\"your azure ai endpoint\", api_key=\"your api_key\")`" 112 |     ] 113 |    }, 114 |    { 115 |     "cell_type": "code", 116 |     "execution_count": null, 117 |     "metadata": {}, 118 |     "outputs": [], 119 |     "source": [ 120 |      "#| hide\n", 121 |      "if not IN_COLAB:\n", 122 |      "    nixtla_client = NixtlaClient()" 123 |     ] 124 |    }, 125 |    { 126 |     "cell_type": "code", 127 |     "execution_count": null, 128 |     "metadata": {}, 129 |     "outputs": [], 130 |     "source": [ 131 |      "# Get country holidays for the US\n", 132 |      "c_holidays = CountryHolidays(countries=['US'])\n", 133 |      "periods = 365 * 1\n", 134 |      "dates = pd.date_range(end='2023-09-01', periods=periods)\n", 135 |      "holidays_df = c_holidays(dates)\n", 136 |      "\n", 137 |      "# Specify your own special dates\n", 138 |      "special_dates = SpecialDates(\n", 139 |      "    special_dates={\n", 140 |      "        'Important Dates': ['2021-02-26', '2020-02-26'],\n", 141 |      "        'Very Important Dates': ['2021-01-26', '2020-01-26', '2019-01-26']\n", 142 |      "    }\n", 143 |      ")\n", 144 |      "periods = 365 * 1\n", 145 |      "dates = pd.date_range(end='2023-09-01', periods=periods)\n", 146 |      "special_dates_df = special_dates(dates)" 147 |     ] 148 |    }, 149 |    { 150 |     "cell_type": "markdown", 151 |     "metadata": {}, 152 |     "source": [ 153 |      "For a detailed guide on using special dates and holidays, read our tutorial on [Holidays and special dates](https://docs.nixtla.io/docs/tutorials-holidays_and_special_dates)."
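, "\n", "As a minimal, hedged sketch (not part of the original recipe) of how the date features built above could be passed to TimeGPT: it assumes `df` is a long-format pandas DataFrame with `unique_id`, `ds` and `y` columns, that the data is daily, and that your `nixtla` version exposes the `date_features` argument of `forecast`.\n", "\n", "```python\n", "# Hypothetical usage: pass the callables created above as date features\n", "fcst_df = nixtla_client.forecast(\n", "    df=df,        # assumed long-format DataFrame (unique_id, ds, y)\n", "    h=14,\n", "    freq='D',     # assumed daily frequency\n", "    date_features=[c_holidays, special_dates],\n", ")\n", "```"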
154 | ] 155 | } 156 | ], 157 | "metadata": { 158 | "kernelspec": { 159 | "display_name": "python3", 160 | "language": "python", 161 | "name": "python3" 162 | } 163 | }, 164 | "nbformat": 4, 165 | "nbformat_minor": 2 166 | } 167 | -------------------------------------------------------------------------------- /nbs/docs/capabilities/historical-anomaly-detection/00_historical_anomaly_detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6de758ee-a0d2-4b3f-acff-eed419dd17c5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Historical anomaly detection" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "5d267032-535b-4b7b-b7d3-d2db8f673af6", 14 | "metadata": {}, 15 | "source": [ 16 | "This section provides various recipes for performing historical anomaly detection using TimeGPT.\n", 17 | "\n", 18 | "Historical anomaly detection identifies data points that deviate from the expected behavior over a given historical time series, helping to spot fraudulent activity, security breaches, or significant outliers.\n", 19 | "\n", 20 | "The process involves generating predictions and constructing a 99% confidence interval. Data points falling outside this interval are considered anomalies.\n", 21 | "\n", 22 | "This section covers:\n", 23 | "\n", 24 | "* [Historical anomaly detection](https://docs.nixtla.io/docs/capabilities-historical-anomaly-detection-quickstart)\n", 25 | "\n", 26 | "* [Historical anomaly detection with exogenous features](https://docs.nixtla.io/docs/capabilities-historical-anomaly-detection-add_exogenous_variables)\n", 27 | "\n", 28 | "* [Historical anomaly detection with date features](https://docs.nixtla.io/docs/capabilities-historical-anomaly-detection-add_date_features)\n", 29 | "\n", 30 | "* [Modifying the confidence intervals](https://docs.nixtla.io/docs/capabilities-historical-anomaly-detection-add_confidence_levels)" 31 | ] 32 | } 33 | ], 34 | "metadata": { 35 | "kernelspec": { 36 | "display_name": "python3", 37 | "language": "python", 38 | "name": "python3" 39 | } 40 | }, 41 | "nbformat": 4, 42 | "nbformat_minor": 5 43 | } 44 | -------------------------------------------------------------------------------- /nbs/docs/capabilities/online-anomaly-detection/00_online_anomaly_detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Online (Real-Time) Anomaly Detection" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Online anomaly detection dynamically identifies anomalies as data streams in, allowing users to specify the number of timestamps to monitor. This method is well-suited for immediate applications, such as fraud detection, live sensor monitoring, or tracking real-time demand changes. 
By focusing on recent data and continuously generating forecasts, it enables timely responses to anomalies in critical scenarios.\n", 15 | "\n", 16 | "This section provides various recipes for performing real-time anomaly detection using TimeGPT, offering users the ability to detect outliers and unusual patterns as they emerge, ensuring prompt intervention in time-sensitive situations.\n", 17 | "\n", 18 | "This section covers:\n", 19 | "\n", 20 | "* [Online anomaly detection](https://docs.nixtla.io/docs/capabilities-online-anomaly-detection-quickstart)\n", 21 | "\n", 22 | "* [How to adjust the detection process](https://docs.nixtla.io/docs/capabilities-online-anomaly-detection-adjusting_detection_process.ipynb)\n", 23 | "\n", 24 | "* [Univariate vs. multiseries anomaly detection](https://docs.nixtla.io/docs/capabilities-online-anomaly-detection-univariate_vs_multivariate_anomaly_detection)\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [] 31 | } 32 | ], 33 | "metadata": {}, 34 | "nbformat": 4, 35 | "nbformat_minor": 2 36 | } 37 | -------------------------------------------------------------------------------- /nbs/docs/deployment/2_azure_ai.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# AzureAI \n", 8 | "\n", 9 | "> The foundational models for time series by Nixtla can be deployed on your Azure subscription. This page explains how to easily get started with TimeGEN-1 deployed as an Azure AI endpoint. If you use the `nixtla` library, it should be a drop-in replacement where you only need to change the client parameters (endpoint URL, API key, model name)." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Deploying TimeGEN-1\n", 17 | "\n" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Using the model\n", 25 | "\n", 26 | "Once your model is deployed and provided that you have the relevant permissions, consuming it will basically be the same process as for a Nixtla endpoint.\n", 27 | "\n", 28 | "To run the examples below, you will need to define the following environment variables:\n", 29 | "\n", 30 | "- `AZURE_AI_NIXTLA_BASE_URL` is your api URL, should be of the form `https://your-endpoint.inference.ai.azure.com/`.\n", 31 | "- `AZURE_AI_NIXTLA_API_KEY` is your authentication key." 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## How to use" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "Just import the library, set your credentials, and start forecasting in two lines of code!" 
46 |    ] 47 |   }, 48 |   { 49 |    "cell_type": "markdown", 50 |    "metadata": {}, 51 |    "source": [ 52 |     "```bash\n", 53 |     "pip install nixtla\n", 54 |     "```" 55 |    ] 56 |   }, 57 |   { 58 |    "cell_type": "markdown", 59 |    "metadata": {}, 60 |    "source": [ 61 |     "```python\n", 62 |     "import os\n", 63 |     "from nixtla import NixtlaClient\n", 64 |     "\n", 65 |     "base_url = os.environ[\"AZURE_AI_NIXTLA_BASE_URL\"]\n", 66 |     "api_key = os.environ[\"AZURE_AI_NIXTLA_API_KEY\"]\n", 67 |     "model = \"azureai\"\n", 68 |     "\n", 69 |     "nixtla_client = NixtlaClient(api_key=api_key, base_url=base_url)\n", 70 |     "nixtla_client.forecast(\n", 71 |     "    ...,\n", 72 |     "    model=model,\n", 73 |     ")\n", 74 |     "```" 75 |    ] 76 |   } 77 |  ], 78 |  "metadata": { 79 |   "kernelspec": { 80 |    "display_name": "python3", 81 |    "language": "python", 82 |    "name": "python3" 83 |   } 84 |  }, 85 |  "nbformat": 4, 86 |  "nbformat_minor": 4 87 | } 88 | -------------------------------------------------------------------------------- /nbs/docs/getting-started/41_pricing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 |  "cells": [ 3 |   { 4 |    "cell_type": "markdown", 5 |    "metadata": {}, 6 |    "source": [ 7 |     "# TimeGPT Subscription Plans\n", 8 |     "\n", 9 |     "We offer various Enterprise plans tailored to your forecasting needs. The number of API calls, number of users, and support levels can be customized based on your needs. We also offer an option for a self-hosted version and a version hosted on Azure.\n", 10 |     "\n", 11 |     "Please get in touch with us at support@nixtla.io for more information regarding pricing options and to discuss your specific requirements. For organizations interested in exploring our solution further, you can schedule a demo [here]( https://meetings.hubspot.com/cristian-challu/enterprise-contact-us?uuid=dc037f5a-d93b-4[…]90b-a611dd9460af&utm_source=github&utm_medium=pricing_page) " 12 |    ] 13 |   }, 14 |   { 15 |    "cell_type": "markdown", 16 |    "metadata": {}, 17 |    "source": [ 18 |     "**Free trial available**\n", 19 |     "\n", 20 |     "When you [create your account](https://dashboard.nixtla.io), you’ll receive a 30-day free trial, no credit card required. After 30 days, access will expire unless you upgrade to a paid plan. Contact us to continue leveraging TimeGPT for accurate and easy-to-use forecasting!" 21 |    ] 22 |   }, 23 |   { 24 |    "cell_type": "markdown", 25 |    "metadata": {}, 26 |    "source": [ 27 |     "**More information on pricing and billing**\n", 28 |     "\n", 29 |     "For additional information on pricing and billing, please see our FAQ." 30 |    ] 31 |   } 32 |  ], 33 |  "metadata": { 34 |   "kernelspec": { 35 |    "display_name": "python3", 36 |    "language": "python", 37 |    "name": "python3" 38 |   } 39 |  }, 40 |  "nbformat": 4, 41 |  "nbformat_minor": 4 42 | } 43 | -------------------------------------------------------------------------------- /nbs/docs/reference/03_excel_addin.ipynb: -------------------------------------------------------------------------------- 1 | { 2 |  "cells": [ 3 |   { 4 |    "cell_type": "markdown", 5 |    "metadata": {}, 6 |    "source": [ 7 |     "# TimeGPT Excel Add-in (Beta)\n", 8 |     "\n", 9 |     "## Installation\n", 10 |     "\n", 11 |     "Head to the [TimeGPT Excel add-in page in Microsoft AppSource](https://appsource.microsoft.com/en-us/product/office/WA200006429?tab=Overview) and click on \"Get it now\".\n", 12 |     "\n", 13 |     "## Usage\n", 14 |     "> 📘 Access token required\n", 15 |     "> \n", 16 |     "> The TimeGPT Excel Add-in requires an access token.
Get your API Key on the [Nixtla Dashboard](http://dashboard.nixtla.io).\n", 17 |     "\n", 18 |     "## Support\n", 19 |     "\n", 20 |     "If you have questions or need support, please email `support@nixtla.io`.\n", 21 |     "\n", 22 |     "## How-to\n", 23 |     "\n", 24 |     "### Settings\n", 25 |     "\n", 26 |     "If this is your first time using Excel add-ins, find information on how to add Excel add-ins for your version of Excel. In the Office Add-ins Store, you'll search for \"TimeGPT\". \n", 27 |     "\n", 28 |     "Once you have installed the TimeGPT add-in, the add-in comes up in a sidebar task pane. \n", 29 |     "* Read through the Welcome screen.\n", 30 |     "* Click on the **'Get Started'** button.\n", 31 |     "* The API URL is already set to: https://api.nixtla.io.\n", 32 |     "* Copy your API key from [Nixtla Dashboard](http://dashboard.nixtla.io). Paste it into the box that says **API Key, Bearer**.\n", 33 |     "* Click the gray arrow next to that box on the right. \n", 34 |     "* You'll get to a screen with options for 'Forecast' and 'Anomaly Detection'.\n", 35 |     "\n", 36 |     "To access the settings later, click the gear icon in the top left.\n", 37 |     "\n", 38 |     "### Data Requirements\n", 39 |     "\n", 40 |     "* Put your dates in one column and your values in another.\n", 41 |     "* Ensure your date format is recognized as a valid date by Excel.\n", 42 |     "* Ensure your values are recognized as valid numbers by Excel.\n", 43 |     "* All data inputs must exist in the same worksheet. The add-in does not support forecasting using multiple worksheets.\n", 44 |     "* Do not include headers.\n", 45 |     "\n", 46 |     "Example:\n", 47 |     "\n", 48 |     "| dates | values | \n", 49 |     "| :------------- | :----- | \n", 50 |     "| 12/1/16 0:00 | 72 | \n", 51 |     "| 12/1/16 1:00 | 65.8 | \n", 52 |     "| 12/1/16 2:00 | 59.99 | \n", 53 |     "| 12/1/16 3:00 | 50.69 | \n", 54 |     "| 12/1/16 4:00 | 52.58 | \n", 55 |     "| 12/1/16 5:00 | 65.05 | \n", 56 |     "| 12/1/16 6:00 | 80.4 | \n", 57 |     "| 12/1/16 7:00 | 200 | \n", 58 |     "| 12/1/16 8:00 | 200.63 | \n", 59 |     "| 12/1/16 9:00 | 155.47 | \n", 60 |     "| 12/1/16 10:00 | 150.91 | \n", 61 |     "\n", 62 |     "#### Forecasting\n", 63 |     "\n", 64 |     "Once you've configured your token and formatted your input data, you're all ready to forecast!\n", 65 |     "\n", 66 |     "With the add-in open, configure the forecasting settings by selecting the column for each input.\n", 67 |     "\n", 68 |     "* **Frequency** - The frequency of the data (hourly / daily / weekly / monthly)\n", 69 |     "\n", 70 |     "* **Horizon** - The forecasting horizon. This represents the number of time steps into the future that the forecast should predict.\n", 71 |     "\n", 72 |     "* **Dates Range** - The column and range of the timeseries timestamps. Must not include header data, and should be formatted as a range, e.g. A2:A145. \n", 73 |     "\n", 74 |     "* **Values Range** - The column and range of the timeseries values for each point in time. Must not include header data, and should be formatted as a range, e.g. B2:B145. \n", 75 |     "\n", 76 |     "\n", 77 |     "\n", 78 |     "\n", 79 |     "\n", 80 |     "When you're ready, click **Make Prediction** to generate the predicted values. The add-in will generate a plot and append the forecasted data to the end of the column of your existing data and highlight them in green. So, scroll to the end of your data to see the predicted values. \n", 81 |     "\n", 82 |     "\n", 83 |     "\n", 84 |     "#### Anomaly Detection\n", 85 |     "\n", 86 |     "The requirements are the same as for the forecasting functionality, so if you have already tried it, you are ready to run the anomaly detection one.
Go to the main page in the add-in and select \"Anomaly Detection\", then choose your dates and values cell ranges and click on submit. We'll run the model and mark the anomaly cells in yellow while adding a third column for expected values with a green background.\n", 87 |     "\n", 88 |     "\n", 89 |     "\n", 90 |     "\n" 91 |    ] 92 |   } 93 |  ], 94 |  "metadata": { 95 |   "kernelspec": { 96 |    "display_name": "python3", 97 |    "language": "python", 98 |    "name": "python3" 99 |   } 100 |  }, 101 |  "nbformat": 4, 102 |  "nbformat_minor": 2 103 | } 104 | -------------------------------------------------------------------------------- /nbs/docs/reference/04_nixtlar.ipynb: -------------------------------------------------------------------------------- 1 | { 2 |  "cells": [ 3 |   { 4 |    "cell_type": "markdown", 5 |    "metadata": {}, 6 |    "source": [ 7 |     "# TimeGPT in R\n", 8 |     "\n", 9 |     "TimeGPT is also available in R through the `nixtlar` package, which is available on CRAN. This package can be used in a way almost identical to its Python counterpart. It offers nearly the same functionalities, with missing features and documentation currently under development. Originally developed in Python, TimeGPT is now accessible to the R community through `nixtlar`, providing access to the first foundation model for time series forecasting and embracing our core philosophy that _the future is for everybody_." 10 |    ] 11 |   }, 12 |   { 13 |    "cell_type": "markdown", 14 |    "metadata": {}, 15 |    "source": [ 16 |     "![Logo nixtlar](https://github.com/Nixtla/nixtla/blob/main/nbs/img/logo_nixtlar.png?raw=true)" 17 |    ] 18 |   }, 19 |   { 20 |    "cell_type": "markdown", 21 |    "metadata": {}, 22 |    "source": [ 23 |     "## How to use \n", 24 |     "\n", 25 |     "To learn how to use `nixtlar`, please refer to the [documentation](https://nixtla.github.io/nixtlar/). \n", 26 |     "\n", 27 |     "To view directly on CRAN, please use this [link](https://cloud.r-project.org/web/packages/nixtlar/index.html). " 28 |    ] 29 |   }, 30 |   { 31 |    "cell_type": "markdown", 32 |    "metadata": {}, 33 |    "source": [ 34 |     "> 📘 API key required\n", 35 |     "> \n", 36 |     "> The `nixtlar` package requires an API key. Get yours on the [Nixtla Dashboard](http://dashboard.nixtla.io).\n", 37 |     "\n", 38 |     "## Support\n", 39 |     "\n", 40 |     "If you have questions or need support, please email `support@nixtla.io`." 41 |    ] 42 |   } 43 |  ], 44 |  "metadata": {}, 45 |  "nbformat": 4, 46 |  "nbformat_minor": 2 47 | } 48 | -------------------------------------------------------------------------------- /nbs/docs/tutorials/050_training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 |  "cells": [ 3 |   { 4 |    "cell_type": "markdown", 5 |    "id": "6de758ee-a0d2-4b3f-acff-eed419dd17c5", 6 |    "metadata": {}, 7 |    "source": [ 8 |     "# Training\n", 9 |     "\n", 10 |     "This section offers tutorials related to training `TimeGPT` under specific conditions.\n", 11 |     "\n", 12 |     "### What You Will Learn\n", 13 |     "\n", 14 |     "1. **[Long Horizon Forecasting](https://docs.nixtla.io/docs/tutorials-long_horizon_forecasting)**\n", 15 |     "\n", 16 |     "   - Discover how to make predictions beyond two seasonal periods or even further into the future, using `TimeGPT`'s specialized model for long horizon forecasting (see the sketch after this list).\n", 17 |     "\n", 18 |     "2. **[Multiple Series Forecasting](https://docs.nixtla.io/docs/tutorials-multiple_series_forecasting)**\n", 19 |     "\n", 20 |     "   - Learn how to use `TimeGPT` to forecast multiple time series simultaneously."
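, "\n", "As a brief, hedged sketch that ties these two tutorials together (it assumes `df` is a long-format pandas DataFrame holding several monthly series identified by `unique_id`, with `ds` and `y` columns, and that your `nixtla` version accepts the long-horizon model name shown):\n", "\n", "```python\n", "from nixtla import NixtlaClient\n", "\n", "nixtla_client = NixtlaClient(api_key='my_api_key_provided_by_nixtla')\n", "\n", "# Forecast every series in df at once with the long-horizon variant of TimeGPT\n", "fcst_df = nixtla_client.forecast(\n", "    df=df,                           # assumed: columns unique_id, ds, y\n", "    h=36,                            # horizon beyond two seasonal periods\n", "    freq='MS',                       # assumed monthly frequency\n", "    model='timegpt-1-long-horizon',\n", ")\n", "```"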
21 | ] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "python3", 27 | "language": "python", 28 | "name": "python3" 29 | } 30 | }, 31 | "nbformat": 4, 32 | "nbformat_minor": 5 33 | } 34 | -------------------------------------------------------------------------------- /nbs/docs/tutorials/080_validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6de758ee-a0d2-4b3f-acff-eed419dd17c5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Validation" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "5d267032-535b-4b7b-b7d3-d2db8f673af6", 14 | "metadata": {}, 15 | "source": [ 16 | "One of the primary challenges in time series forecasting is the inherent uncertainty and variability over time, making it crucial to validate the accuracy and reliability of the models employed. `TimeGPT` offers the possibility for cross-validation and historical forecasts to help you validate your predictions.\n", 17 | "\n", 18 | "### What You Will Learn\n", 19 | "\n", 20 | "1. **[Cross-Validation](https://docs.nixtla.io/docs/tutorials-cross_validation)**\n", 21 | "\n", 22 | " - Learn how to perform time series cross-validation across different continuous windows of your data. \n", 23 | "\n", 24 | "2. **[Historical Forecasts](https://docs.nixtla.io/docs/tutorials-historical_forecast)**\n", 25 | "\n", 26 | " - Generate in-sample forecasts to validate how `TimeGPT` would have performed in the past, providing insights into the model's accuracy. \n" 27 | ] 28 | } 29 | ], 30 | "metadata": { 31 | "kernelspec": { 32 | "display_name": "python3", 33 | "language": "python", 34 | "name": "python3" 35 | } 36 | }, 37 | "nbformat": 4, 38 | "nbformat_minor": 5 39 | } 40 | -------------------------------------------------------------------------------- /nbs/docs/tutorials/100_uncertainty_quantification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6de758ee-a0d2-4b3f-acff-eed419dd17c5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Uncertainty quantification\n", 9 | "\n", 10 | "In forecasting, it is essential to consider the full distribution of predictions rather than only a point prediction. This approach allows for a better understanding of the uncertainty surrounding the forecast. `TimeGPT` supports uncertainty quantification through quantile forecasts and prediction intervals.\n", 11 | "\n", 12 | "### What You Will Learn\n", 13 | "\n", 14 | "1. **[Quantile Forecasts](https://docs.nixtla.io/docs/tutorials-quantile_forecasts)**\n", 15 | "\n", 16 | " - Learn how to compute specific quantiles of the forecast distribution using `TimeGPT`. \n", 17 | "\n", 18 | "2. **[Prediction Intervals](https://docs.nixtla.io/docs/tutorials-prediction_intervals)**\n", 19 | "\n", 20 | " - Learn how to generate prediction intervals with `TimeGPT`, which give you a range of values that the forecast can take with a given probability. 
\n" 21 | ] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "python3", 27 | "language": "python", 28 | "name": "python3" 29 | } 30 | }, 31 | "nbformat": 4, 32 | "nbformat_minor": 5 33 | } 34 | -------------------------------------------------------------------------------- /nbs/docs/tutorials/120_special_topics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6de758ee-a0d2-4b3f-acff-eed419dd17c5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Special topics" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "5d267032-535b-4b7b-b7d3-d2db8f673af6", 14 | "metadata": {}, 15 | "source": [ 16 | "`TimeGPT` is a robust foundation model for time series forecasting, with advanced capabilities such as hierarchical and bounded forecasts. To fully leverage the power of `TimeGPT`, there are specific situations that require special consideration, such as dealing with irregular timestamps or handling datasets with missing values.\n", 17 | "\n", 18 | "In this section, we will cover these special topics.\n", 19 | "\n", 20 | "### What You Will Learn\n", 21 | "\n", 22 | "1. **[Irregular Timestamps](https://docs.nixtla.io/docs/capabilities-forecast-irregular_timestamps)**\n", 23 | "\n", 24 | " - Learn how to deal with irregular timestamps for correct usage of `TimeGPT`.\n", 25 | "\n", 26 | "2. **[Bounded Forecasts](https://docs.nixtla.io/docs/tutorials-bounded_forecasts)**\n", 27 | "\n", 28 | " - Explore `TimeGPT`'s capability to make forecasts within a specified range, ideal for applications where outcomes are bounded.\n", 29 | "\n", 30 | "3. **[Hierarchical Forecasts](https://docs.nixtla.io/docs/tutorials-hierarchical_forecasting)**\n", 31 | "\n", 32 | " - Understand how to use `TimeGPT` to make coherent predictions at various levels of aggregation.\n", 33 | "\n", 34 | "4. **[Missing Values](https://docs.nixtla.io/docs/tutorials-missing_values)**\n", 35 | "\n", 36 | " - Learn how to address missing values within your time series data effectively using `TimeGPT`.\n", 37 | "\n", 38 | "5. **[Improve Forecast Accuracy](https://docs.nixtla.io/docs/tutorials-improve_forecast_accuracy_with_timegpt)**\n", 39 | "\n", 40 | " - Discover multiple techniques to boost forecast accuracy when working with `TimeGPT`." 
41 | ] 42 | } 43 | ], 44 | "metadata": { 45 | "kernelspec": { 46 | "display_name": "python3", 47 | "language": "python", 48 | "name": "python3" 49 | } 50 | }, 51 | "nbformat": 4, 52 | "nbformat_minor": 5 53 | } 54 | -------------------------------------------------------------------------------- /nbs/favicon_png.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/favicon_png.png -------------------------------------------------------------------------------- /nbs/img/ApiRefScreen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/ApiRefScreen.png -------------------------------------------------------------------------------- /nbs/img/anomaly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/anomaly.png -------------------------------------------------------------------------------- /nbs/img/api_key_process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/api_key_process.png -------------------------------------------------------------------------------- /nbs/img/australia_hierarchy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/australia_hierarchy.png -------------------------------------------------------------------------------- /nbs/img/australia_tourism.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/australia_tourism.png -------------------------------------------------------------------------------- /nbs/img/azure-deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/azure-deploy.png -------------------------------------------------------------------------------- /nbs/img/azure-endpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/azure-endpoint.png -------------------------------------------------------------------------------- /nbs/img/azure-model-catalog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/azure-model-catalog.png -------------------------------------------------------------------------------- /nbs/img/dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/dashboard.png -------------------------------------------------------------------------------- /nbs/img/forecast.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/forecast.png -------------------------------------------------------------------------------- /nbs/img/forecast_readme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/forecast_readme.png -------------------------------------------------------------------------------- /nbs/img/logo_nixtlar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/logo_nixtlar.png -------------------------------------------------------------------------------- /nbs/img/results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/results.jpg -------------------------------------------------------------------------------- /nbs/img/timegpt-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/timegpt-arch.png -------------------------------------------------------------------------------- /nbs/img/timegpt_archi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/timegpt_archi.png -------------------------------------------------------------------------------- /nbs/img/timeseries_model_arena.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nbs/img/timeseries_model_arena.png -------------------------------------------------------------------------------- /nbs/nbdev.yml: -------------------------------------------------------------------------------- 1 | project: 2 | output-dir: _docs 3 | 4 | website: 5 | title: "nixtla" 6 | site-url: "https://Nixtla.github.io/nixtla/" 7 | description: "Python SDK for Nixtla API (TimeGPT)" 8 | repo-branch: main 9 | repo-url: "https://github.com/Nixtla/nixtla/" 10 | -------------------------------------------------------------------------------- /nbs/sidebar.yml: -------------------------------------------------------------------------------- 1 | website: 2 | reader-mode: false 3 | sidebar: 4 | collapse-level: 1 5 | contents: 6 | - text: "--" 7 | - section: "Getting Started" 8 | contents: docs/getting-started/* 9 | - section: "Capabilities" 10 | contents: docs/capabilities/* 11 | - section: "Deployment" 12 | contents: docs/deployment/* 13 | - section: "Tutorials" 14 | contents: docs/tutorials/* 15 | - section: "Use cases" 16 | contents: docs/use-cases/* 17 | - section: "API Reference" 18 | contents: 19 | - nixtla_client.ipynb 20 | - date_features.ipynb -------------------------------------------------------------------------------- /nbs/src/utils.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Utils" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "#| default_exp utils" 17 | ] 18 | }, 19 | { 20 
| "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "#| hide \n", 26 | "%load_ext autoreload\n", 27 | "%autoreload 2" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "#| export\n", 37 | "import sys" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "#| export\n", 47 | "def colab_badge(path: str):\n", 48 | " from IPython.display import Markdown, display\n", 49 | " base_url = \"https://colab.research.google.com/github\"\n", 50 | " badge_svg = \"https://colab.research.google.com/assets/colab-badge.svg\"\n", 51 | " nb_url = f'{base_url}/Nixtla/nixtla/blob/main/nbs/{path}.ipynb'\n", 52 | " badge_md = f\"[![]({badge_svg})]({nb_url})\"\n", 53 | " display(Markdown(badge_md))" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "#| export\n", 63 | "def in_colab():\n", 64 | " return 'google.colab' in sys.modules" 65 | ] 66 | } 67 | ], 68 | "metadata": { 69 | "kernelspec": { 70 | "display_name": "python3", 71 | "language": "python", 72 | "name": "python3" 73 | } 74 | }, 75 | "nbformat": 4, 76 | "nbformat_minor": 4 77 | } 78 | -------------------------------------------------------------------------------- /nixtla/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.6.7.dev3" 2 | __all__ = ["NixtlaClient"] 3 | from .nixtla_client import NixtlaClient 4 | -------------------------------------------------------------------------------- /nixtla/date_features.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/src/date_features.ipynb. 
2 | 3 | # %% auto 0 4 | __all__ = ['CountryHolidays', 'SpecialDates'] 5 | 6 | # %% ../nbs/src/date_features.ipynb 4 7 | import pandas as pd 8 | 9 | # %% ../nbs/src/date_features.ipynb 6 10 | def _transform_dict_holidays(dict_holidays_dates): 11 | dict_holidays = {} 12 | for key, value in dict_holidays_dates.items(): 13 | if value not in dict_holidays: 14 | dict_holidays[value] = [] 15 | dict_holidays[value].append(key) 16 | return dict_holidays 17 | 18 | # %% ../nbs/src/date_features.ipynb 7 19 | def _get_holidays_df(dates, categories, holiday_extractor, supported_categories): 20 | years = dates.year.unique().tolist() 21 | total_holidays = dict() 22 | for cat in categories: 23 | if cat not in supported_categories: 24 | raise Exception(f"Holidays for {cat} not available, please remove it.") 25 | dict_holidays = _transform_dict_holidays(holiday_extractor(cat, years=years)) 26 | for key, val in dict_holidays.items(): 27 | total_holidays[f"{cat}_{key}"] = [int(ds.date() in val) for ds in dates] 28 | return pd.DataFrame(total_holidays, index=dates) 29 | 30 | # %% ../nbs/src/date_features.ipynb 8 31 | class CountryHolidays: 32 | """Given a list of countries, returns a dataframe with holidays for each country.""" 33 | 34 | def __init__(self, countries: list[str]): 35 | self.countries = countries 36 | 37 | def __call__(self, dates: pd.DatetimeIndex): 38 | try: 39 | from holidays.utils import country_holidays 40 | from holidays.utils import list_supported_countries 41 | except ModuleNotFoundError: 42 | raise Exception( 43 | "You have to install additional libraries to use holidays, " 44 | 'please install them using `pip install "nixtla[date_extras]"`' 45 | ) 46 | return _get_holidays_df( 47 | dates, self.countries, country_holidays, list_supported_countries() 48 | ) 49 | 50 | def __name__(self): 51 | return "CountryHolidays" 52 | 53 | # %% ../nbs/src/date_features.ipynb 12 54 | class SpecialDates: 55 | """Given a dictionary of categories and dates, returns a dataframe with the special dates.""" 56 | 57 | def __init__(self, special_dates: dict[str, list[str]]): 58 | self.special_dates = special_dates 59 | 60 | def __call__(self, dates: pd.DatetimeIndex): 61 | total_special_dates = dict() 62 | for key, val in self.special_dates.items(): 63 | date_vals = [ds.date() for ds in pd.to_datetime(val)] 64 | total_special_dates[key] = [int(ds.date() in date_vals) for ds in dates] 65 | return pd.DataFrame(total_special_dates, index=dates) 66 | 67 | def __name__(self): 68 | return "SpecialDates" 69 | -------------------------------------------------------------------------------- /nixtla/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nixtla/nixtla/dd497bc109e89fe2cdd9e56eae5c847b1d35e9b6/nixtla/py.typed -------------------------------------------------------------------------------- /nixtla/utils.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/src/utils.ipynb. 
2 | 3 | # %% auto 0 4 | __all__ = ['colab_badge', 'in_colab'] 5 | 6 | # %% ../nbs/src/utils.ipynb 3 7 | import sys 8 | 9 | # %% ../nbs/src/utils.ipynb 4 10 | def colab_badge(path: str): 11 | from IPython.display import Markdown, display 12 | 13 | base_url = "https://colab.research.google.com/github" 14 | badge_svg = "https://colab.research.google.com/assets/colab-badge.svg" 15 | nb_url = f"{base_url}/Nixtla/nixtla/blob/main/nbs/{path}.ipynb" 16 | badge_md = f"[![]({badge_svg})]({nb_url})" 17 | display(Markdown(badge_md)) 18 | 19 | # %% ../nbs/src/utils.ipynb 5 20 | def in_colab(): 21 | return "google.colab" in sys.modules 22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff.lint] 2 | select = [ 3 | "F", # pyflakes 4 | ] 5 | -------------------------------------------------------------------------------- /settings.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | host = github 3 | lib_name = nixtla 4 | user = Nixtla 5 | description = Python SDK for Nixtla API (TimeGPT) 6 | keywords = time-series forecasting gpt 7 | author = Nixtla 8 | author_email = business@nixtla.io 9 | copyright = Nixtla Inc. 10 | branch = main 11 | version = 0.6.7.dev3 12 | min_python = 3.9 13 | audience = Developers 14 | language = English 15 | custom_sidebar = True 16 | license = apache2 17 | status = 4 18 | requirements = annotated-types httpx[zstd] orjson pandas pydantic>=1.10 tenacity tqdm utilsforecast>=0.2.8 19 | dev_requirements = black datasetsforecast fire hierarchicalforecast ipython<=8.32.0 ipywidgets jupyterlab nbdev neuralforecast numpy<2 plotly polars pre-commit pyreadr<0.5.3 python-dotenv pyyaml setuptools<70 statsforecast tabulate 20 | distributed_requirements = fugue[dask,ray,spark]>=0.8.7 dask<=2024.12.1 pandas<2.2 ray<2.6.3 21 | plotting_requirements = utilsforecast[plotting] 22 | date_extra_requirements = holidays 23 | nbs_path = nbs 24 | doc_path = _docs 25 | recursive = True 26 | doc_host = https://Nixtla.github.io 27 | doc_baseurl = /nixtla/ 28 | git_url = https://github.com/Nixtla/nixtla/ 29 | lib_path = nixtla 30 | title = nixtla 31 | black_formatting = True 32 | jupyter_hooks = True 33 | clean_ids = True 34 | readme_nb = nbs/index.ipynb 35 | tst_flags = distributed 36 | allowed_metadata_keys = 37 | allowed_cell_metadata_keys = 38 | clear_all = False 39 | put_version_in_init = True 40 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from pkg_resources import parse_version 2 | from configparser import ConfigParser 3 | import setuptools 4 | assert parse_version(setuptools.__version__)>=parse_version('36.2') 5 | 6 | # note: all settings are in settings.ini; edit there, not here 7 | config = ConfigParser(delimiters=['=']) 8 | config.read('settings.ini') 9 | cfg = config['DEFAULT'] 10 | 11 | cfg_keys = 'version description keywords author author_email'.split() 12 | expected = cfg_keys + "lib_name user branch license status min_python audience language".split() 13 | for o in expected: assert o in cfg, "missing expected setting: {}".format(o) 14 | setup_cfg = {o:cfg[o] for o in cfg_keys} 15 | 16 | licenses = { 17 | 'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'), 18 | 'mit': ('MIT License', 'OSI Approved :: MIT License'), 19 | 'gpl2': ('GNU 
General Public License v2', 'OSI Approved :: GNU General Public License v2 (GPLv2)'), 20 | 'gpl3': ('GNU General Public License v3', 'OSI Approved :: GNU General Public License v3 (GPLv3)'), 21 | 'bsd3': ('BSD License', 'OSI Approved :: BSD License'), 22 | } 23 | statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha', 24 | '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ] 25 | py_versions = '3.9 3.10 3.11 3.12 3.13'.split() 26 | 27 | requirements = cfg['requirements'].split() 28 | distributed_requirements = cfg['distributed_requirements'].split() 29 | plotting_requirements = cfg['plotting_requirements'].split() 30 | date_extra_requirements = cfg['date_extra_requirements'].split() 31 | dev_requirements = cfg['dev_requirements'].split() 32 | dev_requirements.extend(plotting_requirements) 33 | dev_requirements.extend(date_extra_requirements) 34 | 35 | min_python = cfg['min_python'] 36 | lic = licenses.get(cfg['license'].lower(), (cfg['license'], None)) 37 | 38 | setuptools.setup( 39 | name = cfg['lib_name'], 40 | license = lic[0], 41 | classifiers = [ 42 | 'Development Status :: ' + statuses[int(cfg['status'])], 43 | 'Intended Audience :: ' + cfg['audience'].title(), 44 | 'Natural Language :: ' + cfg['language'].title(), 45 | ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]] + (['License :: ' + lic[1] ] if lic[1] else []), 46 | url = cfg['git_url'], 47 | packages = setuptools.find_packages(exclude=['action_files']), 48 | include_package_data = True, 49 | install_requires = requirements, 50 | extras_require = { 51 | "dev": dev_requirements, 52 | "distributed": distributed_requirements, 53 | "plotting": plotting_requirements, 54 | "date_extras": date_extra_requirements, 55 | }, 56 | dependency_links = cfg.get('dep_links','').split(), 57 | python_requires = '>=' + cfg['min_python'], 58 | long_description = open('README.md', encoding='utf-8').read(), 59 | long_description_content_type = 'text/markdown', 60 | zip_safe = False, 61 | entry_points = { 62 | 'console_scripts': cfg.get('console_scripts','').split(), 63 | 'nbdev': [f'{cfg.get("lib_path")}={cfg.get("lib_path")}._modidx:d'] 64 | }, 65 | **setup_cfg) 66 | --------------------------------------------------------------------------------