The response has been limited to 50k tokens of the smallest files in the repo. You can remove this limitation by removing the max tokens filter.
├── .copyright.tmpl
├── .dockerignore
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   └── feature_request.md
    ├── badges
    │   └── README.md
    └── workflows
    │   ├── docs.yml
    │   ├── publish.yml
    │   └── tests.yml
├── .gitignore
├── .pre-commit-config.yaml
├── AUTHORS.md
├── CODEOWNERS
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── SECURITY.md
├── benchmark_anomaly.py
├── benchmark_forecast.py
├── conf
    ├── benchmark_anomaly.json
    └── benchmark_forecast.json
├── data
    ├── example.csv
    ├── iops_competition
    │   └── phase2.zip
    ├── multivariate
    │   ├── energy_power
    │   │   └── est_hourly.csv.gz
    │   ├── seattle_trail
    │   │   └── burke-gilman-trail-north-of-ne-70th-st-bike-and-ped-counter.csv
    │   └── solar_plant
    │   │   └── merged.zip
    ├── smap
    │   └── SMAP.tar.gz
    ├── synthetic_anomaly
    │   ├── horizontal.csv
    │   ├── horizontal_dip_anomaly.csv
    │   ├── horizontal_level_anomaly.csv
    │   ├── horizontal_shock_anomaly.csv
    │   ├── horizontal_spike_anomaly.csv
    │   ├── horizontal_trend_anomaly.csv
    │   ├── seasonal.csv
    │   ├── seasonal_dip_anomaly.csv
    │   ├── seasonal_level_anomaly.csv
    │   ├── seasonal_shock_anomaly.csv
    │   ├── seasonal_spike_anomaly.csv
    │   ├── seasonal_trend_anomaly.csv
    │   ├── upward_downward.csv
    │   ├── upward_downward_dip_anomaly.csv
    │   ├── upward_downward_level_anomaly.csv
    │   ├── upward_downward_shock_anomaly.csv
    │   ├── upward_downward_spike_anomaly.csv
    │   └── upward_downward_trend_anomaly.csv
    ├── test_transform.pkl
    └── walmart
    │   ├── walmart_mini.csv
    │   └── walmart_mini_error.csv
├── docker
    ├── Dockerfile
    ├── dashboard
    │   └── Dockerfile
    └── spark-on-k8s
    │   └── Dockerfile
├── docs
    ├── README.md
    ├── build_docs.sh
    ├── process_old_docs.py
    ├── requirements.txt
    └── source
    │   ├── _static
    │       └── figures
    │   ├── _templates
    │       ├── autosummary
    │       │   └── module.rst
    │       └── versions.html
    │   ├── architecture.rst
    │   ├── conf.py
    │   ├── index.rst
    │   ├── merlion.dashboard.rst
    │   ├── merlion.evaluate.rst
    │   ├── merlion.models.anomaly.change_point.rst
    │   ├── merlion.models.anomaly.forecast_based.rst
    │   ├── merlion.models.anomaly.rst
    │   ├── merlion.models.automl.rst
    │   ├── merlion.models.ensemble.rst
    │   ├── merlion.models.forecast.rst
    │   ├── merlion.models.rst
    │   ├── merlion.models.utils.rst
    │   ├── merlion.plot.rst
    │   ├── merlion.post_process.rst
    │   ├── merlion.rst
    │   ├── merlion.spark.rst
    │   ├── merlion.transform.rst
    │   ├── merlion.utils.rst
    │   ├── ts_datasets.anomaly.rst
    │   ├── ts_datasets.forecast.rst
    │   ├── ts_datasets.rst
    │   ├── tutorials
    │   └── tutorials.rst
├── examples
    ├── CustomDataset.ipynb
    ├── README.md
    ├── TimeSeries.ipynb
    ├── advanced
    │   ├── 1_AutoSARIMA_forecasting_tutorial.ipynb
    │   └── 2_ForecastInvertPOC.ipynb
    ├── anomaly
    │   ├── 0_AnomalyIntro.ipynb
    │   ├── 1_AnomalyFeatures.ipynb
    │   ├── 2_AnomalyMultivariate.ipynb
    │   └── 3_AnomalyNewModel.ipynb
    ├── forecast
    │   ├── 0_ForecastIntro.ipynb
    │   ├── 1_ForecastFeatures.ipynb
    │   ├── 2_ForecastMultivariate.ipynb
    │   ├── 3_ForecastExogenous.ipynb
    │   └── 4_ForecastNewModel.ipynb
    └── misc
    │   └── generate_synthetic_tsad_dataset.py
├── figures
    ├── anom_example.png
    ├── dashboard_anomaly.png
    ├── dashboard_file.png
    ├── dashboard_forecast.png
    └── forecast_example.png
├── k8s-spec
    ├── anomaly.yml
    └── forecast.yml
├── merlion
    ├── dashboard
    │   ├── __init__.py
    │   ├── __main__.py
    │   ├── assets
    │   │   ├── Acumin-BdPro.otf
    │   │   ├── base.css
    │   │   ├── fonts
    │   │   │   ├── SalesforceSans-Bold.woff
    │   │   │   ├── SalesforceSans-BoldItalic.woff
    │   │   │   ├── SalesforceSans-Italic.woff
    │   │   │   ├── SalesforceSans-Light.woff
    │   │   │   ├── SalesforceSans-LightItalic.woff
    │   │   │   ├── SalesforceSans-Regular.woff
    │   │   │   ├── SalesforceSans-Thin.woff
    │   │   │   └── SalesforceSans-ThinItalic.woff
    │   │   ├── merlion.css
    │   │   ├── merlion_small.svg
    │   │   ├── modal.css
    │   │   ├── resizing.js
    │   │   ├── styles.css
    │   │   └── upload.svg
    │   ├── callbacks
    │   │   ├── __init__.py
    │   │   ├── anomaly.py
    │   │   ├── data.py
    │   │   └── forecast.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── anomaly.py
    │   │   ├── data.py
    │   │   ├── forecast.py
    │   │   └── utils.py
    │   ├── pages
    │   │   ├── __init__.py
    │   │   ├── anomaly.py
    │   │   ├── data.py
    │   │   ├── forecast.py
    │   │   └── utils.py
    │   ├── server.py
    │   ├── settings.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── file_manager.py
    │   │   ├── layout.py
    │   │   ├── log.py
    │   │   └── plot.py
    ├── evaluate
    │   ├── anomaly.py
    │   ├── base.py
    │   └── forecast.py
    ├── models
    │   ├── anomaly
    │   │   ├── __init__.py
    │   │   ├── autoencoder.py
    │   │   ├── base.py
    │   │   ├── change_point
    │   │   │   ├── __init__.py
    │   │   │   └── bocpd.py
    │   │   ├── dagmm.py
    │   │   ├── dbl.py
    │   │   ├── deep_point_anomaly_detector.py
    │   │   ├── forecast_based
    │   │   │   ├── __init__.py
    │   │   │   ├── arima.py
    │   │   │   ├── base.py
    │   │   │   ├── ets.py
    │   │   │   ├── mses.py
    │   │   │   ├── prophet.py
    │   │   │   └── sarima.py
    │   │   ├── isolation_forest.py
    │   │   ├── lof.py
    │   │   ├── lstm_ed.py
    │   │   ├── random_cut_forest.py
    │   │   ├── spectral_residual.py
    │   │   ├── stat_threshold.py
    │   │   ├── vae.py
    │   │   ├── windstats.py
    │   │   ├── windstats_monthly.py
    │   │   ├── windstats_run.py
    │   │   └── zms.py
    │   ├── automl
    │   │   ├── __init__.py
    │   │   ├── autoets.py
    │   │   ├── autoprophet.py
    │   │   ├── autosarima.py
    │   │   ├── base.py
    │   │   ├── search.py
    │   │   └── seasonality.py
    │   ├── base.py
    │   ├── deep_base.py
    │   ├── defaults.py
    │   ├── ensemble
    │   │   ├── __init__.py
    │   │   ├── anomaly.py
    │   │   ├── base.py
    │   │   ├── combine.py
    │   │   └── forecast.py
    │   ├── factory.py
    │   ├── forecast
    │   │   ├── __init__.py
    │   │   ├── arima.py
    │   │   ├── autoformer.py
    │   │   ├── base.py
    │   │   ├── deep_ar.py
    │   │   ├── deep_base.py
    │   │   ├── ets.py
    │   │   ├── etsformer.py
    │   │   ├── informer.py
    │   │   ├── prophet.py
    │   │   ├── sarima.py
    │   │   ├── sklearn_base.py
    │   │   ├── smoother.py
    │   │   ├── transformer.py
    │   │   ├── trees.py
    │   │   └── vector_ar.py
    │   ├── layers.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── autosarima_utils.py
    │   │   ├── early_stopping.py
    │   │   ├── nn_modules
    │   │       ├── __init__.py
    │   │       ├── blocks.py
    │   │       ├── embed.py
    │   │       ├── enc_dec_autoformer.py
    │   │       ├── enc_dec_etsformer.py
    │   │       ├── enc_dec_transformer.py
    │   │       └── layers.py
    │   │   ├── rolling_window_dataset.py
    │   │   └── time_features.py
    ├── plot.py
    ├── post_process
    │   ├── base.py
    │   ├── calibrate.py
    │   ├── factory.py
    │   ├── sequence.py
    │   └── threshold.py
    ├── resources
    │   ├── gson-2.8.9.jar
    │   ├── randomcutforest-core-1.0.jar
    │   └── randomcutforest-serialization-json-1.0.jar
    ├── spark
    │   ├── dataset.py
    │   └── pandas_udf.py
    ├── transform
    │   ├── anomalize.py
    │   ├── base.py
    │   ├── bound.py
    │   ├── factory.py
    │   ├── moving_average.py
    │   ├── normalize.py
    │   ├── resample.py
    │   └── sequence.py
    └── utils
    │   ├── __init__.py
    │   ├── conj_priors.py
    │   ├── data_io.py
    │   ├── hts.py
    │   ├── istat.py
    │   ├── misc.py
    │   ├── resample.py
    │   ├── time_series.py
    │   └── ts_generator.py
├── merlion_logo.svg
├── pytest.ini
├── setup.py
├── spark_apps
    ├── anomaly.py
    └── forecast.py
├── tests
    ├── anomaly
    │   ├── __init__.py
    │   ├── forecast_based
    │   │   ├── __init__.py
    │   │   ├── test_arima.py
    │   │   ├── test_mses.py
    │   │   ├── test_prophet.py
    │   │   └── test_sarima.py
    │   ├── multivariate
    │   │   ├── test_autoencoder.py
    │   │   ├── test_dagmm.py
    │   │   ├── test_lstmed.py
    │   │   └── test_vae.py
    │   ├── test_anom_ensemble.py
    │   ├── test_dbl.py
    │   ├── test_default.py
    │   ├── test_dpad.py
    │   ├── test_isolation_forest.py
    │   ├── test_lof.py
    │   ├── test_random_cut_forest.py
    │   ├── test_spectral_residual.py
    │   ├── test_stat_threshold.py
    │   ├── test_windstats.py
    │   └── test_zms.py
    ├── change_point
    │   ├── __init__.py
    │   ├── test_bocpd.py
    │   └── test_conj_prior.py
    ├── evaluate
    │   ├── __init__.py
    │   ├── test_eval_anomaly.py
    │   └── test_eval_forecast.py
    ├── forecast
    │   ├── __init__.py
    │   ├── test_autoets.py
    │   ├── test_autosarima.py
    │   ├── test_baggingtrees.py
    │   ├── test_boostingtrees.py
    │   ├── test_deep_model.py
    │   ├── test_default.py
    │   ├── test_ets.py
    │   ├── test_exog.py
    │   ├── test_forecast_ensemble.py
    │   ├── test_istat.py
    │   ├── test_prophet.py
    │   ├── test_smoother.py
    │   └── test_vector_ar.py
    ├── spark
    │   ├── __init__.py
    │   ├── conftest.py
    │   ├── test_anomaly.py
    │   └── test_forecast.py
    ├── test_custom_dataset.py
    ├── test_generator.py
    ├── test_hts.py
    ├── test_plot.py
    └── transform
    │   ├── __init__.py
    │   ├── test_anomalize.py
    │   ├── test_inverse.py
    │   ├── test_moving_average.py
    │   ├── test_resample.py
    │   └── test_sequence.py
└── ts_datasets
    ├── README.md
    ├── setup.py
    └── ts_datasets
        ├── __init__.py
        ├── anomaly
            ├── __init__.py
            ├── base.py
            ├── custom.py
            ├── iops_competition.py
            ├── msl.py
            ├── nab.py
            ├── smap.py
            ├── smd.py
            ├── synthetic.py
            └── ucr.py
        ├── base.py
        └── forecast
            ├── __init__.py
            ├── custom.py
            ├── energy_power.py
            ├── m4.py
            ├── seattle_trail.py
            └── solar_plant.py


/.copyright.tmpl:
--------------------------------------------------------------------------------
1 | Copyright (c) ${years} ${owner}
2 | All rights reserved.
3 | SPDX-License-Identifier: BSD-3-Clause
4 | For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
5 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | # package
 2 | __pycache__
 3 | *.egg-info
 4 | docs
 5 | tmp
 6 | ts_datasets
 7 | # pytest
 8 | .pytest_cache
 9 | .coverage*
10 | htmlcov
11 | # IDE/system
12 | .idea
13 | *.swp
14 | .DS_Store
15 | sandbox
16 | .vscode
17 | Icon?
18 | # build files
19 | docs/build/*
20 | .ipynb_checkpoints
21 | venv/


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: "[BUG]"
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior
15 | 
16 | **Expected behavior**
17 | A clear and concise description of what you expected to happen.
18 | 
19 | **Screenshots**
20 | If applicable, add screenshots to help explain your problem.
21 | 
22 | **Desktop (please complete the following information):**
23 |  - OS: [e.g. Ubuntu 16.04 LTS]
24 |  - Merlion Version [e.g. 1.0.0]
25 | 
26 | **Additional context**
27 | Add any other context about the problem here.
28 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: "[FEATURE REQUEST]"
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.github/badges/README.md:
--------------------------------------------------------------------------------
1 | Branch for automatically uploading status badges.


--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
 1 | name: docs
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 |   release:
 9 |     types: [ published ]
10 | 
11 | jobs:
12 |   docs:
13 | 
14 |     runs-on: ubuntu-latest
15 | 
16 |     steps:
17 |     - uses: actions/checkout@v3
18 |       with:
19 |         fetch-depth: 0
20 |     - name: Set up Python
21 |       uses: actions/setup-python@v4
22 |       with:
23 |         python-version: '3.10'
24 |     - name: Install dependencies
25 |       run: |
26 |         sudo apt-get update -y
27 |         sudo apt-get install openjdk-11-jre-headless pandoc --fix-missing
28 |         python -m pip install --upgrade pip setuptools wheel
29 |     - name: Build Sphinx docs
30 |       run: |
31 |         docs/build_docs.sh
32 |       timeout-minutes: 10
33 |     - name: Deploy to gh-pages
34 |       uses: peaceiris/actions-gh-pages@v3
35 |       if: ${{ github.ref == 'refs/heads/main' || github.event_name == 'release' }}
36 |       with:
37 |         github_token: ${{ secrets.GITHUB_TOKEN }}
38 |         publish_dir: docs/build/html
39 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to pip
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [ published ]
 6 | 
 7 | jobs:
 8 |   deploy:
 9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - uses: actions/checkout@v3
12 |       - name: Set up Python
13 |         uses: actions/setup-python@v4
14 |         with:
15 |           python-version: '3.10'
16 |       - name: Install dependencies
17 |         run: |
18 |           python -m pip install --upgrade pip setuptools build
19 |       - name: Build package
20 |         run: |
21 |           python -m build
22 |       - name: Publish package
23 |         uses: pypa/gh-action-pypi-publish@release/v1
24 |         with:
25 |           user: __token__
26 |           password: ${{ secrets.PYPI_API_TOKEN }}
27 |           verbose: true
28 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # package
 2 | __pycache__
 3 | *.egg-info
 4 | tmp
 5 | # pytest
 6 | .pytest_cache
 7 | .coverage*
 8 | htmlcov
 9 | # IDE/system
10 | .idea
11 | *.swp
12 | .DS_Store
13 | sandbox
14 | .vscode
15 | Icon?
16 | # build files
17 | docs/build/*
18 | .ipynb_checkpoints
19 | venv/


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | - repo: https://github.com/psf/black
 3 |   rev: '22.10.0'
 4 |   hooks:
 5 |   - id: black
 6 |     args: ["--line-length", "120"]
 7 | - repo: https://github.com/johann-petrak/licenseheaders.git
 8 |   rev: 'v0.8.8'
 9 |   hooks:
10 |     - id: licenseheaders
11 |       args: ["-t", ".copyright.tmpl", "-cy", "-o", "salesforce.com, inc.",
12 |              "-E", ".py", "-x", "docs/source/conf.py", "-f"]
13 | 


--------------------------------------------------------------------------------
/AUTHORS.md:
--------------------------------------------------------------------------------
 1 | Aadyot Bhatnagar
 2 | Paul Kassianik
 3 | Chenghao Liu
 4 | Tian Lan
 5 | Wenzhuo Yang
 6 | Rowan Cassius
 7 | Doyen Sahoo
 8 | Devansh Arpit
 9 | Sri Subramanian
10 | Gerald Woo
11 | Amrita Saha
12 | Arun Kumar Jagota
13 | Gokulakrishnan Gopalakrishnan
14 | Manpreet Singh
15 | K C Krithika
16 | Sukumar Maddineni
17 | Daeki Cho
18 | Bo Zong
19 | Yingbo Zhou
20 | Caiming Xiong
21 | Silvio Savarese
22 | Steven Hoi
23 | Huan Wang


--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # Comment line immediately above ownership line is reserved for related gus information. Please be careful while editing.
2 | #ECCN:Open Source
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2021, Salesforce.com, Inc.
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 5 | 
 6 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 7 | 
 8 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 9 | 
10 | * Neither the name of Salesforce.com nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11 | 
12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include AUTHORS.md CODE_OF_CONDUCT.md LICENSE SECURITY.md
 2 | global-exclude *.py[cod]
 3 | exclude benchmark*.py
 4 | recursive-exclude conf *
 5 | recursive-exclude data *
 6 | recursive-exclude docs *
 7 | recursive-exclude examples *
 8 | recursive-exclude figures *
 9 | recursive-exclude tests *
10 | recursive-exclude ts_datasets *
11 | recursive-exclude venv *
12 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | ## Security
2 | 
3 | Please report any security issue to [security@salesforce.com](mailto:security@salesforce.com)
4 | as soon as it is discovered. This library limits its runtime dependencies in
5 | order to reduce the total cost of ownership as much as can be, but all consumers
6 | should remain vigilant and have their security stakeholders review all third-party
7 | products (3PP) like this one and their dependencies.


--------------------------------------------------------------------------------
/conf/benchmark_forecast.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "ARIMA": {"alias": "Arima"},
  3 |   "Arima": {
  4 |     "config": {
  5 |       "default": {
  6 |         "order": [30, 0, 10]
  7 |       }
  8 |     }
  9 |   },
 10 | 
 11 |   "SARIMA": {"alias": "Sarima"},
 12 |   "Sarima": {
 13 |     "config": {
 14 |       "default": {
 15 |         "order": [15, 1, 5],
 16 |         "seasonal_order": [2, 0, 1, 30]
 17 |       }
 18 |     }
 19 |   },
 20 | 
 21 |   "AutoSARIMA": {"alias": "AutoSarima"},
 22 |   "AutoSarima": {
 23 |     "model_type": "SeasonalityLayer",
 24 |     "config": {
 25 |       "default": {
 26 |         "model": {"name": "AutoSarima"},
 27 |         "periodicity_strategy": "min"
 28 |       }
 29 |     }
 30 |   },
 31 | 
 32 |   "ETS": {
 33 |     "config": {
 34 |       "default": {
 35 |         "damped_trend": true
 36 |       }
 37 |     }
 38 |   },
 39 | 
 40 |   "AutoETS": {
 41 |     "config": {
 42 |       "default": {
 43 |         "damped_trend": true
 44 |       }
 45 |     }
 46 |   },
 47 | 
 48 |   "MSES": {
 49 |     "config": {
 50 |       "default": {
 51 |         "max_forecast_steps": 100
 52 |       }
 53 |     }
 54 |   },
 55 | 
 56 |   "Prophet": {
 57 |     "config": {
 58 |       "default": {
 59 |         "uncertainty_samples": 0
 60 |       }
 61 |     }
 62 |   },
 63 | 
 64 |   "AutoProphet": {
 65 |     "config": {
 66 |       "default": {
 67 |         "uncertainty_samples": 0
 68 |       }
 69 |     }
 70 |   },
 71 | 
 72 |   "Var": {"alias":  "VectorAR"},
 73 |   "VAR": {"alias":  "VectorAR"},
 74 |   "VectorAR" : {
 75 |     "config": {
 76 |       "default": {
 77 |         "target_seq_index": 0,
 78 |         "maxlags": 168,
 79 |         "max_forecast_steps": 3
 80 |       }
 81 |     },
 82 |     "dataset": {}
 83 |   },
 84 |   "RandomForestForecaster" : {
 85 |     "config": {
 86 |       "default": {
 87 |           "target_seq_index": 0,
 88 |           "maxlags": 21,
 89 |           "max_forecast_steps": 3,
 90 |           "n_estimators": 100,
 91 |           "max_depth": 9,
 92 |           "prediction_stride": 1
 93 |       },
 94 |       "dataset": {}
 95 |     }
 96 |   },
 97 |   "ExtraTreesForecaster" : {
 98 |     "config": {
 99 |       "default": {
100 |           "target_seq_index": 0,
101 |           "maxlags": 21,
102 |           "max_forecast_steps": 3,
103 |           "n_estimators": 100,
104 |           "max_depth": 9,
105 |           "prediction_stride": 1
106 |       },
107 |       "dataset": {}
108 |     }
109 |   },
110 |   "LGBMForecaster" : {
111 |     "config": {
112 |       "default": {
113 |           "target_seq_index": 0,
114 |           "maxlags": 21,
115 |           "max_forecast_steps": 3,
116 |           "learning_rate": 0.1,
117 |           "n_estimators": 100,
118 |           "max_depth": 7,
119 |           "prediction_stride": 1
120 |       },
121 |       "dataset": {}
122 |     }
123 |   }
124 | }
125 | 


--------------------------------------------------------------------------------
/data/iops_competition/phase2.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/data/iops_competition/phase2.zip


--------------------------------------------------------------------------------
/data/multivariate/energy_power/est_hourly.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/data/multivariate/energy_power/est_hourly.csv.gz


--------------------------------------------------------------------------------
/data/multivariate/solar_plant/merged.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/data/multivariate/solar_plant/merged.zip


--------------------------------------------------------------------------------
/data/smap/SMAP.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/data/smap/SMAP.tar.gz


--------------------------------------------------------------------------------
/data/test_transform.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/data/test_transform.pkl


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.9-slim
 2 | WORKDIR /opt/Merlion
 3 | # Install Java
 4 | RUN rm -rf /var/lib/apt/lists/* && \
 5 |     apt-get clean && \
 6 |     apt-get update && \
 7 |     apt-get upgrade && \
 8 |     apt-get install -y --no-install-recommends openjdk-11-jre-headless && \
 9 |     rm -rf /var/lib/apt/lists/*
10 | # Install Merlion from source
11 | COPY *.md ./
12 | COPY setup.py ./
13 | COPY merlion merlion
14 | RUN pip install "./"
15 | 


--------------------------------------------------------------------------------
/docker/dashboard/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.9-slim
 2 | WORKDIR /opt/Merlion
 3 | # Install Java
 4 | RUN rm -rf /var/lib/apt/lists/* && \
 5 |     apt-get clean && \
 6 |     apt-get update && \
 7 |     apt-get upgrade && \
 8 |     apt-get install -y --no-install-recommends openjdk-11-jre-headless && \
 9 |     rm -rf /var/lib/apt/lists/*
10 | # Install Merlion from source & set up a gunicorn server
11 | COPY *.md ./
12 | COPY setup.py ./
13 | COPY merlion merlion
14 | RUN pip install gunicorn "./[dashboard]"
15 | CMD gunicorn -b 0.0.0.0:80 merlion.dashboard.server:server
16 | 


--------------------------------------------------------------------------------
/docker/spark-on-k8s/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG spark_uid=185
 2 | FROM gcr.io/spark-operator/spark-py:v3.1.1
 3 | 
 4 | # Change to root user for installation steps
 5 | USER 0
 6 | 
 7 | # Install pyarrow (for spark-sql) and Merlion; get pyspark & py4j from the PYTHONPATH
 8 | ENV PYTHONPATH="${SPARK_HOME}/python/lib/pyspark.zip:${SPARK_HOME}/python/lib/py4j-0.10.9-src.zip:${PYTHONPATH}"
 9 | COPY *.md ./
10 | COPY setup.py ./
11 | COPY merlion merlion
12 | RUN pip install pyarrow "./"
13 | 
14 | # Copy Merlion pyspark apps
15 | COPY spark_apps /opt/spark/apps
16 | COPY data/walmart/walmart_mini.csv .
17 | RUN chmod g+w /opt/spark/apps
18 | USER ${spark_uid}
19 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | To generate documentation using [Sphinx](https://www.sphinx-doc.org/en/master/index.html), just run the script
2 | [`build_docs.sh`](build_docs.sh). The ``build/html`` directory will be populated with searchable, 
3 | indexed HTML documentation.
4 | 
5 | Note that our documentation also depends on [Pandoc](https://pandoc.org/installing.html) to render Jupyter notebooks.
6 | For Ubuntu, call ``sudo apt-get install pandoc``. For Mac OS, install [Homebrew](https://brew.sh/)
7 | and call ``brew install pandoc``.
8 | 


--------------------------------------------------------------------------------
/docs/process_old_docs.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Script which removes redirects from the HTML API docs & updates the version matrix on old files.
 9 | """
10 | import os
11 | import re
12 | import shutil
13 | 
14 | from bs4 import BeautifulSoup as bs
15 | from git import Repo
16 | 
17 | 
def create_version_dl(soup, prefix, current_version, all_versions):
    """Build a ``<dl>`` element listing every documented version.

    Each version becomes a ``<dd>`` containing a link to that version's
    ``index.html`` (relative to *prefix*); the entry matching
    *current_version* is wrapped in ``<strong>`` so it renders bold.
    """
    version_list = soup.new_tag("dl")
    heading = soup.new_tag("dt")
    heading.string = "Versions"
    version_list.append(heading)
    for ver in all_versions:
        # Link to this version's landing page, bolded if it is the one
        # whose docs we are currently rendering.
        link = soup.new_tag("a", href=f"{prefix}/{ver}/index.html")
        link.string = ver
        if ver == current_version:
            bold = soup.new_tag("strong")
            bold.append(link)
            link = bold
        entry = soup.new_tag("dd")
        entry.append(link)
        version_list.append(entry)
    return version_list
36 | 
37 | 
def main():
    """Post-process built HTML docs: drop non-release artifacts and refresh
    the version matrix embedded in each old versioned page.

    Walks ``docs/build/html``, removes anything that is not a numbered
    release directory (including the ``latest`` copy), and rewrites every
    remaining HTML file's "Versions" description list via
    :func:`create_version_dl`.
    """
    # "latest" first, then all vX.* git tags sorted newest-first.
    repo = Repo(search_parent_directories=True)
    tags = [t.name for t in repo.tags if re.match("v[0-9].*", t.name)]
    versions = ["latest", *sorted(tags, reverse=True)]

    html_root = os.path.join(os.path.dirname(os.path.abspath(__file__)), "build", "html")
    for entry in os.listdir(html_root):
        entry_path = os.path.join(html_root, entry)
        # Anything that isn't a numbered version's API docs gets deleted.
        if entry == "latest" or entry not in versions:
            if os.path.isdir(entry_path):
                shutil.rmtree(entry_path)
            else:
                os.remove(entry_path)
            continue

        # Update the version matrix in every HTML file under this version.
        for subdir, _, files in os.walk(entry_path):
            html_files = [os.path.join(subdir, name) for name in files if name.endswith(".html")]

            # Compute the relative path from this directory back up to the
            # docs root ("../.." per level below the version root).
            prefix = ".."
            walk_up = subdir
            while walk_up and walk_up != entry_path:
                walk_up = os.path.dirname(walk_up)
                prefix += "/.."

            # Swap in a freshly generated version list, if the page has one.
            for html_file in html_files:
                with open(html_file) as f:
                    soup = bs(f, "html.parser")
                matches = [dl for dl in soup.find_all("dl") if dl.find("dt", string="Versions")]
                if not matches:
                    continue
                matches[0].replace_with(create_version_dl(soup, prefix, entry, versions))
                with open(html_file, "w", encoding="utf-8") as f:
                    f.write(str(soup))
72 | 
73 | 
74 | if __name__ == "__main__":
75 |     main()
76 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
 1 | GitPython
 2 | beautifulsoup4
 3 | ipykernel
 4 | nbsphinx
 5 | pandoc
 6 | docutils==0.16
 7 | sphinx<6
 8 | sphinx_autodoc_typehints
 9 | sphinx_rtd_theme
10 | 


--------------------------------------------------------------------------------
/docs/source/_static/figures:
--------------------------------------------------------------------------------
1 | ../../../figures


--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/module.rst:
--------------------------------------------------------------------------------
 1 | {{ fullname }}
 2 | {{ underline }}
 3 | 
 4 | .. currentmodule:: {{fullname}}
 5 | 
 6 | .. contents::
 7 |     :local:
 8 | 
 9 | .. automodule:: {{fullname}}
10 |    :members:
11 |    :undoc-members:
12 |    :show-inheritance:
13 | 
14 |     Members
15 |     =======
16 | 


--------------------------------------------------------------------------------
/docs/source/_templates/versions.html:
--------------------------------------------------------------------------------
 1 | {% if display_lower_left %}
 2 | {# Add rst-badge after rst-versions for small badge style. #}
 3 |   <div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
 4 |     <span class="rst-current-version" data-toggle="rst-current-version">
 5 |       <span class="fa fa-book"> Versions</span>
 6 |       {{ current_version }}
 7 |       <span class="fa fa-caret-down"></span>
 8 |     </span>
 9 |     <div class="rst-other-versions">
10 |       {% if versions|length >= 1 %}
11 |       <dl>
12 |         <dt>{{ _('Versions') }}</dt>
13 |         {% for version in versions %}
14 |           {% if version == current_version %} <strong> {% endif %}
15 |           {% set rootdir = "/".join(pathto(root_doc).split("/")[:-1] + [".."]) %}
16 |           <dd><a href="{{ rootdir }}/{{ version }}/index.html">{{ version }}</a></dd>
17 |           {% if version == current_version %} </strong> {% endif %}
18 |         {% endfor %}
19 |       </dl>
20 |       {% endif %}
21 |     </div>
22 |   </div>
23 | {% endif %}
24 |  
25 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
  1 | # Configuration file for the Sphinx documentation builder.
  2 | #
  3 | # This file only contains a selection of the most common options. For a full
  4 | # list see the documentation:
  5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
  6 | 
  7 | # -- Path setup --------------------------------------------------------------
  8 | 
  9 | # If extensions (or modules to document with autodoc) are in another directory,
 10 | # add these directories to sys.path here. If the directory is relative to the
 11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 12 | #
 13 | from git import Repo
 14 | import os
 15 | import packaging.version
 16 | import pkg_resources
 17 | import re
 18 | import sys
 19 | 
 20 | sys.path.insert(0, os.path.abspath(".."))
 21 | 
 22 | 
 23 | # -- Project information -----------------------------------------------------
 24 | 
 25 | project = "Merlion"
 26 | copyright = "2021, salesforce.com, inc."
 27 | 
 28 | # The full version, including alpha/beta/rc tags
 29 | release = pkg_resources.get_distribution("salesforce-merlion").version
 30 | 
 31 | default_role = "any"
 32 | 
 33 | 
 34 | # -- General configuration ---------------------------------------------------
 35 | 
 36 | # Add any Sphinx extension module names here, as strings. They can be
 37 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 38 | # ones.
 39 | extensions = [
 40 |     "nbsphinx",
 41 |     "IPython.sphinxext.ipython_console_highlighting",
 42 |     "sphinx.ext.autodoc",
 43 |     "sphinx.ext.autosummary",
 44 |     "sphinx_autodoc_typehints",
 45 | ]
 46 | 
 47 | autoclass_content = "both"  # include both class docstring and __init__
 48 | autodoc_default_options = {
 49 |     # Make sure that any autodoc declarations show the right members
 50 |     "members": True,
 51 |     "undoc-members": True,
 52 |     "inherited-members": False,
 53 |     "show-inheritance": True,
 54 | }
 55 | autodoc_member_order = "bysource"
 56 | autosummary_generate = True  # Make _autosummary files and include them
 57 | 
 58 | # Add any paths that contain templates here, relative to this directory.
 59 | templates_path = ["_templates"]
 60 | 
 61 | 
 62 | # -- Options for HTML output -------------------------------------------------
 63 | 
 64 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 65 | # a list of builtin themes.
 66 | #
 67 | html_theme = "sphinx_rtd_theme"
 68 | 
 69 | html_theme_options = {"navigation_depth": -1}
 70 | 
 71 | # Set up something to display versions, but only do it if the current version is set in the environment.
 72 | if "current_version" in os.environ:
 73 |     current_version = os.environ["current_version"]
 74 |     stable_version = os.environ.get("stable_version", "latest")
 75 |     if current_version == stable_version != "latest":
 76 |         current_version = f"{current_version} (stable)"
 77 |     try:
 78 |         html_context
 79 |     except NameError:
 80 |         html_context = dict()
 81 |     html_context["display_lower_left"] = True
 82 | 
 83 |     repo = Repo(search_parent_directories=True)
 84 |     html_context["current_version"] = current_version
 85 |     html_context["version"] = current_version
 86 |     versions = sorted([tag.name for tag in repo.tags if re.match("v[0-9].*", tag.name)], reverse=True)
 87 |     versions = ["latest", *versions]
 88 |     html_context["versions"] = versions
 89 | 
 90 | else:
 91 |     current_version = "latest"
 92 | 
 93 | # List of patterns, relative to source directory, that match files and
 94 | # directories to ignore when looking for source files.
 95 | # This pattern also affects html_static_path and html_extra_path.
 96 | if current_version == "latest" or packaging.version.parse(current_version) > packaging.version.parse("1.3.0"):
 97 |     exclude_patterns = ["examples"]
 98 | else:
 99 |     exclude_patterns = ["tutorials"]
100 | exclude_patterns += ["**.ipynb_checkpoints"]
101 | 


--------------------------------------------------------------------------------
/docs/source/merlion.evaluate.rst:
--------------------------------------------------------------------------------
 1 | merlion.evaluate package
 2 | ========================
 3 | This sub-package implements utilities and metrics for evaluating the performance
 4 | of time series models on different tasks.
 5 | 
 6 | .. automodule:: merlion.evaluate
 7 |    :members:
 8 |    :undoc-members:
 9 |    :show-inheritance:
10 | 
11 | .. autosummary::
12 |     base
13 |     anomaly
14 |     forecast
15 | 
16 | merlion.evaluate.base
17 | ---------------------
18 | 
19 | .. automodule:: merlion.evaluate.base
20 |    :members:
21 |    :undoc-members:
22 |    :show-inheritance:
23 | 
24 | merlion.evaluate.anomaly
25 | ------------------------
26 | 
27 | .. automodule:: merlion.evaluate.anomaly
28 |    :members:
29 |    :undoc-members:
30 |    :show-inheritance:
31 | 
32 | merlion.evaluate.forecast
33 | -------------------------
34 | 
35 | .. automodule:: merlion.evaluate.forecast
36 |    :members:
37 |    :undoc-members:
38 |    :show-inheritance:
39 | 


--------------------------------------------------------------------------------
/docs/source/merlion.models.anomaly.change_point.rst:
--------------------------------------------------------------------------------
 1 | anomaly.change\_point
 2 | =====================
 3 | 
 4 | .. automodule:: merlion.models.anomaly.change_point
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | .. autosummary::
10 |     bocpd
11 | 
12 | anomaly.change\_point.bocpd
13 | ---------------------------
14 | 
15 | .. automodule:: merlion.models.anomaly.change_point.bocpd
16 |    :members:
17 |    :undoc-members:
18 |    :show-inheritance:
19 | 


--------------------------------------------------------------------------------
/docs/source/merlion.models.anomaly.forecast_based.rst:
--------------------------------------------------------------------------------
 1 | anomaly.forecast\_based
 2 | =======================
 3 | 
 4 | .. automodule:: merlion.models.anomaly.forecast_based
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | .. autosummary::
10 |     base
11 |     arima
12 |     sarima
13 |     ets
14 |     prophet
15 |     mses
16 | 
17 | anomaly.forecast\_based.base
18 | ----------------------------
19 | 
20 | .. automodule:: merlion.models.anomaly.forecast_based.base
21 |    :members:
22 |    :undoc-members:
23 |    :show-inheritance:
24 | 
25 | anomaly.forecast\_based.arima
26 | -----------------------------
27 | 
28 | .. automodule:: merlion.models.anomaly.forecast_based.arima
29 |    :members:
30 |    :undoc-members:
31 |    :show-inheritance:
32 | 
33 | anomaly.forecast\_based.sarima
34 | ------------------------------
35 | 
36 | .. automodule:: merlion.models.anomaly.forecast_based.sarima
37 |    :members:
38 |    :undoc-members:
39 |    :show-inheritance:
40 | 
41 | anomaly.forecast\_based.ets
42 | ---------------------------
43 | 
44 | .. automodule:: merlion.models.anomaly.forecast_based.ets
45 |    :members:
46 |    :undoc-members:
47 |    :show-inheritance:
48 | 
49 | anomaly.forecast\_based.prophet
50 | -------------------------------
51 | 
52 | .. automodule:: merlion.models.anomaly.forecast_based.prophet
53 |    :members:
54 |    :undoc-members:
55 |    :show-inheritance:
56 | 
57 | anomaly.forecast\_based.mses
58 | ----------------------------
59 | 
60 | .. automodule:: merlion.models.anomaly.forecast_based.mses
61 |    :members:
62 |    :undoc-members:
63 |    :show-inheritance:
64 | 


--------------------------------------------------------------------------------
/docs/source/merlion.models.anomaly.rst:
--------------------------------------------------------------------------------
  1 | anomaly
  2 | =======
  3 | 
  4 | .. automodule:: merlion.models.anomaly
  5 |    :members:
  6 |    :undoc-members:
  7 |    :show-inheritance:
  8 | 
  9 | Base classes
 10 | 
 11 | .. autosummary::
 12 |     base
 13 | 
 14 | Univariate models:
 15 | 
 16 | .. autosummary::
 17 |     dbl
 18 |     windstats
 19 |     spectral_residual
 20 |     stat_threshold
 21 |     zms
 22 | 
 23 | `Multivariate <tutorials/anomaly/2_AnomalyMultivariate>` models:
 24 | 
 25 | .. autosummary::
 26 |     isolation_forest
 27 |     random_cut_forest
 28 |     autoencoder
 29 |     dagmm
 30 |     lstm_ed
 31 |     vae
 32 |     deep_point_anomaly_detector
 33 | 
 34 | Subpackages
 35 | -----------
 36 | 
 37 | .. toctree::
 38 |    :maxdepth: 4
 39 | 
 40 |    merlion.models.anomaly.forecast_based
 41 |    merlion.models.anomaly.change_point
 42 | 
 43 | Base classes
 44 | ------------
 45 | 
 46 | anomaly.base
 47 | ^^^^^^^^^^^^
 48 | .. automodule:: merlion.models.anomaly.base
 49 |    :members:
 50 |    :undoc-members:
 51 |    :show-inheritance:
 52 | 
 53 | Univariate models
 54 | -----------------
 55 | 
 56 | anomaly.dbl
 57 | ^^^^^^^^^^^
 58 | .. automodule:: merlion.models.anomaly.dbl
 59 |    :members:
 60 |    :undoc-members:
 61 |    :show-inheritance:
 62 | 
 63 | anomaly.windstats
 64 | ^^^^^^^^^^^^^^^^^
 65 | .. automodule:: merlion.models.anomaly.windstats
 66 |    :members:
 67 |    :undoc-members:
 68 |    :show-inheritance:
 69 | 
 70 | anomaly.spectral\_residual
 71 | ^^^^^^^^^^^^^^^^^^^^^^^^^^
 72 | .. automodule:: merlion.models.anomaly.spectral_residual
 73 |    :members:
 74 |    :undoc-members:
 75 |    :show-inheritance:
 76 | 
 77 | anomaly.stat\_threshold
 78 | ^^^^^^^^^^^^^^^^^^^^^^^
 79 | .. automodule:: merlion.models.anomaly.stat_threshold
 80 |    :members:
 81 |    :undoc-members:
 82 |    :show-inheritance:
 83 | 
 84 | anomaly.zms
 85 | ^^^^^^^^^^^
 86 | .. automodule:: merlion.models.anomaly.zms
 87 |    :members:
 88 |    :undoc-members:
 89 |    :show-inheritance:
 90 | 
 91 | Multivariate models
 92 | -------------------
 93 | 
 94 | anomaly.isolation\_forest
 95 | ^^^^^^^^^^^^^^^^^^^^^^^^^
 96 | .. automodule:: merlion.models.anomaly.isolation_forest
 97 |    :members:
 98 |    :undoc-members:
 99 |    :show-inheritance:
100 | 
101 | anomaly.random\_cut\_forest
102 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^
103 | .. automodule:: merlion.models.anomaly.random_cut_forest
104 |    :members:
105 |    :undoc-members:
106 |    :show-inheritance:
107 | 
108 | anomaly.autoencoder
109 | ^^^^^^^^^^^^^^^^^^^
110 | .. automodule:: merlion.models.anomaly.autoencoder
111 |    :members:
112 |    :undoc-members:
113 |    :show-inheritance:
114 | 
115 | anomaly.vae
116 | ^^^^^^^^^^^
117 | .. automodule:: merlion.models.anomaly.vae
118 |    :members:
119 |    :undoc-members:
120 |    :show-inheritance:
121 | 
122 | anomaly.dagmm
123 | ^^^^^^^^^^^^^
124 | .. automodule:: merlion.models.anomaly.dagmm
125 |    :members:
126 |    :undoc-members:
127 |    :show-inheritance:
128 | 
129 | anomaly.lstm_ed
130 | ^^^^^^^^^^^^^^^
131 | .. automodule:: merlion.models.anomaly.lstm_ed
132 |    :members:
133 |    :undoc-members:
134 |    :show-inheritance:
135 | 
136 | anomaly.lof
137 | ^^^^^^^^^^^^^^^
138 | .. automodule:: merlion.models.anomaly.lof
139 |    :members:
140 |    :undoc-members:
141 |    :show-inheritance:
142 | 
143 | anomaly.deep\_point\_anomaly\_detector
144 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
145 | .. automodule:: merlion.models.anomaly.deep_point_anomaly_detector
146 |    :members:
147 |    :undoc-members:
148 |    :show-inheritance:
149 | 


--------------------------------------------------------------------------------
/docs/source/merlion.models.automl.rst:
--------------------------------------------------------------------------------
 1 | automl
 2 | ======
 3 | 
 4 | .. automodule:: merlion.models.automl
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | Base classes:
10 | 
11 | .. autosummary::
12 |     base
13 | 
14 | Models:
15 | 
16 | .. autosummary::
17 |     autoets
18 |     autoprophet
19 |     autosarima
20 | 
21 | Utilities:
22 | 
23 | .. autosummary::
24 |     seasonality
25 |     search
26 | 
27 | Base classes
28 | ------------
29 | automl.base
30 | ^^^^^^^^^^^
31 | .. automodule:: merlion.models.automl.base
32 |    :members:
33 |    :undoc-members:
34 |    :show-inheritance:
35 | 
36 | Models
37 | ------
38 | automl.autoets
39 | ^^^^^^^^^^^^^^
40 | .. automodule:: merlion.models.automl.autoets
41 |    :members:
42 |    :undoc-members:
43 |    :show-inheritance:
44 | 
45 | automl.autoprophet
46 | ^^^^^^^^^^^^^^^^^^
47 | .. automodule:: merlion.models.automl.autoprophet
48 |    :members:
49 |    :undoc-members:
50 |    :show-inheritance:
51 | 
52 | automl.autosarima
53 | ^^^^^^^^^^^^^^^^^
54 | .. automodule:: merlion.models.automl.autosarima
55 |    :members:
56 |    :undoc-members:
57 |    :show-inheritance:
58 | 
59 | 
60 | Utilities
61 | ---------
62 | 
63 | automl.seasonality
64 | ^^^^^^^^^^^^^^^^^^
65 | .. automodule:: merlion.models.automl.seasonality
66 |    :members:
67 |    :undoc-members:
68 |    :show-inheritance:
69 | 
70 | automl.search
71 | ^^^^^^^^^^^^^
72 | .. automodule:: merlion.models.automl.search
73 |    :members:
74 |    :undoc-members:
75 |    :show-inheritance:
76 | 


--------------------------------------------------------------------------------
/docs/source/merlion.models.ensemble.rst:
--------------------------------------------------------------------------------
 1 | ensemble
 2 | ========
 3 | 
 4 | .. automodule:: merlion.models.ensemble
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | .. autosummary::
10 |     base
11 |     combine
12 |     anomaly
13 |     forecast
14 | 
15 | ensemble.base
16 | -------------
17 | .. automodule:: merlion.models.ensemble.base
18 |    :members:
19 |    :undoc-members:
20 |    :show-inheritance:
21 | 
22 | ensemble.combine
23 | ----------------
24 | .. automodule:: merlion.models.ensemble.combine
25 |    :members:
26 |    :undoc-members:
27 |    :show-inheritance:
28 | 
29 | ensemble.anomaly
30 | ----------------
31 | .. automodule:: merlion.models.ensemble.anomaly
32 |    :members:
33 |    :undoc-members:
34 |    :show-inheritance:
35 | 
36 | ensemble.forecast
37 | -----------------
38 | .. automodule:: merlion.models.ensemble.forecast
39 |    :members:
40 |    :undoc-members:
41 |    :show-inheritance:
42 | 


--------------------------------------------------------------------------------
/docs/source/merlion.models.utils.rst:
--------------------------------------------------------------------------------
 1 | utils
 2 | =====
 3 | 
 4 | .. automodule:: merlion.models.utils
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | .. autosummary::
10 |     time_features
11 |     rolling_window_dataset
12 |     early_stopping
13 |     autosarima_utils
14 | 
15 | 
16 | utils.time\_features
17 | --------------------
18 | .. automodule:: merlion.models.utils.time_features
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
23 | 
24 | utils.rolling\_window\_dataset
25 | ------------------------------
26 | 
27 | .. automodule:: merlion.models.utils.rolling_window_dataset
28 |    :members:
29 |    :undoc-members:
30 |    :show-inheritance:
31 | 
32 | 
33 | utils.early\_stopping
34 | ---------------------
35 | .. automodule:: merlion.models.utils.early_stopping
36 |    :members:
37 |    :undoc-members:
38 |    :show-inheritance:
39 | 
40 | 
41 | utils.autosarima\_utils
42 | -----------------------
43 | 
44 | .. automodule:: merlion.models.utils.autosarima_utils
45 |    :members:
46 |    :undoc-members:
47 |    :show-inheritance:


--------------------------------------------------------------------------------
/docs/source/merlion.plot.rst:
--------------------------------------------------------------------------------
1 | merlion.plot package
2 | ====================
3 | .. automodule:: merlion.plot
4 |    :members:
5 |    :undoc-members:
6 |    :show-inheritance:
7 | 


--------------------------------------------------------------------------------
/docs/source/merlion.post_process.rst:
--------------------------------------------------------------------------------
 1 | merlion.post\_process package
 2 | =============================
 3 | This package implements some simple rules to post-process the output of an
 4 | anomaly detection model. This includes rules for reshaping a sequence to follow
 5 | a standard normal distribution (:py:mod:`merlion.post_process.calibrate`), sparsifying
 6 | a sequence based on a threshold (:py:mod:`merlion.post_process.threshold`), and composing
 7 | together sequences of post-processing rules (:py:mod:`merlion.post_process.sequence`).
 8 | 
 9 | .. automodule:: merlion.post_process
10 |    :members:
11 |    :undoc-members:
12 |    :show-inheritance:
13 | 
14 | .. autosummary::
15 |     base
16 |     factory
17 |     sequence
18 |     calibrate
19 |     threshold
20 | 
21 | 
22 | merlion.post\_process.base
23 | --------------------------
24 | 
25 | .. automodule:: merlion.post_process.base
26 |    :members:
27 |    :undoc-members:
28 |    :show-inheritance:
29 | 
30 | merlion.post\_process.factory
31 | -----------------------------
32 | 
33 | .. automodule:: merlion.post_process.factory
34 |    :members:
35 |    :undoc-members:
36 |    :show-inheritance:
37 | 
38 | merlion.post\_process.sequence
39 | ------------------------------
40 | 
41 | .. automodule:: merlion.post_process.sequence
42 |    :members:
43 |    :undoc-members:
44 |    :show-inheritance:
45 | 
46 | .. _merlion.post_process.calibrate:
47 | 
48 | merlion.post\_process.calibrate
49 | -------------------------------
50 | 
51 | .. automodule:: merlion.post_process.calibrate
52 |    :members:
53 |    :undoc-members:
54 |    :show-inheritance:
55 | 
56 | merlion.post\_process.threshold
57 | -------------------------------
58 | 
59 | .. automodule:: merlion.post_process.threshold
60 |    :members:
61 |    :undoc-members:
62 |    :show-inheritance:
63 | 


--------------------------------------------------------------------------------
/docs/source/merlion.transform.rst:
--------------------------------------------------------------------------------
  1 | merlion.transform package
  2 | =========================
  3 | This package provides a number of useful data pre-processing transforms. Each
  4 | transform is a callable object that inherits either from `TransformBase` or
  5 | `InvertibleTransformBase`.
  6 | 
  7 | We will introduce the key features of transform objects using the `Rescale`
  8 | class. You may initialize a ``transform`` in three ways:
  9 | 
 10 | .. code-block:: python
 11 | 
 12 |     from merlion.transform.factory import TransformFactory
 13 |     from merlion.transform.normalize import Rescale
 14 | 
 15 |     # Use the initializer
 16 |     transform = Rescale(bias=5.0, scale=3.2)
 17 | 
 18 |     # Use the class's from_dict() method with the arguments you would normally
 19 |     # give to the initializer
 20 |     kwargs = dict(bias=5.0, scale=3.2)
 21 |     transform = Rescale.from_dict(kwargs)
 22 | 
 23 |     # Use the TransformFactory with the class's name, and the keyword arguments
    # you would normally give to the initializer
 25 |     transform = TransformFactory.create("Rescale", **kwargs)
 26 | 
 27 | After initializing a ``transform``, one may use it as follows:
 28 | 
 29 | .. code-block:: python
 30 | 
 31 |     transform.train(time_series)              # set any trainable params
 32 |     transformed = transform(time_series)      # apply the transform to the time series
 33 |     inverted = transform.invert(transformed)  # invert the transform
 34 |     state_dict = transform.to_dict()          # serialize to a JSON-compatible dict
 35 | 
 36 | Note that ``transform.invert()`` is supported even if the transform doesn't
 37 | inherit from `InvertibleTransformBase`! In this case, ``transform.invert()``
 38 | implements a *pseudo*-inverse that may not recover the original ``time_series``
 39 | exactly. Additionally, the dict returned by ``transform.to_dict()`` is exactly
 40 | the same as the dict expected by the class method ``TransformCls.from_dict()``.
 41 | 
 42 | .. automodule:: merlion.transform
 43 |    :members:
 44 |    :undoc-members:
 45 |    :show-inheritance:
 46 | 
 47 | Base primitives:
 48 | 
 49 | .. autosummary::
 50 |     factory
 51 |     base
 52 |     sequence
 53 | 
 54 | Resampling:
 55 | 
 56 | .. autosummary::
 57 |     resample
 58 |     moving_average
 59 | 
 60 | Normalization:
 61 | 
 62 | .. autosummary::
 63 |     bound
 64 |     normalize
 65 | 
 66 | Miscellaneous:
 67 | 
 68 | .. autosummary::
 69 |     anomalize
 70 | 
 71 | Base primitives
 72 | ---------------
 73 | 
 74 | transform.factory
 75 | ^^^^^^^^^^^^^^^^^
 76 | .. automodule:: merlion.transform.factory
 77 |    :members:
 78 |    :undoc-members:
 79 |    :show-inheritance:
 80 | 
 81 | transform.base
 82 | ^^^^^^^^^^^^^^
 83 | .. automodule:: merlion.transform.base
 84 |    :members:
 85 |    :undoc-members:
 86 |    :show-inheritance:
 87 | 
 88 | transform.sequence
 89 | ^^^^^^^^^^^^^^^^^^
 90 | .. automodule:: merlion.transform.sequence
 91 |    :members:
 92 |    :undoc-members:
 93 |    :show-inheritance:
 94 | 
 95 | Resampling
 96 | ----------
 97 | 
 98 | transform.resample
 99 | ^^^^^^^^^^^^^^^^^^
100 | .. automodule:: merlion.transform.resample
101 |    :members:
102 |    :undoc-members:
103 |    :show-inheritance:
104 | 
105 | transform.moving\_average
106 | ^^^^^^^^^^^^^^^^^^^^^^^^^
107 | .. automodule:: merlion.transform.moving_average
108 |    :members:
109 |    :undoc-members:
110 |    :show-inheritance:
111 | 
112 | Normalization
113 | -------------
114 | 
115 | transform.normalize
116 | ^^^^^^^^^^^^^^^^^^^
117 | .. automodule:: merlion.transform.normalize
118 |    :members:
119 |    :undoc-members:
120 |    :show-inheritance:
121 | 
122 | transform.bound
123 | ^^^^^^^^^^^^^^^
124 | .. automodule:: merlion.transform.bound
125 |    :members:
126 |    :undoc-members:
127 |    :show-inheritance:
128 | 
129 | 
130 | Miscellaneous
131 | -------------
132 | 
133 | transform.anomalize
134 | ^^^^^^^^^^^^^^^^^^^
135 | .. automodule:: merlion.transform.anomalize
136 |    :members:
137 |    :undoc-members:
138 |    :show-inheritance:
139 | 


--------------------------------------------------------------------------------
/docs/source/merlion.utils.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | merlion.utils package
 3 | =====================
 4 | This package contains various utilities, including the `TimeSeries` class and
 5 | utilities for resampling time series.
 6 | 
 7 | .. automodule:: merlion.utils
 8 |    :members:
 9 |    :undoc-members:
10 |    :show-inheritance:
11 | 
12 | .. autosummary::
13 |     time_series
14 |     resample
15 |     data_io
16 |     hts
17 |     ts_generator
18 |     conj_priors
19 |     istat
20 | 
21 | merlion.utils.time\_series
22 | --------------------------
23 | .. automodule:: merlion.utils.time_series
24 |    :members:
25 |    :undoc-members:
26 |    :show-inheritance:
27 | 
28 | merlion.utils.resample
29 | ----------------------
30 | .. automodule:: merlion.utils.resample
31 |    :members:
32 |    :undoc-members:
33 |    :show-inheritance:
34 | 
35 | merlion.utils.data\_io
36 | ----------------------
37 | .. automodule:: merlion.utils.data_io
38 |    :members:
39 |    :undoc-members:
40 |    :show-inheritance:
41 | 
42 | merlion.utils.hts
43 | -----------------
44 | .. automodule:: merlion.utils.hts
45 |    :members:
46 |    :undoc-members:
47 |    :show-inheritance:
48 | 
49 | merlion.utils.ts\_generator
50 | ---------------------------
51 | .. automodule:: merlion.utils.ts_generator
52 |    :members:
53 |    :undoc-members:
54 |    :show-inheritance:
55 | 
56 | merlion.utils.conj_priors
57 | -------------------------
58 | .. automodule:: merlion.utils.conj_priors
59 |    :members:
60 |    :undoc-members:
61 |    :show-inheritance:
62 | 
63 | merlion.utils.istat
64 | -------------------
65 | .. automodule:: merlion.utils.istat
66 |    :members:
67 |    :undoc-members:
68 |    :show-inheritance:
69 | 


--------------------------------------------------------------------------------
/docs/source/ts_datasets.anomaly.rst:
--------------------------------------------------------------------------------
1 | ts_datasets.anomaly package
2 | ===========================
3 | 
4 | .. automodule:: ts_datasets.anomaly
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/ts_datasets.forecast.rst:
--------------------------------------------------------------------------------
1 | ts_datasets.forecast package
2 | ============================
3 | 
4 | .. automodule:: ts_datasets.forecast
5 |    :members:
6 |    :show-inheritance:
7 | 


--------------------------------------------------------------------------------
/docs/source/ts_datasets.rst:
--------------------------------------------------------------------------------
 1 | ts_datasets: Easy Data Loading
 2 | ==============================
 3 | 
 4 | :py:mod:`ts_datasets` implements Python classes that manipulate numerous time series datasets
 5 | into standardized ``pandas.DataFrame`` s. The sub-modules are :py:mod:`ts_datasets.anomaly`
 6 | for time series anomaly detection, and :py:mod:`ts_datasets.forecast` for time series forecasting.
 7 | Simply install the package by calling ``pip install -e ts_datasets/`` from the root directory of Merlion.
 8 | Then, you can load a dataset (e.g. the "realAWSCloudwatch" split of the Numenta Anomaly Benchmark
 9 | or the "Hourly" subset of the M4 dataset) by calling
10 | 
11 | .. code-block:: python
12 | 
13 |     from ts_datasets.anomaly import NAB
14 |     from ts_datasets.forecast import M4
15 |     anom_dataset = NAB(subset="realAWSCloudwatch", rootdir=path_to_NAB)
16 |     forecast_dataset = M4(subset="Hourly", rootdir=path_to_M4)
17 | 
18 | If you install this package in editable mode (i.e. specify ``-e`` when calling ``pip install -e ts_datasets/``),
19 | there is no need to specify a ``rootdir`` for any of the data loaders.
20 | 
21 | The core features of general data loaders (e.g. for forecasting) are outlined in the API doc for
22 | :py:class:`ts_datasets.base.BaseDataset`, and the features for time series anomaly detection data loaders
23 | are outlined in the API doc for :py:class:`ts_datasets.anomaly.TSADBaseDataset`.
24 | 
25 | The easiest way to load a custom dataset is to use either the :py:class:`ts_datasets.forecast.CustomDataset` or
26 | :py:class:`ts_datasets.anomaly.CustomAnomalyDataset` classes. Please review the `tutorial <tutorials/CustomDataset>`
27 | to get started.
28 | 
29 | .. automodule:: ts_datasets
30 |    :members:
31 |    :undoc-members:
32 |    :show-inheritance:
33 | 
34 | Subpackages
35 | -----------
36 | 
37 | .. toctree::
38 |    :maxdepth: 4
39 | 
40 |    ts_datasets.anomaly
41 |    ts_datasets.forecast
42 | 
43 | datasets.base module
44 | --------------------
45 | 
46 | .. automodule:: ts_datasets.base
47 |    :members:
48 |    :undoc-members:
49 |    :show-inheritance:
50 | 


--------------------------------------------------------------------------------
/docs/source/tutorials:
--------------------------------------------------------------------------------
1 | ../../examples


--------------------------------------------------------------------------------
/docs/source/tutorials.rst:
--------------------------------------------------------------------------------
 1 | Tutorials & Example Code
 2 | ========================
 3 | 
 4 | Basics
 5 | ------
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 2
 9 |    :glob:
10 | 
11 |    tutorials/TimeSeries.ipynb
12 |    tutorials/CustomDataset.ipynb
13 | 
14 | Anomaly Detection
15 | -----------------
16 | .. toctree::
17 |    :maxdepth: 2
18 |    :glob:
19 | 
20 |    tutorials/anomaly/*
21 | 
22 | Forecasting
23 | -----------
24 | .. toctree::
25 |    :maxdepth: 2
26 |    :glob:
27 | 
28 |    tutorials/forecast/*
29 | 
30 | Advanced Features
31 | -----------------
32 | .. toctree::
33 |    :maxdepth: 2
34 |    :glob:
35 | 
36 |    tutorials/advanced/*
37 | 


--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
 1 | This file outlines how you should navigate the Jupyter notebooks in this folder.
 2 | All new users should start with [`TimeSeries.ipynb`](TimeSeries.ipynb), which explains
 3 | how to use Merlion's `UnivariateTimeSeries` and `TimeSeries` classes. These classes are
 4 | the core data format used throughout the repo. 
 5 | 
 6 | If you are interested in anomaly detection, you should next read 
 7 | [`anomaly/AnomalyIntro.ipynb`](anomaly/0_AnomalyIntro.ipynb) to understand how to use
 8 | anomaly detection models in Merlion. Afterwards, if you want to implement a new
 9 | anomaly detection model in Merlion, please read [`CONTRIBUTING.md`](../CONTRIBUTING.md)
10 | and [`anomaly/AnomalyNewModel.ipynb`](anomaly/3_AnomalyNewModel.ipynb).
11 | 
12 | If you are interested in forecasting, you should next read
13 | [`forecast/ForecastIntro.ipynb`](forecast/0_ForecastIntro.ipynb) to understand how to use
14 | forecasting models in Merlion. Afterward, if you want to implement a new forecasting
15 | model in Merlion, please read [`CONTRIBUTING.md`](../CONTRIBUTING.md) and
16 | and [`forecast/ForecastNewModel.ipynb`](forecast/3_ForecastNewModel.ipynb).
17 | 
18 | We offer more advanced tutorials on specific high-performing models (AutoSARIMA and Mixture of Experts forecaster)
19 | in the [`advanced`](advanced) subdirectory. If you are interested in other utilities offered by the `merlion`
20 | package, look at the resources inside the [`misc`](misc) subdirectory. For example,
21 | [`misc/generate_synthetic_tsad_dataset.py`](misc/generate_synthetic_tsad_dataset.py)
22 | is a script for generating an artifical anomaly detection dataset using `merlion`'s time series
23 | generation and anomaly injection modules. This particular dataset may be loaded using the data
24 | loader `ts_datasets.anomaly.Synthetic`.
25 | 


--------------------------------------------------------------------------------
/examples/misc/generate_synthetic_tsad_dataset.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2022 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | from os.path import abspath, dirname, join
 8 | from collections import OrderedDict
 9 | import os
10 | 
11 | import numpy as np
12 | from math import floor, ceil
13 | 
14 | from merlion.utils.ts_generator import GeneratorConcatenator, TimeSeriesGenerator
15 | from merlion.transform.anomalize import LevelShift, Shock, TrendChange
16 | 
17 | MERLION_ROOT = dirname(dirname(dirname(abspath(__file__))))
18 | DATADIR = join(MERLION_ROOT, "data")
19 | 
20 | 
21 | def main():
22 |     np.random.seed(12345)
23 |     n = 10000
24 | 
25 |     # Generate Synthetic Time Series
26 |     ts_generators = [
27 |         # generates a time series that trends upward before
28 |         # trending downward
29 |         GeneratorConcatenator(
30 |             generators=[
31 |                 # upward trend
32 |                 TimeSeriesGenerator(f=lambda x: x ** 1.6, n=floor(0.6 * n)),
33 |                 # downward trend
34 |                 TimeSeriesGenerator(f=lambda x: -x ** 1.2, n=ceil(0.4 * n)),
35 |             ],
36 |             noise=lambda: np.random.normal(0, 500),
37 |             string_outputs=True,
38 |             name="upward_downward",
39 |         ),
40 |         # generates a white noise series
41 |         TimeSeriesGenerator(f=lambda x: 0, n=n, name="horizontal"),
42 |         # generates a time series with multiple seasonality
43 |         TimeSeriesGenerator(f=lambda x: 2 * np.sin(x * 0.1) + np.sin(x * 0.02), n=n, name="seasonal"),
44 |     ]
45 | 
46 |     ts_list = [generator.generate(return_ts=True) for generator in ts_generators]
47 | 
48 |     # Initialize Anomaly Injection Transforms
49 |     anomalize_kwargs = dict(anom_prob=0.002, anom_width_range=(20, 200), alpha=0.5)
50 | 
51 |     anomalizers = OrderedDict(
52 |         shock=Shock(pos_prob=0.5, sd_range=(4, 8), **anomalize_kwargs),
53 |         spike=Shock(pos_prob=1.0, sd_range=(4, 8), **anomalize_kwargs),
54 |         dip=Shock(pos_prob=0.0, sd_range=(4, 8), **anomalize_kwargs),
55 |         level=LevelShift(pos_prob=0.5, sd_range=(3, 6), **anomalize_kwargs),
56 |         trend=TrendChange(anom_prob=0.01, pos_prob=0.5, scale_range=(2.5, 5)),
57 |     )
58 | 
59 |     # make directory for writing anomalized data
60 |     anom_dir = join(DATADIR, "synthetic_anomaly")
61 |     os.makedirs(anom_dir, exist_ok=True)
62 | 
63 |     for i, ts in enumerate(ts_list):
64 |         # write original ts
65 |         csv = join(anom_dir, f"{ts.names[0]}.csv")
66 |         ts.to_csv(csv)
67 |         # anomalize ts with each anomalizer
68 |         for j, (name, anom) in enumerate(anomalizers.items()):
69 |             np.random.seed(1000 * i + j)
70 |             anom_ts = anom(ts)
71 |             csv = join(anom_dir, f"{anom_ts.names[0]}_{name}_anomaly.csv")
72 |             anom_ts.to_csv(csv)
73 | 
74 | 
75 | if __name__ == "__main__":
76 |     main()
77 | 


--------------------------------------------------------------------------------
/figures/anom_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/figures/anom_example.png


--------------------------------------------------------------------------------
/figures/dashboard_anomaly.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/figures/dashboard_anomaly.png


--------------------------------------------------------------------------------
/figures/dashboard_file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/figures/dashboard_file.png


--------------------------------------------------------------------------------
/figures/dashboard_forecast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/figures/dashboard_forecast.png


--------------------------------------------------------------------------------
/figures/forecast_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/figures/forecast_example.png


--------------------------------------------------------------------------------
/k8s-spec/anomaly.yml:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright 2018 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     https://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | # Support for Python is experimental, and requires building SNAPSHOT image of Apache Spark,
17 | # with `imagePullPolicy` set to Always
18 | 
19 | # Install the spark operator as follows:
20 | # helm install spark-operator spark-operator/spark-operator --namespace spark-operator --create-namespace --set sparkJobNamespace=spark-apps
21 | 
22 | apiVersion: "sparkoperator.k8s.io/v1beta2"
23 | kind: SparkApplication
24 | metadata:
25 |   name: anomaly
26 |   namespace: spark-apps
27 | spec:
28 |   sparkVersion: "3.1.1"
29 |   sparkConf:
30 |     spark.sql.execution.arrow.pyspark.enabled: "true"
31 | 
32 |   restartPolicy:
33 |     type: Never
34 | 
35 |   driver:
36 |     cores: 1
37 |     memory: "1G"
38 |     serviceAccount: spark-operator-spark
39 |     labels:
40 |       version: 3.1.1
41 | 
42 |   executor:
43 |     cores: 1
44 |     instances: 2
45 |     memory: "2G"
46 |     podSecurityContext:
47 |       runAsNonRoot: true
48 |       runAsUser: 185
49 |     labels:
50 |       version: 3.1.1
51 | 
52 |   type: Python
53 |   pythonVersion: "3"
54 |   mode: cluster
55 |   image: "merlion-spark:latest"
56 |   imagePullPolicy: Always
57 |   mainApplicationFile: local:///opt/spark/apps/anomaly.py
58 |   arguments:
59 |     - "--data"
60 |     - "/opt/spark/work-dir/walmart_mini.csv"  # can be on the cloud if you configure Spark appropriately
61 |     - "--output_path"
62 |     - "results"  # can be on the cloud if you configure Spark appropriately
63 |     - "--train_test_split"
64 |     - "2012-08-01"
65 |     - "--data_cols"
66 |     - '[
67 |         "Weekly_Sales",
68 |         "Unemployment",
69 |         "CPI",
70 |         "Fuel_Price",
71 |         "Temperature"
72 |       ]'
73 |     - "--index_cols"
74 |     - '["Store", "Dept"]'
75 |     - "--time_col"
76 |     - "Date"
77 |     - "--model"
78 |     - '{"name": "DefaultDetector"}'
79 | 


--------------------------------------------------------------------------------
/k8s-spec/forecast.yml:
--------------------------------------------------------------------------------
  1 | #
  2 | # Copyright 2018 Google LLC
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     https://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | #
 16 | # Support for Python is experimental, and requires building SNAPSHOT image of Apache Spark,
 17 | # with `imagePullPolicy` set to Always
 18 | 
 19 | # Install the spark operator as follows:
 20 | # helm install spark-operator spark-operator/spark-operator --namespace spark-operator --create-namespace --set sparkJobNamespace=spark-apps
 21 | 
 22 | apiVersion: "sparkoperator.k8s.io/v1beta2"
 23 | kind: SparkApplication
 24 | metadata:
 25 |   name: forecast
 26 |   namespace: spark-apps
 27 | spec:
 28 |   sparkVersion: "3.1.1"
 29 |   sparkConf:
 30 |     spark.sql.execution.arrow.pyspark.enabled: "true"
 31 | 
 32 |   restartPolicy:
 33 |     type: Never
 34 | 
 35 |   driver:
 36 |     cores: 1
 37 |     memory: "1G"
 38 |     serviceAccount: spark-operator-spark
 39 |     labels:
 40 |       version: 3.1.1
 41 | 
 42 |   executor:
 43 |     cores: 1
 44 |     instances: 2
 45 |     memory: "2G"
 46 |     podSecurityContext:
 47 |       runAsNonRoot: true
 48 |       runAsUser: 185
 49 |     labels:
 50 |       version: 3.1.1
 51 | 
 52 |   type: Python
 53 |   pythonVersion: "3"
 54 |   mode: cluster
 55 |   image: "merlion-spark:latest"
 56 |   imagePullPolicy: Always
 57 |   mainApplicationFile: local:///opt/spark/apps/forecast.py
 58 |   arguments:
 59 |     - "--train_data"
 60 |     - "/opt/spark/work-dir/walmart_mini.csv"  # can be on the cloud if you configure Spark appropriately
 61 |     - "--output_path"
 62 |     - "results"  # can be on the cloud if you configure Spark appropriately
 63 |     - "--target_col"
 64 |     - "Weekly_Sales"
 65 |     - "--data_cols"
 66 |     - '[
 67 |         "Weekly_Sales",
 68 |         "Unemployment",
 69 |         "CPI",
 70 |         "Fuel_Price",
 71 |         "Temperature"
 72 |       ]'
 73 |     - "--index_cols"
 74 |     - '["Store", "Dept"]'
 75 |     - "--time_col"
 76 |     - "Date"
 77 |     - "--hierarchical"
 78 |     - "--agg_dict"
 79 |     - '{
 80 |         "Weekly_Sales": "sum",
 81 |       }'
 82 |     - "--model"
 83 |     - '{"name": "DefaultForecaster"}'
 84 |     - "--time_stamps"
 85 |     - '[
 86 |         "2012-11-02",
 87 |         "2012-11-09",
 88 |         "2012-11-16",
 89 |         "2012-11-23",
 90 |         "2012-11-30",
 91 |         "2012-12-07",
 92 |         "2012-12-14",
 93 |         "2012-12-21",
 94 |         "2012-12-28",
 95 |         "2013-01-04",
 96 |         "2013-01-11",
 97 |         "2013-01-18",
 98 |         "2013-01-25",
 99 |         "2013-02-01",
100 |         "2013-02-08",
101 |         "2013-02-15",
102 |         "2013-02-22",
103 |         "2013-03-01",
104 |         "2013-03-08",
105 |         "2013-03-15",
106 |         "2013-03-22",
107 |         "2013-03-29",
108 |         "2013-04-05",
109 |         "2013-04-12",
110 |         "2013-04-19",
111 |         "2013-04-26",
112 |         "2013-05-03"
113 |       ]'
114 | 


--------------------------------------------------------------------------------
/merlion/dashboard/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | try:
 8 |     import dash
 9 |     import diskcache
10 |     import dash_bootstrap_components
11 | except ImportError as e:
12 |     err = (
13 |         "Try installing Merlion with optional dependencies using `pip install salesforce-merlion[dashboard]` or "
14 |         "`pip install `salesforce-merlion[all]`"
15 |     )
16 |     raise ImportError(str(e) + ". " + err)
17 | 


--------------------------------------------------------------------------------
/merlion/dashboard/__main__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | from merlion.dashboard.server import app
 8 | 
 9 | if __name__ == "__main__":
10 |     app.run_server(debug=False)
11 | 


--------------------------------------------------------------------------------
/merlion/dashboard/assets/Acumin-BdPro.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/merlion/dashboard/assets/Acumin-BdPro.otf


--------------------------------------------------------------------------------
/merlion/dashboard/assets/fonts/SalesforceSans-Bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/merlion/dashboard/assets/fonts/SalesforceSans-Bold.woff


--------------------------------------------------------------------------------
/merlion/dashboard/assets/fonts/SalesforceSans-BoldItalic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/merlion/dashboard/assets/fonts/SalesforceSans-BoldItalic.woff


--------------------------------------------------------------------------------
/merlion/dashboard/assets/fonts/SalesforceSans-Italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/merlion/dashboard/assets/fonts/SalesforceSans-Italic.woff


--------------------------------------------------------------------------------
/merlion/dashboard/assets/fonts/SalesforceSans-Light.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/merlion/dashboard/assets/fonts/SalesforceSans-Light.woff


--------------------------------------------------------------------------------
/merlion/dashboard/assets/fonts/SalesforceSans-LightItalic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/merlion/dashboard/assets/fonts/SalesforceSans-LightItalic.woff


--------------------------------------------------------------------------------
/merlion/dashboard/assets/fonts/SalesforceSans-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/merlion/dashboard/assets/fonts/SalesforceSans-Regular.woff


--------------------------------------------------------------------------------
/merlion/dashboard/assets/fonts/SalesforceSans-Thin.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/merlion/dashboard/assets/fonts/SalesforceSans-Thin.woff


--------------------------------------------------------------------------------
/merlion/dashboard/assets/fonts/SalesforceSans-ThinItalic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/merlion/dashboard/assets/fonts/SalesforceSans-ThinItalic.woff


--------------------------------------------------------------------------------
/merlion/dashboard/assets/resizing.js:
--------------------------------------------------------------------------------
 1 | /* resize figures in table upon callback get fires */
 2 | 
 3 | if(!window.dash_clientside) {window.dash_clientside = {};}
 4 | window.dash_clientside.clientside = {
 5 |    resize: function (value) {
 6 |        console.log("resizing...");
 7 |        window.dispatchEvent(new Event('resize'));
 8 |        return null
 9 |    }
10 | }


--------------------------------------------------------------------------------
/merlion/dashboard/assets/styles.css:
--------------------------------------------------------------------------------
 1 | 
 2 | a:link {
 3 |   color: #696969;
 4 |   text-decoration: none;
 5 | }
 6 | 
 7 | /* visited link */
 8 | a:visited {
 9 |   color: #696969;
10 |   text-decoration: none;
11 | }
12 | 
13 | /* mouse over link */
14 | a:hover {
15 |   opacity: 0.6;
16 | }
17 | 
18 | /* selected link */
19 | a:active {
20 |   color: lightgrey;
21 |   text-decoration: underline;
22 | }
23 | 
24 | .greyline {
25 |   width: 90%;
26 |   border-bottom: 1px solid lightgrey;
27 | }
28 | #tabs{
29 |   filter:drop-shadow(0px 4px 6px rgba(0, 0, 0, 0.2));
30 | }
31 | .tab {
32 |   /* border-style: solid;
33 |   border-color: rgb(0, 0, 0, 0.2); */
34 |   border-bottom-style: none;
35 |   border-top-style: none;
36 |   border-right-style: none;
37 |   padding: 5px 10px;
38 |   border:none !important;
39 | }
40 | 
41 | .rowrow {
42 |   margin: auto;
43 |   text-align: center;
44 |   width: 97%;
45 | }
46 | 
47 | .rowrow2 {
48 |   margin: auto;
49 |   width: 97%;
50 | }
51 | 
52 | .tablast {
53 |   border-style: solid;
54 |   border-color: rgb(0, 0, 0, 0.2);
55 |   border-bottom-style: none;
56 |   border-top-style: none;
57 |   color: black;
58 |   padding: 6px 20px;
59 |   text-align: center;
60 |   text-decoration: none;
61 |   display: inline-block;
62 | }
63 | 
64 | #learn-more-button {
65 |   float: right;
66 |   padding-left: 15px;
67 |   padding-right: 15px;
68 |   text-transform: none;
69 |   margin: 25px 25px;
70 | }
71 | 
72 | 


--------------------------------------------------------------------------------
/merlion/dashboard/assets/upload.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" x="0px" y="0px"
 2 | 	 width="20px" height="20px" viewBox="0 0 52 52" enable-background="new 0 0 52 52" xml:space="preserve">
 3 | <g>
 4 | 	<path fill="#1B96FF" d="M48.5,31h-3c-0.8,0-1.5,0.8-1.5,1.5v10c0,0.8-0.7,1.5-1.5,1.5h-33C8.7,44,8,43.3,8,42.5v-10
 5 | 		C8,31.8,7.3,31,6.5,31h-3C2.7,31,2,31.8,2,32.5V46c0,2.2,1.8,4,4,4h40c2.2,0,4-1.8,4-4V32.5C50,31.8,49.3,31,48.5,31z"/>
 6 | 	<path fill="#1B96FF" d="M27,2.4c-0.6-0.6-1.5-0.6-2.1,0L11.4,15.9c-0.6,0.6-0.6,1.5,0,2.1l2.1,2.1c0.6,0.6,1.5,0.6,2.1,0l5.6-5.6
 7 | 		c0.6-0.6,1.8-0.2,1.8,0.7v21.2c0,0.8,0.6,1.5,1.4,1.5h3c0.8,0,1.6-0.8,1.6-1.5V15.3c0-0.9,1-1.3,1.7-0.7l5.6,5.6
 8 | 		c0.6,0.6,1.5,0.6,2.1,0l2.1-2.1c0.6-0.6,0.6-1.5,0-2.1L27,2.4z"/>
 9 | </g>
10 | </svg>
11 | 


--------------------------------------------------------------------------------
/merlion/dashboard/callbacks/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2023 salesforce.com, inc.
3 | # All rights reserved.
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 | #
7 | 


--------------------------------------------------------------------------------
/merlion/dashboard/models/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2023 salesforce.com, inc.
3 | # All rights reserved.
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 | #
7 | 


--------------------------------------------------------------------------------
/merlion/dashboard/models/data.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import sys
 8 | import logging
 9 | from collections import OrderedDict
10 | from merlion.dashboard.models.utils import DataMixin
11 | from merlion.dashboard.pages.utils import create_empty_figure
12 | from merlion.dashboard.utils.log import DashLogger
13 | from merlion.dashboard.utils.plot import data_table, plot_timeseries
14 | 
15 | dash_logger = DashLogger(stream=sys.stdout)
16 | 
17 | 
18 | class DataAnalyzer(DataMixin):
19 |     def __init__(self):
20 |         self.logger = logging.getLogger(__name__)
21 |         self.logger.setLevel(logging.DEBUG)
22 |         self.logger.addHandler(dash_logger)
23 | 
24 |     @staticmethod
25 |     def get_stats(df):
26 |         stats = {
27 |             "@global": OrderedDict(
28 |                 {
29 |                     "NO. of Variables": len(df.columns),
30 |                     "Time Series Length": len(df),
31 |                     "Has NaNs": bool(df.isnull().values.any()),
32 |                 }
33 |             ),
34 |             "@columns": list(df.columns),
35 |         }
36 |         for col in df.columns:
37 |             stats[col] = df[col].describe().to_dict(into=OrderedDict)
38 |         return stats
39 | 
40 |     @staticmethod
41 |     def get_data_table(df):
42 |         return data_table(df)
43 | 
44 |     @staticmethod
45 |     def get_data_figure(df):
46 |         if df is None:
47 |             return create_empty_figure()
48 |         else:
49 |             return plot_timeseries(df)
50 | 


--------------------------------------------------------------------------------
/merlion/dashboard/pages/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2023 salesforce.com, inc.
3 | # All rights reserved.
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 | #
7 | 


--------------------------------------------------------------------------------
/merlion/dashboard/pages/utils.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import dash_bootstrap_components as dbc
 8 | from dash import html, dash_table
 9 | import pandas as pd
10 | from merlion.dashboard.settings import *
11 | from merlion.dashboard.utils.plot import plot_timeseries
12 | 
13 | styles = {
14 |     "json-output": {"overflow-y": "scroll", "height": "calc(90% - 25px)", "border": "thin lightgrey solid"},
15 |     "tab": {"height": "calc(98vh - 80px)"},
16 |     "log-output": {
17 |         "overflow-y": "scroll",
18 |         "height": "calc(90% - 25px)",
19 |         "border": "thin lightgrey solid",
20 |         "white-space": "pre-wrap",
21 |     },
22 | }
23 | 
24 | 
25 | def create_modal(modal_id, header, content, content_id, button_id):
26 |     modal = html.Div(
27 |         [
28 |             dbc.Modal(
29 |                 [
30 |                     dbc.ModalHeader(dbc.ModalTitle(header)),
31 |                     dbc.ModalBody(content, id=content_id),
32 |                     dbc.ModalFooter(dbc.Button("Close", id=button_id, className="ml-auto", n_clicks=0)),
33 |                 ],
34 |                 id=modal_id,
35 |                 is_open=False,
36 |             )
37 |         ]
38 |     )
39 |     return modal
40 | 
41 | 
42 | def create_param_table(params=None, height=100):
43 |     if params is None or len(params) == 0:
44 |         data = [{"Parameter": "", "Value": ""}]
45 |     else:
46 |         data = [{"Parameter": key, "Value": str(value["default"])} for key, value in params.items()]
47 | 
48 |     table = dash_table.DataTable(
49 |         data=data,
50 |         columns=[{"id": "Parameter", "name": "Parameter"}, {"id": "Value", "name": "Value"}],
51 |         editable=True,
52 |         style_header_conditional=[{"textAlign": "center", "font-family": "Salesforce Sans"}],
53 |         style_cell_conditional=[{"textAlign": "center", "font-family": "Salesforce Sans"}],
54 |         style_table={"overflowX": "scroll", "overflowY": "scroll", "height": height},
55 |         style_header=dict(backgroundColor=TABLE_HEADER_COLOR, color="white"),
56 |         style_data=dict(backgroundColor=TABLE_DATA_COLOR),
57 |     )
58 |     return table
59 | 
60 | 
61 | def create_metric_table(metrics=None):
62 |     if metrics is None or len(metrics) == 0:
63 |         data, columns = {}, []
64 |         for i in range(4):
65 |             data[f"Metric {i}"] = "-"
66 |             columns.append({"id": f"Metric {i}", "name": f"Metric {i}"})
67 | 
68 |     else:
69 |         data = metrics
70 |         columns = [{"id": key, "name": key} for key in metrics.keys()]
71 | 
72 |     if not isinstance(data, list):
73 |         data = [data]
74 |     table = dash_table.DataTable(
75 |         data=data,
76 |         columns=columns,
77 |         editable=False,
78 |         style_header_conditional=[{"textAlign": "center", "font-family": "Salesforce Sans"}],
79 |         style_cell_conditional=[{"textAlign": "center", "font-family": "Salesforce Sans"}],
80 |         style_table={"overflowX": "scroll"},
81 |         style_header=dict(backgroundColor=TABLE_HEADER_COLOR, color="white"),
82 |         style_data=dict(backgroundColor=TABLE_DATA_COLOR),
83 |     )
84 |     return table
85 | 
86 | 
87 | def create_empty_figure():
88 |     return plot_timeseries(pd.DataFrame(index=pd.DatetimeIndex([])))
89 | 


--------------------------------------------------------------------------------
/merlion/dashboard/server.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import dash
 8 | import dash_bootstrap_components as dbc
 9 | from dash import dcc
10 | from dash import html
11 | from dash.dependencies import Input, Output, State
12 | import logging
13 | 
14 | from merlion.dashboard.utils.layout import create_banner, create_layout
15 | from merlion.dashboard.pages.data import create_data_layout
16 | from merlion.dashboard.pages.forecast import create_forecasting_layout
17 | from merlion.dashboard.pages.anomaly import create_anomaly_layout
18 | 
19 | from merlion.dashboard.callbacks import data
20 | from merlion.dashboard.callbacks import forecast
21 | from merlion.dashboard.callbacks import anomaly
22 | 
23 | logging.basicConfig(format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", level=logging.INFO)
24 | 
25 | app = dash.Dash(
26 |     __name__,
27 |     meta_tags=[{"name": "viewport", "content": "width=device-width, initial-scale=1"}],
28 |     external_stylesheets=[dbc.themes.BOOTSTRAP],
29 |     title="Merlion Dashboard",
30 | )
31 | app.config["suppress_callback_exceptions"] = True
32 | app.layout = html.Div(
33 |     [
34 |         dcc.Location(id="url", refresh=False),
35 |         html.Div(id="page-content"),
36 |         dcc.Store(id="data-state"),
37 |         dcc.Store(id="anomaly-state"),
38 |         dcc.Store(id="forecasting-state"),
39 |     ]
40 | )
41 | server = app.server
42 | 
43 | 
44 | @app.callback(Output("page-content", "children"), [Input("url", "pathname")])
45 | def _display_page(pathname):
46 |     return html.Div(id="app-container", children=[create_banner(app), html.Br(), create_layout()])
47 | 
48 | 
49 | @app.callback(
50 |     Output("plots", "children"),
51 |     Input("tabs", "value"),
52 |     [State("data-state", "data"), State("anomaly-state", "data"), State("forecasting-state", "data")],
53 | )
54 | def _click_tab(tab, data_state, anomaly_state, forecasting_state):
55 |     if tab == "file-manager":
56 |         return create_data_layout()
57 |     elif tab == "forecasting":
58 |         return create_forecasting_layout()
59 |     elif tab == "anomaly":
60 |         return create_anomaly_layout()
61 | 


--------------------------------------------------------------------------------
/merlion/dashboard/settings.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2023 salesforce.com, inc.
3 | # All rights reserved.
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 | #
7 | TABLE_HEADER_COLOR = "#014486"
8 | TABLE_DATA_COLOR = "white"
9 | 


--------------------------------------------------------------------------------
/merlion/dashboard/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2023 salesforce.com, inc.
3 | # All rights reserved.
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 | #
7 | 


--------------------------------------------------------------------------------
/merlion/dashboard/utils/file_manager.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import os
 8 | import base64
 9 | import zipfile
10 | import diskcache
11 | from pathlib import Path
12 | from dash.long_callback import DiskcacheLongCallbackManager
13 | 
14 | 
15 | class SingletonClass:
16 |     def __new__(cls):
17 |         if not hasattr(cls, "instance"):
18 |             cls.instance = super(SingletonClass, cls).__new__(cls)
19 |         return cls.instance
20 | 
21 | 
class FileManager(SingletonClass):
    """
    Singleton managing the dashboard's working directories — uploaded data files,
    saved models, and the disk cache backing Dash long callbacks — rooted at
    ``~/merlion`` by default.
    """

    def __init__(self, directory=None):
        """
        :param directory: base directory for all dashboard files. Defaults to ``~/merlion``.
        """
        self.directory = os.path.join(str(Path.home()), "merlion") if directory is None else directory
        # exist_ok=True avoids the check-then-create race of exists() + makedirs()
        os.makedirs(self.directory, exist_ok=True)

        self.data_folder = os.path.join(self.directory, "data")
        os.makedirs(self.data_folder, exist_ok=True)

        self.model_folder = os.path.join(self.directory, "models")
        os.makedirs(self.model_folder, exist_ok=True)

        self.cache_folder = os.path.join(self.directory, "cache")
        self.long_callback_manager = DiskcacheLongCallbackManager(diskcache.Cache(self.cache_folder))

    def save_file(self, name, content):
        """
        Decode a Dash upload payload (``data:<mime>;base64,<data>``) and write it to the data folder.

        :param name: name of the uploaded file.
        :param content: base64-encoded upload content (e.g. from ``dcc.Upload``).
        """
        data = content.encode("utf8").split(b";base64,")[1]
        # basename() prevents path traversal via upload names like "../../evil"
        with open(os.path.join(self.data_folder, os.path.basename(name)), "wb") as fp:
            fp.write(base64.decodebytes(data))

    def uploaded_files(self):
        """
        :return: the names of all files currently in the data folder.
        """
        files = []
        for filename in os.listdir(self.data_folder):
            if os.path.isfile(os.path.join(self.data_folder, filename)):
                files.append(filename)
        return files

    def get_model_download_path(self, model_name):
        """
        Zip the contents of a saved model's folder for download.

        :param model_name: name of the saved model (a sub-folder of the model folder).
        :return: path to ``<model_folder>/<model_name>/<model_name>.zip``.
        """
        path = os.path.join(self.model_folder, model_name)
        zip_file = os.path.join(path, f"{model_name}.zip")
        with zipfile.ZipFile(zip_file, mode="w") as f:
            for file in Path(path).iterdir():
                # Skip the zip file itself (it lives inside the folder being zipped)
                if file.name != f"{model_name}.zip":
                    f.write(file, arcname=file.name)
        return zip_file

    def get_model_list(self):
        """
        :return: the names of all saved models (sub-folders of the model folder).
        """
        models = []
        for name in os.listdir(self.model_folder):
            if os.path.isdir(os.path.join(self.model_folder, name)):
                models.append(name)
        return models

    @property
    def base_directory(self):
        # Base directory holding all dashboard state
        return self.directory

    @property
    def data_directory(self):
        # Directory holding uploaded data files
        return self.data_folder

    @property
    def model_directory(self):
        # Directory holding saved models
        return self.model_folder

    def get_long_callback_manager(self):
        """
        :return: the Dash long-callback manager backed by the disk cache.
        """
        return self.long_callback_manager
83 | 


--------------------------------------------------------------------------------
/merlion/dashboard/utils/layout.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | from dash import dcc
 8 | from dash import html
 9 | 
10 | 
# Style applied to unselected dashboard tabs (blue background, bold white text).
tab_style = {
    "borderBottom": "1px solid #d6d6d6",
    "padding": "6px",
    "fontWeight": "bold",
    "backgroundColor": "#1B96FF",
    "color": "white",
}

# Style applied to the currently selected tab (darker blue background).
tab_selected_style = {
    "borderTop": "1px solid #d6d6d6",
    "borderBottom": "1px solid #d6d6d6",
    "backgroundColor": "#0176D3",
    "color": "white",
    "padding": "6px",
    "fontWeight": "bold",
}
27 | 
28 | 
def create_banner(app):
    """
    Build the top banner (Merlion logo plus tagline) shown above the dashboard tabs.

    :param app: the Dash app, used to resolve the logo asset URL.
    """
    logo = html.Img(src=app.get_asset_url("merlion_small.svg"))
    tagline = html.Plaintext("  Powered by Salesforce AI Research")
    return html.Div(id="banner", className="banner", children=[logo, tagline])
38 | 
39 | 
def create_layout() -> html.Div:
    """
    Build the main app layout: a tab bar (file manager, anomaly detection, forecasting)
    above an empty ``plots`` container that each page fills in.
    """
    # (label, value) pairs for each dashboard page
    tab_specs = [
        ("File Manager", "file-manager"),
        ("Anomaly Detection", "anomaly"),
        ("Forecasting", "forecasting"),
    ]
    children = [
        dcc.Tab(label=label, value=value, style=tab_style, selected_style=tab_selected_style)
        for label, value in tab_specs
    ]
    values = [value for _, value in tab_specs]

    return html.Div(
        id="app-content",
        children=[dcc.Tabs(id="tabs", value=values[0] if values else "none", children=children), html.Div(id="plots")],
    )
63 | 


--------------------------------------------------------------------------------
/merlion/dashboard/utils/log.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import logging
 8 | 
 9 | 
class DashLogger(logging.StreamHandler):
    """
    Stream handler that additionally keeps the most recent formatted log messages
    in memory (``self.logs``) so the dashboard can display them.
    """

    def __init__(self, stream=None):
        super().__init__(stream=stream)
        self.logs = []

    def emit(self, record):
        try:
            formatted = self.format(record)
            self.logs.append(formatted)
            # Bound memory use: retain only the 1000 most recent messages.
            self.logs = self.logs[-1000:]
            self.flush()
        except Exception:
            self.handleError(record)
23 | 


--------------------------------------------------------------------------------
/merlion/dashboard/utils/plot.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import plotly
 8 | import plotly.graph_objects as go
 9 | from plotly.subplots import make_subplots
10 | from dash import dash_table, dcc
11 | from merlion.dashboard.settings import *
12 | 
13 | 
def data_table(df, n=1000, page_size=10):
    """
    Render the first ``n`` rows of a DataFrame as a paginated, read-only Dash DataTable.

    :param df: the DataFrame to display; ``None`` yields an empty table.
    :param n: maximum number of rows to display.
    :param page_size: number of rows shown per page.
    """
    if df is None:
        return dash_table.DataTable()

    df = df.head(n)
    # The DataFrame index is shown as an extra leading "Index" column.
    columns = [{"name": "Index", "id": "Index"}] + [{"name": c, "id": c} for c in df.columns]
    records = []
    for index, row in zip(df.index, df.values):
        record = dict(zip(df.columns, row))
        record["Index"] = index
        records.append(record)

    return dash_table.DataTable(
        id="table",
        columns=columns,
        data=records,
        style_cell_conditional=[{"textAlign": "center"}],
        style_table={"overflowX": "scroll"},
        editable=False,
        column_selectable="single",
        page_action="native",
        page_size=page_size,
        page_current=0,
        style_header=dict(backgroundColor=TABLE_HEADER_COLOR),
        style_data=dict(backgroundColor=TABLE_DATA_COLOR),
    )
41 | 
42 | 
def plot_timeseries(ts, figure_height=500):
    """
    Plot the numeric columns of a time-indexed DataFrame as line traces in a Plotly
    figure with a date range selector, wrapped in a ``dcc.Graph``.

    :param ts: DataFrame indexed by time.
    :param figure_height: height of the figure in pixels.
    """
    palette = plotly.colors.qualitative.Dark24
    traces = []
    for i, col in enumerate(ts.columns):
        series = ts[col]
        # Only int/float/bool columns are plotted; other dtypes are skipped.
        if series.dtype not in ["int", "float", "bool"]:
            continue
        series = series.astype(float)
        traces.append(
            go.Scatter(
                name=col,
                x=series.index,
                y=series.values.flatten(),
                mode="lines",
                line=dict(color=palette[i % len(palette)]),
            )
        )

    layout = dict(
        showlegend=True,
        xaxis=dict(
            title="Time",
            type="date",
            rangeselector=dict(
                buttons=[
                    dict(count=7, label="1w", step="day", stepmode="backward"),
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(count=1, label="1y", step="year", stepmode="backward"),
                    dict(step="all"),
                ]
            ),
        ),
    )
    fig = make_subplots(figure=go.Figure(layout=layout))
    fig.update_yaxes(title_text="Time Series")
    for trace in traces:
        fig.add_trace(trace)
    fig.update_layout(
        height=figure_height,
        xaxis_rangeselector_font_color="white",
        xaxis_rangeselector_activecolor="#0176D3",
        xaxis_rangeselector_bgcolor="#1B96FF",
        xaxis_rangeselector_font_family="Salesforce Sans",
    )
    return dcc.Graph(figure=fig)
83 | 


--------------------------------------------------------------------------------
/merlion/models/anomaly/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Contains all anomaly detection models. Forecaster-based anomaly detection models
 9 | may be found in :py:mod:`merlion.models.anomaly.forecast_based`. Change-point detection models may be
10 | found in :py:mod:`merlion.models.anomaly.change_point`.
11 | 
12 | For anomaly detection, we define an abstract `DetectorBase` class which inherits from `ModelBase` and supports the
13 | following interface, in addition to ``model.save`` and ``DetectorClass.load`` defined for `ModelBase`:
14 | 
15 | 1.  ``model = DetectorClass(config)``
16 | 
17 |     - initialization with a model-specific config
18 |     - configs contain:
19 | 
20 |         -   a (potentially trainable) data pre-processing transform from :py:mod:`merlion.transform`;
21 |             note that ``model.transform`` is a property which refers to ``model.config.transform``
22 |         -   **a (potentially trainable) post-processing rule** from :py:mod:`merlion.post_process`;
23 |             note that ``model.post_rule`` is a property which refers to ``model.config.post_rule``.
24 |             In general, this post-rule will have two stages: :py:mod:`calibration <merlion.post_process.calibrate>`
25 |             and :py:mod:`thresholding <merlion.post_process.threshold>`.
26 |         -   booleans ``enable_calibrator`` and ``enable_threshold`` (both defaulting to ``True``) indicating
27 |             whether to enable calibration and thresholding in the post-rule.
28 |         -   model-specific hyperparameters
29 | 
30 | 2.  ``model.get_anomaly_score(time_series, time_series_prev=None)``
31 | 
32 |     -   returns a time series of anomaly scores for each timestamp in ``time_series``
33 |     -   ``time_series_prev`` (optional): the most recent context, only used for some models. If not provided, the
34 |         training data is used as the context instead.
35 | 
36 | 3.  ``model.get_anomaly_label(time_series, time_series_prev=None)``
37 | 
38 |     -   returns a time series of post-processed anomaly scores for each timestamp in ``time_series``. These scores
39 |         are calibrated to correspond to z-scores if ``enable_calibrator`` is ``True``, and they have also been filtered
40 |         by a thresholding rule (``model.threshold``) if ``enable_threshold`` is ``True``. ``threshold`` is specified
41 |         manually in the config (though it may be modified by `DetectorBase.train`).
42 |     -   ``time_series_prev`` (optional): the most recent context, only used for some models. If not provided, the
43 |         training data is used as the context instead.
44 | 
45 | 4.  ``model.train(train_data, anomaly_labels=None, train_config=None, post_rule_train_config=None)``
46 | 
47 |     -   trains the model on the time series ``train_data``
48 |     -   ``anomaly_labels`` (optional): a time series aligned with ``train_data``, which indicates whether each
49 |         time stamp is anomalous
50 |     -   ``train_config`` (optional): extra configuration describing how the model should be trained. 
51 |         Not used for all models. Class-level default provided for models which do use it.
52 |     -   ``post_rule_train_config``: extra configuration describing how to train the model's post-rule. Class-level
53 |         default is provided for all models.
54 |     -   returns a time series of anomaly scores produced by the model on ``train_data``.
55 | """
56 | 


--------------------------------------------------------------------------------
/merlion/models/anomaly/change_point/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Contains all change point detection algorithms. These models implement the anomaly detector interface, but
 9 | they are specialized for detecting change points in time series.
10 | """
11 | 


--------------------------------------------------------------------------------
/merlion/models/anomaly/forecast_based/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Contains all forecaster-based anomaly detectors. These models support all functionality
 9 | of both anomaly detectors (:py:mod:`merlion.models.anomaly`) and forecasters
10 | (:py:mod:`merlion.models.forecast`).
11 | 
12 | Forecasting-based anomaly detectors are instances of an abstract `ForecastingDetectorBase`
13 | class. Many forecasting models support anomaly detection variants, where the anomaly score
14 | is based on the difference between the predicted and true time series value, and optionally
15 | the model's uncertainty in its own prediction.
16 | 
17 | Note that the model will detect anomalies in only one target univariate, though the underlying
18 | forecaster may model the full multivariate time series to predict said univariate.
19 | """
20 | 


--------------------------------------------------------------------------------
/merlion/models/anomaly/forecast_based/arima.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Classic ARIMA (AutoRegressive Integrated Moving Average) forecasting model,
 9 | adapted for anomaly detection.
10 | """
11 | from merlion.models.anomaly.forecast_based.base import ForecastingDetectorBase
12 | from merlion.models.anomaly.base import DetectorConfig
13 | from merlion.models.forecast.arima import ArimaConfig, Arima
14 | from merlion.post_process.threshold import AggregateAlarms
15 | 
16 | 
class ArimaDetectorConfig(ArimaConfig, DetectorConfig):
    """
    Configuration class for an ARIMA forecasting model adapted for anomaly detection.
    """

    _default_threshold = AggregateAlarms(alm_threshold=2.5)
19 | 
20 | 
class ArimaDetector(ForecastingDetectorBase, Arima):
    """
    ARIMA forecasting model adapted for anomaly detection.
    """

    config_class = ArimaDetectorConfig
23 | 


--------------------------------------------------------------------------------
/merlion/models/anomaly/forecast_based/ets.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | ETS (error, trend, seasonal) forecasting model, adapted for anomaly detection.
 9 | """
10 | from merlion.models.anomaly.base import NoCalibrationDetectorConfig
11 | from merlion.models.anomaly.forecast_based.base import ForecastingDetectorBase
12 | from merlion.models.forecast.ets import ETSConfig, ETS
13 | from merlion.post_process.threshold import AggregateAlarms
14 | 
15 | 
class ETSDetectorConfig(ETSConfig, NoCalibrationDetectorConfig):
    """
    Configuration class for an ETS forecasting model adapted for anomaly detection.
    """

    # Because the errors & residuals returned by ETS.train() are not
    # representative of the test-time errors & residuals, ETSDetector inherits
    # from NoCalibrationDetectorConfig and uses the model-predicted z-scores
    # directly as anomaly scores.
    _default_threshold = AggregateAlarms(alm_threshold=3.0)
22 | 
23 | 
class ETSDetector(ForecastingDetectorBase, ETS):
    """
    ETS (error, trend, seasonal) forecasting model adapted for anomaly detection.
    """

    config_class = ETSDetectorConfig
26 | 


--------------------------------------------------------------------------------
/merlion/models/anomaly/forecast_based/mses.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | MSES (Multi-Scale Exponential Smoother) forecasting model adapted for anomaly detection.
 9 | """
10 | import pandas as pd
11 | 
12 | from merlion.models.anomaly.forecast_based.base import ForecastingDetectorBase
13 | from merlion.models.anomaly.base import DetectorConfig
14 | from merlion.models.forecast.smoother import MSESConfig, MSES, MSESTrainConfig
15 | from merlion.post_process.threshold import AggregateAlarms
16 | from merlion.utils.time_series import TimeSeries
17 | 
18 | 
class MSESDetectorConfig(MSESConfig, DetectorConfig):
    """
    Configuration class for an MSES forecasting model adapted for anomaly detection.
    """

    _default_threshold = AggregateAlarms(alm_threshold=2)

    def __init__(self, max_forecast_steps: int, online_updates: bool = True, **kwargs):
        """
        :param max_forecast_steps: maximum number of steps to forecast ahead (forwarded to `MSESConfig`).
        :param online_updates: whether `MSESDetector` should update the model with incoming data
            when computing anomaly scores (see `MSESDetector.get_anomaly_score`).
        """
        super().__init__(max_forecast_steps=max_forecast_steps, **kwargs)
        self.online_updates = online_updates
29 | 
30 | 
class MSESDetector(ForecastingDetectorBase, MSES):
    """
    MSES (Multi-Scale Exponential Smoother) forecasting model adapted for anomaly detection.
    """

    config_class = MSESDetectorConfig

    @property
    def online_updates(self):
        # Whether to update the model with new data at inference time (set in the config).
        return self.config.online_updates

    @property
    def _default_train_config(self):
        # Online updates use a train cadence of 1; otherwise defer to MSESTrainConfig's default cadence.
        return MSESTrainConfig(train_cadence=1 if self.online_updates else None)

    def get_anomaly_score(
        self, time_series: TimeSeries, time_series_prev: TimeSeries = None, exog_data=None
    ) -> TimeSeries:
        """
        Return anomaly scores for ``time_series``. When online updates are enabled, the model
        is updated on the full (transformed) series before scoring; otherwise this defers to
        the base class implementation.

        :param time_series: the time series to get anomaly scores for.
        :param time_series_prev: the most recent context, if any.
        :param exog_data: accepted for interface compatibility but not used by this model.
        """
        if self.online_updates:
            time_series, time_series_prev = self.transform_time_series(time_series, time_series_prev)
            if time_series_prev is None:
                full_ts = time_series
            else:
                full_ts = time_series_prev + time_series
            # Update on the concatenated series with a zero train cadence (see MSES.update for semantics)
            forecast, err = self.update(full_ts.to_pd(), train_cadence=pd.to_timedelta(0))
            # Keep only the portion aligned with time_series, dropping anything before its first timestamp
            forecast, err = [x.bisect(time_series.t0, t_in_left=False)[1] for x in [forecast, err]]
            return TimeSeries.from_pd(self.forecast_to_anom_score(time_series, forecast, err))
        else:
            return super().get_anomaly_score(time_series, time_series_prev)
56 | 


--------------------------------------------------------------------------------
/merlion/models/anomaly/forecast_based/prophet.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Adaptation of Facebook's Prophet forecasting model to anomaly detection.
 9 | """
10 | 
11 | from merlion.models.anomaly.forecast_based.base import ForecastingDetectorBase
12 | from merlion.models.anomaly.base import DetectorConfig
13 | from merlion.models.forecast.prophet import ProphetConfig, Prophet
14 | from merlion.post_process.threshold import AggregateAlarms
15 | 
16 | 
class ProphetDetectorConfig(ProphetConfig, DetectorConfig):
    """
    Configuration class for a Prophet forecasting model adapted for anomaly detection.
    """

    _default_threshold = AggregateAlarms(alm_threshold=3)
19 | 
20 | 
class ProphetDetector(ForecastingDetectorBase, Prophet):
    """
    Facebook's Prophet forecasting model adapted for anomaly detection.
    """

    config_class = ProphetDetectorConfig
23 | 


--------------------------------------------------------------------------------
/merlion/models/anomaly/forecast_based/sarima.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Seasonal ARIMA (SARIMA) forecasting model, adapted for anomaly detection.
 9 | """
10 | from merlion.models.anomaly.base import DetectorConfig
11 | from merlion.models.anomaly.forecast_based.base import ForecastingDetectorBase
12 | from merlion.models.forecast.sarima import SarimaConfig, Sarima
13 | from merlion.post_process.threshold import AggregateAlarms
14 | 
15 | 
class SarimaDetectorConfig(SarimaConfig, DetectorConfig):
    """
    Configuration class for a SARIMA forecasting model adapted for anomaly detection.
    """

    _default_threshold = AggregateAlarms(alm_threshold=2.5)
18 | 
19 | 
class SarimaDetector(ForecastingDetectorBase, Sarima):
    """
    Seasonal ARIMA (SARIMA) forecasting model adapted for anomaly detection.
    """

    config_class = SarimaDetectorConfig
22 | 


--------------------------------------------------------------------------------
/merlion/models/anomaly/isolation_forest.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | The classic isolation forest model for anomaly detection.
 9 | """
10 | import logging
11 | 
12 | import numpy as np
13 | import pandas as pd
14 | from sklearn.ensemble import IsolationForest as skl_IsolationForest
15 | 
16 | from merlion.models.anomaly.base import DetectorConfig, DetectorBase
17 | from merlion.transform.moving_average import DifferenceTransform
18 | from merlion.transform.sequence import TransformSequence
19 | from merlion.transform.resample import Shingle
20 | from merlion.utils import UnivariateTimeSeries, TimeSeries
21 | 
22 | logger = logging.getLogger(__name__)
23 | 
24 | 
class IsolationForestConfig(DetectorConfig):
    """
    Configuration class for `IsolationForest`.
    """

    _default_transform = TransformSequence([DifferenceTransform(), Shingle(size=2, stride=1)])

    def __init__(self, max_n_samples: int = None, n_estimators: int = 100, n_jobs=-1, **kwargs):
        """
        :param max_n_samples: Maximum number of samples to allow the isolation
            forest to train on. Specify ``None`` to use all samples in the
            training data.
        :param n_estimators: number of trees in the isolation forest.
        :param n_jobs: number of parallel jobs for sklearn to use (``-1`` means all processors).
        """
        # sklearn treats max_samples=1.0 as "use all the samples"
        self.max_n_samples = 1.0 if max_n_samples is None else max_n_samples
        self.n_estimators = n_estimators
        self.n_jobs = n_jobs
        # Isolation forest's uncalibrated scores are between 0 and 1
        kwargs["max_score"] = 1.0
        super().__init__(**kwargs)
45 | 
46 | 
class IsolationForest(DetectorBase):
    """
    The classic isolation forest algorithm, proposed in
    `Liu et al. 2008 <https://ieeexplore.ieee.org/document/4781136>`_
    """

    config_class = IsolationForestConfig

    def __init__(self, config: IsolationForestConfig):
        """
        :param config: model configuration
        """
        super().__init__(config)
        # random_state is fixed so training is deterministic
        self.model = skl_IsolationForest(
            max_samples=config.max_n_samples, n_estimators=config.n_estimators, random_state=0, n_jobs=config.n_jobs
        )

    @property
    def require_even_sampling(self) -> bool:
        return False

    @property
    def require_univariate(self) -> bool:
        return False

    def _train(self, train_data: pd.DataFrame, train_config=None) -> pd.DataFrame:
        """
        Fit the isolation forest and return its anomaly scores on the training data.
        """
        times, train_values = train_data.index, train_data.values
        self.model.fit(train_values)
        # score_samples returns values in [-1, 0), where more negative = more anomalous;
        # negate so that higher score = more anomalous.
        train_scores = -self.model.score_samples(train_values)
        return pd.DataFrame(train_scores, index=times, columns=["anom_score"])

    def _get_anomaly_score(self, time_series: pd.DataFrame, time_series_prev: pd.DataFrame = None) -> pd.DataFrame:
        """
        :param time_series: the (transformed) data to score.
        :param time_series_prev: unused; scoring is pointwise on ``time_series``.
        """
        # Negate the model's score, since scores are in [-1, 0) with more negative = more anomalous.
        # DataFrame.values is already an ndarray, so the old np.array(...) wrapper was a redundant copy.
        scores = -self.model.score_samples(time_series.values)
        return pd.DataFrame(scores, index=time_series.index)
79 | 


--------------------------------------------------------------------------------
/merlion/models/anomaly/stat_threshold.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Simple static thresholding model for anomaly detection.
 9 | """
10 | import pandas as pd
11 | 
12 | from merlion.models.base import NormalizingConfig
13 | from merlion.models.anomaly.base import DetectorConfig, DetectorBase
14 | from merlion.transform.moving_average import DifferenceTransform
15 | 
16 | 
class StatThresholdConfig(DetectorConfig, NormalizingConfig):
    """
    Config class for `StatThreshold`.
    """

    def __init__(self, target_seq_index: int = None, **kwargs):
        """
        :param target_seq_index: (optional) The index of the univariate whose value we are
            considering thresholds of. If not provided, the model only works for univariate data.
        """
        super().__init__(**kwargs)
        self.target_seq_index = target_seq_index
29 | 
30 | 
class StatThreshold(DetectorBase):
    """
    Anomaly detection based on a static threshold: the value of the target univariate
    is used directly as the anomaly score.
    """

    config_class = StatThresholdConfig

    @property
    def require_even_sampling(self) -> bool:
        return False

    @property
    def require_univariate(self) -> bool:
        # Multivariate input is fine as long as a target univariate has been chosen.
        return self.config.target_seq_index is None

    def _target_column(self, df: pd.DataFrame) -> pd.DataFrame:
        # Select the target univariate (first column if no index configured) as a one-column DataFrame.
        idx = self.config.target_seq_index or 0
        return pd.DataFrame(df.iloc[:, idx])

    def _train(self, train_data: pd.DataFrame, train_config=None) -> pd.DataFrame:
        return self._target_column(train_data)

    def _get_anomaly_score(self, time_series: pd.DataFrame, time_series_prev: pd.DataFrame = None) -> pd.DataFrame:
        return self._target_column(time_series)
51 | 


--------------------------------------------------------------------------------
/merlion/models/automl/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Contains all AutoML model variants & some utilities.
 9 | """
10 | 


--------------------------------------------------------------------------------
/merlion/models/automl/search.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Abstractions for hyperparameter search.
 9 | """
10 | from collections import OrderedDict
11 | import itertools
12 | from typing import Any, Dict, List
13 | 
14 | 
class GridSearch:
    """
    Iterator over a grid of parameter values, skipping any restricted combinations of values.
    """

    def __init__(self, param_values: Dict[str, List], restrictions: List[Dict[str, Any]] = None):
        """
        :param param_values: a dict mapping a set of parameter names to lists of values they can take on.
        :param restrictions: a list of dicts indicating inadmissible combinations of parameter values.
            For example, an ETS model has parameters error (add/mul), trend (add/mul/none), seasonal (add/mul),
            and damped_trend (True/False). If we are only considering additive models, we would impose the
            restrictions ``[{"error": "mul"}, {"trend": "mul"}, {"seasonal": "mul"}]``. Since a damped trend is
            only possible if the model has a trend, we would add the restriction
            ``{"trend": None, "damped_trend": True}``.
        """
        self.param_values = param_values
        self.restrictions = restrictions if restrictions is not None else []

    def _is_restricted(self, candidate) -> bool:
        # A combination is inadmissible if it matches every key/value pair of some restriction.
        for restriction in self.restrictions:
            if all(key in candidate and candidate[key] == value for key, value in restriction.items()):
                return True
        return False

    def __iter__(self):
        names = list(self.param_values.keys())
        for combo in itertools.product(*self.param_values.values()):
            candidate = OrderedDict(zip(names, combo))
            if not self._is_restricted(candidate):
                yield candidate
37 | 


--------------------------------------------------------------------------------
/merlion/models/ensemble/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Ensembles of models and automated model selection.
 9 | """
10 | 


--------------------------------------------------------------------------------
/merlion/models/forecast/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Contains all forecasting models, including those which support
 9 | `exogenous regressors <tutorials/forecast/3_ForecastExogenous>`.
10 | 
11 | For forecasting, we define an abstract base `ForecasterBase` class which inherits from `ModelBase` and supports the
12 | following interface, in addition to ``model.save()`` and ``ForecasterClass.load`` defined for ``ModelBase``:
13 | 
14 | 1. ``model = ForecasterClass(config)``
15 | 
16 |     -   initialization with a model-specific config (which inherits from `ForecasterConfig`)
17 |     -   configs contain:
18 | 
19 |         -   a (potentially trainable) data pre-processing transform from :py:mod:`merlion.transform`;
20 |             note that ``model.transform`` is a property which refers to ``model.config.transform``
21 |         -   model-specific hyperparameters
22 |         -   **optionally, a maximum number of steps the model can forecast for**
23 | 
24 | 2. ``model.forecast(time_stamps, time_series_prev=None)``
25 | 
26 |     - returns the forecast (`TimeSeries`) for future values at the time stamps specified by ``time_stamps``,
27 |       as well as the standard error of that forecast (`TimeSeries`, may be ``None``)
28 |     - if ``time_series_prev`` is specified, it is used as the most recent context. Otherwise, the training data is used
29 | 
30 | 3.  ``model.train(train_data, train_config=None)``
31 | 
32 |     -   trains the model on the `TimeSeries` ``train_data``
33 |     -   ``train_config`` (optional): extra configuration describing how the model should be trained. 
34 |         Not used for all models. Class-level default provided for models which do use it.
    -   returns the model's prediction for ``train_data``, in the same format as if you called `ForecasterBase.forecast`
36 |         on the time stamps of ``train_data``
37 | """
38 | 


--------------------------------------------------------------------------------
/merlion/models/forecast/arima.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | The classic statistical forecasting model ARIMA (AutoRegressive Integrated
 9 | Moving Average).
10 | """
11 | import logging
12 | from typing import Tuple
13 | 
14 | from merlion.models.forecast.sarima import SarimaConfig, Sarima
15 | from merlion.transform.resample import TemporalResample
16 | 
17 | logger = logging.getLogger(__name__)
18 | 
19 | 
class ArimaConfig(SarimaConfig):
    """
    Configuration class for `Arima`. Just a `Sarima` model with seasonal order ``(0, 0, 0, 0)``.
    """

    _default_transform = TemporalResample(granularity=None, trainable_granularity=True)

    def __init__(self, order=(4, 1, 2), seasonal_order=(0, 0, 0, 0), **kwargs):
        """
        :param order: the (p, d, q) order of the (non-seasonal) ARIMA model.
        :param seasonal_order: (0, 0, 0, 0) because ARIMA has no seasonal order.
        """
        super().__init__(order=order, seasonal_order=seasonal_order, **kwargs)

    @property
    def seasonal_order(self) -> Tuple[int, int, int, int]:
        """
        :return: (0, 0, 0, 0) because ARIMA has no seasonal order.
        """
        return (0, 0, 0, 0)

    @seasonal_order.setter
    def seasonal_order(self, seasonal_order: Tuple[int, int, int, int]):
        # Only the trivial seasonal order is accepted; the value is otherwise discarded
        # since the property above always reports (0, 0, 0, 0).
        assert tuple(seasonal_order) == (0, 0, 0, 0), "Seasonal order must be (0, 0, 0, 0) for ARIMA"
43 | 
44 | 
class Arima(Sarima):
    """
    Implementation of the classic statistical model ARIMA (AutoRegressive Integrated Moving Average) for forecasting.
    """

    # Restrict the config so the seasonal order is pinned to (0, 0, 0, 0).
    config_class = ArimaConfig
51 | 


--------------------------------------------------------------------------------
/merlion/models/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Contains various utility files & functions useful for different models.
 9 | """
10 | 


--------------------------------------------------------------------------------
/merlion/models/utils/early_stopping.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
Early Stopping
 9 | """
import copy
import logging
11 | 
try:
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
except ImportError as e:
    # Point the user at the optional-dependency extras that provide torch.
    # (Fixed a stray backtick that previously corrupted the second pip command.)
    err = (
        "Try installing Merlion with optional dependencies using `pip install salesforce-merlion[deep-learning]` or "
        "`pip install salesforce-merlion[all]`"
    )
    raise ImportError(str(e) + ". " + err)
22 | 
23 | import numpy as np
24 | 
25 | 
26 | logger = logging.getLogger(__name__)
27 | 
28 | 
class EarlyStopping:
    """
    Early stopping for deep model training.

    Tracks the best (lowest) validation loss seen so far, snapshots the corresponding
    model weights, and sets ``early_stop = True`` once the loss has failed to improve
    for ``patience`` consecutive calls.
    """

    def __init__(self, patience=7, delta=0):
        """
        :param patience: Number of epochs with no improvement after which training will be stopped.
        :param delta: Minimum change in the monitored quantity to qualify as an improvement,
            i.e. an absolute change of less than min_delta, will count as no improvement.
        """
        self.patience = patience
        self.counter = 0  # consecutive epochs without sufficient improvement
        self.best_score = None  # negated best validation loss (higher is better)
        self.early_stop = False
        # np.inf, not np.Inf: the capitalized alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta
        self.best_model_state_dict = None

    def __call__(self, val_loss, model):
        """
        Record this epoch's validation loss and update the early-stopping state.

        :param val_loss: validation loss for the current epoch (lower is better).
        :param model: the model being trained; its weights are snapshotted on improvement.
        """
        score = -val_loss
        if self.best_score is None:
            # First observation always counts as the best so far.
            self.best_score = score
            self.save_best_state_and_dict(val_loss, model)
        elif score < self.best_score + self.delta:
            # No (sufficient) improvement: bump the counter and possibly stop.
            self.counter += 1
            logger.info(f"EarlyStopping counter: {self.counter} out of {self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_best_state_and_dict(val_loss, model)
            self.counter = 0

    def save_best_state_and_dict(self, val_loss, model):
        """
        Snapshot ``model``'s weights and the corresponding validation loss.

        The state dict is deep-copied: ``model.state_dict()`` returns references to the
        live parameter tensors, so without a copy the "best" snapshot would be silently
        overwritten by subsequent training steps.
        """
        self.best_model_state_dict = copy.deepcopy(model.state_dict())
        self.val_loss_min = val_loss

    def load_best_model(self, model):
        """Restore the best snapshotted weights into ``model``."""
        model.load_state_dict(self.best_model_state_dict)
71 | 


--------------------------------------------------------------------------------
/merlion/models/utils/nn_modules/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | from .blocks import (
 8 |     AutoCorrelation,
 9 |     SeasonalLayernorm,
10 |     SeriesDecomposeBlock,
11 |     MovingAverageBlock,
12 |     FullAttention,
13 |     ProbAttention,
14 | )
15 | from .layers import AutoCorrelationLayer, ConvLayer, AttentionLayer
16 | 
17 | from .embed import DataEmbedding, DataEmbeddingWoPos, ETSEmbedding
18 | 


--------------------------------------------------------------------------------
/merlion/post_process/base.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Base class for post-processing rules in Merlion.
 9 | """
10 | from abc import abstractmethod
11 | from copy import copy, deepcopy
12 | import inspect
13 | 
14 | from merlion.utils import TimeSeries
15 | from merlion.utils.misc import AutodocABCMeta
16 | 
17 | 
class PostRuleBase(metaclass=AutodocABCMeta):
    """
    Base class for post-processing rules in Merlion. These objects are primarily
    for post-processing the sequence of anomaly scores returned by anomaly detection
    models. All post-rules are callable objects, and they have a ``train()`` method
    which may accept additional implementation-specific keyword arguments.
    """

    def to_dict(self):
        """
        :return: a dict with one entry per ``__init__`` parameter (deep-copied from the
            attribute of the same name), plus the class name under the key ``"name"``.
        """
        params = inspect.signature(self.__init__).parameters
        d = {k: deepcopy(getattr(self, k)) for k in params}
        d["name"] = type(self).__name__
        return d

    @classmethod
    def from_dict(cls, state_dict):
        """
        :param state_dict: dict in the format produced by `to_dict`; the ``"name"`` key
            (if present) is dropped, and the remaining entries are passed to ``__init__``.
        :return: a new post-rule constructed from ``state_dict``.
        """
        state_dict = copy(state_dict)
        state_dict.pop("name", None)
        return cls(**state_dict)

    def __copy__(self):
        return self.from_dict(self.to_dict())

    def __deepcopy__(self, memodict=None):
        # to_dict() already deep-copies every parameter, so a deep copy is equivalent
        # to a shallow one. ``memodict`` is unused; default changed from the mutable
        # ``{}`` anti-pattern to None (copy.deepcopy passes the memo explicitly anyway).
        return self.__copy__()

    def __repr__(self):
        kwargs = self.to_dict()
        name = kwargs.pop("name")
        kwargs_str = ", ".join(f"{k}={v}" for k, v in sorted(kwargs.items()))
        return f"{name}({kwargs_str})"

    @abstractmethod
    def train(self, anomaly_scores: TimeSeries):
        raise NotImplementedError

    @abstractmethod
    def __call__(self, anomaly_scores: TimeSeries):
        raise NotImplementedError
57 | 


--------------------------------------------------------------------------------
/merlion/post_process/factory.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Contains the `PostRuleFactory`.
 9 | """
10 | from typing import Type
11 | from merlion.post_process.base import PostRuleBase
12 | from merlion.utils import dynamic_import
13 | 
# Registry mapping each post-rule's class name to its fully qualified
# "module:Class" path, resolved lazily via `dynamic_import`.
import_alias = {
    "Threshold": "merlion.post_process.threshold:Threshold",
    "AggregateAlarms": "merlion.post_process.threshold:AggregateAlarms",
    "AdaptiveThreshold": "merlion.post_process.threshold:AdaptiveThreshold",
    "AdaptiveAggregateAlarms": "merlion.post_process.threshold:AdaptiveAggregateAlarms",
    "AnomScoreCalibrator": "merlion.post_process.calibrate:AnomScoreCalibrator",
    "PostRuleSequence": "merlion.post_process.sequence:PostRuleSequence",
}
22 | 
23 | 
class PostRuleFactory(object):
    """Factory which instantiates post-rules by class name."""

    @classmethod
    def get_post_rule_class(cls, name: str) -> Type[PostRuleBase]:
        """Look up the post-rule class registered under ``name``."""
        return dynamic_import(name, import_alias)

    @classmethod
    def create(cls, name: str, **kwargs) -> PostRuleBase:
        """
        Uses the given ``kwargs`` to create a post-rule of the given name
        """
        rule_cls = cls.get_post_rule_class(name)
        return rule_cls.from_dict(kwargs)
36 | 


--------------------------------------------------------------------------------
/merlion/post_process/sequence.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Class to compose a sequence of post-rules into a single post-rule.
 9 | """
10 | import inspect
11 | from typing import Iterable
12 | 
13 | from merlion.post_process.base import PostRuleBase
14 | from merlion.post_process.factory import PostRuleFactory
15 | from merlion.utils import TimeSeries
16 | 
17 | 
class PostRuleSequence(PostRuleBase):
    """
    Composes a sequence of post-rules into a single post-rule, applied in order.
    """

    def __init__(self, post_rules: Iterable):
        """
        :param post_rules: the post-rules to apply, in order.
        """
        self.post_rules = list(post_rules)

    def train(self, anomaly_scores: TimeSeries, **kwargs) -> TimeSeries:
        """
        Trains each post-rule in order, feeding the output of each as input to the next.

        :param anomaly_scores: the anomaly scores to train the post-rules on.
        :param kwargs: extra keyword arguments. A post-rule whose ``train()`` accepts
            ``**kwargs`` receives all of them; otherwise it receives only those matching
            its named parameters.
        :return: the anomaly scores after all post-rules have been applied.
        """
        for post_rule in self.post_rules:
            params = inspect.signature(post_rule.train).parameters
            if any(v.kind.name == "VAR_KEYWORD" for v in params.values()):
                local_kwargs = kwargs
            else:
                # Filter kwargs down to the parameters train() actually declares.
                # Bug fix: local_kwargs was previously assigned only in this branch, so a
                # post-rule accepting **kwargs raised UnboundLocalError (first iteration)
                # or silently reused the previous iteration's filtered kwargs.
                local_kwargs = {k: v for k, v in kwargs.items() if k in params}
            anomaly_scores = post_rule.train(anomaly_scores, **local_kwargs)
        return anomaly_scores

    def __call__(self, anomaly_scores: TimeSeries) -> TimeSeries:
        for post_rule in self.post_rules:
            anomaly_scores = post_rule(anomaly_scores)
        return anomaly_scores

    def to_dict(self):
        return {"name": type(self).__name__, "post_rules": [p.to_dict() for p in self.post_rules]}

    @classmethod
    def from_dict(cls, state_dict):
        post_rules = [
            d if isinstance(d, PostRuleBase) else PostRuleFactory.create(**d) for d in state_dict["post_rules"]
        ]
        return cls(post_rules)

    def __repr__(self):
        return "PostRuleSequence(\n " + ",\n ".join([repr(f) for f in self.post_rules]) + "\n)"
47 | 


--------------------------------------------------------------------------------
/merlion/resources/gson-2.8.9.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/merlion/resources/gson-2.8.9.jar


--------------------------------------------------------------------------------
/merlion/resources/randomcutforest-core-1.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/merlion/resources/randomcutforest-core-1.0.jar


--------------------------------------------------------------------------------
/merlion/resources/randomcutforest-serialization-json-1.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/salesforce/Merlion/085ef8a69e5dcdfb9dcaa394cc21e087cccbb8f0/merlion/resources/randomcutforest-serialization-json-1.0.jar


--------------------------------------------------------------------------------
/merlion/transform/bound.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Transforms that clip the input.
 9 | """
10 | 
11 | from collections import OrderedDict
12 | import logging
13 | import numpy as np
14 | 
15 | from merlion.transform.base import TransformBase
16 | from merlion.utils import UnivariateTimeSeries, TimeSeries
17 | 
18 | logger = logging.getLogger(__name__)
19 | 
20 | 
class LowerUpperClip(TransformBase):
    """
    Clips the values of a time series to lie between lower and upper.
    """

    def __init__(self, lower=None, upper=None):
        """
        :param lower: optional lower bound to clip values to.
        :param upper: optional upper bound to clip values to.
        """
        super().__init__()
        assert not (lower is None and upper is None), "Must provide at least one of lower or upper"
        if lower is not None and upper is not None:
            assert lower < upper
        self.lower = lower
        self.upper = upper

    @property
    def requires_inversion_state(self):
        """
        ``False`` because "inverting" value clipping is stateless.
        """
        return False

    def train(self, time_series: TimeSeries):
        # Clipping has no trainable state.
        pass

    def __call__(self, time_series: TimeSeries) -> TimeSeries:
        # Clip each univariate independently, keeping variable names and time index intact.
        clipped = OrderedDict(
            (name, UnivariateTimeSeries(var.index, np.clip(var.np_values, self.lower, self.upper)))
            for name, var in time_series.items()
        )
        return TimeSeries(clipped)
51 | 


--------------------------------------------------------------------------------
/merlion/transform/factory.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Contains the `TransformFactory` for instantiating transforms.
 9 | """
10 | 
11 | from typing import Type
12 | from merlion.transform.base import TransformBase
13 | from merlion.utils import dynamic_import
14 | 
15 | 
# Registry mapping each transform's class name to its fully qualified
# "module:Class" path, resolved lazily via `dynamic_import`.
import_alias = {
    "Identity": "merlion.transform.base:Identity",
    "MovingAverage": "merlion.transform.moving_average:MovingAverage",
    "ExponentialMovingAverage": "merlion.transform.moving_average:ExponentialMovingAverage",
    "DifferenceTransform": "merlion.transform.moving_average:DifferenceTransform",
    "LagTransform": "merlion.transform.moving_average:LagTransform",
    "LowerUpperClip": "merlion.transform.bound:LowerUpperClip",
    "Rescale": "merlion.transform.normalize:Rescale",
    "AbsVal": "merlion.transform.normalize:AbsVal",
    "MeanVarNormalize": "merlion.transform.normalize:MeanVarNormalize",
    "MinMaxNormalize": "merlion.transform.normalize:MinMaxNormalize",
    "BoxCoxTransform": "merlion.transform.normalize:BoxCoxTransform",
    "TemporalResample": "merlion.transform.resample:TemporalResample",
    "Shingle": "merlion.transform.resample:Shingle",
    "TransformSequence": "merlion.transform.sequence:TransformSequence",
    "TransformStack": "merlion.transform.sequence:TransformStack",
    "InvertibleTransformSequence": "merlion.transform.sequence:InvertibleTransformSequence",
}
34 | 
35 | 
class TransformFactory(object):
    """Factory which instantiates data transforms by class name."""

    @classmethod
    def get_transform_class(cls, name: str) -> Type[TransformBase]:
        """Look up the transform class registered under ``name``."""
        return dynamic_import(name, import_alias)

    @classmethod
    def create(cls, name: str, **kwargs) -> TransformBase:
        """Instantiate the transform named ``name`` from the given keyword arguments."""
        return cls.get_transform_class(name).from_dict(kwargs)
45 | 


--------------------------------------------------------------------------------
/merlion/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | from merlion.utils.misc import dynamic_import
 8 | from merlion.utils.resample import to_pd_datetime, to_timestamp
 9 | from merlion.utils.time_series import UnivariateTimeSeries, TimeSeries
10 | 


--------------------------------------------------------------------------------
/merlion_logo.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <svg viewBox="0 0 475 125" xmlns="http://www.w3.org/2000/svg">
 3 |   <defs>
 4 |     <mask id="merlion">
 5 |       <circle style="fill: rgb(255, 255, 255); fill-rule: evenodd;" cx="1059.161" cy="-552.178" r="436.487"/>
 6 |       <path d="M 1065.97 743.828 C 1065.97 743.828 1074.82 756.641 1104.97 742.152 C 1104.97 742.152 1104.79 726.934 1076.8 726.191 C 1048.82 725.449 1060.55 729.531 1060.55 729.531 L 1065.97 743.828 Z M 1019.692 105.788 C 1186.644 50.493 1280.918 121.345 1361.865 159.466 C 1413.367 183.72 1460.719 212.499 1408.14 290.586 C 1403.2 325.277 1370.85 326.828 1350.36 434.133 C 1364.9 434.266 1375.96 433.473 1392.43 432.742 C 1390.98 437.477 1385.78 445.145 1380.83 453.422 L 1380.14 453.043 C 1367.93 475.609 1359.83 506.273 1354.92 530.113 L 1354.86 530.352 L 1354.5 532.121 L 1354.48 532.223 C 1350.48 551.68 1348.61 566.539 1348.38 568.461 L 1348.37 568.465 L 1348.36 568.617 L 1348.35 568.672 C 1347.4 575.406 1346.8 581.598 1346.46 586.926 L 1379.69 579.527 C 1351.5 668.84 1366.73 719.148 1303.68 775.535 L 1310.32 803.211 C 1268.22 798.031 1216.68 835.66 1141.58 820.383 C 1115.8 806.504 1111.68 815.742 1081.47 781.215 C 1061.97 758.918 1038.42 764.828 1031.22 731.887 C 999.563 730.07 962.398 727.082 964.949 691.66 C 965.844 679.148 967.336 648.309 971.137 640.098 C 979.477 622.129 989.816 630.41 1007.63 624.125 C 1010.59 627.766 1013.58 630.992 1016.61 633.848 L 1024.87 620.465 C 1024.87 620.465 1028.61 635.293 1040.57 647.945 L 1041.32 649.398 C 1093.13 667.285 1134.97 596.297 1062.68 588.473 L 1045.56 588.574 C 1035.25 596.699 1032.21 608.77 1032.21 608.77 L 1026.25 599.117 L 1006.96 588.813 C 1006.71 567.852 999.313 563.59 1003.22 542.496 C 1019.03 535.438 1036.63 534.711 1058.25 534.066 C 974.945 346.973 912.406 521.961 1006.97 322.227 C 1041.41 246.652 1037.3 179.41 1024.37 124.531 M 962.121 595.965 C 962.121 595.965 749.273 571.44 619.827 426.444 C 640.883 383.102 637.567 365.548 665.799 354.572 C 788.567 533.219 960.399 597.222 956.138 594.244" style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none'" transform="matrix(1, 0, 0, -1, 0, 0)"/>
 7 |     </mask>
 8 |   </defs>
 9 |   <g id="g10" transform="matrix(0.13, 0, 0, 0.13, -80.57, 135.32)">
10 |     <path d="m 2274.48,441.102 -0.96,337.796 h -64.27 L 2084.84,568.984 1958.48,778.898 h -64.66 V 441.102 h 73.35 v 198.336 l 98.85,-162.625 h 35.22 l 99.33,166.964 0.48,-202.675 z M 4122.79,778.898 V 573.805 l -167.93,205.093 h -64.67 V 441.102 h 77.21 V 646.191 L 4135.82,441.102 H 4200 V 778.898 Z M 3646.5,435.309 c 106.16,0 184.82,73.832 184.82,174.691 0,100.859 -78.66,174.688 -184.82,174.688 -106.65,0 -184.83,-74.313 -184.83,-174.688 0,-100.375 78.18,-174.691 184.83,-174.691 z m 0,66.593 c -60.32,0 -105.68,43.914 -105.68,108.098 0,64.18 45.36,108.094 105.68,108.094 60.32,0 105.68,-43.914 105.68,-108.094 0,-64.184 -45.36,-108.098 -105.68,-108.098 z m -321.87,-60.8 h 78.17 v 337.796 h -78.17 z m -291.47,0 h 247.55 v 63.699 h -169.38 v 274.097 h -78.17 z m -55.5,0 -75.76,108.574 c 43.92,18.82 69.49,55.98 69.49,106.656 0,75.754 -56.46,122.566 -146.7,122.566 H 2678.47 V 441.102 h 78.18 v 94.097 h 68.04 3.86 l 65.15,-94.097 z m -85.41,215.23 c 0,-36.684 -24.13,-58.879 -71.9,-58.879 h -63.7 v 117.746 h 63.7 c 47.77,0 71.9,-21.715 71.9,-58.867 z m -459.89,-152.5 v 78.18 h 156.84 v 60.801 h -156.84 v 73.351 h 177.59 v 62.734 H 2354.67 V 441.102 h 261.55 v 62.73 h -183.86" style="fill:#02144a;fill-opacity:1;fill-rule:evenodd;stroke:#ffffff;stroke-opacity:1;stroke-width:4;shape-rendering='geometricPrecision'" id="path14" transform="matrix(1, 0, 0, -1, -160, 35)"/>
11 |     <circle style="fill: rgb(0, 161, 224); fill-rule: evenodd;" cx="1059.161" cy="-552.178" r="436.487" mask="url(#merlion)"/>
12 |   </g>
13 | </svg>


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | log_format = %(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s
3 | log_date_format = %Y-%m-%d %H:%M:%S
log_cli = true
log_cli_level = INFO
6 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | from setuptools import setup, find_namespace_packages
 8 | 
 9 | MERLION_JARS = [
10 |     "resources/gson-2.8.9.jar",
11 |     "resources/randomcutforest-core-1.0.jar",
12 |     "resources/randomcutforest-serialization-json-1.0.jar",
13 | ]
14 | 
15 | MERLION_DASHBOARD_ASSETS = [
16 |     "dashboard/assets/fonts/SalesforceSans-Bold.woff",
17 |     "dashboard/assets/fonts/SalesforceSans-BoldItalic.woff",
18 |     "dashboard/assets/fonts/SalesforceSans-Italic.woff",
19 |     "dashboard/assets/fonts/SalesforceSans-Light.woff",
20 |     "dashboard/assets/fonts/SalesforceSans-LightItalic.woff",
21 |     "dashboard/assets/fonts/SalesforceSans-Regular.woff",
22 |     "dashboard/assets/fonts/SalesforceSans-Thin.woff",
23 |     "dashboard/assets/fonts/SalesforceSans-ThinItalic.woff",
24 |     "dashboard/assets/Acumin-BdPro.otf",
25 |     "dashboard/assets/base.css",
26 |     "dashboard/assets/merlion.css",
27 |     "dashboard/assets/merlion_small.svg",
28 |     "dashboard/assets/modal.css",
29 |     "dashboard/assets/resizing.js",
30 |     "dashboard/assets/styles.css",
31 |     "dashboard/assets/upload.svg",
32 | ]
33 | 
34 | # optional dependencies
35 | extra_require = {
36 |     "dashboard": ["dash[diskcache]>=2.4", "dash_bootstrap_components>=1.0", "diskcache"],
37 |     "deep-learning": ["torch>=1.9.0", "einops>=0.4.0"],
38 |     "spark": ["pyspark[sql]>=3"],
39 | }
40 | extra_require["all"] = sum(extra_require.values(), [])
41 | 
42 | 
def read_file(fname):
    """Return the full contents of ``fname``, decoded as UTF-8."""
    with open(fname, encoding="utf-8") as fp:
        return fp.read()
46 | 
47 | 
# Package distribution metadata; executing this call performs the actual build/install.
setup(
    name="salesforce-merlion",
    version="2.0.2",
    # AUTHORS.md lists one author per line; join them into a single comma-separated string.
    author=", ".join(read_file("AUTHORS.md").split("\n")),
    author_email="abhatnagar@salesforce.com",
    description="Merlion: A Machine Learning Framework for Time Series Intelligence",
    long_description=read_file("README.md"),
    long_description_content_type="text/markdown",
    keywords="time series, forecasting, anomaly detection, machine learning, autoML, "
    "ensemble learning, benchmarking, Python, scientific toolkit",
    url="https://github.com/salesforce/Merlion",
    license="3-Clause BSD",
    # NOTE(review): `include` is conventionally a list of glob patterns; passing the plain
    # string "merlion.*" makes setuptools iterate it character by character, so only the
    # trailing "*" pattern has any effect. Possibly intended ["merlion*"] -- confirm against
    # the built wheel's contents before changing.
    packages=find_namespace_packages(include="merlion.*"),
    package_dir={"merlion": "merlion"},
    # Ship the bundled JARs and dashboard assets inside the package.
    package_data={"merlion": MERLION_JARS + MERLION_DASHBOARD_ASSETS},
    install_requires=[
        "cython",
        "dill",
        "GitPython",
        "py4j",
        "matplotlib",
        "plotly>=4.13",
        "numpy>=1.21,<2.0",  # 1.21 remediates a security risk
        "packaging",
        "pandas>=1.1.0",  # >=1.1.0 for origin kwarg to df.resample()
        "prophet>=1.1",  # 1.1 removes dependency on pystan
        "scikit-learn>=0.22",  # >=0.22 for changes to isolation forest algorithm
        "scipy>=1.6.0",  # 1.6.0 adds multivariate_t density to scipy.stats
        "statsmodels>=0.12.2",
        "lightgbm",  # if running at MacOS, need OpenMP: "brew install libomp"
        "tqdm",
    ],
    extras_require=extra_require,
    python_requires=">=3.7.0",
    zip_safe=False,
)
84 | 


--------------------------------------------------------------------------------
/tests/anomaly/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2023 salesforce.com, inc.
3 | # All rights reserved.
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 | #
7 | 


--------------------------------------------------------------------------------
/tests/anomaly/forecast_based/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2023 salesforce.com, inc.
3 | # All rights reserved.
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 | #
7 | 


--------------------------------------------------------------------------------
/tests/anomaly/multivariate/test_autoencoder.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import sys
 8 | import logging
 9 | import unittest
10 | import torch
11 | import random
12 | import numpy as np
13 | import pandas as pd
14 | from os.path import abspath, dirname, join
15 | from merlion.utils import TimeSeries
16 | from ts_datasets.anomaly import *
17 | from merlion.models.anomaly.autoencoder import AutoEncoder
18 | 
# Repository root: four directory levels above this test file.
rootdir = abspath(__file__)
for _ in range(4):
    rootdir = dirname(rootdir)

logger = logging.getLogger(__name__)
21 | 
22 | 
def set_random_seeds():
    """Seed the torch, random, and numpy RNGs so test runs are reproducible."""
    seed = 12345
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
27 | 
28 | 
def get_train_test_splits(df: pd.DataFrame, metadata: pd.DataFrame, n: int) -> (pd.DataFrame, pd.DataFrame, np.ndarray):
    """
    Split ``df`` into train/test partitions using the ``trainval`` flag in ``metadata``,
    keeping at most ``n`` points on each side (tail of train, head of test).
    """
    is_train = metadata.trainval
    train_split = df[is_train].tail(n)
    test_split = df[~is_train].head(n)
    anomaly_labels = metadata[~is_train].anomaly.values[:n]
    return train_split, test_split, anomaly_labels
34 | 
35 | 
class TestAutoEncoder(unittest.TestCase):
    """Integration tests for the AutoEncoder anomaly detector on the MSL dataset."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        set_random_seeds()

        # Model is trained once at construction time, so both test methods share it.
        self.model = AutoEncoder(config=AutoEncoder.config_class(num_epochs=10))
        # NOTE(review): requires the SMAP/MSL archive under data/smap to be present.
        self.dataset = MSL(rootdir=join(rootdir, "data", "smap"))
        df, metadata = self.dataset[0]
        self.train_df, self.test_df, self.test_labels = get_train_test_splits(df, metadata, 2000)

        logger.info("Training model...\n")
        train_ts = TimeSeries.from_pd(self.train_df)
        self.model.train(train_ts)

    def test_score(self):
        # Smoke test: score the held-out split and log summary statistics.
        print("-" * 80)
        logger.info("test_score\n" + "-" * 80 + "\n")
        test_ts = TimeSeries.from_pd(self.test_df)
        score_ts = self.model.get_anomaly_score(test_ts)
        scores = score_ts.to_pd().values.flatten()
        min_score, max_score, sum_score = min(scores), max(scores), sum(scores)

        logger.info(f"scores look like: {scores[:10]}")
        logger.info(f"min score = {min_score}")
        logger.info(f"max score = {max_score}")
        logger.info(f"sum score = {sum_score}")

    def test_save_load(self):
        # Round-trip the model through save/load and check scores & labels are unchanged.
        print("-" * 80)
        logger.info("test_save_load\n" + "-" * 80 + "\n")
        self.model.save(dirname=join(rootdir, "tmp", "ae"))
        loaded_model = AutoEncoder.load(dirname=join(rootdir, "tmp", "ae"))

        test_ts = TimeSeries.from_pd(self.test_df)
        scores = self.model.get_anomaly_score(test_ts)
        loaded_model_scores = loaded_model.get_anomaly_score(test_ts)
        self.assertSequenceEqual(list(scores), list(loaded_model_scores))

        alarms = self.model.get_anomaly_label(test_ts)
        loaded_model_alarms = loaded_model.get_anomaly_label(test_ts)
        self.assertSequenceEqual(list(alarms), list(loaded_model_alarms))
77 | 
78 | 
79 | if __name__ == "__main__":
80 |     logging.basicConfig(
81 |         format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", stream=sys.stdout, level=logging.DEBUG
82 |     )
83 |     unittest.main()
84 | 


--------------------------------------------------------------------------------
/tests/anomaly/multivariate/test_dagmm.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import sys
 8 | import logging
 9 | import unittest
10 | import torch
11 | import random
12 | import numpy as np
13 | import pandas as pd
14 | from os.path import abspath, dirname, join
15 | from merlion.utils import TimeSeries
16 | from ts_datasets.anomaly import *
17 | from merlion.models.anomaly.dagmm import DAGMM
18 | 
19 | rootdir = dirname(dirname(dirname(dirname(abspath(__file__)))))
20 | logger = logging.getLogger(__name__)
21 | 
22 | 
def set_random_seeds():
    """Seed the torch, stdlib, and numpy RNGs so test runs are reproducible."""
    seed = 12345
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
27 | 
28 | 
def get_train_test_splits(
    df: pd.DataFrame, metadata: pd.DataFrame, n: int
) -> "tuple[pd.DataFrame, pd.DataFrame, np.ndarray]":
    """Split a dataset into train/test windows around the trainval boundary.

    :param df: the full time series data.
    :param metadata: metadata aligned with ``df``, with a boolean ``trainval``
        column (True for train/validation rows) and an ``anomaly`` label column.
    :param n: keep at most the last ``n`` train rows and the first ``n`` test rows,
        i.e. the windows adjacent to the train/test boundary.
    :return: (train data, test data, test anomaly labels).
    """
    # NOTE: the original annotated the return type as a *tuple of classes*,
    # which is not a valid PEP 484 type hint; use a real tuple[...] hint instead.
    train_df = df[metadata.trainval]
    test_df = df[~metadata.trainval]
    test_labels = metadata[~metadata.trainval].anomaly.values
    return train_df.tail(n), test_df.head(n), test_labels[:n]
34 | 
35 | 
class TestDAGMM(unittest.TestCase):
    """Smoke tests for the DAGMM anomaly detector on one MSL time series."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        set_random_seeds()

        # Fit the model once during construction; tests only score/serialize it.
        self.model = DAGMM(config=DAGMM.config_class(num_epochs=10))
        self.dataset = MSL(rootdir=join(rootdir, "data", "smap"))
        df, metadata = self.dataset[0]
        splits = get_train_test_splits(df, metadata, 500)
        self.train_df, self.test_df, self.test_labels = splits

        logger.info("Training model...\n")
        train_ts = TimeSeries.from_pd(self.train_df)
        self.model.train(train_ts)

        logger.info("Training multiple timeseries model...\n")
        self.model.train_multiple([train_ts] * 10)

    def test_score(self):
        print("-" * 80)
        logger.info("test_score\n" + "-" * 80 + "\n")
        # Score the held-out data and log summary statistics of the raw scores.
        score_ts = self.model.get_anomaly_score(TimeSeries.from_pd(self.test_df))
        scores = score_ts.to_pd().values.flatten()

        logger.info(f"scores look like: {scores[:10]}")
        logger.info(f"min score = {min(scores)}")
        logger.info(f"max score = {max(scores)}")
        logger.info(f"sum score = {sum(scores)}")

    def test_save_load(self):
        print("-" * 80)
        logger.info("test_save_load\n" + "-" * 80 + "\n")
        save_dir = join(rootdir, "tmp", "dagmm")
        self.model.save(dirname=save_dir)
        loaded_model = DAGMM.load(dirname=save_dir)

        # The reloaded model must reproduce both raw scores and post-rule labels.
        test_ts = TimeSeries.from_pd(self.test_df)
        for method in ("get_anomaly_score", "get_anomaly_label"):
            expected = getattr(self.model, method)(test_ts)
            actual = getattr(loaded_model, method)(test_ts)
            self.assertSequenceEqual(list(expected), list(actual))
80 | 
81 | 
if __name__ == "__main__":
    # Verbose logging to stdout so CI captures the full training/scoring trace.
    log_format = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(format=log_format, stream=sys.stdout, level=logging.DEBUG)
    unittest.main()
87 | 


--------------------------------------------------------------------------------
/tests/anomaly/multivariate/test_lstmed.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import sys
 8 | import logging
 9 | import unittest
10 | import torch
11 | import random
12 | import numpy as np
13 | import pandas as pd
14 | from os.path import abspath, dirname, join
15 | from merlion.utils import TimeSeries
16 | from ts_datasets.anomaly import *
17 | from merlion.models.anomaly.lstm_ed import LSTMED
18 | 
19 | rootdir = dirname(dirname(dirname(dirname(abspath(__file__)))))
20 | logger = logging.getLogger(__name__)
21 | 
22 | 
def set_random_seeds():
    """Seed the torch, stdlib, and numpy RNGs so test runs are reproducible."""
    seed = 12345
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
27 | 
28 | 
def get_train_test_splits(
    df: pd.DataFrame, metadata: pd.DataFrame, n: int
) -> "tuple[pd.DataFrame, pd.DataFrame, np.ndarray]":
    """Split a dataset into train/test windows around the trainval boundary.

    :param df: the full time series data.
    :param metadata: metadata aligned with ``df``, with a boolean ``trainval``
        column (True for train/validation rows) and an ``anomaly`` label column.
    :param n: keep at most the last ``n`` train rows and the first ``n`` test rows,
        i.e. the windows adjacent to the train/test boundary.
    :return: (train data, test data, test anomaly labels).
    """
    # NOTE: the original annotated the return type as a *tuple of classes*,
    # which is not a valid PEP 484 type hint; use a real tuple[...] hint instead.
    train_df = df[metadata.trainval]
    test_df = df[~metadata.trainval]
    test_labels = metadata[~metadata.trainval].anomaly.values
    return train_df.tail(n), test_df.head(n), test_labels[:n]
34 | 
35 | 
class TestLSTMED(unittest.TestCase):
    """Smoke tests for the LSTM encoder-decoder anomaly detector on MSL data."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        set_random_seeds()

        # Fit the model once during construction; tests only score/serialize it.
        self.model = LSTMED(config=LSTMED.config_class(num_epochs=5))
        self.dataset = MSL(rootdir=join(rootdir, "data", "smap"))
        df, metadata = self.dataset[0]
        splits = get_train_test_splits(df, metadata, 1000)
        self.train_df, self.test_df, self.test_labels = splits

        logger.info("Training model...\n")
        self.model.train(TimeSeries.from_pd(self.train_df))

    def test_score(self):
        print("-" * 80)
        logger.info("test_score\n" + "-" * 80 + "\n")
        # Uses the post-rule labels here and logs their summary statistics.
        score_ts = self.model.get_anomaly_label(TimeSeries.from_pd(self.test_df))
        scores = score_ts.to_pd().values.flatten()

        logger.info(f"scores look like: {scores[:10]}")
        logger.info(f"min score = {min(scores)}")
        logger.info(f"max score = {max(scores)}")
        logger.info(f"sum score = {sum(scores)}")

    def test_save_load(self):
        print("-" * 80)
        logger.info("test_save_load\n" + "-" * 80 + "\n")
        save_dir = join(rootdir, "tmp", "lstmed")
        self.model.save(dirname=save_dir)
        loaded_model = LSTMED.load(dirname=save_dir)

        # The reloaded model must reproduce both raw scores and post-rule labels.
        test_ts = TimeSeries.from_pd(self.test_df)
        for method in ("get_anomaly_score", "get_anomaly_label"):
            expected = getattr(self.model, method)(test_ts)
            actual = getattr(loaded_model, method)(test_ts)
            self.assertSequenceEqual(list(expected), list(actual))
77 | 
78 | 
if __name__ == "__main__":
    # Verbose logging to stdout so CI captures the full training/scoring trace.
    log_format = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(format=log_format, stream=sys.stdout, level=logging.DEBUG)
    unittest.main()
84 | 


--------------------------------------------------------------------------------
/tests/anomaly/multivariate/test_vae.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import sys
 8 | import logging
 9 | import unittest
10 | import torch
11 | import random
12 | import numpy as np
13 | import pandas as pd
14 | from os.path import abspath, dirname, join
15 | from merlion.utils import TimeSeries
16 | from ts_datasets.anomaly import *
17 | from merlion.models.anomaly.vae import VAE
18 | 
19 | rootdir = dirname(dirname(dirname(dirname(abspath(__file__)))))
20 | logger = logging.getLogger(__name__)
21 | 
22 | 
def set_random_seeds():
    """Seed the torch, stdlib, and numpy RNGs so test runs are reproducible."""
    seed = 12345
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
27 | 
28 | 
def get_train_test_splits(
    df: pd.DataFrame, metadata: pd.DataFrame, n: int
) -> "tuple[pd.DataFrame, pd.DataFrame, np.ndarray]":
    """Split a dataset into train/test windows around the trainval boundary.

    :param df: the full time series data.
    :param metadata: metadata aligned with ``df``, with a boolean ``trainval``
        column (True for train/validation rows) and an ``anomaly`` label column.
    :param n: keep at most the last ``n`` train rows and the first ``n`` test rows,
        i.e. the windows adjacent to the train/test boundary.
    :return: (train data, test data, test anomaly labels).
    """
    # NOTE: the original annotated the return type as a *tuple of classes*,
    # which is not a valid PEP 484 type hint; use a real tuple[...] hint instead.
    train_df = df[metadata.trainval]
    test_df = df[~metadata.trainval]
    test_labels = metadata[~metadata.trainval].anomaly.values
    return train_df.tail(n), test_df.head(n), test_labels[:n]
34 | 
35 | 
class TestVAE(unittest.TestCase):
    """Smoke tests for the VAE anomaly detector on one MSL time series."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        set_random_seeds()

        # Fit the model once during construction; tests only score/serialize it.
        self.model = VAE(config=VAE.config_class(num_epochs=5))
        self.dataset = MSL(rootdir=join(rootdir, "data", "smap"))
        df, metadata = self.dataset[0]
        splits = get_train_test_splits(df, metadata, 5000)
        self.train_df, self.test_df, self.test_labels = splits

        logger.info("Training model...\n")
        self.model.train(TimeSeries.from_pd(self.train_df))

    def test_score(self):
        print("-" * 80)
        logger.info("test_score\n" + "-" * 80 + "\n")
        test_ts = TimeSeries.from_pd(self.test_df)

        # Reseed before scoring so the (RNG-dependent) scores are reproducible.
        set_random_seeds()
        scores = self.model.get_anomaly_score(test_ts).to_pd().values.flatten()

        logger.info(f"scores look like: {scores[:10]}")
        logger.info(f"min score = {min(scores)}")
        logger.info(f"max score = {max(scores)}")
        logger.info(f"sum score = {sum(scores)}")

    def test_save_load(self):
        print("-" * 80)
        logger.info("test_save_load\n" + "-" * 80 + "\n")
        save_dir = join(rootdir, "tmp", "vae")
        self.model.save(dirname=save_dir)
        loaded_model = VAE.load(dirname=save_dir)

        # Reseed before every call so the original and reloaded models see
        # identical RNG state and therefore produce comparable outputs.
        test_ts = TimeSeries.from_pd(self.test_df)
        for method in ("get_anomaly_score", "get_anomaly_label"):
            set_random_seeds()
            expected = getattr(self.model, method)(test_ts)
            set_random_seeds()
            actual = getattr(loaded_model, method)(test_ts)
            self.assertSequenceEqual(list(expected), list(actual))
83 | 
84 | 
if __name__ == "__main__":
    # Verbose logging to stdout so CI captures the full training/scoring trace.
    log_format = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(format=log_format, stream=sys.stdout, level=logging.DEBUG)
    unittest.main()
90 | 


--------------------------------------------------------------------------------
/tests/anomaly/test_spectral_residual.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import logging
 8 | import sys
 9 | import unittest
10 | from os.path import join, dirname, abspath
11 | 
12 | import numpy as np
13 | 
14 | from merlion.models.anomaly.spectral_residual import SpectralResidual, SpectralResidualConfig
15 | from merlion.post_process.threshold import AggregateAlarms
16 | from merlion.utils.data_io import csv_to_time_series
17 | 
18 | rootdir = dirname(dirname(dirname(abspath(__file__))))
19 | logger = logging.getLogger(__name__)
20 | 
21 | 
class TestSpectralResidual(unittest.TestCase):
    """Tests for the SpectralResidual anomaly detector on the example KPI data."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.csv_name = join(rootdir, "data", "example.csv")
        self.test_len = 32768
        self.data = csv_to_time_series(self.csv_name, timestamp_unit="ms", data_cols="kpi")
        logger.info(f"Data looks like:\n{self.data[:5]}")
        # Hold out the final test_len points for scoring.
        self.vals_train = self.data[: -self.test_len]
        self.vals_test = self.data[-self.test_len :]
        config = SpectralResidualConfig(
            local_wind_sz=21,
            estimated_points=5,
            predicting_points=5,
            target_seq_index=0,
            threshold=AggregateAlarms(alm_threshold=3.5, min_alm_in_window=1),
        )
        self.model = SpectralResidual(config)
        print()
        logger.info("Training model...\n")
        self.model.train(self.vals_train)

    def test_score(self):
        # score function returns the raw anomaly scores
        print("-" * 80)
        logger.info("test_score\n" + "-" * 80 + "\n")
        scores = self.model.get_anomaly_score(self.vals_test)
        logger.info(f"Scores look like:\n{scores[:5]}")
        flat = scores.to_pd().values.flatten()
        logger.info("max score = " + str(max(flat)))
        logger.info("min score = " + str(min(flat)) + "\n")

        # One score per transformed test point.
        self.assertEqual(len(flat), len(self.model.transform(self.vals_test)))

    def test_alarm(self):
        # alarm function returns the post-rule processed anomaly scores
        print("-" * 80)
        logger.info("test_alarm\n" + "-" * 80 + "\n")
        alarms = self.model.get_anomaly_label(self.vals_test)
        n_alarms = np.sum(alarms.to_pd().values != 0)
        logger.info(f"Alarms look like:\n{alarms[:5]}")
        logger.info(f"Number of alarms: {n_alarms}\n")
        self.assertGreaterEqual(n_alarms, 1)
        self.assertLessEqual(n_alarms, 6)

    def test_save_load(self):
        print("-" * 80)
        logger.info("test_save_load\n" + "-" * 80 + "\n")
        save_dir = join(rootdir, "tmp", "spectral_residual")
        self.model.save(dirname=save_dir)
        loaded_model = SpectralResidual.load(dirname=save_dir)

        # The reloaded model must reproduce both raw scores and post-rule labels.
        for method in ("get_anomaly_score", "get_anomaly_label"):
            expected = getattr(self.model, method)(self.vals_test)
            actual = getattr(loaded_model, method)(self.vals_test)
            self.assertSequenceEqual(list(expected), list(actual))
80 | 
81 | 
if __name__ == "__main__":
    # Verbose logging to stdout so CI captures the full training/scoring trace.
    log_format = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(format=log_format, stream=sys.stdout, level=logging.DEBUG)
    unittest.main()
87 | 


--------------------------------------------------------------------------------
/tests/anomaly/test_stat_threshold.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import logging
 8 | from os.path import abspath, dirname, join
 9 | import sys
10 | import unittest
11 | 
12 | import numpy as np
13 | 
14 | from merlion.models.anomaly.stat_threshold import StatThreshold, StatThresholdConfig
15 | from merlion.post_process.threshold import AggregateAlarms
16 | from merlion.utils.data_io import csv_to_time_series
17 | 
18 | rootdir = dirname(dirname(dirname(abspath(__file__))))
19 | logger = logging.getLogger(__name__)
20 | 
21 | 
class TestStatThreshold(unittest.TestCase):
    """Tests for the StatThreshold anomaly detector on the example KPI data."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.csv_name = join(rootdir, "data", "example.csv")
        self.test_len = 32768
        self.data = csv_to_time_series(self.csv_name, timestamp_unit="ms", data_cols=["kpi"])
        logger.info(f"Data looks like:\n{self.data[:5]}")
        # Hold out the final test_len points for scoring.
        self.vals_train = self.data[: -self.test_len]
        self.vals_test = self.data[-self.test_len :]
        config = StatThresholdConfig(enable_calibrator=True, threshold=AggregateAlarms(alm_threshold=3.5))
        self.model = StatThreshold(config)
        print()
        logger.info("Training model...\n")
        self.model.train(self.vals_train)

    def test_score(self):
        # score function returns the raw anomaly scores
        print("-" * 80)
        logger.info("test_score\n" + "-" * 80 + "\n")
        scores = self.model.get_anomaly_score(self.vals_test)
        logger.info(f"Scores look like:\n{scores[:5]}")
        flat = scores.to_pd().values.flatten()
        logger.info("max score = " + str(max(flat)))
        logger.info("min score = " + str(min(flat)) + "\n")

        # One score per transformed test point.
        self.assertEqual(len(flat), len(self.model.transform(self.vals_test)))

    def test_alarm(self):
        # alarm function returns the post-rule processed anomaly scores
        print("-" * 80)
        logger.info("test_alarm\n" + "-" * 80 + "\n")
        alarms = self.model.get_anomaly_label(self.vals_test)
        n_alarms = np.sum(alarms.to_pd().values != 0)
        logger.info(f"Alarms look like:\n{alarms[:5]}")
        logger.info(f"Number of alarms: {n_alarms}\n")
        flat = alarms.to_pd().values.flatten()
        logger.info("max score = " + str(max(flat)))
        logger.info("min score = " + str(min(flat)) + "\n")
        self.assertLessEqual(n_alarms, 6)

    def test_save_load(self):
        print("-" * 80)
        logger.info("test_save_load\n" + "-" * 80 + "\n")
        save_dir = join(rootdir, "tmp", "stat_threshold")
        self.model.save(dirname=save_dir)
        loaded_model = StatThreshold.load(dirname=save_dir)

        # The reloaded model must reproduce both raw scores and post-rule labels.
        for method in ("get_anomaly_score", "get_anomaly_label"):
            expected = getattr(self.model, method)(self.vals_test)
            actual = getattr(loaded_model, method)(self.vals_test)
            self.assertSequenceEqual(list(expected), list(actual))
76 | 
77 | 
if __name__ == "__main__":
    # Verbose logging to stdout so CI captures the full training/scoring trace.
    log_format = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(format=log_format, stream=sys.stdout, level=logging.DEBUG)
    unittest.main()
83 | 


--------------------------------------------------------------------------------
/tests/anomaly/test_windstats.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import logging
 8 | from os.path import abspath, dirname, join
 9 | import sys
10 | import unittest
11 | 
12 | import numpy as np
13 | 
14 | from merlion.models.anomaly.windstats import WindStatsConfig, WindStats
15 | from merlion.post_process.threshold import AggregateAlarms
16 | from merlion.utils.data_io import csv_to_time_series
17 | 
18 | rootdir = dirname(dirname(dirname(abspath(__file__))))
19 | logger = logging.getLogger(__name__)
20 | 
21 | 
class TestWindStats(unittest.TestCase):
    """Tests for the WindStats anomaly detector on the example KPI data."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.csv_name = join(rootdir, "data", "example.csv")
        self.test_len = 32768
        self.data = csv_to_time_series(self.csv_name, timestamp_unit="ms", data_cols=["kpi"])
        logger.info(f"Data looks like:\n{self.data[:5]}")
        # Hold out the final test_len points for scoring.
        self.vals_train = self.data[: -self.test_len]
        self.vals_test = self.data[-self.test_len :]
        threshold = AggregateAlarms(alm_threshold=4, alm_window_minutes=30, alm_suppress_minutes=300)
        self.model = WindStats(WindStatsConfig(wind_sz=30, threshold=threshold))
        print()
        logger.info("Training model...\n")
        self.model.train(self.vals_train)

    def test_score(self):
        # score function returns the raw anomaly scores
        print("-" * 80)
        logger.info("test_score\n" + "-" * 80 + "\n")
        scores = self.model.get_anomaly_score(self.vals_test)
        logger.info(f"Scores look like:\n{scores[:5]}")
        flat = scores.to_pd().values.flatten()
        logger.info("max score = " + str(max(flat)))
        logger.info("min score = " + str(min(flat)) + "\n")

        # One score per transformed test point.
        self.assertEqual(len(flat), len(self.model.transform(self.vals_test)))

    def test_alarm(self):
        # alarm function returns the post-rule processed anomaly scores
        print("-" * 80)
        logger.info("test_alarm\n" + "-" * 80 + "\n")
        alarms = self.model.get_anomaly_label(self.vals_test)
        n_alarms = np.sum(alarms.to_pd().values != 0)
        logger.info(f"Alarms look like:\n{alarms[:5]}")
        logger.info(f"Number of alarms: {n_alarms}\n")
        self.assertLessEqual(n_alarms, 6)

    def test_save_load(self):
        print("-" * 80)
        logger.info("test_save_load\n" + "-" * 80 + "\n")
        save_dir = join(rootdir, "tmp", "windstats")
        self.model.save(dirname=save_dir)
        loaded_model = WindStats.load(dirname=save_dir)

        scores = self.model.get_anomaly_score(self.vals_test)
        self.assertSequenceEqual(list(scores), list(loaded_model.get_anomaly_score(self.vals_test)))

        alarms = self.model.get_anomaly_label(self.vals_test)
        self.assertSequenceEqual(list(alarms), list(loaded_model.get_anomaly_label(self.vals_test)))

        # serialize and deserialize
        loaded_model = WindStats.from_bytes(self.model.to_bytes())
        self.assertSequenceEqual(list(scores), list(loaded_model.get_anomaly_score(self.vals_test)))
        self.assertSequenceEqual(list(alarms), list(loaded_model.get_anomaly_label(self.vals_test)))
83 | 
84 | 
if __name__ == "__main__":
    # Verbose logging to stdout so CI captures the full training/scoring trace.
    log_format = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(format=log_format, stream=sys.stdout, level=logging.DEBUG)
    unittest.main()
90 | 


--------------------------------------------------------------------------------
/tests/change_point/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2023 salesforce.com, inc.
3 | # All rights reserved.
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 | #
7 | 


--------------------------------------------------------------------------------
/tests/evaluate/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2023 salesforce.com, inc.
3 | # All rights reserved.
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 | #
7 | 


--------------------------------------------------------------------------------
/tests/forecast/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2023 salesforce.com, inc.
3 | # All rights reserved.
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 | #
7 | 


--------------------------------------------------------------------------------
/tests/forecast/test_prophet.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import os
 8 | import logging
 9 | import sys
10 | import unittest
11 | 
12 | import pandas as pd
13 | import numpy as np
14 | 
15 | from merlion.evaluate.forecast import ForecastMetric
16 | from merlion.models.automl.autoprophet import AutoProphet, AutoProphetConfig
17 | from merlion.models.anomaly.forecast_based.prophet import ProphetDetector, ProphetDetectorConfig
18 | from merlion.models.forecast.prophet import Prophet, ProphetConfig
19 | from merlion.utils.resample import to_timestamp
20 | from merlion.utils.time_series import TimeSeries
21 | from ts_datasets.forecast import CustomDataset
22 | 
23 | logger = logging.getLogger(__name__)
24 | rootdir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
25 | 
26 | 
class TestProphet(unittest.TestCase):
    """Tests for Prophet forecasting, exogenous regressors, and AutoProphet."""

    def test_resample_time_stamps(self):
        # arrange: a Prophet whose last train time is 2022-01-01 with daily steps
        prophet = Prophet(ProphetConfig())
        prophet.last_train_time = pd.Timestamp(year=2022, month=1, day=1)
        prophet.timedelta = pd.Timedelta(days=1)
        expected = np.array([to_timestamp(pd.Timestamp(year=2022, month=1, day=2))])

        # act: asking for 1 step should yield exactly the next day
        output = prophet.resample_time_stamps(time_stamps=1)

        # assert
        assert output == expected

    def _test_exog(self, auto: bool):
        """Train Prophet with & without exogenous data; log sMAPE for each."""
        print("-" * 80)
        logger.info(f"TestProphet.test_exog{'_auto' if auto else ''}\n" + "-" * 80)
        # Get train, test, and exogenous data
        csv = os.path.join(rootdir, "data", "walmart", "walmart_mini.csv")
        target = ["Weekly_Sales"]
        ts, md = CustomDataset(rootdir=csv, test_frac=0.25, index_cols=["Store", "Dept"])[0]
        train = TimeSeries.from_pd(ts.loc[md.trainval, target])
        test = TimeSeries.from_pd(ts.loc[~md.trainval, target])
        exog_cols = [c for c in ts.columns if "MarkDown" in c or "Holiday" in c]
        exog = TimeSeries.from_pd(ts[exog_cols])

        # Train model & get prediction
        model = Prophet(ProphetConfig())
        exog_model = ProphetDetector(ProphetDetectorConfig())
        if auto:
            model = AutoProphet(model=model)
            exog_model = AutoProphet(model=exog_model)
        model.train(train_data=train)
        exog_model.train(train_data=train, exog_data=exog)
        pred, _ = model.forecast(time_stamps=test.time_stamps)
        exog_pred, _ = exog_model.forecast(time_stamps=test.time_stamps, exog_data=exog)

        # Evaluate model
        smape = ForecastMetric.sMAPE.value(test, pred)
        exog_smape = ForecastMetric.sMAPE.value(test, exog_pred)
        logger.info(f"sMAPE = {smape:.2f} (no exog)")
        logger.info(f"sMAPE = {exog_smape:.2f} (with exog)")

        # Test that exog model can also get anomaly scores
        anomaly_labels = exog_model.get_anomaly_label(test, exog_data=exog).to_pd()
        logger.info(f"Alarms detected (anomaly detection): {anomaly_labels.sum().sum().item()}")

    def test_exog(self):
        self._test_exog(auto=False)

    def test_exog_auto(self):
        self._test_exog(auto=True)
80 | 
81 | 
if __name__ == "__main__":
    # Verbose logging to stdout so CI captures the full training/scoring trace.
    log_format = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(format=log_format, stream=sys.stdout, level=logging.DEBUG)
    unittest.main()
87 | 


--------------------------------------------------------------------------------
/tests/spark/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2023 salesforce.com, inc.
3 | # All rights reserved.
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 | #
7 | 


--------------------------------------------------------------------------------
/tests/spark/conftest.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import pytest
 8 | from pyspark import SparkConf
 9 | from pyspark.sql import SparkSession
10 | 
11 | 
@pytest.fixture(scope="session")
def spark_session():
    """Session-scoped local SparkSession shared by every Spark test.

    :return: a ``SparkSession`` on ``local[2]`` whose network timeout and executor
        heartbeat interval are raised to 10 minutes so slow tests can finish.
    """
    # Creates more helpful debug messages if Spark tests fail for some Java-related reason
    try:
        import faulthandler

        faulthandler.enable()
        faulthandler.disable()
    except Exception:
        # Best-effort only. Narrowed from a bare `except:` so KeyboardInterrupt
        # and SystemExit are no longer swallowed during test collection.
        pass
    # Set timeout & heartbeat interval to 10 minutes to ensure tests can run to completion
    conf = SparkConf(False).setMaster("local[2]").setAppName("unit-tests")
    conf = conf.set("spark.network.timeout", "600000").set("spark.executor.heartbeatInterval", "600000")
    return SparkSession.builder.config(conf=conf).getOrCreate()
26 | 


--------------------------------------------------------------------------------
/tests/spark/test_anomaly.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | from os.path import abspath, dirname, join
 8 | import logging
 9 | 
10 | from pyspark.sql.types import DateType, FloatType, StructField, StructType
11 | from merlion.spark.dataset import read_dataset, write_dataset, TSID_COL_NAME
12 | from merlion.spark.pandas_udf import anomaly
13 | 
14 | logger = logging.getLogger(__name__)
15 | rootdir = dirname(dirname(dirname(abspath(__file__))))
16 | 
17 | 
def _run_job(spark, name: str, data_cols: list, model: dict, robust: bool = False):
    """Run one Spark anomaly-detection job end to end and write its output CSV.

    :param spark: the SparkSession to run on.
    :param name: job name, used in logs and the output directory.
    :param data_cols: data columns to feed the model.
    :param model: Merlion model spec dict (e.g. ``{"name": "StatThreshold"}``).
    :param robust: if True, use the error-injected dataset, a later split, and
        also predict on the training range.
    """
    logger.info(f"test_spark_anomaly_{name}\n{'-' * 80}")
    index_cols = ["Store", "Dept"]
    time_col = "Date"
    train_test_split = "2012-09-15" if robust else "2012-06-01"

    data_file = "walmart_mini_error.csv" if robust else "walmart_mini.csv"
    df = read_dataset(
        spark=spark,
        file_format="csv",
        path=join(rootdir, "data", "walmart", data_file),
        index_cols=index_cols,
        time_col=time_col,
        data_cols=data_cols,
    )
    index_cols = index_cols + [TSID_COL_NAME]

    # Output schema: the grouping columns plus (timestamp, anomaly score) rows.
    output_schema = StructType(
        [df.schema[c] for c in index_cols]
        + [StructField(time_col, DateType()), StructField("anom_score", FloatType())]
    )
    anomaly_df = df.groupBy(index_cols).applyInPandas(
        lambda pdf: anomaly(
            pdf,
            index_cols=index_cols,
            time_col=time_col,
            train_test_split=train_test_split,
            model=model,
            predict_on_train=robust,
        ),
        schema=output_schema,
    )
    df.unpersist()

    write_dataset(
        df=anomaly_df, time_col=time_col, path=join(rootdir, "tmp", "spark", "anomaly", name), file_format="csv"
    )
    anomaly_df.unpersist()
52 |     anomaly_df.unpersist()
53 | 
54 | 
def test_univariate(spark_session):
    """Univariate detection on weekly sales with a simple statistical threshold model."""
    kwargs = dict(name="univariate", data_cols=["Weekly_Sales"], model={"name": "StatThreshold"})
    _run_job(spark=spark_session, **kwargs)
57 | 
58 | 
def test_multivariate(spark_session):
    """Multivariate detection on three data columns with an isolation forest."""
    _run_job(
        spark=spark_session,
        name="multivariate",
        model={"name": "IsolationForest"},
        data_cols=["Weekly_Sales", "Temperature", "CPI"],
    )
66 | 
67 | 
def test_robust(spark_session):
    """Multivariate detection on the error-injected CSV, exercising the robust code path."""
    _run_job(
        spark=spark_session,
        name="robust",
        robust=True,
        model={"name": "IsolationForest"},
        data_cols=["Weekly_Sales", "Temperature", "CPI"],
    )
76 | 


--------------------------------------------------------------------------------
/tests/test_custom_dataset.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import glob
 8 | import os
 9 | import pandas as pd
10 | from ts_datasets.forecast import CustomDataset
11 | from ts_datasets.anomaly import CustomAnomalyDataset
12 | 
13 | rootdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
14 | 
15 | 
def test_custom_anom_dataset():
    """Load the synthetic anomaly CSVs as a custom anomaly dataset and sanity-check its contents."""
    data_dir = os.path.join(rootdir, "data", "synthetic_anomaly")
    dataset = CustomAnomalyDataset(rootdir=data_dir, test_frac=0.75, time_unit="s", assume_no_anomaly=True)
    # One time series per CSV file found in the directory.
    assert len(dataset) == len(glob.glob(os.path.join(data_dir, "*.csv")))
    for ts, md in dataset:
        # Metadata must carry both anomaly labels and the train/test split.
        assert "anomaly" in md.columns and "trainval" in md.columns
        # Realized test fraction should be within roughly one timestamp of the requested fraction.
        assert abs((~md.trainval).mean() - dataset.test_frac) < 2 / len(ts)
22 | 
23 | 
def test_custom_dataset():
    """Load the mini Walmart CSV as a custom forecasting dataset and sanity-check its contents."""
    csv = os.path.join(rootdir, "data", "walmart", "walmart_mini.csv")
    index_cols = ["Store", "Dept"]
    data_cols = ["Weekly_Sales", "Temperature", "CPI"]
    df = pd.read_csv(csv, index_col=[0, 1, 2], parse_dates=True)
    dataset = CustomDataset(rootdir=csv, test_frac=0.25, data_cols=data_cols, index_cols=index_cols)
    # One time series per (Store, Dept) group in the raw CSV.
    assert len(dataset) == len(df.groupby(index_cols).groups)
    assert all(list(ts.columns) == data_cols for ts, md in dataset)
    # BUGFIX: the original passed a bare generator expression to the outer all(),
    # i.e. all((c in md.columns for c in ...) for ts, md in dataset); generator
    # objects are always truthy, so the assertion could never fail. Wrap the
    # inner check in its own all() so column membership is actually verified.
    assert all(all(c in md.columns for c in ["trainval"] + index_cols) for ts, md in dataset)
    # Realized test fraction should be within roughly one timestamp of the requested fraction.
    assert all(abs((~md.trainval).mean() - dataset.test_frac) < 2 / len(ts) for ts, md in dataset)
34 | 


--------------------------------------------------------------------------------
/tests/test_generator.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | from os.path import abspath, dirname
 8 | import sys
 9 | import logging
10 | import unittest
11 | 
12 | import numpy as np
13 | from operator import mul
14 | from math import exp, log, sin
15 | 
16 | from merlion.utils.ts_generator import TimeSeriesGenerator, GeneratorComposer, GeneratorConcatenator
17 | 
18 | logger = logging.getLogger(__name__)
19 | rootdir = dirname(dirname(abspath(__file__)))
20 | 
21 | 
class TestTimeSeriesGenerator(unittest.TestCase):
    """Validates generator composition & concatenation against hand-computed numpy references."""

    def test_generator_sequence(self):
        logger.info("test_generator_sequence\n" + "-" * 80 + "\n")

        # Compose the three generator functions, with a single shared noise term
        # (per_generator_noise=False) applied to the composed output.
        np.random.seed(1234)
        composer = GeneratorComposer(
            generators=[
                TimeSeriesGenerator(f=lambda x: x**1.3, n=3),
                TimeSeriesGenerator(f=lambda x: 4.5 / (1 + exp(-x)), scale=4.5, n=7),
                TimeSeriesGenerator(f=lambda x: sin(x) * sin(3 * x), n=11),
            ],
            n=20,
            x0=-7,
            step=1.5,
            per_generator_noise=False,
        )
        actual = composer.generate(return_ts=False)

        # Reference: same composition computed directly in numpy, using the
        # identical RNG seed so the noise draws match.
        np.random.seed(1234)
        x = np.arange(20) * 1.5 - 7
        expected = (4.5 / (1.0 + np.exp(-np.sin(x) * np.sin(3 * x)))) ** 1.3 + np.random.normal(size=20)

        self.assertAlmostEqual(np.max(np.abs(expected - actual)), 0, places=8)

    def test_generator_series(self):
        logger.info("test_generator_series\n" + "-" * 80 + "\n")

        # Concatenate three segments; multiplicative uniform noise (distort=mul)
        # is applied over the full concatenated series.
        np.random.seed(1234)
        concatenator = GeneratorConcatenator(
            generators=[
                TimeSeriesGenerator(f=lambda x: x**2, n=3, x0=0),
                TimeSeriesGenerator(f=lambda x: exp(-(x % 5)), n=7, x0=10),
                TimeSeriesGenerator(f=lambda x: 4 * log(x), n=11, x0=-99),
            ],
            n=20,
            x0=-7,
            step=1.5,
            noise=np.random.uniform,
            distort=mul,
            string_outputs=False,
            per_generator_noise=False,
        )
        actual = concatenator.generate(return_ts=False)

        # Reference: stitch the three segments together in numpy with the same seed.
        np.random.seed(1234)
        x = np.arange(21) * 1.5 - 7
        expected = np.hstack((x[:3] ** 2, np.exp(-(x[3:10] % 5)), np.log(x[10:21]) * 4)) * np.random.uniform(size=21)

        self.assertAlmostEqual(np.max(np.abs(expected - actual)), 0, places=8)
69 | 
70 | 
if __name__ == "__main__":
    # When run directly as a script, log verbosely to stdout and run all tests.
    logging.basicConfig(
        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", stream=sys.stdout, level=logging.DEBUG
    )
    unittest.main()
76 | 


--------------------------------------------------------------------------------
/tests/transform/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2023 salesforce.com, inc.
3 | # All rights reserved.
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 | #
7 | 


--------------------------------------------------------------------------------
/tests/transform/test_anomalize.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import logging
 8 | from os.path import abspath, dirname
 9 | import sys
10 | import unittest
11 | 
12 | import numpy as np
13 | 
14 | from merlion.utils.ts_generator import TimeSeriesGenerator
15 | from merlion.transform.anomalize import Shock, TrendChange
16 | 
17 | logger = logging.getLogger(__name__)
18 | rootdir = dirname(dirname(dirname(abspath(__file__))))
19 | 
20 | 
class TestAnomalize(unittest.TestCase):
    """Tests for the Shock & TrendChange anomaly-injection transforms."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        logger.info("Generating Data...\n")
        # Fixed seed so every test method sees the same base series.
        np.random.seed(111)
        self.ts = TimeSeriesGenerator(f=lambda x: x**1.6, n=200, name="metric").generate(return_ts=True)

    def test_shock(self):
        print("-" * 80)
        logger.info("test_shock\n" + "-" * 80 + "\n")

        # Injected shocks should be statistically deviant from the values preceding them.
        shock = Shock(anom_prob=0.2, pos_prob=0.5, sd_range=(5, 5), anom_width_range=(1, 3))
        anom_ts = shock(self.ts)
        values = anom_ts.univariates["metric"].values
        labels = anom_ts.univariates["anomaly"].values
        ewm_sd = self.ts.univariates["metric"].to_pd().ewm(alpha=shock.alpha, adjust=False).std(bias=True)

        # At each shock onset (anomalous point whose predecessor is normal), the jump
        # should exceed 3 local exponentially-weighted standard deviations.
        for i, (x, is_anom, sd) in enumerate(zip(values, labels, ewm_sd)):
            if is_anom == 1.0 and labels[i - 1] == 0.0:
                assert np.abs(x - values[i - 1]) > 3 * sd

    def test_trend_change(self):
        print("-" * 80)
        logger.info("test_trend_change\n" + "-" * 80 + "\n")

        # pos_prob=1.0: every trend change is positive, so values may only increase.
        trend = TrendChange(anom_prob=0.2, pos_prob=1.0, scale_range=(2, 3))
        anom_ts = trend(self.ts)
        self.assertTrue(all(self.ts.univariates["metric"].np_values <= anom_ts.univariates["metric"].np_values))

        # pos_prob=0.0: every trend change is negative, so values may only decrease.
        trend = TrendChange(anom_prob=0.2, pos_prob=0.0, scale_range=(2, 3))
        anom_ts = trend(self.ts)
        self.assertTrue(all(self.ts.univariates["metric"].np_values >= anom_ts.univariates["metric"].np_values))

    def test_natural_bounds(self):
        print("-" * 80)
        logger.info("test_natural_bounds\n" + "-" * 80 + "\n")

        # A sinusoid lives in [-1, 1]; shocks must respect those natural bounds.
        np.random.seed(111)
        sin_ts = TimeSeriesGenerator(f=np.sin, n=200, name="metric").generate(return_ts=True)

        shock = Shock(anom_prob=0.5, sd_range=(5, 5), natural_bounds=(-1, 1))
        shocked_values = shock(sin_ts).univariates["metric"].values
        self.assertTrue(all(np.abs(shocked_values) <= 1))

    def test_anom_prob(self):
        print("-" * 80)
        logger.info("test_anom_prob\n" + "-" * 80 + "\n")

        # anom_prob=0: the transforms must leave the series untouched and label nothing.
        for anomalize in (Shock(anom_prob=0.0), TrendChange(anom_prob=0.0)):
            anom_ts = anomalize(self.ts)
            self.assertEqual(self.ts.univariates["metric"], anom_ts.univariates["metric"])
            self.assertTrue(all(0.0 == anom_ts.univariates["anomaly"].np_values))
79 | 
80 | 
if __name__ == "__main__":
    # When run directly as a script, log verbosely to stdout and run all tests.
    logging.basicConfig(
        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", stream=sys.stdout, level=logging.DEBUG
    )
    unittest.main()
86 | 


--------------------------------------------------------------------------------
/tests/transform/test_inverse.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import logging
 8 | from os.path import abspath, dirname, join
 9 | import pickle
10 | import sys
11 | import unittest
12 | 
13 | from merlion.utils import TimeSeries
14 | from merlion.transform.bound import LowerUpperClip
15 | from merlion.transform.moving_average import DifferenceTransform, ExponentialMovingAverage, LagTransform, MovingAverage
16 | from merlion.transform.normalize import MinMaxNormalize
17 | from merlion.transform.resample import TemporalResample, Shingle
18 | from merlion.transform.sequence import TransformSequence, TransformStack
19 | 
20 | 
21 | logger = logging.getLogger(__name__)
22 | rootdir = dirname(dirname(dirname(abspath(__file__))))
23 | 
24 | 
class TestInverse(unittest.TestCase):
    """Tests a number of transforms & their inverses."""

    def test_full(self):
        # Load the pickled dataframe and keep only the data columns.
        with open(join(rootdir, "data", "test_transform.pkl"), "rb") as f:
            df = pickle.load(f).drop(columns=["anomaly", "trainval"])
        ts = TimeSeries.from_pd(df)

        # A deliberately long pipeline mixing several transform families.
        transform = TransformSequence(
            [
                MinMaxNormalize(),
                LowerUpperClip(0, 1),
                TemporalResample(),
                DifferenceTransform(),
                MovingAverage(weights=[0.1, 0.2, 0.3, 0.4]),
                LagTransform(k=20, pad=True),
                LagTransform(k=3, pad=False),
                TransformStack(
                    [ExponentialMovingAverage(alpha=0.7), MovingAverage(weights=[0.1, 0.2, 0.3, 0.4])],
                    check_aligned=False,
                ),
                Shingle(size=10, stride=7),
            ]
        )
        transform.train(ts)
        transformed = transform(ts)

        # Inverting (retaining the inversion state) should recover the original
        # input up to a tiny relative absolute error.
        inverted = transform.invert(transformed, retain_inversion_state=True)
        orig_df, inv_df = ts.to_pd(), inverted.to_pd()
        rae = ((orig_df - inv_df).abs() / ((orig_df - orig_df.mean()).abs() + 1e-8)).mean().mean()
        self.assertLess(rae, 1e-6)

        # A second inversion (which consumes the retained state) must agree with the first.
        inv_df2 = transform.invert(transformed).to_pd()
        rae = ((inv_df2 - inv_df) / ((inv_df - inv_df.mean()).abs() + 1e-8)).mean().mean()
        self.assertLess(rae, 1e-6)

        # The inversion state was consumed above, so a third attempt must fail.
        with self.assertRaises(RuntimeError) as context:
            transform.invert(transformed)
        self.assertTrue("Inversion state not set" in str(context.exception))
63 | 
64 | 
if __name__ == "__main__":
    # When run directly as a script, log verbosely to stdout and run all tests.
    logging.basicConfig(
        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", stream=sys.stdout, level=logging.DEBUG
    )
    unittest.main()
70 | 


--------------------------------------------------------------------------------
/tests/transform/test_moving_average.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import numpy as np
 8 | import unittest
 9 | 
10 | from merlion.utils.time_series import UnivariateTimeSeries
11 | from merlion.transform.moving_average import (
12 |     DifferenceTransform,
13 |     LagTransform,
14 |     MovingPercentile,
15 |     ExponentialMovingAverage,
16 | )
17 | from merlion.utils.ts_generator import TimeSeriesGenerator
18 | 
19 | 
class TestMovingAverage(unittest.TestCase):
    """Tests for difference, lag, moving-percentile & exponential-moving-average transforms."""

    @staticmethod
    def _ramp(n):
        # Time series whose value at timestamp t is t, for t = 0, ..., n-1.
        return UnivariateTimeSeries(range(n), range(n)).to_ts()

    def test_difference_transform(self):
        n = 8
        # Differencing a unit ramp yields a series of ones, starting at t=1.
        transformed = DifferenceTransform()(self._ramp(n))
        expected = UnivariateTimeSeries(range(1, n), np.ones(n - 1)).to_ts()
        self.assertEqual(expected, transformed)

    def test_lag_transform(self):
        n = 8
        ts = self._ramp(n)

        # The lag-k difference of a unit ramp is constantly k, starting at t=k.
        for k in range(1, 9):
            transformed = LagTransform(k)(ts)
            expected = UnivariateTimeSeries(range(k, n), np.repeat(k, n - k)).to_ts()
            self.assertEqual(expected, transformed)

        # With padding, the first k values ramp up (0, 1, 2) before settling at k.
        transformed = LagTransform(k=3, pad=True)(ts)
        expected = UnivariateTimeSeries(range(n), list(range(3)) + [3] * (n - 3)).to_ts()
        self.assertEqual(expected, transformed)

    def test_moving_percentile(self):
        n = 20
        ts = self._ramp(n)

        # A one-point window returns the series unchanged, regardless of q.
        self.assertEqual(self._ramp(n), MovingPercentile(n_steps=1, q=23)(ts))

        # q=100 is a moving max; on an increasing series that is the identity.
        self.assertEqual(self._ramp(n), MovingPercentile(n_steps=4, q=100)(ts))

        # q=0 is a moving min: the window minimum trails the increasing series.
        expected = UnivariateTimeSeries(range(n), [0] * 6 + list(range(1, 14 + 1))).to_ts()
        self.assertEqual(expected, MovingPercentile(n_steps=6, q=0)(ts))

        # q=50 is a moving median (interpolated while the window is still filling).
        expected = UnivariateTimeSeries(range(n), [0, 0.5] + list(range(1, 18 + 1))).to_ts()
        self.assertEqual(expected, MovingPercentile(n_steps=3, q=50)(ts))

    def test_exponential_moving_average_ci(self):
        np.random.seed(12345)
        name = "metric"
        ts = TimeSeriesGenerator(f=lambda x: x, n=100, name=name).generate()
        ema = ExponentialMovingAverage(alpha=0.1, ci=True)(ts)
        # The EMA must lie within its own confidence band at every timestamp.
        mid = ema.univariates[name]
        lower = ema.univariates[f"{name}_lb"]
        upper = ema.univariates[f"{name}_ub"]
        self.assertTrue(all(lo <= x <= hi for (lo, x, hi) in zip(lower.values, mid.values, upper.values)))
75 | 
76 | 
if __name__ == "__main__":
    # Allow running this test file directly as a script.
    unittest.main()
79 | 


--------------------------------------------------------------------------------
/tests/transform/test_sequence.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import logging
 8 | from merlion.transform.base import Identity
 9 | from merlion.transform.sequence import TransformSequence, TransformStack
10 | import unittest
11 | 
12 | from merlion.utils import TimeSeries, UnivariateTimeSeries
13 | from merlion.transform.moving_average import LagTransform, MovingAverage
14 | 
15 | 
class TestSequence(unittest.TestCase):
    """Tests for sequential & stacked composition of transforms."""

    def test_transform_sequence(self):
        n = 25
        ts = TimeSeries([UnivariateTimeSeries(range(n), range(n))])

        ident, mavg, lag = Identity(), MovingAverage(n_steps=3), LagTransform(k=2)
        seq = TransformSequence([ident, mavg, lag])
        seq.train(ts)

        # A sequence applies its transforms in order, i.e. lag(mavg(ident(ts))).
        self.assertEqual(lag(mavg(ident(ts))), seq(ts))

    def test_transform_stack(self):
        n = 25
        ts = TimeSeries([UnivariateTimeSeries(range(n), range(n))])

        ident, mavg, lag = Identity(), MovingAverage(n_steps=3), LagTransform(k=2)
        stack = TransformStack([ident, mavg, lag])
        stack.train(ts)

        # A stack applies each transform to the input independently & merges the results.
        self.assertEqual(TimeSeries.from_ts_list([ident(ts), mavg(ts), lag(ts)]), stack(ts))
40 | 
41 | 
if __name__ == "__main__":
    # Allow running this test file directly as a script.
    unittest.main()
44 | 


--------------------------------------------------------------------------------
/ts_datasets/README.md:
--------------------------------------------------------------------------------
 1 | # ts_datasets
 2 | This library implements Python classes that manipulate numerous time series datasets
 3 | into standardized `pandas` DataFrames. The sub-modules are `ts_datasets.anomaly` for time series anomaly detection, and
 4 | `ts_datasets.forecast` for time series forecasting. Simply install the package by calling `pip install -e .` from the
 5 | command line. Then, you can load a dataset (e.g. the "realAWSCloudwatch" split of the Numenta Anomaly Benchmark) by
 6 | calling
 7 | ```python
 8 | from ts_datasets.anomaly import NAB
 9 | dataset = NAB(subset="realAWSCloudwatch", rootdir=path_to_NAB)
10 | ```
11 | Note that if you have installed this package in editable mode (i.e. by specifying `-e`), the root directory
12 | need not be specified.
13 | 
14 | Each dataset supports the following features: 
15 | 1.  ``__getitem__``: you may call ``ts, metadata = dataset[i]``. ``ts`` is a time-indexed ``pandas`` DataFrame, with
16 |     each column representing a different variable (in the case of multivariate time series). ``metadata`` is a dict or
17 |     ``pd.DataFrame`` with the same index as ``ts``, with different keys indicating different dataset-specific
18 |     metadata (train/test split, anomaly labels, etc.) for each timestamp.
19 | 2.  ``__len__``:  Calling ``len(dataset)`` will return the number of time series in the dataset.
20 | 3.  ``__iter__``: You may iterate over the `pandas` representations of the time series in the dataset with
21 |     ``for ts, metadata in dataset: ...``
22 | 
23 | For each time series in the dataset, `metadata` is a dict or `pd.DataFrame` that will always have the following keys:
24 | -   ``trainval``: (``bool``) a `pd.Series` indicating whether each timestamp of the time series should be used for
25 |     training/validation (if `True`) or testing (if `False`)
26 | 
27 | For anomaly detection datasets, ``metadata`` will also have the key:
28 | - ``anomaly``: (``bool``) a `pd.Series` indicating whether each timestamp is anomalous
29 | 
30 | We currently support the following datasets for time series anomaly detection (`ts_datasets.anomaly`):
31 | - [IOps Competition](http://iops.ai/competition_detail/?competition_id=5)
32 | - [Numenta Anomaly Benchmark](https://github.com/numenta/NAB)
33 | - Synthetic (synthetic data generated using [this script](../examples/misc/generate_synthetic_tsad_dataset.py))
34 | - [SMAP & MSL](https://github.com/khundman/telemanom/) (multivariate time series anomaly detection datasets from NASA)
35 | - [SMD](https://github.com/NetManAIOps/OmniAnomaly) (server machine dataset)
36 | 
37 | We currently support the following datasets for time series forecasting (`ts_datasets.forecast`):
38 | - [M4 Competition](https://github.com/Mcompetitions/M4-methods/tree/master/Dataset)
39 |     - There are 100,000 univariate time series with different granularity, including Yearly (23,000 sequences),
40 |       Quarterly (24,000 sequences), Monthly (48,000 sequences), Weekly (359 sequences), Daily (4,227 sequences) and
41 |       Hourly (414 sequences) data.
42 | - [Energy Power Grid](https://www.kaggle.com/robikscube/hourly-energy-consumption)
43 |     - There is one 10-variable time series.
44 |     - Each univariate records the energy power usage in a particular region.
45 | - [Seattle Trail for Bike and Pedestrian](https://www.kaggle.com/city-of-seattle/seattle-burke-gilman-trail)
46 |     - There is one 5-variable time series. 
    - Each univariate records the bicycle/pedestrian flow along a different
      direction on the trail.
49 | - [Solar Energy Plant](https://www.nrel.gov/grid/solar-power-data.html)
50 |     - There is one 405-variable time series. 
    - Each univariate records the solar energy power in each detector in the plant.
    - By default, the data loader returns only the first 100 of 405 univariates.
53 | 
54 | More details on each dataset can be found in their class-level docstrings, or in the API doc.
55 | 


--------------------------------------------------------------------------------
/ts_datasets/setup.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | from setuptools import find_packages, setup
 8 | 
 9 | setup(
10 |     name="ts_datasets",
11 |     version="0.1.0",
12 |     author="Aadyot Bhatnagar, Tian Lan, Chenghao Liu, Wenzhuo Yang",
13 |     author_email="abhatnagar@salesforce.com",
14 |     description="A library for easily loading time series anomaly detection & forecasting datasets",
15 |     long_description=open("README.md", "r", encoding="utf-8").read(),
16 |     long_description_content_type="text/markdown",
17 |     license="Apache 2.0",
18 |     packages=find_packages(include=["ts_datasets*"]),
19 |     install_requires=["cython", "numpy", "pandas", "requests", "tqdm", "wheel", "gdown"],
20 | )
21 | 


--------------------------------------------------------------------------------
/ts_datasets/ts_datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | .. autosummary::
 9 |     anomaly
10 |     forecast
11 | """
12 | 


--------------------------------------------------------------------------------
/ts_datasets/ts_datasets/anomaly/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Datasets for time series anomaly detection (TSAD). All the time series in these
 9 | datasets have anomaly labels.
10 | """
11 | from ts_datasets.anomaly.base import TSADBaseDataset
12 | from ts_datasets.anomaly.custom import CustomAnomalyDataset
13 | from ts_datasets.anomaly.iops_competition import IOpsCompetition
14 | from ts_datasets.anomaly.nab import NAB
15 | from ts_datasets.anomaly.synthetic import Synthetic
16 | from ts_datasets.anomaly.ucr import UCR
17 | 
18 | from ts_datasets.anomaly.smd import SMD
19 | from ts_datasets.anomaly.smap import SMAP
20 | from ts_datasets.anomaly.msl import MSL
21 | 
# Names exported by `from ts_datasets.anomaly import *`; everything except
# `get_dataset` and `TSADBaseDataset` is a concrete data loader class.
__all__ = [
    "get_dataset",
    "TSADBaseDataset",
    "CustomAnomalyDataset",
    "IOpsCompetition",
    "NAB",
    "Synthetic",
    "UCR",
    "SMD",
    "SMAP",
    "MSL",
]
34 | 
35 | 
def get_dataset(dataset_name: str, rootdir: str = None, **kwargs) -> TSADBaseDataset:
    """
    :param dataset_name: the name of the dataset to load, formatted as
        ``<name>`` or ``<name>_<subset>``, e.g. ``IOpsCompetition``
        or ``NAB_realAWSCloudwatch``
    :param rootdir: the directory where the desired dataset is stored. Not
        required if the package :py:mod:`ts_datasets` is installed in editable
        mode, i.e. with flag ``-e``.
    :param kwargs: keyword arguments for the data loader you are trying to load.
    :return: the data loader for the desired dataset (and subset) desired
    """
    # Split off an optional subset suffix, e.g. "NAB_realAWSCloudwatch" -> ["NAB", "realAWSCloudwatch"].
    name, *subset = dataset_name.split("_", maxsplit=1)
    valid_datasets = set(__all__).difference({"TSADBaseDataset", "get_dataset"})
    if name not in valid_datasets:
        raise KeyError(
            "Dataset should be formatted as <name> or "
            "<name>_<subset>, where <name> is one of "
            f"{valid_datasets}. Got {dataset_name} instead."
        )
    cls = globals()[name]
    # Reject a subset suffix for datasets which don't define any subsets.
    if not hasattr(cls, "valid_subsets") and subset:
        raise ValueError(
            f"Dataset {name} does not have any subsets, "
            f"but attempted to load subset {subset[0]} by "
            f"specifying dataset name {dataset_name}."
        )

    if subset:
        kwargs.update(subset=subset[0])
    return cls(rootdir=rootdir, **kwargs)
67 | 


--------------------------------------------------------------------------------
/ts_datasets/ts_datasets/anomaly/base.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import numpy as np
 8 | import pandas as pd
 9 | 
10 | from ts_datasets.base import BaseDataset, _main_fns_docstr
11 | 
12 | _intro_docstr = """
13 | Base dataset class for storing time series intended for anomaly detection.
14 | """
15 | 
16 | _extra_note = """
17 | 
18 | .. note::
19 | 
20 |     For each time series, the ``metadata`` will always have the key ``anomaly``, which is a 
21 |     ``pd.Series`` of ``bool`` indicating whether each timestamp is anomalous.
22 | """
23 | 
24 | 
class TSADBaseDataset(BaseDataset):
    # Compose the class docstring from the shared pieces defined above.
    __doc__ = _intro_docstr + _main_fns_docstr + _extra_note

    @property
    def max_lead_sec(self):
        """
        The maximum number of seconds an anomaly may be detected early, for
        this dataset. ``None`` signifies no early detections allowed, or that
        the user may override this value with something better suited for their
        purposes.
        """
        return None

    @property
    def max_lag_sec(self):
        """
        The maximum number of seconds after the start of an anomaly, that we
        consider detections to be accurate (and not ignored for being too late).
        ``None`` signifies that any detection in the window is acceptable, or
        that the user may override this value with something better suited for
        their purposes.
        """
        return None

    def describe(self):
        """Print summary statistics about the anomalies in this dataset."""
        anom_bds = []  # (start, end) timestamps of each anomalous window
        anom_locs = []  # fraction of the way through its series where each anomaly ends
        anom_in_trainval = []  # whether each anomaly falls in the train/val split
        for ts, md in self:
            # Timestamps where the anomaly label flips relative to the previous timestamp.
            boundaries = md.anomaly.iloc[1:] != md.anomaly.values[:-1]
            boundaries = boundaries[boundaries].index
            if len(boundaries) == 0:
                # Constant label: no flips, so nothing is recorded for this series.
                continue

            ts_len = ts.index[-1] - ts.index[0]
            # The series starts inside an anomaly: it spans from the start to the first flip.
            if md.anomaly.iloc[0]:
                anom_bds.append((ts.index[0], boundaries[0]))
                anom_locs.append((boundaries[0] - ts.index[0]) / ts_len)
                # NOTE(review): a leading anomaly is counted as in train/val — confirm intended.
                anom_in_trainval.append(True)

            # Interior windows: consecutive flip pairs delimit runs of constant label;
            # keep the runs whose starting label is anomalous.
            for t0, tf in zip(boundaries[:-1], boundaries[1:]):
                if md.anomaly[t0]:
                    anom_bds.append((t0, tf))
                    anom_locs.append((tf - ts.index[0]) / ts_len)
                    anom_in_trainval.append(bool(md.trainval[t0]))

            # The series ends inside an anomaly: it spans from the last flip to the end.
            if md.anomaly[boundaries[-1]]:
                anom_bds.append((boundaries[-1], ts.index[-1]))
                anom_locs.append(1.0)
                # NOTE(review): a trailing anomaly is counted as in the test split — confirm intended.
                anom_in_trainval.append(False)

        print("=" * 80)
        print(f"Time series in dataset have average length {int(np.mean([len(ts) for ts, md in self]))}.")
        print(f"Time series in dataset have {len(anom_bds) / len(self):.1f} anomalies on average.")
        print(
            f"{sum(anom_in_trainval) / len(anom_in_trainval) * 100:.1f}% of "
            f"anomalies are in the train/val split of their respective time "
            f"series."
        )
        print(f"Anomalies in dataset have average length {pd.Timedelta(np.mean([(tf - t0) for t0, tf in anom_bds]))}.")
        print(
            f"Average anomaly occurs {np.mean(anom_locs) * 100:.1f}% "
            f"(+/- {np.std(anom_locs) * 100:.1f}%) of the way through "
            f"its respective time series."
        )
        print("=" * 80)
91 | 


--------------------------------------------------------------------------------
/ts_datasets/ts_datasets/anomaly/custom.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import glob
 8 | import logging
 9 | import os
10 | 
11 | import pandas as pd
12 | 
13 | from ts_datasets.forecast.custom import CustomDataset
14 | from ts_datasets.anomaly.base import TSADBaseDataset
15 | 
16 | logger = logging.getLogger(__name__)
17 | 
18 | 
class CustomAnomalyDataset(CustomDataset, TSADBaseDataset):
    """
    Wrapper to load a custom dataset for anomaly detection. Please review the `tutorial <tutorials/CustomDataset>`
    to get started.
    """

    def __init__(
        self,
        rootdir,
        test_frac=0.5,
        assume_no_anomaly=False,
        time_col=None,
        time_unit="s",
        data_cols=None,
        index_cols=None,
    ):
        """
        :param rootdir: Filename of a single CSV, or a directory containing many CSVs. Each CSV must contain 1
            or more time series.
        :param test_frac: Fraction of each time series used for testing, when the time series has no
            "trainval" column.
        :param assume_no_anomaly: Behavior when a time series has no "anomaly" column: if ``True``, assume the
            data contains no anomalies; if ``False``, raise an exception.
        :param time_col: Name of the column used to index time. We use the first non-index, non-metadata column
            if none is given.
        :param time_unit: If the time column is numerical, we assume it is a timestamp expressed in this unit.
        :param data_cols: Name of the columns to fetch from the dataset. If ``None``, use all non-time,
            non-index columns.
        :param index_cols: If a CSV file contains multiple time series, these are the columns used to index those
            time series, e.g. ``["Store", "Dept"]`` for per-(store, department) sales. The values of the index
            columns will be added to the metadata of the data loader.
        """
        # Set before super().__init__: the base constructor presumably loads the
        # data and invokes check_ts_for_metadata, which reads this flag.
        self.assume_no_anomaly = assume_no_anomaly
        super().__init__(
            rootdir=rootdir,
            test_frac=test_frac,
            time_col=time_col,
            time_unit=time_unit,
            data_cols=data_cols,
            index_cols=index_cols,
        )

    @property
    def metadata_cols(self):
        # Columns treated as per-timestamp metadata rather than data.
        return ["anomaly", "trainval"]

    def check_ts_for_metadata(self, ts, col):
        """Ensure metadata column ``col`` exists in ``ts``; coerce "anomaly" to bool."""
        if col != "anomaly":
            return super().check_ts_for_metadata(ts, col)
        if col not in ts:
            if not self.assume_no_anomaly:
                raise ValueError(f"Time series {ts} does not have metadata column {col}.")
            ts[col] = False
        ts[col] = ts[col].astype(bool)
        return ts
76 | 


--------------------------------------------------------------------------------
/ts_datasets/ts_datasets/anomaly/msl.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import os
 8 | import sys
 9 | import logging
10 | from ts_datasets.anomaly.base import TSADBaseDataset
11 | from ts_datasets.anomaly.smd import download, combine_train_test_datasets
12 | from ts_datasets.anomaly.smap import preprocess, load_data
13 | 
14 | _logger = logging.getLogger(__name__)
15 | _logger.setLevel(logging.DEBUG)
16 | _handler = logging.StreamHandler(sys.stdout)
17 | _handler.setLevel(logging.DEBUG)
18 | _logger.addHandler(_handler)
19 | 
20 | 
class MSL(TSADBaseDataset):
    """
    Mars Science Laboratory (MSL) rover dataset, distributed together with the
    Soil Moisture Active Passive (SMAP) satellite dataset. Both are real-world
    datasets expert-labeled by NASA.

    - source: https://github.com/khundman/telemanom
    """

    url = "https://www.dropbox.com/s/uv9ojw353qwzqht/SMAP.tar.gz?dl=1"

    def __init__(self, subset=None, rootdir=None):
        """
        :param subset: unused; accepted for API compatibility with other loaders.
        :param rootdir: directory where the raw data is (or should be) stored.
        """
        super().__init__()

        if rootdir is None:
            # Default to <repo_root>/data/smap, relative to this file's location.
            pkg_dir = os.path.dirname(os.path.abspath(__file__))
            repo_root = os.path.abspath(os.path.join(pkg_dir, "..", "..", ".."))
            rootdir = os.path.join(repo_root, "data", "smap")

        data_dir = os.path.join(rootdir, "SMAP")
        # Fetch the shared SMAP archive if needed, then prepare the MSL portion.
        download(_logger, rootdir, MSL.url, "SMAP")
        preprocess(_logger, data_dir, dataset="MSL")
        # Load the combined training/test data and its metadata.
        df, metadata = combine_train_test_datasets(*load_data(data_dir, "MSL"))
        self.time_series.append(df)
        self.metadata.append(metadata)
46 | 


--------------------------------------------------------------------------------
/ts_datasets/ts_datasets/anomaly/smap.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import os
 8 | import sys
 9 | import csv
10 | import ast
11 | import logging
12 | import pickle
13 | import numpy as np
14 | import pandas as pd
15 | from ts_datasets.anomaly.base import TSADBaseDataset
16 | from ts_datasets.anomaly.smd import download, combine_train_test_datasets
17 | 
18 | _logger = logging.getLogger(__name__)
19 | _logger.setLevel(logging.DEBUG)
20 | _handler = logging.StreamHandler(sys.stdout)
21 | _handler.setLevel(logging.DEBUG)
22 | _logger.addHandler(_handler)
23 | 
24 | 
class SMAP(TSADBaseDataset):
    """
    Soil Moisture Active Passive (SMAP) satellite dataset, distributed together
    with the Mars Science Laboratory (MSL) rover dataset. Both are real-world
    datasets expert-labeled by NASA.

    - source: https://github.com/khundman/telemanom
    """

    url = "https://www.dropbox.com/s/uv9ojw353qwzqht/SMAP.tar.gz?dl=1"

    def __init__(self, subset=None, rootdir=None):
        """
        :param subset: unused; accepted for API compatibility with other loaders.
        :param rootdir: directory where the raw data is (or should be) stored.
        """
        super().__init__()

        if rootdir is None:
            # Default to <repo_root>/data/smap, relative to this file's location.
            pkg_dir = os.path.dirname(os.path.abspath(__file__))
            repo_root = os.path.abspath(os.path.join(pkg_dir, "..", "..", ".."))
            rootdir = os.path.join(repo_root, "data", "smap")

        data_dir = os.path.join(rootdir, "SMAP")
        # Fetch the archive if needed, then build the pickled train/test splits.
        download(_logger, rootdir, SMAP.url, "SMAP")
        preprocess(_logger, data_dir, dataset="SMAP")
        # Load the combined training/test data and its metadata.
        df, metadata = combine_train_test_datasets(*load_data(data_dir, "SMAP"))
        self.time_series.append(df)
        self.metadata.append(metadata)
50 | 
51 | 
def preprocess(logger, data_folder, dataset):
    """
    Build pickled train/test arrays and test labels for ``dataset`` from the raw
    files in ``data_folder``. A no-op if all three pickles already exist.

    :param logger: logger used for progress messages.
    :param data_folder: directory containing ``labeled_anomalies.csv`` plus
        ``train/`` and ``test/`` subdirectories of per-channel ``.npy`` files.
    :param dataset: name of the dataset to preprocess (e.g. "SMAP" or "MSL",
        matched against the second CSV column).
    """
    def _pkl_path(suffix):
        return os.path.join(data_folder, f"{dataset}_{suffix}.pkl")

    if all(os.path.exists(_pkl_path(s)) for s in ("test_label", "train", "test")):
        return

    logger.info(f"Preprocessing {dataset}")
    with open(os.path.join(data_folder, "labeled_anomalies.csv"), "r") as f:
        rows = list(csv.reader(f, delimiter=","))[1:]  # drop the header row
    rows.sort(key=lambda r: r[0])

    # Keep only rows for this dataset; channel "P-2" is explicitly skipped.
    data_info = [r for r in rows if r[1] == dataset and r[0] != "P-2"]

    # Concatenate per-channel boolean anomaly masks into one long label vector.
    labels = []
    for r in data_info:
        mask = np.zeros([int(r[-1])], dtype=bool)
        for span in ast.literal_eval(r[2]):
            mask[span[0] : span[1] + 1] = True
        labels.extend(mask)
    with open(_pkl_path("test_label"), "wb") as f:
        pickle.dump(np.asarray(labels), f)

    # Concatenate the per-channel train & test arrays in the same channel order.
    for category in ["train", "test"]:
        data = []
        for r in data_info:
            data.extend(np.load(os.path.join(data_folder, category, r[0] + ".npy")))
        with open(_pkl_path(category), "wb") as f:
            pickle.dump(np.asarray(data), f)
86 | 
87 | 
def load_data(directory, dataset):
    """
    Load the pickled train/test splits and test labels for ``dataset``.

    :param directory: directory holding the ``{dataset}_{train,test,test_label}.pkl`` files.
    :param dataset: dataset name used as the filename prefix.
    :return: ``(train_df, test_df, test_labels)`` where the first two are
        ``pd.DataFrame`` s and the labels are cast to ``int``.
    """
    def _unpickle(suffix):
        with open(os.path.join(directory, f"{dataset}_{suffix}.pkl"), "rb") as f:
            return pickle.load(f)

    train_df = pd.DataFrame(_unpickle("train"))
    test_df = pd.DataFrame(_unpickle("test"))
    test_labels = _unpickle("test_label")
    return train_df, test_df, test_labels.astype(int)
97 | 


--------------------------------------------------------------------------------
/ts_datasets/ts_datasets/anomaly/synthetic.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import glob
 8 | import os
 9 | 
10 | import pandas as pd
11 | 
12 | from ts_datasets.anomaly.base import TSADBaseDataset
13 | 
14 | 
class Synthetic(TSADBaseDataset):
    """
    Wrapper to load a synthetically generated dataset.
    The dataset was generated using three base time series, each of which
    was separately injected with shocks, spikes, dips and level shifts, making
    a total of 15 time series (including the base time series without anomalies).
    Subsets are defined by the base time series used ("horizontal",
    "seasonal", "upward_downward"), or the type of injected anomaly ("shock",
    "spike", "dip", "level"). The "anomaly" subset refers to all time series with
    injected anomalies (12) while "base" refers to all time series without them (3).
    """

    base_ts_subsets = ["horizontal", "seasonal", "upward_downward"]
    anomaly_subsets = ["shock", "spike", "dip", "level", "trend"]
    valid_subsets = ["anomaly", "all", "base"] + base_ts_subsets + anomaly_subsets

    def __init__(self, subset="anomaly", rootdir=None):
        """
        :param subset: which subset of the dataset to load (see the class docstring).
        :param rootdir: The root directory at which the dataset can be found.
        """
        super().__init__()

        assert subset in self.valid_subsets, f"subset should be in {self.valid_subsets}, but got {subset}"
        self.subset = subset

        if rootdir is None:
            # Default to <repo_root>/data/synthetic_anomaly relative to this file.
            pkg_dir = os.path.dirname(os.path.abspath(__file__))
            repo_root = os.path.abspath(os.path.join(pkg_dir, "..", "..", ".."))
            rootdir = os.path.join(repo_root, "data", "synthetic_anomaly")

        # Select CSVs for the requested subset; files with injected anomalies
        # carry "anom" in their name.
        paths = sorted(glob.glob(f"{rootdir}/*.csv"))
        if subset == "base":
            paths = [p for p in paths if "anom" not in os.path.basename(p)]
        elif subset != "all":
            paths = [p for p in paths if "anom" in os.path.basename(p)]
        if subset in self.base_ts_subsets + self.anomaly_subsets:
            paths = [p for p in paths if subset in os.path.basename(p)]

        for path in paths:
            df = pd.read_csv(path)
            df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s")
            df = df.set_index("timestamp")

            # First column is the variable itself; an "anomaly" column is only
            # present for time series with injected anomalies.
            series = df[df.columns[0:1]]
            meta = pd.DataFrame(
                {
                    "anomaly": df["anomaly"].astype(bool) if df.shape[1] > 1 else [False] * len(df),
                    # First half of every series is the train/val split
                    "trainval": [k < len(df) * 0.5 for k in range(len(df))],
                },
                index=df.index,
            )

            self.time_series.append(series)
            self.metadata.append(meta)
66 | 


--------------------------------------------------------------------------------
/ts_datasets/ts_datasets/anomaly/ucr.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import glob
 8 | import os
 9 | import logging
10 | import requests
11 | from pathlib import Path
12 | import sys
13 | import zipfile
14 | 
15 | import numpy as np
16 | import pandas as pd
17 | 
18 | from ts_datasets.anomaly.base import TSADBaseDataset
19 | 
20 | logger = logging.getLogger(__name__)
21 | logger.setLevel(logging.DEBUG)
22 | handler = logging.StreamHandler(sys.stdout)
23 | handler.setLevel(logging.DEBUG)
24 | logger.addHandler(handler)
25 | 
26 | 
class UCR(TSADBaseDataset):
    """
    Data loader for the Hexagon ML/UC Riverside Time Series Anomaly Archive.

    See `here <https://compete.hexagon-ml.com/practice/competition/39/>`_ for details.

    Hoang Anh Dau, Eamonn Keogh, Kaveh Kamgar, Chin-Chia Michael Yeh, Yan Zhu,
    Shaghayegh Gharghabi, Chotirat Ann Ratanamahatana, Yanping Chen, Bing Hu,
    Nurjahan Begum, Anthony Bagnall , Abdullah Mueen, Gustavo Batista, & Hexagon-ML (2019).
    The UCR Time Series Classification Archive. URL https://www.cs.ucr.edu/~eamonn/time_series_data_2018/
    """

    def __init__(self, rootdir=None):
        """
        :param rootdir: The root directory at which the dataset can be found.
        """
        super().__init__()
        if rootdir is None:
            fdir = os.path.dirname(os.path.abspath(__file__))
            merlion_root = os.path.abspath(os.path.join(fdir, "..", "..", ".."))
            rootdir = os.path.join(merlion_root, "data", "ucr")

        self.download(rootdir)
        # time_series holds filenames; each file is parsed lazily in __getitem__.
        self.time_series = sorted(
            glob.glob(
                os.path.join(
                    rootdir, "UCR_TimeSeriesAnomalyDatasets2021", "FilesAreInHere", "UCR_Anomaly_FullData", "*.txt"
                )
            )
        )

    def __getitem__(self, i):
        """
        :return: ``(ts, metadata)`` for the ``i``-th time series, where metadata has
            boolean "anomaly" and "trainval" columns aligned with ``ts``.
        """
        fname = self.time_series[i]
        # Filenames end with _<trainval split>_<anomaly start>_<anomaly end>.txt
        split, anom_start, anom_end = [int(x) for x in fname[: -len(".txt")].split("_")[-3:]]
        name = fname.split("_")[-4]
        arr = np.loadtxt(fname)
        # The archive provides no timestamps, so synthesize a 1-minute index.
        index = pd.date_range(start=0, periods=len(arr), freq="1min")
        df = pd.DataFrame({name: arr}, index=index)
        return (
            df,
            pd.DataFrame(
                {
                    # Pad the labeled anomaly window by 100 points on each side
                    "anomaly": [anom_start - 100 <= j <= anom_end + 100 for j in range(len(arr))],
                    "trainval": [j < split for j in range(len(arr))],
                },
                index=index,
            ),
        )

    def download(self, rootdir):
        """
        Download and extract the archive into ``rootdir``. Skips the download if the
        zip is already present, and the extraction if the ``_SUCCESS`` marker exists.
        """
        filename = "UCR_TimeSeriesAnomalyDatasets2021.zip"
        # Fix: the URL must point at the zip archive itself; it previously
        # contained a corrupted placeholder instead of the filename.
        url = f"https://www.cs.ucr.edu/~eamonn/time_series_data_2018/{filename}"

        os.makedirs(rootdir, exist_ok=True)
        compressed_file = os.path.join(rootdir, filename)

        # Download the compressed dataset
        if not os.path.exists(compressed_file):
            logger.info("Downloading " + url)
            with requests.get(url, stream=True) as r:
                with open(compressed_file, "wb") as f:
                    for chunk in r.iter_content(chunk_size=16 * 1024**2):
                        if chunk:  # filter out keep-alive new chunks
                            f.write(chunk)
                            f.flush()

        # Uncompress the downloaded zip file; _SUCCESS marks a completed extraction
        if not os.path.isfile(os.path.join(rootdir, "_SUCCESS")):
            logger.info(f"Uncompressing {compressed_file}")
            with zipfile.ZipFile(compressed_file, "r") as zip_ref:
                zip_ref.extractall(rootdir)
            Path(os.path.join(rootdir, "_SUCCESS")).touch()
98 | 


--------------------------------------------------------------------------------
/ts_datasets/ts_datasets/base.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import pandas as pd
 8 | from typing import Tuple
 9 | 
_intro_docstr = "Base dataset class for storing time series as ``pd.DataFrame`` s."

_main_fns_docstr = """
Each dataset supports the following features:

1.  ``__getitem__``: you may call ``ts, metadata = dataset[i]``. ``ts`` is a time-indexed ``pandas`` DataFrame, with
    each column representing a different variable (in the case of multivariate time series). ``metadata`` is a dict or
    ``pd.DataFrame`` with the same index as ``ts``, with different keys indicating different dataset-specific
    metadata (train/test split, anomaly labels, etc.) for each timestamp.
2.  ``__len__``:  Calling ``len(dataset)`` will return the number of time series in the dataset.
3.  ``__iter__``: You may iterate over the ``pandas`` representations of the time series in the dataset with
    ``for ts, metadata in dataset: ...``

.. note::

    For each time series, the ``metadata`` will always have the key ``trainval``, which is a 
    ``pd.Series`` of ``bool`` indicating whether each timestamp of the time series should be
    training/validation (if ``True``) or testing (if ``False``). 
"""


class BaseDataset:
    __doc__ = _intro_docstr + _main_fns_docstr

    time_series: list
    """
    A list of all individual time series contained in the dataset. Iterating over
    the dataset will iterate over this list. Note that for some large datasets,
    ``time_series`` may be a list of filenames, which are read lazily either during
    iteration, or whenever ``__getitem__`` is invoked.
    """

    metadata: list
    """
    A list containing the metadata for all individual time series in the dataset.
    """

    def __init__(self):
        # No subset is selected by default; subclasses may set this.
        self.subset = None
        self.time_series = []
        self.metadata = []

    def __getitem__(self, i) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Return the ``(time_series, metadata)`` pair at position ``i``."""
        return self.time_series[i], self.metadata[i]

    def __len__(self):
        """Return the number of time series in the dataset."""
        return len(self.time_series)

    def __iter__(self):
        """Yield ``(time_series, metadata)`` pairs in order."""
        for i in range(len(self)):
            yield self[i]

    def describe(self):
        """Print simple summary statistics for every time series in the dataset."""
        for frame in self.time_series:
            print(f"length of the data: {len(frame)}")
            print(f"timestamp index name: {frame.index.name}")
            print(f"number of data columns: {len(frame.columns)}")
            print("data columns names (the first 20): ")
            print(frame.columns[:20])
            print(f"number of null entries: {frame.isnull().sum()}")
69 | 


--------------------------------------------------------------------------------
/ts_datasets/ts_datasets/forecast/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | """
 8 | Datasets for time series forecasting. Really, these are just time series with
 9 | no labels of any sort.
10 | """
11 | from ts_datasets.base import BaseDataset
12 | from ts_datasets.forecast.custom import CustomDataset
13 | from ts_datasets.forecast.m4 import M4
14 | from ts_datasets.forecast.energy_power import EnergyPower
15 | from ts_datasets.forecast.seattle_trail import SeattleTrail
16 | from ts_datasets.forecast.solar_plant import SolarPlant
17 | 
18 | __all__ = ["get_dataset", "CustomDataset", "M4", "EnergyPower", "SeattleTrail", "SolarPlant"]
19 | 
20 | 
def get_dataset(dataset_name: str, rootdir: str = None, **kwargs) -> BaseDataset:
    """
    :param dataset_name: the name of the dataset to load, formatted as
        ``<name>`` or ``<name>_<subset>``, e.g. ``EnergyPower`` or ``M4_Hourly``
    :param rootdir: the directory where the desired dataset is stored. Not
        required if the package :py:mod:`ts_datasets` is installed in editable
        mode, i.e. with flag ``-e``.
    :param kwargs: keyword arguments for the data loader you are trying to load.
    :return: the data loader for the desired dataset (and subset) desired
    """
    # Split "<name>_<subset>" at the first underscore; subset may be absent.
    name, sep, subset = dataset_name.partition("_")
    valid_datasets = set(__all__).difference({"get_dataset"})
    if name not in valid_datasets:
        raise KeyError(
            "Dataset should be formatted as <name> or "
            "<name>_<subset>, where <name> is one of "
            f"{valid_datasets}. Got {dataset_name} instead."
        )
    cls = globals()[name]
    # A subset was requested, but this loader doesn't support subsets.
    if sep and not hasattr(cls, "valid_subsets"):
        raise ValueError(
            f"Dataset {name} does not have any subsets, "
            f"but attempted to load subset {subset} by "
            f"specifying dataset name {dataset_name}."
        )

    if sep:
        kwargs.update(subset=subset)
    return cls(rootdir=rootdir, **kwargs)
51 | 


--------------------------------------------------------------------------------
/ts_datasets/ts_datasets/forecast/energy_power.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import glob
 8 | import logging
 9 | import os
10 | 
11 | import pandas as pd
12 | 
13 | from ts_datasets.base import BaseDataset
14 | 
15 | logger = logging.getLogger(__name__)
16 | 
17 | 
class EnergyPower(BaseDataset):
    """
    Wrapper to load the open source energy grid power usage dataset.

    - source: https://www.kaggle.com/robikscube/hourly-energy-consumption
    - contains one 10-variable time series
    """

    def __init__(self, rootdir=None):
        """
        :param rootdir: The root directory at which the dataset can be found.
        """
        super().__init__()
        if rootdir is None:
            # Default to <repo_root>/data/multivariate/energy_power
            pkg_dir = os.path.dirname(os.path.abspath(__file__))
            repo_root = os.path.abspath(os.path.join(pkg_dir, "..", "..", ".."))
            rootdir = os.path.join(repo_root, "data", "multivariate", "energy_power")

        assert (
            "energy_power" in rootdir.split("/")[-1]
        ), "energy_power should be found as the last level of the directory for this dataset"

        fnames = sorted(glob.glob(f"{rootdir}/*.csv.gz"))
        assert len(fnames) == 1, f"rootdir {rootdir} does not contain dataset file."

        start_timestamp = "2014-01-01 00:00:00"
        for fn in fnames:
            df = pd.read_csv(fn, index_col="Datetime", parse_dates=True)
            # Restrict to the common time range and drop the NI & PJM_Load columns
            df = df[df.index >= start_timestamp]
            df.drop(["NI", "PJM_Load"], axis=1, inplace=True)
            df.index.rename("timestamp", inplace=True)
            assert isinstance(df.index, pd.DatetimeIndex)
            df.sort_index(inplace=True)

            self.time_series.append(df)
            self.metadata.append(
                {
                    "trainval": pd.Series(df.index <= "2018-01-01 00:00:00", index=df.index),
                    "start_timestamp": start_timestamp,
                }
            )
63 | 


--------------------------------------------------------------------------------
/ts_datasets/ts_datasets/forecast/seattle_trail.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import glob
 8 | import logging
 9 | import os
10 | 
11 | import pandas as pd
12 | 
13 | from ts_datasets.base import BaseDataset
14 | 
15 | logger = logging.getLogger(__name__)
16 | 
17 | 
class SeattleTrail(BaseDataset):
    """
    Wrapper to load the open source Seattle Trail pedestrian/bike traffic
    dataset.

    - source: https://www.kaggle.com/city-of-seattle/seattle-burke-gilman-trail
    - contains one 5-variable time series
    """

    def __init__(self, rootdir=None):
        """
        :param rootdir: The root directory at which the dataset can be found.
        """
        super().__init__()
        if rootdir is None:
            # Default to <repo_root>/data/multivariate/seattle_trail
            pkg_dir = os.path.dirname(os.path.abspath(__file__))
            repo_root = os.path.abspath(os.path.join(pkg_dir, "..", "..", ".."))
            rootdir = os.path.join(repo_root, "data", "multivariate", "seattle_trail")

        assert (
            "seattle_trail" in rootdir.split("/")[-1]
        ), "seattle_trail should be found as the last level of the directory for this dataset"

        fnames = sorted(glob.glob(f"{rootdir}/*.csv"))
        assert len(fnames) == 1, f"rootdir {rootdir} does not contain dataset file."

        for fn in fnames:
            df = pd.read_csv(fn)
            # Replace the raw "Date" column with a proper DatetimeIndex
            df["timestamp"] = pd.to_datetime(df["Date"])
            df.set_index("timestamp", inplace=True)
            df.drop("Date", axis=1, inplace=True)
            assert isinstance(df.index, pd.DatetimeIndex)
            df.sort_index(inplace=True)

            self.time_series.append(df)
            self.metadata.append(
                {"trainval": pd.Series(df.index <= "2019-01-01 00:00:00", index=df.index), "quantile_clip": 300}
            )
59 | 


--------------------------------------------------------------------------------
/ts_datasets/ts_datasets/forecast/solar_plant.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023 salesforce.com, inc.
 3 | # All rights reserved.
 4 | # SPDX-License-Identifier: BSD-3-Clause
 5 | # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 6 | #
 7 | import glob
 8 | import logging
 9 | import os
10 | import zipfile
11 | 
12 | import pandas as pd
13 | 
14 | from ts_datasets.base import BaseDataset
15 | 
16 | logger = logging.getLogger(__name__)
17 | 
18 | 
class SolarPlant(BaseDataset):
    """
    Wrapper to load the open source solar plant power dataset.

    - source: https://www.nrel.gov/grid/solar-power-data.html
    - contains one 405-variable time series

    .. note::

        The loader currently only includes the first 100 (of 405) variables.
    """

    def __init__(self, rootdir=None, num_columns=100):
        """
        :param rootdir: The root directory at which the dataset can be found.
        :param num_columns: indicates how many univariate columns should be returned
        """
        super().__init__()
        if rootdir is None:
            # Default to <repo_root>/data/multivariate/solar_plant
            pkg_dir = os.path.dirname(os.path.abspath(__file__))
            repo_root = os.path.abspath(os.path.join(pkg_dir, "..", "..", ".."))
            rootdir = os.path.join(repo_root, "data", "multivariate", "solar_plant")

        assert (
            "solar_plant" in rootdir.split("/")[-1]
        ), "solar_plant should be found as the last level of the directory for this dataset"

        # Get all filenames, extracting the zipfile if needed
        fnames = glob.glob(f"{rootdir}/*.csv")
        if not fnames and os.path.isfile(f"{rootdir}/merged.zip"):
            with zipfile.ZipFile(f"{rootdir}/merged.zip", "r") as zip_ref:
                zip_ref.extractall(rootdir)
            fnames = glob.glob(f"{rootdir}/*.csv")
        assert len(fnames) == 1, f"rootdir {rootdir} does not contain dataset file."

        for fn in sorted(fnames):
            df = pd.read_csv(fn)
            df["timestamp"] = pd.to_datetime(df["Datetime"])
            df.set_index("timestamp", inplace=True)
            df.drop(["LocalTime", "Datetime"], axis=1, inplace=True)
            # Keep only the first num_columns Power_* columns
            n_cols = min(num_columns, len(df.columns))
            df = df[[f"Power_{k}" for k in range(n_cols)]]
            assert isinstance(df.index, pd.DatetimeIndex)
            df.sort_index(inplace=True)

            self.time_series.append(df)
            self.metadata.append(
                {
                    "trainval": pd.Series(df.index <= "2006-10-01 00:00:00", index=df.index),
                    "granularity": "30min",
                    "aggregation": "Sum",
                }
            )
75 | 


--------------------------------------------------------------------------------