├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── python-publish.yml │ └── testing.yml ├── .gitignore ├── .readthedocs.yml ├── CITATION.cff ├── LICENSE ├── MANIFEST.in ├── README.md ├── benchmark ├── README.md ├── benchamark_results.csv ├── benchmark.ipynb ├── benchmark_results.svg ├── dataset.py └── requirements_benchmark.txt ├── docs ├── Makefile ├── make.bat ├── requirements-docs.txt └── source │ ├── _templates │ └── class.rst │ ├── api │ ├── streamad.evaluate.md │ ├── streamad.md │ ├── streamad.model.md │ ├── streamad.process.md │ └── streamad.util.md │ ├── benchmark.md │ ├── conf.py │ ├── example │ ├── calibrator_usage.ipynb │ ├── dataset_usage.ipynb │ ├── ensemble_usage.ipynb │ ├── example.md │ ├── multivariate.ipynb │ └── univariate.ipynb │ ├── images │ ├── logo_html.svg │ ├── logo_htmlwithname.svg │ └── logo_index.svg │ ├── index.md │ ├── overview.md │ ├── references.md │ └── refs.bib ├── example ├── README.md ├── dataset_usage.ipynb ├── multivariate.ipynb ├── thresholder_usage.ipynb └── univariate.ipynb ├── poetry.lock ├── pyproject.toml ├── streamad ├── __init__.py ├── base │ ├── __init__.py │ ├── detector.py │ └── metrics.py ├── evaluate │ ├── __init__.py │ ├── numenta_aware_metrics.py │ ├── point_aware_metrics.py │ ├── series_aware_metrics.py │ └── ts_metrics.py ├── meta.yaml ├── model │ ├── KNN_Detector.py │ ├── Mad_Dectector.py │ ├── OCSVM_Detector.py │ ├── SArima_Detector.py │ ├── SR_Detector.py │ ├── __init__.py │ ├── hstree_Detector.py │ ├── loda_Detector.py │ ├── random_Detector.py │ ├── rrcf_Detector.py │ ├── rshash_Detector.py │ ├── spot_Detector.py │ ├── xStream_Detector.py │ ├── zscore_Detector.py │ └── zspot_Detector.py ├── process │ ├── __init__.py │ ├── tdigest_calibrator.py │ ├── vote_ensemble.py │ ├── weight_ensemble.py │ └── zscore_calibrator.py ├── util │ ├── __init__.py │ ├── data │ │ ├── multiDS.csv │ │ └── uniDS.csv │ ├── dataset.py │ ├── math_toolkit.py │ ├── plot.py │ └── stream_generator.py └── version.py └── test ├── __init__.py ├── test_OCSVM.py ├── test_calibrator.py ├── test_ensemble.py ├── test_evaluate.py ├── test_hstree.py ├── test_knncad.py ├── test_loda.py ├── test_mad.py ├── test_plot.py ├── test_random.py ├── test_rrcf.py ├── test_rshash.py ├── test_sarima.py ├── test_sdft.py ├── test_spot.py ├── test_sr.py ├── test_stats.py ├── test_xstream.py └── test_zscore.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug, enhancement 6 | assignees: Fengrui-Liu 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. Windows 11] 28 | 29 | **Package version (please complete the following information):** 30 | - Version [e.g. pypi 0.1.1] 31 | 32 | **Additional context** 33 | Add any other context about the problem here. 
34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: Fengrui-Liu 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Use this template to add a new detector/feature. 2 | ### Model Name: 3 | 4 | 5 | ### Paper/Project related links or docs: 6 | 7 | 8 | 9 | ### New Model Submissions: 10 | 11 | * [ ] Have you created a .py in ~/streamad/model/? 12 | * [ ] Have you created a _example.py in ~/examples/? 13 | * [ ] Have you created a test_.py in ~/test/? 14 | * [ ] Have you created Google style doc for each Class in ? -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Build and publish to PyPI 25 | uses: JRubics/poetry-publish@v1.16 26 | with: 27 | user: __token__ 28 | pypi_token: ${{ secrets.PYPI_API_TOKEN }} 29 | verbose: true 30 | -------------------------------------------------------------------------------- /.github/workflows/testing.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - dev 8 | pull_request: 9 | branches: 10 | - main 11 | - dev 12 | workflow_call: 13 | inputs: 14 | username: 15 | required: false 16 | type: string 17 | secrets: 18 | access-token: 19 | required: false 20 | 21 | jobs: 22 | build: 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | os: [ubuntu-latest] 27 | python-version: ["3.8", "3.9", "3.10", "3.11"] 28 | poetry-version: ["1.4.2"] 29 | runs-on: ${{ matrix.os }} 30 | 31 | steps: 32 | - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." 33 | - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" 
34 | - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}." 35 | #---------------------------------------------- 36 | # check-out repo and set-up python 37 | #---------------------------------------------- 38 | - name: Check out repository code 39 | uses: actions/checkout@master 40 | - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." 41 | - run: echo "🖥️ The workflow is now ready to test your code on the runner." 42 | - name: List files in the repository 43 | run: | 44 | ls ${{ github.workspace }} 45 | - name: Python ${{ matrix.python-version }} 46 | uses: actions/setup-python@master 47 | with: 48 | python-version: ${{ matrix.python-version }} 49 | #---------------------------------------------- 50 | # ----- install & configure poetry ----- 51 | #---------------------------------------------- 52 | - name: Install Poetry 53 | uses: abatilo/actions-poetry@v2 54 | with: 55 | virtualenvs-create: true 56 | virtualenvs-in-project: true 57 | installer-parallel: true 58 | poetry-version: ${{ matrix.poetry-version }} 59 | - name: Install dependencies 60 | run: poetry install --no-interaction --no-root 61 | - name: Generate coverage report 62 | run: poetry run pytest --cov=./ --cov-report=xml 63 | - name: Upload coverage to Codecov 64 | uses: codecov/codecov-action@v2 65 | with: 66 | token: ${{ secrets.CODECOV_TOKEN }} 67 | directory: ./ 68 | files: ./coverage.xml 69 | env_vars: OS,PYTHON 70 | fail_ci_if_error: true 71 | flags: pytests 72 | name: codecov-umbrella 73 | verbose: true 74 | - run: echo "🍏 This job's status is ${{ job.status }}." 75 | 76 | publish: 77 | runs-on: ubuntu-latest 78 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') 79 | steps: 80 | - uses: actions/checkout@v3 81 | - uses: actions/setup-python@v3 82 | - name: Install dependencies 83 | run: | 84 | python -m pip install --upgrade pip 85 | pip install build 86 | - name: Build package 87 | run: python -m build 88 | - name: Install twine to check the package 89 | run: pip install twine 90 | - name: Check the package 91 | run: twine check dist/* 92 | - name: Publish distribution 📦 to PyPI 93 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') 94 | uses: pypa/gh-action-pypi-publish@v1.5.0 95 | with: 96 | user: __token__ 97 | password: ${{ secrets.PYPI_API_TOKEN }} 98 | verbose: true 99 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .cache/ 3 | .pytest_cache 4 | __pycache__ 5 | .vscode 6 | 7 | # Byte-compiled / optimized / DLL files 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | .pytest_cache/ 14 | 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | cover/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | 79 | 80 | # PyBuilder 81 | .pybuilder/ 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | test.ipynb 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | # For a library or package, you might want to ignore these files since the code is 94 | # intended to run in multiple environments; otherwise, check them in: 95 | # .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | /docs/build/ 133 | /docs/source/benchmark/streamad-benchmark-dataset/ 134 | 135 | # benchmark 136 | 137 | /benchmark/streamad-benchmark-dataset/ 138 | 139 | # mypy 140 | .mypy_cache/ 141 | .dmypy.json 142 | dmypy.json 143 | 144 | # Pyre type checker 145 | .pyre/ 146 | 147 | # pytype static type analyzer 148 | .pytype/ 149 | 150 | # Cython debug symbols 151 | cython_debug/ 152 | 153 | # Others 154 | benchmark/ 155 | .DS_Store -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: "ubuntu-22.04" 5 | tools: 6 | python: "3.8" 7 | jobs: 8 | post_create_environment: 9 | - pip install -U setuptools==58.2.0 10 | 11 | 12 | 13 | python: 14 | install: 15 | - method: pip 16 | path: . 17 | extra_requirements: 18 | - rtd 19 | - requirements: docs/requirements-docs.txt 20 | 21 | sphinx: 22 | builder: html 23 | fail_on_warning: true 24 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: Liu 5 | given-names: Fengrui 6 | orcid: 7 | title: "StreamAD" 8 | version: 0.3.0 9 | doi: 10 | date-released: 2022-05-15 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2019 Seldon Technologies Ltd. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include requirements.txt 3 | prune test -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # StreamAD 2 | 3 | ![StreamAD Logo](docs/source/images/logo_htmlwithname.svg) 4 | 5 | 6 | 7 | Anomaly detection for data streams/time series. Detectors process univariate or multivariate data points one by one to simulate a real-time scenario. 
8 | 9 | 10 | 11 | [Documentation](https://streamad.readthedocs.io/en/latest/) 12 | 13 | 14 | 15 | 16 | 17 | 18 | ![PyPI](https://img.shields.io/pypi/v/streamad) 19 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/StreamAD?style=flat) 20 | ![PyPI - Implementation](https://img.shields.io/pypi/implementation/streamad) 21 | 22 | ![Read the Docs](https://img.shields.io/readthedocs/streamad?style=flat) 23 | ![GitHub](https://img.shields.io/github/license/Fengrui-Liu/StreamAD) 24 | [![Downloads](https://static.pepy.tech/personalized-badge/streamad?period=total&units=international_system&left_color=grey&right_color=orange&left_text=Downloads)](https://pepy.tech/project/streamad) 25 | 26 | 27 | ![example workflow](https://github.com/Fengrui-Liu/StreamAD/actions/workflows/testing.yml//badge.svg) 28 | [![codecov](https://codecov.io/gh/Fengrui-Liu/StreamAD/branch/main/graph/badge.svg?token=AQG26L2RA7)](https://codecov.io/gh/Fengrui-Liu/StreamAD) 29 | [![Maintainability](https://api.codeclimate.com/v1/badges/525d7e3663ee4c5c0daa/maintainability)](https://codeclimate.com/github/Fengrui-Liu/StreamAD/maintainability) 30 | [![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FFengrui-Liu%2FStreamAD.svg?type=small)](https://app.fossa.com/projects/git%2Bgithub.com%2FFengrui-Liu%2FStreamAD?ref=badge_small) 31 | 32 | 33 | 34 | --- 35 | 36 | 37 | 38 | ## Installation 39 | 40 | The stable version can be installed from PyPI: 41 | 42 | ```bash 43 | pip install streamad 44 | ``` 45 | 46 | The development version can be installed from GitHub: 47 | 48 | ```bash 49 | pip install git+https://github.com/Fengrui-Liu/StreamAD 50 | ``` 51 | 52 | --- 53 | 54 | ## Quick Start 55 | 56 | Start a detection task within 5 lines of code. You can find more examples with visualization results [here](https://streamad.readthedocs.io/en/latest/example/example.html). 57 | 58 | ```python 59 | from streamad.util import StreamGenerator, UnivariateDS 60 | from streamad.model import SpotDetector 61 | 62 | ds = UnivariateDS() 63 | stream = StreamGenerator(ds.data) 64 | model = SpotDetector() 65 | 66 | for x in stream.iter_item(): 67 | score = model.fit_score(x) 68 | ``` 69 | 70 | ## Models 71 | 72 | ### For univariate time series 73 | 74 | If you want to detect multivariate time series with these models, you need to apply them to each feature separately, as in the sketch below. 
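Below is a minimal sketch of that per-feature pattern. It assumes `MultivariateDS().data` is a 2-D NumPy array with one column per feature and that each item yielded by `StreamGenerator.iter_item()` is a vector of the same width, so every univariate detector scores its own single-feature slice; adjust the slicing to your data layout.

```python
from streamad.util import StreamGenerator, MultivariateDS
from streamad.model import SpotDetector

ds = MultivariateDS()
stream = StreamGenerator(ds.data)

# One univariate detector per feature/column (assumes 2-D data).
n_features = ds.data.shape[1]
detectors = [SpotDetector() for _ in range(n_features)]

for x in stream.iter_item():
    # x is assumed to hold one value per feature at the current time step;
    # each detector scores its own single-feature slice.
    scores = [detectors[i].fit_score(x[i : i + 1]) for i in range(n_features)]
```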
75 | 76 | | Model Example | API Usage | Paper | 77 | | ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 78 | | [KNNCAD](https://streamad.readthedocs.io/en/latest/example/univariate.html#knncad-detector) | [streamad.model.KNNDetector](https://streamad.readthedocs.io/en/latest/api/streamad.model.html#knndetector) | [Conformalized density- and distance-based anomaly detection in time-series data](https://arxiv.org/abs/1608.04585) | 79 | | [SPOT](https://streamad.readthedocs.io/en/latest/example/univariate.html#spot-detector) | [streamad.model.SpotDetector](https://streamad.readthedocs.io/en/latest/api/streamad.model.html#spotdetector) | [Anomaly detection in streams with extreme value theory](https://dl.acm.org/doi/10.1145/3097983.3098144) | 80 | | [Spectral Residual](https://streamad.readthedocs.io/en/latest/example/univariate.html#spectral-residual-detector) | [streamad.model.SRDetector](https://streamad.readthedocs.io/en/latest/api/streamad.model.html#srdetector) | [Time-series anomaly detection service at microsoft](https://arxiv.org/abs/1906.03821) | 81 | | [Z score](https://streamad.readthedocs.io/en/latest/example/univariate.html#z-score-detector) | [streamad.model.ZScoreDetector](https://streamad.readthedocs.io/en/latest/api/streamad.model.html#zscoredetector) | [Standard score](https://en.wikipedia.org/wiki/Standard_score) | 82 | | [One-class SVM](https://streamad.readthedocs.io/en/latest/example/univariate.html#one-class-svm-detector) | [streamad.model.OCSVMDetector](https://streamad.readthedocs.io/en/latest/api/streamad.model.html#ocsvmdetector) | [One-class SVM](https://en.wikipedia.org/w/index.php?title=One-class_classification&oldid=1098733917) | 83 | | [MAD](https://streamad.readthedocs.io/en/latest/example/univariate.html#median-absolute-deviation-detector) | [streamad.model.MadDetector](https://streamad.readthedocs.io/en/latest/api/streamad.model.html#maddetector) | [Median absolute deviation](https://www.influxdata.com/blog/anomaly-detection-with-median-absolute-deviation/#:~:text=How%20Median%20Absolute%20Deviation%20algorithm,time%20series%20at%20that%20timestamp/) | 84 | | [SARIMAX](https://streamad.readthedocs.io/en/latest/example/univariate.html#seasonal-arima-detector) | [streamad.model.SArimaDetector](https://streamad.readthedocs.io/en/latest/api/streamad.model.html#sarimadetector) | [Seasonal Arima Detector](https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html?highlight=sarimax#statsmodels.tsa.statespace.sarimax.SARIMAX) | 85 | 86 | ### For multivariate time series 87 | 88 | These models are compatible with univariate time series. 
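For the multivariate detectors listed below, the loop is the same as in the Quick Start; the sketch here assumes `xStreamDetector` can be constructed with default parameters and scores each multivariate item directly.

```python
from streamad.util import StreamGenerator, MultivariateDS
from streamad.model import xStreamDetector

ds = MultivariateDS()
stream = StreamGenerator(ds.data)
model = xStreamDetector()

for x in stream.iter_item():
    # Each item carries all features of one time step; the detector
    # returns a single anomaly score for the whole vector.
    score = model.fit_score(x)
```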
89 | 90 | 91 | 92 | | Models Example | API Usage | Paper | 93 | | ------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 94 | | [xStream](https://streamad.readthedocs.io/en/latest/example/multivariate.html#xstream-detector) | [streamad.model.xStreamDetector](https://streamad.readthedocs.io/en/latest/api/streamad.model.html#xstreamdetector) | [Xstream: outlier detection in feature-evolving data streams](http://www.kdd.org/kdd2018/accepted-papers/view/xstream-outlier-detection-in-feature-evolving-data-streams) | 95 | | [RShash](https://streamad.readthedocs.io/en/latest/example/multivariate.html#rshash-detector) | [streamad.model.RShashDetector](https://streamad.readthedocs.io/en/latest/api/streamad.model.html#rshashdetector) | [Subspace Outlier Detection in Linear Time with Randomized Hashing](https://ieeexplore.ieee.org/document/7837870) | 96 | | [HSTree](https://streamad.readthedocs.io/en/latest/example/multivariate.html#half-space-tree-detector) | [streamad.model.HSTreeDetector](https://streamad.readthedocs.io/en/latest/api/streamad.model.html#hstreedetector) | [Fast Anomaly Detection for Streaming Data](https://www.ijcai.org/Proceedings/11/Papers/254.pdf) | 97 | | [LODA](https://streamad.readthedocs.io/en/latest/example/multivariate.html#loda-detector) | [streamad.model.LodaDetector](https://streamad.readthedocs.io/en/latest/api/streamad.model.html#lodadetector) | [Lightweight on-line detector of anomalies](https://link.springer.com/article/10.1007/s10994-015-5521-0) | 98 | | [RRCF](https://streamad.readthedocs.io/en/latest/example/univariate.html#rrcf-detector) | [streamad.model.RrcfDetector](https://streamad.readthedocs.io/en/latest/api/streamad.model.html#rrcfdetector) | [Robust random cut forest based anomaly detection on streams](http://proceedings.mlr.press/v48/guha16.pdf) | 99 | -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | 1. [GAIA Dataset](https://github.com/CloudWise-OpenSource/GAIA-DataSet) 2 | 2. 
[] -------------------------------------------------------------------------------- /benchmark/benchamark_results.csv: -------------------------------------------------------------------------------- 1 | Detector,Dataset,Key,Size(#),Time(s),Point_Precision,Point_Recall,Point_Fbeta,Series_Precision,Series_Recall,Series_Fbeta,Numenta_Precision,Numenta_Recall,Numenta_Fbeta 2 | SpotDetector,GAIA,linear_data_4_from2018-12-19to2019-01-31_8313,12672,0.45219812600000253,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 3 | SpotDetector,GAIA,linear_data_42_from2018-12-19to2019-01-31_8153,12672,0.4196338829999995,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 4 | SpotDetector,GAIA,linear_data_7_from2018-12-19to2019-01-31_8300,12672,0.4480964440000008,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 5 | SpotDetector,GAIA,linear_data_11_from2018-12-19to2019-01-31_8164,12672,0.44065757699999963,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 6 | SpotDetector,GAIA,linear_data_32_from2018-12-19to2019-01-31_8151,12672,0.501862311,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 7 | SpotDetector,GAIA,linear_data_25_from2018-12-19to2019-01-31_8412,12672,0.4510404809999997,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 8 | SpotDetector,GAIA,linear_data_4_from2018-12-19to2019-01-31_8313,12672,0.694301566,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 9 | SpotDetector,GAIA,linear_data_42_from2018-12-19to2019-01-31_8153,12672,0.9583882379999977,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 10 | SpotDetector,GAIA,linear_data_7_from2018-12-19to2019-01-31_8300,12672,0.7049085780000013,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 11 | SpotDetector,GAIA,linear_data_11_from2018-12-19to2019-01-31_8164,12672,0.49868877999999484,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 12 | SpotDetector,GAIA,linear_data_32_from2018-12-19to2019-01-31_8151,12672,0.4977096179999947,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 13 | SpotDetector,GAIA,linear_data_25_from2018-12-19to2019-01-31_8412,12672,0.4146735479999961,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 14 | SpotDetector,GAIA,linear_data_8_from2018-12-19to2019-01-31_8329,12672,0.9985857739999986,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 15 | SpotDetector,GAIA,linear_data_57_from2018-12-19to2019-01-31_8268,12672,0.3831847010000047,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 16 | SpotDetector,GAIA,linear_data_41_from2018-12-19to2019-01-31_8386,12672,0.37832419400000106,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 17 | SpotDetector,GAIA,linear_data_29_from2018-12-19to2019-01-31_8410,12672,0.4857962309999948,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 18 | SpotDetector,GAIA,linear_data_54_from2018-12-19to2019-01-31_8319,12672,0.38469699799999546,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 19 | SpotDetector,GAIA,linear_data_12_from2018-12-19to2019-01-31_8372,12672,0.4077938490000008,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 20 | SpotDetector,GAIA,linear_data_26_from2018-12-19to2019-01-31_8348,12672,0.44336228300001324,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 21 | SpotDetector,GAIA,linear_data_31_from2018-12-19to2019-01-31_8192,12672,0.6270404440000021,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 22 | SpotDetector,GAIA,linear_data_58_from2018-12-19to2019-01-31_8334,12672,0.4341263150000003,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 23 | SpotDetector,GAIA,linear_data_30_from2018-12-19to2019-01-31_8376,12672,0.45635435400001256,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 24 | SpotDetector,GAIA,linear_data_20_from2018-12-19to2019-01-31_8210,12672,0.4395088150000106,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 25 | SpotDetector,GAIA,linear_data_43_from2019-11-16to2019-12-16_2584,8640,0.4916386119999885,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 26 | 
SpotDetector,GAIA,linear_data_48_from2018-12-19to2019-01-31_8425,12672,0.449180405000007,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 27 | SpotDetector,GAIA,linear_data_51_from2018-12-19to2019-01-31_8286,12672,0.5169517649999875,0.6666666666666666,0.017699115044247787,0.034482758620689655,0.75,0.007352941176470588,0.014563106796116504,0.6666666666666666,0.014705882352941176,0.028776978417266192 28 | SpotDetector,GAIA,linear_data_2_from2018-12-19to2019-01-31_8304,12672,0.40478352799999584,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 29 | SpotDetector,GAIA,linear_data_17_from2018-12-19to2019-01-31_8159,12672,0.3556049849999994,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 30 | SpotDetector,GAIA,linear_data_13_from2019-11-16to2019-12-16_1646,8640,0.3012597190000008,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 31 | SpotDetector,GAIA,linear_data_18_from2018-12-19to2019-01-31_8167,12672,0.373629550000004,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 32 | SpotDetector,GAIA,linear_data_34_from2018-12-19to2019-01-31_8385,12672,0.37975166599999,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 33 | SpotDetector,GAIA,linear_data_47_from2019-01-01to2019-01-31_7946,8806,2.7575944400000054,0.2,0.009345794392523364,0.017857142857142856,0.2,0.018518518518518517,0.03389830508474576,0.2,0.018518518518518517,0.03389830508474576 34 | SpotDetector,GAIA,linear_data_37_from2018-12-19to2019-01-31_8378,12672,1.0630981340000005,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 35 | SpotDetector,GAIA,linear_data_1_from2019-01-01to2019-01-31_7861,8806,1.670446443000003,0.5,0.017857142857142856,0.03448275862068965,0.5,0.027777777777777776,0.05263157894736842,0.5,0.05555555555555555,0.09999999999999999 36 | SpotDetector,GAIA,linear_data_23_from2018-12-19to2019-01-31_8199,12672,0.5604692799999924,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 37 | SpotDetector,GAIA,linear_data_52_from2019-01-01to2019-01-31_8145,8806,0.34901906199999644,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 38 | SpotDetector,GAIA,linear_data_44_from2018-12-19to2019-01-31_8396,12672,0.5879434660000129,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 39 | SpotDetector,GAIA,linear_data_38_from2018-12-19to2019-01-31_8224,12672,0.43967261599999574,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 40 | SpotDetector,GAIA,linear_data_14_from2018-12-19to2019-01-31_8213,12672,0.4007684850000004,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 41 | SpotDetector,GAIA,linear_data_33_from2018-12-19to2019-01-31_8232,12672,0.4994144230000046,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 42 | SpotDetector,GAIA,linear_data_9_from2018-12-19to2019-01-31_8315,12672,0.4864235219999955,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 43 | SpotDetector,GAIA,linear_data_56_from2019-01-01to2019-01-31_8146,8807,0.24272840499997983,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 44 | SpotDetector,GAIA,linear_data_6_from2018-12-19to2019-01-31_8263,12672,0.49525077100000203,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 45 | SpotDetector,GAIA,linear_data_24_from2018-12-19to2019-01-31_8360,12672,0.3405894979999857,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 46 | SpotDetector,GAIA,linear_data_10_from2018-12-19to2019-01-31_8158,12672,0.33526661499999477,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 47 | SpotDetector,GAIA,linear_data_40_from2018-12-19to2019-01-31_8219,12672,0.3877679789999888,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 48 | SpotDetector,GAIA,linear_data_28_from2018-12-19to2019-01-31_8163,12672,0.4542016530000126,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 49 | SpotDetector,GAIA,linear_data_55_from2018-12-19to2019-01-31_8332,12672,0.36237533799999255,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 50 | 
SpotDetector,GAIA,linear_data_5_from2018-12-19to2019-01-31_8277,12672,0.4070936169999868,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 51 | SpotDetector,GAIA,linear_data_27_from2018-12-19to2019-01-31_8177,12672,0.38263655999998036,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 52 | SpotDetector,GAIA,linear_data_21_from2018-12-19to2019-01-31_8172,12672,0.3919302930000015,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 53 | SpotDetector,GAIA,linear_data_3_from2018-12-19to2019-01-31_8273,12672,0.4034993919999863,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 54 | SpotDetector,GAIA,linear_data_35_from2018-12-19to2019-01-31_8185,12672,0.36679661100001226,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 55 | SpotDetector,GAIA,linear_data_19_from2018-12-19to2019-01-31_8417,12672,0.5563156700000036,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 56 | SpotDetector,GAIA,linear_data_16_from2018-12-19to2019-01-31_8373,12672,0.35345210799999904,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 57 | SpotDetector,GAIA,linear_data_46_from2018-12-19to2019-01-31_8354,12672,0.34292623700000036,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 58 | SpotDetector,GAIA,linear_data_50_from2018-12-19to2019-01-31_8318,12672,0.422890265999996,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 59 | SpotDetector,GAIA,linear_data_49_from2018-12-19to2019-01-31_8380,12672,0.9232572329999869,0.6666666666666666,0.015873015873015872,0.031007751937984492,0.75,0.007352941176470588,0.014563106796116504,0.6666666666666666,0.014705882352941176,0.028776978417266192 60 | SpotDetector,GAIA,linear_data_22_from2018-12-19to2019-01-31_8166,12672,0.3804424410000138,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 61 | SpotDetector,GAIA,linear_data_36_from2018-12-19to2019-01-31_8387,12672,0.3825128079999729,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 62 | SpotDetector,GAIA,linear_data_0_from2018-12-19to2019-01-31_8339,12672,0.3520609029999946,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 63 | SpotDetector,GAIA,linear_data_15_from2018-12-19to2019-01-31_8367,12672,0.6504752200000041,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 64 | SpotDetector,GAIA,linear_data_53_from2018-12-19to2019-01-31_8331,12672,0.3944678900000156,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 65 | SpotDetector,GAIA,linear_data_39_from2018-12-19to2019-01-31_8225,12672,0.5024501979999911,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 66 | SpotDetector,GAIA,linear_data_45_from2018-12-19to2019-01-31_8221,12672,0.35027252800000497,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 67 | -------------------------------------------------------------------------------- /benchmark/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import pandas as pd 4 | import numpy as np 5 | from typing import Literal 6 | import json 7 | import ast 8 | import gdown 9 | import zipfile 10 | 11 | DS = { 12 | "AIOPS_KPI": ["preliminary_train", "finals_train", "finals_ground_truth"], 13 | "MICRO": [], 14 | "AWSCloudwatch": [], 15 | "GAIA": [ 16 | "changepoint_data", 17 | "concept_drift_data", 18 | "linear_data", 19 | "low_signal-to-noise_ratio_data", 20 | "partially_stationary_data", 21 | "periodic_data", 22 | "staircase_data", 23 | ], 24 | "MSL": ["test"], 25 | "SMD": [], 26 | "CHM": [], 27 | } 28 | 29 | 30 | def check(ds_name, path="./streamad-benchmark-dataset"): 31 | assert ds_name in DS, f"Unavailable dataset {ds_name}, only support {list(DS.keys())}" 32 | 33 | if not os.path.exists(path): 34 | os.makedirs(path) 35 | 36 | 37 | def download_ds(ds_name, path="./streamad-benchmark-dataset"): 38 | check(ds_name, path) 39 | 40 | if os.path.exists(path + "/" + ds_name): 41 | print("Dataset {} already 
exists".format(ds_name)) 42 | return 43 | 44 | if str.lower(ds_name) == "aiops_kpi": 45 | subprocess.check_call( 46 | [ 47 | "git", 48 | "clone", 49 | "--depth=1", 50 | "https://github.com/NetManAIOps/KPI-Anomaly-Detection.git", 51 | path + "/AIOPS_KPI", 52 | ] 53 | ) 54 | subprocess.check_call( 55 | [ 56 | "unzip", 57 | path + "/AIOPS_KPI/Finals_dataset/phase2_ground_truth.hdf.zip", 58 | "-d", 59 | path + "/AIOPS_KPI/Finals_dataset/", 60 | ] 61 | ) 62 | subprocess.check_call( 63 | [ 64 | "unzip", 65 | path + "/AIOPS_KPI/Finals_dataset/phase2_train.csv.zip", 66 | "-d", 67 | path + "/AIOPS_KPI/Finals_dataset/", 68 | ] 69 | ) 70 | elif str.lower(ds_name) == "micro": 71 | os.makedirs(path + "/MICRO/", exist_ok=True) 72 | gdown.download( 73 | id="1nkEsD1g7THm_T58KwUQZ7o-b174fdx-n", 74 | output=path + "/MICRO/data.zip", 75 | ) 76 | 77 | with zipfile.ZipFile(path + "/MICRO/data.zip") as zip_ref: 78 | zip_ref.extractall(path + "/MICRO/") 79 | 80 | for root, dirs, files in os.walk(path + "/MICRO/AIOps挑战赛数据"): 81 | for filename in files: 82 | if filename.endswith(".zip"): 83 | fileSpec = path + "/MICRO/AIOps挑战赛数据/" + filename 84 | with zipfile.ZipFile(fileSpec) as zip_ref: 85 | zip_ref.extractall(path + "/MICRO/") 86 | 87 | elif str.lower(ds_name) == "awscloudwatch": 88 | subprocess.check_call( 89 | [ 90 | "git", 91 | "clone", 92 | "--depth=1", 93 | "--filter=tree:0", 94 | "--sparse", 95 | "https://github.com/numenta/NAB.git", 96 | path + "/AWSCloudwatch", 97 | ] 98 | ) 99 | subprocess.check_call( 100 | [ 101 | "cd " 102 | + path 103 | + "/AWSCloudwatch/ && git sparse-checkout set data/realAWSCloudwatch && wget https://raw.githubusercontent.com/numenta/NAB/master/labels/combined_labels.json" 104 | ], 105 | shell=True, 106 | ) 107 | elif str.lower(ds_name) == "gaia": 108 | subprocess.check_call( 109 | [ 110 | "wget", 111 | "https://raw.githubusercontent.com/CloudWise-OpenSource/GAIA-DataSet/main/Companion_Data/metric_detection.zip", 112 | "-P", 113 | path + "/GAIA", 114 | ] 115 | ) 116 | subprocess.check_call( 117 | [ 118 | "unzip", 119 | path + "/GAIA/metric_detection.zip", 120 | "-d", 121 | path + "/GAIA/", 122 | ] 123 | ) 124 | 125 | elif str.lower(ds_name) == "msl": 126 | subprocess.check_call( 127 | [ 128 | "wget", 129 | "https://s3-us-west-2.amazonaws.com/telemanom/data.zip", 130 | "-P", 131 | path + "/MSL", 132 | ] 133 | ) 134 | 135 | subprocess.check_call( 136 | [ 137 | "unzip", 138 | path + "/MSL/data.zip", 139 | "-d", 140 | path + "/MSL/", 141 | ] 142 | ) 143 | subprocess.check_call( 144 | [ 145 | "rm", 146 | path + "/MSL/data.zip", 147 | ] 148 | ) 149 | 150 | subprocess.check_call( 151 | [ 152 | "wget", 153 | "https://raw.githubusercontent.com/khundman/telemanom/master/labeled_anomalies.csv", 154 | "-P", 155 | path + "/MSL", 156 | ] 157 | ) 158 | elif str.lower(ds_name) == "chm": 159 | subprocess.check_call( 160 | [ 161 | "git", 162 | "clone", 163 | "--depth=1", 164 | "https://github.com/Fengrui-Liu/Cloud-host-metrics-dataset", 165 | path + "/CHM", 166 | ] 167 | ) 168 | subprocess.check_call( 169 | ["unzip", path + "/CHM/data.zip", "-d", path + "/CHM/"] 170 | ) 171 | subprocess.check_call(["rm", "-rf", path + "/CHM/.git"]) 172 | elif str.lower(ds_name) == "smd": 173 | subprocess.check_call( 174 | [ 175 | "git", 176 | "clone", 177 | "--depth=1", 178 | "https://github.com/NetManAIOps/OmniAnomaly", 179 | path + "/SMD", 180 | ] 181 | ) 182 | 183 | 184 | def prepare_ds( 185 | ds_name: Literal["AIOPS_KPI"], path="./streamad-benchmark-dataset" 186 | ): 187 | # check(ds_name, path) 188 | 189 | 
download_ds(ds_name, path) 190 | 191 | 192 | def read_ds(ds_name, ds_file, path="./streamad-benchmark-dataset"): 193 | check(ds_name, path) 194 | 195 | if str.lower(ds_name) == "aiops_kpi": 196 | if ds_file == "preliminary_train": 197 | df = pd.read_csv( 198 | path + "/" + ds_name + "/Preliminary_dataset/train.csv" 199 | ) 200 | 201 | elif ds_file == "finals_train": 202 | df = pd.read_csv( 203 | path + "/" + ds_name + "/Finals_dataset/phase2_train.csv" 204 | ) 205 | 206 | elif ds_file == "finals_ground_truth": 207 | df = pd.read_hdf( 208 | path + "/" + ds_name + "/Finals_dataset/phase2_ground_truth.hdf" 209 | ) 210 | else: 211 | raise FileNotFoundError( 212 | "Unavailable dataset file, only support {}".format(DS[ds_name]) 213 | ) 214 | 215 | df_groups = df.groupby("KPI ID") 216 | keys = df_groups.groups.keys() 217 | dfs = {} 218 | for key in keys: 219 | df_key = df_groups.get_group(key) 220 | df_key = df_key[["timestamp", "value", "label"]] 221 | df_label = df_key["label"] 222 | dfs[key] = (df_key, df_label) 223 | 224 | return dfs 225 | 226 | elif str.lower(ds_name) == "micro": 227 | labels = pd.read_csv(path + "/MICRO/故障整理(预赛).csv", index_col=["index"]) 228 | labels = labels.dropna(subset=["kpi", "start_time"]) 229 | dfs = {} 230 | for idx, fault in labels.iterrows(): 231 | start_time = fault["start_time"] 232 | duration = fault["duration"] 233 | folder = pd.to_datetime(start_time).strftime("%Y_%m_%d") 234 | start_time = pd.to_datetime(start_time + "+0800", utc=True) 235 | end_time = start_time + pd.Timedelta(duration) 236 | 237 | df_lst = [] 238 | for root, dirs, files in os.walk( 239 | path + "/MICRO/" + folder + "/平台指标/" 240 | ): 241 | for filename in files: 242 | if filename.endswith(".csv"): 243 | df = pd.read_csv( 244 | path + "/MICRO/" + folder + "/平台指标/" + filename 245 | ) 246 | df_lst.append(df) 247 | 248 | df = pd.concat(df_lst, axis=0) 249 | 250 | for kpi in fault["kpi"].split(";"): 251 | df_kpi = df[ 252 | (df["name"] == kpi) & (df["cmdb_id"] == fault["name"]) 253 | ][["timestamp", "value"]] 254 | df_kpi["label"] = 0 255 | df_kpi.loc[ 256 | (df_kpi["timestamp"] > start_time.timestamp() * 1000) 257 | & (df_kpi["timestamp"] < end_time.timestamp() * 1000), 258 | "label", 259 | ] = 1 260 | dfs[kpi + "_" + fault["name"]] = (df_kpi, df_kpi["label"]) 261 | 262 | return dfs 263 | 264 | elif str.lower(ds_name) == "awscloudwatch": 265 | labels = json.load(open(path + "/AWSCloudwatch/combined_labels.json")) 266 | dfs = {} 267 | for f in os.listdir(path + "/AWSCloudwatch/data/realAWSCloudwatch"): 268 | if f.endswith(".csv"): 269 | df = pd.read_csv( 270 | path + "/AWSCloudwatch/data/realAWSCloudwatch/" + f 271 | ) 272 | df = df[["timestamp", "value"]] 273 | key = "realAWSCloudwatch/" + f 274 | label = labels[key] 275 | df["label"] = 0 276 | df.loc[df["timestamp"].isin(label), "label"] = 1 277 | df_label = df["label"] 278 | 279 | dfs[f.split(".")[0]] = (df, df_label) 280 | return dfs 281 | 282 | elif str.lower(ds_name) == "gaia": 283 | if ds_file in DS[ds_name]: 284 | dfs = {} 285 | folder = path + "/GAIA/metric_detection/" + ds_file 286 | for root, dirs, files in os.walk(folder): 287 | for item in files: 288 | df = pd.read_csv(root + "/" + item) 289 | df_label = df["label"] 290 | dfs[item.split(".csv")[0]] = (df, df_label) 291 | return dfs 292 | elif ds_file == 'all': 293 | dfs = {} 294 | for ds_file in DS[ds_name]: 295 | folder = path + "/GAIA/metric_detection/" + ds_file 296 | for root, dirs, files in os.walk(folder): 297 | for item in files: 298 | df = pd.read_csv(root + "/" + item) 299 
| df_label = df["label"] 300 | dfs[item.split(".csv")[0]] = (df, df_label) 301 | return dfs 302 | else: 303 | raise FileNotFoundError 304 | 305 | elif str.lower(ds_name) == "msl": 306 | labels = pd.read_csv(path + "/MSL/labeled_anomalies.csv") 307 | if ds_file in DS[ds_name]: 308 | dfs = {} 309 | folder = path + "/MSL/data/" + ds_file 310 | for root, dirs, files in os.walk(folder): 311 | for item in files: 312 | name = item.replace(".npy", "") 313 | df = pd.DataFrame(np.load(root + "/" + item)) 314 | df.columns = df.columns.astype(str) 315 | anomalies = labels[labels["chan_id"] == name][ 316 | "anomaly_sequences" 317 | ] 318 | df["label"] = 0 319 | if len(anomalies) > 0: 320 | anomalies = ast.literal_eval(anomalies.values[0]) 321 | for seg in anomalies: 322 | seg_begin = seg[0] 323 | seg_end = seg[1] 324 | df.iloc[seg_begin:seg_end] = 1 325 | 326 | dfs[name] = (df, df["label"]) 327 | 328 | return dfs 329 | 330 | else: 331 | raise FileNotFoundError 332 | 333 | elif str.lower(ds_name) == "chm": 334 | dfs = {} 335 | for root, dirs, files in os.walk(path + "/CHM/data"): 336 | for item in files: 337 | df = pd.read_csv(root + "/" + item, index_col=["timestamp"]) 338 | df = df.sort_index() 339 | dfs[item.split(".csv")[0]] = (df, df["label"]) 340 | 341 | return dfs 342 | elif str.lower(ds_name) == "smd": 343 | dfs = {} 344 | for root, dirs, files in os.walk( 345 | path + "/SMD/ServerMachineDataset/test" 346 | ): 347 | for item in files: 348 | df = pd.read_csv(root + "/" + item, header=None) 349 | label = pd.read_csv( 350 | path + "/SMD/ServerMachineDataset/test_label/" + item, 351 | header=None, 352 | names=["label"], 353 | ) 354 | df.columns = df.columns.astype(str) 355 | df["label"] = label 356 | dfs[item.split(".txt")[0]] = (df, df["label"]) 357 | return dfs 358 | 359 | 360 | if __name__ == "__main__": 361 | ds_name = "SMD" 362 | df_file = "" 363 | prepare_ds( 364 | ds_name=ds_name, 365 | path="./benchmark/streamad-benchmark-dataset", 366 | ) 367 | dfs = read_ds( 368 | ds_name=ds_name, 369 | ds_file=df_file, 370 | path="./benchmark/streamad-benchmark-dataset", 371 | ) 372 | 373 | dfs 374 | -------------------------------------------------------------------------------- /benchmark/requirements_benchmark.txt: -------------------------------------------------------------------------------- 1 | kaleido 2 | tabulate -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | api: 23 | sphinx-apidoc -f -E -M -o ./source ../streamad 24 | 25 | doc: 26 | make clean 27 | sphinx-autogen -o generated -t source/_templates/class.rst source/index.md 28 | make html 29 | 30 | 31 | gh-pages: 32 | rm -rf /tmp/gh-pages 33 | cp -r $(BUILDDIR)/html/ /tmp/gh-pages 34 | git checkout gh-pages 35 | cd .. && rm -rf * && cp -r /tmp/gh-pages/* ./ && rm -rf /tmp/gh-pages && git add . && git commit -m "Updated gh-pages" && git push && git checkout main -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | sphinx_copybutton 2 | jupytext 3 | sphinxcontrib.bibtex 4 | sphinx_autodoc_typehints 5 | sphinxcontrib.apidoc 6 | sphinx-book-theme 7 | sphinx_design 8 | sphinx-togglebutton 9 | sphinx == 4.2 10 | recommonmark 11 | myst_nb 12 | plotly 13 | rrcf 14 | tdigest 15 | setuptools==58.2.0 -------------------------------------------------------------------------------- /docs/source/_templates/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | .. currentmodule:: {{ module }} 4 | .. autoclass:: {{ objname }} 5 | {% block methods %} 6 | {% if methods %} 7 | .. rubric:: Methods 8 | .. autosummary:: 9 | {% for item in methods %} 10 | ~{{ name }}.{{ item }} 11 | {%- endfor %} 12 | {% endif %} 13 | {% endblock %} 14 | {% block attributes %} 15 | {% if attributes %} 16 | .. rubric:: Attributes 17 | .. autosummary:: 18 | {% for item in attributes %} 19 | ~{{ name }}.{{ item }} 20 | {%- endfor %} 21 | {% endif %} 22 | {% endblock %} -------------------------------------------------------------------------------- /docs/source/api/streamad.evaluate.md: -------------------------------------------------------------------------------- 1 | # StreamAD Evaluation 2 | 3 | 4 | 5 | ## Point aware metrics 6 | ```{eval-rst} 7 | .. autoclass:: streamad.evaluate.PointAwareMetircs 8 | :show-inheritance: 9 | :members: parse 10 | ``` 11 | 12 | ## Time-series aware metrics 13 | ```{eval-rst} 14 | .. autoclass:: streamad.evaluate.SeriesAwareMetircs 15 | :show-inheritance: 16 | :members: parse 17 | ``` 18 | 19 | ## Numenta aware metrics 20 | ```{eval-rst} 21 | .. 
autoclass:: streamad.evaluate.NumentaAwareMetircs 22 | :show-inheritance: 23 | :members: parse 24 | ``` 25 | -------------------------------------------------------------------------------- /docs/source/api/streamad.md: -------------------------------------------------------------------------------- 1 | 2 | # API Reference 3 | 4 | 5 | 6 | ```{toctree} 7 | :maxdepth: 2 8 | 9 | streamad.model 10 | streamad.util 11 | streamad.evaluate 12 | streamad.process 13 | ``` 14 | -------------------------------------------------------------------------------- /docs/source/api/streamad.model.md: -------------------------------------------------------------------------------- 1 | 2 | # StreamAD Detector 3 | 4 | 5 | ## Univariate Anomaly Detector 6 | 7 | If you want to detect multivariate time series with these models, you need to apply them to each feature separately. 8 | ### KNNDetector 9 | 10 | ```{eval-rst} 11 | .. autoclass:: streamad.model.KNNDetector 12 | :show-inheritance: 13 | :members: parse 14 | ``` 15 | 16 | ---- 17 | 18 | ### SpotDetector 19 | 20 | ```{eval-rst} 21 | .. autoclass:: streamad.model.SpotDetector 22 | :show-inheritance: 23 | :members: parse 24 | ``` 25 | 26 | 27 | ---- 28 | 29 | 30 | ### RrcfDetector 31 | 32 | ```{eval-rst} 33 | .. autoclass:: streamad.model.RrcfDetector 34 | :show-inheritance: 35 | :members: parse 36 | ``` 37 | 38 | ---- 39 | 40 | 41 | ### SRDetector 42 | 43 | ```{eval-rst} 44 | .. autoclass:: streamad.model.SRDetector 45 | :show-inheritance: 46 | :members: parse 47 | ``` 48 | 49 | ---- 50 | 51 | 52 | ### ZScoreDetector 53 | 54 | ```{eval-rst} 55 | .. autoclass:: streamad.model.ZScoreDetector 56 | :show-inheritance: 57 | :members: parse 58 | ``` 59 | 60 | ---- 61 | 62 | 63 | ## Multivariate Anomaly Detector 64 | 65 | These models are also compatible with univariate time series. 66 | 67 | ### xStreamDetector 68 | 69 | ```{eval-rst} 70 | .. autoclass:: streamad.model.xStreamDetector 71 | :show-inheritance: 72 | :members: parse 73 | ``` 74 | 75 | ---- 76 | 77 | ### RShashDetector 78 | 79 | ```{eval-rst} 80 | .. autoclass:: streamad.model.RShashDetector 81 | :show-inheritance: 82 | :members: parse 83 | ``` 84 | 85 | ---- 86 | 87 | ### HSTreeDetector 88 | 89 | ```{eval-rst} 90 | .. autoclass:: streamad.model.HSTreeDetector 91 | :show-inheritance: 92 | :members: parse 93 | ``` 94 | 95 | ---- 96 | 97 | ### LodaDetector 98 | 99 | ```{eval-rst} 100 | .. autoclass:: streamad.model.LodaDetector 101 | :show-inheritance: 102 | :members: parse 103 | ``` 104 | 105 | ---- 106 | 107 | ### RandomDetector 108 | 109 | ```{eval-rst} 110 | .. autoclass:: streamad.model.RandomDetector 111 | :show-inheritance: 112 | :members: parse 113 | ``` 114 | -------------------------------------------------------------------------------- /docs/source/api/streamad.process.md: -------------------------------------------------------------------------------- 1 | # StreamAD Process 2 | 3 | 4 | ## Post process 5 | 6 | 7 | 8 | ### ZScoreCalibrator 9 | ```{eval-rst} 10 | .. autoclass:: streamad.process.ZScoreCalibrator 11 | :show-inheritance: 12 | :members: parse 13 | ``` 14 | 15 | --- 16 | ### TDigestCalibrator 17 | ```{eval-rst} 18 | ..
autoclass:: streamad.process.TDigestCalibrator 19 | :show-inheritance: 20 | :members: parse 21 | ``` -------------------------------------------------------------------------------- /docs/source/api/streamad.util.md: -------------------------------------------------------------------------------- 1 | # StreamAD Utilities 2 | 3 | 4 | ## Dataset 5 | 6 | 7 | 8 | ### UnivariateDS 9 | ```{eval-rst} 10 | .. autoclass:: streamad.util.UnivariateDS 11 | :show-inheritance: 12 | :members: parse 13 | ``` 14 | 15 | ---- 16 | 17 | ### MultivariateDS 18 | ```{eval-rst} 19 | .. autoclass:: streamad.util.MultivariateDS 20 | :show-inheritance: 21 | :members: parse 22 | ``` 23 | 24 | ---- 25 | 26 | ### CustomDS 27 | ```{eval-rst} 28 | .. autoclass:: streamad.util.CustomDS 29 | :show-inheritance: 30 | :members: parse 31 | ``` 32 | 33 | ---- 34 | 35 | ## Generator 36 | 37 | ### StreamGenerator 38 | ```{eval-rst} 39 | .. autoclass:: streamad.util.StreamGenerator 40 | :show-inheritance: 41 | :members: parse 42 | ``` 43 | 44 | ---- 45 | 46 | ## Math toolkit 47 | 48 | ### Statistic 49 | ```{eval-rst} 50 | .. autoclass:: streamad.util.StreamStatistic 51 | :show-inheritance: 52 | :members: parse 53 | ``` 54 | 55 | 56 | 57 | ---- 58 | 59 | ## Visualization 60 | 61 | ### Plot 62 | ```{eval-rst} 63 | .. autoclass:: streamad.util.plot 64 | :show-inheritance: 65 | :members: parse 66 | ``` 67 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | from os.path import dirname, abspath 16 | 17 | 18 | sys.path.insert(0, os.path.abspath("../..")) 19 | StreamAD_dir = dirname(dirname(dirname(abspath(__file__)))) 20 | version_path = os.path.join(StreamAD_dir, "streamad", "version.py") 21 | exec(open(version_path).read()) 22 | 23 | # -- Project information ----------------------------------------------------- 24 | 25 | project = "StreamAD" 26 | copyright = "2023, Fengrui-Liu" 27 | author = "Fengrui-Liu" 28 | 29 | # The full version, including alpha/beta/rc tags 30 | version = __version__ 31 | release = __version__ 32 | 33 | 34 | # -- General configuration --------------------------------------------------- 35 | 36 | # Add any Sphinx extension module names here, as strings. They can be 37 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 38 | # ones. 
39 | extensions = [ 40 | "sphinx.ext.mathjax", 41 | "sphinx_copybutton", 42 | "sphinx.ext.autosummary", 43 | "sphinx.ext.viewcode", 44 | "sphinx.ext.napoleon", 45 | "sphinx.ext.todo", 46 | "sphinx.ext.coverage", 47 | "sphinx.ext.doctest", 48 | "sphinx_autodoc_typehints", 49 | "sphinxcontrib.bibtex", 50 | "sphinx.ext.autodoc", 51 | "sphinx.ext.autosectionlabel", 52 | "sphinx.ext.githubpages", 53 | "sphinx.ext.intersphinx", 54 | "sphinx.ext.ifconfig", 55 | # "sphinxcontrib.apidoc", 56 | # "myst_parser", 57 | "myst_nb", 58 | "sphinx_design", 59 | "sphinx.ext.autosectionlabel", 60 | ] 61 | 62 | 63 | source_suffix = [".rst", ".md", ".ipynb"] 64 | 65 | myst_enable_extensions = [ 66 | "amsmath", 67 | "colon_fence", 68 | "deflist", 69 | "dollarmath", 70 | "html_image", 71 | ] 72 | myst_url_schemes = ("http", "https", "mailto") 73 | myst_footnote_transition = False 74 | autosectionlabel_prefix_document = True 75 | nb_execution_mode = "off" 76 | suppress_warnings = ["mystnb.unknown_mime_type"] 77 | nb_execution_show_tb = "READTHEDOCS" in os.environ 78 | html_js_files = [ 79 | "https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js" 80 | ] 81 | # -- nbsphinx settings ------------------------------------------------------- 82 | # nbsphinx_execute = "auto" 83 | 84 | # Create symlinks for example notebooks 85 | import glob 86 | 87 | nb_files = [ 88 | os.path.basename(f) 89 | for f in glob.glob(os.path.join("example", "*.ipynb")) 90 | if not os.path.basename(f).startswith("temp_") 91 | ] 92 | for nb_file in nb_files: 93 | target = os.path.join("../../example", nb_file) 94 | if os.path.exists(target): 95 | os.remove(target) 96 | os.symlink(os.path.join("../docs/source/example", nb_file), target) 97 | 98 | 99 | # -- Bibliography ------------------------------------------------------------ 100 | bibtex_bibfiles = ["refs.bib"] 101 | bibtex_default_style = "unsrt" 102 | bibtex_reference_style = "author_year" 103 | 104 | # apidoc settings 105 | apidoc_module_dir = "../../streamad" 106 | apidoc_output_dir = "api" 107 | apidoc_excluded_paths = ["**/*test*"] 108 | apidoc_module_first = True 109 | apidoc_separate_modules = True 110 | apidoc_extra_args = ["-d 6"] 111 | 112 | # mock imports 113 | # autodoc_mock_imports = ["pandas", "numpy", "scipy"] 114 | 115 | # Napoleon settings 116 | napoleon_google_docstring = True 117 | napoleon_numpy_docstring = False 118 | napoleon_include_init_with_doc = True 119 | napoleon_include_private_with_doc = False 120 | napoleon_include_special_with_doc = True 121 | napoleon_use_admonition_for_examples = False 122 | napoleon_use_admonition_for_notes = False 123 | napoleon_use_admonition_for_references = False 124 | napoleon_use_ivar = False 125 | napoleon_use_param = True 126 | napoleon_use_rtype = False 127 | 128 | # nbsphinx_execute_arguments = [ 129 | # "--InlineBackend.figure_formats={'svg', 'pdf'}", 130 | # "--InlineBackend.rc={'figure.dpi': 96}", 131 | # ] 132 | # nbsphinx_input_prompt = "In [%s]:" 133 | # nbsphinx_output_prompt = "Out[%s]:" 134 | master_doc = "index" 135 | pygments_style = "sphinx" 136 | 137 | # Add any paths that contain templates here, relative to this directory. 138 | templates_path = ["_templates"] 139 | 140 | # The language for content autogenerated by Sphinx. Refer to documentation 141 | # for a list of supported languages. 142 | # 143 | # This is also used if you do content translation via gettext catalogs. 144 | # Usually you set "language" from the command line for these cases. 
145 | language = "en" 146 | 147 | # List of patterns, relative to source directory, that match files and 148 | # directories to ignore when looking for source files. 149 | # This pattern also affects html_static_path and html_extra_path. 150 | exclude_patterns = ["../build"] 151 | 152 | 153 | # -- Options for HTML output ------------------------------------------------- 154 | 155 | # The theme to use for HTML and HTML Help pages. See the documentation for 156 | # a list of builtin themes. 157 | # 158 | html_theme = "sphinx_book_theme" 159 | 160 | 161 | html_theme_options = { 162 | "use_repository_button": True, 163 | "repository_url": "https://github.com/Fengrui-Liu/StreamAD", 164 | } 165 | # Add any paths that contain custom static files (such as style sheets) here, 166 | # relative to this directory. They are copied after the builtin static files, 167 | # so a file named "default.css" will overwrite the builtin "default.css". 168 | # html_static_path = ["_static"] 169 | 170 | 171 | highlight_language = "none" 172 | 173 | # Prefix document path to section labels, otherwise autogenerated labels would look like 'heading' 174 | # rather than 'path/to/file:heading' 175 | autosectionlabel_prefix_document = True 176 | 177 | autodoc_default_options = { 178 | "members": True, 179 | "inherited-members": True, 180 | } 181 | autodoc_typehints = "none" 182 | 183 | numpydoc_show_class_members = False 184 | autosummary_generate = True 185 | autosummary_imported_members = True 186 | 187 | 188 | html_logo = "images/logo_htmlwithname.svg" 189 | html_favicon = "images/logo_html.svg" 190 | 191 | 192 | # -- myst-parser configuration ----------------------------------------------- 193 | # See https://myst-parser.readthedocs.io/en/stable/syntax/optional.html for 194 | # details of available extensions. 
195 | myst_enable_extensions = [ 196 | "dollarmath", 197 | "amsmath", 198 | "colon_fence", 199 | "smartquotes", 200 | "tasklist", 201 | "html_image", 202 | ] 203 | 204 | # Create heading anchors for h1 to h3 (useful for local toc's) 205 | myst_heading_anchors = 3 206 | 207 | 208 | def remove_module_docstring(app, what, name, obj, options, lines): 209 | if what == "module" and name == "streamad": 210 | del lines[:] 211 | 212 | 213 | def setup(app): 214 | app.connect("autodoc-process-docstring", remove_module_docstring) 215 | -------------------------------------------------------------------------------- /docs/source/example/example.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | 4 | 5 | ```{toctree} 6 | 7 | dataset_usage 8 | ``` 9 | 10 | ---- 11 | 12 | 13 | ```{toctree} 14 | 15 | univariate 16 | ``` 17 | 18 | ---- 19 | 20 | ```{toctree} 21 | multivariate 22 | ``` 23 | 24 | ---- 25 | 26 | 27 | ```{toctree} 28 | 29 | calibrator_usage 30 | ``` 31 | 32 | ---- 33 | 34 | ```{toctree} 35 | 36 | ensemble_usage 37 | ``` -------------------------------------------------------------------------------- /docs/source/images/logo_html.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 切片 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | STREAM 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/source/images/logo_htmlwithname.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 切片 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 16 | 17 | STREAM 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/source/images/logo_index.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | StreamingAD 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | Stream 14 | AD 15 | 16 | 17 | -------------------------------------------------------------------------------- /docs/source/index.md: -------------------------------------------------------------------------------- 1 | 2 | # StreamAD 3 | 4 | ```{toctree} 5 | :caption: Overview 6 | :maxdepth: 2 7 | 8 | overview 9 | example/example 10 | ``` 11 | 12 | 13 | 14 | ```{toctree} 15 | :caption: Benchmark 16 | :hidden: 17 | :titlesonly: 18 | :maxdepth: 1 19 | 20 | benchmark 21 | ``` 22 | 23 | 24 | 25 | 26 | 27 | ```{toctree} 28 | :caption: Reference 29 | :hidden: 30 | :titlesonly: 31 | :maxdepth: 1 32 | 33 | api/streamad 34 | references 35 | ``` -------------------------------------------------------------------------------- /docs/source/overview.md: -------------------------------------------------------------------------------- 1 | ```{include} ../../README.md 2 | :relative-images: 3 | ``` 4 | -------------------------------------------------------------------------------- /docs/source/references.md: -------------------------------------------------------------------------------- 1 | # Paper Reference 2 | 3 | 4 | 5 | 6 | ```{bibliography} refs.bib 7 | :labelprefix: md 8 | ``` -------------------------------------------------------------------------------- /docs/source/refs.bib: -------------------------------------------------------------------------------- 1 | @article{DBLP:journals/corr/BurnaevI16, 2 | author = {Evgeny Burnaev and 3 | Vladislav Ishimtsev}, 4 | title = {Conformalized density- and distance-based anomaly detection in time-series 5 | data}, 6 | journal = {CoRR}, 7 | volume = {abs/1608.04585}, 8 | 
year = {2016}, 9 | url = {http://arxiv.org/abs/1608.04585}, 10 | archivePrefix = {arXiv}, 11 | eprint = {1608.04585}, 12 | timestamp = {Mon, 13 Aug 2018 16:47:12 +0200}, 13 | biburl = {https://dblp.org/rec/journals/corr/BurnaevI16.bib}, 14 | bibsource = {dblp computer science bibliography, https://dblp.org} 15 | } 16 | 17 | 18 | @inproceedings{DBLP:conf/kdd/ManzoorLA18, 19 | author = {Emaad A. Manzoor and 20 | Hemank Lamba and 21 | Leman Akoglu}, 22 | editor = {Yike Guo and 23 | Faisal Farooq}, 24 | title = {xStream: Outlier Detection in Feature-Evolving Data Streams}, 25 | booktitle = {Proceedings of the 24th {ACM} {SIGKDD} International Conference on 26 | Knowledge Discovery {\&} Data Mining, {KDD} 2018, London, UK, 27 | August 19-23, 2018}, 28 | pages = {1963--1972}, 29 | publisher = {{ACM}}, 30 | year = {2018}, 31 | url = {https://doi.org/10.1145/3219819.3220107}, 32 | doi = {10.1145/3219819.3220107}, 33 | timestamp = {Fri, 19 Jun 2020 12:43:05 +0200}, 34 | biburl = {https://dblp.org/rec/conf/kdd/ManzoorLA18.bib}, 35 | bibsource = {dblp computer science bibliography, https://dblp.org} 36 | } 37 | 38 | 39 | @inproceedings{DBLP:conf/kdd/SifferFTL17, 40 | author = {Alban Siffer and 41 | Pierre{-}Alain Fouque and 42 | Alexandre Termier and 43 | Christine Largou{\"{e}}t}, 44 | title = {Anomaly Detection in Streams with Extreme Value Theory}, 45 | booktitle = {Proceedings of the 23rd {ACM} {SIGKDD} International Conference on 46 | Knowledge Discovery and Data Mining, Halifax, NS, Canada, August 13 47 | - 17, 2017}, 48 | pages = {1067--1075}, 49 | publisher = {{ACM}}, 50 | year = {2017}, 51 | url = {https://doi.org/10.1145/3097983.3098144}, 52 | doi = {10.1145/3097983.3098144}, 53 | timestamp = {Fri, 25 Dec 2020 01:14:16 +0100}, 54 | biburl = {https://dblp.org/rec/conf/kdd/SifferFTL17.bib}, 55 | bibsource = {dblp computer science bibliography, https://dblp.org} 56 | } 57 | 58 | @inproceedings{DBLP:conf/icml/GuhaMRS16, 59 | author = {Sudipto Guha and 60 | Nina Mishra and 61 | Gourav Roy and 62 | Okke Schrijvers}, 63 | editor = {Maria{-}Florina Balcan and 64 | Kilian Q. 
Weinberger}, 65 | title = {Robust Random Cut Forest Based Anomaly Detection on Streams}, 66 | booktitle = {Proceedings of the 33nd International Conference on Machine Learning, 67 | {ICML} 2016, New York City, NY, USA, June 19-24, 2016}, 68 | series = {{JMLR} Workshop and Conference Proceedings}, 69 | volume = {48}, 70 | pages = {2712--2721}, 71 | publisher = {JMLR.org}, 72 | year = {2016}, 73 | url = {http://proceedings.mlr.press/v48/guha16.html}, 74 | timestamp = {Wed, 29 May 2019 08:41:46 +0200}, 75 | biburl = {https://dblp.org/rec/conf/icml/GuhaMRS16.bib}, 76 | bibsource = {dblp computer science bibliography, https://dblp.org} 77 | } 78 | 79 | 80 | @inproceedings{DBLP:conf/kdd/RenXWYHKXYTZ19, 81 | author = {Hansheng Ren and 82 | Bixiong Xu and 83 | Yujing Wang and 84 | Chao Yi and 85 | Congrui Huang and 86 | Xiaoyu Kou and 87 | Tony Xing and 88 | Mao Yang and 89 | Jie Tong and 90 | Qi Zhang}, 91 | editor = {Ankur Teredesai and 92 | Vipin Kumar and 93 | Ying Li and 94 | R{\'{o}}mer Rosales and 95 | Evimaria Terzi and 96 | George Karypis}, 97 | title = {Time-Series Anomaly Detection Service at Microsoft}, 98 | booktitle = {Proceedings of the 25th {ACM} {SIGKDD} International Conference on 99 | Knowledge Discovery {\&} Data Mining, {KDD} 2019, Anchorage, AK, 100 | USA, August 4-8, 2019}, 101 | pages = {3009--3017}, 102 | publisher = {{ACM}}, 103 | year = {2019}, 104 | url = {https://doi.org/10.1145/3292500.3330680}, 105 | doi = {10.1145/3292500.3330680}, 106 | timestamp = {Thu, 28 Apr 2022 17:42:59 +0200}, 107 | biburl = {https://dblp.org/rec/conf/kdd/RenXWYHKXYTZ19.bib}, 108 | bibsource = {dblp computer science bibliography, https://dblp.org} 109 | } 110 | 111 | 112 | @inproceedings{DBLP:conf/icdm/SatheA16, 113 | author = {Saket Sathe and 114 | Charu C. Aggarwal}, 115 | editor = {Francesco Bonchi and 116 | Josep Domingo{-}Ferrer and 117 | Ricardo Baeza{-}Yates and 118 | Zhi{-}Hua Zhou and 119 | Xindong Wu}, 120 | title = {Subspace Outlier Detection in Linear Time with Randomized Hashing}, 121 | booktitle = {{IEEE} 16th International Conference on Data Mining, {ICDM} 2016, 122 | December 12-15, 2016, Barcelona, Spain}, 123 | pages = {459--468}, 124 | publisher = {{IEEE} Computer Society}, 125 | year = {2016}, 126 | url = {https://doi.org/10.1109/ICDM.2016.0057}, 127 | doi = {10.1109/ICDM.2016.0057}, 128 | timestamp = {Wed, 17 Mar 2021 09:50:14 +0100}, 129 | biburl = {https://dblp.org/rec/conf/icdm/SatheA16.bib}, 130 | bibsource = {dblp computer science bibliography, https://dblp.org} 131 | } 132 | 133 | @inproceedings{DBLP:conf/ijcai/TanTL11, 134 | author = {Swee Chuan Tan and 135 | Kai Ming Ting and 136 | Fei Tony Liu}, 137 | editor = {Toby Walsh}, 138 | title = {Fast Anomaly Detection for Streaming Data}, 139 | booktitle = {{IJCAI} 2011, Proceedings of the 22nd International Joint Conference 140 | on Artificial Intelligence, Barcelona, Catalonia, Spain, July 16-22, 141 | 2011}, 142 | pages = {1511--1516}, 143 | publisher = {{IJCAI/AAAI}}, 144 | year = {2011}, 145 | url = {https://doi.org/10.5591/978-1-57735-516-8/IJCAI11-254}, 146 | doi = {10.5591/978-1-57735-516-8/IJCAI11-254}, 147 | timestamp = {Tue, 20 Aug 2019 16:16:04 +0200}, 148 | biburl = {https://dblp.org/rec/conf/ijcai/TanTL11.bib}, 149 | bibsource = {dblp computer science bibliography, https://dblp.org} 150 | } 151 | 152 | @article{DBLP:journals/simpa/Dunning21, 153 | author = {Ted Dunning}, 154 | title = {The t-digest: Efficient estimates of distributions}, 155 | journal = {Softw. 
Impacts}, 156 | volume = {7}, 157 | pages = {100049}, 158 | year = {2021}, 159 | url = {https://doi.org/10.1016/j.simpa.2020.100049}, 160 | doi = {10.1016/j.simpa.2020.100049}, 161 | timestamp = {Wed, 05 May 2021 14:43:00 +0200}, 162 | biburl = {https://dblp.org/rec/journals/simpa/Dunning21.bib}, 163 | bibsource = {dblp computer science bibliography, https://dblp.org} 164 | } 165 | 166 | 167 | @article{DBLP:journals/ml/Pevny16, 168 | author = {Tom{\'{a}}s Pevn{\'{y}}}, 169 | title = {Loda: Lightweight on-line detector of anomalies}, 170 | journal = {Mach. Learn.}, 171 | volume = {102}, 172 | number = {2}, 173 | pages = {275--304}, 174 | year = {2016}, 175 | url = {https://doi.org/10.1007/s10994-015-5521-0}, 176 | doi = {10.1007/s10994-015-5521-0}, 177 | timestamp = {Sun, 25 Jul 2021 11:37:58 +0200}, 178 | biburl = {https://dblp.org/rec/journals/ml/Pevny16.bib}, 179 | bibsource = {dblp computer science bibliography, https://dblp.org} 180 | } 181 | 182 | @article{DBLP:journals/ijon/AhmadLPA17, 183 | author = {Subutai Ahmad and 184 | Alexander Lavin and 185 | Scott Purdy and 186 | Zuha Agha}, 187 | title = {Unsupervised real-time anomaly detection for streaming data}, 188 | journal = {Neurocomputing}, 189 | volume = {262}, 190 | pages = {134--147}, 191 | year = {2017}, 192 | url = {https://doi.org/10.1016/j.neucom.2017.04.070}, 193 | doi = {10.1016/j.neucom.2017.04.070}, 194 | timestamp = {Fri, 31 Jan 2020 14:18:54 +0100}, 195 | biburl = {https://dblp.org/rec/journals/ijon/AhmadLPA17.bib}, 196 | bibsource = {dblp computer science bibliography, https://dblp.org} 197 | } 198 | 199 | @misc{ enwiki:1086685336, 200 | author = "{Wikipedia contributors}", 201 | title = "Standard score --- {Wikipedia}{,} The Free Encyclopedia", 202 | year = "2022", 203 | url = "https://en.wikipedia.org/w/index.php?title=Standard_score&oldid=1086685336", 204 | note = "[Online; accessed 19-June-2022]" 205 | } 206 | 207 | 208 | @misc{ enwiki:1089762876, 209 | author = "{Wikipedia contributors}", 210 | title = "Precision and recall --- {Wikipedia}{,} The Free Encyclopedia", 211 | year = "2022", 212 | url = "https://en.wikipedia.org/w/index.php?title=Precision_and_recall&oldid=1089762876", 213 | note = "[Online; accessed 19-June-2022]" 214 | } 215 | 216 | @inproceedings{DBLP:conf/nips/TatbulLZAG18, 217 | author = {Nesime Tatbul and 218 | Tae Jun Lee and 219 | Stan Zdonik and 220 | Mejbah Alam and 221 | Justin Gottschlich}, 222 | editor = {Samy Bengio and 223 | Hanna M. 
Wallach and 224 | Hugo Larochelle and 225 | Kristen Grauman and 226 | Nicol{\`{o}} Cesa{-}Bianchi and 227 | Roman Garnett}, 228 | title = {Precision and Recall for Time Series}, 229 | booktitle = {Advances in Neural Information Processing Systems 31: Annual Conference 230 | on Neural Information Processing Systems 2018, NeurIPS 2018, December 231 | 3-8, 2018, Montr{\'{e}}al, Canada}, 232 | pages = {1924--1934}, 233 | year = {2018}, 234 | url = {https://proceedings.neurips.cc/paper/2018/hash/8f468c873a32bb0619eaeb2050ba45d1-Abstract.html}, 235 | timestamp = {Mon, 16 May 2022 15:41:51 +0200}, 236 | biburl = {https://dblp.org/rec/conf/nips/TatbulLZAG18.bib}, 237 | bibsource = {dblp computer science bibliography, https://dblp.org} 238 | } 239 | 240 | @Misc{InfluxDB:MAD, 241 | howpublished = {Website}, 242 | note = {\url{https://www.influxdata.com/blog/anomaly-detection-with-median-absolute-deviation/#:~:text=How%20Median%20Absolute%20Deviation%20algorithm,time%20series%20at%20that%20timestamp/} Accessed 7 July 2020}, 243 | title = {Anomaly Detection with Median Absolute Deviation}, 244 | author = {Anais Dotis-Georgiou} 245 | } 246 | 247 | 248 | @misc{ enwiki:1098733917, 249 | author = "{Wikipedia contributors}", 250 | title = "One-class classification --- {Wikipedia}{,} The Free Encyclopedia", 251 | year = "2022", 252 | url = "https://en.wikipedia.org/w/index.php?title=One-class_classification&oldid=1098733917", 253 | note = "[Online; accessed 24-July-2022]" 254 | } 255 | 256 | 257 | 258 | @article{assimakopoulos2000theta, 259 | title={The theta model: a decomposition approach to forecasting}, 260 | author={Assimakopoulos, Vassilis and Nikolopoulos, Konstantinos}, 261 | journal={International journal of forecasting}, 262 | volume={16}, 263 | number={4}, 264 | pages={521--530}, 265 | year={2000}, 266 | publisher={Elsevier} 267 | } 268 | 269 | @book{durbin2012time, 270 | title={Time series analysis by state space methods}, 271 | author={Durbin, James and Koopman, Siem Jan}, 272 | volume={38}, 273 | year={2012}, 274 | publisher={OUP Oxford} 275 | } -------------------------------------------------------------------------------- /example/README.md: -------------------------------------------------------------------------------- 1 | This folder contains the symlink of all examples from docs/source/examples/ 2 | 3 | If it is fail to open in some certain OS, please check the origin file or the online docs. 
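For a quick, self-contained sanity check without opening a notebook, the loop below is a minimal sketch of the typical streaming usage. It follows the quickstart pattern from the online docs; the helper names (`UnivariateDS`, `StreamGenerator`, `ds.data`, `iter_item()`) are taken from `streamad.util` as documented there and are assumptions here, so adjust them if your installed version differs.

```python
# Minimal streaming sketch (assumes the documented StreamAD quickstart API).
from streamad.util import StreamGenerator, UnivariateDS
from streamad.model import SpotDetector

ds = UnivariateDS()                # built-in univariate sample dataset
stream = StreamGenerator(ds.data)  # yields one observation at a time
model = SpotDetector()

scores = []
for x in stream.iter_item():
    # fit_score() returns None while the initial window is still filling
    scores.append(model.fit_score(x))
```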
-------------------------------------------------------------------------------- /example/dataset_usage.ipynb: -------------------------------------------------------------------------------- 1 | ../docs/source/example/dataset_usage.ipynb -------------------------------------------------------------------------------- /example/multivariate.ipynb: -------------------------------------------------------------------------------- 1 | ../docs/source/example/multivariate.ipynb -------------------------------------------------------------------------------- /example/thresholder_usage.ipynb: -------------------------------------------------------------------------------- 1 | ../docs/source/example/thresholder_usage.ipynb -------------------------------------------------------------------------------- /example/univariate.ipynb: -------------------------------------------------------------------------------- 1 | ../docs/source/example/univariate.ipynb -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "streamad" 3 | version = "0.3.1" 4 | description = "An anomaly detection package for data streams." 5 | authors = ["Fengrui-Liu "] 6 | license = "Apache Software License" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.8" 11 | numpy = "^1.22" 12 | pandas = "^1.3.0" 13 | scikit-learn = "^1.0.0" 14 | mmh3 = "^3.0.0" 15 | rrcf = "^0.4.4" 16 | plotly = "^5.14.1" 17 | tdigest = "^0.5.2.2" 18 | statsmodels = "0.13.5" 19 | fast-histogram = "^0.11" 20 | pytest = "^7.3.1" 21 | pytest-cov = "^4.0.0" 22 | scipy = "^1.3.1" 23 | 24 | 25 | [build-system] 26 | requires = ["poetry-core"] 27 | build-backend = "poetry.core.masonry.api" 28 | -------------------------------------------------------------------------------- /streamad/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | # 4 | # Author: liufr 5 | # Github: https://github.com/Fengrui-Liu 6 | # LastEditTime: 2021-01-05 21:30:25 7 | # Copyright 2021 liufr 8 | # Description: 9 | # 10 | 11 | from . import base 12 | from . import model 13 | from . import util 14 | from .version import __version__ 15 | 16 | __all__ = ["__version__", "base", "model", "util"] 17 | -------------------------------------------------------------------------------- /streamad/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .detector import BaseDetector 2 | from .metrics import BaseMetrics 3 | 4 | 5 | __all__ = ["BaseDetector", "BaseMetrics"] 6 | -------------------------------------------------------------------------------- /streamad/base/detector.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | import numpy as np 4 | from collections import deque 5 | 6 | 7 | class BaseDetector(ABC): 8 | """Abstract class for Detector, supporting for customize detector.""" 9 | 10 | def __init__( 11 | self, 12 | window_len: int = 50, 13 | detrend: bool = False, 14 | detrend_len: int = 10, 15 | data_type: str = "multivariate", 16 | score_first: bool = False, 17 | ): 18 | """Initialize the attributes of the BaseDetector class 19 | 20 | 21 | Args: 22 | window_len (int, optional): Length of window for observations. Defaults to 50. 
23 | detrend (bool, optional): Data is detrended by subtracting the mean. Defaults to False. 24 | detrend_len (int, optional): Length of data for reference to detrend. Defaults to 10. 25 | data_type (str, optional): Multi/Univariate data type. Defaults to "multivariate". 26 | """ 27 | 28 | self.data_type = data_type 29 | self.index = -1 30 | self.detrend = detrend 31 | self.window_len = window_len 32 | self.detrend_len = detrend_len 33 | self.window = deque(maxlen=self.window_len) 34 | self.detrend_window = deque(maxlen=self.detrend_len) 35 | self.score_first = score_first 36 | 37 | def _check(self, X) -> bool: 38 | """Check whether the detector can handle the data.""" 39 | x_shape = X.shape[0] 40 | 41 | if self.data_type == "univariate": 42 | assert x_shape == 1, "The data is not univariate." 43 | elif self.data_type == "multivariate": 44 | assert x_shape >= 1, "The data is not univariate or multivariate." 45 | 46 | if np.isnan(X).any(): 47 | return False 48 | self.index += 1 49 | return True 50 | 51 | def _detrend(self, X: np.ndarray) -> np.ndarray: 52 | """Detrend the data by subtracting the mean. 53 | 54 | Args: 55 | X (np.ndarray): Data of current observation. 56 | 57 | Returns: 58 | np.ndarray: Detrended data. 59 | """ 60 | 61 | self.detrend_window.append(X) 62 | 63 | return X - np.mean(self.detrend_window, axis=0) 64 | 65 | @abstractmethod 66 | def fit(self, X: np.ndarray, timestamp: int = None): 67 | raise NotImplementedError 68 | 69 | @abstractmethod 70 | def score(self, X: np.ndarray, timestamp: int = None) -> float: 71 | raise NotImplementedError 72 | 73 | def fit_score(self, X: np.ndarray, timestamp: int = None) -> float: 74 | """Fit one observation and calculate its anomaly score. 75 | 76 | Args: 77 | X (np.ndarray): Data of current observation. 78 | 79 | Returns: 80 | float: Anomaly score. A high score indicates a high degree of anomaly. 81 | """ 82 | 83 | check_flag = self._check(X) 84 | if not check_flag: 85 | return None 86 | X = self._detrend(X) if self.detrend else X 87 | 88 | if self.index < self.window_len: 89 | self.fit(X, timestamp) 90 | return None 91 | 92 | if self.score_first: 93 | score = self.score(X, timestamp) 94 | self.fit(X, timestamp) 95 | else: 96 | score = self.fit(X, timestamp).score(X, timestamp) 97 | 98 | return float(abs(score)) 99 | -------------------------------------------------------------------------------- /streamad/base/metrics.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | import numpy as np 3 | 4 | 5 | class BaseMetrics(ABC): 6 | """ 7 | Abstract class for evaluation metrics, supporting customized evaluation.
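Subclasses override ``evaluate``; calling ``super().evaluate(y_true, y_pred)`` first replaces ``None`` scores (emitted while a detector's window is still filling) with 0 and casts both arrays to integers before storing them on the instance.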
8 | """ 9 | 10 | def __init__(self) -> None: 11 | super().__init__() 12 | self.y_pred = None 13 | self.y_true = None 14 | 15 | @abstractmethod 16 | def evaluate(self, y_true: np.ndarray, y_pred: np.ndarray): 17 | y_pred = np.array(y_pred) 18 | y_pred[y_pred == None] = 0 19 | self.y_true = y_true.astype(int) 20 | self.y_pred = y_pred.astype(int) 21 | return 22 | -------------------------------------------------------------------------------- /streamad/evaluate/__init__.py: -------------------------------------------------------------------------------- 1 | from .point_aware_metrics import PointAwareMetircs 2 | from .series_aware_metrics import SeriesAwareMetircs 3 | from .numenta_aware_metrics import NumentaAwareMetircs 4 | 5 | __all__ = [ 6 | "PointAwareMetircs", 7 | "SeriesAwareMetircs", 8 | "NumentaAwareMetircs", 9 | ] 10 | -------------------------------------------------------------------------------- /streamad/evaluate/numenta_aware_metrics.py: -------------------------------------------------------------------------------- 1 | from streamad.base import BaseMetrics 2 | from streamad.evaluate.ts_metrics import TSMetric 3 | import numpy as np 4 | 5 | 6 | class NumentaAwareMetircs(BaseMetrics): 7 | def __init__(self, anomaly_threshold: float = 0.8, beta: float = 1.0): 8 | """Numenta metrics calculation methods. :cite:`DBLP:journals/ijon/AhmadLPA17`. 9 | 10 | Args: 11 | anomaly_threshold (float, optional): A threshold to determine the anomalies; it converts the anomaly scores to binary (0/1) indicators. Defaults to 0.8. 12 | beta (float, optional): F-beta score, like a F1-score. Defaults to 1.0. 13 | """ 14 | super().__init__() 15 | self.threshold = anomaly_threshold 16 | self.beta = beta 17 | self.precision = None 18 | self.recall = None 19 | self.Fbeta = None 20 | 21 | def evaluate(self, y_true: np.ndarray, y_pred: np.ndarray) -> tuple: 22 | super().evaluate(y_true, y_pred) 23 | 24 | select = self.y_pred > self.threshold 25 | self.y_pred[select] = 1 26 | self.y_pred[~select] = 0 27 | 28 | metric = TSMetric( 29 | metric_option="numenta", 30 | beta=self.beta, 31 | alpha_r=0.0, 32 | cardinality="one", 33 | bias_p="flat", 34 | bias_r="flat", 35 | ) 36 | self.precision, self.recall, self.Fbeta = metric.score( 37 | self.y_true, self.y_pred 38 | ) 39 | 40 | return self.precision, self.recall, self.Fbeta 41 | -------------------------------------------------------------------------------- /streamad/evaluate/point_aware_metrics.py: -------------------------------------------------------------------------------- 1 | from streamad.base import BaseMetrics 2 | from streamad.evaluate.ts_metrics import TSMetric 3 | import numpy as np 4 | 5 | 6 | class PointAwareMetircs(BaseMetrics): 7 | def __init__(self, anomaly_threshold: float = 0.8, beta: float = 1.0): 8 | """Classic point-wise metrics :cite:`enwiki:1089762876`. 9 | 10 | Args: 11 | anomaly_threshold (float, optional): A threshold to determine the anomalies; it converts the anomaly scores to binary (0/1) indicators. Defaults to 0.8. 12 | beta (float, optional): F-beta score, like a F1-score. Defaults to 1.0.
13 | """ 14 | super().__init__() 15 | self.threshold = anomaly_threshold 16 | self.beta = beta 17 | self.precision = None 18 | self.recall = None 19 | self.Fbeta = None 20 | 21 | def evaluate(self, y_true: np.ndarray, y_pred: np.ndarray) -> tuple: 22 | super().evaluate(y_true, y_pred) 23 | 24 | select = self.y_pred > self.threshold 25 | self.y_pred[select] = 1 26 | self.y_pred[~select] = 0 27 | 28 | metric = TSMetric( 29 | metric_option="classic", 30 | beta=self.beta, 31 | alpha_r=0.0, 32 | cardinality="one", 33 | bias_p="flat", 34 | bias_r="flat", 35 | ) 36 | self.precision, self.recall, self.Fbeta = metric.score( 37 | self.y_true, self.y_pred 38 | ) 39 | 40 | return self.precision, self.recall, self.Fbeta 41 | -------------------------------------------------------------------------------- /streamad/evaluate/series_aware_metrics.py: -------------------------------------------------------------------------------- 1 | from streamad.base import BaseMetrics 2 | from streamad.evaluate.ts_metrics import TSMetric 3 | import numpy as np 4 | 5 | 6 | class SeriesAwareMetircs(BaseMetrics): 7 | def __init__( 8 | self, 9 | anomaly_threshold: float = 0.8, 10 | beta: float = 1.0, 11 | bias_p: str = "flat", 12 | bias_r: str = "flat", 13 | ): 14 | """Time series aware metrics :cite:`DBLP:conf/nips/TatbulLZAG18` 15 | 16 | Args: 17 | anomaly_threshold (float, optional): A threshold to determine the anomalies, it can covert the anomaly scores to binary (0/1) indicators. Defaults to 0.8. 18 | beta (float, optional): F-beta score, like a F1-score. Defaults to 1.0. 19 | bias_p (str, optional): Bias for precision. Optionals are "flat", "front", "middle", "back". Defaults to "flat". 20 | bias_r (str, optional): Bias for recall. Optionals are "flat", "front", "middle", "back". Defaults to "flat". 21 | """ 22 | super().__init__() 23 | self.threshold = anomaly_threshold 24 | self.beta = beta 25 | self.bias_p = bias_p 26 | self.bias_r = bias_r 27 | self.precision = None 28 | self.recall = None 29 | self.Fbeta = None 30 | 31 | def evaluate(self, y_true: np.ndarray, y_pred: np.ndarray) -> tuple: 32 | super().evaluate(y_true, y_pred) 33 | 34 | select = self.y_pred > self.threshold 35 | self.y_pred[select] = 1 36 | self.y_pred[~select] = 0 37 | 38 | metric = TSMetric( 39 | metric_option="time-series", 40 | beta=self.beta, 41 | alpha_r=0.0, 42 | cardinality="reciprocal", 43 | bias_p=self.bias_p, 44 | bias_r=self.bias_r, 45 | ) 46 | self.precision, self.recall, self.Fbeta = metric.score( 47 | self.y_true, self.y_pred 48 | ) 49 | 50 | return self.precision, self.recall, self.Fbeta 51 | -------------------------------------------------------------------------------- /streamad/evaluate/ts_metrics.py: -------------------------------------------------------------------------------- 1 | # This script is from https://github.com/KurochkinAlexey/Time-series-precision-recall, Thanks! 
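# Summary of the scoring model implemented below (added for orientation; it restates
# the code, it is not part of the upstream script). Following Tatbul et al.,
# "Precision and Recall for Time Series" (NeurIPS 2018), each anomaly range R earns
#     reward(R) = alpha * existence_reward
#                 + (1 - alpha) * gamma(#overlaps) * sum of positionally biased overlap fractions
# where alpha is alpha_p (fixed to 0) for precision and alpha_r for recall, the
# positional bias is one of "flat" / "front" / "middle" / "back", and gamma penalises
# a range matched by many fragments. The "classic" option treats every labelled point
# as its own range (ordinary point-wise precision/recall), while the "numenta" option
# keeps predictions as points and real anomalies as windows.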
2 | 3 | import numpy as np 4 | 5 | 6 | class TSMetric: 7 | def __init__( 8 | self, 9 | metric_option="classic", 10 | beta=1.0, 11 | alpha_r=0.0, 12 | cardinality="one", 13 | bias_p="flat", 14 | bias_r="flat", 15 | ): 16 | 17 | assert (alpha_r >= 0) & (alpha_r <= 1) 18 | assert metric_option in ["classic", "time-series", "numenta"] 19 | assert beta > 0 20 | assert cardinality in ["one", "reciprocal", "udf_gamma"] 21 | assert bias_p in ["flat", "front", "middle", "back"] 22 | assert bias_r in ["flat", "front", "middle", "back"] 23 | 24 | self.metric_option = metric_option 25 | self.beta = beta 26 | self.alpha_r = alpha_r 27 | self.alpha_p = 0 28 | self.cardinality = cardinality 29 | self.bias_p = bias_p 30 | self.bias_r = bias_r 31 | 32 | def _udf_gamma(self, overlap, task_type): 33 | """ 34 | user defined gamma 35 | """ 36 | return 1.0 37 | 38 | def _gamma_select(self, gamma, overlap, task_type): 39 | if gamma == "one": 40 | return 1.0 41 | elif gamma == "reciprocal": 42 | if overlap > 1: 43 | return 1.0 / overlap 44 | else: 45 | return 1.0 46 | elif gamma == "udf_gamma_def": 47 | if overlap > 1: 48 | return 1.0 / self._udf_gamma(overlap, task_type) 49 | else: 50 | return 1.0 51 | 52 | def _gamma_function(self, overlap_count, task_type): 53 | overlap = overlap_count[0] 54 | if task_type == 0: 55 | return self._gamma_select(self.cardinality, overlap, task_type) 56 | elif task_type == 1: 57 | return self._gamma_select(self.cardinality, overlap, task_type) 58 | else: 59 | raise Exception("invalid argument in gamma function") 60 | 61 | def _compute_omega_reward(self, r1, r2, overlap_count, task_type): 62 | if r1[1] < r2[0] or r1[0] > r2[1]: 63 | return 0 64 | else: 65 | overlap_count[0] += 1 66 | overlap = np.zeros(r1.shape) 67 | overlap[0] = max(r1[0], r2[0]) 68 | overlap[1] = min(r1[1], r2[1]) 69 | return self._omega_function(r1, overlap, task_type) 70 | 71 | def _omega_function(self, rrange, overlap, task_type): 72 | anomaly_length = rrange[1] - rrange[0] + 1 73 | my_positional_bias = 0 74 | max_positional_bias = 0 75 | temp_bias = 0 76 | for i in range(1, anomaly_length + 1): 77 | temp_bias = self._delta_function(i, anomaly_length, task_type) 78 | max_positional_bias += temp_bias 79 | j = rrange[0] + i - 1 80 | if j >= overlap[0] and j <= overlap[1]: 81 | my_positional_bias += temp_bias 82 | if max_positional_bias > 0: 83 | res = my_positional_bias / max_positional_bias 84 | return res 85 | else: 86 | return 0 87 | 88 | def _delta_function(self, t, anomaly_length, task_type): 89 | if task_type == 0: 90 | return self._delta_select(self.bias_p, t, anomaly_length, task_type) 91 | elif task_type == 1: 92 | return self._delta_select(self.bias_r, t, anomaly_length, task_type) 93 | else: 94 | raise Exception("Invalid task type in delta function") 95 | 96 | def _delta_select(self, delta, t, anomaly_length, task_type): 97 | if delta == "flat": 98 | return 1.0 99 | elif delta == "front": 100 | return float(anomaly_length - t + 1.0) 101 | elif delta == "middle": 102 | if t <= anomaly_length / 2.0: 103 | return float(t) 104 | else: 105 | return float(anomaly_length - t + 1.0) 106 | elif delta == "back": 107 | return float(t) 108 | elif delta == "udf_delta": 109 | return self._udf_delta(t, anomaly_length, task_type) 110 | else: 111 | raise Exception("Invalid positional bias value") 112 | 113 | def _udf_delta(self, t, anomaly_length, task_type): 114 | """ 115 | user defined delta function 116 | """ 117 | return 1.0 118 | 119 | def _update_precision(self, real_anomalies, predicted_anomalies): 120 | 
precision = 0 121 | if len(predicted_anomalies) == 0: 122 | return 0 123 | for i in range(len(predicted_anomalies)): 124 | range_p = predicted_anomalies[i, :] 125 | omega_reward = 0 126 | overlap_count = [0] 127 | for j in range(len(real_anomalies)): 128 | range_r = real_anomalies[j, :] 129 | omega_reward += self._compute_omega_reward( 130 | range_p, range_r, overlap_count, 0 131 | ) 132 | overlap_reward = ( 133 | self._gamma_function(overlap_count, 0) * omega_reward 134 | ) 135 | if overlap_count[0] > 0: 136 | existence_reward = 1 137 | else: 138 | existence_reward = 0 139 | 140 | precision += ( 141 | self.alpha_p * existence_reward 142 | + (1 - self.alpha_p) * overlap_reward 143 | ) 144 | precision /= len(predicted_anomalies) 145 | return precision 146 | 147 | def _update_recall(self, real_anomalies, predicted_anomalies): 148 | recall = 0 149 | if len(real_anomalies) == 0: 150 | return 0 151 | for i in range(len(real_anomalies)): 152 | omega_reward = 0 153 | overlap_count = [0] 154 | range_r = real_anomalies[i, :] 155 | for j in range(len(predicted_anomalies)): 156 | range_p = predicted_anomalies[j, :] 157 | omega_reward += self._compute_omega_reward( 158 | range_r, range_p, overlap_count, 1 159 | ) 160 | overlap_reward = ( 161 | self._gamma_function(overlap_count, 1) * omega_reward 162 | ) 163 | 164 | if overlap_count[0] > 0: 165 | existence_reward = 1 166 | else: 167 | existence_reward = 0 168 | 169 | recall += ( 170 | self.alpha_r * existence_reward 171 | + (1 - self.alpha_r) * overlap_reward 172 | ) 173 | recall /= len(real_anomalies) 174 | return recall 175 | 176 | def _shift(self, arr, num, fill_value=np.nan): 177 | arr = np.roll(arr, num) 178 | if num < 0: 179 | arr[num:] = fill_value 180 | elif num > 0: 181 | arr[:num] = fill_value 182 | return arr 183 | 184 | def _prepare_data(self, values_real, values_pred): 185 | 186 | assert len(values_real) == len(values_pred) 187 | 188 | if self.metric_option == "classic": 189 | real_anomalies = np.argwhere(values_real == 1).repeat(2, axis=1) 190 | predicted_anomalies = np.argwhere(values_pred == 1).repeat( 191 | 2, axis=1 192 | ) 193 | 194 | elif self.metric_option == "time-series": 195 | predicted_anomalies_ = np.argwhere(values_pred == 1).ravel() 196 | predicted_anomalies_shift_forward = self._shift( 197 | predicted_anomalies_, 1, fill_value=predicted_anomalies_[0] 198 | ) 199 | predicted_anomalies_shift_backward = self._shift( 200 | predicted_anomalies_, -1, fill_value=predicted_anomalies_[-1] 201 | ) 202 | predicted_anomalies_start = np.argwhere( 203 | (predicted_anomalies_shift_forward - predicted_anomalies_) != -1 204 | ).ravel() 205 | predicted_anomalies_finish = np.argwhere( 206 | (predicted_anomalies_ - predicted_anomalies_shift_backward) 207 | != -1 208 | ).ravel() 209 | predicted_anomalies = np.hstack( 210 | [ 211 | predicted_anomalies_[predicted_anomalies_start].reshape( 212 | -1, 1 213 | ), 214 | predicted_anomalies_[predicted_anomalies_finish].reshape( 215 | -1, 1 216 | ), 217 | ] 218 | ) 219 | 220 | real_anomalies_ = np.argwhere(values_real == 1).ravel() 221 | real_anomalies_shift_forward = self._shift( 222 | real_anomalies_, 223 | 1, 224 | fill_value=real_anomalies_[0] if len(real_anomalies_) else 0, 225 | ) 226 | real_anomalies_shift_backward = self._shift( 227 | real_anomalies_, 228 | -1, 229 | fill_value=real_anomalies_[-1] if len(real_anomalies_) else 0, 230 | ) 231 | real_anomalies_start = np.argwhere( 232 | (real_anomalies_shift_forward - real_anomalies_) != -1 233 | ).ravel() 234 | real_anomalies_finish = 
np.argwhere( 235 | (real_anomalies_ - real_anomalies_shift_backward) != -1 236 | ).ravel() 237 | real_anomalies = np.hstack( 238 | [ 239 | real_anomalies_[real_anomalies_start].reshape(-1, 1), 240 | real_anomalies_[real_anomalies_finish].reshape(-1, 1), 241 | ] 242 | ) 243 | 244 | elif self.metric_option == "numenta": 245 | predicted_anomalies = np.argwhere(values_pred == 1).repeat( 246 | 2, axis=1 247 | ) 248 | real_anomalies_ = np.argwhere(values_real == 1).ravel() 249 | real_anomalies_shift_forward = self._shift( 250 | real_anomalies_, 251 | 1, 252 | fill_value=real_anomalies_[0] if len(real_anomalies_) else 0, 253 | ) 254 | real_anomalies_shift_backward = self._shift( 255 | real_anomalies_, 256 | -1, 257 | fill_value=real_anomalies_[-1] if len(real_anomalies_) else 0, 258 | ) 259 | real_anomalies_start = np.argwhere( 260 | (real_anomalies_shift_forward - real_anomalies_) != -1 261 | ).ravel() 262 | real_anomalies_finish = np.argwhere( 263 | (real_anomalies_ - real_anomalies_shift_backward) != -1 264 | ).ravel() 265 | real_anomalies = np.hstack( 266 | [ 267 | real_anomalies_[real_anomalies_start].reshape(-1, 1), 268 | real_anomalies_[real_anomalies_finish].reshape(-1, 1), 269 | ] 270 | ) 271 | return real_anomalies, predicted_anomalies 272 | 273 | def score(self, values_real, values_predicted): 274 | assert isinstance(values_real, np.ndarray) 275 | assert isinstance(values_predicted, np.ndarray) 276 | 277 | if not values_predicted.any(): 278 | if not values_real.any(): 279 | return 1.0, 1.0, 1.0 280 | else: 281 | return 0.0, 0.0, 0.0 282 | 283 | real_anomalies, predicted_anomalies = self._prepare_data( 284 | values_real, values_predicted 285 | ) 286 | precision = self._update_precision(real_anomalies, predicted_anomalies) 287 | recall = self._update_recall(real_anomalies, predicted_anomalies) 288 | if precision + recall != 0: 289 | Fbeta = ( 290 | (1 + self.beta**2) 291 | * precision 292 | * recall 293 | / (self.beta**2 * precision + recall) 294 | ) 295 | else: 296 | Fbeta = 0 297 | 298 | return precision, recall, Fbeta 299 | -------------------------------------------------------------------------------- /streamad/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set name = "streamad" %} 2 | {% set version = "0.3.0" %} 3 | 4 | package: 5 | name: {{ name|lower }} 6 | version: {{ version }} 7 | 8 | source: 9 | url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/streamad-{{ version }}.tar.gz 10 | sha256: b775e2ca53f296e7a6d3c117b7becc263146b01e57e7326296564931c0b4ef9c 11 | 12 | build: 13 | noarch: python 14 | script: {{ PYTHON }} -m pip install . -vv 15 | number: 0 16 | 17 | requirements: 18 | host: 19 | - pip 20 | - python >= 3.7 21 | - setuptools ==58.2.0 22 | run: 23 | - mmh3 >=3.0.0 24 | - numpy >=1.19 25 | - pandas >=1.3.0 26 | - plotly 27 | - python >=3.7 28 | - scikit-learn >=1.0.0 29 | - scipy >=1.7.0 30 | 31 | test: 32 | imports: 33 | - streamad 34 | commands: 35 | - pip check 36 | requires: 37 | - pip 38 | 39 | about: 40 | home: https://github.com/Fengrui-Liu/StreamAD 41 | summary: An anomaly detection package for data streams. 
42 | license: Apache-2.0 43 | license_file: LICENSE 44 | 45 | extra: 46 | recipe-maintainers: 47 | - Fengrui-Liu 48 | -------------------------------------------------------------------------------- /streamad/model/KNN_Detector.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | from copy import deepcopy 3 | 4 | import numpy as np 5 | from scipy.spatial.distance import cdist 6 | from streamad.base import BaseDetector 7 | 8 | 9 | class KNNDetector(BaseDetector): 10 | def __init__(self, k_neighbor: int = 5, **kwargs): 11 | """Univariate KNN-CAD model with Mahalanobis distance :cite:`DBLP:journals/corr/BurnaevI16`. 12 | 13 | Args: 14 | k_neighbor (int, optional): The number of nearest neighbors over which distances are accumulated. Defaults to 5. 15 | """ 16 | super().__init__(data_type="univariate", **kwargs) 17 | self.window = deque(maxlen=int(np.sqrt(self.window_len))) 18 | self.buffer = deque(maxlen=self.window_len - self.window.maxlen) 19 | 20 | assert ( 21 | k_neighbor < self.buffer.maxlen 22 | ), "k_neighbor must be less than the length of buffer" 23 | 24 | self.k = k_neighbor 25 | 26 | def fit(self, X: np.ndarray, timestamp: int = None): 27 | 28 | self.window.append(X[0]) 29 | 30 | if len(self.window) == self.window.maxlen: 31 | self.buffer.append(deepcopy(self.window)) 32 | 33 | return self 34 | 35 | def score(self, X: np.ndarray, timestamp: int = None) -> float: 36 | 37 | window = deepcopy(self.window) 38 | window.pop() 39 | window.append(X[0]) 40 | 41 | try: 42 | dist = cdist(np.array([window]), self.buffer, metric="mahalanobis")[ 43 | 0 44 | ] 45 | except Exception: # fall back to a pseudo-inverse when the covariance estimate is singular 46 | dist = cdist( 47 | np.array([window]), 48 | self.buffer, 49 | metric="mahalanobis", 50 | VI=np.linalg.pinv(self.buffer), 51 | )[0] 52 | score = np.sum(np.partition(np.array(dist), self.k + 1)[1 : self.k + 1]) 53 | 54 | return float(score) 55 | -------------------------------------------------------------------------------- /streamad/model/Mad_Dectector.py: -------------------------------------------------------------------------------- 1 | from streamad.base import BaseDetector 2 | import numpy as np 3 | from collections import deque 4 | 5 | 6 | class MadDetector(BaseDetector): 7 | def __init__(self, **kwargs): 8 | """Median Absolute Deviation Detector :cite:`InfluxDB:MAD`. 9 | 10 | Args: 11 | window_len (int, optional): Length of sliding window. Defaults to 50. 12 | threshold (float, optional): Threshold to decide an anomalous observation. Defaults to 3.0. 13 | 14 | Attributes: 15 | scale_factor : Ratio of the standard deviation to the median absolute deviation under a normal distribution (about 1.4826).
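The resulting anomaly score of the latest observation ``x`` is ``|x - median(window)| / (scale_factor * median(|window - median(window)|))``, i.e. the deviation from the window median expressed in robust, MAD-based standard deviations.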
16 | 17 | """ 18 | super().__init__(data_type="univariate", **kwargs) 19 | self.scale_factor = 1.4826 20 | 21 | def fit(self, X: np.ndarray, timestamp: int = None): 22 | self.window.append(X[0]) 23 | 24 | return self 25 | 26 | def score(self, X: np.ndarray, timestamp: int = None): 27 | ori_median = np.median(self.window) 28 | abs_diff = np.abs(self.window - ori_median) 29 | mad = self.scale_factor * np.median(abs_diff) 30 | score = np.divide( 31 | abs_diff[-1], mad, out=np.array(abs_diff[-1] / 1e-5), where=mad != 0 32 | ) 33 | 34 | return score 35 | -------------------------------------------------------------------------------- /streamad/model/OCSVM_Detector.py: -------------------------------------------------------------------------------- 1 | from sklearn.svm import OneClassSVM 2 | import numpy as np 3 | from streamad.base.detector import BaseDetector 4 | from collections import deque 5 | from typing import Literal 6 | 7 | 8 | class OCSVMDetector(BaseDetector): 9 | def __init__( 10 | self, 11 | nu: float = 0.5, 12 | kernel: Literal[ 13 | "linear", "poly", "rbf", "sigmoid", "precomputed" 14 | ] = "rbf", 15 | **kwargs 16 | ): 17 | """One-Class SVM Detector :cite:`enwiki:1098733917`. 18 | 19 | Args: 20 | nu (float, optional): An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors. Defaults to 0.5. 21 | kernel (str, optional): The kernel type to be used in the algorithm. Defaults to "rbf". 22 | """ 23 | super().__init__(data_type="multivariate", **kwargs) 24 | self.nu = nu 25 | self.kernel = kernel 26 | self.model = None 27 | 28 | def fit(self, X: np.ndarray, timestamp: int = None): 29 | 30 | self.window.append(X) 31 | if self.index >= self.window_len: 32 | self.model = OneClassSVM( 33 | gamma="scale", nu=self.nu, kernel=self.kernel 34 | ) 35 | self.model.fit(list(self.window)) 36 | 37 | return self 38 | 39 | def score(self, X: np.ndarray, timestamp: int = None): 40 | 41 | score = self.model.decision_function(X.reshape(1, -1)) 42 | return abs(score) 43 | -------------------------------------------------------------------------------- /streamad/model/SArima_Detector.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import warnings 3 | 4 | import numpy as np 5 | import statsmodels.api as sm 6 | from streamad.base.detector import BaseDetector 7 | 8 | warnings.filterwarnings("ignore") 9 | 10 | 11 | class SArimaDetector(BaseDetector): 12 | def __init__(self, **kwargs): 13 | """Auto Regressive Integrated Moving Averages Detector :cite:`durbin2012time` 14 | 15 | Args: 16 | window_len (int, optional): Length of sliding window. Defaults to 50. 
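Note: once the first window is full, the (p, d, q) and seasonal (P, D, Q, 12) orders are selected by an AIC grid search over {0, 1}; subsequent observations are appended to the fitted model and scored against its one-step-ahead forecast confidence interval.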
17 | """ 18 | super().__init__(data_type="univariate", **kwargs) 19 | self.best_result = None 20 | self.best_model = None 21 | self.best_order = None 22 | self.best_seasonal_order = None 23 | 24 | def _init_fit(self): 25 | best_aic = float("inf") 26 | p = d = q = range(0, 2) 27 | pdq = list(itertools.product(p, d, q)) 28 | seasonal_pdq = [ 29 | (x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q)) 30 | ] 31 | for param in pdq: 32 | for param_seasonal in seasonal_pdq: 33 | model = sm.tsa.statespace.SARIMAX( 34 | list(self.window), 35 | order=param, 36 | seasonal_order=param_seasonal, 37 | enforce_stationarity=False, 38 | enforce_invertibility=False, 39 | ) 40 | result = model.fit(disp=0) 41 | aic = result.aic 42 | if aic < best_aic: 43 | self.best_model = model 44 | best_aic = aic 45 | self.best_order = param 46 | self.best_seasonal_order = param_seasonal 47 | 48 | self.best_result = self.best_model.fit(disp=0) 49 | 50 | def fit(self, X: np.ndarray, timestamp: int = None): 51 | self.window.append(X[0]) 52 | if self.index == self.window_len: 53 | self._init_fit() 54 | 55 | if self.index > self.window_len: 56 | self.best_result = self.best_result.append(X) 57 | 58 | return self 59 | 60 | def score(self, X: np.ndarray, timestamp: int = None): 61 | pred_uc = self.best_result.get_forecast(steps=1) 62 | 63 | pred_ci = pred_uc.conf_int() 64 | pred_mid = (pred_ci[0, 0] + pred_ci[0, 1]) / 2 65 | pred_range = pred_ci[0, 1] - pred_ci[0, 0] 66 | 67 | if pred_ci[0, 0] > X: 68 | score = abs((X - pred_mid) / pred_range) 69 | return score 70 | elif X > pred_ci[0, 1]: 71 | score = abs((X - pred_mid) / pred_range) 72 | return score 73 | else: 74 | score = 0 75 | return float(score) 76 | -------------------------------------------------------------------------------- /streamad/model/SR_Detector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from streamad.base import BaseDetector 3 | from collections import deque 4 | from copy import deepcopy 5 | 6 | EPS = 1e-8 7 | 8 | 9 | class SRDetector(BaseDetector): 10 | def __init__( 11 | self, 12 | extend_len: int = 5, 13 | ahead_len: int = 10, 14 | mag_num: int = 5, 15 | **kwargs 16 | ): 17 | """Spectral Residual Detector :cite:`DBLP:conf/kdd/RenXWYHKXYTZ19`. 18 | 19 | Args: 20 | window_len (int, optional): Length of sliding window. Defaults to 50. 21 | extend_len (int, optional): Length to be extended, for FFT transforme. Defaults to 5. 22 | ahead_len (int, optional): Length to look ahead for references. Defaults to 10. 23 | mag_num (int, optional): Number of FFT magnitude. Defaults to 5. 
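Note: scoring extends the current window with ``extend_len`` copies of an estimated next value, applies the FFT-based spectral residual transform, and returns the saliency score of the last real observation.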
24 | """ 25 | super().__init__(data_type="univariate", **kwargs) 26 | self.extend_len = extend_len 27 | assert ahead_len > 1, "ahead_len must be greater than 1" 28 | self.ahead_len = ahead_len 29 | self.mag_num = mag_num 30 | 31 | def fit(self, X: np.ndarray, timestamp: int = None): 32 | self.window.append(X[0]) 33 | 34 | return self 35 | 36 | def score(self, X: np.ndarray, timestamp: int = None) -> float: 37 | window = deepcopy(self.window) 38 | 39 | window.pop() 40 | window.append(X[0]) 41 | 42 | extended_window = self._extend_window(window) 43 | 44 | mags = self._sr_transform(extended_window) 45 | anomaly_scores = self._spectral_score(mags) 46 | 47 | return anomaly_scores[-1 - self.extend_len] 48 | 49 | def _spectral_score(self, mags): 50 | avg_mag = self._average_filter(mags, n=self.mag_num * 10) 51 | safeDivisors = np.clip(avg_mag, EPS, avg_mag.max()) 52 | 53 | raw_scores = np.divide( 54 | np.abs(mags - avg_mag), 55 | safeDivisors, 56 | out=np.zeros_like(mags), 57 | where=safeDivisors != 0, 58 | ) 59 | scores = np.clip(raw_scores / 10.0, 0, 1.0) 60 | 61 | return scores 62 | 63 | def _sr_transform(self, window): 64 | trans = np.fft.fft(window) 65 | mag = np.sqrt(trans.real**2 + trans.imag**2) 66 | eps_index = np.where(mag <= EPS)[0] 67 | mag[eps_index] = EPS 68 | 69 | mag_log = np.log(mag) 70 | mag_log[eps_index] = 0 71 | 72 | spectral = np.exp( 73 | mag_log - self._average_filter(mag_log, n=self.mag_num) 74 | ) 75 | 76 | trans.real = trans.real * spectral / mag 77 | trans.imag = trans.imag * spectral / mag 78 | 79 | trans.real[eps_index] = 0 80 | trans.imag[eps_index] = 0 81 | 82 | wave_r = np.fft.ifft(trans) 83 | 84 | mag = np.sqrt(wave_r.real**2 + wave_r.imag**2)  # magnitude (saliency) of the inverse FFT 85 | 86 | return mag 87 | 88 | def _average_filter(self, values, n=3): 89 | if n >= len(values): 90 | n = len(values) 91 | 92 | res = np.cumsum(values, dtype=float) 93 | res[n:] = res[n:] - res[:-n] 94 | res[n:] = res[n:] / n 95 | 96 | for i in range(1, n): 97 | res[i] /= i + 1 98 | 99 | return res 100 | 101 | def _extend_window(self, window): 102 | predicted_window = [ 103 | self._predict_next(list(window)[-self.ahead_len : -1]) 104 | ] * self.extend_len 105 | 106 | extended_window = np.concatenate((window, predicted_window), axis=0) 107 | 108 | return extended_window 109 | 110 | def _predict_next(self, ahead_window): 111 | assert ( 112 | len(ahead_window) > 1 113 | ), "ahead window must have at least 2 elements" 114 | 115 | ele_last = ahead_window[-1] 116 | n = len(ahead_window) 117 | 118 | slopes = [ 119 | (ele_last - ele) / (n - 1 - i) 120 | for i, ele in enumerate(ahead_window[:-1]) 121 | ] 122 | 123 | return ahead_window[1] + sum(slopes) 124 | -------------------------------------------------------------------------------- /streamad/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .KNN_Detector import KNNDetector 2 | from .xStream_Detector import xStreamDetector 3 | from .spot_Detector import SpotDetector 4 | from .rshash_Detector import RShashDetector 5 | from .random_Detector import RandomDetector 6 | from .SR_Detector import SRDetector 7 | from .rrcf_Detector import RrcfDetector 8 | from .hstree_Detector import HSTreeDetector 9 | from .zscore_Detector import ZScoreDetector 10 | from .loda_Detector import LodaDetector 11 | from .OCSVM_Detector import OCSVMDetector 12 | from .Mad_Dectector import MadDetector 13 | from .SArima_Detector import SArimaDetector 14 | from .zspot_Detector import ZSpotDetector 15 | 16 | __all__ = [ 17 | "KNNDetector", 18 |
"xStreamDetector", 19 | "SpotDetector", 20 | "RandomDetector", 21 | "RShashDetector", 22 | "SRDetector", 23 | "RrcfDetector", 24 | "HSTreeDetector", 25 | "ZScoreDetector", 26 | "LodaDetector", 27 | "OCSVMDetector", 28 | "MadDetector", 29 | "SArimaDetector", 30 | "ZSpotDetector" 31 | ] 32 | -------------------------------------------------------------------------------- /streamad/model/hstree_Detector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from streamad.base import BaseDetector 3 | from streamad.util import StreamStatistic 4 | 5 | 6 | class Leaf: 7 | def __init__( 8 | self, 9 | left=None, 10 | right=None, 11 | depth=0, 12 | ): 13 | self.left = left 14 | self.right = right 15 | self.r = 0 16 | self.l = 0 17 | self.split_attrib = 0 18 | self.split_value = 0.0 19 | self.k = depth 20 | 21 | 22 | class HSTreeDetector(BaseDetector): 23 | def __init__(self, tree_height: int = 10, tree_num: int = 20, **kwargs): 24 | """Half space tree detectors. :cite:`DBLP:conf/ijcai/TanTL11`. 25 | 26 | Args: 27 | tree_height (int, optional): Height of a half space tree. Defaults to 10. 28 | tree_num (int, optional): Totla number of the trees. Defaults to 20. 29 | """ 30 | super().__init__(data_type="multivariate", **kwargs) 31 | self.tree_height = tree_height 32 | self.tree_num = tree_num 33 | self.forest = [] 34 | self.data_stats = StreamStatistic() 35 | 36 | self.dimensions = None 37 | 38 | def _generate_max_min(self): 39 | max_arr = np.zeros(self.dimensions) 40 | min_arr = np.zeros(self.dimensions) 41 | for q in range(self.dimensions): 42 | s_q = np.random.random_sample() 43 | max_value = max(s_q, 1 - s_q) 44 | max_arr[q] = s_q + max_value 45 | min_arr[q] = s_q - max_value 46 | 47 | return max_arr, min_arr 48 | 49 | def _init_a_tree(self, max_arr, min_arr, k): 50 | if k == self.tree_height: 51 | return Leaf(depth=k) 52 | 53 | leaf = Leaf() 54 | q = np.random.randint(self.dimensions) 55 | p = (max_arr[q] + min_arr[q]) / 2.0 56 | temp = max_arr[q] 57 | max_arr[q] = p 58 | leaf.left = self._init_a_tree(max_arr, min_arr, k + 1) 59 | max_arr[q] = temp 60 | min_arr[q] = p 61 | leaf.right = self._init_a_tree(max_arr, min_arr, k + 1) 62 | leaf.split_attrib = q 63 | leaf.split_value = p 64 | leaf.k = k 65 | return leaf 66 | 67 | def _update_tree_mass(self, tree, X, is_ref_window): 68 | if tree: 69 | if tree.k != 0: 70 | if is_ref_window: 71 | tree.r += 1 72 | 73 | tree.l += 1 74 | if X[tree.split_attrib] > tree.split_value: 75 | tree_new = tree.right 76 | else: 77 | tree_new = tree.left 78 | self._update_tree_mass(tree_new, X, is_ref_window) 79 | 80 | def _reset_tree(self, tree): 81 | if tree: 82 | tree.r = tree.l 83 | tree.l = 0 84 | self._reset_tree(tree.left) 85 | self._reset_tree(tree.right) 86 | 87 | def fit(self, X: np.ndarray, timestamp: int = None): 88 | self.data_stats.update(X) 89 | 90 | X_normalized = np.divide( 91 | X - self.data_stats.get_min(), 92 | self.data_stats.get_max() - self.data_stats.get_min(), 93 | out=np.zeros_like(X, dtype=float), 94 | where=self.data_stats.get_max() - self.data_stats.get_min() != 0, 95 | dtype=float, 96 | ) 97 | X_normalized[np.abs(X_normalized) == np.inf] = 0 98 | 99 | if self.dimensions is None: 100 | self.dimensions = len(X) 101 | for _ in range(self.tree_num): 102 | max_arr, min_arr = self._generate_max_min() 103 | tree = self._init_a_tree(max_arr, min_arr, 0) 104 | self.forest.append(tree) 105 | 106 | if self.index < self.window_len: 107 | for tree in self.forest: 108 | self._update_tree_mass(tree, 
X_normalized, True) 109 | else: 110 | if self.index % self.window_len == 0: 111 | for tree in self.forest: 112 | self._reset_tree(tree) 113 | 114 | for tree in self.forest: 115 | self._update_tree_mass(tree, X_normalized, False) 116 | 117 | return self 118 | 119 | def score(self, X: np.ndarray, timestamp: int = None) -> float: 120 | score = 0.0 121 | 122 | X_normalized = np.divide( 123 | X - self.data_stats.get_min(), 124 | self.data_stats.get_max() - self.data_stats.get_min(), 125 | out=np.zeros_like(X, dtype=float), 126 | where=self.data_stats.get_max() - self.data_stats.get_min() != 0, 127 | ) 128 | X_normalized[np.abs(X_normalized) == np.inf] = 0 129 | 130 | for tree in self.forest: 131 | score += self._score_tree(tree, X_normalized, 0) 132 | 133 | score = score / self.tree_num 134 | 135 | return float(score) 136 | 137 | def _score_tree(self, tree, X, k): 138 | s = 0 139 | if not tree: 140 | return s 141 | 142 | s += tree.r * (2**k) 143 | 144 | if X[tree.split_attrib] > tree.split_value: 145 | tree_new = tree.right 146 | else: 147 | tree_new = tree.left 148 | 149 | s += self._score_tree(tree_new, X, k + 1) 150 | 151 | return s 152 | -------------------------------------------------------------------------------- /streamad/model/loda_Detector.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | 3 | import numpy as np 4 | from streamad.base import BaseDetector 5 | from fast_histogram import histogram1d 6 | 7 | 8 | class LodaDetector(BaseDetector): 9 | def __init__(self, random_cuts_num: int = 10, **kwargs): 10 | """Multivariate LODA Detector :cite:`DBLP:journals/ml/Pevny16`. 11 | 12 | Args: 13 | window_len (int, optional): The length of window. Defaults to 50. 14 | random_cuts_num (int, optional): The number of random experiments. Defaults to 10. 
15 | """ 16 | super().__init__(data_type="multivariate", **kwargs) 17 | 18 | self.random_cuts_num = random_cuts_num 19 | self.bins_num = int( 20 | 1 * (self.window_len**1) * (np.log(self.window_len) ** -1) 21 | ) 22 | self._weights = np.ones(random_cuts_num) / random_cuts_num 23 | self.components_num = None 24 | self.nonzero_components_num = None 25 | self.zero_components_num = None 26 | self._projections = None 27 | self._histograms = None 28 | self._limits = None 29 | 30 | def fit(self, X: np.ndarray, timestamp: int = None): 31 | self.window.append(X) 32 | if self.index == 0: 33 | self.components_num = len(X) 34 | self.nonzero_components_num = int(np.sqrt(self.components_num)) 35 | self.zero_components_num = ( 36 | self.components_num - self.nonzero_components_num 37 | ) 38 | 39 | elif len(self.window) == self.window.maxlen: 40 | self._projections = np.random.randn( 41 | self.random_cuts_num, self.components_num 42 | ) 43 | self._histograms = np.zeros([self.random_cuts_num, self.bins_num]) 44 | self._limits = np.zeros([self.random_cuts_num, self.bins_num + 1]) 45 | 46 | for i in range(self.random_cuts_num): 47 | rands = np.random.permutation(self.components_num)[ 48 | : self.zero_components_num 49 | ] 50 | self._projections[i, rands] = 0.0 51 | projected_data = self._projections[i, :].dot( 52 | np.array(self.window).T 53 | ) 54 | 55 | try: 56 | self._histograms[i, :] = ( 57 | histogram1d( 58 | projected_data, 59 | range=( 60 | projected_data.min(), 61 | projected_data.max() + 1e-12, 62 | ), 63 | bins=self.bins_num, 64 | ) 65 | + 1e-12 66 | ) 67 | except: 68 | self._histograms[i, :] = ( 69 | histogram1d( 70 | projected_data, 71 | range=( 72 | projected_data.min(), 73 | projected_data.max() + 1e-5, 74 | ), 75 | bins=self.bins_num, 76 | ) 77 | + 1e-12 78 | ) 79 | self._limits[i, :] = np.linspace( 80 | projected_data.min(), 81 | projected_data.max() + 1e-12, 82 | num=self.bins_num + 1, 83 | ) 84 | 85 | self._histograms[i, :] /= np.sum(self._histograms[i, :]) 86 | 87 | return self 88 | 89 | def score(self, X: np.ndarray, timestamp: int = None): 90 | score = 0 91 | 92 | for i in range(self.random_cuts_num): 93 | projected_data = self._projections[i, :].dot(np.array(X).T) 94 | inds = np.searchsorted( 95 | self._limits[i, : self.bins_num - 1], 96 | projected_data, 97 | side="left", 98 | ) 99 | score += -self._weights[i] * np.log(self._histograms[i, inds]) 100 | 101 | score = score / self.random_cuts_num 102 | return float(score) 103 | 104 | 105 | if __name__ == "__main__": 106 | import cProfile 107 | import resource 108 | 109 | # from line_profiler import LineProfiler 110 | 111 | # lp = LineProfiler() 112 | 113 | model = LodaDetector() 114 | 115 | # lp.add_function(model.fit) 116 | # lp.add_function(model.score) 117 | # lp_wrapper = lp(model.fit_score) 118 | import sys 119 | 120 | for i in range(1500): 121 | # lp_wrapper(np.array([i])) 122 | model.fit_score(np.array([i * 10])) 123 | 124 | r = sys.getsizeof(model) 125 | # r = resource.getrusage(resource.RUSAGE_CHILDREN).ru_maxrss 126 | print(r) 127 | 128 | # lp.print_stats() 129 | -------------------------------------------------------------------------------- /streamad/model/random_Detector.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | from streamad.base import BaseDetector 5 | 6 | 7 | class RandomDetector(BaseDetector): 8 | """Return random anomaly score. 
A minimum score for benchmark.""" 9 | 10 | def __init__(self, **kwargs): 11 | super().__init__(data_type="multivariate", **kwargs) 12 | 13 | def fit(self, X: np.ndarray, timestamp: int = None): 14 | return self 15 | 16 | def score(self, X: np.ndarray, timestamp: int = None): 17 | 18 | return random.random() 19 | -------------------------------------------------------------------------------- /streamad/model/rrcf_Detector.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | 3 | import numpy as np 4 | import rrcf 5 | from streamad.base import BaseDetector 6 | from copy import deepcopy 7 | 8 | 9 | class RrcfDetector(BaseDetector): 10 | def __init__(self, num_trees=10, tree_size=12, **kwargs): 11 | """Rrcf detector :cite:`DBLP:conf/icml/GuhaMRS16`. 12 | 13 | Args: 14 | window_len (int, optional): Length of sliding window. Defaults to 50. 15 | num_trees (int, optional): Number of trees. Defaults to 10. 16 | tree_size (int, optional): Size of each tree. Defaults to 12. 17 | """ 18 | 19 | super().__init__(data_type="multivariate", **kwargs) 20 | self.num_trees = num_trees 21 | self.tree_size = tree_size 22 | self.forest = [] 23 | for _ in range(num_trees): 24 | tree = rrcf.RCTree() 25 | self.forest.append(tree) 26 | self.avg_codisp = {} 27 | 28 | self.shingle = deque(maxlen=int(np.sqrt(self.window_len))) 29 | 30 | def fit(self, X: np.ndarray, timestamp: int = None): 31 | self.shingle.append(X) 32 | 33 | if not self.forest[0].ndim: 34 | dim = X.shape[0] 35 | for tree in self.forest: 36 | tree.ndim = dim 37 | 38 | if self.shingle.maxlen == len(self.shingle): 39 | if self.index > (self.shingle.maxlen + self.tree_size): 40 | list( 41 | map( 42 | lambda x: x.forget_point(self.index - self.tree_size), 43 | self.forest, 44 | ) 45 | ) 46 | 47 | list( 48 | map( 49 | lambda x: x.insert_point(self.shingle, self.index), 50 | self.forest, 51 | ) 52 | ) 53 | 54 | return self 55 | 56 | def score(self, X: np.ndarray, timestamp: int = None): 57 | score_list = list(map(lambda x: x.codisp(self.index), self.forest)) 58 | 59 | score = sum(score_list) / self.num_trees 60 | 61 | return float(score) 62 | 63 | 64 | if __name__ == "__main__": 65 | import cProfile 66 | from line_profiler import LineProfiler 67 | 68 | lp = LineProfiler() 69 | 70 | model = RrcfDetector() 71 | 72 | # lp.add_function(_Chain.fit) 73 | # lp.add_function(_Chain.score) 74 | # lp.add_function(_Chain.bincount) 75 | lp.add_function(model.fit) 76 | lp.add_function(model.score) 77 | lp_wrapper = lp(model.fit_score) 78 | 79 | for i in range(1500): 80 | lp_wrapper(np.array([i])) 81 | # model.fit_score(np.array([i])) 82 | 83 | lp.print_stats() 84 | -------------------------------------------------------------------------------- /streamad/model/rshash_Detector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from streamad.base import BaseDetector 3 | from streamad.util import StreamStatistic 4 | from collections import deque 5 | 6 | 7 | class RShashDetector(BaseDetector): 8 | def __init__( 9 | self, decay=0.015, components_num=10, hash_num: int = 10, **kwargs 10 | ): 11 | """Multivariate RSHashDetector :cite:`DBLP:conf/icdm/SatheA16`. 12 | 13 | Args: 14 | window_len (int, optional): Length of data to burn in/init. Defaults to 50. 15 | decay (float, optional): Decay ratio. Defaults to 0.015. 16 | components_num (int, optional): Number of components. Defaults to 10. 17 | hash_num (int, optional): Number of hash functions. 
Defaults to 10. 18 | """ 19 | super().__init__(data_type="multivariate", **kwargs) 20 | 21 | self.decay = decay 22 | self.data_stats = StreamStatistic() 23 | 24 | self.hash_num = hash_num 25 | self.components_num = components_num 26 | self.cmsketches = [{} for _ in range(hash_num)] 27 | 28 | self.alpha = None 29 | 30 | self.effective_s = max(1000, 1.0 / (1 - np.power(2, -self.decay))) 31 | self.f = np.random.uniform( 32 | low=1.0 / np.sqrt(self.effective_s), 33 | high=1 - (1.0 / np.sqrt(self.effective_s)), 34 | size=self.components_num, 35 | ) 36 | 37 | def _burn_in(self): 38 | # Normalized the init data 39 | buffer = np.array(self.window) 40 | buffer_normalized = np.divide( 41 | buffer - self.data_stats.get_min(), 42 | self.data_stats.get_max() - self.data_stats.get_min(), 43 | out=np.zeros_like(buffer).astype(float), 44 | where=self.data_stats.get_max() - self.data_stats.get_min() != 0, 45 | ) 46 | buffer_normalized[np.abs(buffer_normalized) == np.inf] = 0 47 | 48 | for r in range(self.components_num): 49 | for i in range(buffer.shape[0]): 50 | Y = np.floor( 51 | (buffer_normalized[i, :] + np.array(self.alpha[r])) 52 | / self.f[r] 53 | ) 54 | 55 | # mod_entry = np.insert(Y, 0, r) 56 | mod_entry = np.concatenate(([r], Y)) 57 | mod_entry = tuple(mod_entry.astype(int)) 58 | 59 | for w in range(self.hash_num): 60 | try: 61 | value = self.cmsketches[w][mod_entry] 62 | except KeyError: 63 | value = (0, 0) 64 | 65 | value = (0, value[1] + 1) 66 | self.cmsketches[w][mod_entry] = value 67 | 68 | def fit(self, X: np.ndarray, timestamp: int = None): 69 | if self.index == 0: 70 | self.alpha = [ 71 | np.random.uniform(low=0, high=self.f[r], size=len(X)) 72 | for r in range(self.components_num) 73 | ] 74 | 75 | self.data_stats.update(X) 76 | 77 | if self.index == self.window.maxlen - 1: 78 | self._burn_in() 79 | 80 | if len(self.window) < self.window.maxlen: 81 | self.window.append(X) 82 | return self 83 | 84 | return self 85 | 86 | def score(self, X: np.ndarray, timestamp: int = None) -> float: 87 | X_normalized = np.divide( 88 | X - self.data_stats.get_min(), 89 | self.data_stats.get_max() - self.data_stats.get_min(), 90 | out=np.zeros_like(X).astype(float), 91 | where=self.data_stats.get_max() - self.data_stats.get_min() != 0, 92 | ) 93 | X_normalized[np.abs(X_normalized) == np.inf] = 0 94 | 95 | score_instance = 0 96 | 97 | for r in range(self.components_num): 98 | Y = np.floor((X_normalized + np.array(self.alpha[r])) / self.f[r]) 99 | # mod_entry = np.insert(Y, 0, r) 100 | mod_entry = np.concatenate(([r], Y)) 101 | mod_entry = tuple(mod_entry.astype(int)) 102 | 103 | c = [] 104 | 105 | for w in range(len(self.cmsketches)): 106 | try: 107 | value = self.cmsketches[w][mod_entry] 108 | except KeyError: 109 | value = (self.index, 0) 110 | 111 | tstamp = value[0] 112 | wt = value[1] 113 | new_wt = wt * np.power(2, -self.decay * (self.index - tstamp)) 114 | c.append(new_wt) 115 | new_tstamp = self.index 116 | self.cmsketches[w][mod_entry] = (new_tstamp, new_wt + 1) 117 | 118 | min_c = min(c) 119 | c = np.log(1 + min_c) 120 | score_instance += c 121 | 122 | score = score_instance / self.components_num 123 | 124 | return float(score) 125 | -------------------------------------------------------------------------------- /streamad/model/spot_Detector.py: -------------------------------------------------------------------------------- 1 | from streamad.base import BaseDetector 2 | import numpy as np 3 | from math import log 4 | from scipy.optimize import minimize 5 | from collections import deque 6 | import 
heapq 7 | 8 | np.seterr(divide="ignore", invalid="ignore") 9 | 10 | 11 | class SpotDetector(BaseDetector): 12 | def __init__( 13 | self, 14 | prob: float = 1e-4, 15 | back_mean_len: int = 20, 16 | num_threshold_up: int = 20, 17 | num_threshold_down: int = 20, 18 | deviance_ratio: float = 0.01, 19 | global_memory: bool = True, 20 | **kwargs 21 | ): 22 | """Univariate Spot model :cite:`DBLP:conf/kdd/SifferFTL17`. 23 | 24 | Args: 25 | prob (float, optional): Threshold for the probability of anomalies, a small float value. Defaults to 1e-4.. Defaults to 1e-4. 26 | back_mean_len (int, optional): The length of backward window to calculate the first-order difference. Defaults to 20. 27 | num_threshold_up (int, optional): Number of peaks over upper threshold to estimate distribution. Defaults to 20. 28 | num_threshold_down (int, optional): Number of peaks over lower threshold to estimate distribution. Defaults to 20. 29 | deviance_ratio (float, optional): Deviance ratio aginest the absolute value of data, which is useful when the value is very large and deviances are small. Defaults to 0.01. 30 | window_len (int, optional): Length of the window for reference. Defaults to 200. 31 | """ 32 | 33 | super().__init__(data_type="univariate", **kwargs) 34 | 35 | self.prob = prob 36 | self.deviance_ratio = deviance_ratio 37 | self.global_memory = global_memory 38 | # self.window = deque(maxlen=self.window_len) 39 | self.back_mean_len = back_mean_len 40 | self.back_mean_window = deque(maxlen=self.back_mean_len) 41 | # self.window_len = self.window_len - self.back_mean_len 42 | assert ( 43 | self.window_len > 0 44 | ), "window_len is too small, default value is 200" 45 | 46 | self.num_threshold = { 47 | "up": num_threshold_up, 48 | "down": num_threshold_down, 49 | } 50 | 51 | nonedict = {"up": None, "down": None} 52 | 53 | self.extreme_quantile = dict.copy(nonedict) 54 | self.init_threshold = dict.copy(nonedict) 55 | self.peaks = dict.copy(nonedict) 56 | self.history_peaks = {"up": [], "down": []} 57 | # self.peaks = {'up':deque(maxlen=20),'down':deque(maxlen=20)} 58 | self.gamma = dict.copy(nonedict) 59 | self.sigma = dict.copy(nonedict) 60 | self.normal_X = None 61 | 62 | # self.thup = [] 63 | # self.thdown = [] 64 | 65 | def _grimshaw(self, side, epsilon=1e-8, n_points=10): 66 | def u(s): 67 | return 1 + np.log(s).mean() 68 | 69 | def v(s): 70 | return np.mean(1 / s) 71 | 72 | def w(Y, t): 73 | s = 1 + t * Y 74 | us = u(s) 75 | vs = v(s) 76 | return us * vs - 1 77 | 78 | def jac_w(Y, t): 79 | s = 1 + t * Y 80 | us = u(s) 81 | vs = v(s) 82 | jac_us = np.divide( 83 | 1, t, out=np.array(1 / epsilon), where=t != 0 84 | ) * (1 - vs) 85 | jac_vs = np.divide( 86 | 1, t, out=np.array(1 / epsilon), where=t != 0 87 | ) * (-vs + np.mean(1 / s**2)) 88 | return us * jac_vs + vs * jac_us 89 | 90 | self.peaks[side][self.peaks[side] == 0] = epsilon 91 | Ym = self.peaks[side].min() 92 | YM = self.peaks[side].max() 93 | Ymean = self.peaks[side].mean() 94 | 95 | a = np.divide(-1, YM, out=np.array(-epsilon), where=YM != 0) 96 | if abs(a) < 2 * epsilon: 97 | epsilon = abs(a) / n_points 98 | 99 | # a = a + epsilon 100 | b = 2 * np.divide( 101 | (Ymean - Ym), 102 | (Ymean * Ym), 103 | out=np.array((Ymean - Ym) / epsilon - epsilon), 104 | where=(Ymean * Ym) != 0, 105 | ) 106 | c = 2 * np.divide( 107 | Ymean - Ym, 108 | Ym**2, 109 | out=np.array((Ymean - Ym) / epsilon + epsilon), 110 | where=Ym != 0, 111 | ) 112 | 113 | d = a + epsilon 114 | e = -epsilon 115 | 116 | left_zeros = self._rootsFinder( 117 | lambda t: 
w(self.peaks[side], t), 118 | lambda t: jac_w(self.peaks[side], t), 119 | (d, e) if d < e else (e, d), 120 | n_points, 121 | "regular", 122 | ) 123 | 124 | right_zeros = self._rootsFinder( 125 | lambda t: w(self.peaks[side], t), 126 | lambda t: jac_w(self.peaks[side], t), 127 | (b, c) if b < c else (c, b), 128 | n_points, 129 | "regular", 130 | ) 131 | 132 | # all the possible roots 133 | zeros = np.concatenate((left_zeros, right_zeros)) 134 | 135 | # 0 is always a solution so we initialize with it 136 | gamma_best = 0 137 | sigma_best = Ymean 138 | ll_best = self._log_likelihood(self.peaks[side], gamma_best, sigma_best) 139 | 140 | # we look for better candidates 141 | for z in zeros: 142 | gamma = u(1 + z * self.peaks[side]) - 1 143 | sigma = np.divide( 144 | gamma, z, out=np.array(gamma / epsilon), where=z != 0 145 | ) 146 | ll = self._log_likelihood(self.peaks[side], gamma, sigma) 147 | if ll > ll_best: 148 | gamma_best = gamma 149 | sigma_best = sigma 150 | ll_best = ll 151 | 152 | return gamma_best, sigma_best, ll_best 153 | 154 | def _rootsFinder(self, fun, jac, bounds, npoints, method): 155 | """ 156 | Find possible roots of a scalar function 157 | 158 | Parameters 159 | ---------- 160 | fun : function 161 | scalar function 162 | jac : function 163 | first order derivative of the function 164 | bounds : tuple 165 | (min,max) interval for the roots search 166 | npoints : int 167 | maximum number of roots to output 168 | method : str 169 | 'regular' : regular sample of the search interval, 'random' : uniform (distribution) sample of the search interval 170 | 171 | Returns 172 | ---------- 173 | numpy.array 174 | possible roots of the function 175 | """ 176 | if method == "regular": 177 | step = (bounds[1] - bounds[0]) / (npoints + 1) 178 | try: 179 | X0 = np.arange(bounds[0] + step, bounds[1], step) 180 | except: 181 | X0 = np.random.uniform(bounds[0], bounds[1], npoints) 182 | elif method == "random": 183 | X0 = np.random.uniform(bounds[0], bounds[1], npoints) 184 | 185 | def objFun(X, f, jac): 186 | g = 0 187 | j = np.zeros(X.shape) 188 | i = 0 189 | for x in X: 190 | fx = f(x) 191 | g = g + fx**2 192 | j[i] = 2 * fx * jac(x) 193 | i = i + 1 194 | return g, j 195 | 196 | opt = minimize( 197 | lambda X: objFun(X, fun, jac), 198 | X0, 199 | method="L-BFGS-B", 200 | jac=True, 201 | bounds=[bounds] * len(X0), 202 | ) 203 | 204 | X = opt.x 205 | np.round(X, decimals=5) 206 | return np.unique(X) 207 | 208 | def _log_likelihood(self, Y, gamma, sigma): 209 | """ 210 | Compute the log-likelihood for the Generalized Pareto Distribution (μ=0) 211 | 212 | Parameters 213 | ---------- 214 | Y : numpy.array 215 | observations 216 | gamma : float 217 | GPD index parameter 218 | sigma : float 219 | GPD scale parameter (>0) 220 | 221 | Returns 222 | ---------- 223 | float 224 | log-likelihood of the sample Y to be drawn from a GPD(γ,σ,μ=0) 225 | """ 226 | n = Y.size 227 | if gamma != 0: 228 | tau = gamma / sigma 229 | L = ( 230 | -n * log(sigma) 231 | - (1 + (1 / gamma)) * (np.log(1 + tau * Y)).sum() 232 | ) 233 | else: 234 | L = n * (1 + log(abs(Y.mean()) + 1e-8)) 235 | 236 | return L 237 | 238 | def _quantile(self, side, gamma, sigma): 239 | if side == "up": 240 | r = self.window_len * self.prob / self.num_threshold[side] 241 | # r = 1000 * self.prob 242 | 243 | if gamma != 0: 244 | return self.init_threshold["up"] + (sigma / gamma) * ( 245 | pow(r, -gamma) - 1 246 | ) 247 | else: 248 | return self.init_threshold["up"] - sigma * log(r) 249 | elif side == "down": 250 | r = self.window_len * 
self.prob / self.num_threshold[side] 251 | # r = 1000 * self.prob 252 | 253 | if gamma != 0: 254 | return self.init_threshold["down"] - (sigma / gamma) * ( 255 | pow(r, -gamma) - 1 256 | ) 257 | else: 258 | return self.init_threshold["down"] + sigma * log(r) 259 | else: 260 | raise ValueError("The side is not right") 261 | 262 | def _init_drift(self, verbose=False): 263 | for side in ["up", "down"]: 264 | self._update_one_side(side) 265 | 266 | return self 267 | 268 | def _update_one_side(self, side: str): 269 | if side == "up": 270 | candidates = ( 271 | list(self.window) + self.history_peaks[side] 272 | if self.global_memory 273 | else list(self.window) 274 | ) 275 | 276 | self.history_peaks[side] = heapq.nlargest( 277 | self.num_threshold[side], 278 | candidates, 279 | ) 280 | self.init_threshold[side] = self.history_peaks[side][-1] 281 | self.peaks[side] = np.array(self.history_peaks[side]) - np.array( 282 | self.init_threshold[side] 283 | ) 284 | elif side == "down": 285 | candidates = ( 286 | list(self.window) + self.history_peaks[side] 287 | if self.global_memory 288 | else list(self.window) 289 | ) 290 | 291 | self.history_peaks[side] = heapq.nsmallest( 292 | self.num_threshold[side], 293 | candidates, 294 | ) 295 | self.init_threshold[side] = self.history_peaks[side][-1] 296 | self.peaks[side] = np.array(self.init_threshold[side]) - np.array( 297 | self.history_peaks[side] 298 | ) 299 | 300 | # remove the largest incase the first anomaly change the threshold 301 | # self.peaks[side] = self.peaks[side][1:] 302 | gamma, sigma, _ = self._grimshaw(side) 303 | self.extreme_quantile[side] = self._quantile(side, gamma, sigma) 304 | self.gamma[side] = gamma 305 | self.sigma[side] = sigma 306 | 307 | def _cal_back_mean(self, X): 308 | back_mean = ( 309 | np.mean(self.back_mean_window) 310 | if self.back_mean_window.maxlen > 0 311 | else np.array(0.0) 312 | ) 313 | 314 | return X - back_mean 315 | 316 | def fit(self, X: np.ndarray, timestamp: int = None): 317 | X = float(X[0]) 318 | 319 | self.back_mean_window.append(X) 320 | 321 | if self.index >= self.back_mean_len: 322 | self.normal_X = self._cal_back_mean(X) 323 | self.window.append(self.normal_X) 324 | 325 | if self.index == self.window_len: 326 | self._init_drift() 327 | 328 | if self.index >= self.window_len: 329 | last_X = ( 330 | self.window[-2] 331 | if self.back_mean_len == 0 332 | else (X - self.window[-1]) 333 | ) 334 | 335 | if ( 336 | abs( 337 | np.divide( 338 | X - last_X, last_X, np.array(X), where=last_X != 0 339 | ) 340 | ) 341 | < self.deviance_ratio 342 | ): 343 | return self 344 | 345 | if self.normal_X > self.init_threshold["up"]: 346 | self._update_one_side("up") 347 | 348 | elif self.normal_X < self.init_threshold["down"]: 349 | self._update_one_side("down") 350 | 351 | return self 352 | 353 | def score(self, X: np.ndarray, timestamp: int = None): 354 | X = float(X[0]) 355 | 356 | # if self.score_first: 357 | # last_X = self._cal_back_mean(X) 358 | # else: 359 | last_X = ( 360 | self.window[-2] 361 | if self.back_mean_len == 0 362 | else (X - self.window[-1]) 363 | ) 364 | 365 | if ( 366 | abs(np.divide(X - last_X, last_X, np.array(X), where=last_X != 0)) 367 | < self.deviance_ratio 368 | ): 369 | score = 0.0 370 | 371 | elif ( 372 | self.normal_X > self.extreme_quantile["up"] 373 | or self.normal_X < self.extreme_quantile["down"] 374 | ): 375 | score = 1.0 376 | 377 | elif self.normal_X > self.init_threshold["up"]: 378 | side = "up" 379 | score = np.divide( 380 | self.normal_X - self.init_threshold[side], 381 | 
(self.extreme_quantile[side] - self.init_threshold[side]), 382 | np.array(0.5), 383 | where=( 384 | self.extreme_quantile[side] - self.init_threshold[side] != 0 385 | ), 386 | ) 387 | 388 | elif self.normal_X < self.init_threshold["down"]: 389 | side = "down" 390 | score = np.divide( 391 | self.init_threshold[side] - self.normal_X, 392 | (self.init_threshold[side] - self.extreme_quantile[side]), 393 | np.array(0.5), 394 | where=( 395 | self.init_threshold[side] - self.extreme_quantile[side] != 0 396 | ), 397 | ) 398 | else: 399 | score = 0.0 400 | 401 | # self.thup.append(self.extreme_quantile["up"] + hist_mean) 402 | # self.thdown.append(self.extreme_quantile["down"] + hist_mean) 403 | 404 | return float(score) 405 | -------------------------------------------------------------------------------- /streamad/model/xStream_Detector.py: -------------------------------------------------------------------------------- 1 | from streamad.base import BaseDetector 2 | import numpy as np 3 | import mmh3 4 | from math import floor 5 | 6 | 7 | class xStreamDetector(BaseDetector): 8 | def __init__( 9 | self, 10 | n_components: int = 8, 11 | n_chains: int = 8, 12 | depth: int = 8, 13 | **kwargs, 14 | ): 15 | """Multivariate xStreamDetector :cite:`DBLP:conf/kdd/ManzoorLA18`. 16 | 17 | Args: 18 | n_components (int, optional): Number of streamhash projections, similar to the number of features. Defaults to 8. 19 | n_chains (int, optional): Number of half-space chains. Defaults to 8. 20 | depth (int, optional): Maximum depth for each chain. Defaults to 8. 21 | """ 22 | 23 | super().__init__(data_type="multivariate", **kwargs) 24 | self.projector = StreamhashProjector( 25 | num_components=n_components, density=1 / 3.0 26 | ) 27 | self.cur_window = [] 28 | self.ref_window = [] 29 | 30 | delta = np.ones(n_components) * 0.5 31 | self.hs_chains = _hsChains( 32 | deltamax=delta, n_chains=n_chains, depth=depth 33 | ) 34 | 35 | def fit(self, X: np.ndarray, timestamp: int = None): 36 | projected_X = self.projector.transform(X) 37 | self.cur_window.append(projected_X) 38 | self.hs_chains.fit(projected_X) 39 | 40 | self.ref_window = self.cur_window 41 | self.cur_window = [] 42 | 43 | deltamax = np.ptp(self.ref_window, axis=0) / 2.0 44 | deltamax[np.abs(deltamax) <= 0.0001] = 1.0 45 | 46 | self.hs_chains.set_deltamax(deltamax=deltamax) 47 | 48 | return self 49 | 50 | def score(self, X: np.ndarray, timestamp: int = None): 51 | projected_X = self.projector.transform(X) 52 | 53 | score = -1.0 * self.hs_chains.score_chains(projected_X) 54 | 55 | return score 56 | 57 | 58 | class _Chain: 59 | def __init__(self, deltamax, depth): 60 | self.depth = depth 61 | self.deltamax = deltamax 62 | self.rand = np.random.rand(len(deltamax)) 63 | self.rand_shift = self.rand * deltamax 64 | self.cmsketch_ref = [{} for _ in range(depth)] 65 | self.is_first_window = True 66 | self.fs = [np.random.randint(0, len(deltamax)) for _ in range(depth)] 67 | 68 | @staticmethod 69 | def float_to_int(x): 70 | return x // 1 71 | 72 | def bincount(self, X): 73 | scores = np.zeros(self.depth) 74 | prebins = np.zeros(X.shape[0], dtype=float) 75 | depthcount = np.zeros(len(self.deltamax), dtype=int) 76 | for depth in range(self.depth): 77 | f = self.fs[depth] 78 | depthcount[f] += 1 79 | if depthcount[f] == 1: 80 | prebins[f] = (X[f] + self.rand_shift[f]) / self.deltamax[f]  # same binning as fit(): shift first, then scale 81 | else: 82 | prebins[f] = ( 83 | 2.0 * prebins[f] - self.rand_shift[f] / self.deltamax[f] 84 | ) 85 | 86 | cmsketch = self.cmsketch_ref[depth] 87 | 88 | l = tuple(map(floor, prebins))
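            # The floored prebins tuple is the bin id at this depth; its count in the reference-window sketch (0 if the bin was never seen) becomes this depth's raw score.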
89 | 90 | if l in cmsketch: 91 | scores[depth] = cmsketch[l] 92 | else: 93 | scores[depth] = 0.0 94 | 95 | return scores 96 | 97 | def score(self, X): 98 | scores = self.bincount(X) 99 | 100 | depths = np.arange(1, self.depth + 1) 101 | 102 | scores = np.log2(1.0 + scores) + depths 103 | return np.min(scores) 104 | 105 | def fit(self, X): 106 | prebins = np.zeros(X.shape, dtype=float) 107 | depthcount = np.zeros(len(self.deltamax), dtype=int) 108 | for depth in range(self.depth): 109 | f = self.fs[depth] 110 | depthcount[f] += 1 111 | 112 | if depthcount[f] == 1: 113 | prebins[f] = (X[f] + self.rand_shift[f]) / self.deltamax[f] 114 | else: 115 | prebins[f] = ( 116 | 2.0 * prebins[f] - self.rand_shift[f] / self.deltamax[f] 117 | ) 118 | 119 | if self.is_first_window: 120 | cmsketch = self.cmsketch_ref[depth] 121 | 122 | l = tuple(map(floor, prebins)) 123 | 124 | if l not in cmsketch: 125 | cmsketch[l] = 0 126 | cmsketch[l] += 1 127 | 128 | self.cmsketch_ref[depth] = cmsketch 129 | else: 130 | cmsketch = self.cmsketch_ref[depth] 131 | 132 | l = tuple(map(floor, prebins)) 133 | 134 | if l not in cmsketch: 135 | cmsketch[l] = 0 136 | cmsketch[l] += 1 137 | self.cmsketch_ref[depth] = cmsketch 138 | 139 | return self 140 | 141 | 142 | class _hsChains: 143 | def __init__(self, deltamax, n_chains: int = 100, depth: int = 25) -> None: 144 | self.nchains = n_chains 145 | self.depth = depth 146 | self.chains = [_Chain(deltamax, depth) for _ in range(n_chains)] 147 | 148 | def score_chains(self, X): 149 | scores = 0 150 | for chain in self.chains: 151 | scores += chain.score(X) 152 | 153 | scores = float(scores) / float(self.nchains) 154 | 155 | return scores 156 | 157 | def fit(self, X): 158 | # for chain in self.chains: 159 | # chain.fit(X) 160 | list(map(lambda x: x.fit(X), self.chains)) 161 | 162 | def set_deltamax(self, deltamax): 163 | # list(map(lambda x: x.deltamax = deltamax, self.chains)) 164 | # list(map(lambda x: x.rand_shift = x.rand * deltamax, self.chains)) 165 | for chain in self.chains: 166 | chain.deltamax = deltamax 167 | chain.rand_shift = chain.rand * deltamax 168 | 169 | 170 | class StreamhashProjector: 171 | def __init__(self, num_components, density=1 / 3.0): 172 | self.keys = np.arange(0, num_components, 1) 173 | self.constant = np.sqrt(1.0 / density) / np.sqrt(num_components) 174 | self.density = density 175 | self.n_components = num_components 176 | 177 | def transform(self, X): 178 | """Projects particular (next) timestep's vector to (possibly) lower dimensional space. 179 | 180 | Args: 181 | X (float array of shape (num_features,)): Input feature vector. 182 | 183 | Returns: 184 | projected_X (float array of shape (num_components,)): Projected feature vector. 
185 | """ 186 | ndim = X.shape[0] 187 | 188 | feature_names = [str(i) for i in range(ndim)] 189 | 190 | R = np.array( 191 | [ 192 | [self._hash_string(k, f) for f in feature_names] 193 | for k in self.keys 194 | ] 195 | ) 196 | 197 | Y = np.dot(X, R.T).squeeze() 198 | 199 | return Y 200 | 201 | def _hash_string(self, k, s): 202 | hash_value = int(mmh3.hash(s, signed=False, seed=k)) / (2.0**32 - 1) 203 | s = self.density 204 | if hash_value <= s / 2.0: 205 | return -1 * self.constant 206 | elif hash_value <= s: 207 | return self.constant 208 | else: 209 | return 0 210 | 211 | 212 | if __name__ == "__main__": 213 | import cProfile 214 | from line_profiler import LineProfiler 215 | 216 | lp = LineProfiler() 217 | 218 | model = xStreamDetector() 219 | 220 | lp.add_function(_Chain.fit) 221 | lp.add_function(_Chain.score) 222 | lp.add_function(_Chain.bincount) 223 | # lp.add_function(model.fit) 224 | # lp.add_function(model.score) 225 | lp_wrapper = lp(model.fit_score) 226 | 227 | for i in range(1500): 228 | # lp_wrapper(np.array([i])) 229 | model.fit_score(np.array([i])) 230 | 231 | lp.print_stats() 232 | -------------------------------------------------------------------------------- /streamad/model/zscore_Detector.py: -------------------------------------------------------------------------------- 1 | from streamad.base import BaseDetector 2 | import numpy as np 3 | from streamad.util import StreamStatistic 4 | 5 | 6 | class ZScoreDetector(BaseDetector): 7 | def __init__(self, is_global: bool = False, **kwargs): 8 | """Univariate Z-Score Detecto :cite:`enwiki:1086685336` 9 | 10 | Args: 11 | window_len (int, optional): Length of the window for reference. Defaults to 50. 12 | is_global (bool, optional): Whether to detect anomalies from a global view. Defaults to False. 
13 | """ 14 | super().__init__(data_type="univariate", **kwargs) 15 | 16 | self.stat = StreamStatistic( 17 | is_global=is_global, window_len=self.window_len 18 | ) 19 | 20 | def fit(self, X: np.ndarray, timestamp: int = None): 21 | self.stat.update(X[0]) 22 | return self 23 | 24 | def score(self, X: np.ndarray, timestamp: int = None): 25 | mean = self.stat.get_mean() 26 | std = self.stat.get_std() 27 | 28 | score = np.divide( 29 | (X[0] - mean), std, out=np.zeros_like(X[0]), where=std != 0 30 | ) 31 | 32 | return score 33 | -------------------------------------------------------------------------------- /streamad/model/zspot_Detector.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import heapq 3 | from collections import deque 4 | from copy import deepcopy 5 | 6 | import numpy as np 7 | from streamad.base import BaseDetector 8 | 9 | 10 | class ZSpotDetector(BaseDetector): 11 | def __init__( 12 | self, 13 | back_mean_len: int = 20, 14 | num_over_threshold: int = 30, 15 | deviance_ratio: float = 0.01, 16 | z: int = 2, 17 | expire_days: int = 14, 18 | ignore_n: int = 10, 19 | **kwargs 20 | ): 21 | 22 | super().__init__(data_type="univariate", **kwargs) 23 | 24 | self.deviance_ratio = deviance_ratio 25 | 26 | self.back_mean_len = back_mean_len 27 | self.back_mean_window = deque(maxlen=max(self.back_mean_len, 2)) 28 | 29 | self.num_over_threshold = num_over_threshold 30 | 31 | nonedict = {"up": None, "down": None} 32 | 33 | self.extreme_quantile = dict.copy(nonedict) 34 | self.local_init_threshold = dict.copy(nonedict) 35 | self.global_init_threshold = dict.copy(nonedict) 36 | 37 | self.last_date = None 38 | 39 | self.date = deque(maxlen=expire_days) 40 | self.date_peaks = deque(maxlen=expire_days) 41 | 42 | self.history_peaks = {"up": [], "down": []} 43 | self.normal_X = None 44 | self.time_X = None 45 | self.z = z 46 | self.ignore_n = ignore_n 47 | 48 | assert self.window_len > self.ignore_n + self.back_mean_len, "window_len must be larger than (ignore_n + back_mean_len)" 49 | 50 | def _update_oneside(self, side: str, init: bool = False): 51 | if side == "up": 52 | if init is False: 53 | self.local_init_threshold[side] = heapq.heappushpop( 54 | self.history_peaks[side], self.normal_X 55 | ) 56 | else: 57 | self.local_init_threshold[side] = self.history_peaks[side][0] 58 | 59 | peaks = deepcopy(self.history_peaks[side]) 60 | for i in self.date_peaks: 61 | peaks.extend(i[side]) 62 | 63 | selected_peaks = heapq.nlargest(self.num_over_threshold, peaks) 64 | self.global_init_threshold[side] = selected_peaks[-1] 65 | selected_peaks = np.array(selected_peaks) - np.array( 66 | self.global_init_threshold[side] 67 | ) 68 | std = np.sqrt( 69 | np.sum([i**2 for i in selected_peaks]) 70 | / self.num_over_threshold 71 | ) 72 | self.extreme_quantile[side] = ( 73 | self.global_init_threshold[side] + self.z * std 74 | ) 75 | elif side == "down": 76 | 77 | if init is False: 78 | self.local_init_threshold[side] = -heapq.heappushpop( 79 | self.history_peaks[side], -self.normal_X 80 | ) 81 | else: 82 | self.local_init_threshold[side] = -self.history_peaks[side][0] 83 | 84 | peaks = deepcopy(self.history_peaks[side]) 85 | for i in self.date_peaks: 86 | peaks.extend(i[side]) 87 | 88 | selected_peaks = heapq.nlargest(self.num_over_threshold, peaks) 89 | self.global_init_threshold[side] = -selected_peaks[-1] 90 | selected_peaks = np.array(selected_peaks) + np.array( 91 | self.global_init_threshold[side] 92 | ) 93 | std = np.sqrt( 94 | np.sum([i**2 for i in 
selected_peaks]) 95 | / self.num_over_threshold 96 | ) 97 | self.extreme_quantile[side] = ( 98 | self.global_init_threshold[side] - self.z * std 99 | ) 100 | 101 | else: 102 | raise NotImplementedError 103 | 104 | def _cal_back_mean(self, X): 105 | 106 | back_mean = np.array(0) 107 | 108 | if self.back_mean_len == 1: 109 | # least back_mean_window is 2 110 | back_mean = self.back_mean_window[-1] 111 | elif self.back_mean_len > 1: 112 | back_mean = np.mean(self.back_mean_window) 113 | 114 | return X - back_mean 115 | 116 | def fit(self, X: np.ndarray, timestamp=None): 117 | """Fit the data to the detector. 118 | 119 | Args: 120 | X (np.ndarray): Data of current observation. 121 | """ 122 | X = float(X[0]) 123 | 124 | if self.index >= self.back_mean_len + self.ignore_n: 125 | self.normal_X = self._cal_back_mean(X) 126 | self.time_X = datetime.datetime.fromtimestamp(timestamp) 127 | 128 | if self.last_date is None: 129 | self.last_date = self.time_X.date() 130 | self.history_peaks["up"] = [self.normal_X] 131 | self.history_peaks["down"] = [-self.normal_X] 132 | 133 | elif self.last_date != self.time_X.date(): 134 | self.date.append(self.last_date) 135 | self.date_peaks.append(deepcopy(self.history_peaks)) 136 | self.last_date = self.time_X.date() 137 | self.history_peaks["up"] = [self.normal_X] 138 | self.history_peaks["down"] = [-self.normal_X] 139 | 140 | elif self.last_date == self.time_X.date(): 141 | if len(self.history_peaks["up"]) < self.num_over_threshold: 142 | heapq.heappush(self.history_peaks["up"], self.normal_X) 143 | # We use negative x to simulate a maxheap 144 | heapq.heappush(self.history_peaks["down"], -self.normal_X) 145 | 146 | # if len(self.history_peaks["up"]) == self.num_over_threshold: 147 | self._update_oneside("up", init=True) 148 | self._update_oneside("down", init=True) 149 | 150 | elif self.normal_X > self.local_init_threshold["up"]: 151 | self._update_oneside("up") 152 | elif self.normal_X < self.local_init_threshold["down"]: 153 | self._update_oneside("down") 154 | 155 | if self.index >= self.ignore_n: 156 | self.back_mean_window.append(X) 157 | return self 158 | 159 | def score(self, X: np.ndarray, timestamp=None) -> float: 160 | 161 | curr_X = self.back_mean_window[-1] 162 | last_X = self.back_mean_window[-2] 163 | 164 | if ( 165 | abs( 166 | np.divide( 167 | curr_X - last_X, last_X, np.array(curr_X), where=last_X != 0 168 | ) 169 | ) 170 | < self.deviance_ratio 171 | ): 172 | score = 0.0 173 | 174 | elif ( 175 | self.normal_X > self.extreme_quantile["up"] 176 | or self.normal_X < self.extreme_quantile["down"] 177 | ): 178 | score = 1.0 179 | 180 | elif self.normal_X > self.global_init_threshold["up"]: 181 | side = "up" 182 | score = np.divide( 183 | self.normal_X - self.global_init_threshold[side], 184 | ( 185 | self.extreme_quantile[side] 186 | - self.global_init_threshold[side] 187 | ), 188 | np.array(0.9), 189 | where=( 190 | self.extreme_quantile[side] 191 | - self.global_init_threshold[side] 192 | != 0 193 | ), 194 | ) 195 | elif self.normal_X < self.global_init_threshold["down"]: 196 | side = "down" 197 | score = np.divide( 198 | self.global_init_threshold[side] - self.normal_X, 199 | ( 200 | self.global_init_threshold[side] 201 | - self.extreme_quantile[side] 202 | ), 203 | np.array(0.5), 204 | where=( 205 | self.global_init_threshold[side] 206 | - self.extreme_quantile[side] 207 | != 0 208 | ), 209 | ) 210 | else: 211 | score = 0.0 212 | 213 | return float(score) 214 | -------------------------------------------------------------------------------- 
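# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not a repository file): a minimal example of how
# the detectors above are typically wired to the calibrators defined below,
# assuming only the APIs visible in this dump: StreamGenerator.iter_item(),
# the detectors' fit_score(), ZScoreCalibrator.normalize(), and util.plot().
# That fit_score() may return None while a detector is still warming up is an
# assumption, inferred from the None-guards in the calibrators and ensembles.
import numpy as np

from streamad.model import SpotDetector
from streamad.process import ZScoreCalibrator
from streamad.util import StreamGenerator, UnivariateDS, plot

ds = UnivariateDS()                      # bundled univariate dataset
stream = StreamGenerator(ds.data)        # replay the static data as a stream
detector = SpotDetector()
calibrator = ZScoreCalibrator(sigma=3, extreme_sigma=5)

scores = []
for x in stream.iter_item():
    raw = detector.fit_score(x)          # raw anomaly score, may be None during warm-up
    scores.append(calibrator.normalize(raw))

fig = plot(
    data=ds.data,
    scores=np.array(scores, dtype=float),  # None values become NaN and show as gaps
    date=ds.date,
    features=ds.features,
    label=ds.label,
)
fig.show()
# ---------------------------------------------------------------------------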
/streamad/process/__init__.py: -------------------------------------------------------------------------------- 1 | from .zscore_calibrator import ZScoreCalibrator 2 | from .tdigest_calibrator import TDigestCalibrator 3 | from .weight_ensemble import WeightEnsemble 4 | from .vote_ensemble import VoteEnsemble 5 | 6 | __all__ = [ 7 | "ZScoreCalibrator", 8 | "TDigestCalibrator", 9 | "WeightEnsemble", 10 | "VoteEnsemble", 11 | ] 12 | -------------------------------------------------------------------------------- /streamad/process/tdigest_calibrator.py: -------------------------------------------------------------------------------- 1 | from tdigest import TDigest 2 | from collections import deque 3 | 4 | 5 | class TDigestCalibrator: 6 | def __init__( 7 | self, 8 | percentile_up: float = 95, 9 | percentile_down: float = 5, 10 | is_global: bool = True, 11 | window_len: int = 100, 12 | ) -> None: 13 | """A calibrator which can filter out outliers using t-digest, and normalize the anomaly scores into [0,1] :cite:`DBLP:journals/simpa/Dunning21`. 14 | 15 | Args: 16 | percentile_up (float, optional): We regard the scores above `percentile_up` as anomalies. Defaults to 95. 17 | percentile_down (float, optional): We regard the scores below `percentile_down` as anomalies. Defaults to 5. 18 | is_global (bool, optional): Whether to record scores globally or over a rolling window. Defaults to True. 19 | window_len (int, optional): The length of rolling window, ignore this when `is_global=True`. Defaults to 100. 20 | """ 21 | self.percentile_up = percentile_up 22 | self.percentile_down = percentile_down 23 | self.init_data = [] 24 | self.init_flag = False 25 | 26 | assert ( 27 | percentile_up >= 0 28 | and percentile_up <= 100 29 | and percentile_down >= 0 30 | and percentile_down <= 100 31 | ), "percentile must be between 0 and 100" 32 | 33 | self.is_global = is_global 34 | self.score_stats = TDigest() 35 | self.score_deque = deque(maxlen=window_len) 36 | 37 | def normalize(self, score: float) -> float: 38 | if score is None: 39 | return None 40 | 41 | self.score_deque.append(score) 42 | 43 | if self.is_global: 44 | self.score_stats.update(score) 45 | else: 46 | self.score_stats = TDigest() 47 | self.score_stats.batch_update(self.score_deque) 48 | if self.score_deque.maxlen != len(self.score_deque): 49 | return None 50 | 51 | percentile_up = self.score_stats.percentile(self.percentile_up) 52 | percentile_down = self.score_stats.percentile(self.percentile_down) 53 | 54 | if score > percentile_up or score < percentile_down: 55 | score = 1.0 56 | else: 57 | score = 0.0 58 | 59 | return score 60 | -------------------------------------------------------------------------------- /streamad/process/vote_ensemble.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class VoteEnsemble: 5 | def __init__(self, threshold: float = 0.8): 6 | """Anomaly scores ensemble with votes. 7 | 8 | Args: 9 | threshold (float, optional): Anomaly scores over the threshold are regarded as votes. Defaults to 0.8. 10 | """ 11 | self.thredshold = threshold 12 | 13 | def ensemble(self, scores: list): 14 | """Ensemble anomaly scores from ordered detectors. 15 | 16 | Args: 17 | scores (list): A list of anomaly scores with orders. 18 | 19 | Returns: 20 | float: Ensembled anomaly scores.
21 | """ 22 | 23 | assert ( 24 | type(scores) == list or type(scores) == np.ndarray 25 | ), "Unsupported score types, it should be list or numpy.ndarray" 26 | 27 | if (np.array(scores) == None).any(): 28 | return None 29 | 30 | assert ( 31 | (np.array(scores) >= 0) & (np.array(scores) <= 1) 32 | ).all(), ( 33 | "Scores should be in [0,1], you can call calibrator before ensemble" 34 | ) 35 | 36 | votes = np.array(scores) >= self.thredshold 37 | 38 | if sum(votes) > len(votes) / 2: 39 | return 1.0 40 | 41 | return 0.0 42 | -------------------------------------------------------------------------------- /streamad/process/weight_ensemble.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class WeightEnsemble: 5 | def __init__(self, ensemble_weights: list = None): 6 | """Anomaly scores ensemble with weighted average. 7 | 8 | Args: 9 | ensemble_weights (list, optional): Weights for scores with orders, we use equal weights/mean to recalculate the scores when it is None. Defaults to None. 10 | """ 11 | 12 | assert ( 13 | ensemble_weights is None 14 | or isinstance(ensemble_weights, (list, np.ndarray)) 15 | ) 16 | 17 | self.weights = ensemble_weights 18 | self.sum_weights = np.sum(self.weights) if ensemble_weights is not None else None 19 | 20 | def ensemble(self, scores: list) -> float: 21 | """Ensemble anomaly scores from ordered detectors. 22 | 23 | Args: 24 | scores (list): A list of anomaly scores with orders. 25 | 26 | Returns: 27 | float: Ensembled anomaly scores. 28 | """ 29 | 30 | assert ( 31 | type(scores) == list or type(scores) == np.ndarray 32 | ), "Unsupported score types, it should be list or numpy.ndarray" 33 | 34 | assert self.weights is None or len(scores) == len( 35 | self.weights 36 | ), "Inconsistent weights and scores length" 37 | 38 | if (np.array(scores) == None).any(): 39 | return None 40 | 41 | assert ( 42 | (np.array(scores) >= 0) & (np.array(scores) <= 1) 43 | ).all(), ( 44 | "Scores should be in [0,1], you can call calibrator before ensemble" 45 | ) 46 | 47 | if self.weights is None: 48 | return np.mean(scores) 49 | 50 | return np.dot(scores, self.weights) / self.sum_weights 51 | -------------------------------------------------------------------------------- /streamad/process/zscore_calibrator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from streamad.util import StreamStatistic 3 | 4 | 5 | class ZScoreCalibrator: 6 | def __init__( 7 | self, 8 | sigma: int = 3, 9 | extreme_sigma: int = 5, 10 | is_global: bool = True, 11 | window_len: int = 100, 12 | ) -> None: 13 | """A calibrator which can filter out outliers using z-score, and normalize the anomaly scores into [0,1]. 14 | 15 | Args: 16 | sigma (int, optional): Zscore threshold, we regard the scores out of sigma as potential anomalies. Defaults to 3. 17 | extreme_sigma (int, optional): Zscore threshold for extreme values, we regard the scores out of extreme_sigma as extreme anomalies. Defaults to 5. 18 | is_global (bool, optional): Whether to record scores globally or over a rolling window. Defaults to True. 19 | window_len (int, optional): The length of rolling window, ignore this when `is_global=True`. Defaults to 100.
20 | """ 21 | self.sigma = sigma 22 | self.extreme_sigma = extreme_sigma 23 | self.init_data = [] 24 | self.init_flag = False 25 | self.score_stats = StreamStatistic( 26 | is_global=is_global, window_len=window_len 27 | ) 28 | 29 | def normalize(self, score: float) -> float: 30 | 31 | if score is None: 32 | return None 33 | 34 | self.score_stats.update(score) 35 | 36 | if ( 37 | self.score_stats._window.maxlen != len(self.score_stats._window) 38 | and self.score_stats._window.maxlen >= self.score_stats._num_items 39 | ): 40 | return None 41 | 42 | score_mean = self.score_stats.get_mean() 43 | score_std = self.score_stats.get_std() 44 | 45 | sigma = np.divide( 46 | (score - score_mean), 47 | score_std, 48 | out=np.array((score - score_mean) / 1e-5), 49 | where=score_std != 0, 50 | ) 51 | sigma = abs(sigma) 52 | 53 | if sigma > self.extreme_sigma: 54 | return 1.0 55 | elif sigma > self.sigma: 56 | score_max = self.score_stats.get_max() 57 | score_min = self.score_stats.get_min() 58 | score = np.divide( 59 | (score - score_min), 60 | (score_max - score_min), 61 | out=min(np.array((score - score_min) / 1e-5), np.array(1.0)), 62 | where=score_max != score_min, 63 | ) 64 | score = abs(score) 65 | else: 66 | return 0.0 67 | 68 | return score 69 | -------------------------------------------------------------------------------- /streamad/util/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | # 4 | # Author: liufr 5 | # Github: https://github.com/Fengrui-Liu 6 | # LastEditTime: 2021-01-11 14:35:09 7 | # Copyright 2021 liufr 8 | # Description: 9 | # 10 | 11 | from .stream_generator import StreamGenerator 12 | from .math_toolkit import StreamStatistic 13 | from .dataset import MultivariateDS, UnivariateDS, CustomDS 14 | from .plot import plot 15 | 16 | 17 | __all__ = [ 18 | "StreamGenerator", 19 | "StreamStatistic", 20 | "MultivariateDS", 21 | "UnivariateDS", 22 | "CustomDS", 23 | "plot", 24 | ] 25 | -------------------------------------------------------------------------------- /streamad/util/dataset.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from os.path import dirname, join 3 | from typing import Union 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | warnings.simplefilter(action="ignore", category=FutureWarning) 9 | 10 | 11 | class DS: 12 | def __init__(self) -> None: 13 | 14 | self.data = None 15 | self.date = None 16 | self.label = None 17 | self.features = None 18 | self.names = None 19 | 20 | def preprocess(self) -> None: 21 | self.preprocess_data() 22 | self.preprocess_timestamp() 23 | self.preprocess_label() 24 | self.preprocess_feature() 25 | 26 | def preprocess_data(self) -> None: 27 | if type(self.path) == str: 28 | try: 29 | self.data = pd.read_csv(self.path) 30 | except FileNotFoundError: 31 | print("Cannot read this file:", self.path) 32 | elif type(self.path) == np.ndarray: 33 | self.data = pd.DataFrame(self.path) 34 | elif type(self.path) == pd.DataFrame: 35 | self.data = self.path 36 | self.names = self.data.columns.values 37 | 38 | def preprocess_timestamp(self) -> None: 39 | if "timestamp" in self.names.tolist(): 40 | self.date = self.data["timestamp"].values 41 | else: 42 | self.date = self.data.index.values 43 | 44 | def preprocess_label(self) -> None: 45 | if "label" in self.names.tolist(): 46 | self.label = np.array(self.data["label"].values) 47 | 48 | def preprocess_feature(self) -> None: 49 | self.features =
np.setdiff1d( 50 | self.names, np.array(["label", "timestamp"]) 51 | ) 52 | self.data = np.array(self.data[self.features]) 53 | 54 | 55 | class MultivariateDS(DS): 56 | """ 57 | Load multivariate dataset. 58 | """ 59 | 60 | def __init__(self, has_names=False) -> None: 61 | super().__init__() 62 | module_path = dirname(__file__) 63 | self.path = join(module_path, "data", "multiDS.csv") 64 | self.preprocess() 65 | 66 | 67 | class UnivariateDS(DS): 68 | """ 69 | Load univariate dataset. 70 | """ 71 | 72 | def __init__(self) -> None: 73 | super().__init__() 74 | module_path = dirname(__file__) 75 | self.path = join(module_path, "data", "uniDS.csv") 76 | self.preprocess() 77 | 78 | 79 | class CustomDS(DS): 80 | """ 81 | Load custom dataset. 82 | Args: 83 | f_path (Union[str, np.ndarray]): Dataset or its path. 84 | label (np.ndarray, optional): Anomaly labels for dataset. Defaults to None. 85 | """ 86 | 87 | def __init__( 88 | self, f_path: Union[str, np.ndarray], label: np.ndarray = None 89 | ): 90 | 91 | super().__init__() 92 | self.path = f_path 93 | self.label = label 94 | self.preprocess() 95 | -------------------------------------------------------------------------------- /streamad/util/math_toolkit.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | from collections import deque, defaultdict 5 | 6 | 7 | class StreamStatistic: 8 | """Data statistics for the streaming data, with supporting max, min, sum, mean, sum of squares, var, std and standard scaler.""" 9 | 10 | def __init__(self, is_global: bool = True, window_len: int = 10): 11 | """Statistics for the streaming data, with supporting max, min, sum, mean, sum of squares, var, std and standard scaler. 12 | 13 | Args: 14 | is_global (bool, optional): For whole stream or a windowed stream. Defaults to True. 15 | window_len (int, optional): Rolloing window length. Only works when is_global is False. Defaults to 10. 
16 | """ 17 | self._is_uni = False 18 | self._is_global = is_global 19 | self._window = deque(maxlen=window_len) 20 | self._num_items = 0 21 | 22 | self._max = defaultdict(lambda: -math.inf) 23 | self._min = defaultdict(lambda: math.inf) 24 | self._sum = defaultdict(float) 25 | self._mean = defaultdict(float) 26 | self._sum_squares = defaultdict(float) 27 | self._var = defaultdict(float) 28 | self._std = defaultdict(float) 29 | 30 | def update(self, X: np.ndarray): 31 | """Update a pd.Series to stream 32 | 33 | Args: 34 | X (np.ndarray): An item from StreamGenerator 35 | 36 | """ 37 | 38 | self._num_items += 1 39 | 40 | if isinstance(X, int) or isinstance(X, float): 41 | X = np.array([X]) 42 | self._is_uni = True 43 | elif isinstance(X, np.ndarray): 44 | X = np.array([X]).flatten() 45 | if len(X) == 1: 46 | self._is_uni = True 47 | else: 48 | self._is_uni = False 49 | else: 50 | raise NotImplementedError("Only support int, float and np.ndarray") 51 | 52 | if self._is_global: 53 | 54 | tmp = defaultdict(float) 55 | 56 | for index, item in enumerate(X): 57 | self._max[index] = ( 58 | self._max[index] if self._max[index] > item else item 59 | ) 60 | self._min[index] = ( 61 | self._min[index] if self._min[index] < item else item 62 | ) 63 | self._sum[index] += X[index] 64 | old_mean = self._mean[index] 65 | tmp[index] = item - self._mean[index] 66 | self._mean[index] = self._sum[index] / self._num_items 67 | self._sum_squares[index] += (X[index] - old_mean) * ( 68 | X[index] - self._mean[index] 69 | ) 70 | self._var[index] = self._sum_squares[index] / self._num_items 71 | self._std[index] = math.sqrt(self._var[index]) 72 | else: 73 | self._window.append(X) 74 | 75 | def get_max(self): 76 | """ 77 | Get max statistic. 78 | """ 79 | 80 | if self._is_global: 81 | result = [_ for _ in self._max.values()] 82 | else: 83 | result = np.max(self._window, axis=0) 84 | 85 | return result[0] if self._is_uni else np.array(result) 86 | 87 | def get_min(self): 88 | """ 89 | Get min statistic. 90 | """ 91 | 92 | if self._is_global: 93 | result = [_ for _ in self._min.values()] 94 | else: 95 | result = np.min(self._window, axis=0) 96 | 97 | return result[0] if self._is_uni else np.array(result) 98 | 99 | def get_mean(self): 100 | """ 101 | Get mean statistic. 102 | """ 103 | 104 | if self._is_global: 105 | result = [_ for _ in self._mean.values()] 106 | else: 107 | result = np.mean(self._window, axis=0) 108 | 109 | return result[0] if self._is_uni else np.array(result) 110 | 111 | def get_std(self): 112 | """ 113 | Get max statistic. 114 | """ 115 | 116 | if self._is_global: 117 | result = [_ for _ in self._std.values()] 118 | else: 119 | result = np.std(self._window, axis=0) 120 | 121 | return result[0] if self._is_uni else np.array(result) 122 | 123 | def get_sum(self): 124 | """ 125 | Get sum statistic. 126 | """ 127 | 128 | if self._is_global: 129 | result = [_ for _ in self._sum.values()] 130 | else: 131 | result = np.sum(self._window, axis=0) 132 | 133 | return result[0] if self._is_uni else np.array(result) 134 | 135 | def get_var(self): 136 | """ 137 | Get var statistic. 
138 | """ 139 | 140 | if self._is_global: 141 | result = [_ for _ in self._var.values()] 142 | else: 143 | result = np.var(self._window, axis=0) 144 | 145 | return result[0] if self._is_uni else np.array(result) 146 | 147 | 148 | class SDFT: 149 | def __init__(self, window_len) -> None: 150 | self.window_len = window_len 151 | self.window = deque(maxlen=window_len) 152 | self.coefficients = deque(maxlen=window_len) 153 | 154 | def update(self, X: np.ndarray): 155 | # def _get_coefficients(coeff, diff, i): 156 | # self.coefficients[i] = (coeff + diff) * np.exp( 157 | # 2j * np.pi * i / self.window_len 158 | # ) 159 | 160 | # return 161 | 162 | if len(self.window) < self.window_len - 1: 163 | self.window.append(X) 164 | elif len(self.window) == self.window_len - 1: 165 | self.window.append(X) 166 | self.coefficients.extend(np.fft.fft(self.window)) 167 | else: 168 | diff = X - self.window[0] 169 | 170 | for i, c in enumerate(self.coefficients): 171 | self.coefficients[i] = (c + diff) * np.exp( 172 | 2j * np.pi * i / self.window_len 173 | ) 174 | 175 | # This vectorize seems to be slower than the loop above 176 | # vfunc = np.vectorize(_get_coefficients) 177 | # vfunc( 178 | # self.coefficients, diff, [i for i in range(self.window_len)] 179 | # ) 180 | self.window.append(X) 181 | 182 | return self 183 | -------------------------------------------------------------------------------- /streamad/util/plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import plotly.graph_objects as go 3 | from plotly.subplots import make_subplots 4 | 5 | 6 | def plot( 7 | data: np.ndarray, 8 | scores: np.ndarray, 9 | date: np.ndarray = None, 10 | features: np.ndarray = None, 11 | label: np.ndarray = None, 12 | ): 13 | """Plot data, score and ground truth (if exists). 14 | 15 | Args: 16 | data (np.array): Original data stream. 17 | scores (np.array): Anomaly scores of the data stream. 18 | date (np.array, optional): Timestamp of the data. Defaults to None. 19 | features (np.array, optional): Features name. Defaults to None. 20 | label (np.array, optional): Ground truth. Defaults to None. 21 | """ 22 | 23 | if features is None: 24 | features = ["f" + str(i) for i in range(np.array(data).shape[1])] 25 | else: 26 | assert ( 27 | len(features) == data.shape[1] 28 | ), "Number of features must match data dimension." 29 | 30 | if date is None: 31 | date = [i for i in range(np.array(data).shape[0])] 32 | else: 33 | assert ( 34 | len(date) == data.shape[0] 35 | ), "Number of date must match data dimension." 
36 | 37 | height = 100 * len(features) + 80 38 | row_heights = [100 / height for _ in range(len(features))] 39 | row_heights.append(80 / height) 40 | 41 | fig = make_subplots( 42 | rows=len(features) + 1, 43 | cols=1, 44 | shared_xaxes=True, 45 | vertical_spacing=20 / height, 46 | row_heights=row_heights, 47 | ) 48 | 49 | # Plot data by features 50 | for i, feature in enumerate(features): 51 | anomalies = np.where(label == 1)[0] if label is not None else [] 52 | fig.add_trace( 53 | go.Scatter( 54 | x=date, 55 | y=data[:, i], 56 | mode="lines+markers", 57 | name=str(feature), 58 | selectedpoints=anomalies, 59 | selected=dict(marker=dict(color="red", size=5)), 60 | unselected=dict(marker=dict(size=0)), 61 | ), 62 | row=i + 1, 63 | col=1, 64 | ) 65 | 66 | # Plot score 67 | fig.add_trace( 68 | go.Scatter(x=date, y=scores, name="anomaly score", marker_color="red"), 69 | row=len(features) + 1, 70 | col=1, 71 | ) 72 | # fig.update_xaxes(rangeslider={"visible": True}, row=2, col=1) 73 | fig.update_layout( 74 | margin=dict(l=10, r=10, t=10, b=10), 75 | legend=dict( 76 | orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1 77 | ), 78 | height=height, 79 | ) 80 | return fig 81 | -------------------------------------------------------------------------------- /streamad/util/stream_generator.py: -------------------------------------------------------------------------------- 1 | from typing import Generator 2 | 3 | import numpy as np 4 | 5 | 6 | class StreamGenerator: 7 | """Load a static dataset and generate one observation at a time. 8 | 9 | Args: 10 | X (np.ndarray): Original static dataset. 11 | 12 | Raises: 13 | TypeError: Unexpected input data type. 14 | """ 15 | 16 | def __init__( 17 | self, X: np.ndarray, 18 | ): 19 | 20 | if isinstance(X, np.ndarray): 21 | self.X = X 22 | else: 23 | raise TypeError("Unexpected input data type, expected np.ndarray.") 24 | 25 | def iter_item(self) -> Generator: 26 | """Iterate over the dataset, yielding one item at a time. 27 | 28 | Yields: 29 | Generator: One observation from the dataset.
30 | """ 31 | 32 | for i in range(len(self.X)): 33 | yield self.X[i] 34 | -------------------------------------------------------------------------------- /streamad/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.3.1" # pragma: no cover 2 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fengrui-Liu/StreamAD/d2e38f4c35349b05c9bbd3ac753efc9a96e0ab05/test/__init__.py -------------------------------------------------------------------------------- /test/test_OCSVM.py: -------------------------------------------------------------------------------- 1 | from streamad.util import StreamGenerator, UnivariateDS, MultivariateDS 2 | from streamad.model import OCSVMDetector 3 | 4 | 5 | def test_score(): 6 | ds = UnivariateDS() 7 | stream = StreamGenerator(ds.data) 8 | detector = OCSVMDetector() 9 | for x in stream.iter_item(): 10 | score = detector.fit_score(x) 11 | 12 | if score is not None: 13 | assert type(score) is float 14 | 15 | 16 | def test_multi_score(): 17 | ds = MultivariateDS() 18 | stream = StreamGenerator(ds.data) 19 | detector = OCSVMDetector() 20 | for x in stream.iter_item(): 21 | score = detector.fit_score(x) 22 | 23 | if score is not None: 24 | assert type(score) is float 25 | -------------------------------------------------------------------------------- /test/test_calibrator.py: -------------------------------------------------------------------------------- 1 | from streamad.util import StreamGenerator, UnivariateDS 2 | from streamad.model import KNNDetector 3 | from streamad.process import ZScoreCalibrator, TDigestCalibrator 4 | 5 | 6 | def test_ZScoreCalibrator(): 7 | ds = UnivariateDS() 8 | stream = StreamGenerator(ds.data) 9 | detector = KNNDetector() 10 | calibrator = ZScoreCalibrator(sigma=2, extreme_sigma=3) 11 | 12 | for x in stream.iter_item(): 13 | score = detector.fit_score(x) 14 | score = calibrator.normalize(score) 15 | if score is not None: 16 | assert 0 <= score <= 1 17 | 18 | def test_ZScoreCalibrator_global(): 19 | ds = UnivariateDS() 20 | stream = StreamGenerator(ds.data) 21 | detector = KNNDetector() 22 | calibrator = ZScoreCalibrator(sigma=2, is_global=True) 23 | 24 | for x in stream.iter_item(): 25 | score = detector.fit_score(x) 26 | score = calibrator.normalize(score) 27 | if score is not None: 28 | assert 0 <= score <= 1 29 | 30 | 31 | def test_TDigestCalibrator(): 32 | ds = UnivariateDS() 33 | stream = StreamGenerator(ds.data) 34 | detector = KNNDetector() 35 | calibrator = TDigestCalibrator(percentile_up=93, percentile_down=0) 36 | 37 | for x in stream.iter_item(): 38 | score = detector.fit_score(x) 39 | normalized_score = calibrator.normalize(score) 40 | if normalized_score is not None: 41 | assert 0 <= normalized_score <= 1 42 | 43 | 44 | def test_TDigestCalibrator_global(): 45 | ds = UnivariateDS() 46 | stream = StreamGenerator(ds.data) 47 | detector = KNNDetector() 48 | calibrator = TDigestCalibrator( 49 | percentile_up=93, percentile_down=0, is_global=True 50 | ) 51 | 52 | for x in stream.iter_item(): 53 | score = detector.fit_score(x) 54 | score = calibrator.normalize(score) 55 | if score is not None: 56 | assert 0 <= score <= 1 57 | -------------------------------------------------------------------------------- /test/test_ensemble.py: -------------------------------------------------------------------------------- 1 
| from streamad.util import StreamGenerator, UnivariateDS 2 | from streamad.model import KNNDetector, SpotDetector 3 | from streamad.process import ZScoreCalibrator, VoteEnsemble, WeightEnsemble 4 | 5 | 6 | def test_VoteEnsemble(): 7 | 8 | ds = UnivariateDS() 9 | stream = StreamGenerator(ds.data) 10 | knn_detector = KNNDetector() 11 | spot_detector = SpotDetector() 12 | knn_calibrator = ZScoreCalibrator(sigma=2) 13 | spot_calibrator = ZScoreCalibrator(sigma=2) 14 | ensemble = VoteEnsemble(threshold=0.8) 15 | 16 | for x in stream.iter_item(): 17 | 18 | knn_score = knn_detector.fit_score(x) 19 | spot_score = spot_detector.fit_score(x) 20 | 21 | knn_normalized_score = knn_calibrator.normalize(knn_score) 22 | spot_normalized_score = spot_calibrator.normalize(spot_score) 23 | 24 | score = ensemble.ensemble([knn_normalized_score, spot_normalized_score]) 25 | if score is not None: 26 | assert 0 <= score <= 1 27 | 28 | 29 | def test_WeightEnsemble(): 30 | 31 | ds = UnivariateDS() 32 | stream = StreamGenerator(ds.data) 33 | knn_detector = KNNDetector() 34 | spot_detector = SpotDetector() 35 | knn_calibrator = ZScoreCalibrator(sigma=3) 36 | spot_calibrator = ZScoreCalibrator(sigma=3) 37 | ensemble = WeightEnsemble(ensemble_weights=[0.6, 0.4]) 38 | 39 | for x in stream.iter_item(): 40 | knn_score = knn_detector.fit_score(x) 41 | spot_score = spot_detector.fit_score(x) 42 | 43 | knn_normalized_score = knn_calibrator.normalize(knn_score) 44 | spot_normalized_score = spot_calibrator.normalize(spot_score) 45 | 46 | score = ensemble.ensemble([knn_normalized_score, spot_normalized_score]) 47 | 48 | if score is not None: 49 | assert 0 <= score <= 1 50 | -------------------------------------------------------------------------------- /test/test_evaluate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from streamad.evaluate import ( 3 | NumentaAwareMetircs, 4 | PointAwareMetircs, 5 | SeriesAwareMetircs, 6 | ) 7 | 8 | 9 | def test_point_aware_metrics(): 10 | values_real = np.array([0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0]) 11 | values_pred = np.array([0, 0, 0, None, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0]) 12 | 13 | metric = PointAwareMetircs(anomaly_threshold=0.8) 14 | 15 | (precision, recall, f1,) = metric.evaluate(values_real, values_pred) 16 | assert 0.0 <= precision <= 1.0 17 | assert 0.0 <= recall <= 1.0 18 | assert 0.0 <= f1 <= 1.0 19 | 20 | 21 | def test_series_aware_metrics(): 22 | values_real = np.array([0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0]) 23 | values_pred = np.array([0, 0, 0, None, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0]) 24 | 25 | # Flat bias 26 | metric = SeriesAwareMetircs( 27 | anomaly_threshold=0.8, bias_p="flat", bias_r="flat" 28 | ) 29 | 30 | (precision, recall, f1,) = metric.evaluate(values_real, values_pred) 31 | assert 0.0 <= precision <= 1.0 32 | assert 0.0 <= recall <= 1.0 33 | assert 0.0 <= f1 <= 1.0 34 | 35 | # Front bias 36 | metric = SeriesAwareMetircs( 37 | anomaly_threshold=0.8, bias_p="flat", bias_r="front" 38 | ) 39 | 40 | (precision, recall, f1,) = metric.evaluate(values_real, values_pred) 41 | assert 0.0 <= precision <= 1.0 42 | assert 0.0 <= recall <= 1.0 43 | assert 0.0 <= f1 <= 1.0 44 | 45 | # Middle bias 46 | metric = SeriesAwareMetircs( 47 | anomaly_threshold=0.8, bias_p="flat", bias_r="middle" 48 | ) 49 | 50 | (precision, recall, f1,) = metric.evaluate(values_real, values_pred) 51 | assert 0.0 <= precision <= 1.0 52 | assert 0.0 <= recall <= 1.0 53 | assert 0.0 <= f1 <= 1.0 54 | 55 | # Back bias 
56 | metric = SeriesAwareMetircs( 57 | anomaly_threshold=0.8, bias_p="flat", bias_r="back" 58 | ) 59 | 60 | (precision, recall, f1,) = metric.evaluate(values_real, values_pred) 61 | assert 0.0 <= precision <= 1.0 62 | assert 0.0 <= recall <= 1.0 63 | assert 0.0 <= f1 <= 1.0 64 | 65 | 66 | def test_numenta_aware_metrics(): 67 | values_real = np.array([0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0]) 68 | values_pred = np.array([0, 0, 0, None, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0]) 69 | 70 | metric = NumentaAwareMetircs(anomaly_threshold=0.8) 71 | 72 | (precision, recall, f1,) = metric.evaluate(values_real, values_pred) 73 | assert 0.0 <= precision <= 1.0 74 | assert 0.0 <= recall <= 1.0 75 | assert 0.0 <= f1 <= 1.0 76 | -------------------------------------------------------------------------------- /test/test_hstree.py: -------------------------------------------------------------------------------- 1 | from streamad.util import StreamGenerator, UnivariateDS, MultivariateDS 2 | from streamad.model import HSTreeDetector 3 | 4 | 5 | def test_score(): 6 | ds = UnivariateDS() 7 | stream = StreamGenerator(ds.data) 8 | detector = HSTreeDetector() 9 | for x in stream.iter_item(): 10 | score = detector.fit_score(x) 11 | 12 | if score is not None: 13 | assert type(score) is float 14 | 15 | 16 | def test_multi_score(): 17 | ds = MultivariateDS() 18 | stream = StreamGenerator(ds.data) 19 | detector = HSTreeDetector() 20 | for x in stream.iter_item(): 21 | score = detector.fit_score(x) 22 | 23 | if score is not None: 24 | assert type(score) is float 25 | -------------------------------------------------------------------------------- /test/test_knncad.py: -------------------------------------------------------------------------------- 1 | from streamad.util import StreamGenerator, UnivariateDS 2 | from streamad.model import KNNDetector 3 | 4 | 5 | def test_score(): 6 | ds = UnivariateDS() 7 | stream = StreamGenerator(ds.data) 8 | detector = KNNDetector() 9 | for x in stream.iter_item(): 10 | score = detector.fit_score(x) 11 | 12 | if score is not None: 13 | assert type(score) is float 14 | -------------------------------------------------------------------------------- /test/test_loda.py: -------------------------------------------------------------------------------- 1 | from streamad.util import StreamGenerator, UnivariateDS, MultivariateDS 2 | from streamad.model import LodaDetector 3 | 4 | 5 | def test_score(): 6 | ds = UnivariateDS() 7 | stream = StreamGenerator(ds.data) 8 | detector = LodaDetector() 9 | for x in stream.iter_item(): 10 | score = detector.fit_score(x) 11 | 12 | if score is not None: 13 | assert type(score) is float 14 | 15 | 16 | def test_multi_score(): 17 | ds = MultivariateDS() 18 | stream = StreamGenerator(ds.data) 19 | detector = LodaDetector() 20 | for x in stream.iter_item(): 21 | score = detector.fit_score(x) 22 | 23 | if score is not None: 24 | assert type(score) is float 25 | -------------------------------------------------------------------------------- /test/test_mad.py: -------------------------------------------------------------------------------- 1 | from streamad.util import StreamGenerator, UnivariateDS 2 | from streamad.model import MadDetector 3 | 4 | 5 | def test_score(): 6 | ds = UnivariateDS() 7 | stream = StreamGenerator(ds.data) 8 | detector = MadDetector() 9 | for x in stream.iter_item(): 10 | score = detector.fit_score(x) 11 | 12 | if score is not None: 13 | assert type(score) is float 14 | 
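The detector tests above (KNN, HSTree, Loda, MAD, and the ensemble tests before them) all repeat the same loop: stream a dataset, call fit_score on every item, and assert that any returned score is a float. The parametrized sketch below expresses that shared contract once; it is an illustrative consolidation rather than a file from this repository, and it only uses detector classes that the tests above already import from streamad.model.

import pytest

from streamad.model import KNNDetector, LodaDetector, MadDetector, SpotDetector
from streamad.util import StreamGenerator, UnivariateDS


# Hypothetical consolidated test: every detector must return either None
# (e.g. while it is still warming up) or a plain Python float per streamed item.
@pytest.mark.parametrize(
    "detector_cls", [KNNDetector, LodaDetector, MadDetector, SpotDetector]
)
def test_fit_score_contract(detector_cls):
    ds = UnivariateDS()
    stream = StreamGenerator(ds.data)
    detector = detector_cls()
    for x in stream.iter_item():
        score = detector.fit_score(x)
        if score is not None:
            assert type(score) is float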
-------------------------------------------------------------------------------- /test/test_plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from streamad.util import StreamGenerator, CustomDS, plot 3 | from streamad.model import ZScoreDetector 4 | 5 | 6 | def test_plot(): 7 | n, A, center, phi = 730, 50, 100, 30 8 | T = 2 * np.pi / 100 9 | t = np.arange(n) 10 | ds = A * np.sin(T * t - phi * T) + center 11 | ds[235:255] = 80 12 | label = np.array([0] * n) 13 | label[235:255] = 1 14 | 15 | ds = CustomDS(ds, label) # You can also use a file path here 16 | stream = StreamGenerator(ds.data) 17 | model = ZScoreDetector() 18 | 19 | scores = [] 20 | 21 | for x in stream.iter_item(): 22 | score = model.fit_score(x) 23 | scores.append(score) 24 | # print("\r Anomaly score: {}".format(score), end="", flush="True") 25 | 26 | data, label, date, features = ds.data, ds.label, ds.date, ds.features 27 | plot(data=data, scores=scores, date=date, features=features, label=label) 28 | -------------------------------------------------------------------------------- /test/test_random.py: -------------------------------------------------------------------------------- 1 | from streamad.util import StreamGenerator, UnivariateDS 2 | from streamad.model import RandomDetector 3 | 4 | 5 | def test_score(): 6 | ds = UnivariateDS() 7 | stream = StreamGenerator(ds.data) 8 | detector = RandomDetector() 9 | for x in stream.iter_item(): 10 | score = detector.fit_score(x) 11 | 12 | if score is not None: 13 | assert type(score) is float 14 | -------------------------------------------------------------------------------- /test/test_rrcf.py: -------------------------------------------------------------------------------- 1 | from streamad.util import StreamGenerator, UnivariateDS 2 | from streamad.model import RrcfDetector 3 | 4 | 5 | def test_score(): 6 | ds = UnivariateDS() 7 | stream = StreamGenerator(ds.data) 8 | detector = RrcfDetector() 9 | for x in stream.iter_item(): 10 | score = detector.fit_score(x) 11 | 12 | if score is not None: 13 | assert type(score) is float 14 | -------------------------------------------------------------------------------- /test/test_rshash.py: -------------------------------------------------------------------------------- 1 | from streamad.util import StreamGenerator, UnivariateDS, MultivariateDS 2 | from streamad.model import RShashDetector 3 | 4 | 5 | def test_score(): 6 | ds = UnivariateDS() 7 | stream = StreamGenerator(ds.data) 8 | detector = RShashDetector() 9 | for x in stream.iter_item(): 10 | score = detector.fit_score(x) 11 | 12 | if score is not None: 13 | assert type(score) is float 14 | 15 | 16 | def test_multi_score(): 17 | ds = MultivariateDS() 18 | stream = StreamGenerator(ds.data) 19 | detector = RShashDetector() 20 | for x in stream.iter_item(): 21 | score = detector.fit_score(x) 22 | 23 | if score is not None: 24 | assert type(score) is float 25 | -------------------------------------------------------------------------------- /test/test_sarima.py: -------------------------------------------------------------------------------- 1 | from streamad.util import StreamGenerator, UnivariateDS 2 | from streamad.model import SArimaDetector 3 | 4 | 5 | def test_sarima(): 6 | ds = UnivariateDS() 7 | stream = StreamGenerator(ds.data) 8 | detector = SArimaDetector() 9 | for x in stream.iter_item(): 10 | score = detector.fit_score(x) 11 | if score is not None: 12 | assert type(score) is float 13 | 
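test_plot.py earlier in this test suite only builds the figure; since streamad.util.plot returns a standard plotly Figure (see streamad/util/plot.py above), the same pipeline can also render or export the result. A minimal sketch under that assumption — the detector choice and the output file name are illustrative:

import numpy as np

from streamad.model import ZScoreDetector
from streamad.util import CustomDS, StreamGenerator, plot

# Synthetic sine wave with an injected anomalous segment, as in test_plot.py.
n = 730
t = np.arange(n)
values = 50 * np.sin(2 * np.pi / 100 * (t - 30)) + 100
values[235:255] = 80
label = np.zeros(n, dtype=int)
label[235:255] = 1

ds = CustomDS(values, label)
detector = ZScoreDetector()
scores = [detector.fit_score(x) for x in StreamGenerator(ds.data).iter_item()]

fig = plot(
    data=ds.data, scores=scores, date=ds.date, features=ds.features, label=ds.label
)
fig.write_html("zscore_anomalies.html")  # standard plotly export; fig.show() also works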
-------------------------------------------------------------------------------- /test/test_sdft.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from streamad.util.math_toolkit import SDFT 3 | 4 | 5 | def test_sdft(): 6 | X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 7 | 8 | window_size = 5 9 | sdft = SDFT(window_size) 10 | for i, x in enumerate(X): 11 | sdft = sdft.update(x) 12 | if i + 1 >= window_size: 13 | print("co:", sdft.coefficients) 14 | print("----") 15 | print("np:", np.fft.fft(X[i + 1 - window_size : i + 1])) 16 | print("----------------------") 17 | # assert np.allclose( 18 | # sdft.coefficients, np.fft.fft(X[i + 1 - window_size : i + 1]) 19 | # ) 20 | 21 | 22 | def test_dft_time(): 23 | import time 24 | 25 | X = np.random.randn(1000000) 26 | 27 | sdft = SDFT(10) 28 | start_time = time.time() 29 | for x in X: 30 | sdft = sdft.update(x) 31 | print("sdft", time.time() - start_time) 32 | 33 | start_time = time.time() 34 | for i in range(len(X) - 10): 35 | np.fft.fft(X[i : i + 10]) 36 | 37 | print("np", time.time() - start_time) 38 | -------------------------------------------------------------------------------- /test/test_spot.py: -------------------------------------------------------------------------------- 1 | from streamad.util import StreamGenerator, UnivariateDS 2 | from streamad.model import SpotDetector 3 | 4 | 5 | def test_score(): 6 | ds = UnivariateDS() 7 | stream = StreamGenerator(ds.data) 8 | detector = SpotDetector() 9 | for x in stream.iter_item(): 10 | score = detector.fit_score(x) 11 | 12 | if score is not None: 13 | assert type(score) is float 14 | -------------------------------------------------------------------------------- /test/test_sr.py: -------------------------------------------------------------------------------- 1 | from streamad.util import StreamGenerator, UnivariateDS 2 | from streamad.model import SRDetector 3 | 4 | 5 | def test_score(): 6 | ds = UnivariateDS() 7 | stream = StreamGenerator(ds.data) 8 | detector = SRDetector() 9 | for x in stream.iter_item(): 10 | score = detector.fit_score(x) 11 | 12 | if score is not None: 13 | assert type(score) is float 14 | -------------------------------------------------------------------------------- /test/test_stats.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from streamad.util import ( 3 | MultivariateDS, 4 | StreamGenerator, 5 | StreamStatistic, 6 | UnivariateDS, 7 | ) 8 | 9 | 10 | def test_uni_stats(): 11 | ds = UnivariateDS() 12 | data = ds.data 13 | stream = StreamGenerator(data) 14 | stats = StreamStatistic() 15 | 16 | for X in stream.iter_item(): 17 | stats.update(X) 18 | 19 | assert stats.get_max() == np.max(data) 20 | assert stats.get_min() == np.min(data) 21 | assert abs(stats.get_sum() - np.sum(data)) < 1e-5 22 | assert abs(stats.get_mean() - np.mean(data)) < 1e-5 23 | assert abs(stats.get_std() - np.std(data)) < 1e-5 24 | assert abs(stats.get_var() - np.var(data)) < 1e-5 25 | 26 | 27 | def test_multi_stats(): 28 | ds = MultivariateDS() 29 | data = ds.data 30 | stream = StreamGenerator(data) 31 | stats = StreamStatistic() 32 | 33 | for X in stream.iter_item(): 34 | stats.update(X) 35 | 36 | assert ( 37 | sum([abs(i - j) for i, j in zip(stats.get_max(), np.max(data, axis=0))]) 38 | < 1e-5 39 | ) 40 | 41 | assert ( 42 | sum([abs(i - j) for i, j in zip(stats.get_min(), np.min(data, axis=0))]) 43 | < 1e-5 44 | ) 45 | 46 | assert ( 47 | sum([abs(i - j) for i, j in 
zip(stats.get_sum(), np.sum(data, axis=0))]) 48 | < 1e-5 49 | ) 50 | 51 | assert ( 52 | sum( 53 | [ 54 | abs(i - j) 55 | for i, j in zip(stats.get_mean(), np.mean(data, axis=0)) 56 | ] 57 | ) 58 | < 1e-5 59 | ) 60 | 61 | assert ( 62 | sum([abs(i - j) for i, j in zip(stats.get_std(), np.std(data, axis=0))]) 63 | < 1e-5 64 | ) 65 | 66 | assert ( 67 | sum([abs(i - j) for i, j in zip(stats.get_var(), np.var(data, axis=0))]) 68 | < 1e-5 69 | ) 70 | 71 | 72 | def test_windowed_uni_stats(): 73 | 74 | ds = UnivariateDS() 75 | data = ds.data 76 | stream = StreamGenerator(data) 77 | stats = StreamStatistic(is_global=False, window_len=10) 78 | 79 | for X in stream.iter_item(): 80 | stats.update(X) 81 | 82 | assert stats.get_max() == np.max(data[-10:]) 83 | assert stats.get_min() == np.min(data[-10:]) 84 | assert stats.get_sum() == np.sum(data[-10:]) 85 | assert stats.get_mean() == np.mean(data[-10:]) 86 | assert stats.get_std() == np.std(data[-10:]) 87 | assert stats.get_var() == np.var(data[-10:]) 88 | 89 | 90 | def test_windows_multi_stats(): 91 | 92 | ds = MultivariateDS() 93 | data = ds.data 94 | stream = StreamGenerator(data) 95 | stats = StreamStatistic(is_global=False, window_len=10) 96 | 97 | for X in stream.iter_item(): 98 | stats.update(X) 99 | 100 | assert ( 101 | sum( 102 | [ 103 | abs(i - j) 104 | for i, j in zip(stats.get_max(), np.max(data[-10:], axis=0)) 105 | ] 106 | ) 107 | < 1e-5 108 | ) 109 | 110 | assert ( 111 | sum( 112 | [ 113 | abs(i - j) 114 | for i, j in zip(stats.get_min(), np.min(data[-10:], axis=0)) 115 | ] 116 | ) 117 | < 1e-5 118 | ) 119 | 120 | assert ( 121 | sum( 122 | [ 123 | abs(i - j) 124 | for i, j in zip(stats.get_sum(), np.sum(data[-10:], axis=0)) 125 | ] 126 | ) 127 | < 1e-5 128 | ) 129 | 130 | assert ( 131 | sum( 132 | [ 133 | abs(i - j) 134 | for i, j in zip(stats.get_mean(), np.mean(data[-10:], axis=0)) 135 | ] 136 | ) 137 | < 1e-5 138 | ) 139 | 140 | assert ( 141 | sum( 142 | [ 143 | abs(i - j) 144 | for i, j in zip(stats.get_std(), np.std(data[-10:], axis=0)) 145 | ] 146 | ) 147 | < 1e-5 148 | ) 149 | 150 | assert ( 151 | sum( 152 | [ 153 | abs(i - j) 154 | for i, j in zip(stats.get_var(), np.var(data[-10:], axis=0)) 155 | ] 156 | ) 157 | < 1e-5 158 | ) 159 | -------------------------------------------------------------------------------- /test/test_xstream.py: -------------------------------------------------------------------------------- 1 | from streamad.util import StreamGenerator, UnivariateDS, MultivariateDS 2 | from streamad.model import xStreamDetector 3 | 4 | 5 | def test_score(): 6 | ds = UnivariateDS() 7 | stream = StreamGenerator(ds.data) 8 | detector = xStreamDetector() 9 | for x in stream.iter_item(): 10 | score = detector.fit_score(x) 11 | 12 | if score is not None: 13 | assert type(score) is float 14 | 15 | 16 | def test_multi_score(): 17 | ds = MultivariateDS() 18 | stream = StreamGenerator(ds.data) 19 | detector = xStreamDetector() 20 | for x in stream.iter_item(): 21 | score = detector.fit_score(x) 22 | 23 | if score is not None: 24 | assert type(score) is float 25 | -------------------------------------------------------------------------------- /test/test_zscore.py: -------------------------------------------------------------------------------- 1 | from streamad.util import StreamGenerator, UnivariateDS 2 | from streamad.model import ZScoreDetector 3 | 4 | 5 | def test_score(): 6 | ds = UnivariateDS() 7 | stream = StreamGenerator(ds.data) 8 | detector = ZScoreDetector() 9 | for x in stream.iter_item(): 10 | score = 
detector.fit_score(x) 11 | 12 | if score is not None: 13 | assert type(score) is float 14 | --------------------------------------------------------------------------------
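Taken together, the tests above outline the intended end-to-end flow: a dataset is streamed item by item, a detector produces a raw anomaly score, and a calibrator normalizes it into [0, 1]. A minimal sketch of that flow, mirroring test_calibrator.py; the 0.9 alert threshold is an illustrative choice rather than a library default.

from streamad.model import KNNDetector
from streamad.process import ZScoreCalibrator
from streamad.util import StreamGenerator, UnivariateDS

ds = UnivariateDS()
detector = KNNDetector()
calibrator = ZScoreCalibrator(sigma=2, extreme_sigma=3)

for i, x in enumerate(StreamGenerator(ds.data).iter_item()):
    raw = detector.fit_score(x)        # may be None early in the stream
    score = calibrator.normalize(raw)  # calibrated anomaly score in [0, 1]
    if score is not None and score > 0.9:
        print(f"Possible anomaly at index {i}: score={score:.3f}")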